#!/usr/bin/env python
# Copyright 2013,2016,2019 by Akkana Peck:
# share and enjoy under the GPL v2 or later.
"""
These are the base classes for metapho images and taggers.
Programs with better UI can inherit from these classes.
"""
# Image and Tagger classes have to be defined here in order for
# other files to be able to use them as metapho.Image rather than
# metapho.Image.Image. I haven't found any way that lets me split
# the classes into separate files. Sigh!
import sys, os
import re
import collections # for OrderedDict
import shlex
from itertools import takewhile
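# os.path.commonprefix compares character by character rather than
# by whole path components, so it can return a partial directory name; see
# http://rosettacode.org/wiki/Find_common_directory_path#Python
# A replacement that works on whole components: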
def commonprefix(paths):
    """Return the leading run of whole path components shared by all paths.

    Unlike os.path.commonprefix, which compares character by character,
    this compares complete components, so "/a/bc" and "/a/bd" share "/a"
    rather than "/a/b".

    paths is an iterable of path strings that use os.path.sep as the
    separator. The shared components are returned joined with os.path.sep.
    The result is the empty string when paths is empty, and also when
    the only thing absolute paths share is the root.
    """
    sep = os.path.sep
    shared = []
    # zip(*...) yields one tuple per directory level and stops at the
    # end of the shortest path.
    for level in zip(*(p.split(sep) for p in paths)):
        if any(name != level[0] for name in level[1:]):
            break
        shared.append(level[0])
    return sep.join(shared)
class Image:
    """An image, with additional info such as rotation and tags.

    An Image compares equal to another object that has ``filename`` and
    ``tags`` attributes when both match, and to anything else (normally
    a path string) when that object equals ``filename``.  The ordering
    comparisons look at ``filename`` only.  Because ``__eq__`` is
    overridden without ``__hash__``, instances are not hashable.
    """

    # A list of all the Image objects the program knows about.
    # Note that this is a class variable! So if you finish using
    # one set of files and want to continue using the class in the
    # same program (or in a suite of automated tests), you may
    # need to clear this out.
    g_image_list = []

    def __init__(self, filename, displayed=True):
        """Initialize an image filename.
        Pass displayed=False if this image isn't to be shown
        in the current session, only used for remembering
        previously set tags.
        """
        # filename is an absolute path
        self.filename = os.path.abspath(filename)

        # List of tag indices (see Tagger.tag_list)
        self.tags = []

        self.displayed = displayed

        # Rotation of the image relative to what it is on disk.
        # None means we don't know yet, 0 means stay at 0.
        # Note: use 270 for counter-clockwise rotation, not -90.
        self.rot = None

    def __repr__(self):
        """Return a one-line description: filename, then rotation and
        tags when they are set.
        """
        desc = "Image '%s'" % self.filename
        if self.rot:
            desc += " (rotation %s)" % self.rot
        if self.tags:
            desc += ": Tags: " + repr(self.tags)
        return desc

    def __eq__(self, other):
        """Equal to an image-like object with the same filename and tags,
        or to any other object that equals our filename.
        """
        if hasattr(other, 'filename') and hasattr(other, 'tags'):
            return self.filename == other.filename and self.tags == other.tags
        return other == self.filename

    # The ordering comparisons use the filename only and expect
    # other to have a filename attribute.
    def __lt__(self, other):
        return self.filename < other.filename

    def __gt__(self, other):
        return self.filename > other.filename

    def __le__(self, other):
        return self.filename <= other.filename

    def __ge__(self, other):
        return self.filename >= other.filename

    def delete(self):
        """Delete the image file FROM DISK, and the image object
        from the imageList. DOES NOT ASK FOR CONFIRMATION --
        do that (if desired) from the calling program.
        Raises OSError if the file can't be removed, and ValueError
        if the image isn't in the global image list.
        """
        print("Deleting", self.filename)
        os.unlink(self.filename)
        Image.g_image_list.remove(self)

    @classmethod
    def image_index(cls, filename):
        """Find a name in the global image list. Return index, or None."""
        for i, img in enumerate(cls.g_image_list):
            if img.filename == filename:
                return i
        return None

    @classmethod
    def find_nonexistent_files(cls):
        """Returns a sorted list of the filenames in the imagelist
        that don't exist on disk. Each filename appears only once.
        """
        return sorted({im.filename for im in cls.g_image_list
                       if not os.path.exists(im.filename)})

    @classmethod
    def clean_up_nonexistent_files(cls, topdir):
        """For any file that was referenced in a tag file but doesn't
        exist on disk, see if perhaps it's been moved to a different
        subdirectory under topdir. If so, adjust file path appropriately.
        Files that can't be found anywhere under topdir are removed
        from the global image list.

        Files are matched by basename. If several missing files share
        a basename, a warning is printed and only the first (in sorted
        order) is handled.
        """
        # Map basename -> the missing path we'll try to relocate.
        missing = {}
        for path in cls.find_nonexistent_files():
            base = os.path.basename(path)
            if base in missing:
                print("Warning: multiple files named", base)
            else:
                missing[base] = path

        unresolved = set(missing)
        for root, dirs, files in os.walk(topdir):
            for f in files:
                if f not in unresolved:
                    continue
                old = missing[f]
                # Keep the "filename is an absolute path" invariant
                # even when topdir is relative.
                new = os.path.abspath(os.path.join(root, f))
                relocated = False
                # Update every entry for this path, not just the first,
                # in case the list holds duplicates.
                for img in cls.g_image_list:
                    if img.filename == old:
                        img.filename = new
                        relocated = True
                if not relocated:
                    print("Eek!", old,
                          "has vanished from the global image list")
                unresolved.remove(f)

        # Now we've adjusted paths for any file that's moved.
        # But what about files that have simply been removed?
        # Those are still in unresolved.
        if unresolved:
            gone = {missing[base] for base in unresolved}
            # Slice-assign so the list object itself stays the same.
            cls.g_image_list[:] = [img for img in cls.g_image_list
                                   if img.filename not in gone]
class Tagger(object):
    """Manages tags for images.
    """

    # Extensions we explicitly don't handle that might nevertheless
    # be in the same directory as images.
    # Can be overridden with a whitespace-separated list in the
    # environment variable NOTAGS_SKIP_EXTENSIONS.
    SKIP_EXTENSIONS = (
        os.environ["NOTAGS_SKIP_EXTENSIONS"].split()
        if "NOTAGS_SKIP_EXTENSIONS" in os.environ
        else [
            ".cr2", ".arw", ".xcf",
            ".mvi", ".avi", ".mov", ".thm", ".mp4", ".mkv",
            ".pto", ".txt", ".wav", ".mp3",
            ".xml"
        ]
    )

    # Directory names (regular expressions, applied with re.match)
    # that don't need tags. Can be overridden with a whitespace-separated
    # list in the environment variable NOTAGS_IGNORE_DIRNAMES.
    IGNORE_DIRNAMES = (
        os.environ["NOTAGS_IGNORE_DIRNAMES"].split()
        if "NOTAGS_IGNORE_DIRNAMES" in os.environ
        else ["html", "web", "bad", ".*_assets$"]
    )

    def __init__(self):
        """tagger: an object to manage metapho image tags"""

        # The actual per-image lists of tags live in the Image class.
        # Each image has img.tags, which is a list of tag indices.

        # The category list is an OrderedDict
        # { "First category": [ 3, 5, 11 ] }
        # means category 0 has the name "First category" and includes
        # tags 3, 5 and 11 from the tag_list.
        self.categories = collections.OrderedDict()

        # The tag list is a list of all tags we know about (strings).
        # A tag may be in several categories.
        # Images refer to tags by their index in this list, so entries
        # must never be removed or reordered.
        self.tag_list = []

        # Files from which we've read tags (named Tags or Keywords)
        self.tagfiles = []

        # the directory common to them, where we'll try to store tags
        self.commondir = None

        # Have any tags changed during this run?
        # Don't update the Tags file if the user doesn't change anything.
        self.changed = False

        # What category are we currently processing? Default is Tags.
        self.current_category = None

        # All the Tags files we read to initialize.
        # We don't necessarily use this, but callers might want to know.
        self.all_tags_files = []

    def __repr__(self):
        """Returns a string summarizing all known images and tags,
        suitable for printing on stdout or pasting into a Tags file.
        Filenames under self.commondir are written relative to it.
        """
        # Only strip commondir when it is a whole leading directory:
        # "/a/b" must not be stripped from "/a/bc/x.jpg".
        if self.commondir is None:
            prefix = None
        elif self.commondir.endswith(os.path.sep):
            prefix = self.commondir
        else:
            prefix = self.commondir + os.path.sep

        chunks = []
        for cat in sorted(self.categories):
            chunks.append('\ncategory ' + cat + '\n\n')

            # self.categories[cat] is a list of numeric tag indices,
            # so sorting tags would require a lot more code
            # and there's no particular reason to.
            for tagno in self.categories[cat]:
                tagstr = self.tag_list[tagno]

                # No empty tag strings
                if tagstr.strip() == '':
                    continue

                # All the images with this tag, sorted by filename.
                tagged = sorted(img for img in Image.g_image_list
                                if tagno in img.tags)

                names = []
                for img in tagged:
                    filename = img.filename
                    if prefix is not None and filename.startswith(prefix):
                        filename = filename[len(prefix):]
                    if ' ' in filename:
                        filename = '"' + filename + '"'
                    names.append(filename)

                # Tags with no images aren't written out.
                if names:
                    chunks.append("tag %s : " % tagstr
                                  + ' '.join(names) + '\n')

        return ''.join(chunks)

    def rename_category(self, old, new):
        """Rename category old to new, keeping the category order.
        If old was the current category, new becomes the current one.
        """
        renamed = [(new if key == old else key, tags)
                   for key, tags in self.categories.items()]
        # Refill the same OrderedDict so other references stay valid.
        self.categories.clear()
        for key, tags in renamed:
            self.categories[key] = tags

        # Don't leave current_category naming a key that no longer exists.
        if self.current_category == old:
            self.current_category = new

    def write_tag_file(self):
        """Save the current set of tags to a Tags file chosen from
        the top-level directory used in the images we've seen.
        If there was a previous Tags file there, it will be saved
        as Tags.bak.
        Does nothing if no tags have changed.
        """
        if not self.changed:
            print("No tags changed; not rewriting Tags file")
            return

        outpath = os.path.join(self.commondir, "Tags")
        print("Saving to", outpath)

        # Build the contents before touching anything on disk, so an
        # error while formatting can't leave a truncated Tags file.
        contents = str(self)

        if os.path.exists(outpath):
            # os.replace overwrites an existing Tags.bak on every platform.
            os.replace(outpath, outpath + ".bak")
        with open(outpath, "w") as outfile:
            outfile.write(contents)

    def check_commondir(self, d):
        """Keep track of the dir common to all directories we use:
        XXX commondir code is still somewhat experimental.
        """
        if self.commondir is None:
            self.commondir = d
        else:
            # Not os.path.commonprefix: that one isn't restricted
            # to whole path components.
            self.commondir = commonprefix([self.commondir, d])

    def process_tag(self, tagname, filenames):
        """After reading a tag from a tags file, add it to the global
        tags list if it isn't there already, and add the given filenames
        to it.
        The tag is also added to the current category, which is created
        if it doesn't exist yet.
        """
        try:
            tagindex = self.tag_list.index(tagname)
        except ValueError:
            tagindex = len(self.tag_list)
            self.tag_list.append(tagname)

        # Don't list a tag twice in the same category, or it would be
        # written out twice.
        cat_tags = self.categories.setdefault(self.current_category, [])
        if tagindex not in cat_tags:
            cat_tags.append(tagindex)

        # Search for images matching the names in filenames
        # XXX pathname issue here: filenames in tag files generally don't
        # have absolute pathnames, so we're only matching basenames and
        # there could be collisions.
        for fil in filenames:
            for img in Image.g_image_list:
                if img.filename.endswith(fil) and tagindex not in img.tags:
                    img.tags.append(tagindex)
                    break
            else:
                # No image took the tag. Before adding a non-displayed
                # image, make sure this exact path isn't already in the
                # list, so we don't create a duplicate entry.
                abspath = os.path.abspath(fil)
                existing = None
                for img in Image.g_image_list:
                    if img.filename == abspath:
                        existing = img
                        break
                if existing is None:
                    newim = Image(fil, displayed=False)
                    newim.tags.append(tagindex)
                    Image.g_image_list.append(newim)
                elif tagindex not in existing.tags:
                    existing.tags.append(tagindex)

    def add_tag(self, tag, img):
        """Add a tag to the given image.
        img is a metapho.Image.
        tag may be a string, which can be a new string or an existing one,
        or an integer index into the tag list.
        A string tag is also added to the current category, which is
        created if it doesn't exist yet.
        Return the index (in the global tags list) of the tag just added.
        """
        self.changed = True

        if isinstance(tag, int):
            if tag not in img.tags:
                img.tags.append(tag)
            return tag

        # Else it's a string. Is it already in the tag list?
        try:
            tagno = self.tag_list.index(tag)
        except ValueError:
            # Make a new tag.
            tagno = len(self.tag_list)
            self.tag_list.append(tag)

        cat_tags = self.categories.setdefault(self.current_category, [])
        if tagno not in cat_tags:
            cat_tags.append(tagno)
        # Same check as for integer tags: never add an index twice.
        if tagno not in img.tags:
            img.tags.append(tagno)
        return tagno

    def remove_tag(self, tag, img):
        """Remove a tag from the given image.
        tag may be an integer index into the tag list, or a tag string.
        Nothing happens if the tag is unknown or the image doesn't have it.
        The tag stays in the global tag list: images refer to tags by
        index, so deleting an entry would change the meaning of every
        later index.
        """
        self.changed = True

        if isinstance(tag, int):
            tagno = tag
        else:
            try:
                tagno = self.tag_list.index(tag)
            except ValueError:
                return

        if tagno in img.tags:
            img.tags.remove(tagno)

    def toggle_tag(self, tagno, img):
        """Toggle tag number tagno for the given img."""
        self.changed = True

        if tagno in img.tags:
            img.tags.remove(tagno)
            return

        # It's not there yet. tagno isn't checked against the
        # global tag list.
        img.tags.append(tagno)

    def match_tag(self, pattern):
        """Intended to return a list of tags matching the pattern.
        Not implemented yet: currently always returns None.
        """
        return None

    def find_untagged_files(self, topdir):
        """Return a list of untagged files and a list of directories
        in which nothing is tagged, under topdir.
        All returned paths are absolute.
        Extensions are compared with SKIP_EXTENSIONS case-insensitively,
        so e.g. both .cr2 and .CR2 are skipped.
        """
        untagged_files = []
        untagged_dirs = []

        # Build the lookups once rather than scanning lists for every file.
        known = {img.filename for img in Image.g_image_list}
        skip_exts = {ext.lower() for ext in self.SKIP_EXTENSIONS}

        for root, dirs, files in os.walk(topdir):
            # Prune ignored directories in place so os.walk
            # doesn't descend into them.
            dirs[:] = [d for d in dirs
                       if not self.ignore_directory(d, root)]

            some_local_tags = False
            local_untagged = []
            nfiles = 0

            for f in files:
                if f.startswith("Tags") or f.startswith("Keywords"):
                    continue

                # Assume all image files will have an extension
                if '.' not in f:
                    continue

                # Filter out file extensions we know we don't handle:
                base, ext = os.path.splitext(f)
                if ext.lower() in skip_exts:
                    continue

                # Now we have a file that should be tagged. Is it?
                nfiles += 1
                filepath = os.path.abspath(os.path.join(root, f))
                if filepath in known:
                    some_local_tags = True
                else:
                    local_untagged.append(filepath)

            if some_local_tags:  # Something was tagged in this root
                untagged_files += local_untagged
            elif nfiles:         # There are files, but nothing was tagged
                untagged_dirs.append(os.path.abspath(root))

        return untagged_files, untagged_dirs

    @classmethod
    def ignore_directory(cls, d, path=None):
        """Detect directory names that don't need to be indexed separately
        and aren't likely to have a Tags file;
        for instance, those that likely contain copies of what's in
        the parent, or small copies for a web page.
        Also, you can skip tagging by creating a file named NoTags.

        d is the directory's name and path, if given, is its parent.
        The patterns in IGNORE_DIRNAMES are applied with re.match,
        i.e. anchored at the start of the name only.
        """
        for ipat in cls.IGNORE_DIRNAMES:
            if re.match(ipat, d):
                return True

        if path and os.path.exists(os.path.join(path, d, "NoTags")):
            return True

        return False

    @staticmethod
    def print_files_by_directory(filelist):
        """Given a list of pathnames, group them by which directory
        they belong to and print them in an organized way.
        """
        by_dir = {}
        for f in filelist:
            # Split into dirname and basename:
            dn, bn = os.path.split(f)
            by_dir.setdefault(dn, []).append(bn)

        for d in sorted(by_dir):
            # Files with no directory part get no heading.
            if d.strip():
                print(' %s:' % d)
            print(Tagger.split_by_line_length(' '.join(by_dir[d]), 74, ' '))

    @staticmethod
    def split_by_line_length(s, linelen, prefix=''):
        """Given a long string, split it into lines no longer than linelen,
        with each line optionally prefixed, e.g. with indentation.
        The prefix isn't counted in the line length.
        Currently this splits only at spaces, not tabs.
        A word longer than linelen is never broken, so it ends up
        on an overlong line of its own.
        """
        lines = []
        while len(s) > linelen:
            # The last space at or before position linelen.
            cut = s.rfind(' ', 0, linelen + 1)
            if cut < 0:
                # The first word is too long to fit:
                # break at the first space after it, if any.
                cut = s.find(' ', linelen + 1)
                if cut < 0:
                    break
            # Drop the spaces at the break.
            lines.append(prefix + s[:cut].rstrip(' '))
            s = s[cut + 1:]

        lines.append(prefix + s)
        return '\n'.join(lines)
def Usage():
    """Print a usage message, including the current SKIP_EXTENSIONS and
    IGNORE_DIRNAMES settings, then exit with status 0.
    """
    progname = os.path.basename(sys.argv[0])
    print("Usage:", progname)
    print()
    print("""Find directories under the current one that have image files
but lack a file named either Tags or Keywords.""")
    print()
    print(progname, "will ignore files with the following extensions:")
    print(' ', ' '.join(Tagger.SKIP_EXTENSIONS))
    print(" (you can configure that with an environment variable,")
    print(" e.g. export NOTAGS_SKIP_EXTENSIONS='.cr2 .mp3')")
    print(progname, "will ignore directories with these names (regex):")
    print(' ', ' '.join(Tagger.IGNORE_DIRNAMES))
    print(" (configure that with the environment variable "
          "NOTAGS_IGNORE_DIRNAMES)")
    print(" as well as directories with the same name "
          "as the parent directory,\n e.g. yosemite/yosemite")
    print("It will also ignore any directory containing a file named NoTags.")
    sys.exit(0)
def main():
    """Read tags and report any inconsistencies:
    images in the Tags file that don't exist on disk,
    images on disk that aren't in ./Tags.
    With -h or --help as the first argument, print usage and exit.
    """
    if len(sys.argv) > 1 and sys.argv[1] in ('-h', '--help'):
        Usage()

    tagger = Tagger()
    # NOTE(review): read_tags isn't defined on Tagger in the part of
    # this file visible here -- confirm it's provided elsewhere.
    tagger.read_tags('.')
    print()

    curdir = os.path.abspath('.')
    # curdir with exactly one trailing separator, so that only paths
    # really inside curdir match (not a sibling like curdir + "2"),
    # and so that a curdir of "/" works too.
    if curdir.endswith(os.path.sep):
        curprefix = curdir
    else:
        curprefix = curdir + os.path.sep

    def rel_dirs(dirs):
        """Take absolute paths and make them relative to curdir.
        Paths outside curdir are returned unchanged;
        curdir itself (or an empty path) becomes '.'.
        """
        rel = []
        for p in dirs:
            if p == curdir:
                p = ''
            elif p.startswith(curprefix):
                p = p[len(curprefix):]
            rel.append(p if p else '.')
        return rel

    # Listing tagger.all_tags_files here might be interesting information
    # but it's too long a list when evaluating a year's photos.

    nef = Image.find_nonexistent_files()
    if nef:
        print("Tagged files that don't exist on disk:", ' '.join(rel_dirs(nef)))
        print()

    utf, utd = tagger.find_untagged_files('.')
    if utd:
        print("Directories that need a Tags file:", ' '.join(rel_dirs(utd)))
        print()

    if utf:
        print("Individual files that aren't tagged:")
        tagger.print_files_by_directory(rel_dirs(utf))
# Run the consistency report only when executed as a script,
# not when imported as a module.
if __name__ == "__main__":
    main()