#!/usr/bin/env python3
#
# Copyright (C) 2020-2022 Dirk Bergstrom <dirk@otisbean.com>. All Rights Reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
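"""Build a static photo gallery manifest.

Scan DIR/originals for JPEG files, create web-sized display copies and
thumbnails in DIR/resized, pull titles and EXIF metadata out of each
image, and write the results to DIR/content.json for consumption by
Nanogallery.

Typical invocation (script and directory names are illustrative):

    python3 make-gallery.py --sorting chron ~/photos/my-album
"""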
import json
import re
import datetime
from io import BytesIO
from pathlib import Path
import argparse
import sys
import logging
import binascii
from fractions import Fraction
from PIL import Image, ImageFilter, ExifTags, TiffImagePlugin
from titlecase import titlecase

CONTENT_FILE = "content.json"
ORDERING_FILE = "image-order.txt"
MAX_WIDTH = 1600
MAX_HEIGHT = 1400
THUMBNAIL_HEIGHT = 225

logging.basicConfig(level=logging.DEBUG,
                    format="%(asctime)s %(levelname)s: %(message)s")


def isoformat(timeint):
    return datetime.datetime.fromtimestamp(timeint) \
        .replace(microsecond=0).isoformat().replace("T", " ")


def exif_to_isodate(exif_date):
    dt = datetime.datetime.strptime(exif_date, "%Y:%m:%d %H:%M:%S")
    return dt.replace(microsecond=0).isoformat().replace("T", " ")


def fixexif(val):
    """Remove unprintable characters (other than whitespace) and strip
    leading & trailing whitespace.

    The text fields in EXIF data are full of garbage.
    """
    return "".join([x for x in val if (x.isprintable() or x.isspace())]).strip()


def read_exif_metadata(img, data):
"""Read EXIF from the photo and map it to Nanogallery data.
Model, Make & LensModel => exifModel
Flash => exifFlash (as "" or "Flash")
FocalLength => exifFocalLength (as an integer)
FNumber => exifFStop (as '.1f')
ExposureTime => exifExposure (as either int seconds or a fraction)
    ISOSpeedRatings => exifIso
DateTimeOriginal => exifTime
UserComment => description ("Caption" field in DigiKam)
DocumentName => title ("Name" field in DigiKam)
"""
# EXIF tag data is a disgusting swamp of badly formatted information
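    # (_getexif() is Pillow's legacy flat-dict accessor; current Pillow also
    # offers Image.getexif(), but the flat dict is convenient here.)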
raw_exif = img._getexif()
if not raw_exif:
logging.info("No exif in photo")
return
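    # Translate numeric EXIF tag IDs into human-readable tag names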
exif_tags = {ExifTags.TAGS.get(k, k): v for k, v in raw_exif.items()}
mod = exif_tags.get('Model')
if mod:
mod = fixexif(mod)
make = exif_tags.get('Make')
if make:
make = titlecase(fixexif(make))
mod = "{} {}".format(make, mod)
lens = exif_tags.get('LensModel')
if lens:
mod = "{}; {}".format(mod, fixexif(lens))
data['exifModel'] = mod
# exif flash is a bitmask where the first bit is "did it fire"
flash = exif_tags.get('Flash', 0)
data['exifFlash'] = "Flash" if (flash & 1) else ""
fl = exif_tags.get('FocalLength')
if fl:
if isinstance(fl, tuple):
# A tuple. One hopes that the first element is always the focal
# length, and the second 1, but...
data['exifFocalLength'] = int(fl[0] / fl[1])
else:
# Hope it's a number...
data['exifFocalLength'] = int(fl)
fn = exif_tags.get('FNumber')
if fn:
if isinstance(fn, tuple):
# Another tuple
data['exifFStop'] = "{:.1f}".format(fn[0] / fn[1])
else:
data['exifFStop'] = "{:.1f}".format(float(fn))
et = exif_tags.get('ExposureTime')
if et:
if isinstance(et, TiffImagePlugin.IFDRational):
            if float(et) > 1:
et = float(et)
else:
et = (et.numerator, et.denominator)
if isinstance(et, tuple):
if et[0] == et[1]:
et = "1"
elif et[1] == 1:
# Integer number of seconds
et = f"{et[0]}"
else:
# Format as a fraction.
# FIXME Should do a better job with times > 1s
et = f"{et[0]}/{et[1]}"
        elif isinstance(et, float):
            # Try to turn floats into something that looks like it came out
            # of a camera UI:
            #   whole seconds: 4
            #   1s - 2s: 1.33
            #   > 2s: 2.4
            #   < 1s: a fraction, e.g. 1/250
            if et == int(et):
                et = f'{et:.0f}'
            elif 1 < et < 2:
                et = f'{et:.2f}'
            elif et >= 2:
                et = f'{et:.1f}'
            else:
                et = str(Fraction(et).limit_denominator())
else:
try:
et = str(et)
except Exception:
et = "???"
data['exifExposure'] = et
iso = exif_tags.get('ISOSpeedRatings')
if iso:
data['exifIso'] = iso
dto = exif_tags.get('DateTimeOriginal')
if dto:
data['exifTime'] = exif_to_isodate(dto)
# UserComment => description
# Along the way we convert newlines to <br> tags and linkify URLs
uc = exif_tags.get('UserComment')
    if uc:
        if isinstance(uc, bytes):
            # Usually bytes, occasionally already str; don't crash on
            # non-UTF-8 bytes either.
            uc = uc.decode(errors="replace")
        uc = fixexif(uc)
if uc.startswith("ASCII"):
# As written by DigiKam the UserComment field has
# a prefix of 'ASCII\x00\x00\x00'
uc = uc[5:]
if uc.startswith("UNICODE"):
# Or sometimes 'UNICODE\x00\x00\x00'
uc = uc[7:]
uc = re.sub("\n", "\n<br>", uc)
uc = re.sub(r"(https?://\S+)", r'<a href="\1">\1</a>', uc)
data['description'] = uc
dn = exif_tags.get('DocumentName')
if (dn and not
re.search(r'\.jpg', dn, re.IGNORECASE) and not
re.search(r'\d{5}', dn)):
# Looks like an actual title, not just a filename
data['title'] = dn


def doit(directory, force, force_resize, sorting, include_originals, dry_run):
    """Process one gallery directory.

    Find new or changed JPEGs in directory/originals, create web-sized
    copies and thumbnails, extract EXIF metadata, and write it all to
    directory/content.json.
    """
orig_dir = directory / "originals"
if not orig_dir.exists():
print("Expected to find a sub-directory named 'originals' "
"containing image files.", file=sys.stderr)
sys.exit(1)
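    # An existing exclude file means originals were excluded on a previous
    # run; treat that as a persistent opt-out.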
rsync_exclude_file = directory / "exclude-originals.txt"
if rsync_exclude_file.exists():
include_originals = False
content_file = directory / CONTENT_FILE
if not content_file.exists():
content = []
else:
with content_file.open() as cf:
content = json.load(cf)
old = {c['filename']: c for c in content}
new = []
done = []
for path in sorted(orig_dir.glob('*.jpg')):
if path.is_file():
# A candidate
mtime = isoformat(path.stat().st_mtime)
oe = old.get(path.name)
if (force or oe is None or oe.get('mtime') != mtime):
# File is new or changed
newfilespec = dict(
filename=path.name,
mtime=mtime,
path=path,
ID=re.sub(r'[^\w-]', '-', path.stem),
)
if include_originals:
newfilespec["downloadURL"] = f"{orig_dir.name}/{path.name}"
new.append(newfilespec)
else:
# We have up-to-date info for this file
done.append(oe)
if len(new) == 0:
logging.info("No changes, exiting.")
return
resized_dir = directory / "resized"
if not resized_dir.exists():
resized_dir.mkdir()
# Process new files
for data in new:
path = data.pop("path")
logging.info("Processing %s", path.name)
img = Image.open(path)
# Save the EXIF data so we can write it back out
exif_bytes = img.info.get('exif', b'')
if img.width > MAX_WIDTH or img.height > MAX_HEIGHT:
# Image too large, need maxpect image for web display
logging.info("Image too large (%d x %d)", img.width, img.height)
resized_name = f"web-{path.name}"
resized_path = resized_dir / resized_name
if resized_path.exists() and not force_resize:
logging.info("Reading size of existing maxpect")
maxpect = Image.open(resized_path)
else:
logging.info("Making maxpect")
maxpect = img.copy()
# thumbnail() method modifies image, preserves aspect ratio.
# Image.LANCZOS is the best quality and seems plenty fast
# Image.BICUBIC is faster but lower quality.
maxpect.thumbnail(
(MAX_WIDTH, MAX_HEIGHT), resample=Image.LANCZOS)
logging.debug('Saving maxpect as "%s"', resized_path)
if not dry_run:
                    maxpect.save(resized_path,
                                 quality=90,
                                 progressive=True,
                                 optimize=True,
                                 exif=exif_bytes,
                                 icc_profile=img.info.get('icc_profile'))
data["imgWidth"] = maxpect.width
data["imgHeight"] = maxpect.height
data["src"] = f'{resized_dir.name}/{resized_name}'
        else:
            # Small enough to serve as-is: point straight at the original.
            # (downloadURL is only set when originals are included, so build
            # the path directly to avoid a KeyError.)
            data["src"] = f"{orig_dir.name}/{path.name}"
            data["imgWidth"] = img.width
            data["imgHeight"] = img.height
read_exif_metadata(img, data)
if "title" not in data:
# Nothing in EXIF, use the filename
if not re.search(r'\d{5}', path.name):
# Doesn't look like a serial number, assume it's text and try
# to make it pretty.
data['title'] = titlecase(re.sub(r'[_-]', ' ', path.stem))
else:
data['title'] = path.name
# make thumbnail (cropping to 90%)
thumb_path = resized_dir / f"thumb-{path.name}"
logging.info("Making thumbnail %s", thumb_path)
crop_coords = (
img.width / 20,
img.height / 20,
img.width - img.width / 20,
img.height - img.height / 20
)
thumb = img.crop(crop_coords)
        # Scale so the height lands at THUMBNAIL_HEIGHT, preserving aspect
        hratio = thumb.height / THUMBNAIL_HEIGHT
        thumb.thumbnail((round(thumb.width / hratio), THUMBNAIL_HEIGHT))
if not dry_run:
thumb.save(thumb_path)
data["srct"] = f"{resized_dir.name}/{thumb_path.name}"
data["imgtWidth"] = thumb.width
data["imgtHeight"] = thumb.height
# Get dominant colors
        # Resize to ~15x15, blur, create gif, base64 encode
# (Fancier method: https://github.com/fengsp/color-thief-py)
logging.info("Creating 'dominant colors' gif")
thumb.thumbnail((15, 15))
blurred = thumb.filter(filter=ImageFilter.BLUR)
bio = BytesIO()
blurred.save(bio, format="GIF")
        # newline=False keeps b2a_base64's trailing newline out of the data URI
        gif_encoded = binascii.b2a_base64(
            bio.getvalue(), newline=False).decode('utf8')
# Add to new dict
data['imageDominantColors'] = f"data:image/gif;base64,{gif_encoded}"
        if not include_originals and "downloadURL" in data:
            del data["downloadURL"]
done.append(data)
# FIXME Remove orphaned thumbs and originals
ordering_file = directory / ORDERING_FILE
if ordering_file.exists():
# Put images in the order given in the file
with ordering_file.open() as of:
image_order = [fname.strip() for fname in of.readlines()]
def getindex(entry):
try:
return image_order.index(entry['filename'])
except ValueError:
# Unknown files go at the end
return 9999
done.sort(key=getindex)
else:
if sorting == 'alnum':
# Sort images by title
done.sort(key=lambda x: x.get('title', x['filename']))
elif sorting in ('revchron', 'chron'):
# Sort images by exif time & mtime
done.sort(key=lambda x: x.get('exifTime', x['mtime']),
reverse=(sorting == 'revchron'))
else:
raise Exception(f"Unsupported sort order {sorting}")
# Write new CONTENT_FILE
if dry_run:
print(json.dumps(done, indent=1), file=sys.stderr)
else:
# Handle semaphore file for display/exclude originals
if include_originals and rsync_exclude_file.exists():
rsync_exclude_file.unlink()
elif not include_originals:
with open(rsync_exclude_file, "w") as ref:
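                # "H" is rsync's "hide" filter rule: it keeps originals/**
                # out of the transfer when this is used as a filter merge file.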
print("H originals/**\n", file=ref)
# Make symlink to latest thumbnail image
latest = Path(done[0]['srct']).name
symlink_path = resized_dir / 'latest.jpg'
try:
if symlink_path.exists() or symlink_path.is_symlink():
logging.debug("unlinking old symlink %s", symlink_path)
symlink_path.unlink()
logging.info("Creating 'latest.jpg' symlink %s -> %s", symlink_path, latest)
            symlink_path.symlink_to(latest)
        except OSError as e:
            logging.error("Failed to create 'latest.jpg' symlink: %s", e)
# Write JSON
logging.info("Writing %s", directory / CONTENT_FILE)
with (directory / CONTENT_FILE).open(mode='w') as fp:
json.dump(done, fp, indent=1)


if __name__ == '__main__':
parser = argparse.ArgumentParser(
epilog=f"If the file `{ORDERING_FILE}` is present in the directory, "
"images will be presented in the order listed there.")
parser.add_argument("directory", metavar="DIR", type=str,
help="Directory holding images and content.json")
parser.add_argument("--sorting", choices=["revchron", "chron", "alnum"],
default="revchron", help="Sort order")
parser.add_argument("--exclude-originals", action="store_true",
help="Don't publish original images or download links.")
parser.add_argument("--dry-run", action="store_true",
help="Don't modify any files")
parser.add_argument("--force", action="store_true",
help="Reprocess all files")
parser.add_argument("--force-resize", action="store_true",
help="Reprocess all files and recreate maxpect images")
args = parser.parse_args()
doit(Path(args.directory),
(args.force or args.force_resize),
args.force_resize,
args.sorting,
not args.exclude_originals,
args.dry_run)