Nanogallery Image Wrangler

The reason for rebuilding my site with Hugo was to add a nice gallery to display the fruits of my new photography hobby. I built a Hugo shortcode for the wonderful Nanogallery, but I still needed a way to automate the process of creating thumbnails, resizing images for the web, and collecting metadata.

The script below handles all the gruntwork. Point it at a directory of images and it will create web-sized copies of over-large images, generate cropped thumbnails, build a tiny base64-encoded "dominant colors" placeholder GIF for each photo, and collect EXIF metadata into a content.json file for the gallery to consume.

It can be run incrementally to pick up new images or handle deletions.

It requires Python >= 3.6; the only dependency is the Pillow library, a maintained, Python 3–compatible fork of PIL. I recommend installing it in a virtual environment.

As of 2020-01-31 it’s functional, but still very much a work in progress.

$ ../pyenv/bin/python scripts/gallery.py --help
usage: gallery.py [-h] [--dry-run] [--force] [--force-resize] DIR

positional arguments:
  DIR             Directory holding images and content.json

optional arguments:
  -h, --help      show this help message and exit
  --dry-run       Don't modify any files
  --force         Reprocess all files
  --force-resize  Reprocess all files and recreate maxpect images

scripts/gallery.py:

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
#!/usr/bin/env python3
#
# Copyright (C) 2020 Dirk Bergstrom <dirk@otisbean.com>. All Rights Reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#

import json
import re
import datetime
from io import BytesIO
from pathlib import Path
import argparse
import sys
import logging
import binascii

from PIL import Image, ImageFilter, ExifTags


# Name of the JSON metadata file read from / written to the gallery directory.
CONTENT_FILE = "content.json"
# Bounding box for the web-sized "maxpect" copies of oversized originals.
MAX_WIDTH = 1400
MAX_HEIGHT = 1200
# Target height (pixels) for generated thumbnails; width follows aspect ratio.
THUMBNAIL_HEIGHT = 225

# Root-logger setup; DEBUG so the per-file progress messages are visible.
logging.basicConfig(level=logging.DEBUG,
    format="%(asctime)s %(levelname)s: %(message)s")


def isoformat(timeint):
    """Format a Unix timestamp as "YYYY-MM-DD HH:MM:SS" (local time)."""
    stamp = datetime.datetime.fromtimestamp(timeint)
    return stamp.strftime("%Y-%m-%d %H:%M:%S")


def exif_to_isodate(exif_date):
    """Convert an EXIF "YYYY:MM:DD HH:MM:SS" datestamp to "YYYY-MM-DD HH:MM:SS"."""
    parsed = datetime.datetime.strptime(exif_date, "%Y:%m:%d %H:%M:%S")
    return parsed.strftime("%Y-%m-%d %H:%M:%S")


def fixexif(val):
    """Strip non-printable characters and surrounding whitespace.

    EXIF text fields are frequently padded with NULs and other garbage;
    keep only printable or whitespace characters, then trim the ends.
    """
    kept = [ch for ch in val if ch.isprintable() or ch.isspace()]
    return "".join(kept).strip()


def _exif_ratio(value):
    """Return (numerator, denominator) for an EXIF rational value.

    Older Pillow releases expose rationals as 2-tuples; newer ones use
    IFDRational (a fractions.Fraction subclass with numerator /
    denominator attributes).  Accept either form.
    """
    if isinstance(value, tuple):
        return value[0], value[1]
    return value.numerator, value.denominator


def read_exif_metadata(img, data):
    """Read EXIF from the photo and map it to Nanogallery data.

    Mutates ``data`` in place; each key is only set when the
    corresponding EXIF tag is present:

    Model, Make & LensModel => exifModel
    Flash => exifFlash  (as "" or "Flash")
    FocalLength => exifFocalLength  (as an integer)
    FNumber => exifFStop  (as '.1f')
    ExposureTime => exifExposure  (as either int seconds or a fraction)
    ISOSpeedRatings => exifIso
    DateTimeOriginal => exifTime
    UserComment => description  ("Caption" field in DigiKam)
    DocumentName => title  ("Name" field in DigiKam)
    """
    # EXIF tag data is a disgusting swamp of badly formatted information
    raw_exif = img._getexif()
    if not raw_exif:
        logging.info("No exif in photo")
        return
    # Translate numeric tag IDs to symbolic names (unknown IDs pass through)
    exif_tags = {ExifTags.TAGS.get(k, k): v for k, v in raw_exif.items()}

    mod = exif_tags.get('Model')
    if mod:
        mod = fixexif(mod)
        make = exif_tags.get('Make')
        if make:
            # Camera makers tend to SHOUT; normalize to title case
            make = " ".join(
                [x.capitalize() for x in fixexif(make).split()])
            mod = "{} {}".format(make, mod)
        lens = exif_tags.get('LensModel')
        if lens:
            mod = "{}; {}".format(mod, fixexif(lens))
        data['exifModel'] = mod

    # exif flash is a bitmask where the first bit is "did it fire"
    flash = exif_tags.get('Flash', 0)
    data['exifFlash'] = "Flash" if (flash & 1) else ""

    fl = exif_tags.get('FocalLength')
    if fl:
        # Rational; guard against a malformed zero denominator
        num, den = _exif_ratio(fl)
        if den:
            data['exifFocalLength'] = int(num / den)

    fn = exif_tags.get('FNumber')
    if fn:
        # Another rational
        num, den = _exif_ratio(fn)
        if den:
            data['exifFStop'] = "{:.1f}".format(num / den)

    et = exif_tags.get('ExposureTime')
    if et:
        # Whole seconds get a trailing double-quote mark; sub-second
        # times are shown as a fraction (e.g. 1/250).
        num, den = _exif_ratio(et)
        if den == 1:
            data['exifExposure'] = '{}"'.format(num)
        else:
            data['exifExposure'] = "{}/{}".format(num, den)

    iso = exif_tags.get('ISOSpeedRatings')
    if iso:
        data['exifIso'] = iso

    dto = exif_tags.get('DateTimeOriginal')
    if dto:
        data['exifTime'] = exif_to_isodate(dto)

    # UserComment => description
    # Along the way we convert newlines to <br> tags and linkify URLs
    uc = exif_tags.get('UserComment')
    if uc:
        # Depending on Pillow version / writer this may be bytes or str
        if isinstance(uc, bytes):
            uc = uc.decode(errors="replace")
        uc = fixexif(uc)
        if uc.startswith("ASCII"):
            # As written by DigiKam the UserComment field has a prefix
            # of 'ASCII\x00\x00\x00' (the NULs are stripped by fixexif)
            uc = uc[5:]
        uc = re.sub("\n", "\n<br>", uc)
        uc = re.sub(r"(https?://\S+)", r'<a href="\1">\1</a>', uc)
        data['description'] = uc

    dn = exif_tags.get('DocumentName')
    if (dn and not
            re.search(r'\.jpg', dn, re.IGNORECASE) and not
            re.search(r'\d{5}', dn)):
        # Looks like an actual title, not just a filename
        data['title'] = dn


def doit(directory, force, force_resize, dry_run):
    """Process a gallery directory and (re)build its content.json.

    Scans ``directory/originals`` for ``*.jpg`` files.  For each new or
    changed file it creates a web-sized "maxpect" copy (when the
    original exceeds MAX_WIDTH x MAX_HEIGHT), a cropped thumbnail, and a
    tiny base64-encoded "dominant colors" GIF, then merges in EXIF
    metadata and writes the whole collection to ``directory/content.json``.

    :param directory: Path to the gallery dir; must contain 'originals'.
    :param force: Reprocess every file, ignoring recorded mtimes.
    :param force_resize: Also recreate maxpect images that already exist.
    :param dry_run: Log and report, but do not write any files.
    """
    orig_dir = directory / "originals"
    if not orig_dir.exists():
        print("Expected to find a sub-directory named 'originals' "
            "containing image files.", file=sys.stderr)
        sys.exit(1)

    # Load the results of the previous run, if any
    content_file = directory / CONTENT_FILE
    if content_file.exists():
        with content_file.open() as cf:
            content = json.load(cf)
    else:
        content = []
    old = {c['filename']: c for c in content}

    new = []
    done = []
    for path in sorted(orig_dir.glob('*.jpg')):
        if not path.is_file():
            continue
        mtime = isoformat(path.stat().st_mtime)
        oe = old.get(path.name)
        if force or oe is None or oe.get('mtime') != mtime:
            # File is new or changed; queue it for processing
            new.append(dict(
                filename=path.name,
                mtime=mtime,
                path=path,
                # Sanitize the stem into a safe element ID
                ID=re.sub(r'[^\w-]', '-', path.stem),
                downloadURL=f"{orig_dir.name}/{path.name}"))
        else:
            # We have up-to-date info for this file
            done.append(oe)

    if not new:
        logging.info("No changes, exiting.")
        return

    resized_dir = directory / "resized"
    # Bug fix: previously this directory was created even on --dry-run
    if not dry_run and not resized_dir.exists():
        resized_dir.mkdir()

    # Process new files
    for data in new:
        path = data.pop("path")
        logging.info("Processing %s", path.name)
        img = Image.open(path)

        # Save the EXIF data so we can write it back out
        exif_bytes = img.info.get('exif', b'')

        if img.width > MAX_WIDTH or img.height > MAX_HEIGHT:
            # Image too large, need maxpect image for web display
            logging.info("Image too large (%d x %d)", img.width, img.height)
            resized_name = f"web-{path.name}"
            resized_path = resized_dir / resized_name
            if resized_path.exists() and not force_resize:
                logging.info("Reading size of existing maxpect")
                maxpect = Image.open(resized_path)
            else:
                logging.info("Making maxpect")
                maxpect = img.copy()
                # thumbnail() modifies in place, preserves aspect ratio.
                # Image.LANCZOS is the best quality and seems plenty fast;
                # Image.BICUBIC is faster but lower quality.
                maxpect.thumbnail(
                    (MAX_WIDTH, MAX_HEIGHT), resample=Image.LANCZOS)
                logging.debug('Saving maxpect as "%s"', resized_path)
                if not dry_run:
                    # Bug fix: the Pillow JPEG keyword is "optimize";
                    # the former "optimized" was silently ignored.
                    maxpect.save(resized_path,
                        quality=90,
                        progressive=True,
                        optimize=True,
                        exif=exif_bytes,
                        icc_profile=img.info.get('icc_profile'))
            data["imgWidth"] = maxpect.width
            data["imgHeight"] = maxpect.height
            data["src"] = f'{resized_dir.name}/{resized_name}'
        else:
            # Small enough to serve the original directly
            data["src"] = data["downloadURL"]
            data["imgWidth"] = img.width
            data["imgHeight"] = img.height

        read_exif_metadata(img, data)
        if "title" not in data:
            # Nothing in EXIF, fall back to the filename
            if not re.search(r'\d{5}', path.name):
                # Doesn't look like a serial number, assume it's text and
                # try to make it pretty.
                data['title'] = " ".join([x.capitalize()
                    for x in re.split(r'[_-]', path.stem)])
            else:
                data['title'] = path.name

        # Make thumbnail, cropping 5% off each edge first
        thumb_path = resized_dir / f"thumb-{path.name}"
        logging.info("Making thumbnail %s", thumb_path)
        crop_coords = (
            img.width / 20,
            img.height / 20,
            img.width - img.width / 20,
            img.height - img.height / 20
        )
        thumb = img.crop(crop_coords)
        # Scale so the height becomes THUMBNAIL_HEIGHT; the width bound
        # never constrains because thumbnail() only shrinks.  (Bug fix:
        # the old "width * hratio" bound wrongly shrank images that were
        # already shorter than THUMBNAIL_HEIGHT.)
        thumb.thumbnail((thumb.width, THUMBNAIL_HEIGHT))
        if not dry_run:
            thumb.save(thumb_path)
        data["srct"] = f"{resized_dir.name}/{thumb_path.name}"
        data["imgtWidth"] = thumb.width
        data["imgtHeight"] = thumb.height

        # Get dominant colors:
        #  resize to <= 15x15, blur, create gif, base64 encode
        # (Fancier method: https://github.com/fengsp/color-thief-py)
        logging.info("Creating 'dominant colors' gif")
        thumb.thumbnail((15, 15))
        blurred = thumb.filter(filter=ImageFilter.BLUR)
        bio = BytesIO()
        blurred.save(bio, format="GIF")
        # Bug fix: newline=False stops b2a_base64 from appending a '\n'
        # that would corrupt the data: URI (requires Python >= 3.6,
        # which this script already mandates).
        gif_encoded = binascii.b2a_base64(
            bio.getvalue(), newline=False).decode('utf8')
        data['imageDominantColors'] = f"data:image/gif;base64,{gif_encoded}"

        done.append(data)

    # FIXME Remove orphaned thumbs and originals

    # Write the new CONTENT_FILE, newest image first (EXIF capture time
    # preferred over file mtime when present)
    done.sort(key=lambda x: x.get('exifTime', x['mtime']), reverse=True)
    logging.info("Writing %s", content_file)
    if dry_run:
        print(json.dumps(done, indent=1), file=sys.stderr)
    else:
        with content_file.open(mode='w') as fp:
            json.dump(done, fp, indent=1)


if __name__ == '__main__':
    # Command-line front end: parse the arguments and hand off to doit().
    cli = argparse.ArgumentParser()
    cli.add_argument("directory", metavar="DIR", type=str,
        help="Directory holding images and content.json")
    cli.add_argument("--dry-run", action="store_true",
        help="Don't modify any files")
    cli.add_argument("--force", action="store_true",
        help="Reprocess all files")
    cli.add_argument("--force-resize", action="store_true",
        help="Reprocess all files and recreate maxpect images")
    opts = cli.parse_args()
    # --force-resize implies --force
    doit(Path(opts.directory),
         opts.force or opts.force_resize,
         opts.force_resize,
         opts.dry_run)