Skip to content

Instantly share code, notes, and snippets.

@cwoffenden
Last active October 3, 2022 17:27
Show Gist options
  • Select an option

  • Save cwoffenden/98780e9009a2d4f62433ea9f77ef4113 to your computer and use it in GitHub Desktop.

Select an option

Save cwoffenden/98780e9009a2d4f62433ea9f77ef4113 to your computer and use it in GitHub Desktop.
Tool to compress a directory of PNG files with bc7enc and log the error metrics
#!/usr/bin/env python3
# Tool to compress a directory of PNG files with bc7enc and log the error
# metrics as a CSV, with the aim to verify that any code changes neither impact
# the quality nor execution time. Since the aim isn't to keep the generated
# files, the outputs are always the same file (deleteme.dds|png), overwriting
# each time. bc7enc is currently limited to loading PNGs, so too is this script.
# Example usage:
#
# ./runbc7enc.py -b 5 -o /Volumes/Temp -x ./bc7enc-mine -l mine.csv -t -s .
#
# This will BC5 compress the contents of the current directory with the custom
# 'bc7enc-mine' executable, storing the generated files in /Volumes/Temp,
# logging the metrics and a hash of the output, but not the processing time.
#
# The optional hash can verify nothing changed, and when tested on the same
# system lodepng's writer appears to be consistent.
#
# Note: the temporary files can be written to RAM disk, if only to stop
# thousands of unnecessary writes to SSD:
#
# Mac, with a 1GB disk at /Volumes/Temp:
# diskutil erasevolume HFS+ 'Temp' `hdiutil attach -nobrowse -nomount ram://2097152`
#
# Linux, with a 1GB disk at /mnt/Temp:
# mkdir /mnt/Temp
# mount -t tmpfs -o size=1g tmpfs /mnt/Temp
#
# TODO: look at New-IscsiVirtualDisk for Windows
#
# Author: Carl Woffenden, Numfum GmbH (this script is released under a CC0 license/Public Domain)
import argparse, glob, hashlib, os, re, shlex, shutil, sys
from pathlib import Path
from subprocess import Popen, PIPE
from typing import Any, Dict, Optional, Pattern, TextIO
# Container for the compression results; see compress_file().
#
class Metrics:
    """Result bundle for a single compression run, filled by compress_file()."""
    # Encoding time in seconds
    time: float = 0.0
    # Maximum error for a single pixel
    maxe: int = 0
    # RMSE for the whole image
    rmse: float = 0.0
    # PSNR for the whole image
    psnr: float = 0.0
    # Hash of the output file
    hash: str = 'N/A'
# Destination log file object (or stdout if no output file was supplied).
log_file: TextIO = sys.stdout

# Destination directory for the generated DDS and other temporary files.
out_path: Optional[Path] = None

# Executable to use.
exe_path: Optional[Path] = None

# Compiled regex pattern to capture numbers (with or without a decimal point).
number_regex: Pattern[str] = re.compile(r'(\d+(?:\.\d+)?)')

# Dictionary of compiled regex patterns to match the metric lines for the
# various encoded BC types, keyed by their colour type, e.g. metrics_regex['r']
# for the red-only BC4. Note: regexes are used because the tool aligns the
# metrics with a variable number of spaces.
#
metrics_regex: Dict[str, Pattern[str]] = {
    key: re.compile(rf'^{label}\s+Max\s+error:', re.IGNORECASE)
    for key, label in (('r', 'Red'), ('rg', 'RG'), ('rgb', 'RGB'), ('rgba', 'RGBA'))
}
# Logs 'line' to the open 'log_file' (or stdout).
#
def log_line(line: str) -> None:
    """Write 'line' plus a newline to the module-level 'log_file', flushing
    immediately so output survives an interrupted run."""
    log_file.write(line + '\n')
    log_file.flush()
# Logs 'line' to stderr. This is also used for general notifications that we
# don't want to go to stdout (so the source can be piped).
#
def err_line(line: Any) -> None:
    """Write 'line' (stringified) plus a newline to stderr, keeping stdout
    clean for piped output."""
    sys.stderr.write(str(line) + '\n')
# Given a path, resolves it to validate that it exists (and removes the / on
# directories, excess .., etc.), returning the result. If the path doesn't
# resolve then an error is shown and the script exits. If no path is passed then
# the 'fallback' is returned. 'path_type' allows the error to be tailored
# (defaulting to the generic 'path')
#
def validate_path(path: Optional[Path], path_type: str = 'path', fallback: Optional[str] = None) -> Optional[Path]:
    """Resolve and return 'path' if it exists, exiting with an error if it
    doesn't; otherwise resolve and return 'fallback' (or None)."""
    if path:
        try:
            return path.resolve(strict=True)
        except FileNotFoundError:
            # An explicitly supplied path that doesn't exist is fatal
            err_line(f'Invalid {path_type}: {path}')
            exit()
    if not (fallback and os.path.exists(fallback)):
        return None
    # The fallback is best-effort: failure to resolve is simply no result
    try:
        return Path(fallback).resolve(strict=True)
    except FileNotFoundError:
        return None
# Generate a hash of a given file. Since this is only for verifying image
# content changes SHA-1 is considered sufficient (a trade-off between processing
# time and hash length in characters, SHA-1 being 40 chars).
#
def generate_hash(path: Path) -> str:
    """Return the SHA-1 hex digest (40 chars) of the file at 'path'.

    Raises FileNotFoundError (propagated from open()) if the file is missing.
    """
    # 'digest' rather than 'hash': don't shadow the builtin
    digest = hashlib.sha1()
    with open(path, 'rb') as file:
        # Read in 16kB chunks so large images needn't fit in memory; iter()
        # with a b'' sentinel stops cleanly at EOF.
        for chunk in iter(lambda: file.read(16384), b''):
            digest.update(chunk)
    return digest.hexdigest()
# Compresses a single 'image' as BC 'type'. The global 'exe_path' is the
# compression tool to run, and the output files are written to 'out_path'. The
# metrics are extracted from stdout and returned.
#
def compress_file(image: Path, type: int, hash: bool = False) -> Metrics:
    """Run the compressor on 'image' as BC 'type' and return the parsed
    Metrics (optionally with a SHA-1 'hash' of the decoded PNG).

    Requires the globals 'exe_path' and 'out_path' to be set; exits otherwise.
    """
    # bc7enc writes the encoded DDS, a decoded PNG, and an optional decoded
    # alpha channel (automatically named 'deleteme_alpha.png'). The return is
    # '0' on success and stdout contains the logging and metrics; stderr is
    # not piped, so details of any failure go straight to the terminal.
    metrics = Metrics()
    if not (out_path and exe_path):
        err_line('Set the global compressor executable and destination before calling')
        exit()
    dds_file = out_path / 'deleteme.dds'
    png_file = out_path / 'deleteme.png'
    proc = Popen([exe_path, f'-{type}', image, dds_file, png_file], stdout=PIPE, universal_newlines=True)
    # Only stdout is captured; the second item of the tuple is always None
    (log, _) = proc.communicate()
    if proc.wait() != 0:
        err_line(f'Failed to open: {image.name}')
        return metrics
    # Map the BC type to the colour channels reported in its metrics line
    # (BC4 is red-only, BC5 is red-green, BC1 is RGB, everything else RGBA)
    channels = {4: 'r', 5: 'rg', 1: 'rgb'}.get(type, 'rgba')
    for line in log.splitlines():
        if line.startswith('Total encoding time:'):
            found = number_regex.search(line)
            if found:
                metrics.time = float(found.group(1))
        else:
            # We look for the metrics line with the known channel string at
            # the start, then extract all the numbers (max error, RMSE and
            # PSNR, in that order).
            found = metrics_regex[channels].match(line)
            if found:
                all_numbers = number_regex.findall(line)
                if len(all_numbers) >= 3:
                    metrics.maxe = int (all_numbers[0])
                    metrics.rmse = float(all_numbers[1])
                    metrics.psnr = float(all_numbers[2])
    if hash:
        try:
            metrics.hash = generate_hash(png_file)
        except FileNotFoundError:
            err_line(f'Failed to read generated: {png_file}')
    return metrics
# Start here
# Note: the epilog example uses the real option spellings ('-b', not '-bc',
# and the source directory is positional)
parser = argparse.ArgumentParser(description='rgbcx runner', epilog=f'example: {sys.argv[0]} -b 4 -o ../tmp/path -l logfile.csv source/path')
parser.add_argument('-b', '--bc', type=int, default=4, choices=[1, 3, 4, 5], help='BC variant 1, 3, 4 or 5')
parser.add_argument('-o', '--out', type=Path, help='directory for the temporary output files (e.g. RAM disk)')
parser.add_argument('-x', '--exe', type=Path, help='bc7enc executable to use (otherwise whichever is on the path)')
parser.add_argument('-l', '--log', type=argparse.FileType('w'), help='log file (otherwise stdout)')
parser.add_argument('-t', '--notime', action='store_true', help='exclude the encoding time from the log')
parser.add_argument('-e', '--noerror', action='store_true', help='exclude error metrics from the log')
parser.add_argument('-s', '--hash', action='store_true', help='generate a hash of the decoded DDS file')
parser.add_argument('src', type=Path, help='source directory of test textures')
args = parser.parse_args()
# Find the default executable either on the path or the current dir
def_exe: Optional[str] = shutil.which('bc7enc', path=f'{os.environ["PATH"]}{os.path.pathsep}{os.curdir}')
# Fail early on invalid inputs
args.src = validate_path(args.src, 'source directory')
out_path = validate_path(args.out, 'temporary output', os.getcwd())
exe_path = validate_path(args.exe, 'executable', def_exe)
if not (out_path and os.path.isdir(out_path)):
    err_line(f'Not a directory: {out_path}')
    exit()
if not (exe_path and os.access(exe_path, os.X_OK)):
    err_line(f'Not an executable: {exe_path}')
    exit()
# We got this far: we have valid source and output dirs, plus an executable, so
# we simply recurse the source to find PNGs then compress and log each entry.
try:
    if args.log:
        log_file = args.log
    total_time = 0.0
    for path in glob.iglob(f'{args.src}/**', recursive=True):
        src_path = Path(path)
        if src_path.is_file() and src_path.suffix.lower() == '.png':
            # Ignore the deleteme* files so this can run in the launch directory
            if not src_path.stem.startswith('deleteme'):
                metrics = compress_file(src_path, args.bc, args.hash)
                # Build the CSV row: quoted name, then the optional time,
                # error metric and hash columns
                log_text = f'"{src_path.name}"'
                if not args.notime:
                    log_text += f',{metrics.time}'
                if not args.noerror:
                    log_text += f',{metrics.maxe},{metrics.rmse},{metrics.psnr}'
                if args.hash:
                    log_text += f',{metrics.hash}'
                total_time += metrics.time
                log_line(log_text)
            else:
                err_line(f'Ignoring: {src_path}')
    # Log this to stderr just so we don't have it cluttering the output
    err_line(f'Total time: {total_time}')
except KeyboardInterrupt:
    exit()
finally:
    # Only close a log file we opened; the previous 'if (log_file)' check also
    # closed sys.stdout when no log file was supplied, breaking later writes
    # to standard output
    if args.log:
        args.log.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment