Last active
October 3, 2022 17:27
-
-
Save cwoffenden/98780e9009a2d4f62433ea9f77ef4113 to your computer and use it in GitHub Desktop.
Tool to compress a directory of PNG files with bc7enc and log the error metrics
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # Tool to compress a directory of PNG files with bc7enc and log the error | |
| # metrics as a CSV, with the aim to verify that any code changes neither impact | |
| # the quality nor execution time. Since the aim isn't to keep the generated | |
| # files, the outputs are always the same file (deleteme.dds|png), overwriting | |
| # each time. bc7enc is currently limited to loading PNGs, so too is this script. | |
| # Example usage: | |
| # | |
| # ./runbc7enc.py -b 5 -o /Volumes/Temp -x ./bc7enc-mine -l mine.csv -t -s . | |
| # | |
| # This will BC5 compress the contents of the current directory with the custom | |
| # 'bc7enc-mine' executable, storing the generated files in /Volumes/Temp, | |
| # logging the metrics and a hash of the output, but not the processing time. | |
| # | |
| # The optional hash can verify nothing changed, and when tested on the same | |
| # system lodepng's writer appears to be consistent. | |
| # | |
| # Note: the temporary files can be written to RAM disk, if only to stop | |
| # thousands of unnecessary writes to SSD: | |
| # | |
| # Mac, with a 1GB disk at /Volumes/Temp: | |
| # diskutil erasevolume HFS+ 'Temp' `hdiutil attach -nobrowse -nomount ram://2097152` | |
| # | |
| # Linux, with a 1GB disk at /mnt/Temp: | |
| # mkdir /mnt/Temp | |
| # mount -t tmpfs -o size=1g tmpfs /mnt/Temp | |
| # | |
| # TODO: look at New-IscsiVirtualDisk for Windows | |
| # | |
| # Author: Carl Woffenden, Numfum GmbH (this script is released under a CC0 license/Public Domain) | |
import argparse
import glob
import hashlib
import os
import re
import shlex
import shutil
import sys

from dataclasses import dataclass
from pathlib import Path
from subprocess import PIPE, Popen
from typing import Any, Dict, Optional, Pattern, TextIO
# Container for the compression results; see compress_file(). A dataclass
# rather than a bare attribute bag so instances get a useful repr and
# equality for free; Metrics() with no arguments behaves exactly as before.
#
@dataclass
class Metrics:
    # Encoding time in seconds
    time: float = 0.0
    # Maximum error for a single pixel
    maxe: int = 0
    # RMSE for the whole image
    rmse: float = 0.0
    # PSNR for the whole image
    psnr: float = 0.0
    # Hash of the output file ('N/A' until one is generated)
    hash: str = 'N/A'
# Destination log file object; stays stdout unless a log file is supplied.
log_file: TextIO = sys.stdout

# Destination directory for the generated DDS and other temporary files.
out_path: Optional[Path] = None

# Executable to use.
exe_path: Optional[Path] = None

# Compiled regex Pattern capturing a run of digits with an optional
# fractional part.
number_regex: Pattern[str] = re.compile(r'(\d+(?:\.\d+)?)')

# Compiled regex Patterns matching the metric lines for the various encoded
# BC types, keyed by colour type (e.g. metrics_regex['r'] for red-only BC4).
# The patterns differ only in the leading channel name, so they are built
# from a key/prefix table; regexes are needed because the tool pads with a
# variable number of spaces to align the metrics.
#
metrics_regex: Dict[str, Pattern[str]] = {
    key: re.compile(rf'^{prefix}\s+Max\s+error:', re.IGNORECASE)
    for key, prefix in (('r', 'Red'), ('rg', 'RG'),
                        ('rgb', 'RGB'), ('rgba', 'RGBA'))
}
# Logs 'line' to the open 'log_file' (or stdout).
#
def log_line(line: str) -> None:
    # Write-through immediately so an interrupted run still has complete rows.
    log_file.write(f'{line}\n')
    log_file.flush()
# Logs 'line' to stderr. This is also used for general notifications that we
# don't want to go to stdout (so the source can be piped).
#
def err_line(line: Any) -> None:
    sys.stderr.write(f'{line}\n')
# Given a path, resolves it to validate that it exists (and removes the / on
# directories, excess .., etc.), returning the result. If the path doesn't
# resolve then an error is shown and the script exits. If no path is passed
# then the 'fallback' is returned. 'path_type' allows the error to be
# tailored (defaulting to the generic 'path').
#
def validate_path(path: Optional[Path], path_type: str = 'path', fallback: Optional[str] = None) -> Optional[Path]:
    if (path):
        # An explicitly supplied path must resolve, otherwise we bail out.
        try:
            return path.resolve(strict=True)
        except FileNotFoundError:
            err_line(f'Invalid {path_type}: {path}')
            exit()
    # No path given: fall back only if the fallback itself exists.
    if (not (fallback and os.path.exists(fallback))):
        return None
    try:
        return Path(fallback).resolve(strict=True)
    except FileNotFoundError:
        return None
# Generate a hash of a given file. Since this is only for verifying image
# content changes SHA-1 is considered sufficient (a trade-off between
# processing time and hash length in characters, SHA-1 being 40 chars).
#
def generate_hash(path: Path) -> str:
    # 'digest' rather than 'hash' so the builtin isn't shadowed. The
    # two-argument iter() reads fixed-size chunks until the b'' sentinel
    # (EOF), keeping memory bounded for large files.
    digest = hashlib.sha1()
    with open(path, 'rb') as file:
        for chunk in iter(lambda: file.read(16384), b''):
            digest.update(chunk)
    return digest.hexdigest()
# Compresses a single 'image' as BC 'type'. The global 'exe_path' is the
# compression tool to run, and the output files are written to 'out_path'.
# The metrics are extracted from stdout and returned.
#
def compress_file(image: Path, type: int, hash: bool = False) -> Metrics:
    # bc7enc writes the encoded DDS, a decoded PNG, and an optional decoded
    # alpha channel (automatically named 'deleteme_alpha.png'). The return is
    # '0' on success, stdout contains the logging and metrics, stderr (which
    # we also redirect) contains details of any failure.
    metrics = Metrics()
    if (out_path and exe_path):
        dds_file = out_path / 'deleteme.dds'
        png_file = out_path / 'deleteme.png'
        # Fix: stderr is now actually piped (the comment above always claimed
        # it was, but only stdout was captured and 'err' was always None).
        proc = Popen([exe_path, f'-{type}', image, dds_file, png_file], stdout=PIPE, stderr=PIPE, universal_newlines=True)
        (log, err) = proc.communicate()
        # communicate() already waits, so the redundant proc.wait() was
        # dropped in favour of reading the stored returncode.
        if (proc.returncode == 0):
            for line in log.splitlines():
                if (line.startswith('Total encoding time:')):
                    found = number_regex.search(line)
                    if (found):
                        metrics.time = float(found.group(1))
                else:
                    # We look for the metrics lines with the known strings at
                    # the start, then extract all the numbers. The BC type
                    # selects the expected channel prefix: BC4 is red-only,
                    # BC5 is red/green, BC1 is RGB, everything else is RGBA.
                    key = {4: 'r', 5: 'rg', 1: 'rgb'}.get(type, 'rgba')
                    found = metrics_regex[key].match(line)
                    if (found):
                        all_numbers = number_regex.findall(line)
                        if (len(all_numbers) >= 3):
                            metrics.maxe = int(all_numbers[0])
                            metrics.rmse = float(all_numbers[1])
                            metrics.psnr = float(all_numbers[2])
            if (hash):
                try:
                    metrics.hash = generate_hash(png_file)
                except FileNotFoundError:
                    err_line(f'Failed to read generated: {png_file}')
        else:
            err_line(f'Failed to open: {image.name}')
            # Forward the captured failure details (previously lost).
            if (err):
                err_line(err)
    else:
        err_line('Set the global compressor executable and destination before calling')
        exit()
    return metrics
# Start here: parse the command line. Everything is optional apart from the
# positional source directory.
parser = argparse.ArgumentParser(description='rgbcx runner', epilog=f'example: {sys.argv[0]} -bc 4 -o ../tmp/path -l logfile.csv -f source/path')
parser.add_argument('-b', '--bc', type=int, default=4, choices=[1, 3, 4, 5], help='BC variant 1, 3, 4 or 5')
parser.add_argument('-o', '--out', type=Path, help='directory for the temporary output files (e.g. RAM disk)')
parser.add_argument('-x', '--exe', type=Path, help='bc7enc executable to use (otherwise whichever is on the path)')
parser.add_argument('-l', '--log', type=argparse.FileType('w'), help='log file (otherwise stdout)')
parser.add_argument('-t', '--notime', action='store_true', help='exclude the encoding time from the log')
parser.add_argument('-e', '--noerror', action='store_true', help='exclude error metrics from the log')
# NOTE(review): compress_file() hashes the decoded PNG, not the DDS — confirm
# this help wording.
parser.add_argument('-s', '--hash', action='store_true', help='generate a hash of the decoded DDS file')
parser.add_argument('src', type=Path, help='source directory of test testures')
args = parser.parse_args()
# Find the default executable either on the path or the current dir
def_exe: Optional[str] = shutil.which('bc7enc', path=f'{os.environ["PATH"]}{os.path.pathsep}{os.curdir}')
# Fail early on invalid inputs (validate_path() exits on a bad explicit path)
args.src = validate_path(args.src, 'source directory')
out_path = validate_path(args.out, 'temporary output', os.getcwd())
exe_path = validate_path(args.exe, 'executable', def_exe)
if (not (out_path and os.path.isdir(out_path))):
    err_line(f'Not a directory: {out_path}')
    exit()
if (not (exe_path and os.access(exe_path, os.X_OK))):
    err_line(f'Not an executable: {exe_path}')
    exit()
# We got this far: we have valid source and output dirs, plus an executable, so
# we simply recurse the source to find PNGs then compress and log each entry.
try:
    if (args.log):
        # Rebind the module-level log_file so log_line() writes to the file.
        log_file = args.log
    total_time = 0.0
    # Recursive glob over the source tree; any file ending .png (any case) is
    # a candidate.
    for path in glob.iglob(f'{args.src}/**', recursive=True):
        src_path = Path(path);
        if (src_path.is_file() and src_path.suffix.lower() == '.png'):
            # Ignore the deleteme* files so this can run in the launch directory
            if (not src_path.stem.startswith('deleteme')):
                metrics = compress_file(src_path, args.bc, args.hash)
                # Build one CSV row: quoted name, then the optional time,
                # error-metric and hash columns per the -t/-e/-s flags.
                log_text = f'"{src_path.name}"'
                if (not args.notime):
                    log_text += f',{metrics.time}'
                if (not args.noerror):
                    log_text += f',{metrics.maxe},{metrics.rmse},{metrics.psnr}'
                if (args.hash):
                    log_text += f',{metrics.hash}'
                total_time += metrics.time
                log_line(log_text)
            else:
                err_line(f'Ignoring: {src_path}')
    # Log this to stderr just so we don't have it cluttering the output
    err_line(f'Total time: {total_time}')
except KeyboardInterrupt:
    # Ctrl-C: bail quietly; the finally clause below still closes the log.
    exit()
finally:
    # NOTE(review): with no -l option log_file is still sys.stdout, which gets
    # closed here — harmless at process exit, but confirm that is intended.
    if (log_file):
        log_file.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment