Skip to content

Instantly share code, notes, and snippets.

@cwoffenden
Last active October 3, 2022 17:27
Show Gist options
  • Select an option

  • Save cwoffenden/98780e9009a2d4f62433ea9f77ef4113 to your computer and use it in GitHub Desktop.

Select an option

Save cwoffenden/98780e9009a2d4f62433ea9f77ef4113 to your computer and use it in GitHub Desktop.
Tool to compress a directory of PNG files with bc7enc and log the error metrics
#!/usr/bin/env python3
# Tool to compress a directory of PNG files with bc7enc and log the error
# metrics as a CSV, with the aim to verify that any code changes neither impact
# the quality nor execution time. Since the aim isn't to keep the generated
# files, the outputs are always the same file (deleteme.dds|png), overwriting
# each time. bc7enc is currently limited to loading PNGs, so too is this script.
# Example usage:
#
# ./runbc7enc.py -b 5 -o /Volumes/Temp -x ./bc7enc-mine -l mine.csv -t -s .
#
# This will BC5 compress the contents of the current directory with the custom
# 'bc7enc-mine' executable, storing the generated files in /Volumes/Temp,
# logging the metrics and a hash of the output, but not the processing time.
#
# The optional hash can verify nothing changed, and when tested on the same
# system lodepng's writer appears to be consistent.
#
# Note: the temporary files can be written to RAM disk, if only to stop
# thousands of unnecessary writes to SSD:
#
# Mac, with a 1GB disk at /Volumes/Temp:
# diskutil erasevolume HFS+ 'Temp' `hdiutil attach -nobrowse -nomount ram://2097152`
#
# Linux, with a 1GB disk at /mnt/Temp:
# mkdir /mnt/Temp
# mount -t tmpfs -o size=1g tmpfs /mnt/Temp
#
# TODO: look at New-IscsiVirtualDisk for Windows
#
# Author: Carl Woffenden, Numfum GmbH (this script is released under a CC0 license/Public Domain)
import argparse, glob, hashlib, os, re, shlex, shutil, sys
from pathlib import Path
from subprocess import Popen, PIPE
from typing import Any, Dict, Optional, Pattern, TextIO
# Container for the compression results; see compress_file().
#
class Metrics:
    """Result bundle for a single compression run, filled by compress_file()."""
    # Encoding time in seconds
    time: float = 0.0
    # Maximum error for a single pixel
    maxe: int = 0
    # RMSE for the whole image
    rmse: float = 0.0
    # PSNR for the whole image
    psnr: float = 0.0
    # Hash of the output file
    hash: str = 'N/A'
# Destination log file object (or stdout if no output file was supplied).
log_file: TextIO = sys.stdout

# Destination directory for the generated DDS and other temporary files.
out_path: Optional[Path] = None

# Executable to use.
exe_path: Optional[Path] = None

# Compiled regex pattern to capture numbers (with or without a decimal point).
number_regex: Pattern[str] = re.compile(r'(\d+(?:\.\d+)?)')

# Dictionary of compiled regex patterns to match the metric lines for the
# various encoded BC types, keyed by their colour type, e.g. metrics_regex['r']
# for the red-only BC4. Note: regexes are used because the tool aligns the
# metrics with a variable number of spaces.
#
metrics_regex: Dict[str, Pattern[str]] = {
    key: re.compile(rf'^{label}\s+Max\s+error:', re.IGNORECASE)
    for key, label in (('r', 'Red'), ('rg', 'RG'), ('rgb', 'RGB'), ('rgba', 'RGBA'))
}
# Logs 'line' to the open 'log_file' (or stdout).
#
def log_line(line: str) -> None:
    """Write 'line' plus a newline to the module-level 'log_file', flushing
    immediately so output survives an interrupted run."""
    log_file.write(line + '\n')
    log_file.flush()
# Logs 'line' to stderr. This is also used for general notifications that we
# don't want to go to stdout (so the source can be piped).
#
def err_line(line: Any) -> None:
    """Write 'line' (stringified) plus a newline to stderr, keeping stdout
    clean for piped output."""
    sys.stderr.write(str(line) + '\n')
# Given a path, resolves it to validate that it exists (and removes the / on
# directories, excess .., etc.), returning the result. If the path doesn't
# resolve then an error is shown and the script exits. If no path is passed then
# the 'fallback' is returned. 'path_type' allows the error to be tailored
# (defaulting to the generic 'path')
#
def validate_path(path: Optional[Path], path_type: str = 'path', fallback: Optional[str] = None) -> Optional[Path]:
    """Resolve and return 'path' if it exists, exiting with an error if it
    doesn't; otherwise resolve and return 'fallback' (or None)."""
    if path:
        try:
            return path.resolve(strict=True)
        except FileNotFoundError:
            # An explicitly supplied path that doesn't exist is fatal
            err_line(f'Invalid {path_type}: {path}')
            exit()
    if not (fallback and os.path.exists(fallback)):
        return None
    # The fallback is best-effort: failure to resolve is simply no result
    try:
        return Path(fallback).resolve(strict=True)
    except FileNotFoundError:
        return None
# Generate a hash of a given file. Since this is only for verifying image
# content changes SHA-1 is considered sufficient (a trade-off between processing
# time and hash length in characters, SHA-1 being 40 chars).
#
def generate_hash(path: Path) -> str:
    """Return the SHA-1 hex digest (40 chars) of the file at 'path'.

    Raises FileNotFoundError (propagated from open()) if the file is missing.
    """
    # 'digest' rather than 'hash': don't shadow the builtin
    digest = hashlib.sha1()
    with open(path, 'rb') as file:
        # Read in 16kB chunks so large images needn't fit in memory; iter()
        # with a b'' sentinel stops cleanly at EOF.
        for chunk in iter(lambda: file.read(16384), b''):
            digest.update(chunk)
    return digest.hexdigest()
# Compresses a single 'image' as BC 'type'. The global 'exe_path' is the
# compression tool to run, and the output files are written to 'out_path'. The
# metrics are extracted from stdout and returned.
#
def compress_file(image: Path, type: int, hash: bool = False) -> Metrics:
    """Run the compressor on 'image' as BC 'type' and return the parsed
    Metrics (optionally with a SHA-1 'hash' of the decoded PNG).

    Requires the globals 'exe_path' and 'out_path' to be set; exits otherwise.
    """
    # bc7enc writes the encoded DDS, a decoded PNG, and an optional decoded
    # alpha channel (automatically named 'deleteme_alpha.png'). The return is
    # '0' on success and stdout contains the logging and metrics; stderr is
    # not piped, so details of any failure go straight to the terminal.
    metrics = Metrics()
    if not (out_path and exe_path):
        err_line('Set the global compressor executable and destination before calling')
        exit()
    dds_file = out_path / 'deleteme.dds'
    png_file = out_path / 'deleteme.png'
    proc = Popen([exe_path, f'-{type}', image, dds_file, png_file], stdout=PIPE, universal_newlines=True)
    # Only stdout is captured; the second item of the tuple is always None
    (log, _) = proc.communicate()
    if proc.wait() != 0:
        err_line(f'Failed to open: {image.name}')
        return metrics
    # Map the BC type to the colour channels reported in its metrics line
    # (BC4 is red-only, BC5 is red-green, BC1 is RGB, everything else RGBA)
    channels = {4: 'r', 5: 'rg', 1: 'rgb'}.get(type, 'rgba')
    for line in log.splitlines():
        if line.startswith('Total encoding time:'):
            found = number_regex.search(line)
            if found:
                metrics.time = float(found.group(1))
        else:
            # We look for the metrics line with the known channel string at
            # the start, then extract all the numbers (max error, RMSE and
            # PSNR, in that order).
            found = metrics_regex[channels].match(line)
            if found:
                all_numbers = number_regex.findall(line)
                if len(all_numbers) >= 3:
                    metrics.maxe = int (all_numbers[0])
                    metrics.rmse = float(all_numbers[1])
                    metrics.psnr = float(all_numbers[2])
    if hash:
        try:
            metrics.hash = generate_hash(png_file)
        except FileNotFoundError:
            err_line(f'Failed to read generated: {png_file}')
    return metrics
# Start here
# Note: the epilog example uses the real option spellings ('-b', not '-bc',
# and the source directory is positional)
parser = argparse.ArgumentParser(description='rgbcx runner', epilog=f'example: {sys.argv[0]} -b 4 -o ../tmp/path -l logfile.csv source/path')
parser.add_argument('-b', '--bc', type=int, default=4, choices=[1, 3, 4, 5], help='BC variant 1, 3, 4 or 5')
parser.add_argument('-o', '--out', type=Path, help='directory for the temporary output files (e.g. RAM disk)')
parser.add_argument('-x', '--exe', type=Path, help='bc7enc executable to use (otherwise whichever is on the path)')
parser.add_argument('-l', '--log', type=argparse.FileType('w'), help='log file (otherwise stdout)')
parser.add_argument('-t', '--notime', action='store_true', help='exclude the encoding time from the log')
parser.add_argument('-e', '--noerror', action='store_true', help='exclude error metrics from the log')
parser.add_argument('-s', '--hash', action='store_true', help='generate a hash of the decoded DDS file')
parser.add_argument('src', type=Path, help='source directory of test textures')
args = parser.parse_args()
# Find the default executable either on the path or the current dir
def_exe: Optional[str] = shutil.which('bc7enc', path=f'{os.environ["PATH"]}{os.path.pathsep}{os.curdir}')
# Fail early on invalid inputs
args.src = validate_path(args.src, 'source directory')
out_path = validate_path(args.out, 'temporary output', os.getcwd())
exe_path = validate_path(args.exe, 'executable', def_exe)
if not (out_path and os.path.isdir(out_path)):
    err_line(f'Not a directory: {out_path}')
    exit()
if not (exe_path and os.access(exe_path, os.X_OK)):
    err_line(f'Not an executable: {exe_path}')
    exit()
# We got this far: we have valid source and output dirs, plus an executable, so
# we simply recurse the source to find PNGs then compress and log each entry.
try:
    if args.log:
        log_file = args.log
    total_time = 0.0
    for path in glob.iglob(f'{args.src}/**', recursive=True):
        src_path = Path(path)
        if src_path.is_file() and src_path.suffix.lower() == '.png':
            # Ignore the deleteme* files so this can run in the launch directory
            if not src_path.stem.startswith('deleteme'):
                metrics = compress_file(src_path, args.bc, args.hash)
                # Build the CSV row: quoted name, then the optional time,
                # error metric and hash columns
                log_text = f'"{src_path.name}"'
                if not args.notime:
                    log_text += f',{metrics.time}'
                if not args.noerror:
                    log_text += f',{metrics.maxe},{metrics.rmse},{metrics.psnr}'
                if args.hash:
                    log_text += f',{metrics.hash}'
                total_time += metrics.time
                log_line(log_text)
            else:
                err_line(f'Ignoring: {src_path}')
    # Log this to stderr just so we don't have it cluttering the output
    err_line(f'Total time: {total_time}')
except KeyboardInterrupt:
    exit()
finally:
    # Only close a log file we opened; the previous 'if (log_file)' check also
    # closed sys.stdout when no log file was supplied, breaking later writes
    # to standard output
    if args.log:
        args.log.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment