Skip to content

Instantly share code, notes, and snippets.

@IcedShake
Forked from TypeA2/pixel_hash.py
Last active April 4, 2025 11:21
Show Gist options
  • Select an option

  • Save IcedShake/3dd4a70e4e898bb97a24181c80a3493a to your computer and use it in GitHub Desktop.

Select an option

Save IcedShake/3dd4a70e4e898bb97a24181c80a3493a to your computer and use it in GitHub Desktop.
Danbooru Pixel Hash
#!/usr/bin/env python3
# Calculates an image's pixel hash as per the algorithm used by Danbooru
from pathlib import Path
import hashlib
from pyvips import Image, Error as VipsError
class DanbooruMediaFileImage:
"""Mimics Danbooru's MediaFile::Image, but for hash computation only.
See https://github.com/danbooru/danbooru/blob/bd0c6a37a81f851bd3e7862b97f7cf2fae7d5381/app/logical/media_file/image.rb
"""
def __init__(self, image_path: str):
self.image_path = image_path
self.file_ext = Path(image_path).suffix.lower()
self.image = self.open_image(fail=True)
def open_image(self, **kwargs) -> Image:
if self.file_ext in ['jpeg', 'jpg']:
return Image.new_from_file(self.image_path, autorotate=True, **kwargs)
return Image.new_from_file(self.image_path, **kwargs)
def md5(self) -> str:
hash = hashlib.md5()
with open(self.image_path, 'rb') as f:
hash.update(f.read())
return hash.hexdigest()
def n_pages(self) -> int | None:
try:
return self.image.get('n-pages')
except VipsError:
return None
def frame_count(self) -> int | None:
match self.file_ext:
case 'gif':
return self.n_pages()
case 'webp':
return self.n_pages()
case 'png':
pass # Not implementing exiftool atm
case 'avif':
pass # Not implementing ffmpeg atm
case _:
return None
def is_video(self) -> bool:
return self.file_ext in ['webm', 'mp4']
def is_animated(self) -> bool:
frame_count = self.frame_count() or 1
return self.is_video() or frame_count > 1
def pixel_hash(self) -> str:
if self.is_animated():
return self.md5()
try:
return self.pixel_hash_file()
except VipsError:
return self.md5()
def pixel_hash_file(self) -> str:
"""Slightly different from Danbooru implementation; it does not write a PAM file."""
image = self.image
if image.get_typeof("icc-profile-data") != 0:
image = image.icc_transform("srgb")
if image.interpretation != "srgb":
image = image.colourspace("srgb")
if not image.hasalpha():
image = image.addalpha()
header = (
"P7\n"
f"WIDTH {image.width}\n"
f"HEIGHT {image.height}\n"
f"DEPTH {image.bands}\n"
"MAXVAL 255\n"
"TUPLTYPE RGB_ALPHA\n"
"ENDHDR\n"
)
hash = hashlib.md5()
hash.update(header.encode())
hash.update(image.rawsave_buffer())
return hash.hexdigest()
if __name__ == '__main__':
    # Command-line entry point: print "<file name> <pixel hash>" for one file.
    import sys

    if len(sys.argv) != 2:
        # Diagnostics go to stderr so stdout only ever carries the hash line.
        print("No file supplied", file=sys.stderr)
        # sys.exit() rather than the bare exit() helper, which is injected by
        # the site module and is not guaranteed to exist.
        sys.exit(1)

    infile = Path(sys.argv[1]).resolve()
    if not infile.exists():
        print("File does not exist", file=sys.stderr)
        sys.exit(1)

    # The constructor is annotated to take a str path and forwards it to
    # pyvips unchanged, so hand it a plain string rather than a Path object.
    image = DanbooruMediaFileImage(str(infile))
    print(f"{infile.name} {image.pixel_hash()}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment