Skip to content

Instantly share code, notes, and snippets.

@asalt
Created February 19, 2025 03:14
Show Gist options
  • Select an option

  • Save asalt/6766258e42557d46e6fa5a00f4d1679b to your computer and use it in GitHub Desktop.

Select an option

Save asalt/6766258e42557d46e6fa5a00f4d1679b to your computer and use it in GitHub Desktop.
import pickle
import hashlib
import time
import os
import pandas as pd
from datetime import datetime
class BaseCache:
    """Minimal in-memory cache that other caching strategies build on."""

    def __init__(self):
        # Maps each key to a ``(value, notes)`` pair.
        self.cache = {}

    def save(self, key, value, notes=None):
        """Store ``value`` under ``key`` together with optional ``notes``."""
        self.cache[key] = (value, notes)

    def load(self, key):
        """Return the value stored under ``key``, or ``None`` if it is absent."""
        if key not in self.cache:
            return None
        # Entries are (value, notes) pairs; only the value is handed back.
        return self.cache[key][0]
class TSVCaching(BaseCache):
    """In-memory cache that additionally writes DataFrame values to TSV files."""

    def __init__(self, cache_dir="cache_tsv"):
        """Remember ``cache_dir`` and create it if it does not exist yet."""
        super().__init__()
        self.cache_dir = cache_dir
        os.makedirs(self.cache_dir, exist_ok=True)

    def get_filepath(self, key):
        """Return the path of the TSV file that belongs to ``key``."""
        filename = f"{key}.tsv"
        return os.path.join(self.cache_dir, filename)

    def save(self, key, value, notes=None):
        """Cache ``value`` in memory and, for DataFrames, also on disk.

        Values that are not ``pd.DataFrame`` instances are kept in memory only.
        """
        super().save(key, value, notes)
        if not isinstance(value, pd.DataFrame):
            return
        # The index is not written, so it is not restored when reading back.
        value.to_csv(self.get_filepath(key), sep="\t", index=False)

    def load(self, key):
        """Return the cached value for ``key``, or ``None`` if there is none.

        The in-memory cache is consulted first; the TSV file is read only
        when memory has nothing (or ``None``) for the key.
        """
        in_memory = super().load(key)
        if in_memory is None:
            tsv_path = self.get_filepath(key)
            if not os.path.exists(tsv_path):
                return None
            return pd.read_csv(tsv_path, sep="\t")
        return in_memory
class CachedComputation:
    """Wrap a function so its results are stored in, and reused from, a cache.

    Two call styles are supported:

    * ``cached(*args)`` derives the cache key automatically by hashing the
      pickled ``(compute_func, args)`` pair with SHA-256.
    * ``cached % (key, *args)`` uses the caller-supplied ``key`` as is, for
      functions or arguments that cannot be pickled (e.g. lambdas).

    Results are returned wrapped in ``DebugWrapper``. A cached value that
    loads as ``None`` is treated as a miss, so a function returning ``None``
    is re-evaluated on every call.
    """

    def __init__(self, compute_func, cache_strategy=None):
        """Store the function and the cache used for its results.

        Args:
            compute_func: Callable whose results should be cached.
            cache_strategy: Object providing ``load(key)`` and
                ``save(key, value, notes)``. A fresh ``BaseCache`` is used
                when omitted.
        """
        self.compute_func = compute_func
        # Test against None instead of truthiness so that a supplied strategy
        # which happens to be falsy (e.g. an empty container-like cache) is
        # not silently swapped for a BaseCache.
        self.cache_strategy = BaseCache() if cache_strategy is None else cache_strategy

    def __call__(self, *args):
        """Execute and cache using automatic hashing.

        Returns:
            The ``DebugWrapper`` produced by the ``%`` operator, or ``None``
            when the function or its arguments cannot be pickled.
        """
        try:
            payload = pickle.dumps((self.compute_func, args))
        except (pickle.PicklingError, TypeError, AttributeError):
            # Unpicklable objects do not always raise PicklingError: local
            # functions and objects such as locks or generators surface as
            # AttributeError or TypeError instead.
            log("pickling error: cannot automatically hash arguments, use `%` for manual hash", important=True)
            return None
        key = hashlib.sha256(payload).hexdigest()
        return self % (key, *args)  # Delegate to `%` for caching logic

    def __mod__(self, key_and_args):
        """Allow explicit manual hash control with proper function execution.

        Args:
            key_and_args: Sequence whose first item is the cache key and whose
                remaining items are passed to the function. A bare ``str`` or
                ``bytes`` is taken as the key of a call without arguments.

        Returns:
            ``DebugWrapper`` around the cached or freshly computed result.
        """
        if isinstance(key_and_args, (str, bytes)):
            # Unpacking a string would split it into single characters.
            key, args = key_and_args, []
        else:
            key, *args = key_and_args  # Extract hash key and arguments
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments.
        start_time = time.perf_counter()
        cached_result = self.cache_strategy.load(key)
        if cached_result is not None:
            log(f"cache HIT for hash: {key}", important=True)
        else:
            log(f"cache MISS for hash: {key}", important=True)
            # Not every callable has __name__ (e.g. functools.partial).
            func_name = getattr(self.compute_func, "__name__", repr(self.compute_func))
            log(f"evaluating {func_name} (manual hash: {key}) with args {args}")
            cached_result = self.compute_func(*args)
            self.cache_strategy.save(key, cached_result, str(self.compute_func))
        elapsed_time = time.perf_counter() - start_time
        log(f"execution time: {elapsed_time:.4f} seconds")
        return DebugWrapper(cached_result)
class DebugWrapper:
    """Holds a return value and logs its ``repr`` when constructed."""

    def __init__(self, value):
        self.value = value
        log(f"returned: {value!r}")

    def unwrap(self):
        """Return the wrapped value itself."""
        return self.value

    def __str__(self):
        # Mirror the wrapped value's own string form.
        return str(self.value)

    def __repr__(self):
        # Mirror the wrapped value's own repr.
        return repr(self.value)
def log(msg, important=False):
    """Print ``msg`` with a timestamp prefix.

    Important messages are upper-cased; all others are lower-cased.
    """
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    if important:
        text = msg.upper()
    else:
        text = msg.lower()
    print(f"[{stamp}] {text}", flush=True)
# Example Usage
def expensive_computation(x, y):
    """Stand-in for slow work: sleep ``x + y`` seconds, then return ``x + y``."""
    total = x + y
    time.sleep(total)  # Simulate expensive computation
    return total
# Wrap the slow function; no cache_strategy is given, so the default cache is used.
# Every call below returns a DebugWrapper; use .unwrap() to get the raw value.
cached_expensive_func = CachedComputation(expensive_computation)
# First call (cache miss, auto hash): the function really runs and sleeps 2 + 3 seconds.
result1 = cached_expensive_func(2, 3)
# Second call (cache hit, auto hash): same function and arguments hash to the same key.
result2 = cached_expensive_func(2, 3)
# Different arguments (cache miss, auto hash): a new key, so the function runs again.
result3 = cached_expensive_func(5, 7)
# Tabular data caching (explicit hash to avoid pickling issue): the lambda cannot be
# hashed automatically, so the key "df_42" is supplied by hand through `%`.
cached_dataframe_func = CachedComputation(lambda x: pd.DataFrame({"value": [x]}), cache_strategy=TSVCaching())
# The first `%` call is a miss unless cache_tsv/df_42.tsv already exists from an
# earlier run, in which case the DataFrame is read back from that file.
df_result = cached_dataframe_func % ("df_42", 42) # Manual hash + function args
# The repeated call reuses the cached entry instead of evaluating the lambda again.
df_result = cached_dataframe_func % ("df_42", 42) # Manual hash + function args called again
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment