Created
February 19, 2025 03:14
-
-
Save asalt/6766258e42557d46e6fa5a00f4d1679b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pickle | |
| import hashlib | |
| import time | |
| import os | |
| import pandas as pd | |
| from datetime import datetime | |
class BaseCache:
    """In-memory cache; base class for caching strategies.

    Entries are kept in ``self.cache`` as ``key -> (value, notes)`` pairs.
    """

    def __init__(self):
        # Maps each key to a ``(value, notes)`` tuple.
        self.cache = {}

    def save(self, key, value, notes=None):
        """Store *value* under *key*, together with optional *notes*."""
        self.cache[key] = (value, notes)

    def load(self, key):
        """Return the value stored under *key*, or ``None`` if not found.

        A value that was saved as ``None`` cannot be told apart from a
        missing key.
        """
        value, _notes = self.cache.get(key, (None, None))
        return value
class TSVCaching(BaseCache):
    """Cache strategy that also writes tabular results to TSV files.

    Every value is kept in the in-memory cache inherited from ``BaseCache``.
    Values that are ``pandas.DataFrame`` instances are additionally written
    to ``<cache_dir>/<key>.tsv``; other values are never written to disk.
    """

    def __init__(self, cache_dir="cache_tsv"):
        """Create the cache and make sure *cache_dir* exists."""
        super().__init__()
        self.cache_dir = cache_dir
        os.makedirs(self.cache_dir, exist_ok=True)

    def get_filepath(self, key):
        """Return the TSV file path used for *key*.

        The key is interpolated into the file name as-is.
        """
        return os.path.join(self.cache_dir, f"{key}.tsv")

    def save(self, key, value, notes=None):
        """Store *value* in memory and, if it is a DataFrame, on disk.

        The DataFrame index is not written (``index=False``); *notes* are
        kept in memory only.
        """
        super().save(key, value, notes)
        if isinstance(value, pd.DataFrame):
            value.to_csv(self.get_filepath(key), sep="\t", index=False)

    def load(self, key):
        """Return the cached value for *key*, or ``None`` if not found.

        The in-memory cache is consulted first; on a miss the TSV file for
        *key* is read if it exists.
        """
        cached_value = super().load(key)
        if cached_value is not None:
            return cached_value
        # Read directly and handle the missing-file case, rather than
        # checking for existence first (the file could vanish in between).
        try:
            frame = pd.read_csv(self.get_filepath(key), sep="\t")
        except FileNotFoundError:
            return None
        # Keep the frame in memory so later loads do not re-parse the file.
        super().save(key, frame)
        return frame
class CachedComputation:
    """Callable wrapper that caches the results of *compute_func*.

    Two calling styles are supported:

    * ``cached(*args)`` derives the cache key automatically from a SHA-256
      digest of the pickled ``(compute_func, args)`` pair.
    * ``cached % (key, *args)`` uses the explicitly supplied *key*.

    Both return the result wrapped in a ``DebugWrapper``.
    """

    def __init__(self, compute_func, cache_strategy=None):
        """Wrap *compute_func*; default to an in-memory ``BaseCache``."""
        self.compute_func = compute_func
        # Test against None explicitly so that a strategy object which
        # happens to be falsy is not silently replaced.
        self.cache_strategy = (
            cache_strategy if cache_strategy is not None else BaseCache()
        )

    def __call__(self, *args):
        """Execute and cache using automatic hashing.

        Returns ``None`` (after logging) when the function or its arguments
        cannot be pickled; use ``%`` with a manual key in that case.
        """
        try:
            payload = pickle.dumps((self.compute_func, args))
        except (pickle.PicklingError, AttributeError, TypeError):
            # pickle reports unpicklable objects with any of these three
            # exception types, depending on the object and Python version.
            log("pickling error: cannot automatically hash arguments, use `%` for manual hash", important=True)
            return None
        key = hashlib.sha256(payload).hexdigest()
        return self % (key, *args)  # Delegate to `%` for caching logic

    def __mod__(self, key_and_args):
        """Run with an explicit cache key: ``cached % (key, *args)``.

        *key_and_args* is an iterable whose first item is the cache key and
        whose remaining items are passed to the wrapped function. A bare
        string is treated as a key with no arguments.

        A cached result of ``None`` is treated as a miss, so the function is
        re-evaluated in that case.
        """
        if isinstance(key_and_args, str):
            # Unpacking a string would split it into single characters.
            key, args = key_and_args, []
        else:
            key, *args = key_and_args  # Extract hash key and arguments
        started = time.perf_counter()
        cached_result = self.cache_strategy.load(key)
        if cached_result is not None:
            log(f"cache HIT for hash: {key}", important=True)
        else:
            log(f"cache MISS for hash: {key}", important=True)
            # Not every callable has __name__ (e.g. functools.partial).
            func_name = getattr(
                self.compute_func, "__name__", repr(self.compute_func)
            )
            log(f"evaluating {func_name} (manual hash: {key}) with args {args}")
            cached_result = self.compute_func(*args)
            self.cache_strategy.save(key, cached_result, str(self.compute_func))
        elapsed_time = time.perf_counter() - started
        log(f"execution time: {elapsed_time:.4f} seconds")
        return DebugWrapper(cached_result)
class DebugWrapper:
    """Wraps function return values for automatic debug output.

    Constructing a wrapper logs the ``repr`` of the wrapped value. ``repr``
    and ``str`` of the wrapper delegate to the wrapped value; use
    ``unwrap()`` to get the value itself.
    """

    def __init__(self, value):
        """Store *value* and log its ``repr``."""
        self.value = value
        log(f"returned: {repr(value)}")

    def __repr__(self):
        return repr(self.value)

    def __str__(self):
        return str(self.value)

    def unwrap(self):
        """Explicitly get the stored value."""
        return self.value
def log(msg, important=False):
    """Print *msg* with a timestamp, enforcing case rules.

    Important messages are upper-cased and all others are lower-cased. The
    case change applies to the whole message, including any values that
    were interpolated into it.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    msg = msg.upper() if important else msg.lower()
    print(f"[{timestamp}] {msg}", flush=True)
# Example Usage
def expensive_computation(x, y):
    """Simulate an expensive computation by sleeping ``x + y`` seconds.

    Returns ``x + y``. If the sum is negative the sleep is skipped, because
    ``time.sleep`` raises ``ValueError`` for negative durations.
    """
    total = x + y
    time.sleep(max(0, total))  # Simulate expensive computation
    return total
# Demonstration: runs at import time and sleeps for the simulated work.
cached_expensive_func = CachedComputation(expensive_computation)

# First call (cache miss, auto hash)
result1 = cached_expensive_func(2, 3)

# Second call (cache hit, auto hash)
result2 = cached_expensive_func(2, 3)

# Different arguments (cache miss, auto hash)
result3 = cached_expensive_func(5, 7)

# Tabular data caching (explicit hash to avoid pickling issue)
cached_dataframe_func = CachedComputation(
    lambda x: pd.DataFrame({"value": [x]}), cache_strategy=TSVCaching()
)
df_result = cached_dataframe_func % ("df_42", 42)  # Manual hash + function args
df_result = cached_dataframe_func % ("df_42", 42)  # Manual hash + function args called again
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment