Skip to content

Instantly share code, notes, and snippets.

@myuanz
Created November 28, 2025 02:45
Show Gist options
  • Select an option

  • Save myuanz/4c7e2a15147b6b498c48989a793c7ad6 to your computer and use it in GitHub Desktop.

Select an option

Save myuanz/4c7e2a15147b6b498c48989a793c7ad6 to your computer and use it in GitHub Desktop.
A type-safe file cache decorator snippet
# try it in pyright playground
# https://pyright-play.net/?code=GYJw9gtgBMCuB2BjALmMAbAzlAlhADmCMlAO4gCG%2BmAsAFCiRT4XIAW6OARrgUSQAVWbeo2jIAnvhzwA5r0LEoAYQrp0FLugCmAGigAZHMm2V0%2BgLJVpc-QPCpEGfQGVtAR1jakeqIgqYyPT0eIokLPAAJgFQMfiRIXxKhBog2HHoouDQKRRpAHTRyKxS2tih-FAAIqwUACqlsdgCGHmYNcUN%2BNrBdPSIGpjYqohs2pEdFABilBDaUwiIANoAVCsCALoAFPZgjhgAlABc9FBnUJHawFAA%2Bjf%2B6ndbmNrowPorebKYR1AC%2BV9MB8VgBrUiA37-MGAg5QAC0AD5mOh8pMZhQ5r98tjeucLlc-Do8vcKKNtM9Xu8oJ8QN9IQDaUDqaDwYz6dDGbDEVAAHJgeDaLE4voigYBYaksZVKYnOh4y7XO4yYxPF5vfRcALaG5FCg6nAgX6BEBQAA%2Bf2EUAAvFAAOS2rlIvkC2V4vFq4D5G6al462r6k02oTsLY%2B7W6gMHU5us4e-Jhv3FAP5CAgyIGrYsEDeZCYK11EBefTaAAeOECNzAIPzhe0UZFeIAAvhwN1iBJo2cFVAExH0yAKW9HRb2K6Y1Bs8hYCB4FA496tYm9f3cedG73-f38i9kCYQJ38dcN0n%2B4OqfgjcgTebg2xh87BQf3ZSvcflwbrSO2Jn6wfu3cHnQO5VnWbYn3OD1dHAs54AxQU5yvM0vyQ1R1E0HQliWKd8B0fR0xQDZ9GNQjoKgUtkF%2BIw9zUJZbUQTAADdbX0W0s08bRkFtDZPzoxjmNI4AiEQeCuDQdBPymNQXiguVx0wMkID1MAGNMEAcEuH4oCsfAbFkJZjTsVo0kmLptG4803HYnwlhaVJ2lqUzzN5fltE-B8ZLxYdUI0LRtEw7z0L8gR9Bw1FanROYkPiMLigiszCJUSVxjRWZ5kWGyNg2McY27VAbnwLYaTpP4GW%2BYEOWKqFWW%2BYdb2y8cYLgm5jU-WC5i2QF9AqzBYRwa5AMCrY2rrMisFc40hrg%2BsGrxSdp1nW8z3jRc%2BwNWEAHoYFtABvYbmqvABffJtvIg7bVXHKCUuJxKFQAc4CQX4At8myQpRFK4KiyIYumVKNi8pKJnC1KFiQDL6oaxtyCoTAtgexBppmrsCWhnTTEKiESs65luvZaqevhJFQo%2BzFSIalh2E-PKCqKpk1m639ZKRvE%2BuYYR8lLctcy2WEKCiKB4D2GAhMfJnmZjbMKEiG54c-bayfF85eKY34NAgLhomYX5Qsl6X6IYzMiIUpSVJANSNKteSxkUytTfNsoDg8xWkdYvJ2M47WUV1-K3a8ZAnedvEDqWciNgVpG5pnCdtClmXFkzYRGcDi5rhteGMcZcr8cR53WfLGRAj54Stkic9vpJusIcD0vP1CsQfaiAIS%2BAJPA-lsXk%2BV21PZ%2BuL8nIYxtX1gPk7tNi-e75Fe9S-u1JMH2QHdkfxeD0Pm5CxPw7dSPZ1Li6Zu7AYY5AEkyQzyrSrpllMaqmFCecl0t7xCm2CpsB8vPq%2BGaf85WZfjmyyBFhscH%2BMZ-4IE4PAEEPN94NR3KwK8WxUZthYkfYk-gyTMUJMfU%2BYwc4zR3n4AIyAtgjDGEDWKIN0qbH0Mg0wrcJYcXmviG6rAiC9H6ElHUqdEpkmlKGFam4DRWkWncYAOAdB3AOPkLMOYZF5DkbI%2BAJBNp0SSraKAqjS64O0A6DhdBGwYLGNwrYtoTAVlLlg8iVpXaLwnvWXKZQSEUF%2BDIf21IuCuOUVnO%2B3JibAzgvVe0tp4bIBwPyC4YB6JXhkLIYJB5CH%2BMoXBLY21bQUEnksAAjPoAATPoAAzAlW0XBMm2hLFg20EhKkAC8uIHUZiERUNw9o3GtDaW0dxFIyDuN3P8PDzEkKyfglsbjm74MGfkNB
# (continuation of the URL above) J8jHkmGfQIAA
from functools import wraps
from pathlib import Path
from typing import Callable, Literal, Mapping, Protocol, Sequence, cast
import pandas as pd
import polars as pl
from polars.datatypes import DataType as PolarsDataType
class CachedDataFrameFunc[**P](Protocol):
def __call__(self, *args: P.args, **kwargs: P.kwargs) -> pl.DataFrame: ...
def clear_cache(self, *args: P.args, **kwargs: P.kwargs) -> None: ...
class CacheDF:
def __init__(self, base_data_dir: str | Path = '') -> None:
self._base_data_dir = Path(base_data_dir)
self.base_data_dir.mkdir(parents=True, exist_ok=True)
@property
def base_data_dir(self) -> Path:
return self._base_data_dir
@base_data_dir.setter
def base_data_dir(self, p: str | Path) -> None:
self._base_data_dir = Path(p)
def __call__[**P](
self,
name: str | Path | Callable[[tuple, dict], str],
ext: Literal['csv', 'parquet'] = 'csv',
force: bool = False,
schema_overrides: Mapping[str, PolarsDataType] | Sequence[PolarsDataType] | None = None,
) -> Callable[[Callable[P, pl.DataFrame | pd.DataFrame]], CachedDataFrameFunc[P]]:
def to_p(*args: P.args, **kwargs: P.kwargs) -> Path:
name_str = name(args, kwargs) if callable(name) else str(name)
return Path(self.base_data_dir) / f'{name_str}.{ext}'
def decorator(func: Callable[P, pl.DataFrame | pd.DataFrame]) -> CachedDataFrameFunc[P]:
@wraps(func)
def wrapper(*args: P.args, **kwargs: P.kwargs) -> pl.DataFrame:
path = to_p(*args, **kwargs)
if path.exists() and not force:
read_func = {
'csv': lambda p: pl.read_csv(p, schema_overrides=schema_overrides),
'parquet': pl.read_parquet,
}[ext]
return read_func(path)
df = func(*args, **kwargs)
if isinstance(df, pd.DataFrame):
df = pl.from_pandas(df)
{
'csv': pl.DataFrame.write_csv,
'parquet': pl.DataFrame.write_parquet,
}[ext](df, path)
return df
def clear_cache(*args: P.args, **kwargs: P.kwargs) -> None:
path = to_p(*args, **kwargs)
if path.exists():
path.unlink()
setattr(wrapper, 'clear_cache', clear_cache)
return cast(CachedDataFrameFunc[P], wrapper)
return decorator
# Shared cache instance. Its files live in ``cache/df_cache`` two levels above
# this file's directory; constructing it creates that directory if needed.
cache_df = CacheDF(
    base_data_dir=Path(__file__).parent.parent.parent / 'cache' / 'df_cache',
)
@cache_df('test_df', ext='parquet')
def test(a: int, *b: int, **kwargs) -> pl.DataFrame:
    '''function docstring'''
    # Demo payload; the arguments are accepted only to exercise the
    # decorator's parameter typing and do not influence the result.
    columns = {'a': [1, 2, 3], 'b': ['x', 'y', 'z']}
    return pl.DataFrame(columns)
if __name__ == '__main__':
    # Computes the frame and writes the cache file, or reads the file if it
    # already exists.
    frame = test(1)
    print(frame)
    # Delete the cache file again; the cache name is a fixed string here, so
    # the argument does not change which file is removed.
    test.clear_cache(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment