Created
November 28, 2025 02:45
-
-
Save myuanz/4c7e2a15147b6b498c48989a793c7ad6 to your computer and use it in GitHub Desktop.
A type-safe file cache decorator snippet
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # try it in pyright playground | |
| # https://pyright-play.net/?code=GYJw9gtgBMCuB2BjALmMAbAzlAlhADmCMlAO4gCG%2BmAsAFCiRT4XIAW6OARrgUSQAVWbeo2jIAnvhzwA5r0LEoAYQrp0FLugCmAGigAZHMm2V0%2BgLJVpc-QPCpEGfQGVtAR1jakeqIgqYyPT0eIokLPAAJgFQMfiRIXxKhBog2HHoouDQKRRpAHTRyKxS2tih-FAAIqwUACqlsdgCGHmYNcUN%2BNrBdPSIGpjYqohs2pEdFABilBDaUwiIANoAVCsCALoAFPZgjhgAlABc9FBnUJHawFAA%2Bjf%2B6ndbmNrowPorebKYR1AC%2BV9MB8VgBrUiA37-MGAg5QAC0AD5mOh8pMZhQ5r98tjeucLlc-Do8vcKKNtM9Xu8oJ8QN9IQDaUDqaDwYz6dDGbDEVAAHJgeDaLE4voigYBYaksZVKYnOh4y7XO4yYxPF5vfRcALaG5FCg6nAgX6BEBQAA%2Bf2EUAAvFAAOS2rlIvkC2V4vFq4D5G6al462r6k02oTsLY%2B7W6gMHU5us4e-Jhv3FAP5CAgyIGrYsEDeZCYK11EBefTaAAeOECNzAIPzhe0UZFeIAAvhwN1iBJo2cFVAExH0yAKW9HRb2K6Y1Bs8hYCB4FA496tYm9f3cedG73-f38i9kCYQJ38dcN0n%2B4OqfgjcgTebg2xh87BQf3ZSvcflwbrSO2Jn6wfu3cHnQO5VnWbYn3OD1dHAs54AxQU5yvM0vyQ1R1E0HQliWKd8B0fR0xQDZ9GNQjoKgUtkF%2BIw9zUJZbUQTAADdbX0W0s08bRkFtDZPzoxjmNI4AiEQeCuDQdBPymNQXiguVx0wMkID1MAGNMEAcEuH4oCsfAbFkJZjTsVo0kmLptG4803HYnwlhaVJ2lqUzzN5fltE-B8ZLxYdUI0LRtEw7z0L8gR9Bw1FanROYkPiMLigiszCJUSVxjRWZ5kWGyNg2McY27VAbnwLYaTpP4GW%2BYEOWKqFWW%2BYdb2y8cYLgm5jU-WC5i2QF9AqzBYRwa5AMCrY2rrMisFc40hrg%2BsGrxSdp1nW8z3jRc%2BwNWEAHoYFtABvYbmqvABffJtvIg7bVXHKCUuJxKFQAc4CQX4At8myQpRFK4KiyIYumVKNi8pKJnC1KFiQDL6oaxtyCoTAtgexBppmrsCWhnTTEKiESs65luvZaqevhJFQo%2BzFSIalh2E-PKCqKpk1m639ZKRvE%2BuYYR8lLctcy2WEKCiKB4D2GAhMfJnmZjbMKEiG54c-bayfF85eKY34NAgLhomYX5Qsl6X6IYzMiIUpSVJANSNKteSxkUytTfNsoDg8xWkdYvJ2M47WUV1-K3a8ZAnedvEDqWciNgVpG5pnCdtClmXFkzYRGcDi5rhteGMcZcr8cR53WfLGRAj54Stkic9vpJusIcD0vP1CsQfaiAIS%2BAJPA-lsXk%2BV21PZ%2BuL8nIYxtX1gPk7tNi-e75Fe9S-u1JMH2QHdkfxeD0Pm5CxPw7dSPZ1Li6Zu7AYY5AEkyQzyrSrpllMaqmFCecl0t7xCm2CpsB8vPq%2BGaf85WZfjmyyBFhscH%2BMZ-4IE4PAEEPN94NR3KwK8WxUZthYkfYk-gyTMUJMfU%2BYwc4zR3n4AIyAtgjDGEDWKIN0qbH0Mg0wrcJYcXmviG6rAiC9H6ElHUqdEpkmlKGFam4DRWkWncYAOAdB3AOPkLMOYZF5DkbI%2BAJBNp0SSraKAqjS64O0A6DhdBGwYLGNwrYtoTAVlLlg8iVpXaLwnvWXKZQSEUF%2BDIf21IuCuOUVnO%2B3JibAzgvVe0tp4bIBwPyC4YB6JXhkLIYJB5CH%2BMoXBLY21bQUEnksAAjPoAATPoAAzAlW0XBMm2hLFg20EhKkAC8uIHUZiERUNw9o3GtDaW0dxFIyDuN3P8PDzEkKyfglsbjm74MGfk
NBJ8jHkmGfQIAA | |
| from functools import wraps | |
| from pathlib import Path | |
| from typing import Callable, Literal, Mapping, Protocol, Sequence, cast | |
| import pandas as pd | |
| import polars as pl | |
| from polars.datatypes import DataType as PolarsDataType | |
| class CachedDataFrameFunc[**P](Protocol): | |
| def __call__(self, *args: P.args, **kwargs: P.kwargs) -> pl.DataFrame: ... | |
| def clear_cache(self, *args: P.args, **kwargs: P.kwargs) -> None: ... | |
| class CacheDF: | |
| def __init__(self, base_data_dir: str | Path = '') -> None: | |
| self._base_data_dir = Path(base_data_dir) | |
| self.base_data_dir.mkdir(parents=True, exist_ok=True) | |
| @property | |
| def base_data_dir(self) -> Path: | |
| return self._base_data_dir | |
| @base_data_dir.setter | |
| def base_data_dir(self, p: str | Path) -> None: | |
| self._base_data_dir = Path(p) | |
| def __call__[**P]( | |
| self, | |
| name: str | Path | Callable[[tuple, dict], str], | |
| ext: Literal['csv', 'parquet'] = 'csv', | |
| force: bool = False, | |
| schema_overrides: Mapping[str, PolarsDataType] | Sequence[PolarsDataType] | None = None, | |
| ) -> Callable[[Callable[P, pl.DataFrame | pd.DataFrame]], CachedDataFrameFunc[P]]: | |
| def to_p(*args: P.args, **kwargs: P.kwargs) -> Path: | |
| name_str = name(args, kwargs) if callable(name) else str(name) | |
| return Path(self.base_data_dir) / f'{name_str}.{ext}' | |
| def decorator(func: Callable[P, pl.DataFrame | pd.DataFrame]) -> CachedDataFrameFunc[P]: | |
| @wraps(func) | |
| def wrapper(*args: P.args, **kwargs: P.kwargs) -> pl.DataFrame: | |
| path = to_p(*args, **kwargs) | |
| if path.exists() and not force: | |
| read_func = { | |
| 'csv': lambda p: pl.read_csv(p, schema_overrides=schema_overrides), | |
| 'parquet': pl.read_parquet, | |
| }[ext] | |
| return read_func(path) | |
| df = func(*args, **kwargs) | |
| if isinstance(df, pd.DataFrame): | |
| df = pl.from_pandas(df) | |
| { | |
| 'csv': pl.DataFrame.write_csv, | |
| 'parquet': pl.DataFrame.write_parquet, | |
| }[ext](df, path) | |
| return df | |
| def clear_cache(*args: P.args, **kwargs: P.kwargs) -> None: | |
| path = to_p(*args, **kwargs) | |
| if path.exists(): | |
| path.unlink() | |
| setattr(wrapper, 'clear_cache', clear_cache) | |
| return cast(CachedDataFrameFunc[P], wrapper) | |
| return decorator | |
# Module-wide cache instance. Its files go to ``cache/df_cache`` below the
# directory two levels above this file's own directory.
cache_df = CacheDF(
    base_data_dir=Path(__file__).parent.parent.parent / 'cache' / 'df_cache',
)
@cache_df('test_df', ext='parquet')
def test(a: int, *b: int, **kwargs) -> pl.DataFrame:
    '''Return a fixed three-row demo frame; the arguments are not used.'''
    columns = {
        'a': [1, 2, 3],
        'b': ['x', 'y', 'z'],
    }
    return pl.DataFrame(columns)
if __name__ == '__main__':
    # Demo: call the cached function, show the result, then remove the cache
    # entry for the same arguments.
    frame = test(1)
    print(frame)
    test.clear_cache(1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment