Created
October 5, 2024 01:05
-
-
Save TheBlackPlague/a1b98660399e729ee8891177600832ec to your computer and use it in GitHub Desktop.
Loads killmails from an EVERef-style data folder.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import pickle | |
| import tarfile | |
| import json | |
| from types import SimpleNamespace | |
| from tqdm import tqdm | |
def parse_killmail(json_string, restriction_callback=None) -> "SimpleNamespace | None":
    """Parse one killmail JSON document into attribute-accessible objects.

    Every JSON object in the document, at any nesting depth, is converted to
    a ``SimpleNamespace`` so its fields can be read as attributes
    (``parsed.a.b`` instead of ``parsed["a"]["b"]``).

    Args:
        json_string: JSON text of a single killmail.
        restriction_callback: Optional predicate called with the parsed
            killmail. When given, the killmail is kept only if the
            predicate returns a truthy value.

    Returns:
        The parsed killmail, or ``None`` when ``restriction_callback``
        rejects it.

    Raises:
        json.JSONDecodeError: If ``json_string`` is not valid JSON.
    """
    # The return annotation is a string on purpose: the former
    # ``SimpleNamespace or None`` was evaluated as an expression and collapsed
    # to plain ``SimpleNamespace``, hiding the fact that None can be returned.
    killmail = json.loads(json_string, object_hook=lambda d: SimpleNamespace(**d))
    if restriction_callback is not None and not restriction_callback(killmail):
        return None
    return killmail
def killmail_single_batch_load(archive_path, restriction_callback=None) -> list:
    """Load the killmails stored in one bzip2-compressed tar archive.

    Every regular member whose name ends in ``.json`` is read, decoded as
    UTF-8 and handed to ``parse_killmail``; killmails that survive
    ``restriction_callback`` are collected.

    Loading is best-effort: if the archive cannot be opened or processing
    fails part-way through, the error is printed and the killmails parsed
    up to that point are returned.

    Args:
        archive_path: Path of the ``.tar.bz2`` archive to read.
        restriction_callback: Optional predicate forwarded to
            ``parse_killmail``.

    Returns:
        List of parsed killmails (possibly empty or partial on failure).
    """
    result = []
    try:
        with tarfile.open(archive_path, 'r:bz2') as archive:
            # Iterate the TarFile directly instead of calling getmembers():
            # members are then read in a single pass rather than scanning the
            # entire compressed archive before the first one is extracted.
            for member in archive:
                if not (member.isfile() and member.name.endswith('.json')):
                    continue
                file = archive.extractfile(member)
                if file is None:
                    continue
                # Close the member reader as soon as its content is read.
                with file:
                    json_obj = file.read().decode('utf-8')
                killmail = parse_killmail(json_obj, restriction_callback)
                if killmail:
                    result.append(killmail)
    except Exception as e:
        # Deliberately broad: one bad archive must not abort a multi-day load.
        print(f"Failed to process archive {archive_path}: {e}")
    return result
def killmail_multiple_batch_load(archive_directory, start, end, restriction_callback=None) -> list:
    """Load killmails from every archive in a directory within a name range.

    Archives are the ``.tar.bz2`` files in ``archive_directory`` whose names
    fall, by plain string comparison, between ``killmails-{start}.tar.bz2``
    and ``killmails-{end}.tar.bz2`` inclusive. They are processed in sorted
    name order while a tqdm progress bar reports the current file and the
    running killmail count.

    Args:
        archive_directory: Directory containing the archives.
        start: Lower bound inserted into the archive file name.
        end: Upper bound inserted into the archive file name.
        restriction_callback: Optional predicate forwarded to
            ``killmail_single_batch_load``.

    Returns:
        A single list with the killmails of all selected archives.
    """
    # NOTE(review): the range test is lexicographic, so start/end presumably
    # need a sortable format such as ISO dates -- confirm against callers.
    lowest_name = f'killmails-{start}.tar.bz2'
    highest_name = f'killmails-{end}.tar.bz2'

    batch_files = [
        name for name in os.listdir(archive_directory)
        if name.endswith('.tar.bz2') and lowest_name <= name <= highest_name
    ]
    batch_files.sort()

    loaded = []
    with tqdm(total=len(batch_files), desc="Loading ... ", unit='day') as bar:
        for name in batch_files:
            bar.set_description(
                f"Loading ... {name}, total killmails loaded so far: {len(loaded)}"
            )
            archive_path = os.path.join(archive_directory, name)
            loaded += killmail_single_batch_load(archive_path, restriction_callback)
            bar.update(1)
    return loaded
# Folder scanned for killmail archives by load_killmails().
# NOTE(review): the year is hard-coded, so only 2024 archives are reachable.
directory = 'data.everef.net/killmails/2024/'


def load_killmails(first, last, cache=True, restriction_callback=None) -> list:
    """Load killmails for the range ``first``..``last``, with optional caching.

    When ``cache`` is true and no ``restriction_callback`` is given, the
    result is stored in (and later served from) the pickle file
    ``data/killmails-{first}-{last}.pkl``. Filtered loads are never cached
    because the cache file name does not encode the filter.

    Args:
        first: Lower bound passed to ``killmail_multiple_batch_load``.
        last: Upper bound passed to ``killmail_multiple_batch_load``.
        cache: Whether the pickle cache may be read and written.
        restriction_callback: Optional predicate used to filter killmails;
            supplying one disables caching.

    Returns:
        List of loaded killmails.
    """
    if not (cache and restriction_callback is None):
        return killmail_multiple_batch_load(directory, first, last, restriction_callback)

    possible_cache_file = f'data/killmails-{first}-{last}.pkl'
    if os.path.isfile(possible_cache_file):
        # SECURITY: unpickling can execute arbitrary code. This is acceptable
        # only because the file is a cache written locally by this function;
        # never point it at data from an untrusted source.
        with open(possible_cache_file, 'rb') as f:
            return pickle.load(f)

    killmails = killmail_multiple_batch_load(directory, first, last, restriction_callback)

    # Create the cache folder on demand so a missing directory cannot throw
    # away the (expensive) load that just finished.
    os.makedirs(os.path.dirname(possible_cache_file), exist_ok=True)

    # Write to a temporary file and rename it into place, so an interrupted
    # dump never leaves a truncated cache that would break the next call.
    partial_file = possible_cache_file + '.tmp'
    try:
        with open(partial_file, 'wb') as f:
            # noinspection PyTypeChecker
            pickle.dump(killmails, f)
        os.replace(partial_file, possible_cache_file)
    except BaseException:
        if os.path.exists(partial_file):
            os.remove(partial_file)
        raise
    return killmails
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment