Skip to content

Instantly share code, notes, and snippets.

@TheBlackPlague
Created October 5, 2024 01:05
Show Gist options
  • Select an option

  • Save TheBlackPlague/a1b98660399e729ee8891177600832ec to your computer and use it in GitHub Desktop.

Select an option

Save TheBlackPlague/a1b98660399e729ee8891177600832ec to your computer and use it in GitHub Desktop.
Loads killmails from an EVERef-style data folder.
import json
import os
import pickle
import tarfile
from types import SimpleNamespace
from typing import Optional

from tqdm import tqdm
def parse_killmail(json_string, restriction_callback=None) -> Optional[SimpleNamespace]:
    """Parse one killmail JSON document into nested ``SimpleNamespace`` objects.

    Every JSON object in the document (at any depth) becomes a
    ``SimpleNamespace`` whose attributes are the object's keys.

    Args:
        json_string: The JSON text of a single killmail.
        restriction_callback: Optional predicate called with the parsed
            killmail. When given, the killmail is kept only if the predicate
            returns a truthy value.

    Returns:
        The parsed killmail, or ``None`` when ``restriction_callback``
        rejected it.

    Raises:
        json.JSONDecodeError: If ``json_string`` is not valid JSON.
    """
    result = json.loads(json_string, object_hook=lambda d: SimpleNamespace(**d))
    if restriction_callback is None or restriction_callback(result):
        return result
    # Explicit so the "rejected by the callback" outcome is visible to readers.
    return None
def killmail_single_batch_load(archive_path, restriction_callback=None) -> list:
    """Load the killmails stored in one bzip2-compressed tar archive.

    Every regular archive member whose name ends in ``.json`` is read,
    decoded as UTF-8 and passed to ``parse_killmail``. Falsy results
    (killmails rejected by ``restriction_callback``) are skipped.

    Loading is best-effort: if anything raises while the archive is being
    processed, the error is printed and the killmails collected up to that
    point are returned instead of propagating the exception.

    Args:
        archive_path: Path of the ``.tar.bz2`` archive to read.
        restriction_callback: Optional predicate forwarded to
            ``parse_killmail``.

    Returns:
        A list of the parsed killmails that were kept.
    """
    result = []
    try:
        with tarfile.open(archive_path, 'r:bz2') as archive:
            # Iterate the archive directly rather than calling getmembers():
            # getmembers() scans the whole compressed stream before the first
            # member can be read, which forces a second decompression pass.
            for member in archive:
                if not (member.isfile() and member.name.endswith('.json')):
                    continue
                extracted = archive.extractfile(member)
                if extracted is None:
                    continue
                # Close the member's file object as soon as it has been read.
                with extracted:
                    json_text = extracted.read().decode('utf-8')
                killmail = parse_killmail(json_text, restriction_callback)
                if killmail:
                    result.append(killmail)
    except Exception as e:
        # Deliberately broad: one bad archive must not abort a multi-day load.
        print(f"Failed to process archive {archive_path}: {e}")
    return result
def killmail_multiple_batch_load(archive_directory, start, end, restriction_callback=None) -> list:
    """Load killmails from every archive in a directory within a name range.

    An archive is selected when its file name ends in ``.tar.bz2`` and sorts
    (as a plain string) between ``killmails-{start}.tar.bz2`` and
    ``killmails-{end}.tar.bz2``, both inclusive. Selected archives are loaded
    in sorted name order while a tqdm progress bar reports progress.

    Args:
        archive_directory: Directory that contains the archives.
        start: Value substituted into the lower-bound file name.
        end: Value substituted into the upper-bound file name.
        restriction_callback: Optional predicate forwarded to
            ``killmail_single_batch_load``.

    Returns:
        One flat list with the killmails of all selected archives.
    """
    lower_bound = f'killmails-{start}.tar.bz2'
    upper_bound = f'killmails-{end}.tar.bz2'

    archive_names = [
        name for name in os.listdir(archive_directory)
        if name.endswith('.tar.bz2') and lower_bound <= name <= upper_bound
    ]
    archive_names.sort()

    loaded = []
    with tqdm(total=len(archive_names), desc="Loading ... ", unit='day') as progress_bar:
        for name in archive_names:
            # Show which archive is being read and the running total.
            progress_bar.set_description(
                f"Loading ... {name}, total killmails loaded so far: {len(loaded)}"
            )
            archive_path = os.path.join(archive_directory, name)
            loaded += killmail_single_batch_load(archive_path, restriction_callback)
            progress_bar.update(1)
    return loaded
# Folder that holds the killmail archives; the path is fixed to the 2024 subfolder.
directory = "data.everef.net/killmails/2024/"
def load_killmails(first, last, cache=True, restriction_callback=None) -> list:
    """Load killmails for a range of archives, using a pickle cache if allowed.

    The cache is used only when ``cache`` is true and no
    ``restriction_callback`` is given, because the cache file name encodes
    only ``first`` and ``last`` and so cannot tell filtered results apart.

    Args:
        first: Lower bound passed to ``killmail_multiple_batch_load``.
        last: Upper bound passed to ``killmail_multiple_batch_load``.
        cache: Whether to read from / write to
            ``data/killmails-{first}-{last}.pkl``.
        restriction_callback: Optional predicate forwarded to the loader;
            supplying one disables caching.

    Returns:
        The list of loaded killmails.
    """
    if not cache or restriction_callback is not None:
        return killmail_multiple_batch_load(directory, first, last, restriction_callback)

    cache_file = f'data/killmails-{first}-{last}.pkl'
    if os.path.isfile(cache_file):
        # NOTE: unpickling can execute arbitrary code. Only cache files that
        # were produced locally by this function should ever be placed here.
        with open(cache_file, 'rb') as f:
            return pickle.load(f)

    killmails = killmail_multiple_batch_load(directory, first, last, restriction_callback)

    # Create the cache folder on demand so a missing 'data/' directory does
    # not throw away the result of the load that just finished.
    os.makedirs(os.path.dirname(cache_file), exist_ok=True)
    # Write to a temporary name and rename afterwards, so an interrupted dump
    # never leaves a truncated file at the real cache path.
    partial_file = cache_file + '.tmp'
    with open(partial_file, 'wb') as f:
        # noinspection PyTypeChecker
        pickle.dump(killmails, f)
    os.replace(partial_file, cache_file)
    return killmails
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment