Skip to content

Instantly share code, notes, and snippets.

@dutta-alankar
Last active February 3, 2026 09:18
Show Gist options
  • Select an option

  • Save dutta-alankar/2888b0b75097170e034cc08eec2349fa to your computer and use it in GitHub Desktop.

Select an option

Save dutta-alankar/2888b0b75097170e034cc08eec2349fa to your computer and use it in GitHub Desktop.
Cross match properties across snaps in `Gadget 4` cosmo sim halos
#!/bin/python3
# -*- coding: utf-8 -*-
"""
Created on Sat Jan 31 13:23:48 2026
@author: alankar.
Usage: time python tag-halos.py
"""
import numpy as np
import h5py
import os
import numba
class tag_halos:
def __init__(self, directory: str, snap_num:int = 7):
self.dark_matter = 1
self.baryon = 0
self._total_particle_types = 2
self.directory = directory
self.snap_num = snap_num
self._sort = True
self._already_sorted = [False, False]
self._cache = True
self._cache_dir = "./halo_cache"
self.other_snap = None
self._load_prop = True
self.log_freq = 10000
# Initilization
print(directory)
print(f"SnapNum: {snap_num}")
if snap_num!=0:
with h5py.File(f"{directory}/groups_{snap_num:03d}/fof_subhalo_tab_{snap_num:03d}.0.hdf5", 'r') as group_data:
# print(list(group_data["/Header"].attrs.keys()))
Ngroups_Total = group_data["/Header"].attrs["Ngroups_Total"]
Nids_Total = group_data["/Header"].attrs["Nids_Total"]
NumFiles_Group = group_data["/Header"].attrs["NumFiles"]
Redshift = group_data["/Header"].attrs["Redshift"]
# sore the number of halos in each group part file
self.num_groups_part = np.zeros(NumFiles_Group, dtype=np.int64)
self.Redshift = Redshift
for group_part_file in range(0, NumFiles_Group):
with h5py.File(f"{directory}/groups_{snap_num:03d}/fof_subhalo_tab_{snap_num:03d}.{group_part_file}.hdf5", 'r') as group_data:
Ngroups_ThisFile = group_data["/Header"].attrs["Ngroups_ThisFile"]
self.num_groups_part[group_part_file] = Ngroups_ThisFile
assert int(np.sum(self.num_groups_part)) == Ngroups_Total
# store the particle count of every group
self._group_sizes()
# print(Ngroups_Total, np.sum(halo_id_halo_len))
extra = "ics_" if snap_num==0 else ""
with h5py.File(f"{directory}/snapdir_{snap_num:03d}/snapshot_{extra}{snap_num:03d}.0.hdf5", 'r') as snap_data:
NumFilesPerSnapshot = snap_data["/Header"].attrs["NumFilesPerSnapshot"]
NumPart_Total = snap_data["/Header"].attrs["NumPart_Total"]
print("Particles: ", NumPart_Total[:self._total_particle_types])
# store the number of particles in each snapshot part file
self.num_particle_len = np.zeros((NumFilesPerSnapshot, self._total_particle_types), dtype=np.int64)
for snap_file in range(0, NumFilesPerSnapshot):
with h5py.File(f"{directory}/snapdir_{snap_num:03d}/snapshot_{extra}{snap_num:03d}.{snap_file}.hdf5", 'r') as snap_data:
NumPart_ThisFile = snap_data["/Header"].attrs["NumPart_ThisFile"]
self.num_particle_len[snap_file, self.baryon] = NumPart_ThisFile[self.baryon]
self.num_particle_len[snap_file, self.dark_matter] = NumPart_ThisFile[self.dark_matter]
assert np.sum(np.sum(self.num_particle_len, axis=0) == NumPart_Total[:self._total_particle_types]) == self._total_particle_types
self.cumulative_particle_count = np.cumsum(self.num_particle_len, axis=0)
print("Intialization complete!")
def group_index_to_global(self, group_index_local: int, group_part_number:int) -> int:
assert group_part_number<=(self.num_groups_part.shape[0]-1)
offset = np.cumsum(self.num_groups_part)[group_part_number-1]
return offset + group_index_local
def group_index_to_local(self, group_index_global:int) -> int:
cumulative_halo_count = np.cumsum(self.num_groups_part)
group_part_number = np.sum(cumulative_halo_count <= (group_index_global+1))-1
return group_index_global - cumulative_halo_count[group_part_number-1]
def particle_index_to_global(self, particle_index_local: int, snap_part_number:int, particle_type:int) -> int:
assert snap_part_number<=(self.num_particle_len.shape[0]-1)
offset = np.cumsum(self.num_particle_len[:,particle_type])[snap_part_number-1]
return offset + particle_index_local
def particle_index_to_local(self, particle_index_global:int, particle_type:int) -> int:
cumulative_particle_count_by_type = self.cumulative_particle_count[:,particle_type]
snap_part_file = np.sum(cumulative_particle_count_by_type <= (particle_index_global+1))-1
return particle_index_global - cumulative_particle_count_by_type[snap_part_file-1]
def id_sort(self, particle_type:int):
if self._already_sorted[particle_type]:
# no disk IO/calculation is needed as sorted index has already been loaded or calculated
return
# clear memory if present
self.ID_indx_sorted = None
# self.particle_IDS = []
# particle_types = np.sort([self.dark_matter, self.baryon])
extra = "ics_" if self.snap_num==0 else ""
ID_sorted_file = f"{self._cache_dir}/argsort-ID_snap{self.snap_num}_PType{particle_type}.npy"
if not(self._cache and os.path.isfile(ID_sorted_file)):
IDs = np.zeros(np.sum(self.num_particle_len[:,particle_type]), dtype=np.uint64)
print(f"/PartType{particle_type}")
for snap_part_file in range(self.num_particle_len.shape[0]):
print(f"part file: {snap_part_file}", end='\r' if snap_part_file<(self.num_particle_len.shape[0]-1) else ' ... Done!\n')
cumulative_particle_count_by_type = self.cumulative_particle_count[:,particle_type]
start = 0 if snap_part_file==0 else cumulative_particle_count_by_type[snap_part_file-1]
stop = cumulative_particle_count_by_type[snap_part_file] - 1
with h5py.File(f"{self.directory}/snapdir_{self.snap_num:03d}/snapshot_{extra}{self.snap_num:03d}.{snap_part_file}.hdf5", 'r') as snap_data:
IDs[start:stop+1] = np.array(snap_data[f"/PartType{particle_type}/ParticleIDs"])
# self.particle_IDS.append(IDs)
if self._cache and os.path.isfile(ID_sorted_file):
print(f"Loading file {ID_sorted_file}")
self.ID_indx_sorted = np.load(ID_sorted_file)
else:
os.makedirs(self._cache_dir, exist_ok=True)
if self._sort:
print(f"Sorting particle type {particle_type} by ID")
self.ID_indx_sorted = np.argsort(IDs)
print(f"Sorting completed")
print(f"Saving file {ID_sorted_file}")
np.save(ID_sorted_file, self.ID_indx_sorted)
IDs = None # clear the memory
else:
# dummy
self.ID_indx_sorted = np.arange(0, np.sum(self.num_particle_len[:,particle_type]), dtype=np.int64)
self._already_sorted = [False, False]
self._already_sorted[particle_type] = True
# self.particle_IDS = None
def global_particle_index_to_group_index(self, global_particle_index:int, particle_type:int) -> int:
cumulative_particle_count_by_type = self.cumulative_particle_count[:,particle_type]
return np.searchsorted(cumulative_particle_count_by_type, global_particle_index, side='right')
# np.sum(cumulative_particle_count_by_type <= global_particle_index)
# def particle_id_to_global_particle_index(self, particle_id:int, particle_type:int) -> int:
# self.ID_indx_sorted
def _group_sizes(self) -> None:
NumFiles_Group = self.num_groups_part.shape[0]
num_all_halos = np.sum(self.num_groups_part)
cumulative_group_count = np.cumsum(self.num_groups_part)
self.num_group_particle_len = np.zeros((num_all_halos, self._total_particle_types), dtype=np.int64)
for group_part_file in range(0, NumFiles_Group):
start = 0 if group_part_file==0 else cumulative_group_count[group_part_file-1]
stop = cumulative_group_count[group_part_file] - 1
with h5py.File(f"{directory}/groups_{snap_num:03d}/fof_subhalo_tab_{snap_num:03d}.{group_part_file}.hdf5", 'r') as group_data:
num_particles_per_group = np.array(group_data["/Group/GroupLenType"])
self.num_group_particle_len[start:stop+1,self.baryon] = num_particles_per_group[:, self.baryon]
self.num_group_particle_len[start:stop+1,self.dark_matter] = num_particles_per_group[:, self.dark_matter]
#typically relate to an earlier progenitor snap
def relate_other_snap(self, snap_num:int, halo_prop:str, particle_type:int) -> None:
# ="Velocities"
self.id_sort(particle_type)
if self.other_snap is None:
self.other_snap = tag_halos(self.directory, snap_num)
self.other_snap.id_sort(particle_type)
print(f"Cross-matching to snap {self.snap_num} with snap {snap_num} for particle type {particle_type} for halo prop: {halo_prop}")
if not(self._load_prop):
print("Dummy run, no props loaded!")
extra = "ics_" if snap_num==0 else ""
if self._load_prop:
with h5py.File(f"{self.other_snap.directory}/snapdir_{self.other_snap.snap_num:03d}/snapshot_{extra}{self.other_snap.snap_num:03d}.0.hdf5", 'r') as snap_data:
prop_data = snap_data[f"/PartType{particle_type}/{halo_prop}"]
if prop_data.ndim>1:
prop_length = snap_data[f"/PartType{particle_type}/{halo_prop}"].shape[1]
else:
prop_length = 1
prop_array = np.zeros((np.sum(self.other_snap.num_particle_len[:,particle_type]), prop_length), dtype=np.float64)
particle_masses = np.zeros(np.sum(self.other_snap.num_particle_len[:,particle_type]), dtype=np.float64)
if self._load_prop:
print(f"Loading particle property {halo_prop} for SnapNum: {self.other_snap.snap_num}")
if not(self._load_prop):
print("Dummy run, no props loaded!")
for snap_part_file in range(self.other_snap.num_particle_len.shape[0]):
cumulative_particle_count_by_type = self.other_snap.cumulative_particle_count[:,particle_type]
start = 0 if snap_part_file==0 else cumulative_particle_count_by_type[snap_part_file-1]
stop = cumulative_particle_count_by_type[snap_part_file] - 1
print(f"part file: {snap_part_file}", end='\r' if snap_part_file<(self.num_particle_len.shape[0]-1) else ' ... Done!\n')
with h5py.File(f"{self.other_snap.directory}/snapdir_{self.other_snap.snap_num:03d}/snapshot_{extra}{self.other_snap.snap_num:03d}.{snap_part_file}.hdf5", 'r') as snap_data:
if self._load_prop:
prop_array[start:stop+1,:] = np.array(snap_data[f"/PartType{particle_type}/{halo_prop}"])
particle_masses[start:stop+1] = np.array(snap_data[f"/PartType{particle_type}/Masses"])
print("Calling group accumulator")
if self._load_prop:
self.calculate_group_props(prop_array, particle_masses, halo_prop, particle_type)
else:
self.calculate_group_props(None, particle_masses, halo_prop, particle_type)
# @numba.njit
def calculate_group_props(self, prop_array:np.ndarray, particle_masses:np.ndarray, halo_prop:str, particle_type:int) -> None:
halo_prop_file = f"{self._cache_dir}/Group_{halo_prop}_snap{self.other_snap.snap_num}_PType{particle_type}.npy"
print("Calculating group properties ...")
if self._load_prop and prop_array is not None:
prop_length = prop_array.shape[1]
group_props = np.zeros((np.sum(self.num_groups_part), prop_length), dtype=np.float64)
group_mass = np.zeros(np.sum(self.num_groups_part), dtype=np.float64)
group_particle_count = np.zeros(np.sum(self.num_groups_part), dtype=np.int64)
for count, indx in enumerate(self.ID_indx_sorted):
if (count%self.log_freq)==0:
print(f"count = {count+1}/{self.ID_indx_sorted.shape[0]} ({((count+1)/self.ID_indx_sorted.shape[0]*100):.1f}%)",
end='\r' if count<(self.ID_indx_sorted.shape[0]-1) else '\n')
halo_indx = self.global_particle_index_to_group_index(indx, particle_type)
particle_indx_other_snap = self.other_snap.ID_indx_sorted[count]
if self._load_prop and prop_array is not None:
group_props[halo_indx,:] += (prop_array[particle_indx_other_snap,:] * particle_masses[particle_indx_other_snap])
group_mass[halo_indx] += particle_masses[particle_indx_other_snap]
group_particle_count[halo_indx] += 1
# normalize
if self._load_prop and prop_array is not None:
print("Normalizing group properties ...")
for halo_indx in range(0, np.sum(self.num_groups_part)):
group_props[halo_indx,:] = group_props[halo_indx,:]/group_mass[halo_indx]
if prop_length==1:
group_props = group_props.flatten()
print(f"Saving file {halo_prop_file}")
np.save(halo_prop_file, group_props)
print(f"Saving file {halo_prop_file.split('.')[0]}-mass.npy")
np.save(f"{halo_prop_file.split('.')[0]}-mass.npy", group_mass)
print(f"Saving file {halo_prop_file.split('.')[0]}-particle-count.npy")
np.save(f"{halo_prop_file.split('.')[0]}-particle-count.npy", group_particle_count)
# free memory
prop_array = None
particle_masses = None
group_particle_count = None
group_props = None
def refresh(self) -> None:
self.other_snap = None
def debug(self):
self.id_sort()
print(self.ID_indx_sorted[self.dark_matter])
print(self.ID_indx_sorted[self.baryon])
print(self.ID_indx_sorted[self.dark_matter].shape)
print(self.ID_indx_sorted[self.baryon].shape)
print(np.sum(self.num_particle_len, axis=0))
print(np.sum(self.num_particle_len, axis=0))
if __name__ == "__main__":
dark_matter = 1
baryon = 0
directory = "/orion/ptmp/adutt/arepo-multi-zoom/gadget-files/gadget4_development/examples/multi-zoom/streaming/parent/output_dumps"
snap_num = 7
halo = tag_halos(directory, snap_num)
print("Dark matter: ", dark_matter)
dm_com_vel_in_ic = halo.relate_other_snap(0, "Velocities", dark_matter)
print("Baryon: ", baryon)
baryon_com_vel_in_ic = halo.relate_other_snap(0, "Velocities", baryon)
# halo = tag_halos(directory, 0)
# halo.id_sort()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment