Last active
February 3, 2026 09:18
-
-
Save dutta-alankar/2888b0b75097170e034cc08eec2349fa to your computer and use it in GitHub Desktop.
Cross-match halo properties across snapshots in `Gadget 4` cosmological-simulation runs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/python3 | |
| # -*- coding: utf-8 -*- | |
| """ | |
| Created on Sat Jan 31 13:23:48 2026 | |
| @author: alankar. | |
| Usage: time python tag-halos.py | |
| """ | |
| import numpy as np | |
| import h5py | |
| import os | |
| import numba | |
class tag_halos:
    """Tag FOF halos of a Gadget-4 cosmological snapshot and cross-match their
    member particles (by particle ID rank) against another snapshot, accumulating
    mass-weighted particle properties per halo.

    Particle-type convention used throughout: baryons are type 0, dark matter is
    type 1. Results and ID-argsort caches are written as ``.npy`` files under
    ``self._cache_dir``.
    """

    def __init__(self, directory: str, snap_num: int = 7):
        """Read the group-catalog and snapshot headers for ``snap_num``.

        Parameters
        ----------
        directory : str
            Output directory containing ``groups_XXX/`` and ``snapdir_XXX/``.
        snap_num : int
            Snapshot number. Snapshot 0 is treated as the ICs (its files carry an
            extra ``ics_`` tag) and is assumed to have no group catalog.
        """
        self.dark_matter = 1
        self.baryon = 0
        self._total_particle_types = 2
        self.directory = directory
        self.snap_num = snap_num
        self._sort = True                      # actually argsort particle IDs (False -> identity permutation)
        self._already_sorted = [False, False]  # per particle type; only one type resident at a time
        self._cache = True                     # cache argsort results on disk
        self._cache_dir = "./halo_cache"
        self.other_snap = None                 # lazily built tag_halos for the cross-matched snap
        self._load_prop = True                 # False -> dry run without reading particle properties
        self.log_freq = 10000                  # progress-print frequency in calculate_group_props
        # Initialization
        print(directory)
        print(f"SnapNum: {snap_num}")
        if snap_num != 0:
            with h5py.File(f"{directory}/groups_{snap_num:03d}/fof_subhalo_tab_{snap_num:03d}.0.hdf5", 'r') as group_data:
                # print(list(group_data["/Header"].attrs.keys()))
                Ngroups_Total = group_data["/Header"].attrs["Ngroups_Total"]
                Nids_Total = group_data["/Header"].attrs["Nids_Total"]
                NumFiles_Group = group_data["/Header"].attrs["NumFiles"]
                Redshift = group_data["/Header"].attrs["Redshift"]
            # store the number of halos in each group part file
            self.num_groups_part = np.zeros(NumFiles_Group, dtype=np.int64)
            self.Redshift = Redshift
            for group_part_file in range(0, NumFiles_Group):
                with h5py.File(f"{directory}/groups_{snap_num:03d}/fof_subhalo_tab_{snap_num:03d}.{group_part_file}.hdf5", 'r') as group_data:
                    Ngroups_ThisFile = group_data["/Header"].attrs["Ngroups_ThisFile"]
                    self.num_groups_part[group_part_file] = Ngroups_ThisFile
            assert int(np.sum(self.num_groups_part)) == Ngroups_Total
            # store the particle count of every group
            self._group_sizes()
        extra = "ics_" if snap_num == 0 else ""
        with h5py.File(f"{directory}/snapdir_{snap_num:03d}/snapshot_{extra}{snap_num:03d}.0.hdf5", 'r') as snap_data:
            NumFilesPerSnapshot = snap_data["/Header"].attrs["NumFilesPerSnapshot"]
            NumPart_Total = snap_data["/Header"].attrs["NumPart_Total"]
        print("Particles: ", NumPart_Total[:self._total_particle_types])
        # store the number of particles in each snapshot part file
        self.num_particle_len = np.zeros((NumFilesPerSnapshot, self._total_particle_types), dtype=np.int64)
        for snap_file in range(0, NumFilesPerSnapshot):
            with h5py.File(f"{directory}/snapdir_{snap_num:03d}/snapshot_{extra}{snap_num:03d}.{snap_file}.hdf5", 'r') as snap_data:
                NumPart_ThisFile = snap_data["/Header"].attrs["NumPart_ThisFile"]
                self.num_particle_len[snap_file, self.baryon] = NumPart_ThisFile[self.baryon]
                self.num_particle_len[snap_file, self.dark_matter] = NumPart_ThisFile[self.dark_matter]
        assert np.sum(np.sum(self.num_particle_len, axis=0) == NumPart_Total[:self._total_particle_types]) == self._total_particle_types
        # row i = particle count (per type) of part files 0..i combined
        self.cumulative_particle_count = np.cumsum(self.num_particle_len, axis=0)
        print("Initialization complete!")

    def group_index_to_global(self, group_index_local: int, group_part_number: int) -> int:
        """Convert a group index local to catalog part file ``group_part_number``
        into a global (catalog-wide) group index."""
        assert group_part_number <= (self.num_groups_part.shape[0] - 1)
        # BUGFIX: part file 0 has zero offset; cumsum(...)[-1] would wrongly give
        # the TOTAL group count when group_part_number == 0.
        if group_part_number == 0:
            return group_index_local
        offset = np.cumsum(self.num_groups_part)[group_part_number - 1]
        return offset + group_index_local

    def group_index_to_local(self, group_index_global: int) -> int:
        """Convert a global group index into its index local to the catalog part
        file that stores it."""
        cumulative_halo_count = np.cumsum(self.num_groups_part)
        # BUGFIX: locate the owning part file by bisection; the previous
        # `sum(cum <= idx+1)-1` picked the wrong file (e.g. -1 for indices in
        # file 0) and subtracted the wrong offset.
        group_part_number = int(np.searchsorted(cumulative_halo_count, group_index_global, side='right'))
        offset = 0 if group_part_number == 0 else cumulative_halo_count[group_part_number - 1]
        return group_index_global - offset

    def particle_index_to_global(self, particle_index_local: int, snap_part_number: int, particle_type: int) -> int:
        """Convert a particle index local to snapshot part file ``snap_part_number``
        into a global (snapshot-wide) index for the given particle type."""
        assert snap_part_number <= (self.num_particle_len.shape[0] - 1)
        # BUGFIX: same off-by-one as group_index_to_global — part file 0 has zero offset.
        if snap_part_number == 0:
            return particle_index_local
        offset = np.cumsum(self.num_particle_len[:, particle_type])[snap_part_number - 1]
        return offset + particle_index_local

    def particle_index_to_local(self, particle_index_global: int, particle_type: int) -> int:
        """Convert a global particle index into its index local to the snapshot
        part file that stores it."""
        cumulative_particle_count_by_type = self.cumulative_particle_count[:, particle_type]
        # BUGFIX: bisect to find the owning part file and subtract its offset
        # (0 for the first file); the previous arithmetic mislocated the file.
        snap_part_file = int(np.searchsorted(cumulative_particle_count_by_type, particle_index_global, side='right'))
        offset = 0 if snap_part_file == 0 else cumulative_particle_count_by_type[snap_part_file - 1]
        return particle_index_global - offset

    def id_sort(self, particle_type: int):
        """Compute (or load from the on-disk cache) the argsort of the particle IDs
        of ``particle_type`` and store it in ``self.ID_indx_sorted``.

        Only one particle type's sorted index is resident at a time; requesting a
        different type rebuilds/reloads it.
        """
        if self._already_sorted[particle_type]:
            # no disk IO/calculation needed: the sorted index for this type is resident
            return
        # drop any previously resident index before (re)building
        self.ID_indx_sorted = None
        extra = "ics_" if self.snap_num == 0 else ""
        ID_sorted_file = f"{self._cache_dir}/argsort-ID_snap{self.snap_num}_PType{particle_type}.npy"
        if not (self._cache and os.path.isfile(ID_sorted_file)):
            # gather the raw particle IDs from every snapshot part file
            IDs = np.zeros(np.sum(self.num_particle_len[:, particle_type]), dtype=np.uint64)
            print(f"/PartType{particle_type}")
            num_files = self.num_particle_len.shape[0]
            for snap_part_file in range(num_files):
                print(f"part file: {snap_part_file}", end='\r' if snap_part_file < (num_files - 1) else ' ... Done!\n')
                cumulative_particle_count_by_type = self.cumulative_particle_count[:, particle_type]
                start = 0 if snap_part_file == 0 else cumulative_particle_count_by_type[snap_part_file - 1]
                stop = cumulative_particle_count_by_type[snap_part_file] - 1
                with h5py.File(f"{self.directory}/snapdir_{self.snap_num:03d}/snapshot_{extra}{self.snap_num:03d}.{snap_part_file}.hdf5", 'r') as snap_data:
                    IDs[start:stop + 1] = np.array(snap_data[f"/PartType{particle_type}/ParticleIDs"])
        if self._cache and os.path.isfile(ID_sorted_file):
            print(f"Loading file {ID_sorted_file}")
            self.ID_indx_sorted = np.load(ID_sorted_file)
        else:
            os.makedirs(self._cache_dir, exist_ok=True)
            if self._sort:
                print(f"Sorting particle type {particle_type} by ID")
                self.ID_indx_sorted = np.argsort(IDs)
                print("Sorting completed")
                print(f"Saving file {ID_sorted_file}")
                np.save(ID_sorted_file, self.ID_indx_sorted)
                IDs = None  # release the raw-ID buffer
            else:
                # dummy run: identity permutation, no actual sorting
                self.ID_indx_sorted = np.arange(0, np.sum(self.num_particle_len[:, particle_type]), dtype=np.int64)
        # only one type is resident, so invalidate the other type's flag
        self._already_sorted = [False, False]
        self._already_sorted[particle_type] = True

    def global_particle_index_to_group_index(self, global_particle_index: int, particle_type: int) -> int:
        """Map a particle's global (group-ordered) snapshot position to the FOF
        group that contains it.

        Gadget-4 writes snapshot particles group-ordered, so the owning group is
        found by bisecting the cumulative per-group particle counts. Particles
        stored after the last group (the "fuzz") map to an index >= total group
        count; callers must guard for that.

        NOTE(review): the original bisected ``cumulative_particle_count`` (per
        snapshot *file*), whose result cannot index the full group catalog —
        presumed typo for the per-group cumulative lengths; confirm against intent.
        """
        cumulative_group_particle_count = np.cumsum(self.num_group_particle_len[:, particle_type])
        return int(np.searchsorted(cumulative_group_particle_count, global_particle_index, side='right'))

    def _group_sizes(self) -> None:
        """Read ``/Group/GroupLenType`` from every catalog part file into
        ``self.num_group_particle_len`` (one row per group, one column per type)."""
        NumFiles_Group = self.num_groups_part.shape[0]
        num_all_halos = np.sum(self.num_groups_part)
        cumulative_group_count = np.cumsum(self.num_groups_part)
        self.num_group_particle_len = np.zeros((num_all_halos, self._total_particle_types), dtype=np.int64)
        for group_part_file in range(0, NumFiles_Group):
            start = 0 if group_part_file == 0 else cumulative_group_count[group_part_file - 1]
            stop = cumulative_group_count[group_part_file] - 1
            # BUGFIX: use self.directory / self.snap_num; the bare names only
            # resolved through accidental module-level globals in __main__.
            with h5py.File(f"{self.directory}/groups_{self.snap_num:03d}/fof_subhalo_tab_{self.snap_num:03d}.{group_part_file}.hdf5", 'r') as group_data:
                num_particles_per_group = np.array(group_data["/Group/GroupLenType"])
            self.num_group_particle_len[start:stop + 1, self.baryon] = num_particles_per_group[:, self.baryon]
            self.num_group_particle_len[start:stop + 1, self.dark_matter] = num_particles_per_group[:, self.dark_matter]

    # typically relates this snap to an earlier progenitor snap (e.g. the ICs)
    def relate_other_snap(self, snap_num: int, halo_prop: str, particle_type: int) -> None:
        """Cross-match particles of this snapshot's FOF groups to snapshot
        ``snap_num`` and accumulate the mass-weighted particle property
        ``halo_prop`` (e.g. "Velocities") per group.

        Results are written as ``.npy`` files to the cache directory; returns None.
        """
        self.id_sort(particle_type)
        # BUGFIX: rebuild the helper object when a *different* snap is requested;
        # the original silently reused whatever other_snap was cached.
        if self.other_snap is None or self.other_snap.snap_num != snap_num:
            self.other_snap = tag_halos(self.directory, snap_num)
        self.other_snap.id_sort(particle_type)
        print(f"Cross-matching to snap {self.snap_num} with snap {snap_num} for particle type {particle_type} for halo prop: {halo_prop}")
        if not self._load_prop:
            print("Dummy run, no props loaded!")
        extra = "ics_" if snap_num == 0 else ""
        prop_array = None
        if self._load_prop:
            # peek at the first part file to learn the property's dimensionality
            with h5py.File(f"{self.other_snap.directory}/snapdir_{self.other_snap.snap_num:03d}/snapshot_{extra}{self.other_snap.snap_num:03d}.0.hdf5", 'r') as snap_data:
                prop_data = snap_data[f"/PartType{particle_type}/{halo_prop}"]
                prop_length = prop_data.shape[1] if prop_data.ndim > 1 else 1
            prop_array = np.zeros((np.sum(self.other_snap.num_particle_len[:, particle_type]), prop_length), dtype=np.float64)
            print(f"Loading particle property {halo_prop} for SnapNum: {self.other_snap.snap_num}")
        particle_masses = np.zeros(np.sum(self.other_snap.num_particle_len[:, particle_type]), dtype=np.float64)
        num_other_files = self.other_snap.num_particle_len.shape[0]
        for snap_part_file in range(num_other_files):
            cumulative_particle_count_by_type = self.other_snap.cumulative_particle_count[:, particle_type]
            start = 0 if snap_part_file == 0 else cumulative_particle_count_by_type[snap_part_file - 1]
            stop = cumulative_particle_count_by_type[snap_part_file] - 1
            # BUGFIX: the progress terminator must use the *other* snap's file count
            print(f"part file: {snap_part_file}", end='\r' if snap_part_file < (num_other_files - 1) else ' ... Done!\n')
            with h5py.File(f"{self.other_snap.directory}/snapdir_{self.other_snap.snap_num:03d}/snapshot_{extra}{self.other_snap.snap_num:03d}.{snap_part_file}.hdf5", 'r') as snap_data:
                if self._load_prop:
                    prop_array[start:stop + 1, :] = np.array(snap_data[f"/PartType{particle_type}/{halo_prop}"])
                particle_masses[start:stop + 1] = np.array(snap_data[f"/PartType{particle_type}/Masses"])
        print("Calling group accumulator")
        # prop_array is None in a dummy run; calculate_group_props handles both cases
        self.calculate_group_props(prop_array, particle_masses, halo_prop, particle_type)

    # NOTE: kept as a plain Python loop; a numba kernel (the original commented
    # @numba.njit) would need the instance state passed in explicitly.
    def calculate_group_props(self, prop_array: np.ndarray, particle_masses: np.ndarray, halo_prop: str, particle_type: int) -> None:
        """Accumulate mass-weighted ``halo_prop`` (when loaded), total mass and
        particle count per FOF group of this snapshot, matching particles across
        snapshots by their rank in the ID-sorted order, then save ``.npy`` files.
        """
        halo_prop_file = f"{self._cache_dir}/Group_{halo_prop}_snap{self.other_snap.snap_num}_PType{particle_type}.npy"
        # BUGFIX: the path starts with './' so split('.')[0] was the empty string
        # and the mass/count files were written to '-mass.npy' / '-particle-count.npy'
        # in the CWD; splitext strips only the extension.
        base_name = os.path.splitext(halo_prop_file)[0]
        num_all_groups = int(np.sum(self.num_groups_part))
        print("Calculating group properties ...")
        load_props = self._load_prop and prop_array is not None
        if load_props:
            prop_length = prop_array.shape[1]
            group_props = np.zeros((num_all_groups, prop_length), dtype=np.float64)
        group_mass = np.zeros(num_all_groups, dtype=np.float64)
        group_particle_count = np.zeros(num_all_groups, dtype=np.int64)
        total = self.ID_indx_sorted.shape[0]
        for count, indx in enumerate(self.ID_indx_sorted):
            if (count % self.log_freq) == 0:
                print(f"count = {count+1}/{total} ({((count+1)/total*100):.1f}%)",
                      end='\r' if count < (total - 1) else '\n')
            halo_indx = self.global_particle_index_to_group_index(indx, particle_type)
            # particles stored after the last group ("fuzz") belong to no halo
            if halo_indx >= num_all_groups:
                continue
            # the count-th smallest ID here matches the count-th smallest ID there
            particle_indx_other_snap = self.other_snap.ID_indx_sorted[count]
            if load_props:
                group_props[halo_indx, :] += (prop_array[particle_indx_other_snap, :] * particle_masses[particle_indx_other_snap])
            group_mass[halo_indx] += particle_masses[particle_indx_other_snap]
            group_particle_count[halo_indx] += 1
        if load_props:
            # normalize to the mass-weighted mean (empty groups yield nan via 0/0)
            print("Normalizing group properties ...")
            group_props = group_props / group_mass[:, None]
            if prop_length == 1:
                group_props = group_props.flatten()
            print(f"Saving file {halo_prop_file}")
            np.save(halo_prop_file, group_props)
        print(f"Saving file {base_name}-mass.npy")
        np.save(f"{base_name}-mass.npy", group_mass)
        print(f"Saving file {base_name}-particle-count.npy")
        np.save(f"{base_name}-particle-count.npy", group_particle_count)
        # free memory
        prop_array = None
        particle_masses = None
        group_particle_count = None
        group_props = None

    def refresh(self) -> None:
        """Drop the cached cross-match helper so the next relate_other_snap rebuilds it."""
        self.other_snap = None

    def debug(self):
        """Print the sorted-index arrays and per-type particle totals.

        BUGFIX: id_sort requires a particle type and only one type's sorted index
        is resident at a time, so sort and report each type in turn (the original
        called id_sort() without arguments and indexed ID_indx_sorted per type).
        """
        for particle_type in (self.baryon, self.dark_matter):
            self.id_sort(particle_type)
            print(self.ID_indx_sorted)
            print(self.ID_indx_sorted.shape)
        print(np.sum(self.num_particle_len, axis=0))
| if __name__ == "__main__": | |
| dark_matter = 1 | |
| baryon = 0 | |
| directory = "/orion/ptmp/adutt/arepo-multi-zoom/gadget-files/gadget4_development/examples/multi-zoom/streaming/parent/output_dumps" | |
| snap_num = 7 | |
| halo = tag_halos(directory, snap_num) | |
| print("Dark matter: ", dark_matter) | |
| dm_com_vel_in_ic = halo.relate_other_snap(0, "Velocities", dark_matter) | |
| print("Baryon: ", baryon) | |
| baryon_com_vel_in_ic = halo.relate_other_snap(0, "Velocities", baryon) | |
| # halo = tag_halos(directory, 0) | |
| # halo.id_sort() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment