Skip to content

Instantly share code, notes, and snippets.

@zaemyung
Created October 14, 2025 01:52
Show Gist options
  • Select an option

  • Save zaemyung/64ec963b6c664369da4846550b2cb464 to your computer and use it in GitHub Desktop.

Select an option

Save zaemyung/64ec963b6c664369da4846550b2cb464 to your computer and use it in GitHub Desktop.
Computing mf-idf
def compute_mfidf_distribution(machines, humans, motif_size):
    """Score motifs by MF-IDF for the machine group and the human group.

    Each group ("machine" or "human") is treated as one document, so the
    corpus holds exactly two documents.

    * MF (motif frequency): per-motif mean, over a group's samples, of
      ``sample['motif_m{motif_size}_hist_norm_by_edges']``.
    * DF (document frequency): per-motif fraction of a group's samples
      whose ``sample['motif_m{motif_size}_hist_raw']`` entry is > 0,
      summed over the two groups (so it lies in [0, 2]).
    * IDF: ``log((2 + 1) / (DF + 1))`` -- add-one smoothed, with 2 being
      the number of documents.

    Args:
        machines: Iterable of machine samples. Each sample must support
            lookup by the two histogram keys above. Histograms are
            assumed to be equal-length numeric sequences -- TODO confirm
            against the code that builds the samples.
        humans: Iterable of human samples, same layout as ``machines``.
        motif_size: Value interpolated into the histogram key names.

    Returns:
        A dict with:
            'machine_top_motif_indices': motif indices ordered by
                descending machine MF-IDF.
            'human_top_motif_indices': motif indices ordered by
                descending human MF-IDF.
            'machine_mfidf': per-motif MF-IDF of the machine group.
            'human_mfidf': per-motif MF-IDF of the human group.
            'machine_mfidf-human_mfidf': element-wise difference of the
                two score arrays.

    Raises:
        ValueError: If ``machines`` or ``humans`` contains no samples.
    """
    # Materialise once: each group is read twice below, which would
    # silently exhaust a one-shot iterable such as a generator.
    machines = list(machines)
    humans = list(humans)
    if not machines or not humans:
        # Averaging over zero samples would only produce NaN scores.
        raise ValueError(
            "compute_mfidf_distribution needs at least one machine sample "
            "and at least one human sample"
        )

    mf_key = f'motif_m{motif_size}_hist_norm_by_edges'
    raw_key = f'motif_m{motif_size}_hist_raw'

    # Motif frequency: mean normalised histogram of each group.
    machine_mf = np.mean(np.array([sample[mf_key] for sample in machines]), axis=0)
    human_mf = np.mean(np.array([sample[mf_key] for sample in humans]), axis=0)

    # Document frequency: share of samples in which each motif occurs.
    machine_raw = np.array([sample[raw_key] for sample in machines])
    human_raw = np.array([sample[raw_key] for sample in humans])
    machine_df = np.mean((machine_raw > 0).astype(int), axis=0)
    human_df = np.mean((human_raw > 0).astype(int), axis=0)

    df = machine_df + human_df
    idf = np.log((2 + 1) / (df + 1))

    machine_mfidf = np.multiply(machine_mf, idf)
    human_mfidf = np.multiply(human_mf, idf)

    # argsort is ascending; reverse it to list the best motifs first.
    machine_top_motif_indices = machine_mfidf.argsort()[::-1]
    human_top_motif_indices = human_mfidf.argsort()[::-1]

    return {
        'machine_top_motif_indices': machine_top_motif_indices,
        'human_top_motif_indices': human_top_motif_indices,
        'machine_mfidf': machine_mfidf,
        'human_mfidf': human_mfidf,
        'machine_mfidf-human_mfidf': machine_mfidf - human_mfidf,
    }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment