Created
December 4, 2025 09:05
-
-
Save sjvrijn/b7eef78aa093caa917162fe9d990c1f3 to your computer and use it in GitHub Desktop.
A script to illustrate the correlation between the experiment that produced a spore and the cluster that spore ended up in
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # coding: utf-8 | |
| # Author: Sander van Rijn <[email protected]> | |
| # Date: 2025-12-04 10:03 | |
| from pathlib import Path | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
# location of data file
p = Path('/mnt/c/Users/Sander/Downloads/spores_carrier_capacities_clustered.csv')

# To simplify the data, we only look at one 'carrier' value
CARRIER = "Air heat pump"


def count_cluster_experiment(df, experiments, n_clusters=None):
    """Count the rows of `df` per ('cluster', 'experiment') combination.

    Args:
        df: DataFrame with a `cluster` column (integer labels starting at -1)
            and an `experiment` column.
        experiments: ordered experiment names; the position of a name in this
            sequence is its column index in the result.
        n_clusters: number of rows in the result. Defaults to just enough
            rows to hold the largest cluster label present in `df`.

    Returns:
        Float array of shape `(n_clusters, len(experiments))` where row
        `c + 1` holds the counts for cluster label `c`.

    Raises:
        ValueError: if a cluster label is smaller than -1.
        KeyError: if `df` contains an experiment not listed in `experiments`.
        IndexError: if a cluster label does not fit in `n_clusters` rows.
    """
    # give a fixed index to each experiment name
    idx = {exp: i for i, exp in enumerate(experiments)}

    # `cluster` is used as `cluster+1` to make them valid indexes,
    # since they start at -1
    rows = df["cluster"].to_numpy(dtype=int) + 1
    if (rows < 0).any():
        # a negative index would silently wrap around to the last rows
        raise ValueError("cluster labels below -1 are not supported")

    cols = df["experiment"].map(idx)
    unknown = cols.isna()
    if unknown.any():
        names = sorted(set(df["experiment"][unknown]))
        raise KeyError(f"experiments missing from index: {names}")
    cols = cols.to_numpy(dtype=int)

    if n_clusters is None:
        n_clusters = int(rows.max()) + 1 if rows.size else 0

    counts = np.zeros((n_clusters, len(experiments)))
    # unbuffered add: repeated (row, col) pairs each contribute 1
    np.add.at(counts, (rows, cols), 1)
    return counts


def normalize_counts(counts, axis):
    """Divide `counts` by its totals along `axis`.

    With `axis=0` every column (experiment) sums to 1, with `axis=1` every
    row (cluster) does. Rows/columns whose total is zero become NaN without
    triggering a division warning, so "no data" stays distinguishable from
    a fraction of 0.
    """
    totals = counts.sum(axis=axis, keepdims=True)
    fractions = np.full_like(counts, np.nan, dtype=float)
    np.divide(counts, totals, out=fractions, where=totals > 0)
    return fractions


def plot_fractions(fractions):
    """Show `fractions` (clusters x experiments) as a pseudocolor plot."""
    n_clusters, n_experiments = fractions.shape
    plt.pcolor(fractions)
    plt.xlabel(f"experiment (n={n_experiments})")
    plt.ylabel(f"clusters (n={n_clusters})")
    plt.show()


def main():
    """Load the data and plot both normalizations of the count matrix."""
    df = pd.read_csv(p, header=0)

    # Axis sizes are taken from the full data set (before filtering on
    # carrier) so that plots for different carriers share the same axes.
    experiments = sorted(df["experiment"].unique().tolist())
    n_clusters = int(df["cluster"].max()) + 2  # labels run from -1 to max

    df = df[df["carrier"] == CARRIER]

    # count all combinations of ('cluster', 'experiment')
    counts = count_cluster_experiment(df, experiments, n_clusters)

    # Normalize per experiment: a high value shows that spores from an
    # experiment only end up in a single cluster
    plot_fractions(normalize_counts(counts, axis=0))

    # Now normalize per cluster: a high value shows a cluster only contains
    # spores from a single experiment
    plot_fractions(normalize_counts(counts, axis=1))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment