sjvrijn · December 4, 2025 09:05
diff --git a/plot_cluster_counts.py b/plot_cluster_counts.py
 # coding: utf-8
 # Author: Sander van Rijn <[email protected]>
 # Date: 2025-12-04 10:03

 from pathlib import Path

 import pandas as pd
 import matplotlib.pyplot as plt
 import numpy as np

 # location of data file
 p = Path('/mnt/c/Users/Sander/Downloads/spores_carrier_capacities_clustered.csv')
 df = pd.read_csv(p, header=0)

 # Give every experiment name a fixed column index. This is done on the
 # unfiltered data, so an experiment keeps its column even when it has no
 # rows left after the carrier filter below.
 experiments = sorted(df["experiment"].unique().tolist())
 idx = {exp: i for i, exp in enumerate(experiments)}

 # Size the count matrix from the data instead of hard-coding 32 x 30, so a
 # data file with a different number of clusters or experiments cannot index
 # out of bounds. Cluster labels start at -1, hence one row per label in
 # -1..max; `initial=-1` keeps this well-defined for an empty file.
 n_clusters = df["cluster"].to_numpy(dtype=int).max(initial=-1) + 2
 n_experiments = len(experiments)

 # To simplify the data, we only look at one 'carrier' value
 df = df[df["carrier"] == "Air heat pump"]

 # Count all combinations of ('cluster', 'experiment').
 # `cluster` is shifted by one to make it a valid row index (labels start at -1).
 # `np.add.at` accumulates repeated (row, column) pairs, which a plain
 # fancy-indexed `+=` would not, and avoids a Python-level loop over the rows.
 cluster_rows = df["cluster"].to_numpy(dtype=int) + 1
 experiment_cols = df["experiment"].map(idx).to_numpy(dtype=int)
 counts = np.zeros((n_clusters, n_experiments))
 np.add.at(counts, (cluster_rows, experiment_cols), 1)

 # Axis labels report the actual matrix dimensions
 xlabel = f"experiment (n={n_experiments})"
 ylabel = f"clusters (n={n_clusters})"

 # Normalize per experiment (each column sums to 1): a high value shows that
 # spores from an experiment only end up in a single cluster.
 # An experiment without any rows for this carrier has a zero column sum,
 # so its column becomes NaN.
 divcount = counts / counts.sum(axis=0, keepdims=True)
 plt.pcolor(divcount)
 plt.xlabel(xlabel)
 plt.ylabel(ylabel)
 plt.show()

 # Now normalize per cluster (each row sums to 1): a high value shows a
 # cluster only contains spores from a single experiment.
 # A cluster without any rows for this carrier likewise becomes a NaN row.
 plt.pcolor(counts / counts.sum(axis=1, keepdims=True))
 plt.xlabel(xlabel)
 plt.ylabel(ylabel)
 plt.show()
	# coding: utf-8
	# Author: Sander van Rijn <[email protected]>
	# Date: 2025-12-04 10:03

	from pathlib import Path

	import pandas as pd
	import matplotlib.pyplot as plt
	import numpy as np

	# location of data file
	p = Path('/mnt/c/Users/Sander/Downloads/spores_carrier_capacities_clustered.csv')
	df = pd.read_csv(p, header=0)

	# Give every experiment name a fixed column index. This is done on the
	# unfiltered data, so an experiment keeps its column even when it has no
	# rows left after the carrier filter below.
	experiments = sorted(df["experiment"].unique().tolist())
	idx = {exp: i for i, exp in enumerate(experiments)}

	# Size the count matrix from the data instead of hard-coding 32 x 30, so a
	# data file with a different number of clusters or experiments cannot index
	# out of bounds. Cluster labels start at -1, hence one row per label in
	# -1..max; `initial=-1` keeps this well-defined for an empty file.
	n_clusters = df["cluster"].to_numpy(dtype=int).max(initial=-1) + 2
	n_experiments = len(experiments)

	# To simplify the data, we only look at one 'carrier' value
	df = df[df["carrier"] == "Air heat pump"]

	# Count all combinations of ('cluster', 'experiment').
	# `cluster` is shifted by one to make it a valid row index (labels start at -1).
	# `np.add.at` accumulates repeated (row, column) pairs, which a plain
	# fancy-indexed `+=` would not, and avoids a Python-level loop over the rows.
	cluster_rows = df["cluster"].to_numpy(dtype=int) + 1
	experiment_cols = df["experiment"].map(idx).to_numpy(dtype=int)
	counts = np.zeros((n_clusters, n_experiments))
	np.add.at(counts, (cluster_rows, experiment_cols), 1)

	# Axis labels report the actual matrix dimensions
	xlabel = f"experiment (n={n_experiments})"
	ylabel = f"clusters (n={n_clusters})"

	# Normalize per experiment (each column sums to 1): a high value shows that
	# spores from an experiment only end up in a single cluster.
	# An experiment without any rows for this carrier has a zero column sum,
	# so its column becomes NaN.
	divcount = counts / counts.sum(axis=0, keepdims=True)
	plt.pcolor(divcount)
	plt.xlabel(xlabel)
	plt.ylabel(ylabel)
	plt.show()

	# Now normalize per cluster (each row sums to 1): a high value shows a
	# cluster only contains spores from a single experiment.
	# A cluster without any rows for this carrier likewise becomes a NaN row.
	plt.pcolor(counts / counts.sum(axis=1, keepdims=True))
	plt.xlabel(xlabel)
	plt.ylabel(ylabel)
	plt.show()
No results found