Skip to content

Instantly share code, notes, and snippets.

@chumpblocckami
Created November 6, 2024 21:24
Show Gist options
  • Select an option

  • Save chumpblocckami/dc977f129bc504f96f1d6b4201c34ebf to your computer and use it in GitHub Desktop.

Select an option

Save chumpblocckami/dc977f129bc504f96f1d6b4201c34ebf to your computer and use it in GitHub Desktop.
Utility functions for statistical relevance
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import kstest
def draw_boxplot(df: pd.DataFrame, output_path: str = "boxplot.png") -> None:
    """Save a grouped boxplot of the BARI, MIRI and ORPI columns, split by label.

    The frame is reshaped to long format (one row per index/value pair) so
    that seaborn can draw one box per index and per ``label`` value.

    Args:
        df: Frame containing the columns ``checkpoints``, ``label``,
            ``BARI``, ``MIRI`` and ``ORPI``.
        output_path: Where the image is written. Defaults to ``boxplot.png``
            in the current working directory, as before.

    Raises:
        KeyError: Propagated from ``DataFrame.melt`` when a required column
            is missing.
    """
    df_melted = df.melt(
        id_vars=["checkpoints", "label"],
        value_vars=["BARI", "MIRI", "ORPI"],
        var_name="Index",
        value_name="Value",
    )
    print(df_melted)

    # Draw on an explicit figure so it can be released afterwards; figures
    # created through pyplot stay alive until they are closed, which leaks
    # memory when this function is called repeatedly.
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        sns.boxplot(x="Index", y="Value", hue="label", data=df_melted, ax=ax)
        ax.set_title(
            "Boxplot of BARI, MIRI, and ORPI for different labels (F, FM, FNM)"
        )
        ax.set_xlabel("Index (BARI, MIRI, ORPI)")
        ax.set_ylabel("Values")
        ax.legend(title="Label", loc="upper left")
        ax.grid(True)
        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        # Close even if plotting or saving fails.
        plt.close(fig)
def test_difference(d1: pd.Series, d2: pd.Series, significance_level=0.1):
    """Run a two-sample Kolmogorov-Smirnov test and report the outcome.

    The null hypothesis is that ``d1`` and ``d2`` are drawn from the same
    distribution. The statistic, the p-value and the decision are printed.

    Args:
        d1: First sample of observations.
        d2: Second sample of observations.
        significance_level: Threshold below which the p-value leads to
            rejecting the null hypothesis.

    Returns:
        The result object returned by ``scipy.stats.kstest`` (exposes
        ``statistic`` and ``pvalue``).
    """
    # ``kstest`` only uses ``N`` when the first argument is a distribution
    # name or a callable; with two samples it is ignored, so it is not passed.
    ks_test = kstest(d1, d2)
    print("D", ks_test.statistic)
    print("pvalue", ks_test.pvalue)
    if ks_test.pvalue < significance_level:
        print("Reject the null hypothesis")
    else:
        # A non-significant result never proves the null hypothesis; it only
        # means the data do not provide enough evidence against it.
        print("Fail to reject the null hypothesis")
    return ks_test


# The ``test_`` prefix would make pytest collect this helper as a test case
# (and fail on the unknown ``d1``/``d2`` fixtures) whenever it is imported
# into a test module; opt out explicitly.
test_difference.__test__ = False
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment