larsbratholm · March 8, 2020 11:05
diff --git a/mae_ci.py b/mae_ci.py
 """
 Calculate the 95% confidence interval for the reported MAE.
 The data is assumed to follow a Laplace distribution.
 See https://waset.org/publications/8809/confidence-intervals-for-double-exponential-distribution-a-simulation-approach
 for derivation.
 """

 import numpy as np
 import scipy.stats as ss

 def get_mean_lower_and_upper_bound(x, alpha=0.95):
     """
     Return the mean absolute error of `x` and a confidence interval for it.

     The interval is built from chi-square quantiles with ``2 * n`` degrees
     of freedom, where ``n`` is the number of values in `x`.

     Parameters
     ----------
     x : array-like
         Prediction errors. Anything `np.asarray` accepts (ndarray, list,
         tuple, ...) is allowed.
     alpha : float, optional
         Confidence level of the interval (0.95 gives a 95% interval).

     Returns
     -------
     tuple
         ``(mae, lb, ub)``: the mean absolute error followed by the lower
         and upper bound of the confidence interval.
     """
     # Coerce to an array so plain sequences work too; an ndarray passes
     # through unchanged.
     errors = np.asarray(x)
     # Number of datapoints
     n = errors.size
     mae = np.mean(np.abs(errors))
     # 2 * n * mae is twice the sum of absolute errors; dividing it by the
     # upper / lower chi-square quantile gives the lower / upper bound.
     scaled_sum = 2 * mae * n
     dof = 2 * n
     lb = scaled_sum / ss.chi2.ppf((1 + alpha) / 2, dof)
     ub = scaled_sum / ss.chi2.ppf((1 - alpha) / 2, dof)
     return mae, lb, ub

 # Simulated example input: 100 draws from SciPy's Laplace distribution with
 # its default parameters. Each value stands for the error of one prediction.
 # For K-fold cross validation, pool the errors of every fold into a single
 # array before computing the interval.
 x = ss.laplace.rvs(size=100)


 # Confidence level, then the MAE together with its lower and upper bound.
 alpha = 0.95
 mae, lb, ub = get_mean_lower_and_upper_bound(x, alpha=alpha)
	"""
	Calculate the 95% confidence interval for the reported MAE.
	The data is assumed to follow a Laplace distribution.
	See https://waset.org/publications/8809/confidence-intervals-for-double-exponential-distribution-a-simulation-approach
	for derivation.
	"""

	import numpy as np
	import scipy.stats as ss

	def get_mean_lower_and_upper_bound(x, alpha=0.95):
	    """
	    Return the mean absolute error of `x` and a confidence interval for it.

	    The interval is built from chi-square quantiles with ``2 * n`` degrees
	    of freedom, where ``n`` is the number of values in `x`.

	    Parameters
	    ----------
	    x : array-like
	        Prediction errors. Anything `np.asarray` accepts (ndarray, list,
	        tuple, ...) is allowed.
	    alpha : float, optional
	        Confidence level of the interval (0.95 gives a 95% interval).

	    Returns
	    -------
	    tuple
	        ``(mae, lb, ub)``: the mean absolute error followed by the lower
	        and upper bound of the confidence interval.
	    """
	    # Coerce to an array so plain sequences work too; an ndarray passes
	    # through unchanged.
	    errors = np.asarray(x)
	    # Number of datapoints
	    n = errors.size
	    mae = np.mean(np.abs(errors))
	    # 2 * n * mae is twice the sum of absolute errors; dividing it by the
	    # upper / lower chi-square quantile gives the lower / upper bound.
	    scaled_sum = 2 * mae * n
	    dof = 2 * n
	    lb = scaled_sum / ss.chi2.ppf((1 + alpha) / 2, dof)
	    ub = scaled_sum / ss.chi2.ppf((1 - alpha) / 2, dof)
	    return mae, lb, ub

	# Simulated example input: 100 draws from SciPy's Laplace distribution with
	# its default parameters. Each value stands for the error of one prediction.
	# For K-fold cross validation, pool the errors of every fold into a single
	# array before computing the interval.
	x = ss.laplace.rvs(size=100)


	# Confidence level, then the MAE together with its lower and upper bound.
	alpha = 0.95
	mae, lb, ub = get_mean_lower_and_upper_bound(x, alpha=alpha)
No results found