Machine learning hyperparameter optimization for XGBoost and LightGBM using Bayesian optimization with hyperopt
# Load libraries
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.metrics import roc_auc_score, accuracy_score
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
# Define datasets (placeholders: replace with your own data)
X_train = pd.DataFrame()
y_train = pd.DataFrame()
X_test = pd.DataFrame()
y_test = pd.DataFrame()
# hyperopt search-space expressions:
# - hp.choice(label, options)      : returns one of the options, which should be a list or tuple.
# - hp.randint(label, upper)       : returns a random integer in the range [0, upper).
# - hp.uniform(label, low, high)   : returns a value uniformly between low and high.
# - hp.quniform(label, low, high, q): returns a value like round(uniform(low, high) / q) * q.
# - hp.normal(label, mu, sigma)    : returns a real value that is normally distributed with mean mu and standard deviation sigma.
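# A quick sketch (not part of the original gist) to preview what these expressions sample:
# hyperopt.pyll.stochastic.sample() draws one random point from a search space, which is
# handy for sanity-checking ranges before running an optimization. demo_space is a
# made-up example space used only for this check.
from hyperopt.pyll import stochastic

demo_space = {'max_depth': hp.quniform('max_depth', 3, 18, 1),
              'learning_rate': hp.loguniform('learning_rate', -4, -1)}
print(stochastic.sample(demo_space))   # e.g. {'learning_rate': 0.05, 'max_depth': 12.0}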
### XGBOOST HYPERPARAMETER OPTIMIZATION ###

import xgboost as xgb

# Define hyperparameter space
space_xgboost = {'max_depth'        : hp.quniform('max_depth', 3, 18, 1),
                 'gamma'            : hp.uniform('gamma', 1, 9),
                 'reg_alpha'        : hp.quniform('reg_alpha', 40, 180, 1),
                 'reg_lambda'       : hp.uniform('reg_lambda', 0, 1),
                 'colsample_bytree' : hp.uniform('colsample_bytree', 0.5, 1),
                 'min_child_weight' : hp.quniform('min_child_weight', 0, 10, 1),
                 'n_estimators'     : hp.quniform('n_estimators', 50, 500, 10),
                 'seed'             : 0}
# Define hyperparameter optimization objective function
def objective(space):
    # quniform returns floats, so cast integer-valued parameters explicitly
    clf = xgb.XGBClassifier(n_estimators     = int(space['n_estimators']),
                            max_depth        = int(space['max_depth']),
                            gamma            = space['gamma'],
                            reg_alpha        = int(space['reg_alpha']),
                            reg_lambda       = space['reg_lambda'],
                            min_child_weight = int(space['min_child_weight']),
                            colsample_bytree = space['colsample_bytree'],
                            random_state     = space['seed'])

    # Note: in xgboost >= 2.0, eval_metric and early_stopping_rounds are passed to the
    # constructor instead of fit().
    evaluation = [(X_train, y_train), (X_test, y_test)]
    clf.fit(X_train, y_train,
            eval_set=evaluation,
            eval_metric='auc',
            early_stopping_rounds=10,
            verbose=False)

    preds = clf.predict(X_test)
    y_score = clf.predict_proba(X_test)[:, 1]
    accuracy = accuracy_score(y_test, preds)
    roc_auc = roc_auc_score(y_test, y_score)
    print('ROC AUC score:', roc_auc)
    print('Accuracy:', accuracy)

    # hyperopt minimizes the loss, so return the negative ROC AUC
    return {'loss': -roc_auc, 'status': STATUS_OK}
# Run hyperparameter optimization
trials = Trials()
best_hyperparameters = fmin(fn=objective,
                            space=space_xgboost,
                            algo=tpe.suggest,
                            max_evals=100,
                            trials=trials)

print("The best hyperparameters are:", best_hyperparameters)
### LIGHTGBM HYPERPARAMETER OPTIMIZATION ###

import lightgbm as lgb

# Define hyperparameter space
space_lightgbm = {'n_estimators'     : hp.quniform('n_estimators', 50, 2000, 10),
                  'max_depth'        : hp.quniform('max_depth', 3, 18, 1),
                  'learning_rate'    : hp.loguniform('learning_rate', -4, -1),
                  'min_split_gain'   : hp.quniform('min_split_gain', 0.1, 0.5, 0.1),   # LightGBM's equivalent of XGBoost's gamma
                  'num_leaves'       : hp.quniform('num_leaves', 30, 50, 1),
                  'reg_alpha'        : hp.quniform('reg_alpha', 0, 1.5, 0.1),
                  'reg_lambda'       : hp.uniform('reg_lambda', 0, 1.5),
                  'colsample_bytree' : hp.uniform('colsample_bytree', 0.1, 0.5),
                  'min_child_weight' : hp.quniform('min_child_weight', 0, 10, 1),
                  'seed'             : 0}

# Alternative discrete choices:
# 'n_estimators' : hp.choice('n_estimators', [180, 400, 600, 1000])
# 'learning_rate': hp.choice('learning_rate', [0.1, 0.01, 0.02, 0.05, 0.15, 0.2, 0.3])
# Define hyperparameter optimization objective function
def objective(space_lightgbm):
    model = lgb.LGBMClassifier(n_estimators     = int(space_lightgbm['n_estimators']),
                               max_depth        = int(space_lightgbm['max_depth']),
                               learning_rate    = space_lightgbm['learning_rate'],
                               min_split_gain   = space_lightgbm['min_split_gain'],
                               num_leaves       = int(space_lightgbm['num_leaves']),
                               reg_alpha        = space_lightgbm['reg_alpha'],
                               reg_lambda       = space_lightgbm['reg_lambda'],
                               colsample_bytree = space_lightgbm['colsample_bytree'],
                               min_child_weight = int(space_lightgbm['min_child_weight']),
                               random_state     = space_lightgbm['seed'])

    # 10-fold cross-validated ROC AUC, negated because hyperopt minimizes the loss
    score = -cross_val_score(model,
                             X_train, y_train,
                             cv=10, scoring='roc_auc').mean()
    return {'loss': score, 'status': STATUS_OK}
# Run hyperparameter optimization
trials = Trials()
best_hyperparameters = fmin(fn=objective,
                            space=space_lightgbm,
                            algo=tpe.suggest,
                            max_evals=100,
                            trials=trials)

print("The best hyperparameters are:", best_hyperparameters)
Excellent example of hyperparameter optimization!
I'm a fan of hyperopt, but I often find myself using scikit-optimize for Bayesian optimization with XGBoost, for simplicity.