@papaemman
Created May 6, 2022 22:45
Machine learning hyperparameter optimization for XGBoost and LightGBM using Bayesian optimization with hyperopt
# Load Libraries
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.metrics import roc_auc_score, accuracy_score
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
# Define datasets (empty placeholders: replace with the actual train/test splits)
X_train = pd.DataFrame()
y_train = pd.DataFrame()
X_test = pd.DataFrame()
y_test = pd.DataFrame()
# Explain hyperopt
# - hp.choice(label, options) — Returns one of the options, which should be a list or tuple.
# - hp.randint(label, upper) — Returns a random integer in the range [0, upper).
# - hp.uniform(label, low, high) — Returns a value uniformly between low and high.
# - hp.quniform(label, low, high, q) — Returns a value like round(uniform(low, high) / q) * q
# - hp.normal(label, mu, sigma) — Returns a real value that’s normally-distributed with mean mu and standard deviation sigma.
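# Sketch: draw a single random sample from a search space to sanity-check the
# ranges before running fmin (the names 'example_space', 'depth' and 'lr' below
# are illustrative only, not part of the original spaces).
from hyperopt.pyll.stochastic import sample
example_space = {'depth': hp.quniform('depth', 3, 18, 1),
                 'lr': hp.loguniform('lr', -4, -1)}  # loguniform: the log of the value is uniform in [-4, -1]
print(sample(example_space))  # e.g. {'depth': 7.0, 'lr': 0.08}; note quniform returns floats, so cast to int before use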
### XGBOOST HYPERPARAMETER OPTIMIZATION ###
import xgboost as xgb
# Define hyperparameter space
space_xgboost = {'max_depth': hp.quniform('max_depth', 3, 18, 1),
                 'gamma': hp.uniform('gamma', 1, 9),
                 'reg_alpha': hp.quniform('reg_alpha', 40, 180, 1),
                 'reg_lambda': hp.uniform('reg_lambda', 0, 1),
                 'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1),
                 'min_child_weight': hp.quniform('min_child_weight', 0, 10, 1),
                 'n_estimators': hp.quniform('n_estimators', 50, 500, 10),
                 'seed': 0}
# Define hyperparameter optimization function
def objective(space):
    clf = xgb.XGBClassifier(n_estimators=int(space['n_estimators']),
                            max_depth=int(space['max_depth']),
                            gamma=space['gamma'],
                            reg_alpha=int(space['reg_alpha']),
                            reg_lambda=space['reg_lambda'],
                            min_child_weight=int(space['min_child_weight']),
                            colsample_bytree=space['colsample_bytree'],  # keep as float; int() would truncate (0.5, 1) to 0
                            random_state=space['seed'])

    evaluation = [(X_train, y_train), (X_test, y_test)]

    clf.fit(X_train, y_train,
            eval_set=evaluation,
            eval_metric='auc',
            early_stopping_rounds=10,
            verbose=False)

    preds = clf.predict(X_test)
    y_score = clf.predict_proba(X_test)[:, 1]
    accuracy = accuracy_score(y_test, preds)
    roc_auc = roc_auc_score(y_test, y_score)
    print('ROC AUC:', roc_auc)
    print('Accuracy:', accuracy)

    # hyperopt minimizes the loss, so negate ROC AUC to maximize it
    return {'loss': -roc_auc, 'status': STATUS_OK}
# Run hyperparameter optimization
trials = Trials()
best_hyperparameters = fmin(fn=objective,
                            space=space_xgboost,
                            algo=tpe.suggest,
                            max_evals=100,
                            trials=trials)
print("The best hyperparameters are:", best_hyperparameters)
### LIGHTGBM HYPERPARAMETER OPTIMIZATION ###
import lightgbm as lgb
space_lightgbm = {'n_estimators': hp.quniform('n_estimators', 50, 2000, 10),
                  'max_depth': hp.quniform('max_depth', 3, 18, 1),
                  'learning_rate': hp.loguniform('learning_rate', -4, -1),
                  'min_split_gain': hp.quniform('min_split_gain', 0.1, 0.5, 0.1),  # LightGBM's analogue of XGBoost's gamma
                  'num_leaves': hp.quniform('num_leaves', 30, 50, 1),
                  'reg_alpha': hp.quniform('reg_alpha', 0, 1.5, 0.1),
                  'reg_lambda': hp.uniform('reg_lambda', 0, 1.5),
                  'colsample_bytree': hp.uniform('colsample_bytree', 0.1, 0.5),
                  'min_child_weight': hp.quniform('min_child_weight', 0, 10, 1),
                  'seed': 0}
# hp.choice("n_estimators", [180, 400, 600, 1000])
# 'learning_rate': hp.choice("learning_rate", [0.1, 0.01, 0.02, 0.05, 0.15, 0.2, 0.3])
def objective(space_lightgbm):
    model = lgb.LGBMClassifier(n_estimators=int(space_lightgbm['n_estimators']),
                               max_depth=int(space_lightgbm['max_depth']),
                               learning_rate=space_lightgbm['learning_rate'],
                               min_split_gain=space_lightgbm['min_split_gain'],
                               num_leaves=int(space_lightgbm['num_leaves']),
                               reg_alpha=space_lightgbm['reg_alpha'],  # keep as float; int() would truncate the 0-1.5 range
                               reg_lambda=space_lightgbm['reg_lambda'],
                               colsample_bytree=space_lightgbm['colsample_bytree'],
                               min_child_weight=int(space_lightgbm['min_child_weight']),
                               random_state=space_lightgbm['seed'])

    # Negate the cross-validated ROC AUC so that fmin minimizes it
    score = -cross_val_score(model, X_train, y_train,
                             cv=10, scoring='roc_auc').mean()
    return {'loss': score, 'status': STATUS_OK}
# Run hyperparameter optimization
trials = Trials()
best_hyperparameters = fmin(fn=objective,
                            space=space_lightgbm,
                            algo=tpe.suggest,
                            max_evals=100,
                            trials=trials)
print("The best hyperparameters are:", best_hyperparameters)
@Jason2Brownlee

Excellent example of hyperparameter optimization!

I'm a fan of hyperopt, but often find myself using scikit-optimize for bayes opt with xgboost for simplicity.
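
For comparison, a minimal scikit-optimize sketch of the same idea (this assumes skopt's BayesSearchCV API; the bounds below are illustrative, not tuned):

from skopt import BayesSearchCV
from skopt.space import Integer, Real

search_spaces = {'max_depth': Integer(3, 18),
                 'learning_rate': Real(1e-3, 0.3, prior='log-uniform'),
                 'n_estimators': Integer(50, 500),
                 'colsample_bytree': Real(0.5, 1.0)}

opt = BayesSearchCV(xgb.XGBClassifier(), search_spaces,
                    n_iter=50, cv=5, scoring='roc_auc', random_state=0)
opt.fit(X_train, y_train)
print(opt.best_params_, opt.best_score_)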
