Machine learning hyperparameter optimization for XGBoost and LightGBM using Bayesian optimization with hyperopt
# Load libraries
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.metrics import roc_auc_score, accuracy_score
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
# Define datasets (placeholders: replace with your own data)
X_train = pd.DataFrame()
y_train = pd.DataFrame()
X_test = pd.DataFrame()
y_test = pd.DataFrame()
# hyperopt search-space expressions:
# - hp.choice(label, options)      : returns one of the options, which should be a list or tuple.
# - hp.randint(label, upper)       : returns a random integer in the range [0, upper).
# - hp.uniform(label, low, high)   : returns a value uniformly between low and high.
# - hp.quniform(label, low, high, q): returns a value like round(uniform(low, high) / q) * q.
# - hp.normal(label, mu, sigma)    : returns a real value that is normally distributed with mean mu and standard deviation sigma.
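# A quick sketch (not part of the original gist) to preview what these expressions sample:
# hyperopt.pyll.stochastic.sample() draws one random point from a search space, which is
# handy for sanity-checking ranges before running an optimization. demo_space is a
# made-up example space used only for this check.
from hyperopt.pyll import stochastic

demo_space = {'max_depth': hp.quniform('max_depth', 3, 18, 1),
              'learning_rate': hp.loguniform('learning_rate', -4, -1)}
print(stochastic.sample(demo_space))   # e.g. {'learning_rate': 0.05, 'max_depth': 12.0}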
### XGBOOST HYPERPARAMETER OPTIMIZATION ###

import xgboost as xgb

# Define hyperparameter space
space_xgboost = {'max_depth'        : hp.quniform('max_depth', 3, 18, 1),
                 'gamma'            : hp.uniform('gamma', 1, 9),
                 'reg_alpha'        : hp.quniform('reg_alpha', 40, 180, 1),
                 'reg_lambda'       : hp.uniform('reg_lambda', 0, 1),
                 'colsample_bytree' : hp.uniform('colsample_bytree', 0.5, 1),
                 'min_child_weight' : hp.quniform('min_child_weight', 0, 10, 1),
                 'n_estimators'     : hp.quniform('n_estimators', 50, 500, 10),
                 'seed'             : 0}
# Define hyperparameter optimization objective function
def objective(space):
    # quniform returns floats, so cast integer-valued parameters explicitly
    clf = xgb.XGBClassifier(n_estimators     = int(space['n_estimators']),
                            max_depth        = int(space['max_depth']),
                            gamma            = space['gamma'],
                            reg_alpha        = int(space['reg_alpha']),
                            reg_lambda       = space['reg_lambda'],
                            min_child_weight = int(space['min_child_weight']),
                            colsample_bytree = space['colsample_bytree'],
                            random_state     = space['seed'])

    # Note: in xgboost >= 2.0, eval_metric and early_stopping_rounds are passed to the
    # constructor instead of fit().
    evaluation = [(X_train, y_train), (X_test, y_test)]
    clf.fit(X_train, y_train,
            eval_set=evaluation,
            eval_metric='auc',
            early_stopping_rounds=10,
            verbose=False)

    preds = clf.predict(X_test)
    y_score = clf.predict_proba(X_test)[:, 1]
    accuracy = accuracy_score(y_test, preds)
    roc_auc = roc_auc_score(y_test, y_score)
    print('ROC AUC score:', roc_auc)
    print('Accuracy:', accuracy)

    # hyperopt minimizes the loss, so return the negative ROC AUC
    return {'loss': -roc_auc, 'status': STATUS_OK}
# Run hyperparameter optimization
trials = Trials()
best_hyperparameters = fmin(fn=objective,
                            space=space_xgboost,
                            algo=tpe.suggest,
                            max_evals=100,
                            trials=trials)

print("The best hyperparameters are:", best_hyperparameters)
### LIGHTGBM HYPERPARAMETER OPTIMIZATION ###

import lightgbm as lgb

# Define hyperparameter space
space_lightgbm = {'n_estimators'     : hp.quniform('n_estimators', 50, 2000, 10),
                  'max_depth'        : hp.quniform('max_depth', 3, 18, 1),
                  'learning_rate'    : hp.loguniform('learning_rate', -4, -1),
                  'min_split_gain'   : hp.quniform('min_split_gain', 0.1, 0.5, 0.1),   # LightGBM's equivalent of XGBoost's gamma
                  'num_leaves'       : hp.quniform('num_leaves', 30, 50, 1),
                  'reg_alpha'        : hp.quniform('reg_alpha', 0, 1.5, 0.1),
                  'reg_lambda'       : hp.uniform('reg_lambda', 0, 1.5),
                  'colsample_bytree' : hp.uniform('colsample_bytree', 0.1, 0.5),
                  'min_child_weight' : hp.quniform('min_child_weight', 0, 10, 1),
                  'seed'             : 0}

# Alternative discrete choices:
# 'n_estimators' : hp.choice('n_estimators', [180, 400, 600, 1000])
# 'learning_rate': hp.choice('learning_rate', [0.1, 0.01, 0.02, 0.05, 0.15, 0.2, 0.3])
# Define hyperparameter optimization objective function
def objective(space_lightgbm):
    model = lgb.LGBMClassifier(n_estimators     = int(space_lightgbm['n_estimators']),
                               max_depth        = int(space_lightgbm['max_depth']),
                               learning_rate    = space_lightgbm['learning_rate'],
                               min_split_gain   = space_lightgbm['min_split_gain'],
                               num_leaves       = int(space_lightgbm['num_leaves']),
                               reg_alpha        = space_lightgbm['reg_alpha'],
                               reg_lambda       = space_lightgbm['reg_lambda'],
                               colsample_bytree = space_lightgbm['colsample_bytree'],
                               min_child_weight = int(space_lightgbm['min_child_weight']),
                               random_state     = space_lightgbm['seed'])

    # 10-fold cross-validated ROC AUC, negated because hyperopt minimizes the loss
    score = -cross_val_score(model,
                             X_train, y_train,
                             cv=10, scoring='roc_auc').mean()
    return {'loss': score, 'status': STATUS_OK}
# Run hyperparameter optimization
trials = Trials()
best_hyperparameters = fmin(fn=objective,
                            space=space_lightgbm,
                            algo=tpe.suggest,
                            max_evals=100,
                            trials=trials)

print("The best hyperparameters are:", best_hyperparameters)
Excellent example of hyperparameter optimization!
I'm a fan of hyperopt, but I often find myself using scikit-optimize for Bayesian optimization with XGBoost, for simplicity.