Skip to content

Instantly share code, notes, and snippets.

@Corwinpro
Created March 27, 2019 21:27
Show Gist options
  • Select an option

  • Save Corwinpro/7b674e00d1cfc190e89cd0b9f266f19c to your computer and use it in GitHub Desktop.

Select an option

Save Corwinpro/7b674e00d1cfc190e89cd0b9f266f19c to your computer and use it in GitHub Desktop.
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier
class RandomForestClassifierCustom(BaseEstimator):
    """Hand-rolled random forest built from bagged decision trees.

    Every tree is trained on a bootstrap sample of the rows and on a random
    subset of ``max_features`` columns drawn without replacement.

    Parameters
    ----------
    n_estimators : int
        Number of trees to grow.
    max_depth : int
        Depth limit passed to every ``DecisionTreeClassifier``.
    max_features : int
        Number of columns sampled for each tree; also passed to the tree
        as its own ``max_features``.
    random_state : int
        Base seed; tree ``i`` is seeded with ``random_state + i``.

    Attributes
    ----------
    trees : list
        Fitted trees, in training order.
    feat_ids_by_tree : list
        Column labels used by each tree, aligned with ``trees``.
    """

    def __init__(self, n_estimators=10, max_depth=10, max_features=10,
                 random_state=SEED):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.max_features = max_features
        self.random_state = random_state

        self.trees = []
        self.feat_ids_by_tree = []

    def fit(self, X, y):
        """Grow ``n_estimators`` trees on bootstrap samples of ``(X, y)``.

        ``X`` is accessed through ``.columns`` / ``.iloc`` and ``y`` through
        ``.iloc``, so pandas objects are expected. Returns ``self``.
        """
        # Drop any previously fitted forest: without this a second fit()
        # would append to the old trees and skew the averaged prediction.
        self.trees = []
        self.feat_ids_by_tree = []

        features = X.columns.values.tolist()
        n_samples = len(y)

        for i in range(self.n_estimators):
            current_seed = self.random_state + i
            # A private generator yields the same draws as seeding the global
            # numpy RNG with this seed, but leaves the caller's RNG untouched.
            rng = np.random.RandomState(current_seed)

            # Random feature subset, sampled without replacement.
            random_feature_set = rng.choice(
                features, self.max_features, replace=False
            )
            self.feat_ids_by_tree.append(random_feature_set)

            # Bootstrap sample: row positions drawn with replacement.
            indices = rng.randint(0, n_samples, n_samples)
            X_train = X.iloc[indices][random_feature_set]
            y_train = y.iloc[indices]

            tree = DecisionTreeClassifier(
                max_depth=self.max_depth,
                max_features=self.max_features,
                random_state=current_seed,
            )
            tree.fit(X_train, y_train)
            self.trees.append(tree)

        return self

    def predict_proba(self, X):
        """Return the tree-averaged probability of column 1 for each row.

        Each tree only sees the columns it was trained on. The result is a
        1-D array (the second column of the averaged ``predict_proba``
        output), not the usual two-column matrix.
        """
        results = [
            tree.predict_proba(X[feature_set])
            for tree, feature_set in zip(self.trees, self.feat_ids_by_tree)
        ]
        final_result = sum(results) / len(self.trees)
        return final_result[:, 1]
# Stratified 5-fold cross-validation of the custom forest, scored by ROC AUC.
# The fold count lives in one place so the splitter and the averaging divisor
# cannot drift apart.
n_splits = 5
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=SEED)

score = 0
for train_index, test_index in skf.split(X, y):
    # A fresh estimator per fold keeps the folds independent of each other.
    my_classifier = RandomForestClassifierCustom(max_depth=6, max_features=7)

    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    my_classifier.fit(X_train, y_train)
    y_score = my_classifier.predict_proba(X_test)

    # Accumulate the running mean of the per-fold AUC.
    score += roc_auc_score(y_test, y_score) / n_splits

print(score)  # 0.8269868195120791
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment