Skip to content

Instantly share code, notes, and snippets.

@L-Ramos
Last active July 31, 2020 08:51
Show Gist options
  • Select an option

  • Save L-Ramos/0b1552ba1a54e03244a10985dd0b06d9 to your computer and use it in GitHub Desktop.

Select an option

Save L-Ramos/0b1552ba1a54e03244a10985dd0b06d9 to your computer and use it in GitHub Desktop.
breast_cancer.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "breast_cancer.ipynb",
"provenance": [],
"collapsed_sections": [],
"toc_visible": true,
"authorship_tag": "ABX9TyPA+JGOtsBvGdEApUHa68UN",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/L-Ramos/0b1552ba1a54e03244a10985dd0b06d9/breast_cancer.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"metadata": {
"id": "XEmRBmAnlF2g",
"colab_type": "code",
"colab": {}
},
"source": [
"import pandas as pd\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.model_selection import KFold\n",
"import numpy as np\n",
"from sklearn import svm\n",
"from sklearn.model_selection import RandomizedSearchCV,GridSearchCV\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.metrics import roc_auc_score\n",
"from sklearn.pipeline import Pipeline\n",
"\n",
"\n",
"def get_params_SVC():\n",
"    \"\"\"Return the hyper-parameter grid searched for a bare ``svm.SVC``.\n",
"\n",
"    Keys are plain SVC argument names, so the grid can be passed as-is to\n",
"    ``GridSearchCV`` when the estimator is the classifier itself.\n",
"    \"\"\"\n",
"    return dict(\n",
"        C=[0.1, 0.01, 0.001, 1, 10, 100],\n",
"        kernel=['linear', 'rbf', 'poly'],\n",
"        degree=[1, 2, 3],\n",
"        gamma=[1, 0.1, 0.01, 0.001, 0.0001],\n",
"    )\n",
"\n",
"# The 'svc__' prefix is required so GridSearchCV routes each parameter to the\n",
"# pipeline step named 'svc'.\n",
"def get_params_SVC_pipeline():\n",
"    \"\"\"Return the SVC grid with keys prefixed for a pipeline step named 'svc'.\n",
"\n",
"    The grid is derived from ``get_params_SVC()`` so the bare-estimator search\n",
"    and the pipeline search always cover exactly the same candidate values.\n",
"    \"\"\"\n",
"    return {'svc__' + name: values\n",
"            for name, values in get_params_SVC().items()}\n"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "9_zggNxtyvJy",
"colab_type": "code",
"colab": {}
},
"source": [
"r_state = 1\n",
"\n",
"# Breast-cancer data: the 'diagnosis' label is converted to integer category\n",
"# codes; 'id' and 'Unnamed: 32' are dropped together with the label so that\n",
"# only the remaining columns are used as features.\n",
"df = pd.read_csv('data.csv')\n",
"df['diagnosis'] = pd.Categorical(df['diagnosis'])\n",
"y = np.array(df['diagnosis'].cat.codes)\n",
"X = np.array(df.drop(columns=['diagnosis', 'id', 'Unnamed: 32']))"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "0JYbpOTmyv0D",
"colab_type": "code",
"colab": {}
},
"source": [
"r_state = 1\n",
"\n",
"# Diabetes data: 'Outcome' is the target, every other column is a feature.\n",
"# Note: running this cell replaces the df, X and y created by the preceding\n",
"# dataset cell.\n",
"df = pd.read_csv('diabetes.csv')\n",
"y = np.array(df['Outcome'])\n",
"X = np.array(df.drop(columns=['Outcome']))\n"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "qz_CdA07lWOb",
"colab_type": "code",
"colab": {}
},
"source": [
"# Compare two ways of combining feature scaling with a grid search, using the\n",
"# same outer K-fold split for both:\n",
"#   * 'split'    - scale the outer-training fold once, then grid-search on the\n",
"#                  already-scaled data (inner CV folds share scaling statistics)\n",
"#   * 'pipeline' - put the scaler inside a Pipeline so it is re-fitted on the\n",
"#                  training part of every inner CV fold\n",
"cv = 5  # number of inner folds used by GridSearchCV\n",
"kf = KFold(n_splits=5)\n",
"\n",
"list_params_split = []\n",
"list_scores_split = []\n",
"test_scores_split = []\n",
"\n",
"list_params_pipe = []\n",
"list_scores_pipe = []\n",
"test_scores_pipe = []\n",
"\n",
"for i, (train_index, test_index) in enumerate(kf.split(X)):\n",
"    print(\"Iteration: \",i)\n",
"    X_train, X_test = X[train_index,:], X[test_index,:]\n",
"    y_train, y_test = y[train_index], y[test_index]\n",
"\n",
"    # --- Approach 1: scale outside the grid search -------------------------\n",
"    # The scaled copies get their own names so the raw arrays stay available\n",
"    # for the pipeline approach below.\n",
"    scaler = StandardScaler().fit(X_train)\n",
"    X_train_scaled = scaler.transform(X_train)\n",
"    X_test_scaled = scaler.transform(X_test)\n",
"\n",
"    parameters = get_params_SVC()\n",
"    # random_state seeds the internal shuffling SVC performs for its\n",
"    # probability estimates, making predict_proba reproducible.\n",
"    svc = svm.SVC(class_weight='balanced', probability=True,\n",
"                  random_state=r_state)\n",
"    clf = GridSearchCV(svc, parameters, scoring='roc_auc', n_jobs=-1,\n",
"                       verbose=1, cv=cv)\n",
"    clf.fit(X_train_scaled, y_train)\n",
"\n",
"    list_params_split.append(clf.best_params_)\n",
"    list_scores_split.append(clf.best_score_)\n",
"\n",
"    preds = clf.predict_proba(X_test_scaled)\n",
"    test_scores_split.append(roc_auc_score(y_test, preds[:,1]))\n",
"\n",
"    # --- Approach 2: scale inside the pipeline -----------------------------\n",
"    # The pipeline receives the RAW features; its own StandardScaler is fitted\n",
"    # inside each inner CV fold and again on the full outer-training fold at\n",
"    # refit time.\n",
"    pipe = Pipeline([('scaler', StandardScaler()),\n",
"                     ('svc', svm.SVC(class_weight='balanced',\n",
"                                     probability=True,\n",
"                                     random_state=r_state))])\n",
"\n",
"    parameters = get_params_SVC_pipeline()\n",
"\n",
"    clf = GridSearchCV(pipe, param_grid=parameters, scoring='roc_auc',\n",
"                       n_jobs=-1, verbose=1, cv=cv)\n",
"    clf.fit(X_train, y_train)\n",
"\n",
"    list_params_pipe.append(clf.best_params_)\n",
"    list_scores_pipe.append(clf.best_score_)\n",
"\n",
"    preds = clf.predict_proba(X_test)\n",
"    test_scores_pipe.append(roc_auc_score(y_test, preds[:,1]))\n"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "7ieKSY-7qQiK",
"colab_type": "code",
"colab": {}
},
"source": [
"\n",
"# Mean and standard deviation of the outer-fold test AUC for each approach;\n",
"# each line reports the statistics of its own score list.\n",
"print(\"Average test AUC (split) : %.2f std: (%.2f)\"%(np.mean(test_scores_split),np.std(test_scores_split)))\n",
"print(\"Average test AUC (Pipeline) : %.2f std: (%.2f)\"%(np.mean(test_scores_pipe),np.std(test_scores_pipe)))"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "LmrsMHIJmZb4",
"colab_type": "code",
"colab": {}
},
"source": [
"\n",
"list_params_split"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "yZjPc_o7maqk",
"colab_type": "code",
"colab": {}
},
"source": [
"list_params_pipe"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "hmx12bypmtGR",
"colab_type": "code",
"colab": {}
},
"source": [
"list_scores_split"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "M8N5qgPZ03Mn",
"colab_type": "code",
"colab": {}
},
"source": [
"list_scores_pipe"
],
"execution_count": null,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment