Skip to content

Instantly share code, notes, and snippets.

@tensorvijay
Created October 3, 2023 08:50
Show Gist options
  • Select an option

  • Save tensorvijay/ff365171602c127f34659ce8f3b5c6cd to your computer and use it in GitHub Desktop.

Select an option

Save tensorvijay/ff365171602c127f34659ce8f3b5c6cd to your computer and use it in GitHub Desktop.
Random_Forest_Creditcard_Fraud.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/tensorvijay/ff365171602c127f34659ce8f3b5c6cd/random_forest_creditcard_fraud.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "XVbif3K6SXwY"
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.metrics import average_precision_score\n",
"from sklearn.metrics import recall_score\n",
"from sklearn.metrics import accuracy_score\n",
"import scipy.stats\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "MCdIUyJgRxnZ"
},
"outputs": [],
"source": [
"\n",
"\n",
"df=pd.read_csv('/content/drive/MyDrive/Fraud.csv')\n",
"df = df.replace(to_replace={'PAYMENT':1,'TRANSFER':2,'CASH_OUT':3,\n",
" 'CASH_IN':4,'DEBIT':5,'No':0,'Yes':1})\n",
"df.drop(['nameOrig','nameDest','isFlaggedFraud'],axis=1,inplace=True)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 175
},
"id": "LiuYRDCMSpke",
"outputId": "9084aceb-96fd-4d48-9a4b-f272192500e3"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <div id=\"df-a228dd9d-b2b6-4310-bdd2-6915ebf3f727\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>step</th>\n",
" <th>type</th>\n",
" <th>amount</th>\n",
" <th>oldbalanceOrg</th>\n",
" <th>newbalanceOrig</th>\n",
" <th>oldbalanceDest</th>\n",
" <th>newbalanceDest</th>\n",
" <th>isFraud</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1814542</th>\n",
" <td>163</td>\n",
" <td>4</td>\n",
" <td>131783.46</td>\n",
" <td>1544963.45</td>\n",
" <td>1676746.92</td>\n",
" <td>4624925.7</td>\n",
" <td>4493142.24</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3643655</th>\n",
" <td>275</td>\n",
" <td>3</td>\n",
" <td>86656.57</td>\n",
" <td>402013.00</td>\n",
" <td>315356.43</td>\n",
" <td>0.0</td>\n",
" <td>86656.57</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6015227</th>\n",
" <td>451</td>\n",
" <td>3</td>\n",
" <td>184559.81</td>\n",
" <td>82705.00</td>\n",
" <td>0.00</td>\n",
" <td>3079590.5</td>\n",
" <td>3264150.31</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1915608</th>\n",
" <td>166</td>\n",
" <td>3</td>\n",
" <td>63102.08</td>\n",
" <td>12497.00</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>63102.08</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-a228dd9d-b2b6-4310-bdd2-6915ebf3f727')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-a228dd9d-b2b6-4310-bdd2-6915ebf3f727 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-a228dd9d-b2b6-4310-bdd2-6915ebf3f727');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-e1753b54-5478-4ecb-9f41-d5ddb03dd4fc\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-e1753b54-5478-4ecb-9f41-d5ddb03dd4fc')\"\n",
" title=\"Suggest charts.\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-e1753b54-5478-4ecb-9f41-d5ddb03dd4fc button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
" </div>\n",
" </div>\n"
],
"text/plain": [
" step type amount oldbalanceOrg newbalanceOrig oldbalanceDest \\\n",
"1814542 163 4 131783.46 1544963.45 1676746.92 4624925.7 \n",
"3643655 275 3 86656.57 402013.00 315356.43 0.0 \n",
"6015227 451 3 184559.81 82705.00 0.00 3079590.5 \n",
"1915608 166 3 63102.08 12497.00 0.00 0.0 \n",
"\n",
" newbalanceDest isFraud \n",
"1814542 4493142.24 0 \n",
"3643655 86656.57 0 \n",
"6015227 3264150.31 0 \n",
"1915608 63102.08 0 "
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.sample(4)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Ycoo_tWYSqmJ"
},
"outputs": [],
"source": [
"y = df[['isFraud']]\n",
"X = df.drop(['isFraud'],axis=1)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "93fyTPDqS09I"
},
"outputs": [],
"source": [
"train_X, test_X, train_y, test_y = train_test_split(X, y, test_size = 0.2, random_state = 121)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Nd6QkY7AS_nE"
},
"outputs": [],
"source": [
"clf = RandomForestClassifier(n_estimators=50,criterion=\"entropy\",max_depth=10,min_samples_leaf=5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "3kLNvs2NTWB9"
},
"outputs": [],
"source": [
"if True:\n",
" probabilities = clf.fit(train_X, train_y.values.ravel()).predict(test_X)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "zeXcTaJjVAKk",
"outputId": "d7a043f3-674b-4a60-bfed-8bd31d07eab8"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.7154750639146131\n"
]
}
],
"source": [
"if True:\n",
" print(average_precision_score(test_y,probabilities))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Pr4NH4xeVT1x"
},
"outputs": [],
"source": [
"from sklearn.metrics import confusion_matrix"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "HI4gWqnJWOks",
"outputId": "d82cd37c-50d9-4f44-8469-c10c8041310d"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.860777662661556\n"
]
}
],
"source": [
"import numpy as np\n",
"\n",
"\n",
"print(recall_score(test_y,probabilities, average='macro'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Dcfy7DcpWSd9"
},
"outputs": [],
"source": [
"from sklearn.metrics import accuracy_score"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "xxFzqmOVYnwf",
"outputId": "16c221b4-c6dc-486f-849e-4ea1769c9ca0"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.9996220110583376\n"
]
}
],
"source": [
"print(accuracy_score(test_y,probabilities))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "qIKaweZSY_Xx"
},
"outputs": [],
"source": [
"clf1 = RandomForestClassifier()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "4-7hyRWsZIpP"
},
"outputs": [],
"source": [
"if True:\n",
" probabilities = clf1.fit(train_X, train_y.values.ravel()).predict(test_X)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "BMIHe9OhZMvX",
"outputId": "2b9a4f7e-832c-47bd-cef0-0d9f158111b2"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.7654222817166213\n"
]
}
],
"source": [
"if True:\n",
" print(average_precision_score(test_y,probabilities))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "pFUtxQYMZUM4",
"outputId": "cd6a13ea-0beb-461d-b807-99e937f74649"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.8903922206733668\n"
]
}
],
"source": [
"print(recall_score(test_y,probabilities, average='macro'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "XKqw5YkhZVBI",
"outputId": "a5f835a1-e789-478c-d088-1a56778ad22f"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.9996880216011643\n"
]
}
],
"source": [
"print(accuracy_score(test_y,probabilities))"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4M_BNbSqGumJ"
},
"source": [
"Let us reduce the number of estimators"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "cKcvh6IId6Qb"
},
"outputs": [],
"source": [
"clf1 = RandomForestClassifier(n_estimators=10,criterion=\"entropy\",max_depth=10,min_samples_leaf=5)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "zmkUuDeXG-fq"
},
"outputs": [],
"source": [
"if True:\n",
" probabilities = clf1.fit(train_X, train_y.values.ravel()).predict(test_X)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "8BKyXOprI3yA",
"outputId": "c5ceebc3-1234-42ac-ad66-48fe60fb1dec"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.6929567392759193\n",
"0.8480426630458983\n",
"0.9995921491461065\n"
]
}
],
"source": [
"if True:\n",
" print(average_precision_score(test_y,probabilities))\n",
" print(recall_score(test_y,probabilities, average='macro'))\n",
" print(accuracy_score(test_y,probabilities))"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "UJb6bxfTJXtS"
},
"source": [
"The ideal number of estimators is around 15; 10 gives lower precision and recall. However, there is a slight increase in accuracy when the number of estimators is reduced and the depth and minimum number of samples per leaf are adjusted. Further fine-tuning will be interesting.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "pjX27afxKQd8"
},
"outputs": [],
"source": [
"if True:\n",
" probabilitiest = clf1.fit(train_X, train_y.values.ravel()).predict(train_X)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "OS0lBiZHOLvL",
"outputId": "aa012486-e00d-4409-d232-ad7613bfd631"
},
"outputs": [
{
"data": {
"text/plain": [
"array([0, 0, 0, ..., 0, 0, 0])"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# probabilitiest already holds clf1's predictions for train_X (computed in the previous cell).\n",
"# It is a NumPy array, which has no .predict method, so display it directly.\n",
"probabilitiest"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "x3sx3uT1PagO",
"outputId": "50baaf70-4964-404a-bb5a-921ab00f45f9"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.7431942749256791\n",
"0.8740196443998027\n",
"0.9996703402057643\n"
]
}
],
"source": [
"if True:\n",
" print(average_precision_score(train_y,probabilitiest))\n",
" print(recall_score(train_y,probabilitiest, average='macro'))\n",
" print(accuracy_score(train_y,probabilitiest))"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "O9XEgE_EQ63f"
},
"source": [
"Here we are comparing the performance on the training and the test data sets, and we can see that the difference between the training results and the test results is not particularly large. This shows that there isn't much overfitting. However, it is highly likely that there is some underfitting: the training and test scores are close together and the training score itself is modest, so this looks like a case of underfitting."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "H1COVTppRIVB"
},
"outputs": [],
"source": [
"clf15 = RandomForestClassifier(n_estimators=15)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "QhpEMjc3R04V"
},
"outputs": [],
"source": [
"\n",
" probabilities15 = clf15.fit(train_X, train_y.values.ravel()).predict(test_X)\n",
"\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "0pgto4OLViPk"
},
"source": [
"These are the results for the test data."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "cvEegDT-VpkC",
"outputId": "13535188-42bc-4353-f8bf-a79ae6eb2a2c"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.763744863422387\n",
"0.890686855436983\n",
"0.9996856640817776\n"
]
}
],
"source": [
" print(average_precision_score(test_y,probabilities15))\n",
" print(recall_score(test_y,probabilities15, average='macro'))\n",
" print(accuracy_score(test_y,probabilities15))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "x3szVV9WV7Fb"
},
"outputs": [],
"source": [
"probabilities15t = clf15.fit(train_X, train_y.values.ravel()).predict(train_X)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "jlNnyCRZWFh5",
"outputId": "27d76ac0-eac1-4b86-cc8c-2fc6dbb481ad"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.98484673816048\n",
"0.9925668530963148\n",
"0.99998055046506\n"
]
}
],
"source": [
" print(average_precision_score(train_y,probabilities15t))\n",
" print(recall_score(train_y,probabilities15t, average='macro'))\n",
" print(accuracy_score(train_y,probabilities15t))"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "56Ea137hXtUF"
},
"source": [
"This is a case of overfitting, where the trained model has high variance: the training scores are far higher than the test scores. The training parameters need to be changed to reduce overfitting. So the previous model with n_estimators=10 (with limited depth and a minimum leaf size) is a much better option than a highly overfitting model."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "IYLUN1IBWax4"
},
"outputs": [],
"source": [
"# Train and score the 12-estimator forest. The fit must be called on clf12 itself;\n",
"# fitting clf15 here would report the 15-estimator model's scores under the n=12 label.\n",
"clf12 = RandomForestClassifier(n_estimators=12)\n",
"\n",
"probabilities12 = clf12.fit(train_X, train_y.values.ravel()).predict(test_X)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "3fHOFgIAZNNE",
"outputId": "e27a1e86-a72b-4284-b950-304ec8b1120a"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.7692047292921947\n",
"0.8948318076592758\n",
"0.9996927366399376\n"
]
}
],
"source": [
" print(average_precision_score(test_y,probabilities12))\n",
" print(recall_score(test_y,probabilities12, average='macro'))\n",
" print(accuracy_score(test_y,probabilities12))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "mJnz__wcaKYt"
},
"outputs": [],
"source": [
"probabilities12 = clf12.fit(train_X, train_y.values.ravel()).predict(train_X)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "JmunW9RjfiFH",
"outputId": "3a90bf05-2a72-4e0b-e2fe-3e35d7c8ef45"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.9761224293704273\n",
"0.9881225070079275\n",
"0.9999693522479733\n"
]
}
],
"source": [
" print(average_precision_score(train_y,probabilities12))\n",
" print(recall_score(train_y,probabilities12, average='macro'))\n",
" print(accuracy_score(train_y,probabilities12))"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "OS4901l68a0S"
},
"source": [
"Data Analysis"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "bLXBoFVI7iTi",
"outputId": "af38d0ac-38f0-40ea-9d26-0f5bf0bf3f43"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <div id=\"df-38ba0967-e9c5-4186-9bff-c50c044f9f49\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>step</th>\n",
" <th>type</th>\n",
" <th>amount</th>\n",
" <th>oldbalanceOrg</th>\n",
" <th>newbalanceOrig</th>\n",
" <th>oldbalanceDest</th>\n",
" <th>newbalanceDest</th>\n",
" <th>isFraud</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>9839.64</td>\n",
" <td>170136.0</td>\n",
" <td>160296.36</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1864.28</td>\n",
" <td>21249.0</td>\n",
" <td>19384.72</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>181.00</td>\n",
" <td>181.0</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>181.00</td>\n",
" <td>181.0</td>\n",
" <td>0.00</td>\n",
" <td>21182.0</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>11668.14</td>\n",
" <td>41554.0</td>\n",
" <td>29885.86</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-38ba0967-e9c5-4186-9bff-c50c044f9f49')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-38ba0967-e9c5-4186-9bff-c50c044f9f49 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-38ba0967-e9c5-4186-9bff-c50c044f9f49');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-a2fd117f-919f-4061-9c93-a83447ffeaba\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-a2fd117f-919f-4061-9c93-a83447ffeaba')\"\n",
" title=\"Suggest charts.\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-a2fd117f-919f-4061-9c93-a83447ffeaba button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
" </div>\n",
" </div>\n"
],
"text/plain": [
" step type amount oldbalanceOrg newbalanceOrig oldbalanceDest \\\n",
"0 1 1 9839.64 170136.0 160296.36 0.0 \n",
"1 1 1 1864.28 21249.0 19384.72 0.0 \n",
"2 1 2 181.00 181.0 0.00 0.0 \n",
"3 1 3 181.00 181.0 0.00 21182.0 \n",
"4 1 1 11668.14 41554.0 29885.86 0.0 \n",
"\n",
" newbalanceDest isFraud \n",
"0 0.0 0 \n",
"1 0.0 0 \n",
"2 0.0 1 \n",
"3 0.0 1 \n",
"4 0.0 0 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "fgPe1gyBCJaE"
},
"source": [
"From the data analysis in"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "9JYpKg_u7mVW",
"outputId": "601419ed-43e7-4ab6-932e-5bd52ff68376"
},
"outputs": [
{
"data": {
"text/plain": [
"PearsonRResult(statistic=0.9988027631729837, pvalue=0.0)"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scipy.stats.pearsonr(df.iloc[:,3],df.iloc[:,4] )"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "fx2m3F19FH87"
},
"source": [
"From the above it is clear that oldbalanceOrg and newbalanceOrig are highly correlated: for perfectly correlated data the Pearson correlation coefficient is 1, and here it is about 0.999."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "F6OnlCcEFP2h",
"outputId": "3fc82b6b-a35d-43e5-bb3e-4a5b6a59491b"
},
"outputs": [
{
"data": {
"text/plain": [
"PearsonRResult(statistic=-0.020403492367990984, pvalue=7.526433468566388e-253)"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scipy.stats.pearsonr(df.iloc[:,3],df.iloc[:,5] )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TBCNP7-MYyqQ",
"outputId": "1b14cfff-ee22-49a9-8c21-9d17c6efeebc"
},
"outputs": [
{
"data": {
"text/plain": [
"PearsonRResult(statistic=0.9700604740242301, pvalue=0.0)"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scipy.stats.pearsonr(df.iloc[:,6],df.iloc[:,5] )"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "tqQxGV5mZJhZ"
},
"source": [
"In addition, `oldbalanceDest` and `newbalanceDest` also show a high correlation (about 0.97)."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Ia8tPCu3FwnX"
},
"source": [
"However, other combinations show very little correlation; for example, `oldbalanceOrg` and `oldbalanceDest` have a coefficient of about -0.02."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "M03XZlOFFXiM"
},
"source": [
"Amount and"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "szhJqw1prfPm"
},
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 484
},
"id": "bWkcTJui9tIT",
"outputId": "b134ad87-8ee1-4e32-939b-51c3cc471076"
},
"outputs": [
{
"data": {
"text/plain": [
"Text(0, 0.5, 'Fraud')"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Sum of the isFraud flag for each (integer-encoded) transaction type.\n",
"var = df.groupby('type').isFraud.sum()\n",
"fig, ax1 = plt.subplots()\n",
"# Draw on ax1 explicitly instead of relying on pyplot's implicit current axes.\n",
"var.plot(kind='bar', ax=ax1)\n",
"ax1.set_title(\"Which Transaction has more fraud\")\n",
"ax1.set_xlabel('Type of Transaction')\n",
"ax1.set_ylabel('Fraud')"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "jzuYw4kKIFAG"
},
"source": [
"Encoding of the transaction `type` column used on the x-axis above: `{'PAYMENT':1,'TRANSFER':2,'CASH_OUT':3,'CASH_IN':4,'DEBIT':5}`"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "VB75WHM1JF8j"
},
"source": [
"It is clear that fraudulent transactions occur only among the TRANSFER and CASH_OUT transactions; no other transaction type contains fraud."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "tMsfmIkgDoTA"
},
"outputs": [],
"source": [
"# Independent copy of df, taken before the transaction-type filter below.\n",
"# NOTE(review): df2 is reassigned from the filtered df further down before this\n",
"# snapshot is read in the cells that follow - confirm it is still needed.\n",
"df2 = df.copy(deep=True)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "RQUfyp-oOxpR"
},
"source": [
"Removing all transaction types that do not contribute to fraud (PAYMENT, CASH_IN and DEBIT)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "0CYK1n_iJ06x"
},
"outputs": [],
"source": [
"# Type codes with no fraudulent rows: 1 = PAYMENT, 4 = CASH_IN, 5 = DEBIT.\n",
"# Drop them in a single pass instead of three separate full-frame drops;\n",
"# rows are still removed by index label, exactly as before.\n",
"df = df.drop(df.index[df['type'].isin([1, 4, 5])])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "ztaVgbkEN677",
"outputId": "558346e5-16cf-4b9e-ad9c-441d548abfef"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <div id=\"df-2100021e-3d4c-4a69-816f-95e342820615\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>step</th>\n",
" <th>type</th>\n",
" <th>amount</th>\n",
" <th>oldbalanceOrg</th>\n",
" <th>newbalanceOrig</th>\n",
" <th>oldbalanceDest</th>\n",
" <th>newbalanceDest</th>\n",
" <th>isFraud</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>5774894</th>\n",
" <td>400</td>\n",
" <td>2</td>\n",
" <td>307161.86</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>974551.18</td>\n",
" <td>1281713.04</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1059852</th>\n",
" <td>119</td>\n",
" <td>2</td>\n",
" <td>2495.37</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>810922.40</td>\n",
" <td>813417.77</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1126821</th>\n",
" <td>131</td>\n",
" <td>2</td>\n",
" <td>507953.32</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2190855.92</td>\n",
" <td>2728658.94</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2680670</th>\n",
" <td>210</td>\n",
" <td>3</td>\n",
" <td>147178.76</td>\n",
" <td>27590.0</td>\n",
" <td>0.0</td>\n",
" <td>190691.71</td>\n",
" <td>337870.47</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3240962</th>\n",
" <td>250</td>\n",
" <td>3</td>\n",
" <td>168555.07</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>378995.71</td>\n",
" <td>547550.78</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-2100021e-3d4c-4a69-816f-95e342820615')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-2100021e-3d4c-4a69-816f-95e342820615 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-2100021e-3d4c-4a69-816f-95e342820615');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-eefe9c55-aaee-439e-9f9d-a7331935e64d\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-eefe9c55-aaee-439e-9f9d-a7331935e64d')\"\n",
" title=\"Suggest charts.\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-eefe9c55-aaee-439e-9f9d-a7331935e64d button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
" </div>\n",
" </div>\n"
],
"text/plain": [
" step type amount oldbalanceOrg newbalanceOrig oldbalanceDest \\\n",
"5774894 400 2 307161.86 0.0 0.0 974551.18 \n",
"1059852 119 2 2495.37 0.0 0.0 810922.40 \n",
"1126821 131 2 507953.32 0.0 0.0 2190855.92 \n",
"2680670 210 3 147178.76 27590.0 0.0 190691.71 \n",
"3240962 250 3 168555.07 0.0 0.0 378995.71 \n",
"\n",
" newbalanceDest isFraud \n",
"5774894 1281713.04 0 \n",
"1059852 813417.77 0 \n",
"1126821 2728658.94 0 \n",
"2680670 337870.47 0 \n",
"3240962 547550.78 0 "
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Spot-check five random rows of the filtered frame; the fixed seed makes the\n",
"# displayed rows reproducible across Restart & Run All.\n",
"df.sample(5, random_state=42)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Qfdig9RYN8_J"
},
"outputs": [],
"source": [
"# Row-wise sum of oldbalanceOrg and newbalanceOrig, selected by name rather\n",
"# than by column position so a change in column order cannot alter it.\n",
"new = df['oldbalanceOrg'] + df['newbalanceOrig']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NrJxtE2lWhlM"
},
"outputs": [],
"source": [
"# Independent working copy of the filtered frame; the cells below modify df2\n",
"# and leave df untouched.\n",
"df2 = df.copy(deep=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "AHeTOKlVWl8l",
"outputId": "3fa83804-40b5-45a5-d041-edcb06fd1fad"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <div id=\"df-d25fbc4e-1e79-41a8-8f89-151740eca5f8\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>step</th>\n",
" <th>type</th>\n",
" <th>amount</th>\n",
" <th>oldbalanceOrg</th>\n",
" <th>newbalanceOrig</th>\n",
" <th>oldbalanceDest</th>\n",
" <th>newbalanceDest</th>\n",
" <th>isFraud</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>181.00</td>\n",
" <td>181.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>181.00</td>\n",
" <td>181.0</td>\n",
" <td>0.0</td>\n",
" <td>21182.0</td>\n",
" <td>0.00</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>229133.94</td>\n",
" <td>15325.0</td>\n",
" <td>0.0</td>\n",
" <td>5083.0</td>\n",
" <td>51513.44</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>215310.30</td>\n",
" <td>705.0</td>\n",
" <td>0.0</td>\n",
" <td>22425.0</td>\n",
" <td>0.00</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>311685.89</td>\n",
" <td>10835.0</td>\n",
" <td>0.0</td>\n",
" <td>6267.0</td>\n",
" <td>2719172.89</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-d25fbc4e-1e79-41a8-8f89-151740eca5f8')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-d25fbc4e-1e79-41a8-8f89-151740eca5f8 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-d25fbc4e-1e79-41a8-8f89-151740eca5f8');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-2a8717c2-0ed2-402f-bb36-41badb1744c9\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-2a8717c2-0ed2-402f-bb36-41badb1744c9')\"\n",
" title=\"Suggest charts.\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-2a8717c2-0ed2-402f-bb36-41badb1744c9 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
" </div>\n",
" </div>\n"
],
"text/plain": [
" step type amount oldbalanceOrg newbalanceOrig oldbalanceDest \\\n",
"2 1 2 181.00 181.0 0.0 0.0 \n",
"3 1 3 181.00 181.0 0.0 21182.0 \n",
"15 1 3 229133.94 15325.0 0.0 5083.0 \n",
"19 1 2 215310.30 705.0 0.0 22425.0 \n",
"24 1 2 311685.89 10835.0 0.0 6267.0 \n",
"\n",
" newbalanceDest isFraud \n",
"2 0.00 1 \n",
"3 0.00 1 \n",
"15 51513.44 0 \n",
"19 0.00 0 \n",
"24 2719172.89 0 "
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# First five rows of the working copy before the balance columns are combined.\n",
"df2.head(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "aHF00JItXMCM"
},
"outputs": [],
"source": [
"# Replace oldbalanceOrg with the mean of oldbalanceOrg and newbalanceOrig.\n",
"# Both inputs are read from df (df2 was created as df.copy()), so re-running\n",
"# this cell does not average an already-averaged column.\n",
"# NOTE(review): the column keeps the name oldbalanceOrg although it now holds\n",
"# the average of the two balances.\n",
"df2['oldbalanceOrg'] = (df['oldbalanceOrg'] + df['newbalanceOrig']) / 2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "XvXbhqMCXkUE",
"outputId": "9d10bcc3-6b91-48f3-f334-e61bf38042eb"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <div id=\"df-50baf5e1-a3e2-402a-8eb8-153659839e5f\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>step</th>\n",
" <th>type</th>\n",
" <th>amount</th>\n",
" <th>oldbalanceOrg</th>\n",
" <th>newbalanceOrig</th>\n",
" <th>oldbalanceDest</th>\n",
" <th>newbalanceDest</th>\n",
" <th>isFraud</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>181.00</td>\n",
" <td>90.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>181.00</td>\n",
" <td>90.5</td>\n",
" <td>0.0</td>\n",
" <td>21182.0</td>\n",
" <td>0.00</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>229133.94</td>\n",
" <td>7662.5</td>\n",
" <td>0.0</td>\n",
" <td>5083.0</td>\n",
" <td>51513.44</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>215310.30</td>\n",
" <td>352.5</td>\n",
" <td>0.0</td>\n",
" <td>22425.0</td>\n",
" <td>0.00</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>311685.89</td>\n",
" <td>5417.5</td>\n",
" <td>0.0</td>\n",
" <td>6267.0</td>\n",
" <td>2719172.89</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-50baf5e1-a3e2-402a-8eb8-153659839e5f')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-50baf5e1-a3e2-402a-8eb8-153659839e5f button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-50baf5e1-a3e2-402a-8eb8-153659839e5f');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-168729e6-0faf-4b91-9686-1e3d58b3b556\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-168729e6-0faf-4b91-9686-1e3d58b3b556')\"\n",
" title=\"Suggest charts.\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-168729e6-0faf-4b91-9686-1e3d58b3b556 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
" </div>\n",
" </div>\n"
],
"text/plain": [
" step type amount oldbalanceOrg newbalanceOrig oldbalanceDest \\\n",
"2 1 2 181.00 90.5 0.0 0.0 \n",
"3 1 3 181.00 90.5 0.0 21182.0 \n",
"15 1 3 229133.94 7662.5 0.0 5083.0 \n",
"19 1 2 215310.30 352.5 0.0 22425.0 \n",
"24 1 2 311685.89 5417.5 0.0 6267.0 \n",
"\n",
" newbalanceDest isFraud \n",
"2 0.00 1 \n",
"3 0.00 1 \n",
"15 51513.44 0 \n",
"19 0.00 0 \n",
"24 2719172.89 0 "
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# First five rows of the working copy after oldbalanceOrg was replaced by the average.\n",
"df2.head(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "-e7z5L7fX8wd"
},
"outputs": [],
"source": [
"# newbalanceOrig is dropped from the working copy after being averaged into\n",
"# oldbalanceOrg. errors='ignore' keeps the cell re-runnable once the column\n",
"# is already gone.\n",
"df2 = df2.drop(columns=['newbalanceOrig'], errors='ignore')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "iFPN-jQ6YPx0",
"outputId": "8d5d0bb3-f7c4-4d61-f2a7-50ea4b63039e"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <div id=\"df-64e652b6-550e-4457-aea7-c38f7986a206\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>step</th>\n",
" <th>type</th>\n",
" <th>amount</th>\n",
" <th>oldbalanceOrg</th>\n",
" <th>oldbalanceDest</th>\n",
" <th>newbalanceDest</th>\n",
" <th>isFraud</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>181.00</td>\n",
" <td>90.5</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>181.00</td>\n",
" <td>90.5</td>\n",
" <td>21182.0</td>\n",
" <td>0.00</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>229133.94</td>\n",
" <td>7662.5</td>\n",
" <td>5083.0</td>\n",
" <td>51513.44</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>215310.30</td>\n",
" <td>352.5</td>\n",
" <td>22425.0</td>\n",
" <td>0.00</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>311685.89</td>\n",
" <td>5417.5</td>\n",
" <td>6267.0</td>\n",
" <td>2719172.89</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-64e652b6-550e-4457-aea7-c38f7986a206')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-64e652b6-550e-4457-aea7-c38f7986a206 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-64e652b6-550e-4457-aea7-c38f7986a206');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-9c27649e-cff6-430d-9de8-718c93e2ceaf\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-9c27649e-cff6-430d-9de8-718c93e2ceaf')\"\n",
" title=\"Suggest charts.\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-9c27649e-cff6-430d-9de8-718c93e2ceaf button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
" </div>\n",
" </div>\n"
],
"text/plain": [
" step type amount oldbalanceOrg oldbalanceDest newbalanceDest \\\n",
"2 1 2 181.00 90.5 0.0 0.00 \n",
"3 1 3 181.00 90.5 21182.0 0.00 \n",
"15 1 3 229133.94 7662.5 5083.0 51513.44 \n",
"19 1 2 215310.30 352.5 22425.0 0.00 \n",
"24 1 2 311685.89 5417.5 6267.0 2719172.89 \n",
"\n",
" isFraud \n",
"2 1 \n",
"3 1 \n",
"15 0 \n",
"19 0 \n",
"24 0 "
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.head()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "9PJiu0hKZc0B"
},
"source": [
"We are averaging all the correlated values and removing the redundant ones"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "yJ4_y2jKYRrK"
},
"outputs": [],
"source": [
"# Overwrite oldbalanceDest with the mean of the old and new destination balances.\n",
"# NOTE: the column is replaced in place, so running this cell twice averages twice.\n",
"df2['oldbalanceDest'] = df2['oldbalanceDest'].add(df2['newbalanceDest']).div(2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "B2ZHCzG0Z3m5",
"outputId": "51d1d6bf-9fb8-44a2-d09c-73bf10282d8e"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <div id=\"df-d4dd5424-7873-4248-b6d6-97c3fe671ea8\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>type</th>\n",
" <th>amount</th>\n",
" <th>oldbalanceOrg</th>\n",
" <th>oldbalanceDest</th>\n",
" <th>isFraud</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>181.00</td>\n",
" <td>90.5</td>\n",
" <td>0.000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>181.00</td>\n",
" <td>90.5</td>\n",
" <td>10591.000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>3</td>\n",
" <td>229133.94</td>\n",
" <td>7662.5</td>\n",
" <td>28298.220</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>2</td>\n",
" <td>215310.30</td>\n",
" <td>352.5</td>\n",
" <td>11212.500</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>2</td>\n",
" <td>311685.89</td>\n",
" <td>5417.5</td>\n",
" <td>1362719.945</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-d4dd5424-7873-4248-b6d6-97c3fe671ea8')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-d4dd5424-7873-4248-b6d6-97c3fe671ea8 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-d4dd5424-7873-4248-b6d6-97c3fe671ea8');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-8ab3dcdf-61fc-462e-99e2-013c62cf48bc\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-8ab3dcdf-61fc-462e-99e2-013c62cf48bc')\"\n",
" title=\"Suggest charts.\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-8ab3dcdf-61fc-462e-99e2-013c62cf48bc button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
" </div>\n",
" </div>\n"
],
"text/plain": [
" type amount oldbalanceOrg oldbalanceDest isFraud\n",
"2 2 181.00 90.5 0.000 1\n",
"3 3 181.00 90.5 10591.000 1\n",
"15 3 229133.94 7662.5 28298.220 0\n",
"19 2 215310.30 352.5 11212.500 0\n",
"24 2 311685.89 5417.5 1362719.945 0"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "vumRKvchZ4ve"
},
"outputs": [],
"source": [
"# newbalanceDest has been averaged into oldbalanceDest above, so remove it (in place).\n",
"df2.drop(columns=['newbalanceDest'], inplace=True)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "vPqre8uzbeu6"
},
"source": [
"Actually, step is also logically unnecessary"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "TPaYuWYabiBC"
},
"outputs": [],
"source": [
"# Remove the step column from df2 (in place).\n",
"df2.drop(columns=['step'], inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Y6b9Z-3UcAXy"
},
"outputs": [],
"source": [
"# Separate the features from the isFraud target (y stays a one-column DataFrame).\n",
"X = df2.drop(columns=['isFraud'])\n",
"y = df2[['isFraud']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "RO7ncKrYcEA8"
},
"outputs": [],
"source": [
"# The fraud label is heavily unbalanced, so stratify on it to keep the same fraud share\n",
"# in the training and test splits; random_state pins the shuffle for reproducibility.\n",
"train_X, test_X, train_y, test_y = train_test_split(\n",
"    X, y, test_size=0.2, random_state=121, stratify=y\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "-lHUZFOKcc_K"
},
"source": [
"We will now apply the lessons learnt from our previous experiments on the raw data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "-3pWb1eEcWiV"
},
"outputs": [],
"source": [
"# 10-tree random forest; random_state is fixed (same seed as the train/test split) so\n",
"# the reported metrics are reproducible across runs.\n",
"clf1 = RandomForestClassifier(n_estimators=10, random_state=121)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "8TFs0-hhccKL"
},
"outputs": [],
"source": [
"# Fit the forest on the training split, then predict labels for the test split.\n",
"# Despite its name, `probabilities` holds hard class labels from .predict(), not scores.\n",
"clf1.fit(train_X, train_y.values.ravel())\n",
"probabilities = clf1.predict(test_X)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Agp3zJ6NdKZK",
"outputId": "21f5a097-1d8f-459b-87e9-39f8775a1e8e"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.7256710801241542\n",
"0.8768268835457358\n",
"0.9992004793514317\n"
]
}
],
"source": [
"# Test-set metrics: average precision, macro recall, accuracy.\n",
"# average_precision_score ranks samples by score, so it is given the predicted\n",
"# probability of the positive (fraud) class instead of the hard 0/1 labels.\n",
"print(average_precision_score(test_y, clf1.predict_proba(test_X)[:, 1]))\n",
"print(recall_score(test_y, probabilities, average='macro'))\n",
"print(accuracy_score(test_y, probabilities))"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "sxOhEq6mdsy6"
},
"source": [
"The results are similar to the ones we obtained on the raw data earlier. However, there is a slight improvement and, more importantly, computational time is reduced to a considerable extent. So clearly, we have removed unnecessary data. This is our final result."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "HaAG7a2rd5zT"
},
"outputs": [],
"source": [
"# Predict on the training split with the forest that was already fitted above.\n",
"# Refitting here is redundant and, for an unseeded forest, could score the training\n",
"# set with a different model than the one evaluated on the test set.\n",
"probabilitiest = clf1.predict(train_X)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "BCHBB8zuew7P",
"outputId": "5d826387-30bf-4633-d25b-06d935846b93"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.9547410553377302\n",
"0.9776808174915346\n",
"0.9998646409126452\n"
]
}
],
"source": [
"# Training-set metrics: average precision, macro recall, accuracy.\n",
"# average_precision_score ranks samples by score, so it is given the predicted\n",
"# probability of the positive (fraud) class instead of the hard 0/1 labels.\n",
"print(average_precision_score(train_y, clf1.predict_proba(train_X)[:, 1]))\n",
"print(recall_score(train_y, probabilitiest, average='macro'))\n",
"print(accuracy_score(train_y, probabilitiest))"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Yde-Su7Ie91s"
},
"source": [
"Now we are suffering from only slight overfitting in the case of precision. However, in the case of recall, the difference between the training and test cases is moderate and indicates a balanced fit (neither overfitting nor underfitting). It is clear that our data cleanup, removing redundant and unnecessary information, has improved machine learning performance to a great extent. Perhaps with further tuning, we can improve the performance even more!"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "X_7gT84nrDVe"
},
"source": [
"1. Data cleaning including missing values, outliers and multi-collinearity.\n",
"\n",
"Missing values: There were no missing values or NaNs, hence data cleaning did not take much effort. However, we have removed the redundant columns 'step' and 'isFlaggedFraud'.\n",
"\n",
"Outliers: As indicated by the graph above, only 'Transfer' and 'Cash out' contribute to fraud, so we are removing all other types. This contributes to an improvement in the machine learning parameters and keeps the overall process cogent.\n",
"\n",
"Multicollinearity: The balance transfer columns have shown considerable correlation as we have seen through the Pearson correlation coefficient. We are averaging them and eliminating the redundant column. This has demonstrated computational efficiency and has evidenced some positive changes in training parameters."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "uiLYoSHTsQY0"
},
"source": [
"2. Describe your fraud detection model in elaboration.\n",
"\n",
"We use the random forest ML algorithm. Random forests are ensembles that build a multitude of decision trees during the training phase, which counters the overfitting characteristic of basic decision trees. The final decision is selected through voting among the trees.\n",
"\n",
"We are using a random forest algorithm to detect fraud. Random forest algorithms cope well with unbalanced data, and there are many zeros in this data. After several rounds of trial and error with input parameters such as the number of estimators and the error detection index, we have obtained good results with respect to minimal overfitting. We are using a total of 10 estimators. The difference between the evaluation metrics on the training data and the test data is minimal when we use around 10 estimators. We observe that the performance metrics (precision, recall and accuracy) are better when we use 'cleaned' data rather than the raw data. However, we note high metric values for the training data, which can possibly indicate overfitting.\n",
"\n",
"The interesting feature of this algorithm is that the cleaned data responds much more positively, with minimal difference between the train and test metrics for accuracy and recall. For precision, however, the difference is slightly higher. We dealt with multicollinearity through averaging, removed the redundant columns and kept only the rows with Transfer and Cash out, since they are the only ones that involve fraud."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "F6hdEr7av_ux"
},
"source": [
"3. How did you select variables to be included in the model?\n",
"\n",
"The variables were chosen on the basis of relevance with respect to fraud. We also used basic common sense. This realization came after working with the raw data. The computational time for random forest was unnecessarily high, so we removed the 'step' column; 'isFlaggedFraud' comes from a rule-based model, so it is unnecessary as well. The old and new balance columns for the source and for the destination of the money were highly correlated with each other, so we averaged each pair into a single column. Types of transactions which were not relevant to fraud are excluded."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "CrZYQnoZyHyI"
},
"source": [
"4. Demonstrate the performance of the model by using best set of tools.\n",
"\n",
"Accuracy, precision and recall are the performance metrics that we have chosen. We have proactively carried out several experiments on the raw data. Although the results are in general not very different for the raw and the cleaned data, overfitting and underfitting are much less pronounced with the cleaned-up data. We find that the difference in recall between test and train predictions is minimal, while the difference is slightly higher for precision. However, high evaluation metrics for the training data may possibly indicate overfitting."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ToSHZdYczBhe"
},
"source": [
"5. What are the key factors that predict fraudulent customer?\n",
"\n",
"Transfer and Cash out are the modes in which fraud takes place. Authorities have to be careful with these transactions. Frauds are anomalous transactions. We have demonstrated very clearly, and it is also well published, that random forests are the best machine learning algorithms available. Since random forests are highly successful, fraudulent transactions must have a highly non-linear nature. This means that fraud is not easy to detect. Random forests are known to fail when the data contains a considerable number of outliers. Frauds are implicit in the data set. The non-linear yet implicit nature of frauds helps random forests predict the fraudulent customer.\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Sf5-1BAcS2rj"
},
"source": [
"6. Do these factors make sense? If yes, How? If not, How not?\n",
"\n",
"These factors make sense if the fraudulent activities are unorganized crimes: hence the non-linearity. If a syndicate were involved in this crime, there would be a well-established pattern and considerable linearity in the data. In other words, if the crime is organized, this data does not make sense; if it is unorganized, then it makes a lot of sense. Also, the threat does not appear to be internal (bank employees); otherwise there would have been some linear pattern."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "7Q0VE4S1Wmhj"
},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "WRJClXAPT3lR"
},
"source": [
"What kind of prevention should be adopted while the company updates its infrastructure?\n",
"\n",
"Since it is clear that there is no established explicit pattern, the best way is to reduce Transfer and Cash out transactions. Debit transactions have not shown much fraud. The key is to establish a pattern that will take us to the source. A pattern can be established if banks increase the documentation of every transaction; more information is needed for better data analysis."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "5wJYgydKWiK1"
},
"source": [
"Assuming these actions have been implemented, how would you determine if they work?\n",
"\n",
"If the measures are working, Fraud will leave a detectable trail, a distinct pattern at least if not linearity"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "OmMJ5Y9NWnxl"
},
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "imYJLUeye5Sw",
"outputId": "68feb3c8-2044-4dd3-f92e-27609d54dbab"
},
"outputs": [
{
"data": {
"text/plain": [
"2"
]
},
"execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"1+1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "oZSdQxg53IMS"
},
"outputs": [],
"source": [
"# Work on an independent (deep) copy so that changes to df5 do not touch df2.\n",
"df5 = df2.copy(deep=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "HRihJFI-DJwd"
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "doFfXYbWqj53"
},
"outputs": [],
"source": [
"# Keep only rows whose type is not 0. The mask is built from df5 itself: selecting\n",
"# labels from the unfiltered df could name rows that df5 no longer contains, which\n",
"# makes DataFrame.drop raise KeyError.\n",
"df5 = df5[df5['type'] != 0]"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "5kNM4cxc_qEO"
},
"source": [
"Smaller amounts of transaction show more fraud"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 484
},
"id": "QoYXty3z-vWp",
"outputId": "165ad9c5-c5a7-4e63-8ff6-fdb87c4bfd38"
},
"outputs": [
{
"data": {
"text/plain": [
"Text(0, 0.5, 'Oldbalnce')"
]
},
"execution_count": 98,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of the summed oldbalanceOrg for each isFraud class.\n",
"var = df2.groupby('isFraud')['oldbalanceOrg'].sum()\n",
"fig, ax1 = plt.subplots()\n",
"var.plot(kind='bar', ax=ax1)  # draw on the axes created above\n",
"ax1.set_title(\"quantity\")\n",
"ax1.set_xlabel('IsFraud')\n",
"ax1.set_ylabel('Oldbalnce')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 484
},
"id": "Yb2tyUMv-8sK",
"outputId": "f19daad3-436b-42a4-85ac-74a056aa92bc"
},
"outputs": [
{
"data": {
"text/plain": [
"Text(0, 0.5, 'Oldbalnce')"
]
},
"execution_count": 99,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of the summed oldbalanceDest for each isFraud class.\n",
"var = df2.groupby('isFraud')['oldbalanceDest'].sum()\n",
"fig, ax1 = plt.subplots()\n",
"var.plot(kind='bar', ax=ax1)  # draw on the axes created above\n",
"ax1.set_title(\"quantity\")\n",
"ax1.set_xlabel('IsFraud')\n",
"ax1.set_ylabel('Oldbalnce')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 484
},
"id": "9hsAe3XqABrG",
"outputId": "8224c575-2ea0-4cd3-b81d-dafd1bf56f1d"
},
"outputs": [
{
"data": {
"text/plain": [
"Text(0, 0.5, 'Oldbalnce')"
]
},
"execution_count": 100,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of the summed transaction amount for each isFraud class.\n",
"var = df2.groupby('isFraud')['amount'].sum()\n",
"fig, ax1 = plt.subplots()\n",
"var.plot(kind='bar', ax=ax1)  # draw on the axes created above\n",
"ax1.set_title(\"quantity\")\n",
"ax1.set_xlabel('IsFraud')\n",
"# The plotted column is amount; the previous y-label was copied from the balance charts.\n",
"ax1.set_ylabel('Amount')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "1qO4g2KhcOVS"
},
"outputs": [],
"source": [
"from sklearn.decomposition import PCA\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "MPsn7uvxADVX",
"outputId": "d44639c4-2049-467e-8e9e-5d844b9120e0"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <div id=\"df-2a554f64-5041-4e2d-bf4f-973174d3457d\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>type</th>\n",
" <th>amount</th>\n",
" <th>oldbalanceOrg</th>\n",
" <th>oldbalanceDest</th>\n",
" <th>isFraud</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>181.00</td>\n",
" <td>90.5</td>\n",
" <td>0.000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>181.00</td>\n",
" <td>90.5</td>\n",
" <td>10591.000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>3</td>\n",
" <td>229133.94</td>\n",
" <td>7662.5</td>\n",
" <td>28298.220</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>2</td>\n",
" <td>215310.30</td>\n",
" <td>352.5</td>\n",
" <td>11212.500</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>2</td>\n",
" <td>311685.89</td>\n",
" <td>5417.5</td>\n",
" <td>1362719.945</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-2a554f64-5041-4e2d-bf4f-973174d3457d')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-2a554f64-5041-4e2d-bf4f-973174d3457d button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-2a554f64-5041-4e2d-bf4f-973174d3457d');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-048b2eb6-d476-4544-86bb-e8480c39dcaa\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-048b2eb6-d476-4544-86bb-e8480c39dcaa')\"\n",
" title=\"Suggest charts.\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-048b2eb6-d476-4544-86bb-e8480c39dcaa button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
" </div>\n",
" </div>\n"
],
"text/plain": [
" type amount oldbalanceOrg oldbalanceDest isFraud\n",
"2 2 181.00 90.5 0.000 1\n",
"3 3 181.00 90.5 10591.000 1\n",
"15 3 229133.94 7662.5 28298.220 0\n",
"19 2 215310.30 352.5 11212.500 0\n",
"24 2 311685.89 5417.5 1362719.945 0"
]
},
"execution_count": 107,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 323
},
"id": "AZEQiGA2cCIe",
"outputId": "73d95c96-5702-4dae-d6b7-ae85acbb6995"
},
"outputs": [
{
"ename": "ValueError",
"evalue": "ignored",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-131-f902c1760de2>\u001b[0m in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mFraud\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'isFraud'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mgroupby\u001b[0;34m(self, by, axis, level, as_index, sort, group_keys, squeeze, observed, dropna)\u001b[0m\n\u001b[1;32m 8400\u001b[0m \u001b[0maxis\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_axis_number\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8401\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 8402\u001b[0;31m return DataFrameGroupBy(\n\u001b[0m\u001b[1;32m 8403\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8404\u001b[0m \u001b[0mkeys\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mby\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/core/groupby/groupby.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, obj, keys, axis, level, grouper, exclusions, selection, as_index, sort, group_keys, squeeze, observed, mutated, dropna)\u001b[0m\n\u001b[1;32m 963\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mpandas\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgrouper\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mget_grouper\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 964\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 965\u001b[0;31m grouper, exclusions, obj = get_grouper(\n\u001b[0m\u001b[1;32m 966\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 967\u001b[0m \u001b[0mkeys\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/core/groupby/grouper.py\u001b[0m in \u001b[0;36mget_grouper\u001b[0;34m(obj, key, axis, level, sort, observed, mutated, validate, dropna)\u001b[0m\n\u001b[1;32m 897\u001b[0m \u001b[0;31m# allow us to passing the actual Grouping as the gpr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 898\u001b[0m ping = (\n\u001b[0;32m--> 899\u001b[0;31m Grouping(\n\u001b[0m\u001b[1;32m 900\u001b[0m \u001b[0mgroup_axis\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 901\u001b[0m \u001b[0mgpr\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/core/groupby/grouper.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, index, grouper, obj, level, sort, observed, in_axis, dropna)\u001b[0m\n\u001b[1;32m 478\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlevel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlevel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 479\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_orig_grouper\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgrouper\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 480\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgrouping_vector\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_convert_grouper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrouper\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 481\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_all_grouper\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 482\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_index\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/core/groupby/grouper.py\u001b[0m in \u001b[0;36m_convert_grouper\u001b[0;34m(axis, grouper)\u001b[0m\n\u001b[1;32m 941\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgrouper\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtuple\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mIndex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mCategorical\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 942\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgrouper\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 943\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Grouper and axis must be same length\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 944\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 945\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgrouper\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtuple\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: Grouper and axis must be same length"
]
}
],
"source": [
"Fraud = df2.groupby(df2[df2['isFraud'] == 1].index)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "E7EI2zC2iFqZ"
},
"outputs": [],
"source": [
"df = df.drop(df[df['type'] == 1].index)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ZQWLxEXgdp1s"
},
"outputs": [],
"source": [
"fraud = df2.loc[df2.isFraud == 1]\n",
"nonfraud = df2.loc[df2.isFraud == 0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "uhcgNwpzdqoM",
"outputId": "e1a89ddf-d036-496c-ceca-15ee80b06cbe"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <div id=\"df-bf8df98e-803a-4ee8-878b-d60f97b4ccf0\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>type</th>\n",
" <th>amount</th>\n",
" <th>oldbalanceOrg</th>\n",
" <th>oldbalanceDest</th>\n",
" <th>isFraud</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>181.0</td>\n",
" <td>90.5</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>181.0</td>\n",
" <td>90.5</td>\n",
" <td>10591.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>251</th>\n",
" <td>2</td>\n",
" <td>2806.0</td>\n",
" <td>1403.0</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>252</th>\n",
" <td>3</td>\n",
" <td>2806.0</td>\n",
" <td>1403.0</td>\n",
" <td>13101.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>680</th>\n",
" <td>2</td>\n",
" <td>20128.0</td>\n",
" <td>10064.0</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-bf8df98e-803a-4ee8-878b-d60f97b4ccf0')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-bf8df98e-803a-4ee8-878b-d60f97b4ccf0 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-bf8df98e-803a-4ee8-878b-d60f97b4ccf0');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-44c378e7-4f24-4eb7-867f-f45bb2c11688\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-44c378e7-4f24-4eb7-867f-f45bb2c11688')\"\n",
" title=\"Suggest charts.\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-44c378e7-4f24-4eb7-867f-f45bb2c11688 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
" </div>\n",
" </div>\n"
],
"text/plain": [
" type amount oldbalanceOrg oldbalanceDest isFraud\n",
"2 2 181.0 90.5 0.0 1\n",
"3 3 181.0 90.5 10591.0 1\n",
"251 2 2806.0 1403.0 0.0 1\n",
"252 3 2806.0 1403.0 13101.0 1\n",
"680 2 20128.0 10064.0 0.0 1"
]
},
"execution_count": 135,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fraud.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "gdBjK2KKeJny",
"outputId": "67d64cec-e0d2-4b2a-9c45-a30bf706271a"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <div id=\"df-30ee164c-ad2b-4800-ab6f-47b665de1cc6\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>type</th>\n",
" <th>amount</th>\n",
" <th>oldbalanceOrg</th>\n",
" <th>oldbalanceDest</th>\n",
" <th>isFraud</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>3</td>\n",
" <td>229133.94</td>\n",
" <td>7662.500</td>\n",
" <td>28298.220</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>2</td>\n",
" <td>215310.30</td>\n",
" <td>352.500</td>\n",
" <td>11212.500</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>2</td>\n",
" <td>311685.89</td>\n",
" <td>5417.500</td>\n",
" <td>1362719.945</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>42</th>\n",
" <td>3</td>\n",
" <td>110414.71</td>\n",
" <td>13422.705</td>\n",
" <td>145607.580</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47</th>\n",
" <td>3</td>\n",
" <td>56953.90</td>\n",
" <td>971.010</td>\n",
" <td>67179.590</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-30ee164c-ad2b-4800-ab6f-47b665de1cc6')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-30ee164c-ad2b-4800-ab6f-47b665de1cc6 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-30ee164c-ad2b-4800-ab6f-47b665de1cc6');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-27424477-c88d-4075-b2c0-7a6cca4df7b4\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-27424477-c88d-4075-b2c0-7a6cca4df7b4')\"\n",
" title=\"Suggest charts.\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-27424477-c88d-4075-b2c0-7a6cca4df7b4 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
" </div>\n",
" </div>\n"
],
"text/plain": [
" type amount oldbalanceOrg oldbalanceDest isFraud\n",
"15 3 229133.94 7662.500 28298.220 0\n",
"19 2 215310.30 352.500 11212.500 0\n",
"24 2 311685.89 5417.500 1362719.945 0\n",
"42 3 110414.71 13422.705 145607.580 0\n",
"47 3 56953.90 971.010 67179.590 0"
]
},
"execution_count": 138,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nonfraud.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "silH8ybukcS0"
},
"outputs": [],
"source": [
"f=fraud.sample(1000)\n",
"nf=nonfraud.sample(1000)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 463
},
"id": "iouoSVn3eNeK",
"outputId": "12703f6b-27ca-466c-fecc-b09fc4c40fdc"
},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fig = plt.figure()\n",
"ax = fig.add_subplot(1,1,1)\n",
"ax.scatter(nf['oldbalanceOrg'],f['oldbalanceOrg'],c='g')\n",
"ax.scatter(nf['amount'],f['amount'],c='r')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "vbyEb9E6jn4J"
},
"source": [
"Relationship between fraud amounts and non-fraud amounts. The plot shows that the pattern is largely non-linear and not visually apparent. Therefore, more documentation is necessary to establish further patterns and help with the analysis."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "T2jltndPriDb"
},
"source": [
"Let us try some hyperparameter tuning."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "2Vsh8pV6jou-"
},
"outputs": [],
"source": [
"clf1 = RandomForestClassifier(n_estimators=7)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "R25uEvCCsCl3"
},
"outputs": [],
"source": [
"probabilities = clf1.fit(train_X, train_y.values.ravel()).predict(test_X)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "-q4VTnLnsGpk",
"outputId": "4cf77d99-b166-4fe7-cbe2-b925a2b76b6e"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.7205768043616442\n",
"0.8802368059448755\n",
"0.9991824314812608\n"
]
}
],
"source": [
" print(average_precision_score(test_y,probabilities))\n",
" print(recall_score(test_y,probabilities, average='macro'))\n",
" print(accuracy_score(test_y,probabilities))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "UNJVHckdviLT"
},
"outputs": [],
"source": [
"clf1 = RandomForestClassifier(n_estimators=7,max_depth=3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 74
},
"id": "mSSK3RCSwIpj",
"outputId": "d6937439-11e3-473d-fd0e-9544fc435061"
},
"outputs": [
{
"data": {
"text/html": [
"<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestClassifier(max_depth=3, n_estimators=7)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestClassifier</label><div class=\"sk-toggleable__content\"><pre>RandomForestClassifier(max_depth=3, n_estimators=7)</pre></div></div></div></div></div>"
],
"text/plain": [
"RandomForestClassifier(max_depth=3, n_estimators=7)"
]
},
"execution_count": 154,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"clf1.fit(train_X, train_y.values.ravel())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "_9xT3by0yUEH"
},
"outputs": [],
"source": [
"prob=clf1.predict(test_X)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "VTyarFsEygMY",
"outputId": "93e6fe18-473a-41e9-edfe-47a2bec4211d"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.5614608394916616\n",
"0.7873847634686285\n",
"0.9987258203659386\n"
]
}
],
"source": [
" print(average_precision_score(test_y,prob))\n",
" print(recall_score(test_y,prob, average='macro'))\n",
" print(accuracy_score(test_y,prob))"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Sz6OuoL7y3ss"
},
"source": [
"Restricting the maximum depth (max_depth=3) has brought the average precision down"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "3ubUZ4o4y06f"
},
"outputs": [],
"source": [
"estimator = clf1.estimators_[5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 606
},
"id": "4Ua-oj-Py2j3",
"outputId": "f87c2199-7982-42ce-86c9-426486b3386e"
},
"outputs": [
{
"data": {
"image/png": "
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment