Created
July 19, 2019 19:29
-
-
Save FavioVazquez/6b1b4e5077c8a818cf301500d896150e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# US - Baby Names\n", | |
| "\n", | |
| "## Introduction:\n", | |
| "\n", | |
| "We are going to use a subset of [US Baby Names from](https://www.kaggle.com/kaggle/us-baby-names) Kaggle.\n", | |
| "\n", | |
| "In the file it will be names from 2004 until 2014" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 53, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Import the necessary libraries\n", | |
| "from optimus import Optimus\n", | |
| "from pyspark.sql.functions import *\n", | |
| "import pandas as pd\n", | |
| "import numpy as np\n", | |
| "op = Optimus()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Import the dataset and assing baby_names" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 54, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "baby_names_pd = pd.read_csv('https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/06_Stats/US_Baby_Names/US_Baby_Names_right.csv')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 55, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "baby_names = op.spark.createDataFrame(baby_names_pd)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 56, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "\n", | |
| "<h1></h1>\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "<div class=\"info_items\">Viewing 5 of 5 rows / 8 columns</div>\n", | |
| "<div class=\"info_items\">1 partition(s)</div>\n", | |
| "\n", | |
| "<table class=\"optimus_table\">\n", | |
| " <thead>\n", | |
| " <tr>\n", | |
| " \n", | |
| " <th>\n", | |
| " <div class=\"column_name\">summary</div>\n", | |
| " <div class=\"data_type\">1 (string)</div>\n", | |
| " <div class=\"data_type\">\n", | |
| " \n", | |
| " nullable\n", | |
| " \n", | |
| " </div>\n", | |
| " </th>\n", | |
| " \n", | |
| " <th>\n", | |
| " <div class=\"column_name\">Unnamed: 0</div>\n", | |
| " <div class=\"data_type\">2 (string)</div>\n", | |
| " <div class=\"data_type\">\n", | |
| " \n", | |
| " nullable\n", | |
| " \n", | |
| " </div>\n", | |
| " </th>\n", | |
| " \n", | |
| " <th>\n", | |
| " <div class=\"column_name\">Id</div>\n", | |
| " <div class=\"data_type\">3 (string)</div>\n", | |
| " <div class=\"data_type\">\n", | |
| " \n", | |
| " nullable\n", | |
| " \n", | |
| " </div>\n", | |
| " </th>\n", | |
| " \n", | |
| " <th>\n", | |
| " <div class=\"column_name\">Name</div>\n", | |
| " <div class=\"data_type\">4 (string)</div>\n", | |
| " <div class=\"data_type\">\n", | |
| " \n", | |
| " nullable\n", | |
| " \n", | |
| " </div>\n", | |
| " </th>\n", | |
| " \n", | |
| " <th>\n", | |
| " <div class=\"column_name\">Year</div>\n", | |
| " <div class=\"data_type\">5 (string)</div>\n", | |
| " <div class=\"data_type\">\n", | |
| " \n", | |
| " nullable\n", | |
| " \n", | |
| " </div>\n", | |
| " </th>\n", | |
| " \n", | |
| " <th>\n", | |
| " <div class=\"column_name\">Gender</div>\n", | |
| " <div class=\"data_type\">6 (string)</div>\n", | |
| " <div class=\"data_type\">\n", | |
| " \n", | |
| " nullable\n", | |
| " \n", | |
| " </div>\n", | |
| " </th>\n", | |
| " \n", | |
| " <th>\n", | |
| " <div class=\"column_name\">State</div>\n", | |
| " <div class=\"data_type\">7 (string)</div>\n", | |
| " <div class=\"data_type\">\n", | |
| " \n", | |
| " nullable\n", | |
| " \n", | |
| " </div>\n", | |
| " </th>\n", | |
| " \n", | |
| " <th>\n", | |
| " <div class=\"column_name\">Count</div>\n", | |
| " <div class=\"data_type\">8 (string)</div>\n", | |
| " <div class=\"data_type\">\n", | |
| " \n", | |
| " nullable\n", | |
| " \n", | |
| " </div>\n", | |
| " </th>\n", | |
| " \n", | |
| " </tr>\n", | |
| "\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='count'>count\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='1016395'>1016395\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='1016395'>1016395\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='1016395'>1016395\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='1016395'>1016395\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='1016395'>1016395\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='1016395'>1016395\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='1016395'>1016395\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='mean'>mean\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='2830990.4619178567'>2830990.4619178567\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='2830991.4619178567'>2830991.4619178567\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Infinity'>Infinity\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='2009.0531899507573'>2009.0531899507573\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\"none \"\n", | |
| " title='None'>None\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\"none \"\n", | |
| " title='None'>None\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='34.85012421351935'>34.85012421351935\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='stddev'>stddev\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='1652475.6514804524'>1652475.6514804524\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='1652475.6514804524'>1652475.6514804524\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='NaN'>NaN\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='3.1382928281815494'>3.1382928281815494\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\"none \"\n", | |
| " title='None'>None\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\"none \"\n", | |
| " title='None'>None\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='97.3973464861767'>97.3973464861767\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='min'>min\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='11349'>11349\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='11350'>11350\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Aaban'>Aaban\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='2004'>2004\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='F'>F\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='AK'>AK\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='5'>5\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='max'>max\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='5647425'>5647425\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='5647426'>5647426\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Zyriah'>Zyriah\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='2014'>2014\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='M'>M\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='WY'>WY\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='4167'>4167\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "\n", | |
| "\n", | |
| "<div class=\"info_items\">Viewing 5 of 5 rows / 8 columns</div>\n", | |
| "<div class=\"info_items\">1 partition(s)</div>\n" | |
| ], | |
| "text/plain": [ | |
| "<IPython.core.display.HTML object>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "baby_names.describe().table()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# See the first 10 entries" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 57, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "\n", | |
| "<h1></h1>\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "<div class=\"info_items\">Viewing 10 of 1.0 million rows / 7 columns</div>\n", | |
| "<div class=\"info_items\">1 partition(s)</div>\n", | |
| "\n", | |
| "<table class=\"optimus_table\">\n", | |
| " <thead>\n", | |
| " <tr>\n", | |
| " \n", | |
| " <th>\n", | |
| " <div class=\"column_name\">Unnamed: 0</div>\n", | |
| " <div class=\"data_type\">1 (bigint)</div>\n", | |
| " <div class=\"data_type\">\n", | |
| " \n", | |
| " nullable\n", | |
| " \n", | |
| " </div>\n", | |
| " </th>\n", | |
| " \n", | |
| " <th>\n", | |
| " <div class=\"column_name\">Id</div>\n", | |
| " <div class=\"data_type\">2 (bigint)</div>\n", | |
| " <div class=\"data_type\">\n", | |
| " \n", | |
| " nullable\n", | |
| " \n", | |
| " </div>\n", | |
| " </th>\n", | |
| " \n", | |
| " <th>\n", | |
| " <div class=\"column_name\">Name</div>\n", | |
| " <div class=\"data_type\">3 (string)</div>\n", | |
| " <div class=\"data_type\">\n", | |
| " \n", | |
| " nullable\n", | |
| " \n", | |
| " </div>\n", | |
| " </th>\n", | |
| " \n", | |
| " <th>\n", | |
| " <div class=\"column_name\">Year</div>\n", | |
| " <div class=\"data_type\">4 (bigint)</div>\n", | |
| " <div class=\"data_type\">\n", | |
| " \n", | |
| " nullable\n", | |
| " \n", | |
| " </div>\n", | |
| " </th>\n", | |
| " \n", | |
| " <th>\n", | |
| " <div class=\"column_name\">Gender</div>\n", | |
| " <div class=\"data_type\">5 (string)</div>\n", | |
| " <div class=\"data_type\">\n", | |
| " \n", | |
| " nullable\n", | |
| " \n", | |
| " </div>\n", | |
| " </th>\n", | |
| " \n", | |
| " <th>\n", | |
| " <div class=\"column_name\">State</div>\n", | |
| " <div class=\"data_type\">6 (string)</div>\n", | |
| " <div class=\"data_type\">\n", | |
| " \n", | |
| " nullable\n", | |
| " \n", | |
| " </div>\n", | |
| " </th>\n", | |
| " \n", | |
| " <th>\n", | |
| " <div class=\"column_name\">Count</div>\n", | |
| " <div class=\"data_type\">7 (bigint)</div>\n", | |
| " <div class=\"data_type\">\n", | |
| " \n", | |
| " nullable\n", | |
| " \n", | |
| " </div>\n", | |
| " </th>\n", | |
| " \n", | |
| " </tr>\n", | |
| "\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='11349'>11349\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='11350'>11350\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Emma'>Emma\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='2004'>2004\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='F'>F\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='AK'>AK\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='62'>62\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='11350'>11350\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='11351'>11351\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Madison'>Madison\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='2004'>2004\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='F'>F\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='AK'>AK\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='48'>48\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='11351'>11351\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='11352'>11352\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Hannah'>Hannah\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='2004'>2004\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='F'>F\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='AK'>AK\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='46'>46\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='11352'>11352\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='11353'>11353\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Grace'>Grace\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='2004'>2004\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='F'>F\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='AK'>AK\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='44'>44\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='11353'>11353\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='11354'>11354\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Emily'>Emily\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='2004'>2004\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='F'>F\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='AK'>AK\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='41'>41\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='11354'>11354\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='11355'>11355\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Abigail'>Abigail\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='2004'>2004\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='F'>F\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='AK'>AK\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='37'>37\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='11355'>11355\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='11356'>11356\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Olivia'>Olivia\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='2004'>2004\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='F'>F\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='AK'>AK\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='33'>33\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='11356'>11356\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='11357'>11357\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Isabella'>Isabella\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='2004'>2004\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='F'>F\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='AK'>AK\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='30'>30\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='11357'>11357\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='11358'>11358\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Alyssa'>Alyssa\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='2004'>2004\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='F'>F\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='AK'>AK\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='29'>29\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='11358'>11358\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='11359'>11359\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Sophia'>Sophia\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='2004'>2004\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='F'>F\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='AK'>AK\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='28'>28\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "\n", | |
| "\n", | |
| "<div class=\"info_items\">Viewing 10 of 1.0 million rows / 7 columns</div>\n", | |
| "<div class=\"info_items\">1 partition(s)</div>\n" | |
| ], | |
| "text/plain": [ | |
| "<IPython.core.display.HTML object>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "baby_names.table(10)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Delete the column 'Unnamed: 0' and 'Id'" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 58, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "baby_names = baby_names.drop(\"Unnamed: 0\", \"Id\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 59, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "ename": "KeyboardInterrupt", | |
| "evalue": "", | |
| "output_type": "error", | |
| "traceback": [ | |
| "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
| "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", | |
| "\u001b[0;32m<ipython-input-59-e3c19713dd04>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mbaby_names\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
| "\u001b[0;32m~/.local/lib/python3.6/site-packages/optimus/helpers/decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mdecorator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mwraps\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0;32mdef\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 17\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m~/.local/lib/python3.6/site-packages/optimus/dataframe/extension.py\u001b[0m in \u001b[0;36mtable\u001b[0;34m(self, limit, columns)\u001b[0m\n\u001b[1;32m 332\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdtypes\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 333\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mj\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 334\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mj\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 335\u001b[0m \u001b[0mfinal_columns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 336\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m~/.local/lib/python3.6/site-packages/optimus/helpers/decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mdecorator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mwraps\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0;32mdef\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 17\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m~/.local/lib/python3.6/site-packages/optimus/dataframe/extension.py\u001b[0m in \u001b[0;36mtable_html\u001b[0;34m(self, limit, columns)\u001b[0m\n\u001b[1;32m 313\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 314\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;32mreturn\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 315\u001b[0;31m \"\"\"\n\u001b[0m\u001b[1;32m 316\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 317\u001b[0m \u001b[0mcolumns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mparse_columns\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m~/.local/lib/python3.6/site-packages/pyspark/sql/dataframe.py\u001b[0m in \u001b[0;36mcount\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 520\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 521\u001b[0m \"\"\"\n\u001b[0;32m--> 522\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcount\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 523\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 524\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mignore_unicode_prefix\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m~/.local/lib/python3.6/site-packages/py4j/java_gateway.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 1253\u001b[0m \u001b[0mproto\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mEND_COMMAND_PART\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1254\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1255\u001b[0;31m \u001b[0manswer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgateway_client\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend_command\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1256\u001b[0m return_value = get_return_value(\n\u001b[1;32m 1257\u001b[0m answer, self.gateway_client, self.target_id, self.name)\n", | |
| "\u001b[0;32m~/.local/lib/python3.6/site-packages/py4j/java_gateway.py\u001b[0m in \u001b[0;36msend_command\u001b[0;34m(self, command, retry, binary)\u001b[0m\n\u001b[1;32m 983\u001b[0m \u001b[0mconnection\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_connection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 984\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 985\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconnection\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend_command\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 986\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mbinary\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 987\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_create_connection_guard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconnection\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m~/.local/lib/python3.6/site-packages/py4j/java_gateway.py\u001b[0m in \u001b[0;36msend_command\u001b[0;34m(self, command)\u001b[0m\n\u001b[1;32m 1150\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1151\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1152\u001b[0;31m \u001b[0manswer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msmart_decode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstream\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1153\u001b[0m \u001b[0mlogger\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdebug\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Answer received: {0}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0manswer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1154\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0manswer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mproto\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mRETURN_MESSAGE\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/opt/conda/lib/python3.6/socket.py\u001b[0m in \u001b[0;36mreadinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 584\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 585\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 586\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv_into\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 587\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 588\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_timeout_occurred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;31mKeyboardInterrupt\u001b[0m: " | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "baby_names.table(5)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Are there more male or female names in the dataset?" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 33, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "\n", | |
| "<h1></h1>\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "<div class=\"info_items\">Viewing 2 of 2 rows / 2 columns</div>\n", | |
| "<div class=\"info_items\">2 partition(s)</div>\n", | |
| "\n", | |
| "<table class=\"optimus_table\">\n", | |
| " <thead>\n", | |
| " <tr>\n", | |
| " \n", | |
| " <th>\n", | |
| " <div class=\"column_name\">Gender</div>\n", | |
| " <div class=\"data_type\">1 (string)</div>\n", | |
| " <div class=\"data_type\">\n", | |
| " \n", | |
| " nullable\n", | |
| " \n", | |
| " </div>\n", | |
| " </th>\n", | |
| " \n", | |
| " <th>\n", | |
| " <div class=\"column_name\">count</div>\n", | |
| " <div class=\"data_type\">2 (bigint)</div>\n", | |
| " <div class=\"data_type\">\n", | |
| " \n", | |
| " not nullable\n", | |
| " \n", | |
| " </div>\n", | |
| " </th>\n", | |
| " \n", | |
| " </tr>\n", | |
| "\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='M'>M\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='457549'>457549\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='F'>F\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='558846'>558846\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "\n", | |
| "\n", | |
| "<div class=\"info_items\">Viewing 2 of 2 rows / 2 columns</div>\n", | |
| "<div class=\"info_items\">2 partition(s)</div>\n" | |
| ], | |
| "text/plain": [ | |
| "<IPython.core.display.HTML object>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "baby_names.groupby(\"Gender\").count().orderBy(\"count\").table()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Group the dataset by name and assign to names" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 34, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "baby_names = baby_names.drop(\"Year\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 43, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "names = baby_names.groupby(\"Name\").sum().cols.rename(\"sum(Count)\",\"count\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 44, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "\n", | |
| "<h1></h1>\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "<div class=\"info_items\">Viewing 5 of 17632 rows / 2 columns</div>\n", | |
| "<div class=\"info_items\">200 partition(s)</div>\n", | |
| "\n", | |
| "<table class=\"optimus_table\">\n", | |
| " <thead>\n", | |
| " <tr>\n", | |
| " \n", | |
| " <th>\n", | |
| " <div class=\"column_name\">Name</div>\n", | |
| " <div class=\"data_type\">1 (string)</div>\n", | |
| " <div class=\"data_type\">\n", | |
| " \n", | |
| " nullable\n", | |
| " \n", | |
| " </div>\n", | |
| " </th>\n", | |
| " \n", | |
| " <th>\n", | |
| " <div class=\"column_name\">count</div>\n", | |
| " <div class=\"data_type\">2 (bigint)</div>\n", | |
| " <div class=\"data_type\">\n", | |
| " \n", | |
| " nullable\n", | |
| " \n", | |
| " </div>\n", | |
| " </th>\n", | |
| " \n", | |
| " </tr>\n", | |
| "\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Kiana'>Kiana\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='5965'>5965\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Alayna'>Alayna\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='14171'>14171\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Ember'>Ember\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='3181'>3181\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Tyler'>Tyler\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='129989'>129989\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Maddox'>Maddox\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='20716'>20716\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "\n", | |
| "\n", | |
| "<div class=\"info_items\">Viewing 5 of 17632 rows / 2 columns</div>\n", | |
| "<div class=\"info_items\">200 partition(s)</div>\n" | |
| ], | |
| "text/plain": [ | |
| "<IPython.core.display.HTML object>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "names.table(5)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 42, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Columns: 2 Rows: 17632\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print(f'Columns: {op.profiler.dataset_info(names)[\"cols_count\"]}', \n", | |
| " f'Rows: {op.profiler.dataset_info(names)[\"rows_count\"]}')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 49, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "\n", | |
| "<h1></h1>\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "<div class=\"info_items\">Viewing 10 of 17632 rows / 2 columns</div>\n", | |
| "<div class=\"info_items\">141 partition(s)</div>\n", | |
| "\n", | |
| "<table class=\"optimus_table\">\n", | |
| " <thead>\n", | |
| " <tr>\n", | |
| " \n", | |
| " <th>\n", | |
| " <div class=\"column_name\">Name</div>\n", | |
| " <div class=\"data_type\">1 (string)</div>\n", | |
| " <div class=\"data_type\">\n", | |
| " \n", | |
| " nullable\n", | |
| " \n", | |
| " </div>\n", | |
| " </th>\n", | |
| " \n", | |
| " <th>\n", | |
| " <div class=\"column_name\">count</div>\n", | |
| " <div class=\"data_type\">2 (bigint)</div>\n", | |
| " <div class=\"data_type\">\n", | |
| " \n", | |
| " nullable\n", | |
| " \n", | |
| " </div>\n", | |
| " </th>\n", | |
| " \n", | |
| " </tr>\n", | |
| "\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Jacob'>Jacob\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='242874'>242874\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Emma'>Emma\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='214852'>214852\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Michael'>Michael\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='214405'>214405\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Ethan'>Ethan\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='209277'>209277\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Isabella'>Isabella\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='204798'>204798\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='William'>William\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='197894'>197894\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Joshua'>Joshua\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='191551'>191551\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Sophia'>Sophia\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='191446'>191446\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Daniel'>Daniel\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='191440'>191440\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Emily'>Emily\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='190318'>190318\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "\n", | |
| "\n", | |
| "<div class=\"info_items\">Viewing 10 of 17632 rows / 2 columns</div>\n", | |
| "<div class=\"info_items\">141 partition(s)</div>\n" | |
| ], | |
| "text/plain": [ | |
| "<IPython.core.display.HTML object>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "names.sort(desc(\"count\")).table(10)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# How many different names exist in the dataset?" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 50, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "17632" | |
| ] | |
| }, | |
| "execution_count": 50, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "names.count()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# What is the name with most occurrences?" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 62, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "\n", | |
| "<h1></h1>\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "<div class=\"info_items\">Viewing 1 of 17632 rows / 1 columns</div>\n", | |
| "<div class=\"info_items\">141 partition(s)</div>\n", | |
| "\n", | |
| "<table class=\"optimus_table\">\n", | |
| " <thead>\n", | |
| " <tr>\n", | |
| " \n", | |
| " <th>\n", | |
| " <div class=\"column_name\">Name</div>\n", | |
| " <div class=\"data_type\">1 (string)</div>\n", | |
| " <div class=\"data_type\">\n", | |
| " \n", | |
| " nullable\n", | |
| " \n", | |
| " </div>\n", | |
| " </th>\n", | |
| " \n", | |
| " </tr>\n", | |
| "\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='Jacob'>Jacob\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "\n", | |
| "\n", | |
| "<div class=\"info_items\">Viewing 1 of 17632 rows / 1 columns</div>\n", | |
| "<div class=\"info_items\">141 partition(s)</div>\n" | |
| ], | |
| "text/plain": [ | |
| "<IPython.core.display.HTML object>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "names.sort(desc(\"count\")).select(\"Name\").table(1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# How many different names have the least occurrences?" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 86, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "2578" | |
| ] | |
| }, | |
| "execution_count": 86, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "min_oc = names.cols.min(\"count\") # This will give you the min ocurrence for names\n", | |
| "names.where(col(\"count\") == min_oc).count()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# What is the median name occurrence?" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 99, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "49.0" | |
| ] | |
| }, | |
| "execution_count": 99, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "median_oc = names.approxQuantile(\"count\", [0.5], relativeError=0)[0] # This will give you the median ocurrence for names\n", | |
| "median_oc" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# What is the standard deviation of names?" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 100, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "11006.06947" | |
| ] | |
| }, | |
| "execution_count": 100, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "names.cols.std(\"count\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Get a summary with the mean, min, max, std and quartiles." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 111, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "\n", | |
| "<h1></h1>\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "<div class=\"info_items\">Viewing 5 of 5 rows / 2 columns</div>\n", | |
| "<div class=\"info_items\">1 partition(s)</div>\n", | |
| "\n", | |
| "<table class=\"optimus_table\">\n", | |
| " <thead>\n", | |
| " <tr>\n", | |
| " \n", | |
| " <th>\n", | |
| " <div class=\"column_name\">summary</div>\n", | |
| " <div class=\"data_type\">1 (string)</div>\n", | |
| " <div class=\"data_type\">\n", | |
| " \n", | |
| " nullable\n", | |
| " \n", | |
| " </div>\n", | |
| " </th>\n", | |
| " \n", | |
| " <th>\n", | |
| " <div class=\"column_name\">count</div>\n", | |
| " <div class=\"data_type\">2 (string)</div>\n", | |
| " <div class=\"data_type\">\n", | |
| " \n", | |
| " nullable\n", | |
| " \n", | |
| " </div>\n", | |
| " </th>\n", | |
| " \n", | |
| " </tr>\n", | |
| "\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='count'>count\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='17632'>17632\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='mean'>mean\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='2008.932168784029'>2008.932168784029\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='stddev'>stddev\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='11006.069467890566'>11006.069467890566\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='min'>min\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='5'>5\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " <tr>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='max'>max\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " <td>\n", | |
| " <div class=\" \"\n", | |
| " title='242874'>242874\n", | |
| " </div>\n", | |
| " </td>\n", | |
| " \n", | |
| " </tr>\n", | |
| " \n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "\n", | |
| "\n", | |
| "<div class=\"info_items\">Viewing 5 of 5 rows / 2 columns</div>\n", | |
| "<div class=\"info_items\">1 partition(s)</div>\n" | |
| ], | |
| "text/plain": [ | |
| "<IPython.core.display.HTML object>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "names.select(\"count\").describe().table()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 117, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[11.0, 49.0, 337.0]" | |
| ] | |
| }, | |
| "execution_count": 117, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "names.approxQuantile(\"count\",[0.25,0.5,0.75], relativeError=0)" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.6.8" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment