Skip to content

Instantly share code, notes, and snippets.

@mwacc
Created September 5, 2018 06:06
Show Gist options
  • Select an option

  • Save mwacc/37ec80810120cc78d2ae869a787ce761 to your computer and use it in GitHub Desktop.

Select an option

Save mwacc/37ec80810120cc78d2ae869a787ce761 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"import google.datalab.bigquery as bq\n",
"import seaborn as sns\n",
"import pandas as pd\n",
"import numpy as np\n",
"import shutil"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Time is: 1.828289 sec\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>f0_</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1108779463</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" f0_\n",
"0 1108779463"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"q = \"\"\"\n",
"SELECT\n",
" count(1)\n",
"FROM\n",
" `nyc-tlc.yellow.trips` \n",
"\"\"\"\n",
"import time\n",
"start_at = time.time()\n",
"res = bq.Query(q).execute().result().to_dataframe()\n",
"print \"Time is: %f sec\" % (time.time() - start_at)\n",
"res[:20]"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Time is: 1.826464 sec\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>month</th>\n",
" <th>trips</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1973-05-01</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2009-01-01</td>\n",
" <td>14085510</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2009-02-01</td>\n",
" <td>13379753</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2009-03-01</td>\n",
" <td>14391536</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2009-04-01</td>\n",
" <td>14292746</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2009-05-01</td>\n",
" <td>14799079</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2009-06-01</td>\n",
" <td>14182830</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>2009-07-01</td>\n",
" <td>13624285</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>2009-08-01</td>\n",
" <td>13689765</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>2009-09-01</td>\n",
" <td>13983818</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>2009-10-01</td>\n",
" <td>15600529</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>2009-11-01</td>\n",
" <td>14280039</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>2009-12-01</td>\n",
" <td>14584019</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>2010-01-01</td>\n",
" <td>14863988</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>2010-02-01</td>\n",
" <td>11145405</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>2010-03-01</td>\n",
" <td>12882425</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>2010-04-01</td>\n",
" <td>15141479</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>2010-05-01</td>\n",
" <td>15485628</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>2010-06-01</td>\n",
" <td>14822752</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>2010-07-01</td>\n",
" <td>14423765</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" month trips\n",
"0 1973-05-01 1\n",
"1 2009-01-01 14085510\n",
"2 2009-02-01 13379753\n",
"3 2009-03-01 14391536\n",
"4 2009-04-01 14292746\n",
"5 2009-05-01 14799079\n",
"6 2009-06-01 14182830\n",
"7 2009-07-01 13624285\n",
"8 2009-08-01 13689765\n",
"9 2009-09-01 13983818\n",
"10 2009-10-01 15600529\n",
"11 2009-11-01 14280039\n",
"12 2009-12-01 14584019\n",
"13 2010-01-01 14863988\n",
"14 2010-02-01 11145405\n",
"15 2010-03-01 12882425\n",
"16 2010-04-01 15141479\n",
"17 2010-05-01 15485628\n",
"18 2010-06-01 14822752\n",
"19 2010-07-01 14423765"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"q = \"\"\"\n",
"SELECT\n",
" DATE_TRUNC(EXTRACT(DATE FROM dropoff_datetime), MONTH) AS month,\n",
" COUNT(*) trips \n",
"FROM\n",
" `nyc-tlc.yellow.trips` \n",
"GROUP BY\n",
" month \n",
"ORDER BY\n",
" month \n",
"\"\"\"\n",
"import time\n",
"start_at = time.time()\n",
"res = bq.Query(q).execute().result().to_dataframe()\n",
"print \"Time is: %f sec\" % (time.time() - start_at)\n",
"res[:20]"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Time is: 2.135039 sec\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Hour</th>\n",
" <th>f0_</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>16.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>16.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>17.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>17.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>20.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>5</td>\n",
" <td>22.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>6</td>\n",
" <td>18.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>7</td>\n",
" <td>14.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>8</td>\n",
" <td>12.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>9</td>\n",
" <td>11.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>10</td>\n",
" <td>12.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>11</td>\n",
" <td>11.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>12</td>\n",
" <td>11.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>13</td>\n",
" <td>11.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>14</td>\n",
" <td>11.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>15</td>\n",
" <td>11.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>16</td>\n",
" <td>12.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>17</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>18</td>\n",
" <td>11.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>19</td>\n",
" <td>12.5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Hour f0_\n",
"0 0 16.0\n",
"1 1 16.7\n",
"2 2 17.2\n",
"3 3 17.8\n",
"4 4 20.0\n",
"5 5 22.2\n",
"6 6 18.5\n",
"7 7 14.7\n",
"8 8 12.2\n",
"9 9 11.9\n",
"10 10 12.1\n",
"11 11 11.9\n",
"12 12 11.7\n",
"13 13 11.9\n",
"14 14 11.7\n",
"15 15 11.7\n",
"16 16 12.4\n",
"17 17 12.0\n",
"18 18 11.8\n",
"19 19 12.5"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"q = \"\"\"\n",
"SELECT \n",
" extract(hour FROM pickup_datetime) AS Hour, \n",
" ROUND(AVG(trip_distance / TIMESTAMP_DIFF(dropoff_datetime, pickup_datetime, SECOND)) * 3600,1)\n",
"FROM `nyc-tlc.yellow.trips` \n",
"WHERE dropoff_datetime > pickup_datetime\n",
" AND fare_amount/trip_distance \n",
" BETWEEN 2\n",
" AND 10\n",
" AND trip_distance > 0\n",
"GROUP BY Hour\n",
"ORDER BY Hour\n",
"\n",
"\"\"\"\n",
"import time\n",
"start_at = time.time()\n",
"res = bq.Query(q).execute().result().to_dataframe()\n",
"print \"Time is: %f sec\" % (time.time() - start_at)\n",
"res[:20]"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Time is: 2.109586 sec\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>f0_</th>\n",
" <th>f1_</th>\n",
" <th>f2_</th>\n",
" <th>f3_</th>\n",
" <th>cnt</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86530</td>\n",
" <td>40.77069</td>\n",
" <td>21</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86532</td>\n",
" <td>40.77070</td>\n",
" <td>19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86494</td>\n",
" <td>40.77047</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86532</td>\n",
" <td>40.77076</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86494</td>\n",
" <td>40.77043</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86179</td>\n",
" <td>40.76841</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86513</td>\n",
" <td>40.77060</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86182</td>\n",
" <td>40.76844</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86163</td>\n",
" <td>40.76830</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.87091</td>\n",
" <td>40.77408</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86539</td>\n",
" <td>40.77077</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86172</td>\n",
" <td>40.76837</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86172</td>\n",
" <td>40.76834</td>\n",
" <td>15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86182</td>\n",
" <td>40.76841</td>\n",
" <td>15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86530</td>\n",
" <td>40.77070</td>\n",
" <td>15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86182</td>\n",
" <td>40.76842</td>\n",
" <td>15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86165</td>\n",
" <td>40.76830</td>\n",
" <td>15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.87283</td>\n",
" <td>40.77431</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86156</td>\n",
" <td>40.76825</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86179</td>\n",
" <td>40.76842</td>\n",
" <td>14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" f0_ f1_ f2_ f3_ cnt\n",
"0 -73.13739 41.36614 -73.86530 40.77069 21\n",
"1 -73.13739 41.36614 -73.86532 40.77070 19\n",
"2 -73.13739 41.36614 -73.86494 40.77047 18\n",
"3 -73.13739 41.36614 -73.86532 40.77076 18\n",
"4 -73.13739 41.36614 -73.86494 40.77043 18\n",
"5 -73.13739 41.36614 -73.86179 40.76841 17\n",
"6 -73.13739 41.36614 -73.86513 40.77060 17\n",
"7 -73.13739 41.36614 -73.86182 40.76844 17\n",
"8 -73.13739 41.36614 -73.86163 40.76830 17\n",
"9 -73.13739 41.36614 -73.87091 40.77408 16\n",
"10 -73.13739 41.36614 -73.86539 40.77077 16\n",
"11 -73.13739 41.36614 -73.86172 40.76837 16\n",
"12 -73.13739 41.36614 -73.86172 40.76834 15\n",
"13 -73.13739 41.36614 -73.86182 40.76841 15\n",
"14 -73.13739 41.36614 -73.86530 40.77070 15\n",
"15 -73.13739 41.36614 -73.86182 40.76842 15\n",
"16 -73.13739 41.36614 -73.86165 40.76830 15\n",
"17 -73.13739 41.36614 -73.87283 40.77431 14\n",
"18 -73.13739 41.36614 -73.86156 40.76825 14\n",
"19 -73.13739 41.36614 -73.86179 40.76842 14"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"q = \"\"\"\n",
"SELECT\n",
" round(pickup_longitude, 5), \n",
" round(pickup_latitude, 5), \n",
" round(dropoff_longitude,5), \n",
" round(dropoff_latitude,5),\n",
" count(1) as cnt\n",
"FROM\n",
" `nyc-tlc.yellow.trips` \n",
"where\n",
" trip_distance > 10\n",
" AND fare_amount >= 2.5\n",
" AND pickup_longitude > -78\n",
" AND pickup_longitude < -70\n",
" AND dropoff_longitude > -78\n",
" AND dropoff_longitude < -70\n",
" AND pickup_latitude > 37\n",
" AND pickup_latitude < 45\n",
" AND dropoff_latitude > 37\n",
" AND dropoff_latitude < 45\n",
" AND passenger_count > 0 \n",
" AND round(pickup_longitude, 5) != round(dropoff_longitude, 5)\n",
" AND round(pickup_latitude, 5) != round(dropoff_latitude, 5)\n",
"group by round(pickup_longitude, 5), round(pickup_latitude, 5), round(dropoff_longitude,5), round(dropoff_latitude,5)\n",
"order by cnt DESC\n",
"LIMIT 20\n",
"\"\"\"\n",
"import time\n",
"start_at = time.time()\n",
"res = bq.Query(q).execute().result().to_dataframe()\n",
"print \"Time is: %f sec\" % (time.time() - start_at)\n",
"res[:20]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.15"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"import google.datalab.bigquery as bq\n",
"import seaborn as sns\n",
"import pandas as pd\n",
"import numpy as np\n",
"import shutil"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Time is: 1.828289 sec\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>f0_</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1108779463</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" f0_\n",
"0 1108779463"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"q = \"\"\"\n",
"SELECT\n",
" count(1)\n",
"FROM\n",
" `nyc-tlc.yellow.trips` \n",
"\"\"\"\n",
"import time\n",
"start_at = time.time()\n",
"res = bq.Query(q).execute().result().to_dataframe()\n",
"print \"Time is: %f sec\" % (time.time() - start_at)\n",
"res[:20]"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Time is: 1.826464 sec\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>month</th>\n",
" <th>trips</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1973-05-01</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2009-01-01</td>\n",
" <td>14085510</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2009-02-01</td>\n",
" <td>13379753</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2009-03-01</td>\n",
" <td>14391536</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2009-04-01</td>\n",
" <td>14292746</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2009-05-01</td>\n",
" <td>14799079</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2009-06-01</td>\n",
" <td>14182830</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>2009-07-01</td>\n",
" <td>13624285</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>2009-08-01</td>\n",
" <td>13689765</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>2009-09-01</td>\n",
" <td>13983818</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>2009-10-01</td>\n",
" <td>15600529</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>2009-11-01</td>\n",
" <td>14280039</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>2009-12-01</td>\n",
" <td>14584019</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>2010-01-01</td>\n",
" <td>14863988</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>2010-02-01</td>\n",
" <td>11145405</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>2010-03-01</td>\n",
" <td>12882425</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>2010-04-01</td>\n",
" <td>15141479</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>2010-05-01</td>\n",
" <td>15485628</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>2010-06-01</td>\n",
" <td>14822752</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>2010-07-01</td>\n",
" <td>14423765</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" month trips\n",
"0 1973-05-01 1\n",
"1 2009-01-01 14085510\n",
"2 2009-02-01 13379753\n",
"3 2009-03-01 14391536\n",
"4 2009-04-01 14292746\n",
"5 2009-05-01 14799079\n",
"6 2009-06-01 14182830\n",
"7 2009-07-01 13624285\n",
"8 2009-08-01 13689765\n",
"9 2009-09-01 13983818\n",
"10 2009-10-01 15600529\n",
"11 2009-11-01 14280039\n",
"12 2009-12-01 14584019\n",
"13 2010-01-01 14863988\n",
"14 2010-02-01 11145405\n",
"15 2010-03-01 12882425\n",
"16 2010-04-01 15141479\n",
"17 2010-05-01 15485628\n",
"18 2010-06-01 14822752\n",
"19 2010-07-01 14423765"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"q = \"\"\"\n",
"SELECT\n",
" DATE_TRUNC(EXTRACT(DATE FROM dropoff_datetime), MONTH) AS month,\n",
" COUNT(*) trips \n",
"FROM\n",
" `nyc-tlc.yellow.trips` \n",
"GROUP BY\n",
" month \n",
"ORDER BY\n",
" month \n",
"\"\"\"\n",
"import time\n",
"start_at = time.time()\n",
"res = bq.Query(q).execute().result().to_dataframe()\n",
"print \"Time is: %f sec\" % (time.time() - start_at)\n",
"res[:20]"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Time is: 2.135039 sec\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Hour</th>\n",
" <th>f0_</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>16.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>16.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>17.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>17.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>20.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>5</td>\n",
" <td>22.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>6</td>\n",
" <td>18.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>7</td>\n",
" <td>14.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>8</td>\n",
" <td>12.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>9</td>\n",
" <td>11.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>10</td>\n",
" <td>12.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>11</td>\n",
" <td>11.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>12</td>\n",
" <td>11.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>13</td>\n",
" <td>11.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>14</td>\n",
" <td>11.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>15</td>\n",
" <td>11.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>16</td>\n",
" <td>12.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>17</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>18</td>\n",
" <td>11.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>19</td>\n",
" <td>12.5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Hour f0_\n",
"0 0 16.0\n",
"1 1 16.7\n",
"2 2 17.2\n",
"3 3 17.8\n",
"4 4 20.0\n",
"5 5 22.2\n",
"6 6 18.5\n",
"7 7 14.7\n",
"8 8 12.2\n",
"9 9 11.9\n",
"10 10 12.1\n",
"11 11 11.9\n",
"12 12 11.7\n",
"13 13 11.9\n",
"14 14 11.7\n",
"15 15 11.7\n",
"16 16 12.4\n",
"17 17 12.0\n",
"18 18 11.8\n",
"19 19 12.5"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"q = \"\"\"\n",
"SELECT \n",
" extract(hour FROM pickup_datetime) AS Hour, \n",
" ROUND(AVG(trip_distance / TIMESTAMP_DIFF(dropoff_datetime, pickup_datetime, SECOND)) * 3600,1)\n",
"FROM `nyc-tlc.yellow.trips` \n",
"WHERE dropoff_datetime > pickup_datetime\n",
" AND fare_amount/trip_distance \n",
" BETWEEN 2\n",
" AND 10\n",
" AND trip_distance > 0\n",
"GROUP BY Hour\n",
"ORDER BY Hour\n",
"\n",
"\"\"\"\n",
"import time\n",
"start_at = time.time()\n",
"res = bq.Query(q).execute().result().to_dataframe()\n",
"print \"Time is: %f sec\" % (time.time() - start_at)\n",
"res[:20]"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Time is: 2.109586 sec\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>f0_</th>\n",
" <th>f1_</th>\n",
" <th>f2_</th>\n",
" <th>f3_</th>\n",
" <th>cnt</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86530</td>\n",
" <td>40.77069</td>\n",
" <td>21</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86532</td>\n",
" <td>40.77070</td>\n",
" <td>19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86494</td>\n",
" <td>40.77047</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86532</td>\n",
" <td>40.77076</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86494</td>\n",
" <td>40.77043</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86179</td>\n",
" <td>40.76841</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86513</td>\n",
" <td>40.77060</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86182</td>\n",
" <td>40.76844</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86163</td>\n",
" <td>40.76830</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.87091</td>\n",
" <td>40.77408</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86539</td>\n",
" <td>40.77077</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86172</td>\n",
" <td>40.76837</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86172</td>\n",
" <td>40.76834</td>\n",
" <td>15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86182</td>\n",
" <td>40.76841</td>\n",
" <td>15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86530</td>\n",
" <td>40.77070</td>\n",
" <td>15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86182</td>\n",
" <td>40.76842</td>\n",
" <td>15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86165</td>\n",
" <td>40.76830</td>\n",
" <td>15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.87283</td>\n",
" <td>40.77431</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86156</td>\n",
" <td>40.76825</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86179</td>\n",
" <td>40.76842</td>\n",
" <td>14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" f0_ f1_ f2_ f3_ cnt\n",
"0 -73.13739 41.36614 -73.86530 40.77069 21\n",
"1 -73.13739 41.36614 -73.86532 40.77070 19\n",
"2 -73.13739 41.36614 -73.86494 40.77047 18\n",
"3 -73.13739 41.36614 -73.86532 40.77076 18\n",
"4 -73.13739 41.36614 -73.86494 40.77043 18\n",
"5 -73.13739 41.36614 -73.86179 40.76841 17\n",
"6 -73.13739 41.36614 -73.86513 40.77060 17\n",
"7 -73.13739 41.36614 -73.86182 40.76844 17\n",
"8 -73.13739 41.36614 -73.86163 40.76830 17\n",
"9 -73.13739 41.36614 -73.87091 40.77408 16\n",
"10 -73.13739 41.36614 -73.86539 40.77077 16\n",
"11 -73.13739 41.36614 -73.86172 40.76837 16\n",
"12 -73.13739 41.36614 -73.86172 40.76834 15\n",
"13 -73.13739 41.36614 -73.86182 40.76841 15\n",
"14 -73.13739 41.36614 -73.86530 40.77070 15\n",
"15 -73.13739 41.36614 -73.86182 40.76842 15\n",
"16 -73.13739 41.36614 -73.86165 40.76830 15\n",
"17 -73.13739 41.36614 -73.87283 40.77431 14\n",
"18 -73.13739 41.36614 -73.86156 40.76825 14\n",
"19 -73.13739 41.36614 -73.86179 40.76842 14"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"q = \"\"\"\n",
"SELECT\n",
" round(pickup_longitude, 5), \n",
" round(pickup_latitude, 5), \n",
" round(dropoff_longitude,5), \n",
" round(dropoff_latitude,5),\n",
" count(1) as cnt\n",
"FROM\n",
" `nyc-tlc.yellow.trips` \n",
"where\n",
" trip_distance > 10\n",
" AND fare_amount >= 2.5\n",
" AND pickup_longitude > -78\n",
" AND pickup_longitude < -70\n",
" AND dropoff_longitude > -78\n",
" AND dropoff_longitude < -70\n",
" AND pickup_latitude > 37\n",
" AND pickup_latitude < 45\n",
" AND dropoff_latitude > 37\n",
" AND dropoff_latitude < 45\n",
" AND passenger_count > 0 \n",
" AND round(pickup_longitude, 5) != round(dropoff_longitude, 5)\n",
" AND round(pickup_latitude, 5) != round(dropoff_latitude, 5)\n",
"group by round(pickup_longitude, 5), round(pickup_latitude, 5), round(dropoff_longitude,5), round(dropoff_latitude,5)\n",
"order by cnt DESC\n",
"LIMIT 20\n",
"\"\"\"\n",
"import time\n",
"start_at = time.time()\n",
"res = bq.Query(q).execute().result().to_dataframe()\n",
"print \"Time is: %f sec\" % (time.time() - start_at)\n",
"res[:20]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.15"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment