Skip to content

Instantly share code, notes, and snippets.

@acstrahl
Last active May 23, 2025 06:22
Show Gist options
  • Select an option

  • Save acstrahl/4bf895ad908080b1a6798e0274a4ea30 to your computer and use it in GitHub Desktop.

Select an option

Save acstrahl/4bf895ad908080b1a6798e0274a4ea30 to your computer and use it in GitHub Desktop.
Answering business questions with SQL DEMO
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Introduction and Schema Diagram"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'Connected: [email protected]'"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%capture\n",
"%load_ext sql\n",
"%sql sqlite:///chinook.db"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Overview of the Data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>name</th>\n",
" <th>type</th>\n",
" </tr>\n",
" <tr>\n",
" <td>album</td>\n",
" <td>table</td>\n",
" </tr>\n",
" <tr>\n",
" <td>artist</td>\n",
" <td>table</td>\n",
" </tr>\n",
" <tr>\n",
" <td>customer</td>\n",
" <td>table</td>\n",
" </tr>\n",
" <tr>\n",
" <td>employee</td>\n",
" <td>table</td>\n",
" </tr>\n",
" <tr>\n",
" <td>genre</td>\n",
" <td>table</td>\n",
" </tr>\n",
" <tr>\n",
" <td>invoice</td>\n",
" <td>table</td>\n",
" </tr>\n",
" <tr>\n",
" <td>invoice_line</td>\n",
" <td>table</td>\n",
" </tr>\n",
" <tr>\n",
" <td>media_type</td>\n",
" <td>table</td>\n",
" </tr>\n",
" <tr>\n",
" <td>playlist</td>\n",
" <td>table</td>\n",
" </tr>\n",
" <tr>\n",
" <td>playlist_track</td>\n",
" <td>table</td>\n",
" </tr>\n",
" <tr>\n",
" <td>track</td>\n",
" <td>table</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[('album', 'table'),\n",
" ('artist', 'table'),\n",
" ('customer', 'table'),\n",
" ('employee', 'table'),\n",
" ('genre', 'table'),\n",
" ('invoice', 'table'),\n",
" ('invoice_line', 'table'),\n",
" ('media_type', 'table'),\n",
" ('playlist', 'table'),\n",
" ('playlist_track', 'table'),\n",
" ('track', 'table')]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"SELECT\n",
" name,\n",
" type\n",
"FROM sqlite_master\n",
"WHERE type IN (\"table\",\"view\");"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>track_id</th>\n",
" <th>name</th>\n",
" <th>album_id</th>\n",
" <th>media_type_id</th>\n",
" <th>genre_id</th>\n",
" <th>composer</th>\n",
" <th>milliseconds</th>\n",
" <th>bytes</th>\n",
" <th>unit_price</th>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>For Those About To Rock (We Salute You)</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Angus Young, Malcolm Young, Brian Johnson</td>\n",
" <td>343719</td>\n",
" <td>11170334</td>\n",
" <td>0.99</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>Balls to the Wall</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>None</td>\n",
" <td>342562</td>\n",
" <td>5510424</td>\n",
" <td>0.99</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>Fast As a Shark</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>F. Baltes, S. Kaufman, U. Dirkscneider &amp; W. Hoffman</td>\n",
" <td>230619</td>\n",
" <td>3990994</td>\n",
" <td>0.99</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>Restless and Wild</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. Dirkscneider &amp; W. Hoffman</td>\n",
" <td>252051</td>\n",
" <td>4331779</td>\n",
" <td>0.99</td>\n",
" </tr>\n",
" <tr>\n",
" <td>5</td>\n",
" <td>Princess of the Dawn</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>Deaffy &amp; R.A. Smith-Diesel</td>\n",
" <td>375418</td>\n",
" <td>6290521</td>\n",
" <td>0.99</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(1, 'For Those About To Rock (We Salute You)', 1, 1, 1, 'Angus Young, Malcolm Young, Brian Johnson', 343719, 11170334, 0.99),\n",
" (2, 'Balls to the Wall', 2, 2, 1, None, 342562, 5510424, 0.99),\n",
" (3, 'Fast As a Shark', 3, 2, 1, 'F. Baltes, S. Kaufman, U. Dirkscneider & W. Hoffman', 230619, 3990994, 0.99),\n",
" (4, 'Restless and Wild', 3, 2, 1, 'F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. Dirkscneider & W. Hoffman', 252051, 4331779, 0.99),\n",
" (5, 'Princess of the Dawn', 3, 2, 1, 'Deaffy & R.A. Smith-Diesel', 375418, 6290521, 0.99)]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"\n",
"SELECT *\n",
"FROM track\n",
"LIMIT 5;"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>invoice_line_id</th>\n",
" <th>invoice_id</th>\n",
" <th>track_id</th>\n",
" <th>unit_price</th>\n",
" <th>quantity</th>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1158</td>\n",
" <td>0.99</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1159</td>\n",
" <td>0.99</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1160</td>\n",
" <td>0.99</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1161</td>\n",
" <td>0.99</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>1162</td>\n",
" <td>0.99</td>\n",
" <td>1</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(1, 1, 1158, 0.99, 1),\n",
" (2, 1, 1159, 0.99, 1),\n",
" (3, 1, 1160, 0.99, 1),\n",
" (4, 1, 1161, 0.99, 1),\n",
" (5, 1, 1162, 0.99, 1)]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"\n",
"SELECT *\n",
"FROM invoice_line\n",
"LIMIT 5;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Selecting New Albums to Purchase"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>genre</th>\n",
" <th>count</th>\n",
" <th>percentage</th>\n",
" </tr>\n",
" <tr>\n",
" <td>Rock</td>\n",
" <td>561</td>\n",
" <td>53.38</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Alternative &amp; Punk</td>\n",
" <td>130</td>\n",
" <td>12.37</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Metal</td>\n",
" <td>124</td>\n",
" <td>11.8</td>\n",
" </tr>\n",
" <tr>\n",
" <td>R&amp;B/Soul</td>\n",
" <td>53</td>\n",
" <td>5.04</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Blues</td>\n",
" <td>36</td>\n",
" <td>3.43</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Alternative</td>\n",
" <td>35</td>\n",
" <td>3.33</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Latin</td>\n",
" <td>22</td>\n",
" <td>2.09</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Pop</td>\n",
" <td>22</td>\n",
" <td>2.09</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Hip Hop/Rap</td>\n",
" <td>20</td>\n",
" <td>1.9</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Jazz</td>\n",
" <td>14</td>\n",
" <td>1.33</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Easy Listening</td>\n",
" <td>13</td>\n",
" <td>1.24</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Reggae</td>\n",
" <td>6</td>\n",
" <td>0.57</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Electronica/Dance</td>\n",
" <td>5</td>\n",
" <td>0.48</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Classical</td>\n",
" <td>4</td>\n",
" <td>0.38</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Heavy Metal</td>\n",
" <td>3</td>\n",
" <td>0.29</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Soundtrack</td>\n",
" <td>2</td>\n",
" <td>0.19</td>\n",
" </tr>\n",
" <tr>\n",
" <td>TV Shows</td>\n",
" <td>1</td>\n",
" <td>0.1</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[('Rock', 561, 53.38),\n",
" ('Alternative & Punk', 130, 12.37),\n",
" ('Metal', 124, 11.8),\n",
" ('R&B/Soul', 53, 5.04),\n",
" ('Blues', 36, 3.43),\n",
" ('Alternative', 35, 3.33),\n",
" ('Latin', 22, 2.09),\n",
" ('Pop', 22, 2.09),\n",
" ('Hip Hop/Rap', 20, 1.9),\n",
" ('Jazz', 14, 1.33),\n",
" ('Easy Listening', 13, 1.24),\n",
" ('Reggae', 6, 0.57),\n",
" ('Electronica/Dance', 5, 0.48),\n",
" ('Classical', 4, 0.38),\n",
" ('Heavy Metal', 3, 0.29),\n",
" ('Soundtrack', 2, 0.19),\n",
" ('TV Shows', 1, 0.1)]"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"\n",
"WITH genre_usa_tracks AS\n",
" (\n",
" SELECT il.invoice_line_id, g.name AS genre, t.track_id, i.billing_country AS country\n",
" FROM track AS t\n",
" JOIN genre AS g ON t.genre_id = g.genre_id\n",
" JOIN invoice_line AS il ON t.track_id = il.track_id\n",
" JOIN invoice AS i ON il.invoice_id = i.invoice_id\n",
" WHERE i.billing_country = 'USA'\n",
" )\n",
" \n",
"SELECT genre, \n",
" COUNT(*) AS count,\n",
" ROUND(\n",
" CAST(\n",
" COUNT(*) AS float)/(\n",
" SELECT COUNT(*) FROM genre_usa_tracks)*100,2) AS percentage\n",
"FROM genre_usa_tracks\n",
"GROUP BY genre\n",
"ORDER BY 2 DESC;\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Based on the sales of tracks across different genres in the USA, we should purchase the new albums by the following artists:\n",
"\n",
"- Red Tone (Punk)\n",
"- Slim Jim Bites (Blues)\n",
"- Meteor and the Girls (Pop)\n",
"\n",
"It's worth keeping in mind that combined, these three genres only make up only 17% of total sales, so we should be on the lookout for artists and albums from the rock genre, which accounts for 53% of sales."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Analyzing Employee Sales Performance"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>employee_name</th>\n",
" <th>hire_date</th>\n",
" <th>customer_count</th>\n",
" <th>total_sales_dollars</th>\n",
" <th>avg_dollars_per_customer</th>\n",
" </tr>\n",
" <tr>\n",
" <td>Jane Peacock</td>\n",
" <td>2017-04-01 00:00:00</td>\n",
" <td>212</td>\n",
" <td>1731.51</td>\n",
" <td>8.17</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Steve Johnson</td>\n",
" <td>2017-10-17 00:00:00</td>\n",
" <td>188</td>\n",
" <td>1393.92</td>\n",
" <td>7.41</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Margaret Park</td>\n",
" <td>2017-05-03 00:00:00</td>\n",
" <td>214</td>\n",
" <td>1584.0</td>\n",
" <td>7.4</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[('Jane Peacock', '2017-04-01 00:00:00', 212, 1731.51, 8.17),\n",
" ('Steve Johnson', '2017-10-17 00:00:00', 188, 1393.92, 7.41),\n",
" ('Margaret Park', '2017-05-03 00:00:00', 214, 1584.0, 7.4)]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"\n",
"SELECT\n",
" e.first_name ||\" \"||e.last_name AS employee_name,\n",
" e.hire_date,\n",
" COUNT(c.customer_id) AS customer_count,\n",
" ROUND(SUM(i.total),2) AS total_sales_dollars,\n",
" ROUND(CAST(SUM(i.total) AS float)/COUNT(c.customer_id),2) AS avg_dollars_per_customer\n",
" FROM customer AS c\n",
" JOIN invoice AS i ON c.customer_id = i.customer_id\n",
" JOIN employee AS e ON c.support_rep_id = e.employee_id\n",
" GROUP BY employee_name\n",
" ORDER BY avg_dollars_per_customer DESC;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"While there is a 20% difference in sales between Jane (the top employee) and Steve (the bottom employee), the difference roughly corresponds to the differences in their hiring dates."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Analyzing Sales by Country"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>country</th>\n",
" <th>customers</th>\n",
" <th>total_sales</th>\n",
" <th>average_order</th>\n",
" <th>customer_lifetime_value</th>\n",
" </tr>\n",
" <tr>\n",
" <td>USA</td>\n",
" <td>13</td>\n",
" <td>1040.49</td>\n",
" <td>7.94</td>\n",
" <td>80.04</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Canada</td>\n",
" <td>8</td>\n",
" <td>535.59</td>\n",
" <td>7.05</td>\n",
" <td>66.95</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Brazil</td>\n",
" <td>5</td>\n",
" <td>427.68</td>\n",
" <td>7.01</td>\n",
" <td>85.54</td>\n",
" </tr>\n",
" <tr>\n",
" <td>France</td>\n",
" <td>5</td>\n",
" <td>389.07</td>\n",
" <td>7.78</td>\n",
" <td>77.81</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Germany</td>\n",
" <td>4</td>\n",
" <td>334.62</td>\n",
" <td>8.16</td>\n",
" <td>83.66</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Czech Republic</td>\n",
" <td>2</td>\n",
" <td>273.24</td>\n",
" <td>9.11</td>\n",
" <td>136.62</td>\n",
" </tr>\n",
" <tr>\n",
" <td>United Kingdom</td>\n",
" <td>3</td>\n",
" <td>245.52</td>\n",
" <td>8.77</td>\n",
" <td>81.84</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Portugal</td>\n",
" <td>2</td>\n",
" <td>185.13</td>\n",
" <td>6.38</td>\n",
" <td>92.57</td>\n",
" </tr>\n",
" <tr>\n",
" <td>India</td>\n",
" <td>2</td>\n",
" <td>183.15</td>\n",
" <td>8.72</td>\n",
" <td>91.58</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Other</td>\n",
" <td>15</td>\n",
" <td>1094.94</td>\n",
" <td>7.45</td>\n",
" <td>73.0</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[('USA', 13, 1040.49, 7.94, 80.04),\n",
" ('Canada', 8, 535.59, 7.05, 66.95),\n",
" ('Brazil', 5, 427.68, 7.01, 85.54),\n",
" ('France', 5, 389.07, 7.78, 77.81),\n",
" ('Germany', 4, 334.62, 8.16, 83.66),\n",
" ('Czech Republic', 2, 273.24, 9.11, 136.62),\n",
" ('United Kingdom', 3, 245.52, 8.77, 81.84),\n",
" ('Portugal', 2, 185.13, 6.38, 92.57),\n",
" ('India', 2, 183.15, 8.72, 91.58),\n",
" ('Other', 15, 1094.94, 7.45, 73.0)]"
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"\n",
"WITH\n",
" customer_counts AS (\n",
" SELECT\n",
" country,\n",
" COUNT(*) AS num_customers\n",
" FROM customer\n",
" GROUP BY country\n",
" ),\n",
"\n",
" labeled_invoices AS (\n",
" SELECT\n",
" CASE\n",
" WHEN cc.num_customers = 1 THEN 'Other'\n",
" ELSE c.country\n",
" END AS country,\n",
" i.customer_id,\n",
" i.invoice_id,\n",
" il.unit_price\n",
" FROM invoice_line AS il\n",
" JOIN invoice AS i USING (invoice_id)\n",
" JOIN customer AS c ON i.customer_id = c.customer_id\n",
" JOIN customer_counts AS cc\n",
" ON cc.country = c.country\n",
" )\n",
"\n",
"SELECT\n",
" country,\n",
" COUNT(DISTINCT customer_id) AS customers,\n",
" ROUND(SUM(unit_price),2) AS total_sales,\n",
" ROUND(SUM(unit_price) \n",
" / COUNT(DISTINCT invoice_id), 2) AS average_order,\n",
" ROUND(SUM(unit_price) \n",
" / COUNT(DISTINCT customer_id), 2) AS customer_lifetime_value\n",
"FROM labeled_invoices\n",
"GROUP BY country\n",
"ORDER BY\n",
" CASE WHEN country = 'Other' THEN 1 ELSE 0 END,\n",
" total_sales DESC\n",
";\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Albums vs. Individual Tracks"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>album_purchased</th>\n",
" <th>total_count</th>\n",
" <th>percentage</th>\n",
" </tr>\n",
" <tr>\n",
" <td>No</td>\n",
" <td>500</td>\n",
" <td>81.83%</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Yes</td>\n",
" <td>111</td>\n",
" <td>18.17%</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[('No', 500, '81.83%'), ('Yes', 111, '18.17%')]"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"\n",
"WITH\n",
" --Counting the number of tracks included on each album, and filtering out albums with only 1 track\n",
" albums_with_track_count AS\n",
" (\n",
" SELECT a.album_id,\n",
" COUNT(t.track_id) AS num_tracks\n",
" FROM album AS a\n",
" JOIN track AS t ON a.album_id = t.album_id\n",
" GROUP BY a.album_id\n",
" HAVING num_tracks > 1\n",
" ),\n",
"\n",
" --Counting tracks by album for each invoice\n",
" invoice_with_track_count AS\n",
" (\n",
" SELECT il.invoice_id,\n",
" a.album_id,\n",
" COUNT(il.track_id) AS album_track_purchases\n",
" FROM invoice_line AS il\n",
" JOIN track AS t ON t.track_id = il.track_id\n",
" JOIN album AS a ON a.album_id = t.album_id\n",
" GROUP BY il.invoice_id, a.album_id\n",
" ),\n",
"\n",
" --Categorizing each invoice as either a full album purchase or a non-full album purchase\n",
" purchase_type AS\n",
" (\n",
" SELECT invoice_id,\n",
" a.album_id,\n",
" album_track_purchases,\n",
" num_tracks,\n",
" CASE\n",
" WHEN num_tracks = album_track_purchases THEN 1\n",
" ELSE 0\n",
" END AS full_album_purchased\n",
" FROM invoice_with_track_count AS i\n",
" JOIN albums_with_track_count AS a \n",
" ON i.album_id = a.album_id --inner join must be used here to remove invoices with a single 1 track album purchase\n",
" ),\n",
"\n",
" --Getting one row per invoice\n",
" invoice_summary AS\n",
" (\n",
" SELECT invoice_id,\n",
" MAX(full_album_purchased) AS album_invoice\n",
" FROM purchase_type\n",
" GROUP BY invoice_id\n",
" )\n",
"\n",
"SELECT CASE\n",
" WHEN album_invoice = 1 THEN 'Yes'\n",
" WHEN album_invoice = 0 THEN 'No'\n",
" ELSE 'Other'\n",
" END AS album_purchased,\n",
" COUNT(album_invoice) AS total_count,\n",
" ROUND(CAST(COUNT(invoice_id) AS float)/CAST((SELECT COUNT(invoice_id) \n",
" FROM invoice_summary) AS float)*100,2) || '%' AS percentage\n",
" FROM invoice_summary\n",
" GROUP BY album_purchased;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Album purchases account for 18.8% of purchases. Based on this data, I would recommend against purchasing only select tracks from albums from record companies, since there is potential to lose one fifth of revenue."
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" <th>8</th>\n",
" <th>9</th>\n",
" <th>10</th>\n",
" <th>11</th>\n",
" <th>12</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>Luís</td>\n",
" <td>Gonçalves</td>\n",
" <td>Embraer - Empresa Brasileira de Aeronáutica S.A.</td>\n",
" <td>Av. Brigadeiro Faria Lima, 2170</td>\n",
" <td>São José dos Campos</td>\n",
" <td>SP</td>\n",
" <td>Brazil</td>\n",
" <td>12227-000</td>\n",
" <td>+55 (12) 3923-5555</td>\n",
" <td>+55 (12) 3923-5566</td>\n",
" <td>[email protected]</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>Leonie</td>\n",
" <td>Köhler</td>\n",
" <td>None</td>\n",
" <td>Theodor-Heuss-Straße 34</td>\n",
" <td>Stuttgart</td>\n",
" <td>None</td>\n",
" <td>Germany</td>\n",
" <td>70174</td>\n",
" <td>+49 0711 2842222</td>\n",
" <td>None</td>\n",
" <td>[email protected]</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>François</td>\n",
" <td>Tremblay</td>\n",
" <td>None</td>\n",
" <td>1498 rue Bélanger</td>\n",
" <td>Montréal</td>\n",
" <td>QC</td>\n",
" <td>Canada</td>\n",
" <td>H2G 1A7</td>\n",
" <td>+1 (514) 721-4711</td>\n",
" <td>None</td>\n",
" <td>[email protected]</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>Bjørn</td>\n",
" <td>Hansen</td>\n",
" <td>None</td>\n",
" <td>Ullevålsveien 14</td>\n",
" <td>Oslo</td>\n",
" <td>None</td>\n",
" <td>Norway</td>\n",
" <td>0171</td>\n",
" <td>+47 22 44 22 22</td>\n",
" <td>None</td>\n",
" <td>[email protected]</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>František</td>\n",
" <td>Wichterlová</td>\n",
" <td>JetBrains s.r.o.</td>\n",
" <td>Klanova 9/506</td>\n",
" <td>Prague</td>\n",
" <td>None</td>\n",
" <td>Czech Republic</td>\n",
" <td>14700</td>\n",
" <td>+420 2 4172 5555</td>\n",
" <td>+420 2 4172 5555</td>\n",
" <td>[email protected]</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1 2 \\\n",
"0 1 Luís Gonçalves \n",
"1 2 Leonie Köhler \n",
"2 3 François Tremblay \n",
"3 4 Bjørn Hansen \n",
"4 5 František Wichterlová \n",
"\n",
" 3 \\\n",
"0 Embraer - Empresa Brasileira de Aeronáutica S.A. \n",
"1 None \n",
"2 None \n",
"3 None \n",
"4 JetBrains s.r.o. \n",
"\n",
" 4 5 6 7 \\\n",
"0 Av. Brigadeiro Faria Lima, 2170 São José dos Campos SP Brazil \n",
"1 Theodor-Heuss-Straße 34 Stuttgart None Germany \n",
"2 1498 rue Bélanger Montréal QC Canada \n",
"3 Ullevålsveien 14 Oslo None Norway \n",
"4 Klanova 9/506 Prague None Czech Republic \n",
"\n",
" 8 9 10 \\\n",
"0 12227-000 +55 (12) 3923-5555 +55 (12) 3923-5566 \n",
"1 70174 +49 0711 2842222 None \n",
"2 H2G 1A7 +1 (514) 721-4711 None \n",
"3 0171 +47 22 44 22 22 None \n",
"4 14700 +420 2 4172 5555 +420 2 4172 5555 \n",
"\n",
" 11 12 \n",
"0 [email protected] 3 \n",
"1 [email protected] 5 \n",
"2 [email protected] 3 \n",
"3 [email protected] 4 \n",
"4 [email protected] 4 "
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"query = \"SELECT * FROM customer LIMIT 5\"\n",
"\n",
"result = %sql $query\n",
"df = pd.DataFrame(result)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.4.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment