Last active
May 23, 2025 06:22
-
-
Save acstrahl/4bf895ad908080b1a6798e0274a4ea30 to your computer and use it in GitHub Desktop.
Answering business questions with SQL DEMO
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Introduction and Schema Diagram" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "'Connected: [email protected]'" | |
| ] | |
| }, | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "%%capture\n", | |
| "%load_ext sql\n", | |
| "%sql sqlite:///chinook.db" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Overview of the Data" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Done.\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<table>\n", | |
| " <tr>\n", | |
| " <th>name</th>\n", | |
| " <th>type</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>album</td>\n", | |
| " <td>table</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>artist</td>\n", | |
| " <td>table</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>customer</td>\n", | |
| " <td>table</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>employee</td>\n", | |
| " <td>table</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>genre</td>\n", | |
| " <td>table</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>invoice</td>\n", | |
| " <td>table</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>invoice_line</td>\n", | |
| " <td>table</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>media_type</td>\n", | |
| " <td>table</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>playlist</td>\n", | |
| " <td>table</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>playlist_track</td>\n", | |
| " <td>table</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>track</td>\n", | |
| " <td>table</td>\n", | |
| " </tr>\n", | |
| "</table>" | |
| ], | |
| "text/plain": [ | |
| "[('album', 'table'),\n", | |
| " ('artist', 'table'),\n", | |
| " ('customer', 'table'),\n", | |
| " ('employee', 'table'),\n", | |
| " ('genre', 'table'),\n", | |
| " ('invoice', 'table'),\n", | |
| " ('invoice_line', 'table'),\n", | |
| " ('media_type', 'table'),\n", | |
| " ('playlist', 'table'),\n", | |
| " ('playlist_track', 'table'),\n", | |
| " ('track', 'table')]" | |
| ] | |
| }, | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "%%sql\n", | |
| "SELECT\n", | |
| " name,\n", | |
| " type\n", | |
| "FROM sqlite_master\n", | |
| "WHERE type IN (\"table\",\"view\");" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "collapsed": false, | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Done.\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<table>\n", | |
| " <tr>\n", | |
| " <th>track_id</th>\n", | |
| " <th>name</th>\n", | |
| " <th>album_id</th>\n", | |
| " <th>media_type_id</th>\n", | |
| " <th>genre_id</th>\n", | |
| " <th>composer</th>\n", | |
| " <th>milliseconds</th>\n", | |
| " <th>bytes</th>\n", | |
| " <th>unit_price</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>1</td>\n", | |
| " <td>For Those About To Rock (We Salute You)</td>\n", | |
| " <td>1</td>\n", | |
| " <td>1</td>\n", | |
| " <td>1</td>\n", | |
| " <td>Angus Young, Malcolm Young, Brian Johnson</td>\n", | |
| " <td>343719</td>\n", | |
| " <td>11170334</td>\n", | |
| " <td>0.99</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>2</td>\n", | |
| " <td>Balls to the Wall</td>\n", | |
| " <td>2</td>\n", | |
| " <td>2</td>\n", | |
| " <td>1</td>\n", | |
| " <td>None</td>\n", | |
| " <td>342562</td>\n", | |
| " <td>5510424</td>\n", | |
| " <td>0.99</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>3</td>\n", | |
| " <td>Fast As a Shark</td>\n", | |
| " <td>3</td>\n", | |
| " <td>2</td>\n", | |
| " <td>1</td>\n", | |
| " <td>F. Baltes, S. Kaufman, U. Dirkscneider & W. Hoffman</td>\n", | |
| " <td>230619</td>\n", | |
| " <td>3990994</td>\n", | |
| " <td>0.99</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>4</td>\n", | |
| " <td>Restless and Wild</td>\n", | |
| " <td>3</td>\n", | |
| " <td>2</td>\n", | |
| " <td>1</td>\n", | |
| " <td>F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. Dirkscneider & W. Hoffman</td>\n", | |
| " <td>252051</td>\n", | |
| " <td>4331779</td>\n", | |
| " <td>0.99</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>5</td>\n", | |
| " <td>Princess of the Dawn</td>\n", | |
| " <td>3</td>\n", | |
| " <td>2</td>\n", | |
| " <td>1</td>\n", | |
| " <td>Deaffy & R.A. Smith-Diesel</td>\n", | |
| " <td>375418</td>\n", | |
| " <td>6290521</td>\n", | |
| " <td>0.99</td>\n", | |
| " </tr>\n", | |
| "</table>" | |
| ], | |
| "text/plain": [ | |
| "[(1, 'For Those About To Rock (We Salute You)', 1, 1, 1, 'Angus Young, Malcolm Young, Brian Johnson', 343719, 11170334, 0.99),\n", | |
| " (2, 'Balls to the Wall', 2, 2, 1, None, 342562, 5510424, 0.99),\n", | |
| " (3, 'Fast As a Shark', 3, 2, 1, 'F. Baltes, S. Kaufman, U. Dirkscneider & W. Hoffman', 230619, 3990994, 0.99),\n", | |
| " (4, 'Restless and Wild', 3, 2, 1, 'F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. Dirkscneider & W. Hoffman', 252051, 4331779, 0.99),\n", | |
| " (5, 'Princess of the Dawn', 3, 2, 1, 'Deaffy & R.A. Smith-Diesel', 375418, 6290521, 0.99)]" | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "%%sql\n", | |
| "\n", | |
| "SELECT *\n", | |
| "FROM track\n", | |
| "LIMIT 5;" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Done.\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<table>\n", | |
| " <tr>\n", | |
| " <th>invoice_line_id</th>\n", | |
| " <th>invoice_id</th>\n", | |
| " <th>track_id</th>\n", | |
| " <th>unit_price</th>\n", | |
| " <th>quantity</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>1</td>\n", | |
| " <td>1</td>\n", | |
| " <td>1158</td>\n", | |
| " <td>0.99</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>2</td>\n", | |
| " <td>1</td>\n", | |
| " <td>1159</td>\n", | |
| " <td>0.99</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>3</td>\n", | |
| " <td>1</td>\n", | |
| " <td>1160</td>\n", | |
| " <td>0.99</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>4</td>\n", | |
| " <td>1</td>\n", | |
| " <td>1161</td>\n", | |
| " <td>0.99</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>5</td>\n", | |
| " <td>1</td>\n", | |
| " <td>1162</td>\n", | |
| " <td>0.99</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| "</table>" | |
| ], | |
| "text/plain": [ | |
| "[(1, 1, 1158, 0.99, 1),\n", | |
| " (2, 1, 1159, 0.99, 1),\n", | |
| " (3, 1, 1160, 0.99, 1),\n", | |
| " (4, 1, 1161, 0.99, 1),\n", | |
| " (5, 1, 1162, 0.99, 1)]" | |
| ] | |
| }, | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "%%sql\n", | |
| "\n", | |
| "SELECT *\n", | |
| "FROM invoice_line\n", | |
| "LIMIT 5;" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Selecting New Albums to Purchase" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 41, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Done.\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<table>\n", | |
| " <tr>\n", | |
| " <th>genre</th>\n", | |
| " <th>count</th>\n", | |
| " <th>percentage</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>Rock</td>\n", | |
| " <td>561</td>\n", | |
| " <td>53.38</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>Alternative & Punk</td>\n", | |
| " <td>130</td>\n", | |
| " <td>12.37</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>Metal</td>\n", | |
| " <td>124</td>\n", | |
| " <td>11.8</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>R&B/Soul</td>\n", | |
| " <td>53</td>\n", | |
| " <td>5.04</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>Blues</td>\n", | |
| " <td>36</td>\n", | |
| " <td>3.43</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>Alternative</td>\n", | |
| " <td>35</td>\n", | |
| " <td>3.33</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>Latin</td>\n", | |
| " <td>22</td>\n", | |
| " <td>2.09</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>Pop</td>\n", | |
| " <td>22</td>\n", | |
| " <td>2.09</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>Hip Hop/Rap</td>\n", | |
| " <td>20</td>\n", | |
| " <td>1.9</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>Jazz</td>\n", | |
| " <td>14</td>\n", | |
| " <td>1.33</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>Easy Listening</td>\n", | |
| " <td>13</td>\n", | |
| " <td>1.24</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>Reggae</td>\n", | |
| " <td>6</td>\n", | |
| " <td>0.57</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>Electronica/Dance</td>\n", | |
| " <td>5</td>\n", | |
| " <td>0.48</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>Classical</td>\n", | |
| " <td>4</td>\n", | |
| " <td>0.38</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>Heavy Metal</td>\n", | |
| " <td>3</td>\n", | |
| " <td>0.29</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>Soundtrack</td>\n", | |
| " <td>2</td>\n", | |
| " <td>0.19</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>TV Shows</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0.1</td>\n", | |
| " </tr>\n", | |
| "</table>" | |
| ], | |
| "text/plain": [ | |
| "[('Rock', 561, 53.38),\n", | |
| " ('Alternative & Punk', 130, 12.37),\n", | |
| " ('Metal', 124, 11.8),\n", | |
| " ('R&B/Soul', 53, 5.04),\n", | |
| " ('Blues', 36, 3.43),\n", | |
| " ('Alternative', 35, 3.33),\n", | |
| " ('Latin', 22, 2.09),\n", | |
| " ('Pop', 22, 2.09),\n", | |
| " ('Hip Hop/Rap', 20, 1.9),\n", | |
| " ('Jazz', 14, 1.33),\n", | |
| " ('Easy Listening', 13, 1.24),\n", | |
| " ('Reggae', 6, 0.57),\n", | |
| " ('Electronica/Dance', 5, 0.48),\n", | |
| " ('Classical', 4, 0.38),\n", | |
| " ('Heavy Metal', 3, 0.29),\n", | |
| " ('Soundtrack', 2, 0.19),\n", | |
| " ('TV Shows', 1, 0.1)]" | |
| ] | |
| }, | |
| "execution_count": 41, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "%%sql\n", | |
| "\n", | |
| "WITH genre_usa_tracks AS\n", | |
| " (\n", | |
| " SELECT il.invoice_line_id, g.name AS genre, t.track_id, i.billing_country AS country\n", | |
| " FROM track AS t\n", | |
| " JOIN genre AS g ON t.genre_id = g.genre_id\n", | |
| " JOIN invoice_line AS il ON t.track_id = il.track_id\n", | |
| " JOIN invoice AS i ON il.invoice_id = i.invoice_id\n", | |
| " WHERE i.billing_country = 'USA'\n", | |
| " )\n", | |
| " \n", | |
| "SELECT genre, \n", | |
| " COUNT(*) AS count,\n", | |
| " ROUND(\n", | |
| " CAST(\n", | |
| " COUNT(*) AS float)/(\n", | |
| " SELECT COUNT(*) FROM genre_usa_tracks)*100,2) AS percentage\n", | |
| "FROM genre_usa_tracks\n", | |
| "GROUP BY genre\n", | |
| "ORDER BY 2 DESC;\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Based on the sales of tracks across different genres in the USA, we should purchase the new albums by the following artists:\n", | |
| "\n", | |
| "- Red Tone (Punk)\n", | |
| "- Slim Jim Bites (Blues)\n", | |
| "- Meteor and the Girls (Pop)\n", | |
| "\n", | |
| "It's worth keeping in mind that combined, these three genres only make up only 17% of total sales, so we should be on the lookout for artists and albums from the rock genre, which accounts for 53% of sales." | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Analyzing Employee Sales Performance" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Done.\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<table>\n", | |
| " <tr>\n", | |
| " <th>employee_name</th>\n", | |
| " <th>hire_date</th>\n", | |
| " <th>customer_count</th>\n", | |
| " <th>total_sales_dollars</th>\n", | |
| " <th>avg_dollars_per_customer</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>Jane Peacock</td>\n", | |
| " <td>2017-04-01 00:00:00</td>\n", | |
| " <td>212</td>\n", | |
| " <td>1731.51</td>\n", | |
| " <td>8.17</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>Steve Johnson</td>\n", | |
| " <td>2017-10-17 00:00:00</td>\n", | |
| " <td>188</td>\n", | |
| " <td>1393.92</td>\n", | |
| " <td>7.41</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>Margaret Park</td>\n", | |
| " <td>2017-05-03 00:00:00</td>\n", | |
| " <td>214</td>\n", | |
| " <td>1584.0</td>\n", | |
| " <td>7.4</td>\n", | |
| " </tr>\n", | |
| "</table>" | |
| ], | |
| "text/plain": [ | |
| "[('Jane Peacock', '2017-04-01 00:00:00', 212, 1731.51, 8.17),\n", | |
| " ('Steve Johnson', '2017-10-17 00:00:00', 188, 1393.92, 7.41),\n", | |
| " ('Margaret Park', '2017-05-03 00:00:00', 214, 1584.0, 7.4)]" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "%%sql\n", | |
| "\n", | |
| "SELECT\n", | |
| " e.first_name ||\" \"||e.last_name AS employee_name,\n", | |
| " e.hire_date,\n", | |
| " COUNT(c.customer_id) AS customer_count,\n", | |
| " ROUND(SUM(i.total),2) AS total_sales_dollars,\n", | |
| " ROUND(CAST(SUM(i.total) AS float)/COUNT(c.customer_id),2) AS avg_dollars_per_customer\n", | |
| " FROM customer AS c\n", | |
| " JOIN invoice AS i ON c.customer_id = i.customer_id\n", | |
| " JOIN employee AS e ON c.support_rep_id = e.employee_id\n", | |
| " GROUP BY employee_name\n", | |
| " ORDER BY avg_dollars_per_customer DESC;" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "While there is a 20% difference in sales between Jane (the top employee) and Steve (the bottom employee), the difference roughly corresponds to the differences in their hiring dates." | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Analyzing Sales by Country" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 73, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Done.\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<table>\n", | |
| " <tr>\n", | |
| " <th>country</th>\n", | |
| " <th>customers</th>\n", | |
| " <th>total_sales</th>\n", | |
| " <th>average_order</th>\n", | |
| " <th>customer_lifetime_value</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>USA</td>\n", | |
| " <td>13</td>\n", | |
| " <td>1040.49</td>\n", | |
| " <td>7.94</td>\n", | |
| " <td>80.04</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>Canada</td>\n", | |
| " <td>8</td>\n", | |
| " <td>535.59</td>\n", | |
| " <td>7.05</td>\n", | |
| " <td>66.95</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>Brazil</td>\n", | |
| " <td>5</td>\n", | |
| " <td>427.68</td>\n", | |
| " <td>7.01</td>\n", | |
| " <td>85.54</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>France</td>\n", | |
| " <td>5</td>\n", | |
| " <td>389.07</td>\n", | |
| " <td>7.78</td>\n", | |
| " <td>77.81</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>Germany</td>\n", | |
| " <td>4</td>\n", | |
| " <td>334.62</td>\n", | |
| " <td>8.16</td>\n", | |
| " <td>83.66</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>Czech Republic</td>\n", | |
| " <td>2</td>\n", | |
| " <td>273.24</td>\n", | |
| " <td>9.11</td>\n", | |
| " <td>136.62</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>United Kingdom</td>\n", | |
| " <td>3</td>\n", | |
| " <td>245.52</td>\n", | |
| " <td>8.77</td>\n", | |
| " <td>81.84</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>Portugal</td>\n", | |
| " <td>2</td>\n", | |
| " <td>185.13</td>\n", | |
| " <td>6.38</td>\n", | |
| " <td>92.57</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>India</td>\n", | |
| " <td>2</td>\n", | |
| " <td>183.15</td>\n", | |
| " <td>8.72</td>\n", | |
| " <td>91.58</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>Other</td>\n", | |
| " <td>15</td>\n", | |
| " <td>1094.94</td>\n", | |
| " <td>7.45</td>\n", | |
| " <td>73.0</td>\n", | |
| " </tr>\n", | |
| "</table>" | |
| ], | |
| "text/plain": [ | |
| "[('USA', 13, 1040.49, 7.94, 80.04),\n", | |
| " ('Canada', 8, 535.59, 7.05, 66.95),\n", | |
| " ('Brazil', 5, 427.68, 7.01, 85.54),\n", | |
| " ('France', 5, 389.07, 7.78, 77.81),\n", | |
| " ('Germany', 4, 334.62, 8.16, 83.66),\n", | |
| " ('Czech Republic', 2, 273.24, 9.11, 136.62),\n", | |
| " ('United Kingdom', 3, 245.52, 8.77, 81.84),\n", | |
| " ('Portugal', 2, 185.13, 6.38, 92.57),\n", | |
| " ('India', 2, 183.15, 8.72, 91.58),\n", | |
| " ('Other', 15, 1094.94, 7.45, 73.0)]" | |
| ] | |
| }, | |
| "execution_count": 73, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "%%sql\n", | |
| "\n", | |
| "WITH\n", | |
| " customer_counts AS (\n", | |
| " SELECT\n", | |
| " country,\n", | |
| " COUNT(*) AS num_customers\n", | |
| " FROM customer\n", | |
| " GROUP BY country\n", | |
| " ),\n", | |
| "\n", | |
| " labeled_invoices AS (\n", | |
| " SELECT\n", | |
| " CASE\n", | |
| " WHEN cc.num_customers = 1 THEN 'Other'\n", | |
| " ELSE c.country\n", | |
| " END AS country,\n", | |
| " i.customer_id,\n", | |
| " i.invoice_id,\n", | |
| " il.unit_price\n", | |
| " FROM invoice_line AS il\n", | |
| " JOIN invoice AS i USING (invoice_id)\n", | |
| " JOIN customer AS c ON i.customer_id = c.customer_id\n", | |
| " JOIN customer_counts AS cc\n", | |
| " ON cc.country = c.country\n", | |
| " )\n", | |
| "\n", | |
| "SELECT\n", | |
| " country,\n", | |
| " COUNT(DISTINCT customer_id) AS customers,\n", | |
| " ROUND(SUM(unit_price),2) AS total_sales,\n", | |
| " ROUND(SUM(unit_price) \n", | |
| " / COUNT(DISTINCT invoice_id), 2) AS average_order,\n", | |
| " ROUND(SUM(unit_price) \n", | |
| " / COUNT(DISTINCT customer_id), 2) AS customer_lifetime_value\n", | |
| "FROM labeled_invoices\n", | |
| "GROUP BY country\n", | |
| "ORDER BY\n", | |
| " CASE WHEN country = 'Other' THEN 1 ELSE 0 END,\n", | |
| " total_sales DESC\n", | |
| ";\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Albums vs. Individual Tracks" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Done.\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<table>\n", | |
| " <tr>\n", | |
| " <th>album_purchased</th>\n", | |
| " <th>total_count</th>\n", | |
| " <th>percentage</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>No</td>\n", | |
| " <td>500</td>\n", | |
| " <td>81.83%</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>Yes</td>\n", | |
| " <td>111</td>\n", | |
| " <td>18.17%</td>\n", | |
| " </tr>\n", | |
| "</table>" | |
| ], | |
| "text/plain": [ | |
| "[('No', 500, '81.83%'), ('Yes', 111, '18.17%')]" | |
| ] | |
| }, | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "%%sql\n", | |
| "\n", | |
| "WITH\n", | |
| " --Counting the number of tracks included on each album, and filtering out albums with only 1 track\n", | |
| " albums_with_track_count AS\n", | |
| " (\n", | |
| " SELECT a.album_id,\n", | |
| " COUNT(t.track_id) AS num_tracks\n", | |
| " FROM album AS a\n", | |
| " JOIN track AS t ON a.album_id = t.album_id\n", | |
| " GROUP BY a.album_id\n", | |
| " HAVING num_tracks > 1\n", | |
| " ),\n", | |
| "\n", | |
| " --Counting tracks by album for each invoice\n", | |
| " invoice_with_track_count AS\n", | |
| " (\n", | |
| " SELECT il.invoice_id,\n", | |
| " a.album_id,\n", | |
| " COUNT(il.track_id) AS album_track_purchases\n", | |
| " FROM invoice_line AS il\n", | |
| " JOIN track AS t ON t.track_id = il.track_id\n", | |
| " JOIN album AS a ON a.album_id = t.album_id\n", | |
| " GROUP BY il.invoice_id, a.album_id\n", | |
| " ),\n", | |
| "\n", | |
| " --Categorizing each invoice as either a full album purchase or a non-full album purchase\n", | |
| " purchase_type AS\n", | |
| " (\n", | |
| " SELECT invoice_id,\n", | |
| " a.album_id,\n", | |
| " album_track_purchases,\n", | |
| " num_tracks,\n", | |
| " CASE\n", | |
| " WHEN num_tracks = album_track_purchases THEN 1\n", | |
| " ELSE 0\n", | |
| " END AS full_album_purchased\n", | |
| " FROM invoice_with_track_count AS i\n", | |
| " JOIN albums_with_track_count AS a \n", | |
| " ON i.album_id = a.album_id --inner join must be used here to remove invoices with a single 1 track album purchase\n", | |
| " ),\n", | |
| "\n", | |
| " --Getting one row per invoice\n", | |
| " invoice_summary AS\n", | |
| " (\n", | |
| " SELECT invoice_id,\n", | |
| " MAX(full_album_purchased) AS album_invoice\n", | |
| " FROM purchase_type\n", | |
| " GROUP BY invoice_id\n", | |
| " )\n", | |
| "\n", | |
| "SELECT CASE\n", | |
| " WHEN album_invoice = 1 THEN 'Yes'\n", | |
| " WHEN album_invoice = 0 THEN 'No'\n", | |
| " ELSE 'Other'\n", | |
| " END AS album_purchased,\n", | |
| " COUNT(album_invoice) AS total_count,\n", | |
| " ROUND(CAST(COUNT(invoice_id) AS float)/CAST((SELECT COUNT(invoice_id) \n", | |
| " FROM invoice_summary) AS float)*100,2) || '%' AS percentage\n", | |
| " FROM invoice_summary\n", | |
| " GROUP BY album_purchased;" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Album purchases account for 18.8% of purchases. Based on this data, I would recommend against purchasing only select tracks from albums from record companies, since there is potential to lose one fifth of revenue." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 17, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Done.\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>0</th>\n", | |
| " <th>1</th>\n", | |
| " <th>2</th>\n", | |
| " <th>3</th>\n", | |
| " <th>4</th>\n", | |
| " <th>5</th>\n", | |
| " <th>6</th>\n", | |
| " <th>7</th>\n", | |
| " <th>8</th>\n", | |
| " <th>9</th>\n", | |
| " <th>10</th>\n", | |
| " <th>11</th>\n", | |
| " <th>12</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1</td>\n", | |
| " <td>Luís</td>\n", | |
| " <td>Gonçalves</td>\n", | |
| " <td>Embraer - Empresa Brasileira de Aeronáutica S.A.</td>\n", | |
| " <td>Av. Brigadeiro Faria Lima, 2170</td>\n", | |
| " <td>São José dos Campos</td>\n", | |
| " <td>SP</td>\n", | |
| " <td>Brazil</td>\n", | |
| " <td>12227-000</td>\n", | |
| " <td>+55 (12) 3923-5555</td>\n", | |
| " <td>+55 (12) 3923-5566</td>\n", | |
| " <td>[email protected]</td>\n", | |
| " <td>3</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>2</td>\n", | |
| " <td>Leonie</td>\n", | |
| " <td>Köhler</td>\n", | |
| " <td>None</td>\n", | |
| " <td>Theodor-Heuss-Straße 34</td>\n", | |
| " <td>Stuttgart</td>\n", | |
| " <td>None</td>\n", | |
| " <td>Germany</td>\n", | |
| " <td>70174</td>\n", | |
| " <td>+49 0711 2842222</td>\n", | |
| " <td>None</td>\n", | |
| " <td>[email protected]</td>\n", | |
| " <td>5</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>3</td>\n", | |
| " <td>François</td>\n", | |
| " <td>Tremblay</td>\n", | |
| " <td>None</td>\n", | |
| " <td>1498 rue Bélanger</td>\n", | |
| " <td>Montréal</td>\n", | |
| " <td>QC</td>\n", | |
| " <td>Canada</td>\n", | |
| " <td>H2G 1A7</td>\n", | |
| " <td>+1 (514) 721-4711</td>\n", | |
| " <td>None</td>\n", | |
| " <td>[email protected]</td>\n", | |
| " <td>3</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>4</td>\n", | |
| " <td>Bjørn</td>\n", | |
| " <td>Hansen</td>\n", | |
| " <td>None</td>\n", | |
| " <td>Ullevålsveien 14</td>\n", | |
| " <td>Oslo</td>\n", | |
| " <td>None</td>\n", | |
| " <td>Norway</td>\n", | |
| " <td>0171</td>\n", | |
| " <td>+47 22 44 22 22</td>\n", | |
| " <td>None</td>\n", | |
| " <td>[email protected]</td>\n", | |
| " <td>4</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>5</td>\n", | |
| " <td>František</td>\n", | |
| " <td>Wichterlová</td>\n", | |
| " <td>JetBrains s.r.o.</td>\n", | |
| " <td>Klanova 9/506</td>\n", | |
| " <td>Prague</td>\n", | |
| " <td>None</td>\n", | |
| " <td>Czech Republic</td>\n", | |
| " <td>14700</td>\n", | |
| " <td>+420 2 4172 5555</td>\n", | |
| " <td>+420 2 4172 5555</td>\n", | |
| " <td>[email protected]</td>\n", | |
| " <td>4</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " 0 1 2 \\\n", | |
| "0 1 Luís Gonçalves \n", | |
| "1 2 Leonie Köhler \n", | |
| "2 3 François Tremblay \n", | |
| "3 4 Bjørn Hansen \n", | |
| "4 5 František Wichterlová \n", | |
| "\n", | |
| " 3 \\\n", | |
| "0 Embraer - Empresa Brasileira de Aeronáutica S.A. \n", | |
| "1 None \n", | |
| "2 None \n", | |
| "3 None \n", | |
| "4 JetBrains s.r.o. \n", | |
| "\n", | |
| " 4 5 6 7 \\\n", | |
| "0 Av. Brigadeiro Faria Lima, 2170 São José dos Campos SP Brazil \n", | |
| "1 Theodor-Heuss-Straße 34 Stuttgart None Germany \n", | |
| "2 1498 rue Bélanger Montréal QC Canada \n", | |
| "3 Ullevålsveien 14 Oslo None Norway \n", | |
| "4 Klanova 9/506 Prague None Czech Republic \n", | |
| "\n", | |
| " 8 9 10 \\\n", | |
| "0 12227-000 +55 (12) 3923-5555 +55 (12) 3923-5566 \n", | |
| "1 70174 +49 0711 2842222 None \n", | |
| "2 H2G 1A7 +1 (514) 721-4711 None \n", | |
| "3 0171 +47 22 44 22 22 None \n", | |
| "4 14700 +420 2 4172 5555 +420 2 4172 5555 \n", | |
| "\n", | |
| " 11 12 \n", | |
| "0 [email protected] 3 \n", | |
| "1 [email protected] 5 \n", | |
| "2 [email protected] 3 \n", | |
| "3 [email protected] 4 \n", | |
| "4 [email protected] 4 " | |
| ] | |
| }, | |
| "execution_count": 17, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "import pandas as pd\n", | |
| "\n", | |
| "query = \"SELECT * FROM customer LIMIT 5\"\n", | |
| "\n", | |
| "result = %sql $query\n", | |
| "df = pd.DataFrame(result)\n", | |
| "df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.4.3" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment