Created
July 29, 2020 04:26
-
-
Save charrismatic/c37858485a2581644bd3ba02f09fc4d4 to your computer and use it in GitHub Desktop.
test-notebook
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Slope Comparisons" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd\n", | |
| "import numpy as np\n", | |
| "import altair as alt\n", | |
| "from scipy import stats\n", | |
| "from altair import datum\n", | |
| "from ipywidgets import interact\n", | |
| "\n", | |
| "# Set up some global config and variables\n", | |
| "alt.renderers.enable('default')\n", | |
| "pd.options.mode.chained_assignment = None\n", | |
| "np.seterr(all='ignore')\n", | |
| "\n", | |
| "df = pd.read_csv('https://raw.githubusercontent.com/ironhacks/COVID-19-notebook-demo-1/master/jhu-daily-reports.csv')\n", | |
| "df['Active'] = df.Confirmed - (df.Deaths + df.Recovered)\n", | |
| "samples = df[['Date', 'Country']].groupby('Date').Country.nunique()\n", | |
| "days = samples[samples > 1].index.tolist()\n", | |
| "df = df[df['Date'].isin(days)]\n", | |
| "\n", | |
| "country_level = df.groupby(['Country', 'Date'], as_index=False).sum()\n", | |
| "def state_data(country):\n", | |
| " return df[df['Country'] == country].groupby(['State', 'Date'], as_index=False).sum()\n", | |
| "def county_data(state):\n", | |
| " return df[(df['Country'] == 'US') & (df['State'] == state)].groupby(['County', 'Date'], as_index=False).sum()\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def slope_chart(data, by, offset, xscale='linear', limit=400, scale=1, value='Confirmed_New', window=7, today=days[-1]):\n", | |
| " data = data[data['Date']<=today]\n", | |
| " source = data[data['Date'] == today]\n", | |
| " for var in source[by].unique():\n", | |
| " values = data[data[by] == var].sort_values('Date').tail(window)[['Confirmed', value]]\n", | |
| " slope, intercept, r_value, p_value, std_err = stats.linregress(values.Confirmed, values[value])\n", | |
| " source.loc[source[by] == var, 'Slope'] = slope\n", | |
| " source.fillna(0, inplace=True)\n", | |
| " source = source[source[value] > limit]\n", | |
| "\n", | |
| " title = 'Slope of %s in last %d days since %s vs. Total Confirmed' % (' '.join(value.split('_')), window, today)\n", | |
| " base = alt.Chart(source, title=title).mark_point(filled=True, stroke='grey').encode(\n", | |
| " alt.X('Confirmed:Q', scale=alt.Scale(type=xscale), axis=alt.Axis(offset=offset)),\n", | |
| " alt.Y('Slope:Q', axis=alt.Axis(title='Slope')),\n", | |
| " color=alt.Color(by+':N', scale=alt.Scale(scheme='category20'), legend=alt.Legend(columns=2, clipHeight=20, padding=10)),\n", | |
| " size=alt.Size(value+':Q', scale=alt.Scale(domain=[source.Confirmed_New.min(), source.Confirmed_New.max()], range=[100*scale, 3000*scale])),\n", | |
| " tooltip=[by, 'Confirmed', 'Slope', value]\n", | |
| " )\n", | |
| " text = base.mark_text().encode(\n", | |
| " text=by+':N',\n", | |
| " size=alt.value(12),\n", | |
| " color=alt.value('black')\n", | |
| " ).transform_filter(datum[value] > limit*2)\n", | |
| " regression = base.transform_regression('Confirmed', 'Slope', method=\"poly\", order=1).mark_line(strokeDash=[6,8]).encode(color=alt.value('grey'), size=alt.value(2))\n", | |
| " hline = alt.Chart().mark_rule(color='red', strokeDash=[6,3]).encode(alt.Y('a:Q', axis=alt.Axis(title=''))).transform_calculate(a=\"0\")\n", | |
| "\n", | |
| " return (base+text+regression+hline) if offset == 0 else (base+text+regression)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Country Level" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "slope_chart(country_level, 'Country', 0, xscale='log', limit=450, scale=3, window=7).properties(\n", | |
| " width=1200,\n", | |
| " height=800\n", | |
| ").interactive()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "data = country_level\n", | |
| "state = alt.Chart(data[data['Country'] == 'US'].sort_values('Date').tail(60)).mark_line().encode(\n", | |
| " alt.X('Date:T', axis=alt.Axis(title='Cumulative Cases')),\n", | |
| " alt.Y('Confirmed_New:Q', axis=alt.Axis(title='New Cases'))\n", | |
| ")\n", | |
| "reg = state.transform_regression(\"Date\", \"Confirmed_New\", method=\"linear\").mark_line(color='red', strokeDash=[6,3])\n", | |
| "(state+reg).interactive()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# State Level" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "@interact(window=(2, 21, 1))\n", | |
| "def chart(window=7):\n", | |
| " return slope_chart(state_data('US'), 'State', 0, limit=100, xscale='log', scale=3, window=window).properties(\n", | |
| " width=1200,\n", | |
| " height=800\n", | |
| " ).interactive()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "data = state_data('US')\n", | |
| "state = alt.Chart(data[data['State'] == 'OK'].sort_values('Date').tail(60)).mark_line().encode(\n", | |
| " x='Confirmed:Q',\n", | |
| " y='Confirmed_New:Q'\n", | |
| ")\n", | |
| "reg = state.transform_regression(\"Confirmed\", \"Confirmed_New\", method=\"poly\").mark_line(color='red', strokeDash=[6,3])\n", | |
| "(state+reg).interactive()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# US County Level" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "@interact(window=(2, 21, 1))\n", | |
| "def chart(window=7):\n", | |
| " return slope_chart(county_data('CA'), 'County', 0, xscale='log', limit=15, scale=5, window=window).properties(\n", | |
| " width=1100,\n", | |
| " height=600\n", | |
| " ).interactive()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "jupyter": { | |
| "source_hidden": true | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "data = county_data('CA')\n", | |
| "state = alt.Chart(data[data['County'] == 'Los Angeles'].sort_values('Date').tail(60)).mark_line().encode(\n", | |
| " x='Confirmed:Q',\n", | |
| " y='Confirmed_New:Q'\n", | |
| ")\n", | |
| "reg = state.transform_regression(\"Confirmed\", \"Confirmed_New\", method=\"poly\").mark_line(color='red', strokeDash=[6,3])\n", | |
| "(state+reg).interactive()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# US Hospitalizations" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 32, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "e741db7d90ba48129e8bc8f0ceeb2dee", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "interactive(children=(IntSlider(value=7, description='window', max=21, min=2), Output()), _dom_classes=('widge…" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "dfh = pd.read_csv('https://covidtracking.com/api/v1/states/daily.csv')\n", | |
| "dfh.date = pd.to_datetime(dfh.date, format='%Y%m%d')\n", | |
| "dfh.date = dfh.date.dt.strftime('%m-%d-%Y')\n", | |
| "dfh = dfh.rename({'date': 'Date', 'state':'State', 'hospitalizedCurrently': 'Hospitalized'}, axis=1)\n", | |
| "data = state_data('US')\n", | |
| "data = data.merge(dfh, on=['Date', 'State'], how='outer')\n", | |
| "@interact(window=(2, 21, 1))\n", | |
| "def chart(window=7):\n", | |
| " return slope_chart(\n", | |
| " data, \n", | |
| " 'State', \n", | |
| " 0, \n", | |
| " xscale='log', \n", | |
| " limit=200, \n", | |
| " scale=2, \n", | |
| " value='Hospitalized', \n", | |
| " window=window, \n", | |
| " today=days[-1]\n", | |
| " ).properties(\n", | |
| " width=1100,\n", | |
| " height=800\n", | |
| " ).interactive()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "state = alt.Chart(data[data['State'] == 'CA'].sort_values('Date').tail(7)).mark_line().encode(\n", | |
| " x='Date:T',\n", | |
| " y='Hospitalized:Q'\n", | |
| ")\n", | |
| "reg = state.transform_regression(\"Date\", \"Hospitalized\", method=\"poly\").mark_line(color='red', strokeDash=[6,3])\n", | |
| "(state+reg).interactive()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "data = country_level\n", | |
| "state = alt.Chart(data[(data['Country'] == 'India')]).mark_line().encode(\n", | |
| " alt.X('Date:T', axis=alt.Axis(title='Date')),\n", | |
| " alt.Y('Confirmed_New:Q', axis=alt.Axis(title='New Cases'))\n", | |
| ")\n", | |
| "reg = state.transform_regression(\"Date\", \"Confirmed_New\", method=\"linear\").mark_line(color='grey', strokeDash=[6,3])\n", | |
| "\n", | |
| "marks = pd.DataFrame([\n", | |
| " {\"Phase\": \"1\", \"start\": \"03-25-2020\", \"end\": \"04-14-2020\"},\n", | |
| " {\"Phase\": \"2\", \"start\": \"04-14-2020\", \"end\": \"05-03-2020\"},\n", | |
| " {\"Phase\": \"3\", \"start\": \"05-03-2020\", \"end\": \"05-17-2020\"},\n", | |
| " {\"Phase\": \"4\", \"start\": \"05-17-2020\", \"end\": \"05-31-2020\"},\n", | |
| "])\n", | |
| "rect = alt.Chart(marks).mark_rect(opacity=0.3).encode(x='start:T', x2='end:T', color='Phase:N')\n", | |
| "\n", | |
| "(rect+state+reg).properties(\n", | |
| " width=800,\n", | |
| " height=500\n", | |
| ").interactive()" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.7.6" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 4 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment