Skip to content

Instantly share code, notes, and snippets.

@1vth1nk3r
Last active December 10, 2021 08:28
Show Gist options
  • Select an option

  • Save 1vth1nk3r/09f63cdfbf5796ecb48e63365f0aaf30 to your computer and use it in GitHub Desktop.

Select an option

Save 1vth1nk3r/09f63cdfbf5796ecb48e63365f0aaf30 to your computer and use it in GitHub Desktop.
# Two side-by-side panels drawn from markets_by_state: a bar chart of
# people_per_market per state (left) and a scatter of log_pop against
# log_markets (right). Both are coloured by the is_selected column.
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 15))

# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally, while the keyword form works on every release.
sns.barplot(x='people_per_market', y='state', hue='is_selected',
            dodge=False, data=markets_by_state, ax=ax1)
sns.scatterplot(x='log_pop', y='log_markets', hue='is_selected',
                data=markets_by_state, ax=ax2, s=100)

# Remove the legend attached to each panel.
ax1.legend_.remove()
ax2.legend_.remove()
sns.set_style('whitegrid')

# Background layer: one '_' marker per row of goods_by_state.
plt.scatter('good', 'prop selling', marker='_', alpha=0.7, data=goods_by_state)

# Foreground layer: a line per highlighted state. x/y/hue are given by
# keyword because seaborn >= 0.12 rejects them as positional arguments.
highlighted = goods_by_state.query("state in ['New Mexico','North Dakota','Vermont']")
sns.lineplot(x='good', y='prop selling', hue='state',
             data=highlighted, legend=False)

# The legend is disabled above, so each line is labelled directly instead.
# NOTE(review): the name says "last" but the aggregation keeps the FIRST row
# of each state - confirm which end of the line the label should sit on.
last_rows = highlighted.groupby('state', as_index=False).agg('first')
for _, row in last_rows.iterrows():
    plt.annotate(row['state'], (row['good'], row['prop selling']),
                 ha='right', xytext=(5, 0), textcoords='offset pixels')

sns.despine(bottom=True, left=True)
# Heatmap of markets_by_month with its rows re-ordered by state_by_lat.
sns.set(font_scale=0.85)

# Continuous colour map built from a single base colour.
blue_pal = sns.light_palette("steelblue", as_cmap=True)

g = sns.heatmap(
    markets_by_month.reindex(state_by_lat),
    cmap=blue_pal,
    cbar=False,
    linewidths=0.1,
    yticklabels=True,  # label every row
)

# Re-apply the existing y tick labels with no rotation.
g.set_yticklabels(g.get_yticklabels(), rotation=0)

plt.title('Distribution of months open for farmers markets by latitude')
plt.show()
# Bar chart on ax1 and scatter on ax2, both coloured from state_colors.
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally, while the keyword form works on every release.
sns.barplot(x='people_per_market', y='state', palette=state_colors,
            data=markets_by_state, ax=ax1)
p = sns.scatterplot(x='population', y='num_markets', color=state_colors,
                    data=markets_by_state, s=60, ax=ax2)

# Both scatter axes use a log scale.
ax2.set(xscale="log", yscale='log')

# Arrow from the text at (26956958, 450) to the point at (26956958, 230),
# in data coordinates.
ax2.annotate(tx_message, xy=(26956958, 230),
             xytext=(26956958, 450), ha='right',
             size=15, backgroundcolor='white',
             arrowprops={'facecolor': 'black', 'width': 3})
# Scatter of NO2 vs SO2 for Houston with one observation (day 330 of 2014)
# drawn in a contrasting colour.
sns.set_style('whitegrid')

houston_pollution = pollution[pollution.city == 'Houston']

# One colour per row, in row order, so it lines up with the plotted points.
houston_colors = [
    'orangered' if day == 330 and year == 2014 else 'lightgray'
    for day, year in zip(houston_pollution.day, houston_pollution.year)
]

# fit_reg=False turns regplot into a plain scatter; the per-point colours
# are forwarded to the underlying scatter call via scatter_kws.
sns.regplot(
    x='NO2',
    y='SO2',
    data=houston_pollution,
    scatter_kws={'alpha': 0.7, 'facecolors': houston_colors},
    fit_reg=False,
)
# Work on a copy so that adding a column does not touch `pollution`.
houston_pollution = pollution[pollution.city == 'Houston'].copy()

max_O3 = houston_pollution.O3.max()

# Tag every row: the row(s) matching the maximum O3 reading get their own
# label, everything else is grouped as 'Others'.
houston_pollution['point_type'] = [
    'Others' if reading != max_O3 else 'Highest O3 Day'
    for reading in houston_pollution.O3
]

# Colour the scatter by the new label column.
sns.scatterplot(data=houston_pollution, x='NO2', y='SO2', hue='point_type')
# Shaded O3 density for 2012 rows and for all remaining rows.
sns.kdeplot(pollution.loc[pollution.year == 2012, 'O3'],
            label='2012', shade=True)
sns.kdeplot(pollution.loc[pollution.year != 2012, 'O3'],
            label='other years', shade=True)

# O3 for a single city: histogram switched off, rug marks switched on.
sns.distplot(
    pollution.loc[pollution.city == 'Vandenberg Air Force Base', 'O3'],
    hist=False,
    rug=True,
    color='steelblue',
    label='Vandenberg',
)
# Keep only the rows whose month column equals 3.
pollution_mar = pollution.loc[pollution.month == 3]

# One small marker per observation, grouped by city along the y axis.
sns.swarmplot(data=pollution_mar, x='O3', y="city", size=3)
# Free-standing, left-aligned two-line label placed at (0.57, 41).
plt.text(
    0.57,
    41,
    'Cincinnati had highest observed\nSO2 value on Aug 11, 2013',
    fontdict=dict(ha='left', size='large'),
)
# Arrow from a label at (2, 15) to the (CO, NO2) position read from
# lb_newyears; the white background keeps the text legible over the points.
plt.annotate(
    'Long Beach New Years',
    xy=(lb_newyears['CO'], lb_newyears['NO2']),
    xytext=(2, 15),
    backgroundcolor='white',
    arrowprops=dict(facecolor='gray', width=3, shrink=0.03),
)
# One colour per row of `pollution`: Long Beach rows stand out, the rest
# fade into the background.
is_lb = [
    'lightgray' if city != 'Long Beach' else 'orangered'
    for city in pollution['city']
]

# fit_reg=False leaves only the scatter layer; colours and transparency are
# forwarded to it through scatter_kws.
sns.regplot(
    x='CO',
    y='O3',
    data=pollution,
    scatter_kws={'alpha': 0.3, 'facecolors': is_lb},
    fit_reg=False,
)
# One panel per city, wrapping onto a new row after every third panel.
g = sns.FacetGrid(pollution, col='city', col_wrap=3)

# Draw a semi-transparent CO vs NO2 scatter inside each panel.
g.map(sns.scatterplot, 'CO', 'NO2', alpha=0.2)
# Mean CO per city, drawn twice with different styling (outlined bars, then
# uniformly coloured bars).
# ci=None is the documented way to switch error bars off; the previous
# ci=False still ran the bootstrap and drew a zero-width interval.
sns.barplot(x='CO', y='city', data=pollution, estimator=np.mean,
            ci=None, edgecolor='black')
sns.barplot(x='CO', y='city', data=pollution, estimator=np.mean,
            ci=None, color='cadetblue')
# Continuous colour map built from a single base colour.
color_palette = sns.light_palette('orangered', as_cmap=True)

# CO vs NO2 for cinci_2014, with point colour driven by the O3 column.
sns.scatterplot(
    data=cinci_2014,
    x='CO',
    y='NO2',
    hue='O3',
    palette=color_palette,
)
# Diverging colour map between hues 250 and 0.
color_palette = sns.diverging_palette(250, 0, as_cmap=True)

# The colour scale is centred on 0 and clipped to the range [-4, 4].
sns.heatmap(
    nov_2015_CO,
    cmap=color_palette,
    vmin=-4,
    vmax=4,
    center=0,
)
# Switch matplotlib to its dark style before drawing.
plt.style.use("dark_background")

# Diverging colour map whose midpoint is dark rather than light.
color_palette = sns.diverging_palette(
    250, 0,
    center='dark',
    as_cmap=True,
)

# Colour scale centred on 0.
sns.heatmap(oct_2015_o3, center=0, cmap=color_palette)
# One thick line per city, coloured from the qualitative "Set2" palette.
sns.lineplot(
    data=pollution_jan13,
    x="day",
    y="CO",
    hue="city",
    palette="Set2",
    linewidth=3,
)
# The city/pollutant combinations that keep their own colour.
wanted_combos = [
    'Vandenberg Air Force Base NO2',
    'Long Beach CO',
    'Cincinnati SO2',
]

# Every other combination is collapsed into a single 'other' category.
city_pol_month['color_cats'] = [
    combo if combo in wanted_combos else 'other'
    for combo in city_pol_month['city_pol']
]

# units + estimator=None draws one un-aggregated line per city_pol value,
# while colour is taken from the collapsed category.
sns.lineplot(
    data=city_pol_month,
    x="month",
    y="value",
    hue='color_cats',
    units='city_pol',
    estimator=None,
    palette='Set2',
)
# Bin CO into four quantile-based groups; labels=False stores the integer
# bin index rather than an interval label.
pollution['CO quartile'] = pd.qcut(pollution['CO'], 4, labels=False)

# Filter after the new column exists so the subset carries it along.
des_moines = pollution.query("city == 'Des Moines'")

# SO2 vs NO2, coloured by CO bin with a sequential palette.
sns.scatterplot(
    data=des_moines,
    x='SO2',
    y='NO2',
    hue='CO quartile',
    palette='GnBu',
)
# Grouped bar charts: one row of panels per pollutant, bars grouped by city
# and coloured by year. Each row keeps its own y scale (sharey=False).
sns.catplot(
    data=city_maxes,
    kind='bar',
    x='city',
    y='value',
    hue='year',
    row='pollutant',
    sharey=False,
    palette='BuGn',
)
# Interval bounds: mean -/+ 1.96 standard errors.
average_ests['lower'] = average_ests['mean'] - 1.96 * average_ests['std_err']
average_ests['upper'] = average_ests['mean'] + 1.96 * average_ests['std_err']

# One row of panels per pollutant, each with an independent x axis.
g = sns.FacetGrid(average_ests, row='pollutant', sharex=False)

# Horizontal segment spanning lower..upper at height y.
g.map(plt.hlines, 'y', 'lower', 'upper')

# Marker at (seen, y), then blank out both axis labels.
g.map(plt.scatter, 'seen', 'y', color='orangered')
g.set_ylabels('')
g.set_xlabels('')
# Thick translucent bar from lower to upper for each year.
plt.hlines(
    y='year',
    xmin='lower',
    xmax='upper',
    data=diffs_by_year,
    color='steelblue',
    alpha=0.7,
    linewidth=5,
)

# Black '|' marker at the mean of each year.
plt.plot('mean', 'year', 'k|', data=diffs_by_year)

# Dashed vertical reference line at x = 0.
plt.axvline(x=0, linestyle='--', color='orangered')
# Band bounds: mean -/+ 2.58 standard errors.
vandenberg_NO2['lower'] = vandenberg_NO2['mean'] - 2.58 * vandenberg_NO2['std_err']
vandenberg_NO2['upper'] = vandenberg_NO2['mean'] + 2.58 * vandenberg_NO2['std_err']

# Faint white line for the mean, then the filled band between the bounds.
plt.plot('day', 'mean', data=vandenberg_NO2, alpha=0.4, color='white')
plt.fill_between(x='day', y1='lower', y2='upper', data=vandenberg_NO2)
# One panel per city, two panels per row.
g = sns.FacetGrid(data=eastern_SO2, col='city', col_wrap=2)

# Filled band between lower and upper, with the mean drawn on top in white.
g.map(plt.fill_between, 'day', 'lower', 'upper', color='coral')
g.map(plt.plot, 'day', 'mean', color='white')
# Overlay a translucent lower..upper band and a mean line for each city,
# with one fixed colour per city. The loop body is indented again so the
# three statements run once per city.
for city, color in [('Denver', "#66c2a5"), ('Long Beach', "#fc8d62")]:
    # Rows of SO2_compare belonging to the current city.
    city_data = SO2_compare[SO2_compare.city == city]

    plt.fill_between(x='day', y1='lower', y2='upper', data=city_data,
                     color=color, alpha=0.4)
    # Only the line carries a label, so it is the only candidate for a
    # legend entry.
    plt.plot('day', 'mean', data=city_data, label=city,
             color=color, alpha=0.25)
# Three parallel lists: significance level, legend label and bar colour.
alphas = [0.01, 0.05, 0.1]
widths = ['99% CI', '95%', '90%']
colors = ['#fee08b', '#fc8d59', '#d53e4f']

# Draw one set of horizontal bars per alpha, in list order. The loop body
# is indented again so both statements run on every iteration.
for alpha, color, width in zip(alphas, colors, widths):
    # Columns 0 and 1 of the result are used as the left and right ends.
    # NOTE(review): presumably a fitted statsmodels result - confirm.
    conf_ints = pollution_model.conf_int(alpha)
    plt.hlines(y=conf_ints.index, xmin=conf_ints[0], xmax=conf_ints[1],
               colors=color, label=width, linewidth=10)

# White circles for the parameter values, drawn once after all the bars.
plt.plot(pollution_model.params, pollution_model.params.index, 'wo',
         label='Point Estimate')
# Parallel lists: legend label, normal critical value and fill colour.
int_widths = ['90%', '99%']
# 1.645 is the two-sided 90% critical value of the standard normal; the
# previous 1.67 made the band labelled '90%' slightly too wide.
z_scores = [1.645, 2.58]
colors = ['#fc8d59', '#fee08b']

# One translucent band of mean -/+ Z standard errors per interval width.
# The loop body is indented again so the fill runs once per width.
for percent, Z, color in zip(int_widths, z_scores, colors):
    plt.fill_between(
        x=cinci_13_no2.day, alpha=0.4, color=color,
        y1=cinci_13_no2['mean'] - Z * cinci_13_no2['std_err'],
        y2=cinci_13_no2['mean'] + Z * cinci_13_no2['std_err'],
        label=percent)
# Parallel lists: line width, legend label and normal critical value.
# Widths shrink as the interval widens, so bars drawn later stay thinner
# than the ones beneath them.
sizes = [15, 10, 5]
int_widths = ['90% CI', '95%', '99%']
# 1.645 is the two-sided 90% critical value of the standard normal; the
# previous 1.67 made the bar labelled '90% CI' slightly too wide.
z_scores = [1.645, 1.96, 2.58]

# The loop body is indented again so one set of bars is drawn per width.
for percent, Z, size in zip(int_widths, z_scores, sizes):
    plt.hlines(y=rocket_model.pollutant,
               xmin=rocket_model['est'] - Z * rocket_model['std_err'],
               xmax=rocket_model['est'] + Z * rocket_model['std_err'],
               label=percent,
               linewidth=size,
               color='gray')

# White circles for the estimates, drawn once after all the bars.
plt.plot('est', 'pollutant', 'wo', data=rocket_model, label='Point Estimate')

# Anchor the legend's centre-left corner at the right edge of the axes.
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
# NO2 readings selected by the query below.
cinci_may_NO2 = pollution.query("city == 'Cincinnati' & month == 5")['NO2']

# `bootstrap` is defined outside this snippet.
# NOTE(review): presumably returns 1000 resampled means - confirm.
boot_means = bootstrap(cinci_may_NO2, 1000)

# 2.5th and 97.5th percentiles of the bootstrap results.
lower, upper = np.percentile(boot_means, [2.5, 97.5])

# Shade the lower..upper range, then draw the histogram (no KDE curve).
plt.axvspan(lower, upper, alpha=0.2, color='gray')
sns.distplot(boot_means, kde=False, bins=100)
# One faint regression line per value of `sample`; the scatter layer and
# the legend are switched off so only the lines are drawn.
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally, while the keyword form works on every release.
sns.lmplot(x='NO2', y='SO2', data=no2_so2_boot,
           hue='sample',
           line_kws={'color': 'steelblue', 'alpha': 0.2},
           ci=None, legend=False, scatter=False)

# Overlay the points of no2_so2.
plt.scatter('NO2', 'SO2', data=no2_so2)
# Collect one frame of bootstrap results per city and concatenate them once
# at the end, instead of re-concatenating onto an initially empty frame on
# every iteration.
boot_frames = []
for city in ['Cincinnati', 'Des Moines', 'Indianapolis', 'Houston']:
    # NO2 readings of the current city within pollution_may.
    city_NO2 = pollution_may[pollution_may.city == city].NO2
    # `bootstrap` is defined outside this snippet.
    # NOTE(review): presumably returns 100 resampled means - confirm.
    cur_boot = pd.DataFrame({'NO2_avg': bootstrap(city_NO2, 100), 'city': city})
    boot_frames.append(cur_boot)
city_boots = pd.concat(boot_frames)

# One marker per bootstrap result, grouped by city.
sns.swarmplot(y="city", x="NO2_avg", data=city_boots, color='coral')
# First three rows, flipped so that each column becomes a row.
first_rows = markets.head(3).T

# Summary of every column, with the median as the only percentile; also
# flipped so that each column becomes a row.
col_descriptions = markets.describe(percentiles=[0.5], include='all').T
# Pairwise scatter plots of the columns listed in numeric_columns.
pd.plotting.scatter_matrix(
    markets[numeric_columns],
    alpha=0.5,
    figsize=(15, 10),
)
# Store the natural log of state_pop as a new column.
markets['log_pop'] = np.log(markets['state_pop'])

# Semi-transparent scatter of the new column against num_items_sold.
sns.scatterplot(
    data=markets,
    x='log_pop',
    y='num_items_sold',
    alpha=0.25,
)
# Regression of months_open on lat over faint gray points.
# ci=None is the documented way to skip the confidence band; the previous
# ci=False still ran the bootstrap and drew a zero-width band.
sns.regplot(x='lat', y='months_open', ci=None, data=markets,
            scatter_kws={'alpha': 0.1, 'color': 'gray'})
# Regression of log_pop on log_markets with very small points.
# x and y are passed by keyword (seaborn >= 0.12 rejects them positionally)
# and ci=None replaces ci=False, which still ran the bootstrap and drew a
# zero-width band.
g = sns.regplot(x="log_markets", y="log_pop", ci=None,
                scatter_kws={'s': 2}, data=markets_and_pop)

# Label every point. The loop body is indented again so both statements
# run once per row.
for _, row in markets_and_pop.iterrows():
    # Positional unpacking: needs exactly five columns, with the state first
    # and log_markets / log_pop last - verify against the frame's schema.
    state, _, _, log_markets, log_pop = row
    g.annotate(state, (log_markets, log_pop), size=10)
# Goods to keep.
to_plot = ['Cheese', 'Maple', 'Fruits', 'Grains', 'Seafood', 'Plants', 'Vegetables']

# `@to_plot` lets query() read the list directly, instead of splicing its
# repr into the expression string.
goods_by_state_small = goods_by_state.query("good in @to_plot")

# Zero-size markers: this call only sets up the axes, the visible marks are
# the text labels added below. x and y are passed by keyword because
# seaborn >= 0.12 rejects them as positional arguments.
g = sns.scatterplot(x='good', y='prop_selling', data=goods_by_state_small, s=0)

# Write each row's state, centred on its (good, prop_selling) position.
# The loop body is indented again so the call runs once per row.
for _, row in goods_by_state_small.iterrows():
    g.annotate(row['state'], (row['good'], row['prop_selling']),
               ha='center', size=10)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment