Skip to content

Instantly share code, notes, and snippets.

@dmitryhd
Last active November 7, 2016 08:54
Show Gist options
  • Select an option

  • Save dmitryhd/3aee2a0ad1cece50440544e14fd29da3 to your computer and use it in GitHub Desktop.

Select an option

Save dmitryhd/3aee2a0ad1cece50440544e14fd29da3 to your computer and use it in GitHub Desktop.
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
import seaborn as sns
%matplotlib inline
pylab.rcParams['figure.figsize'] = 12.0, 7.0
plt.rcParams['axes.labelweight'] = 'bold'
plt.rcParams['axes.titleweight'] = 'bold'
plt.rcParams['font.family'] = 'serif'
plt.rcParams['font.serif'] = 'Ubuntu'
plt.rcParams['font.monospace'] = 'Ubuntu Mono'
# plt.rcParams['font.size'] = 14
# sns.set_context("notebook", font_scale=1.3)
# Number of ticks: ask the tick locators for at most 4 bins per axis.
plt.locator_params(nbins=4)

# Add a vertical line at a given date, spanning y = 0 .. 30000.
# ``datetime`` is imported right here because nothing else in this file
# brings it into scope; without it the call below raises NameError.
from datetime import datetime
plt.vlines(x=datetime(2016, 4, 21), ymin=0, ymax=30000)

# Dotted line: add this keyword to the plotting call.
# ls='dashed'
# barplot
# ``x`` is passed by keyword: current seaborn releases no longer accept the
# data variables as positional arguments.
sns.barplot(x="log_time", y="active_user_num", data=job_log2)
# for white grid
# NOTE(review): style settings appear to affect only axes created afterwards,
# so run this before plotting if the grid is wanted on that figure - confirm.
sns.set_style("whitegrid")
# get axis: the currently active Axes object
ax = plt.gca()

# add text: place a label at position (2, 2); the part between the dollar
# signs is written in TeX-style math notation
ax.text(x=2, y=2, s=r'an equation: $E=mc^2$', fontsize=15)

# set marker style: keyword arguments collected once and splatted into plot()
att = dict(
    color='white',
    markerfacecolor=None,
    markersize=8.0,
    markeredgewidth=1.0,
    alpha=1.0,
    marker='o',
    markeredgecolor='gray',
)
plt.plot(x, y, **att)

# Rotate labels: fetch the current x tick labels and tilt each by 45 degrees
locs, labels = plt.xticks()
plt.setp(labels, 'rotation', 45);
# Format dates
# 1: store a pre-formatted "YYYY-MM-DD" string for every log_time value
job_log2['log_time_fmt'] = job_log2['log_time'].apply(
    lambda stamp: stamp.strftime("%Y-%m-%d")
)
# 2: let matplotlib place one major tick per day on the x axis
from matplotlib.dates import (
    DayLocator,
    HourLocator,
    DateFormatter,
    drange,
)
ax = plt.gca()
ax.xaxis.set_major_locator(DayLocator())
# Optional minor ticks every 6 hours:
# ax.xaxis.set_minor_locator(HourLocator(np.arange(0, 25, 6)))
# Thousand separator on axis
# The ticker submodule is imported explicitly: a bare ``import matplotlib``
# does not by itself guarantee that ``matplotlib.ticker`` is loaded.
import matplotlib
import matplotlib.ticker
ax = plt.gca()
# FuncFormatter calls the function with (tick value, tick position); the value
# is truncated to an int and rendered with ',' as the thousands separator.
ax.get_yaxis().set_major_formatter(
    matplotlib.ticker.FuncFormatter(
        lambda value, _pos: format(int(value), ',')))
# Plot time series with under curve
t = job_res.set_index('send_time')
# Running total of the uplift column. It is computed once with the pandas
# method, so this snippet does not depend on ``np`` (imported only further
# down in this file), and the same values feed both the line and the fill.
cumulative_uplift = t.contacters_uplift.cumsum()
plt.plot(cumulative_uplift, color='steelblue', lw=3) #, hue='rec_type')
# sns.barplot("log_time_fmt", y="retention_user_num", data=job_log2)
# Shade the area between the curve and y = 0.
plt.fill_between(x=t.index, y1=cumulative_uplift.values, y2=0, color='steelblue', alpha=.5)
# simple barplot
import pandas as pd
# Three-row demo frame: a text label column and an integer value column.
df = pd.DataFrame(
    [('aaa', 1), ('bbb', 2), ('ccc', 3)],
    columns=['name', 'val'],
)
def barplot(df, x_name: str, y_name: str):
    """Draw a vertical bar chart of ``df[y_name]``, largest value first.

    Args:
        df: pandas DataFrame with the data; it is left unmodified.
        x_name: column whose values are used as the bar labels.
        y_name: column with the bar heights; rows are sorted by it in
            descending order before plotting.
    """
    ordered = df.sort_values(y_name, ascending=False)
    positions = list(range(len(ordered)))
    # Bars are centred on their integer positions explicitly, and the ticks
    # are put on those same positions, so labels sit under the middle of
    # each bar whatever matplotlib's default alignment is.
    plt.bar(positions, ordered[y_name], align='center')
    plt.xticks(positions, ordered[x_name], rotation=90)
def hbarplot(df, x_name: str, y_name: str):
    """Draw a horizontal bar chart of ``df[y_name]``, largest value on top.

    Args:
        df: pandas DataFrame with the data; it is left unmodified.
        x_name: column whose values are used as the bar labels.
        y_name: column with the bar lengths; rows are sorted by it in
            ascending order, so the largest bar gets the highest position.
    """
    ordered = df.sort_values(y_name, ascending=True)
    positions = list(range(len(ordered)))
    # Bars are centred on their integer positions explicitly, and the ticks
    # are put on those same positions, so labels sit beside the middle of
    # each bar whatever matplotlib's default alignment is.
    plt.barh(positions, ordered[y_name], align='center')
    plt.yticks(positions, ordered[x_name])
# Example: bar chart of the demo frame, labelled by 'name', sized by 'val'.
barplot(df, x_name='name', y_name='val')
import numpy as np
import statsmodels as sm
import matplotlib.pyplot as plt
# sample = np.random.uniform(0, 1, 50)
def plot_ecdf(sample, bins=1000, max_percentile=98, title='', percentiles=None):
    """Plot the empirical CDF of ``sample`` as a step curve.

    The ECDF at a point ``v`` is the fraction of sample values ``<= v``. It is
    computed with numpy only, so no statsmodels submodule has to be loaded.

    Args:
        sample: non-empty one-dimensional collection of numbers.
        bins: number of evenly spaced x positions, between the sample minimum
            and maximum, at which the ECDF is evaluated.
        max_percentile: percentile of ``sample`` used as the right x limit
            (cuts off the upper tail of the plot).
        title: text appended to the ``'ECDF '`` plot title.
        percentiles: percentiles to mark with a vertical line and an x tick;
            ``None`` or an empty collection means ``[25, 50, 75, 90, 95]``.

    Raises:
        ValueError: if ``sample`` is empty.
    """
    values = np.sort(np.asarray(sample).ravel())
    if values.size == 0:
        raise ValueError('plot_ecdf() needs a non-empty sample')
    lowest, highest = values[0], values[-1]

    grid = np.linspace(lowest, highest, num=bins)
    # side='right' counts the values <= each grid point.
    cumulative = np.searchsorted(values, grid, side='right') / values.size
    plt.step(grid, cumulative)

    # One label per tick: 0, 10, ..., 100 percent at 0.0, 0.1, ..., 1.0.
    tick_percents = list(range(0, 101, 10))
    plt.yticks([percent / 100 for percent in tick_percents], tick_percents)
    plt.ylabel('Percentile (%)')
    plt.xlabel('Value')
    plt.xlim(lowest, np.percentile(values, max_percentile))
    plt.title('ECDF ' + title)

    # Vertical lines with x ticks on the requested percentiles of the sample.
    default_percentiles = [25, 50, 75, 90, 95]
    if percentiles is None:
        percentiles_to_draw = default_percentiles
    else:
        # list() also copes with numpy arrays, whose truth value is ambiguous.
        percentiles_to_draw = list(percentiles) or default_percentiles
    marks = [np.percentile(values, percentile)
             for percentile in percentiles_to_draw]
    plt.vlines(x=marks, ymin=0, ymax=1, colors='gray', lw=1)  # linestyles='dashed'
    plt.xticks(marks)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment