Last active
November 7, 2016 08:54
-
-
Save dmitryhd/3aee2a0ad1cece50440544e14fd29da3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import matplotlib.pyplot as plt | |
| import matplotlib.pylab as pylab | |
| import seaborn as sns | |
| %matplotlib inline | |
# Global matplotlib defaults for the notebook: figure size, bold axis
# labels/titles and the Ubuntu font family.
_RC_OVERRIDES = [
    ('figure.figsize', (12.0, 7.0)),
    ('axes.labelweight', 'bold'),
    ('axes.titleweight', 'bold'),
    ('font.family', 'serif'),
    ('font.serif', 'Ubuntu'),
    ('font.monospace', 'Ubuntu Mono'),
]
for _rc_key, _rc_value in _RC_OVERRIDES:
    plt.rcParams[_rc_key] = _rc_value

# Optional: larger fonts.
# plt.rcParams['font.size'] = 14
# sns.set_context("notebook", font_scale=1.3)
# `datetime` is needed by the vertical-line snippet below.
from datetime import datetime

# Number of ticks
plt.locator_params(nbins=4)

# Add a vertical line at a given date
plt.vlines(x=datetime(2016, 4, 21), ymin=0, ymax=30000)
# Dashed line: additionally pass
# ls='dashed'

# Bar plot from a DataFrame. `x` is passed by keyword because recent
# seaborn releases no longer accept the data variables positionally.
sns.barplot(x="log_time", y="active_user_num", data=job_log2)

# White background with grid lines
sns.set_style("whitegrid")
# Grab the current axes
ax = plt.gca()

# Put a text label (with a mathtext formula) at x=2, y=2
ax.text(2, 2, r'an equation: $E=mc^2$', fontsize=15)

# Marker style: round markers with a thin gray edge, drawn in white
att = dict(
    color='white',
    markerfacecolor=None,
    markersize=8.0,
    markeredgewidth=1.0,
    alpha=1.0,
    marker='o',
    markeredgecolor='gray',
)
plt.plot(x, y, **att)

# Rotate the x tick labels by 45 degrees
locs, labels = plt.xticks()
plt.setp(labels, rotation=45);
# Format dates
# Option 1: pre-render the timestamps as "YYYY-MM-DD" strings in a new column
def _as_iso_day(ts):
    """Render one timestamp as a "YYYY-MM-DD" string."""
    return ts.strftime("%Y-%m-%d")


job_log2['log_time_fmt'] = job_log2['log_time'].apply(_as_iso_day)

# Option 2: let matplotlib place date-aware major ticks on the x axis
from matplotlib.dates import DayLocator, HourLocator, DateFormatter, drange
ax = plt.gca()
ax.xaxis.set_major_locator(DayLocator())
# ax.xaxis.set_minor_locator(HourLocator(np.arange(0, 25, 6)))

# Thousand separator on the y axis
import matplotlib


def _with_thousands_sep(value, _pos):
    """Tick formatter: integer part of *value* with ',' as thousands separator."""
    return '{:,}'.format(int(value))


ax = plt.gca()
ax.yaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(_with_thousands_sep))
# Plot a cumulative time series and shade the area under the curve
t = job_res.set_index('send_time')
# Compute the running total once and reuse it for both the line and the fill.
# Series.cumsum() is used so the snippet does not need numpy to be imported.
cumulative_uplift = t.contacters_uplift.cumsum()
plt.plot(cumulative_uplift, color='steelblue', lw=3)  # , hue='rec_type')
# sns.barplot(x="log_time_fmt", y="retention_user_num", data=job_log2)
plt.fill_between(x=t.index, y1=cumulative_uplift.values, y2=0,
                 color='steelblue', alpha=.5)
# Simple bar plot: small demo frame with a label column and a value column
import pandas as pd

df = pd.DataFrame(
    [('aaa', 1), ('bbb', 2), ('ccc', 3)],
    columns=['name', 'val'],
)
def barplot(df, x_name: str, y_name: str):
    """Draw a vertical bar chart of ``df[y_name]``, largest value first.

    The bars are drawn on the current pyplot axes. The input frame is not
    modified.

    Args:
        df: DataFrame holding the data.
        x_name: Column whose values are used as tick labels on the x axis.
        y_name: Column whose values give the bar heights and the sort order.
    """
    # sort_values returns a new frame, so the caller's data stays untouched.
    ordered = df.sort_values(y_name, ascending=False)
    positions = list(range(len(ordered)))
    # Centre the bars explicitly and put the ticks on the same positions, so
    # labels line up with their bars regardless of matplotlib's default
    # alignment.
    plt.bar(positions, ordered[y_name], align='center')
    plt.xticks(positions, ordered[x_name], rotation=90)
def hbarplot(df, x_name: str, y_name: str):
    """Draw a horizontal bar chart of ``df[y_name]``, sorted ascending.

    Rows are sorted in ascending order of ``y_name`` and drawn at positions
    0, 1, 2, ... along the y axis of the current pyplot axes. The input frame
    is not modified.

    Args:
        df: DataFrame holding the data.
        x_name: Column whose values are used as tick labels on the y axis.
        y_name: Column whose values give the bar lengths and the sort order.
    """
    # sort_values returns a new frame, so the caller's data stays untouched.
    ordered = df.sort_values(y_name, ascending=True)
    positions = list(range(len(ordered)))
    # Centre the bars explicitly and put the ticks on the same positions, so
    # labels line up with their bars regardless of matplotlib's default
    # alignment.
    plt.barh(positions, ordered[y_name], align='center')
    plt.yticks(positions, ordered[x_name])
# Demo: chart the 'val' column of the sample frame, labelled by 'name'.
barplot(df, x_name='name', y_name='val')
| import numpy as np | |
| import statsmodels as sm | |
| import matplotlib.pyplot as plt | |
| # sample = np.random.uniform(0, 1, 50) | |
def plot_ecdf(sample, bins=1000, max_percentile=98, title='', percentiles=None):
    """Plot the empirical CDF of *sample* on the current pyplot axes.

    The ECDF is evaluated on ``bins`` evenly spaced points between the
    smallest and largest sample value and drawn as a step curve. Selected
    percentiles are marked with gray vertical lines, and the x ticks are
    placed on those percentile values.

    Args:
        sample: One-dimensional collection of numbers.
        bins: Number of x positions at which the ECDF is evaluated.
        max_percentile: Percentile of *sample* used as the right x-axis limit.
        title: Text appended to the ``'ECDF '`` plot title.
        percentiles: Percentiles to mark. When ``None`` or empty,
            25, 50, 75, 90 and 95 are used.

    Raises:
        ValueError: If *sample* is empty.
    """
    values = np.sort(np.asarray(sample))
    if values.size == 0:
        raise ValueError('sample must contain at least one value')
    lowest, highest = values[0], values[-1]

    # ECDF(x) = share of observations <= x. On sorted data this is the
    # right-sided insertion index divided by the sample size.
    grid = np.linspace(lowest, highest, num=bins)
    cumulative_share = np.searchsorted(values, grid, side='right') / values.size
    plt.step(grid, cumulative_share)

    # One label per tick: 0, 10, ..., 100 (positions and labels must match
    # in length).
    tick_positions = np.linspace(0, 1, 11)
    tick_labels = [int(round(pos * 100)) for pos in tick_positions]
    plt.yticks(tick_positions, tick_labels)
    plt.ylabel('Percentile (%)')
    plt.xlabel('Value')
    # Cut the long right tail so the bulk of the distribution stays readable.
    plt.xlim(lowest, np.percentile(values, max_percentile))
    plt.title('ECDF ' + title)

    # Vertical lines with x ticks on the requested percentiles.
    if percentiles is None or len(percentiles) == 0:
        percentiles_to_draw = [25, 50, 75, 90, 95]
    else:
        percentiles_to_draw = percentiles
    # Percentiles are taken from the sample passed in, not from a global.
    marks = [np.percentile(values, pct) for pct in percentiles_to_draw]
    for mark in marks:
        plt.vlines(x=mark, ymin=0, ymax=1, colors='gray', lw=1)  # linestyles='dashed'
    plt.xticks(marks)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment