Last active
December 10, 2021 08:28
-
-
Save 1vth1nk3r/ed0d7726df75266a0b01b97e9784a33d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import seaborn as sns | |
| sns.set() #default sns style | |
| sns.set_style('whitegrid') # "white", "dark", "whitegrid", "darkgrid", "ticks" | |
| sns.set_palette('Purples') | |
| # divergent RdBu, PRGn, RdBu_r, PRGn_r | |
| # sequential Greys, Blues, PuRd, GnBu | |
| # bright, colorblind | |
| # custom ["#39A7D0", "#36ADA4"] | |
| # deep, muted, pastel, bright, dark, colorblind | |
| sns.color_palette('Purples', 8) # husl, coolwarm | |
| sns.set(color_codes=True) #setting to matplotlib color codes | |
| sns.set_context('paper') # 'paper', "notebook", "talk", "poster" | |
| sns.despine(top=True, right=True) #remove lines on axes | |
| # FacetGrid can create subplots (relplot() , catplot()) | |
| g.fig.suptitle("Car Weight vs. Horsepower", y=1.02) | |
| g.set_titles("This is {col_name}") # subtitles | |
| #AxesSubplot (scatterplot() , countplot(), ... ) | |
| AxesSubplot.set_title('Average MPG Over Time') | |
| g.set(xlabel="Car Model Year", ylabel="Average MPG") | |
| plt.xticks(rotation=90) | |
| # using underlying matplot for working with axes | |
| fig, ax = plt.subplots() | |
| sns.distplot(df['fmr_3'], ax=ax) | |
| ax.set(xlabel="1 Bedroom Fair Market Rent", xlim=(100,1500), title="US Rent") | |
| # Add vertical lines for the median | |
| ax.axvline(x=median, color='m', label='Median', linestyle='--', linewidth=2) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Scatter plot | |
| sns.scatterplot(x=gdp, y=phones) | |
| # hue for a third dimension | |
| sns.scatterplot(data=student_data, x="absences", y="G3", hue="location", hue_order = ['Rural', 'Urban']) | |
| # Count plot | |
| sns.countplot(y=region) | |
| palette_colors = {'Rural': "green", 'Urban': "blue"} | |
| sns.countplot(x="school", | |
| data=student_data, | |
| hue="location", | |
| palette=palette_colors) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Show the relationship between two quantitative variables | |
| # Examples: scatter plots, line plots | |
| # Scatter relational | |
| sns.relplot(x="absences", y="G3", data=student_data, kind='scatter') | |
| col='study_time', col_order = ['yes', 'no'] # split to different columns | |
| row="study_time", row_order = ['yes', 'no'] # split to rows | |
| size='cylinders', hue='cylinders' # size and colour as a 3rd dimension | |
| hue='origin', style='origin' # colour and style as a 3rd dimension | |
| # Line relplot | |
| sns.relplot(data=mpg, x='model_year', y='mpg', kind='line') | |
| ci='sd' # standard deviation instead of confidence interval; None hides it | |
| style="origin", markers=True, dashes=False, hue="origin" #all lines solid but different markers |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Show the distribution of a quantitative variable within categories defined by a categorical variable | |
| # Examples: bar plots, count plots, box plots, point plots | |
| # Barplot | |
| sns.catplot(kind='count', data=survey_data, x='Internet usage') | |
| col='Age Category' # splitting | |
| sns.catplot(kind='bar', data=survey_data, x='Gender', y='Interested in Math') | |
| category_order = ["<2 hours", "2 to 5 hours", "5 to 10 hours", ">10 hours"] | |
| sns.catplot(x="study_time", y="G3", data=student_data, kind="bar", order=category_order) #rearrange | |
| # Boxplot | |
| sns.catplot(kind='box', data=student_data, x='study_time', y='G3', order=category_order) | |
| sns.catplot(kind='box', data=student_data, x='internet', y='G3', sym='', hue='location') | |
| # sym='' omits outliers, hue - subgroups | |
| # whis=0.5 | |
| # whis=[5,95] | |
| # length of whiskers | |
| # Point plot | |
| sns.catplot(kind='point', data=student_data, x='famrel', y='absences') | |
| # capsize=0.2 | |
| # join=False - remove lines | |
| sns.catplot(kind="point", x="romantic", y="absences", data=student_data, | |
| hue="school", ci=None, estimator=median) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # distribution plot | |
| sns.distplot(df['Award_Amount'], kde=False, bins=20) # kde=False: histogram only, no smoothed density curve | |
| sns.distplot(df['Award_Amount'], hist=False, rug=True, kde_kws={'shade':True}) | |
| # regplot | |
| sns.regplot(data=df, x='insurance_losses', y='premiums', marker='^') | |
| # x_bins=5 breaks X into bins | |
| # order=2 - polynomial | |
| # fit_reg=False = scatterplot | |
| sns.regplot(data=df, x='mnth', y='total_rentals', x_jitter=.1, order=2) # for categorical | |
| # x_estimator=np.mean | |
| sns.residplot(data=df, y='Tuition', x="SAT_AVG_ALL", color='g') | |
| sns.lmplot(data=df, x="insurance_losses", y="premiums", hue="Region") | |
| row="Region" # split to several |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # show each observation | |
| sns.stripplot(data=df, x='Award_Amount', y='Model Selected', jitter=True) | |
| sns.swarmplot(data=df, x='Award_Amount', y='Model Selected', hue='Region') | |
| # abstract representations | |
| sns.boxplot(data=df, x='Award_Amount', y='Model Selected') | |
| sns.violinplot(data=df, x='Award_Amount', y='Model Selected', palette='husl') | |
| sns.lvplot(data=df, x='Award_Amount', y='Model Selected', palette='Paired', hue='Region') | |
| # statistical estimates | |
| sns.countplot(data=df, y="Model Selected", hue="Region") | |
| sns.pointplot(data=df, y='Award_Amount', x='Model Selected', capsize=.1) | |
| sns.barplot(data=df, y='Award_Amount', x='Model Selected', hue='Region') | |
| # matrix plot | |
| sns.heatmap(pd.crosstab(df["mnth"], df["weekday"], values=df["total_rentals"],aggfunc='mean').round(0), | |
| cbar=False, cmap="YlGnBu", linewidths=0.3, annot=True, fmt="d", | |
| center=df_crosstab.loc[9, 6]) #center colours |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Grid of plots | |
| g2 = sns.FacetGrid(df, row="Degree_Type", row_order=['Graduate', 'Bachelors', 'Associates', 'Certificate']) | |
| g2.map(sns.pointplot, 'SAT_AVG_ALL') # map plots on to grid | |
| # or col="Degree_Type", col_order=degree_ord | |
| # same | |
| sns.factorplot(data=df, x='SAT_AVG_ALL', kind='point', | |
| row='Degree_Type', row_order=['Graduate', 'Bachelors', 'Associates', 'Certificate']) | |
| # Grid of plots | |
| sns.factorplot(data=df, x='Tuition', kind='box', row='Degree_Type') | |
| sns.lmplot(data=df, x='SAT_AVG_ALL', y='Tuition', hue='WOMENONLY', | |
| col="Ownership", row='Degree_Type', row_order=['Graduate', 'Bachelors'], col_order=inst_ord) | |
| # Pairwise | |
| g = sns.PairGrid(df, vars=["fatal_collisions", "premiums"]) | |
| g2 = g.map(plt.scatter) | |
| g2 = g.map_diag(plt.hist) | |
| g3 = g2.map_offdiag(plt.scatter) | |
| # or | |
| sns.pairplot(data=df, vars=["fatal_collisions", "premiums"], kind='scatter', | |
| hue='Region', palette='RdBu', diag_kws={'alpha':.5} ) | |
| sns.pairplot(data=df, | |
| x_vars=["fatal_collisions_speeding", "fatal_collisions_alc"], | |
| y_vars=['premiums', 'insurance_losses'], | |
| kind='scatter', hue='Region', palette='husl') | |
| sns.pairplot(data=df, vars=["insurance_losses", "premiums"], | |
| kind='reg', palette='BrBG', diag_kind = 'kde', hue='Region') | |
| # Joint of reg and dist | |
| g = sns.JointGrid(x="hum", y="total_rentals", data=df, xlim=(0.1, 1.0)) | |
| g.plot(sns.regplot, sns.distplot) | |
| #same | |
| sns.jointplot(x="hum", y="total_rentals", kind='reg', data=df) | |
| #regression and residuals | |
| sns.jointplot(x="temp", y="total_rentals", kind='reg', data=df, order=2, xlim=(0, 1)) | |
| sns.jointplot(x="temp", y="total_rentals", kind='resid', data=df, order=2) | |
| # kdeplot over the scatter plot | |
| g = (sns.jointplot(x="temp", y="casual", kind='scatter', data=df, marginal_kws=dict(bins=10, rug=True)) | |
| .plot_joint(sns.kdeplot)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment