Skip to content

Instantly share code, notes, and snippets.

@1vth1nk3r
Last active December 10, 2021 08:28
Show Gist options
  • Select an option

  • Save 1vth1nk3r/ed0d7726df75266a0b01b97e9784a33d to your computer and use it in GitHub Desktop.

Select an option

Save 1vth1nk3r/ed0d7726df75266a0b01b97e9784a33d to your computer and use it in GitHub Desktop.
import seaborn as sns
# Global seaborn appearance settings: style, palette, context and spines.
sns.set() # apply the default seaborn style
sns.set_style('whitegrid') # options: "white", "dark", "whitegrid", "darkgrid", "ticks"
sns.set_palette('Purples') # set 'Purples' as the palette
# Palette names that can be used instead:
#   diverging:  RdBu, PRGn, RdBu_r, PRGn_r
#   sequential: Greys, Blues, PuRd, GnBu
#   bright, colorblind
#   custom list of hex codes: ["#39A7D0", "#36ADA4"]
#   deep, muted, pastel, bright, dark, colorblind
sns.color_palette('Purples', 8) # 'Purples' palette with 8 colours (result is not stored here); also: husl, coolwarm
sns.set(color_codes=True) # setting to matplotlib color codes
# NOTE(review): sns.set() is called again here with only color_codes given;
# presumably this resets the style and palette chosen above to their
# defaults - confirm the intended order of these calls.
sns.set_context('paper') # options: 'paper', "notebook", "talk", "poster"
sns.despine(top=True, right=True) # remove the top and right lines on the axes
# Titles and axis labels.
# NOTE(review): `g`, `AxesSubplot` and `plt` are not defined in the visible
# code; `g` is presumably a FacetGrid returned by relplot()/catplot() and
# `plt` presumably matplotlib.pyplot - confirm.
# FacetGrid can create subplots (relplot(), catplot())
g.fig.suptitle("Car Weight vs. Horsepower", y=1.02) # title for the whole figure, positioned at y=1.02
g.set_titles("This is {col_name}") # subtitles (one per subplot)
# AxesSubplot (scatterplot(), countplot(), ...)
AxesSubplot.set_title('Average MPG Over Time') # `AxesSubplot` looks like a placeholder for an axes object - confirm
g.set(xlabel="Car Model Year", ylabel="Average MPG") # axis labels
plt.xticks(rotation=90) # rotate the x tick labels by 90 degrees
# Draw on an explicit matplotlib Axes to get direct control over the axes
fig, ax = plt.subplots()
sns.distplot(df['fmr_3'], ax=ax)
ax.set(
    xlabel="1 Bedroom Fair Market Rent",
    xlim=(100, 1500),
    title="US Rent",
)
# Mark the median with a dashed vertical line
ax.axvline(
    x=median,
    color='m',
    label='Median',
    linestyle='--',
    linewidth=2,
)
# Scatter plot
sns.scatterplot(x=gdp, y=phones)
# hue adds a third dimension (colour), drawn in the given order
sns.scatterplot(
    data=student_data,
    x="absences",
    y="G3",
    hue="location",
    hue_order=['Rural', 'Urban'],
)
# Count plot
sns.countplot(y=region)
# Count plot with an explicit colour for each hue level
palette_colors = {
    'Rural': "green",
    'Urban': "blue",
}
sns.countplot(
    x="school",
    data=student_data,
    hue="location",
    palette=palette_colors,
)
# Show the relationship between two quantitative variables
# Examples: scatter plots, line plots
# Scatter relational
sns.relplot(x="absences", y="G3", data=student_data, kind='scatter')
# Optional keyword arguments (kept as comments: they are not valid statements on their own):
#   col='study_time', col_order=['yes', 'no'] - split into different columns
#   row="study_time", row_order=['yes', 'no'] - split into rows
#   size='cylinders', hue='cylinders' - size and colour as a 3rd dimension
#   hue='origin', style='origin' - colour and style as a 3rd dimension
# Line relplot
sns.relplot(data=mpg, x='model_year', y='mpg', kind='line')
# Optional keyword arguments:
#   ci='sd' - standard deviation instead of confidence interval, None to hide it
#   style="origin", markers=True, dashes=False, hue="origin" - all lines solid but different markers
# Show the distribution of a quantitative variable within categories defined by a categorical variable
# Examples: bar plots, count plots, box plots, point plots
# Count plot
sns.catplot(kind='count', data=survey_data, x='Internet usage')
# Optional keyword argument (kept as a comment so it is not executed as a stray assignment):
#   col='Age Category' - splitting
# Bar plot
sns.catplot(kind='bar', data=survey_data, x='Gender', y='Interested in Math')
# Explicit category order for the x axis
category_order = ["<2 hours", "2 to 5 hours", "5 to 10 hours", ">10 hours"]
sns.catplot(x="study_time", y="G3", data=student_data, kind="bar", order=category_order)  # rearrange the categories
# Box plot
sns.catplot(
    kind='box',
    data=student_data,
    x='study_time',
    y='G3',
    order=category_order,
)
sns.catplot(
    kind='box',
    data=student_data,
    x='internet',
    y='G3',
    sym='',
    hue='location',
)
# sym='' omits outliers, hue - subgroups
# Length of whiskers:
#   whis=0.5
#   whis=[5,95]
# Point plot
sns.catplot(
    kind='point',
    data=student_data,
    x='famrel',
    y='absences',
)
# Optional keyword arguments:
#   capsize=0.2
#   join=False - remove lines
sns.catplot(
    kind="point",
    x="romantic",
    y="absences",
    data=student_data,
    hue="school",
    ci=None,
    estimator=median,
)
# Distribution plot
# kde=False with 20 bins
sns.distplot(
    df['Award_Amount'],
    kde=False,
    bins=20,
)
# hist=False with a rug plot; 'shade' is passed through kde_kws
sns.distplot(
    df['Award_Amount'],
    hist=False,
    rug=True,
    kde_kws={'shade': True},
)
# regplot
sns.regplot(data=df, x='insurance_losses', y='premiums', marker='^')
# Optional keyword arguments:
#   x_bins=5 - breaks X into bins
#   order=2 - polynomial
#   fit_reg=False - scatter plot only
sns.regplot(data=df, x='mnth', y='total_rentals', x_jitter=.1, order=2)  # for categorical
#   x_estimator=np.mean
sns.residplot(data=df, y='Tuition', x="SAT_AVG_ALL", color='g')
sns.lmplot(data=df, x="insurance_losses", y="premiums", hue="Region")
# Optional keyword argument (kept as a comment so it is not executed as a stray assignment):
#   row="Region" - split into several plots
# show each observation
sns.stripplot(data=df, x='Award_Amount', y='Model Selected', jitter=True)
sns.swarmplot(data=df, x='Award_Amount', y='Model Selected', hue='Region')
# abstract representations
sns.boxplot(data=df, x='Award_Amount', y='Model Selected')
sns.violinplot(data=df, x='Award_Amount', y='Model Selected', palette='husl')
# boxenplot is the current name of the former lvplot (letter-value plot)
sns.boxenplot(data=df, x='Award_Amount', y='Model Selected', palette='Paired', hue='Region')
# statistical estimates
sns.countplot(data=df, y="Model Selected", hue="Region")
sns.pointplot(data=df, y='Award_Amount', x='Model Selected', capsize=.1)
sns.barplot(data=df, y='Award_Amount', x='Model Selected', hue='Region')
# matrix plot
# Build the month x weekday table of mean rentals once, so that it can be both
# plotted and used to pick the value the colour map is centred on.
df_crosstab = pd.crosstab(
    df["mnth"],
    df["weekday"],
    values=df["total_rentals"],
    aggfunc='mean',
).round(0)
# NOTE(review): fmt="d" is an integer format; confirm the rounded table holds
# integer values, otherwise the annotations may fail to format.
sns.heatmap(
    df_crosstab,
    cbar=False,
    cmap="YlGnBu",
    linewidths=0.3,
    annot=True,
    fmt="d",
    center=df_crosstab.loc[9, 6],  # center colours
)
# Grid of plots
g2 = sns.FacetGrid(df, row="Degree_Type", row_order=['Graduate', 'Bachelors', 'Associates', 'Certificate'])
g2.map(sns.pointplot, 'SAT_AVG_ALL')  # map plots on to grid
# or col="Degree_Type", col_order=degree_ord
# same, using catplot (the current name of the former factorplot)
sns.catplot(
    data=df,
    x='SAT_AVG_ALL',
    kind='point',
    row='Degree_Type',
    row_order=['Graduate', 'Bachelors', 'Associates', 'Certificate'],
)
# Grid of plots
sns.catplot(data=df, x='Tuition', kind='box', row='Degree_Type')
sns.lmplot(
    data=df,
    x='SAT_AVG_ALL',
    y='Tuition',
    hue='WOMENONLY',
    col="Ownership",
    row='Degree_Type',
    row_order=['Graduate', 'Bachelors'],
    col_order=inst_ord,
)
# Pairwise relationships on a PairGrid
g = sns.PairGrid(
    df,
    vars=["fatal_collisions", "premiums"],
)
g2 = g.map(plt.scatter)
g2 = g.map_diag(plt.hist)
g3 = g2.map_offdiag(plt.scatter)
# or use pairplot
sns.pairplot(
    data=df,
    vars=["fatal_collisions", "premiums"],
    kind='scatter',
    hue='Region',
    palette='RdBu',
    diag_kws={'alpha': .5},
)
# different variables on the x and y axes
sns.pairplot(
    data=df,
    x_vars=["fatal_collisions_speeding", "fatal_collisions_alc"],
    y_vars=['premiums', 'insurance_losses'],
    kind='scatter',
    hue='Region',
    palette='husl',
)
# kind='reg' with diag_kind='kde'
sns.pairplot(
    data=df,
    vars=["insurance_losses", "premiums"],
    kind='reg',
    palette='BrBG',
    diag_kind='kde',
    hue='Region',
)
# Joint of reg and dist, built on a JointGrid
g = sns.JointGrid(
    x="hum",
    y="total_rentals",
    data=df,
    xlim=(0.1, 1.0),
)
g.plot(sns.regplot, sns.distplot)
# same, using jointplot
sns.jointplot(
    x="hum",
    y="total_rentals",
    kind='reg',
    data=df,
)
# regression and residuals
sns.jointplot(
    x="temp",
    y="total_rentals",
    kind='reg',
    data=df,
    order=2,
    xlim=(0, 1),
)
sns.jointplot(
    x="temp",
    y="total_rentals",
    kind='resid',
    data=df,
    order=2,
)
# kdeplot over the scatter plot
g = sns.jointplot(
    x="temp",
    y="casual",
    kind='scatter',
    data=df,
    marginal_kws=dict(bins=10, rug=True),
)
g = g.plot_joint(sns.kdeplot)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment