1vth1nk3r · December 10, 2021 08:28
diff --git a/seaborn from dtc 0 style b/seaborn from dtc 0 style
 import seaborn as sns

 sns.set() #default sns style
 sns.set_style('whitegrid') #  "white", "dark", "whitegrid", "darkgrid", "ticks"

 sns.set_palette('Purples')
 # divergent RdBu, PRGn, RdBu_r, PRGn_r
 # sequential Greys, Blues, PuRd, GnBu
 # bright, colorblind
 # custom ["#39A7D0", "#36ADA4"] 
 # deep, muted, pastel, bright, dark, colorblind

 sns.color_palette('Purples', 8) # husl, coolwarm

 sns.set(color_codes=True) #setting to matplotlib color codes

 sns.set_context('paper') # 'paper', "notebook", "talk", "poster"

 sns.despine(top=True, right=True) #remove lines on axes


 # FacetGrid can create subplots (relplot() , catplot())
 g.fig.suptitle("Car Weight vs. Horsepower", y=1.02)
 g.set_titles("This is {col_name}") # subtitles


 #AxesSubplot (scatterplot() , countplot(), ... )
 AxesSubplot.set_title('Average MPG Over Time')


 g.set(xlabel="Car Model Year", ylabel="Average MPG")
 plt.xticks(rotation=90)

 # using the underlying matplotlib API to work with the axes
 fig, ax = plt.subplots()
 sns.distplot(df['fmr_3'], ax=ax)
 ax.set(xlabel="1 Bedroom Fair Market Rent", xlim=(100,1500), title="US Rent")

 # Add vertical lines for the median
 ax.axvline(x=median, color='m', label='Median', linestyle='--', linewidth=2)
diff --git a/seaborn from dtc 1 count & scatter b/seaborn from dtc 1 count & scatter
 # Scatter plot 
 sns.scatterplot(x=gdp, y=phones)

 # hue for a third dimension
 sns.scatterplot(data=student_data, x="absences", y="G3", hue="location", hue_order = ['Rural', 'Urban'])


 # Count plot 
 sns.countplot(y=region)

 palette_colors = {'Rural': "green", 'Urban': "blue"}
 sns.countplot(x="school",  
                data=student_data, 
                hue="location",
                palette=palette_colors)
diff --git a/seaborn from dtc 2 relplot b/seaborn from dtc 2 relplot
 # Show the relationship between two quantitative variables 
 # Examples: scatter plots, line plots

 # Scatter relational
 sns.relplot(x="absences", y="G3", data=student_data, kind='scatter')

 col='study_time', col_order = ['yes', 'no'] # split to different columns
 row="study_time", row_order = ['yes', 'no'] # split to rows 

 size='cylinders', hue='cylinders' # size and colour as a 3rd dimension
 hue='origin', style='origin'  # colour and style as a 3rd dimension


 # Line relplot
 sns.relplot(data=mpg, x='model_year', y='mpg', kind='line')

 ci='sd' #std instead of conf interv, None for hiding

 style="origin", markers=True, dashes=False, hue="origin" #all lines solid but different markers
diff --git a/seaborn from dtc 3 catplot b/seaborn from dtc 3 catplot
 # Show the distribution of a quantitative variable within categories defined by a categorical variable
 # Examples: bar plots, count plots, box plots, point plots

 # Barplot
 sns.catplot(kind='count', data=survey_data, x='Internet usage')
 col='Age Category' # splitting

 sns.catplot(kind='bar', data=survey_data, x='Gender', y='Interested in Math')

 category_order = ["<2 hours", "2 to 5 hours", "5 to 10 hours", ">10 hours"]
 sns.catplot(x="study_time", y="G3", data=student_data, kind="bar", order=category_order) #rearrange


 # Boxplot
 sns.catplot(kind='box', data=student_data, x='study_time', y='G3', order=category_order)

 sns.catplot(kind='box', data=student_data, x='internet', y='G3', sym='', hue='location') 
 # sym='' omits outliers, hue - subgroups

 # whis=0.5
 # whis=[5,95]
 # length of whiskers


 # Point plot 
 sns.catplot(kind='point', data=student_data, x='famrel', y='absences')
 # capsize=0.2
 # join=False - remove lines

 sns.catplot(kind="point", x="romantic", y="absences", data=student_data,
            hue="school", ci=None, estimator=median)
diff --git a/seaborn from dtc 3 distplot b/seaborn from dtc 3 distplot
 # distribution plot
 sns.distplot(df['Award_Amount'], kde=False, bins=20) # histogram only: kde=False switches the smoothed density curve off

 # density curve only (hist=False), shaded, with a rug added (rug=True)
 sns.distplot(df['Award_Amount'], hist=False, rug=True, kde_kws={'shade':True})
             
             
 # regplot             
 sns.regplot(data=df, x='insurance_losses', y='premiums', marker='^')
 # x_bins=5 breaks X into bins
 # order=2 - polynomial
 #   fit_reg=False = scatterplot

 sns.regplot(data=df, x='mnth', y='total_rentals', x_jitter=.1, order=2) # for categorical
 # x_estimator=np.mean

 sns.residplot(data=df, y='Tuition',  x="SAT_AVG_ALL", color='g')

 sns.lmplot(data=df, x="insurance_losses", y="premiums", hue="Region")
 row="Region" # split to several
diff --git a/seaborn from dtc 4 b/seaborn from dtc 4
 # show each observation
 sns.stripplot(data=df, x='Award_Amount', y='Model Selected', jitter=True)
 sns.swarmplot(data=df, x='Award_Amount', y='Model Selected', hue='Region')         
        
 # abstract representations        
 sns.boxplot(data=df, x='Award_Amount', y='Model Selected')
 sns.violinplot(data=df, x='Award_Amount', y='Model Selected', palette='husl')
 # boxenplot is the current name of the letter-value plot (formerly sns.lvplot)
 sns.boxenplot(data=df, x='Award_Amount', y='Model Selected', palette='Paired', hue='Region')

 # statistical estimates
 sns.countplot(data=df, y="Model Selected", hue="Region")
 sns.pointplot(data=df, y='Award_Amount', x='Model Selected', capsize=.1)
 sns.barplot(data=df, y='Award_Amount', x='Model Selected', hue='Region')

 # matrix plot
 # Table of mean total_rentals by mnth (rows) and weekday (columns), rounded
 df_crosstab = pd.crosstab(df["mnth"], df["weekday"],
                           values=df["total_rentals"], aggfunc='mean').round(0)
 # fmt=".0f": the rounded means are still floats, which the integer code "d" rejects
 sns.heatmap(df_crosstab,
             cbar=False, cmap="YlGnBu", linewidths=0.3, annot=True, fmt=".0f",
             center=df_crosstab.loc[9, 6]) # centre the colormap on the cell labelled (9, 6)
diff --git a/seaborn from dtc 5 b/seaborn from dtc 5
 # Grid of plots
 g2 = sns.FacetGrid(df, row="Degree_Type", row_order=['Graduate', 'Bachelors', 'Associates', 'Certificate'])
 g2.map(sns.pointplot, 'SAT_AVG_ALL') # map plots on to grid
 # or col="Degree_Type", col_order=degree_ord

 # same 
 # catplot is the current name of sns.factorplot
 sns.catplot(data=df, x='SAT_AVG_ALL', kind='point',
         row='Degree_Type', row_order=['Graduate', 'Bachelors', 'Associates', 'Certificate'])

 # Grid of plots
 sns.catplot(data=df, x='Tuition', kind='box', row='Degree_Type')

 sns.lmplot(data=df, x='SAT_AVG_ALL', y='Tuition',  hue='WOMENONLY',
        col="Ownership", row='Degree_Type',  row_order=['Graduate', 'Bachelors'],  col_order=inst_ord)


 # Pairwise
 g = sns.PairGrid(df, vars=["fatal_collisions", "premiums"])
 # map() applies one plotting function to every cell of the grid
 g2 = g.map(plt.scatter)
 # map_diag() / map_offdiag() give the diagonal and the other cells different plots
 # NOTE(review): presumably an alternative to the map() call above rather than a
 # follow-up, since map() has already drawn on the diagonal - confirm
 g2 = g.map_diag(plt.hist)
 g3 = g2.map_offdiag(plt.scatter)

 # or 
 sns.pairplot(data=df, vars=["fatal_collisions", "premiums"], kind='scatter', 
            hue='Region', palette='RdBu', diag_kws={'alpha':.5} )
            
 sns.pairplot(data=df,
        x_vars=["fatal_collisions_speeding", "fatal_collisions_alc"],
        y_vars=['premiums', 'insurance_losses'],
        kind='scatter', hue='Region', palette='husl')
        
 sns.pairplot(data=df, vars=["insurance_losses", "premiums"],
             kind='reg', palette='BrBG', diag_kind = 'kde', hue='Region')      
             
 # Joint of reg and dist
 g = sns.JointGrid(x="hum", y="total_rentals", data=df, xlim=(0.1, 1.0)) 
 g.plot(sns.regplot, sns.distplot)

 #same
 sns.jointplot(x="hum", y="total_rentals", kind='reg', data=df)


 #regression and residuals
 sns.jointplot(x="temp", y="total_rentals", kind='reg', data=df, order=2, xlim=(0, 1))
 sns.jointplot(x="temp", y="total_rentals", kind='resid', data=df, order=2)         


 # kdeplot over the scatter plot
 g = (sns.jointplot(x="temp", y="casual",  kind='scatter', data=df, marginal_kws=dict(bins=10, rug=True))
    .plot_joint(sns.kdeplot))
	import seaborn as sns

	sns.set() #default sns style
	sns.set_style('whitegrid') # "white", "dark", "whitegrid", "darkgrid", "ticks"

	sns.set_palette('Purples')
	# divergent RdBu, PRGn, RdBu_r, PRGn_r
	# sequential Greys, Blues, PuRd, GnBu
	# bright, colorblind
	# custom ["#39A7D0", "#36ADA4"]
	# deep, muted, pastel, bright, dark, colorblind

	sns.color_palette('Purples', 8) # husl, coolwarm

	sns.set(color_codes=True) #setting to matplotlib color codes

	sns.set_context('paper') # 'paper', "notebook", "talk", "poster"

	sns.despine(top=True, right=True) #remove lines on axes


	# FacetGrid can create subplots (relplot() , catplot())
	g.fig.suptitle("Car Weight vs. Horsepower", y=1.02)
	g.set_titles("This is {col_name}") # subtitles


	#AxesSubplot (scatterplot() , countplot(), ... )
	AxesSubplot.set_title('Average MPG Over Time')


	g.set(xlabel="Car Model Year", ylabel="Average MPG")
	plt.xticks(rotation=90)

	# using the underlying matplotlib API to work with the axes
	fig, ax = plt.subplots()
	sns.distplot(df['fmr_3'], ax=ax)
	ax.set(xlabel="1 Bedroom Fair Market Rent", xlim=(100,1500), title="US Rent")

	# Add vertical lines for the median
	ax.axvline(x=median, color='m', label='Median', linestyle='--', linewidth=2)
	# Scatter plot
	sns.scatterplot(x=gdp, y=phones)

	# hue for a third dimension
	sns.scatterplot(data=student_data, x="absences", y="G3", hue="location", hue_order = ['Rural', 'Urban'])


	# Count plot
	sns.countplot(y=region)

	palette_colors = {'Rural': "green", 'Urban': "blue"}
	sns.countplot(x="school",
	data=student_data,
	hue="location",
	palette=palette_colors)
	# Show the relationship between two quantitative variables
	# Examples: scatter plots, line plots

	# Scatter relational
	sns.relplot(x="absences", y="G3", data=student_data, kind='scatter')

	col='study_time', col_order = ['yes', 'no'] # split to different columns
	row="study_time", row_order = ['yes', 'no'] # split to rows

	size='cylinders', hue='cylinders' # size and colour as a 3rd dimension
	hue='origin', style='origin' # colour and style as a 3rd dimension


	# Line relplot
	sns.relplot(data=mpg, x='model_year', y='mpg', kind='line')

	ci='sd' #std instead of conf interv, None for hiding

	style="origin", markers=True, dashes=False, hue="origin" #all lines solid but different markers
	# Show the distribution of a quantitative variable within categories defined by a categorical variable
	# Examples: bar plots, count plots, box plots, point plots

	# Barplot
	sns.catplot(kind='count', data=survey_data, x='Internet usage')
	col='Age Category' # splitting

	sns.catplot(kind='bar', data=survey_data, x='Gender', y='Interested in Math')

	category_order = ["<2 hours", "2 to 5 hours", "5 to 10 hours", ">10 hours"]
	sns.catplot(x="study_time", y="G3", data=student_data, kind="bar", order=category_order) #rearrange


	# Boxplot
	sns.catplot(kind='box', data=student_data, x='study_time', y='G3', order=category_order)

	sns.catplot(kind='box', data=student_data, x='internet', y='G3', sym='', hue='location')
	# sym='' omits outliers, hue - subgroups

	# whis=0.5
	# whis=[5,95]
	# length of whiskers


	# Point plot
	sns.catplot(kind='point', data=student_data, x='famrel', y='absences')
	# capsize=0.2
	# join=False - remove lines

	sns.catplot(kind="point", x="romantic", y="absences", data=student_data,
	hue="school", ci=None, estimator=median)
	# distribution plot
	sns.distplot(df['Award_Amount'], kde=False, bins=20) # histogram only: kde=False switches the smoothed density curve off

	# density curve only (hist=False), shaded, with a rug added (rug=True)
	sns.distplot(df['Award_Amount'], hist=False, rug=True, kde_kws={'shade':True})


	# regplot
	sns.regplot(data=df, x='insurance_losses', y='premiums', marker='^')
	# x_bins=5 breaks X into bins
	# order=2 - polynomial
	# fit_reg=False = scatterplot

	sns.regplot(data=df, x='mnth', y='total_rentals', x_jitter=.1, order=2) # for categorical
	# x_estimator=np.mean

	sns.residplot(data=df, y='Tuition', x="SAT_AVG_ALL", color='g')

	sns.lmplot(data=df, x="insurance_losses", y="premiums", hue="Region")
	row="Region" # split to several
	# show each observation
	sns.stripplot(data=df, x='Award_Amount', y='Model Selected', jitter=True)
	sns.swarmplot(data=df, x='Award_Amount', y='Model Selected', hue='Region')

	# abstract representations
	sns.boxplot(data=df, x='Award_Amount', y='Model Selected')
	sns.violinplot(data=df, x='Award_Amount', y='Model Selected', palette='husl')
	# boxenplot is the current name of the letter-value plot (formerly sns.lvplot)
	sns.boxenplot(data=df, x='Award_Amount', y='Model Selected', palette='Paired', hue='Region')

	# statistical estimates
	sns.countplot(data=df, y="Model Selected", hue="Region")
	sns.pointplot(data=df, y='Award_Amount', x='Model Selected', capsize=.1)
	sns.barplot(data=df, y='Award_Amount', x='Model Selected', hue='Region')

	# matrix plot
	# Table of mean total_rentals by mnth (rows) and weekday (columns), rounded
	df_crosstab = pd.crosstab(df["mnth"], df["weekday"],
	                          values=df["total_rentals"], aggfunc='mean').round(0)
	# fmt=".0f": the rounded means are still floats, which the integer code "d" rejects
	sns.heatmap(df_crosstab,
	            cbar=False, cmap="YlGnBu", linewidths=0.3, annot=True, fmt=".0f",
	            center=df_crosstab.loc[9, 6]) # centre the colormap on the cell labelled (9, 6)
	# Grid of plots
	g2 = sns.FacetGrid(df, row="Degree_Type", row_order=['Graduate', 'Bachelors', 'Associates', 'Certificate'])
	g2.map(sns.pointplot, 'SAT_AVG_ALL') # map plots on to grid
	# or col="Degree_Type", col_order=degree_ord

	# same
	# catplot is the current name of sns.factorplot
	sns.catplot(data=df, x='SAT_AVG_ALL', kind='point',
	            row='Degree_Type', row_order=['Graduate', 'Bachelors', 'Associates', 'Certificate'])

	# Grid of plots
	sns.catplot(data=df, x='Tuition', kind='box', row='Degree_Type')

	sns.lmplot(data=df, x='SAT_AVG_ALL', y='Tuition', hue='WOMENONLY',
	col="Ownership", row='Degree_Type', row_order=['Graduate', 'Bachelors'], col_order=inst_ord)


	# Pairwise
	g = sns.PairGrid(df, vars=["fatal_collisions", "premiums"])
	# map() applies one plotting function to every cell of the grid
	g2 = g.map(plt.scatter)
	# map_diag() / map_offdiag() give the diagonal and the other cells different plots
	# NOTE(review): presumably an alternative to the map() call above rather than a
	# follow-up, since map() has already drawn on the diagonal - confirm
	g2 = g.map_diag(plt.hist)
	g3 = g2.map_offdiag(plt.scatter)

	# or
	sns.pairplot(data=df, vars=["fatal_collisions", "premiums"], kind='scatter',
	hue='Region', palette='RdBu', diag_kws={'alpha':.5} )

	sns.pairplot(data=df,
	x_vars=["fatal_collisions_speeding", "fatal_collisions_alc"],
	y_vars=['premiums', 'insurance_losses'],
	kind='scatter', hue='Region', palette='husl')

	sns.pairplot(data=df, vars=["insurance_losses", "premiums"],
	kind='reg', palette='BrBG', diag_kind = 'kde', hue='Region')

	# Joint of reg and dist
	g = sns.JointGrid(x="hum", y="total_rentals", data=df, xlim=(0.1, 1.0))
	g.plot(sns.regplot, sns.distplot)

	#same
	sns.jointplot(x="hum", y="total_rentals", kind='reg', data=df)


	#regression and residuals
	sns.jointplot(x="temp", y="total_rentals", kind='reg', data=df, order=2, xlim=(0, 1))
	sns.jointplot(x="temp", y="total_rentals", kind='resid', data=df, order=2)


	# kdeplot over the scatter plot
	g = (sns.jointplot(x="temp", y="casual", kind='scatter', data=df, marginal_kws=dict(bins=10, rug=True))
	.plot_joint(sns.kdeplot))