Seaborn Cheat Sheet 2023

Seaborn Cheat Sheet 2023

import numpy  as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

Datasets

Office Sales

!wget https://raw.githubusercontent.com/ashok-python/data/master/dm_office_sales.csv -P datasets

# Read the office sales CSV from the local datasets folder and preview
# its first three rows.
office_sales_csv = 'datasets/dm_office_sales.csv'
office_sales_df = pd.read_csv(office_sales_csv)
office_sales_df.head(3)

	division	level of education	training level	work experience	salary	sales
0	printers	some college	2	6	91684	372302
1	printers	associate's degree	2	10	119679	495660
2	peripherals	high school	0	9	82045	320453

Student Performance

!wget https://raw.githubusercontent.com/rashida048/Datasets/master/StudentsPerformance.csv -P datasets

# Read the student performance CSV from the local datasets folder and
# preview its first three rows.
students_performance_csv = 'datasets/StudentsPerformance.csv'
students_performance_df = pd.read_csv(students_performance_csv)
students_performance_df.head(3)

	gender	race/ethnicity	parental level of education	lunch	test preparation course	math score	reading score	writing score
0	female	group B	bachelor's degree	standard	none	72	72	74
1	female	group C	some college	standard	completed	69	90	88
2	female	group B	master's degree	standard	none	90	95	93

Toronto Weather

!wget https://github.com/datagy/mediumdata/raw/master/toronto-weather.xlsx -P datasets

# Source columns to load, mapped to the shorter names used from here on.
renames = {'LOCAL_DATE': 'Date', 'MEAN_TEMPERATURE': 'Temperature'}
cols = list(renames)

# Load the two columns, parsing the date so the `.dt` accessors work,
# and switch to the short column names.
weather_daily = pd.read_excel(
    'datasets/toronto-weather.xlsx',
    usecols=cols,
    parse_dates=['LOCAL_DATE'],
).rename(columns=renames)

# Split each date into day-of-month and month; these become the two
# axes of the pivot below.
weather_daily['Day'] = weather_daily['Date'].dt.day
weather_daily['Month'] = weather_daily['Date'].dt.month

# Keep days 1-28 only.
# NOTE(review): presumably so every month contributes the same set of
# day columns - confirm.
weather_daily = weather_daily.loc[weather_daily['Day'] <= 28]

# Month x Day table of temperatures (pivot_table's default aggregation).
country_table_df = weather_daily.pivot_table(
    index='Month',
    columns='Day',
    values='Temperature',
)

# Preview the top-left 3x3 corner of the table.
country_table_df.iloc[:3, :3]

Credit Card Approval Prediction

!wget https://raw.githubusercontent.com/BrunoHerick/analiseCartaoCredito/main/application_record.csv -P datasets

# Read the credit card application records and preview the first three
# rows, transposed so each column name becomes a row.
credit_approv_csv = 'datasets/application_record.csv'
credit_approv_df = pd.read_csv(credit_approv_csv)
credit_approv_df.head(3).T

	0	1	2
ID	5008804	5008805	5008806
CODE_GENDER	M	M	M
FLAG_OWN_CAR	Y	Y	Y
FLAG_OWN_REALTY	Y	Y	Y
CNT_CHILDREN	0	0	0
AMT_INCOME_TOTAL	427500.0	427500.0	112500.0
NAME_INCOME_TYPE	Working	Working	Working
NAME_EDUCATION_TYPE	Higher education	Higher education	Secondary / secondary special
NAME_FAMILY_STATUS	Civil marriage	Civil marriage	Married
NAME_HOUSING_TYPE	Rented apartment	Rented apartment	House / apartment
DAYS_BIRTH	-12005	-12005	-21474
DAYS_EMPLOYED	-4542	-4542	-1134
FLAG_MOBIL	1	1	1
FLAG_WORK_PHONE	1	1	0
FLAG_PHONE	0	0	0
FLAG_EMAIL	0	0	0
OCCUPATION_TYPE	NaN	NaN	Security staff
CNT_FAM_MEMBERS	2	2	2
CLASSIFICAO	bom	bom	bom

Tips

!wget https://raw.githubusercontent.com/plotly/datasets/master/tips.csv -P datasets

# Read the tips CSV and preview the first three rows, transposed so
# each column name becomes a row.
tips_csv = 'datasets/tips.csv'
tips_df = pd.read_csv(tips_csv)
tips_df.head(3).T

	0	1	2
total_bill	16.99	10.34	21.01
tip	1.01	1.66	3.5
sex	Female	Male	Male
smoker	No	No	No
day	Sun	Sun	Sun
time	Dinner	Dinner	Dinner
size	2	3	3

Scatter Plots

plt.figure(figsize=(10, 7))

sns.set(style='darkgrid')

# Salary vs sales: colour encodes education level and marker shape
# encodes division (both categorical columns).
ax = sns.scatterplot(
    data=office_sales_df,
    x='salary',
    y='sales',
    hue='level of education',
    style='division',
    palette='nipy_spectral',
    s=40,
    alpha=0.6,
)
ax.set_title('Salary vs Sales')

plt.savefig('assets/Seaborn_Cheat_Sheet_01.webp', bbox_inches='tight')

plt.figure(figsize=(10, 6))

# Salary vs sales: colour follows work experience and marker size
# follows training level.
ax = sns.scatterplot(
    data=office_sales_df,
    x='salary',
    y='sales',
    hue='work experience',
    size='training level',
    palette='cool',
)
ax.set_title('Salary vs Sales')

plt.savefig('assets/Seaborn_Cheat_Sheet_02.webp', bbox_inches='tight')

plt.figure(figsize=(10, 6))

# DAYS_BIRTH / DAYS_EMPLOYED are negative in the sample rows shown for
# this dataset; flip the sign to get positive day counts. Unary minus is
# vectorised, so no per-row Python lambda is needed.
credit_approv_df['DAYS_BIRTH_INV'] = -credit_approv_df['DAYS_BIRTH']
credit_approv_df['DAYS_EMPLOYED_INV'] = -credit_approv_df['DAYS_EMPLOYED']

# hue by categorical column (gender), size by continuous column (income)
plot = sns.scatterplot(
    x='DAYS_BIRTH_INV',
    y='DAYS_EMPLOYED_INV',
    data=credit_approv_df,
    hue='CODE_GENDER',
    palette='winter',
    size='AMT_INCOME_TOTAL',
    alpha=0.3
)

plot.set_title('Age vs Days Employed by Gender and Total Income')
# Restrict the visible y-range to 0..17500 days employed.
plot.set_ylim(0, 17500)

plt.savefig('assets/Seaborn_Cheat_Sheet_20.webp', bbox_inches='tight')

Continuous Distribution Plots

Rug Plot

plt.figure(figsize=(10, 3))

# One tick per salary value, coloured by training level.
ax = sns.rugplot(
    data=office_sales_df,
    x='salary',
    hue='training level',
    palette='gist_rainbow',
    height=0.75,
)
ax.set_title('Salary Distribution based on Training')

plt.savefig('assets/Seaborn_Cheat_Sheet_03.webp', bbox_inches='tight')

Histogram

plt.figure(figsize=(10, 5))

# 50-bin salary histogram per training level, with a KDE curve overlaid.
ax = sns.histplot(
    data=office_sales_df,
    x='salary',
    hue='training level',
    palette='winter',
    bins=50,
    kde=True,
)
ax.set_title('Salary Distribution based on Training')

plt.savefig('assets/Seaborn_Cheat_Sheet_04.webp', bbox_inches='tight')

# Convert DAYS_BIRTH to whole years by dividing by -365 and rounding.
# Done as vectorised Series arithmetic rather than a per-row lambda; the
# cast keeps the column integer-typed like the original `round()` did.
credit_approv_df['AGE_YEARS'] = (
    (credit_approv_df['DAYS_BIRTH'] / -365).round().astype('int64')
)

plt.figure(figsize=(10, 5))
plt.title('Age in Years Distribution')

# Step-style histogram of age, one outline per gender code.
sns.histplot(
    data=credit_approv_df,
    x='AGE_YEARS',
    bins=45,
    element='step',
    hue='CODE_GENDER',
    palette='tab20'
)

plt.savefig('assets/Seaborn_Cheat_Sheet_21.webp', bbox_inches='tight')

Kernel Density Estimation

plt.figure(figsize=(10, 5))

# Clip the density estimate to the observed salary range.
salary = office_sales_df['salary']
salary_range = [salary.min(), salary.max()]

# Filled salary density curve per training level.
ax = sns.kdeplot(
    data=office_sales_df,
    x='salary',
    hue='training level',
    palette='viridis',
    clip=salary_range,
    fill=True,
)
ax.set_title('Salary Distribution based on Training')

plt.savefig('assets/Seaborn_Cheat_Sheet_05.webp', bbox_inches='tight')

Categorical Plots

Count Plot

plt.figure(figsize=(10, 5))
# Title typo fixed: "Personal" -> "Personnel" (the bars count staff rows).
plt.title('Sales Personnel per Division by Training')

# Number of rows per division, split into one bar per training level.
sns.countplot(
    data=office_sales_df,
    x='division',
    hue='training level',
    palette='seismic'
)

plt.savefig('assets/Seaborn_Cheat_Sheet_06.webp', bbox_inches='tight')

Barplot

plt.figure(figsize=(10, 5))
plt.title('Mean Salary based on Education and Division')
sns.set(style='darkgrid')

# Mean salary per education level, one bar per division; the error bars
# show the standard deviation.
ax = sns.barplot(
    data=office_sales_df,
    x='level of education',
    y='salary',
    hue='division',
    palette='magma',
    estimator=np.mean,
    errorbar='sd',
)

# Anchor the legend just beyond the top-right corner of the axes.
ax.legend(bbox_to_anchor=(1.01, 1.01))
plt.savefig('assets/Seaborn_Cheat_Sheet_07.webp', bbox_inches='tight')

Categorical Distribution Plots

Boxplot

plt.figure(figsize=(10, 5))

# Horizontal boxes: math score on x, gender on y, split by whether the
# test preparation course was taken.
ax = sns.boxplot(
    data=students_performance_df,
    x='math score',
    y='gender',
    hue='test preparation course',
    orient='h',
    palette='cool',
)
ax.set_title('Math Score by Gender and Prep Course Attendance')

plt.savefig('assets/Seaborn_Cheat_Sheet_08.webp', bbox_inches='tight')

# Row count that corresponds to 10% of the dataset.
ten_percent = round(len(credit_approv_df) / 100 * 10)

# The last `ten_percent` rows of the frame; taken once and reused below.
# NOTE(review): `tail()` picks the final rows in file order, not the rows
# with the lowest AMT_INCOME_TOTAL - confirm this matches the plot title.
tail_rows = credit_approv_df.tail(ten_percent)
tail_income = tail_rows['AMT_INCOME_TOTAL']

plt.figure(figsize=(10, 5))
plt.title('Lower 10% Total Income by Family Status and House Ownership')

# Vertical boxes of income per family status, split by realty ownership.
plot = sns.boxplot(
    data=tail_rows,
    x='NAME_FAMILY_STATUS',
    y='AMT_INCOME_TOTAL',
    hue='FLAG_OWN_REALTY',
    orient='v',
    palette='winter',
    linewidth=0.5,
    fliersize=1,
)

# Pad the y-axis by 10000 on either side of the plotted income range.
plot.set_ylim(tail_income.min() - 10000, tail_income.max() + 10000)

plt.savefig('assets/Seaborn_Cheat_Sheet_22.webp', bbox_inches='tight')

Violinplot

plt.figure(figsize=(12, 5))

# Single violin of the `tip` column, passed directly as a Series.
tip_values = tips_df['tip']
ax = sns.violinplot(x=tip_values, color='mediumspringgreen')
ax.set_title('Tips Distribution')

plt.savefig('assets/Seaborn_Cheat_Sheet_25.webp', bbox_inches='tight')

plt.figure(figsize=(10, 5))

# Split violins: each gender gets one violin whose halves compare the
# two test-preparation groups; quartiles are drawn inside.
ax = sns.violinplot(
    data=students_performance_df,
    x='gender',
    y='math score',
    hue='test preparation course',
    orient='v',
    split=True,
    inner='quartile',
    bw=0.3,
    palette='cool',
)
ax.set_title('Math Score by Gender and Prep Course Attendance')

ax.legend(loc='lower right')
plt.savefig('assets/Seaborn_Cheat_Sheet_09.webp', bbox_inches='tight')

Swarmplot

plt.figure(figsize=(10, 5))

# One point per student: math score on x, gender on y, coloured by the
# parents' level of education.
ax = sns.swarmplot(
    data=students_performance_df,
    x='math score',
    y='gender',
    hue='parental level of education',
    palette='tab10',
)
ax.set_title('Math Score by Gender and Parental Education')

ax.legend(loc='lower left')
plt.savefig('assets/Seaborn_Cheat_Sheet_10.webp', bbox_inches='tight')

# Explicit list of colours handed to seaborn as the hue palette.
colour_palette = ['dodgerblue', 'mediumspringgreen']

plt.figure(figsize=(12, 5))

# One point per bill: tip amount per weekday, coloured by `time`.
ax = sns.swarmplot(
    data=tips_df,
    x='day',
    y='tip',
    hue='time',
    palette=colour_palette,
)
ax.set_title('Tips by Day and Time of the Day')

ax.legend(loc='upper right')
plt.savefig('assets/Seaborn_Cheat_Sheet_24.webp', bbox_inches='tight')

![Seaborn Cheat Sheet 2023](https://github.com/mpolinowski/python-seaborn-2023/raw/master/assets/Seaborn_Cheat_Sheet_24.webp)

Boxenplot

plt.figure(figsize=(10, 5))

# Vertical boxen (letter-value) plot of math score per gender, split by
# test preparation course.
ax = sns.boxenplot(
    data=students_performance_df,
    x='gender',
    y='math score',
    hue='test preparation course',
    orient='v',
    palette='seismic',
)
ax.set_title('Math Score by Gender and Prep Course Attendance')

plt.savefig('assets/Seaborn_Cheat_Sheet_11.webp', bbox_inches='tight')

Comparison Plots

Jointplot

# Reading vs math score as a scatter joint plot, coloured by gender.
joint_grid = sns.jointplot(
    data=students_performance_df,
    x='reading score',
    y='math score',
    hue='gender',
    kind='scatter',
    palette='winter',
    alpha=0.4,
)

plt.savefig('assets/Seaborn_Cheat_Sheet_12.webp', bbox_inches='tight')

# Reading vs math score as a filled KDE joint plot in a single colour
# (no hue mapping is used here).
plot = sns.jointplot(
    data=students_performance_df,
    x='reading score',
    y='math score',
    kind='kde',
    fill=True,
    color='dodgerblue'
)

# Title no longer claims "by Gender": this plot has no gender mapping.
# `fontweight` is the documented suptitle keyword (a `fontdict` weight is
# overridden by the default), and `.figure` replaces the deprecated
# `.fig` alias.
plot.figure.suptitle('Math Score vs Reading Score',
                     fontsize=6, fontweight='normal')

plt.savefig('assets/Seaborn_Cheat_Sheet_13.webp', bbox_inches='tight')

Pairplot

# Pairwise plot matrix of the student performance frame, coloured by
# gender.
pair_grid = sns.pairplot(
    students_performance_df,
    hue='gender',
    palette='terrain',
)

plt.savefig('assets/Seaborn_Cheat_Sheet_14.webp', bbox_inches='tight')

Grid Plots

Catplot

# Grid of boxen plots: rows split by gender, columns (and colour) by
# test preparation course; each panel shows math score per lunch type.
cat_grid = sns.catplot(
    data=students_performance_df,
    kind='boxen',
    x='lunch',
    y='math score',
    row='gender',
    col='test preparation course',
    hue='test preparation course',
    palette='mako',
)

plt.savefig('assets/Seaborn_Cheat_Sheet_15.webp', bbox_inches='tight')

Pairgrid

# PairGrid with a different plot type per region: scatter above the
# diagonal, KDE contours below it, histograms on it. Each `map_*` call
# returns the grid, so the calls can be chained.
grid = (
    sns.PairGrid(students_performance_df)
    .map_upper(sns.scatterplot, color='dodgerblue')
    .map_lower(sns.kdeplot, cmap='winter')
    .map_diag(sns.histplot, color='fuchsia')
)

plt.savefig('assets/Seaborn_Cheat_Sheet_16.webp', bbox_inches='tight')

# Columns passed as extra per-point encodings to the scatter plots.
prep_course = students_performance_df["test preparation course"]
ethnicity = students_performance_df["race/ethnicity"]

# Gender-coloured PairGrid: the upper triangle sizes points by prep
# course, the lower triangle sizes and styles them by race/ethnicity,
# and the diagonal shows KDE curves. The legend is added last.
grid = (
    sns.PairGrid(
        data=students_performance_df,
        hue='gender',
        palette='cool',
    )
    .map_upper(sns.scatterplot, size=prep_course, alpha=0.8)
    .map_lower(sns.scatterplot, size=ethnicity, style=ethnicity)
    .map_diag(sns.kdeplot)
    .add_legend(title="", adjust_subtitles=True)
)

plt.savefig('assets/Seaborn_Cheat_Sheet_17.webp', bbox_inches='tight')

Matrix Plots

Heatmap

plt.figure(figsize=(20, 8), dpi=200)

# Annotated heatmap of the Month x Day temperature table.
ax = sns.heatmap(
    country_table_df,
    cmap='coolwarm',
    annot=True,
    linewidth=0.5,
)
ax.set_title('Average daily temperature of Toronto, Canada in 2020')

plt.savefig('assets/Seaborn_Cheat_Sheet_18.webp', bbox_inches='tight')

# Columns left out of the correlation matrix: the text-valued columns
# plus CNT_CHILDREN and FLAG_MOBIL. Each label is listed once.
excluded_columns = [
    'CODE_GENDER',
    'FLAG_OWN_CAR',
    'FLAG_OWN_REALTY',
    'NAME_INCOME_TYPE',
    'NAME_EDUCATION_TYPE',
    'NAME_FAMILY_STATUS',
    'NAME_HOUSING_TYPE',
    'OCCUPATION_TYPE',
    'CLASSIFICAO',
    'CNT_CHILDREN',
    'FLAG_MOBIL',
]

credit_approv_df_only_numeric = credit_approv_df.drop(excluded_columns, axis=1)

# Drop rows in which every remaining value is missing. This previously
# referenced an undefined name (`credit_approv_df_numeric`) and raised
# NameError; it now uses the frame built just above.
credit_approv_df_dropna = credit_approv_df_only_numeric.dropna(how='all')


plt.figure(figsize=(20, 8), dpi=200)
plt.title('Correlation Heatmap Credit Card Approval Dataset')

# Annotated heatmap of the pairwise column correlations.
sns.heatmap(
    credit_approv_df_dropna.corr(),
    linewidth=0.5,
    cmap='seismic',
    annot=True
)

plt.savefig('assets/Seaborn_Cheat_Sheet_23.webp', bbox_inches='tight')

Clustermap

# Clustered, annotated heatmap of the temperature table; column
# clustering is switched off.
cluster_grid = sns.clustermap(
    country_table_df,
    col_cluster=False,
    cmap='coolwarm',
    annot=True,
    linewidth=0.5,
)

plt.savefig('assets/Seaborn_Cheat_Sheet_19.webp', bbox_inches='tight')

Facet Grid

# Facets: one column per `time`, one row per `smoker`, coloured by day.
# Each panel gets a histogram of `tip`; `map` and `add_legend` both
# return the grid, so the calls are chained.
plot = (
    sns.FacetGrid(
        tips_df,
        row='smoker',
        col='time',
        hue='day',
        palette='plasma_r',
        sharex=True,
    )
    .map(plt.hist, 'tip')
    .add_legend()
)

plt.savefig('assets/Seaborn_Cheat_Sheet_26.webp', bbox_inches='tight')

# Facets: one column per day, one row per `sex`, coloured by `time`.
# Each panel gets a total_bill vs tip scatter; the legend is added last.
plot = (
    sns.FacetGrid(
        tips_df,
        row='sex',
        col='day',
        hue='time',
        palette='plasma',
    )
    .map(plt.scatter, 'total_bill', 'tip')
    .add_legend()
)

plt.savefig('assets/Seaborn_Cheat_Sheet_27.webp', bbox_inches='tight')

mpolinowski / python-seaborn-2023

Seaborn Cheat Sheet 2023

Datasets

Office Sales

Student Performance

Toronto Weather

Credit Card Approval Prediction

Tips

Scatter Plots

Continuous Distribution Plots

Rug Plot

Histogram

Kernel Density Estimation

Categorical Plots

Count Plot

Barplot

Categorical Distribution Plots

Boxplot

Violinplot

Swarmplot

Boxenplot

Comparison Plots

Jointplot

Pairplot

Grid Plots

Catplot

Pairgrid

Matrix Plots

Heatmap

Clustermap

Facet Grid

About

Languages