Seaborn Cheat Sheet 2023
import numpy as np
import pandas as pd
import matplotlib .pyplot as plt
import seaborn as sns
Datasets
Office Sales
!wget https :// raw .githubusercontent .com / ashok - python / data / master / dm_office_sales .csv - P datasets
office_sales_df = pd .read_csv ('datasets/dm_office_sales.csv' )
office_sales_df .head (3 )
division
level of education
training level
work experience
salary
sales
0
printers
some college
2
6
91684
372302
1
printers
associate's degree
2
10
119679
495660
2
peripherals
high school
0
9
82045
320453
Student Performance
!wget https :// raw .githubusercontent .com / rashida048 / Datasets / master / StudentsPerformance .csv - P datasets
students_performance_df = pd .read_csv ('datasets/StudentsPerformance.csv' )
students_performance_df .head (3 )
gender
race/ethnicity
parental level of education
lunch
test preparation course
math score
reading score
writing score
0
female
group B
bachelor's degree
standard
none
72
72
74
1
female
group C
some college
standard
completed
69
90
88
2
female
group B
master's degree
standard
none
90
95
93
Toronto Weather
!wget https :// github .com / datagy / mediumdata / raw / master / toronto - weather .xlsx - P datasets
cols = ['LOCAL_DATE' , 'MEAN_TEMPERATURE' ]
renames = {'LOCAL_DATE' : 'Date' , 'MEAN_TEMPERATURE' : 'Temperature' }
country_table_df = pd .read_excel ('datasets/toronto-weather.xlsx' , usecols = cols , parse_dates = ['LOCAL_DATE' ])
country_table_df .rename (columns = renames , inplace = True )
country_table_df ['Day' ] = country_table_df ['Date' ].dt .day
country_table_df ['Month' ] = country_table_df ['Date' ].dt .month
country_table_df = country_table_df [country_table_df ['Day' ] <= 28 ]
country_table_df = pd .pivot_table (
data = country_table_df ,
index = 'Month' ,
columns = 'Day' ,
values = 'Temperature'
)
country_table_df .iloc [:3 , :3 ]
Credit Card Approval Prediction
!wget https :// raw .githubusercontent .com / BrunoHerick / analiseCartaoCredito / main / application_record .csv - P datasets
credit_approv_df = pd .read_csv ('datasets/application_record.csv' )
credit_approv_df .head (3 ).transpose ()
0
1
2
ID
5008804
5008805
5008806
CODE_GENDER
M
M
M
FLAG_OWN_CAR
Y
Y
Y
FLAG_OWN_REALTY
Y
Y
Y
CNT_CHILDREN
0
0
0
AMT_INCOME_TOTAL
427500.0
427500.0
112500.0
NAME_INCOME_TYPE
Working
Working
Working
NAME_EDUCATION_TYPE
Higher education
Higher education
Secondary / secondary special
NAME_FAMILY_STATUS
Civil marriage
Civil marriage
Married
NAME_HOUSING_TYPE
Rented apartment
Rented apartment
House / apartment
DAYS_BIRTH
-12005
-12005
-21474
DAYS_EMPLOYED
-4542
-4542
-1134
FLAG_MOBIL
1
1
1
FLAG_WORK_PHONE
1
1
0
FLAG_PHONE
0
0
0
FLAG_EMAIL
0
0
0
OCCUPATION_TYPE
NaN
NaN
Security staff
CNT_FAM_MEMBERS
2
2
2
CLASSIFICAO
bom
bom
bom
Tips
!wget https :// raw .githubusercontent .com / plotly / datasets / master / tips .csv - P datasets
tips_df = pd .read_csv ('datasets/tips.csv' )
tips_df .head (3 ).transpose ()
0
1
2
total_bill
16.99
10.34
21.01
tip
1.01
1.66
3.5
sex
Female
Male
Male
smoker
No
No
No
day
Sun
Sun
Sun
time
Dinner
Dinner
Dinner
size
2
3
3
Scatter Plots
# Salary vs. sales scatter: colour and marker shape both come from categorical columns.
plt.figure(figsize=(10, 7))
sns.set(style='darkgrid')
salary_sales_ax = sns.scatterplot(
    data=office_sales_df,
    x='salary',
    y='sales',
    hue='level of education',  # colour per education level
    style='division',          # marker shape per division
    palette='nipy_spectral',
    alpha=0.6,
    s=40,
)
salary_sales_ax.set_title('Salary vs Sales')
plt.savefig('assets/Seaborn_Cheat_Sheet_01.webp', bbox_inches='tight')
# Salary vs. sales scatter: colour and marker size both come from numeric columns.
plt.figure(figsize=(10, 6))
experience_ax = sns.scatterplot(
    data=office_sales_df,
    x='salary',
    y='sales',
    hue='work experience',   # colour scale
    size='training level',   # marker size
    palette='cool',
)
experience_ax.set_title('Salary vs Sales')
plt.savefig('assets/Seaborn_Cheat_Sheet_02.webp', bbox_inches='tight')
# Age vs. employment duration, coloured by gender and sized by total income.
plt.figure(figsize=(10, 6))
# The first rows of DAYS_BIRTH / DAYS_EMPLOYED are negative day counts; flip the
# sign with a vectorized negation instead of a per-row Python lambda.
credit_approv_df['DAYS_BIRTH_INV'] = -credit_approv_df['DAYS_BIRTH']
credit_approv_df['DAYS_EMPLOYED_INV'] = -credit_approv_df['DAYS_EMPLOYED']
# hue by categorical column, size by continuous column
plot = sns.scatterplot(
    data=credit_approv_df,
    x='DAYS_BIRTH_INV',
    y='DAYS_EMPLOYED_INV',
    hue='CODE_GENDER',
    size='AMT_INCOME_TOTAL',
    palette='winter',
    alpha=0.3,
)
plot.set_title('Age vs Days Employed by Gender and Total Income')
# Restrict the y-axis to 0..17500 days; points outside that range are not shown.
plot.set_ylim(0, 17500)
plt.savefig('assets/Seaborn_Cheat_Sheet_20.webp', bbox_inches='tight')
Continuous Distribution Plots
Rug Plot
# Rug plot of salaries, one tick per row, coloured by training level.
plt.figure(figsize=(10, 3))
plt.title('Salary Distribution based on Training')
rug_options = {
    'x': 'salary',
    'hue': 'training level',
    'palette': 'gist_rainbow',
    'height': 0.75,  # tick height as a fraction of the axes
}
sns.rugplot(data=office_sales_df, **rug_options)
plt.savefig('assets/Seaborn_Cheat_Sheet_03.webp', bbox_inches='tight')
Histogram
# Salary histogram (50 bins) per training level, with a KDE curve overlaid.
plt.figure(figsize=(10, 5))
plt.title('Salary Distribution based on Training')
sns.histplot(
    data=office_sales_df,
    x='salary',
    hue='training level',
    palette='winter',
    bins=50,
    kde=True,
)
plt.savefig('assets/Seaborn_Cheat_Sheet_04.webp', bbox_inches='tight')
# Derive age in whole years from DAYS_BIRTH by dividing by -365 and rounding.
# Done column-wise instead of with a per-row Python lambda; the result stays integer-typed.
credit_approv_df['AGE_YEARS'] = (
    (credit_approv_df['DAYS_BIRTH'] / -365).round().astype('int64')
)
# Step-style age histogram (45 bins), one outline per gender code.
plt.figure(figsize=(10, 5))
plt.title('Age in Years Distribution')
sns.histplot(
    data=credit_approv_df,
    x='AGE_YEARS',
    hue='CODE_GENDER',
    palette='tab20',
    bins=45,
    element='step',
)
plt.savefig('assets/Seaborn_Cheat_Sheet_21.webp', bbox_inches='tight')
Kernel Density Estimation
# Filled KDE of salaries per training level, clipped to the observed salary range.
plt.figure(figsize=(10, 5))
plt.title('Salary Distribution based on Training')
salary_column = office_sales_df['salary']
salary_bounds = [salary_column.min(), salary_column.max()]
sns.kdeplot(
    data=office_sales_df,
    x='salary',
    hue='training level',
    palette='viridis',
    clip=salary_bounds,  # do not evaluate the density outside min..max salary
    fill=True,
)
plt.savefig('assets/Seaborn_Cheat_Sheet_05.webp', bbox_inches='tight')
Categorical Plots
Count Plot
# Row counts per division, split into one bar per training level.
plt.figure(figsize=(10, 5))
plt.title('Sales Personal per Division by Training')
count_options = dict(x='division', hue='training level', palette='seismic')
sns.countplot(data=office_sales_df, **count_options)
plt.savefig('assets/Seaborn_Cheat_Sheet_06.webp', bbox_inches='tight')
Barplot
# Mean salary per education level and division, with standard-deviation error bars.
plt.figure(figsize=(10, 5))
plt.title('Mean Salary based on Education and Division')
sns.set(style='darkgrid')
sns.barplot(
    data=office_sales_df,
    x='level of education',
    y='salary',
    hue='division',
    palette='magma',
    estimator=np.mean,  # bar height
    errorbar='sd',      # error-bar extent
)
# Anchor the legend at (1.01, 1.01) in axes coordinates.
legend_anchor = (1.01, 1.01)
plt.legend(bbox_to_anchor=legend_anchor)
plt.savefig('assets/Seaborn_Cheat_Sheet_07.webp', bbox_inches='tight')
Categorical Distribution Plots
Boxplot
# Horizontal box plot of math scores per gender, split by prep-course attendance.
plt.figure(figsize=(10, 5))
plt.title('Math Score by Gender and Prep Course Attendance')
sns.boxplot(
    data=students_performance_df,
    x='math score',
    y='gender',
    hue='test preparation course',
    palette='cool',
    orient='h',
)
plt.savefig('assets/Seaborn_Cheat_Sheet_08.webp', bbox_inches='tight')
# Box plot of total income by family status and realty ownership over the last
# tenth of the rows in the credit dataset.
ten_percent = round((len(credit_approv_df) / 100 * 10))
# NOTE(review): tail() selects the LAST rows in file order; the frame is not
# sorted by AMT_INCOME_TOTAL anywhere in this notebook, so confirm this subset
# really matches the "Lower 10%" wording of the title.
income_tail_df = credit_approv_df.tail(ten_percent)
income_tail = income_tail_df['AMT_INCOME_TOTAL']
# Pad the y-axis by 10000 on both sides of the subset's income range.
y_lower = income_tail.min() - 10000
y_upper = income_tail.max() + 10000
plt.figure(figsize=(10, 5))
plt.title('Lower 10% Total Income by Family Status and House Ownership')
plot = sns.boxplot(
    data=income_tail_df,
    x='NAME_FAMILY_STATUS',
    y='AMT_INCOME_TOTAL',
    hue='FLAG_OWN_REALTY',
    palette='winter',
    orient='v',
    linewidth=0.5,
    fliersize=1,
)
plot.set_ylim(y_lower, y_upper)
plt.savefig('assets/Seaborn_Cheat_Sheet_22.webp', bbox_inches='tight')
Violinplot
# Single violin showing the distribution of the tip column.
plt.figure(figsize=(12, 5))
plt.title('Tips Distribution')
tip_values = tips_df['tip']
sns.violinplot(x=tip_values, color='mediumspringgreen')
plt.savefig('assets/Seaborn_Cheat_Sheet_25.webp', bbox_inches='tight')
# Split violins of math scores per gender: one half per prep-course value,
# with quartile lines drawn inside.
plt.figure(figsize=(10, 5))
plt.title('Math Score by Gender and Prep Course Attendance')
sns.violinplot(
    data=students_performance_df,
    x='gender',
    y='math score',
    hue='test preparation course',
    palette='cool',
    orient='v',
    split=True,
    inner='quartile',
    # NOTE(review): newer seaborn releases rename `bw` (bw_method / bw_adjust) -
    # confirm against the installed version before changing it.
    bw=0.3,
)
plt.legend(loc='lower right')
plt.savefig('assets/Seaborn_Cheat_Sheet_09.webp', bbox_inches='tight')
Swarmplot
# Swarm plot of individual math scores per gender, coloured by parental education.
plt.figure(figsize=(10, 5))
plt.title('Math Score by Gender and Parental Education')
swarm_options = {
    'x': 'math score',
    'y': 'gender',
    'hue': 'parental level of education',
    'palette': 'tab10',
}
sns.swarmplot(data=students_performance_df, **swarm_options)
plt.legend(loc='lower left')
plt.savefig('assets/Seaborn_Cheat_Sheet_10.webp', bbox_inches='tight')
# Swarm plot of tips per day, coloured by time using an explicit two-colour palette.
colour_palette = ['dodgerblue', 'mediumspringgreen']
plt.figure(figsize=(12, 5))
plt.title('Tips by Day and Time of the Day')
sns.swarmplot(
    data=tips_df,
    x='day',
    y='tip',
    hue='time',
    palette=colour_palette,
)
plt.legend(loc='upper right')
plt.savefig('assets/Seaborn_Cheat_Sheet_24.webp', bbox_inches='tight')
![Seaborn Cheat Sheet 2023](https://github.com/mpolinowski/python-seaborn-2023/raw/master/assets/Seaborn_Cheat_Sheet_24.webp)
Boxenplot
# Vertical boxen plot of math scores per gender, split by prep-course attendance.
plt.figure(figsize=(10, 5))
plt.title('Math Score by Gender and Prep Course Attendance')
sns.boxenplot(
    data=students_performance_df,
    x='gender',
    y='math score',
    hue='test preparation course',
    palette='seismic',
    orient='v',
)
plt.savefig('assets/Seaborn_Cheat_Sheet_11.webp', bbox_inches='tight')
Comparison Plots
Jointplot
# Joint scatter of reading vs. math score, coloured by gender.
joint_scatter_options = dict(
    x='reading score',
    y='math score',
    kind='scatter',
    hue='gender',
    palette='winter',
    alpha=0.4,
)
sns.jointplot(data=students_performance_df, **joint_scatter_options)
plt.savefig('assets/Seaborn_Cheat_Sheet_12.webp', bbox_inches='tight')
# Joint filled-KDE plot of reading vs. math score in a single colour.
plot = sns.jointplot(
    data=students_performance_df,
    x='reading score',
    y='math score',
    kind='kde',
    color='dodgerblue',
    fill=True,
)
# Title the whole figure rather than a single axes.
joint_title = 'Math Score vs Reading Score by Gender'
plot.fig.suptitle(
    joint_title,
    fontsize=6,
    fontdict={"weight": "normal"},
)
plt.savefig('assets/Seaborn_Cheat_Sheet_13.webp', bbox_inches='tight')
Pairplot
# Pairwise plots of the student-performance frame, coloured by gender.
pairplot_options = {'hue': 'gender', 'palette': 'terrain'}
sns.pairplot(data=students_performance_df, **pairplot_options)
plt.savefig('assets/Seaborn_Cheat_Sheet_14.webp', bbox_inches='tight')
Grid Plots
Catplot
# Grid of boxen plots: math score by lunch type, with one column per
# prep-course value and one row per gender.
sns.catplot(
    data=students_performance_df,
    kind='boxen',
    x='lunch',
    y='math score',
    hue='test preparation course',
    col='test preparation course',
    row='gender',
    palette='mako',
)
plt.savefig('assets/Seaborn_Cheat_Sheet_15.webp', bbox_inches='tight')
Pairgrid
# PairGrid with a different plot type per region: scatter above the diagonal,
# KDE below it, histograms on it.
grid = (
    sns.PairGrid(students_performance_df)
    .map_upper(sns.scatterplot, color='dodgerblue')
    .map_lower(sns.kdeplot, cmap='winter')
    .map_diag(sns.histplot, color='fuchsia')
)
plt.savefig('assets/Seaborn_Cheat_Sheet_16.webp', bbox_inches='tight')
# PairGrid coloured by gender, with extra per-point encodings taken from other columns.
prep_course = students_performance_df["test preparation course"]
ethnicity = students_performance_df["race/ethnicity"]
grid = sns.PairGrid(
    data=students_performance_df,
    hue='gender',
    palette='cool',
)
# Above the diagonal: marker size follows the prep-course column.
grid = grid.map_upper(sns.scatterplot, size=prep_course, alpha=0.8)
# Below the diagonal: marker size and shape both follow race/ethnicity.
grid = grid.map_lower(sns.scatterplot, size=ethnicity, style=ethnicity)
grid = grid.map_diag(sns.kdeplot)
grid = grid.add_legend(title="", adjust_subtitles=True)
plt.savefig('assets/Seaborn_Cheat_Sheet_17.webp', bbox_inches='tight')
Matrix Plots
Heatmap
# Annotated heatmap of the Month x Day temperature table.
plt.figure(figsize=(20, 8), dpi=200)
plt.title('Average daily temperature of Toronto, Canada in 2020')
heatmap_options = {
    'cmap': 'coolwarm',
    'annot': True,      # write each cell's value inside the cell
    'linewidth': 0.5,
}
sns.heatmap(country_table_df, **heatmap_options)
plt.savefig('assets/Seaborn_Cheat_Sheet_18.webp', bbox_inches='tight')
# Correlation heatmap of the credit dataset.
# Columns excluded from the correlation matrix (each label listed once).
dropped_columns = [
    'CODE_GENDER',
    'FLAG_OWN_CAR',
    'FLAG_OWN_REALTY',
    'NAME_INCOME_TYPE',
    'NAME_EDUCATION_TYPE',
    'NAME_FAMILY_STATUS',
    'NAME_HOUSING_TYPE',
    'OCCUPATION_TYPE',
    'CLASSIFICAO',
    'CNT_CHILDREN',
    'FLAG_MOBIL',
]
credit_approv_df_only_numeric = credit_approv_df.drop(columns=dropped_columns)
# Fixed: the original read an undefined name (`credit_approv_df_numeric`) here,
# which raised NameError. Rows where every remaining value is missing are removed.
credit_approv_df_dropna = credit_approv_df_only_numeric.dropna(how='all')
plt.figure(figsize=(20, 8), dpi=200)
plt.title('Correlation Heatmap Credit Card Approval Dataset')
sns.heatmap(
    credit_approv_df_dropna.corr(),
    linewidth=0.5,
    cmap='seismic',
    annot=True,
)
plt.savefig('assets/Seaborn_Cheat_Sheet_23.webp', bbox_inches='tight')
Clustermap
# Cluster map of the Month x Day temperature table with column clustering switched off.
clustermap_options = dict(
    cmap='coolwarm',
    annot=True,
    linewidth=0.5,
    col_cluster=False,
)
sns.clustermap(country_table_df, **clustermap_options)
plt.savefig('assets/Seaborn_Cheat_Sheet_19.webp', bbox_inches='tight')
Facet Grid
# Tip histograms on a smoker (rows) x time (columns) grid, coloured by day.
plot = (
    sns.FacetGrid(
        tips_df,
        row='smoker',
        col='time',
        hue='day',
        palette='plasma_r',
        sharex=True,
    )
    .map(plt.hist, 'tip')
    .add_legend()
)
plt.savefig('assets/Seaborn_Cheat_Sheet_26.webp', bbox_inches='tight')
# Total bill vs. tip scatter on a sex (rows) x day (columns) grid, coloured by time.
plot = (
    sns.FacetGrid(
        tips_df,
        row='sex',
        col='day',
        hue='time',
        palette='plasma',
    )
    .map(plt.scatter, 'total_bill', 'tip')
    .add_legend()
)
plt.savefig('assets/Seaborn_Cheat_Sheet_27.webp', bbox_inches='tight')