mpolinowski / python-seaborn-2023

Common Python Seaborn Operations

Home Page: https://mpolinowski.github.io/docs/Development/Python/2023-05-07-python-seaborn-cheat-sheet/2023-05-07

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Seaborn Cheat Sheet 2023

import numpy  as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

Datasets

Office Sales

!wget https://raw.githubusercontent.com/ashok-python/data/master/dm_office_sales.csv -P datasets
office_sales_df = pd.read_csv('datasets/dm_office_sales.csv')
office_sales_df.head(3)
division level of education training level work experience salary sales
0 printers some college 2 6 91684 372302
1 printers associate's degree 2 10 119679 495660
2 peripherals high school 0 9 82045 320453

Student Performance

!wget https://raw.githubusercontent.com/rashida048/Datasets/master/StudentsPerformance.csv -P datasets
students_performance_df = pd.read_csv('datasets/StudentsPerformance.csv')
students_performance_df.head(3)
gender race/ethnicity parental level of education lunch test preparation course math score reading score writing score
0 female group B bachelor's degree standard none 72 72 74
1 female group C some college standard completed 69 90 88
2 female group B master's degree standard none 90 95 93

Toronto Weather

!wget https://github.com/datagy/mediumdata/raw/master/toronto-weather.xlsx -P datasets
cols = ['LOCAL_DATE', 'MEAN_TEMPERATURE']
renames = {'LOCAL_DATE': 'Date', 'MEAN_TEMPERATURE': 'Temperature'}

country_table_df = pd.read_excel('datasets/toronto-weather.xlsx', usecols=cols, parse_dates=['LOCAL_DATE'])
country_table_df.rename(columns=renames, inplace=True)
country_table_df['Day'] = country_table_df['Date'].dt.day
country_table_df['Month'] = country_table_df['Date'].dt.month
country_table_df = country_table_df[country_table_df['Day'] <= 28]

country_table_df = pd.pivot_table(
    data=country_table_df,
    index='Month',
    columns='Day',
    values='Temperature'
)

country_table_df.iloc[:3, :3]

Credit Card Approval Prediction

!wget https://raw.githubusercontent.com/BrunoHerick/analiseCartaoCredito/main/application_record.csv -P datasets
credit_approv_df = pd.read_csv('datasets/application_record.csv')
credit_approv_df.head(3).transpose()
0 1 2
ID 5008804 5008805 5008806
CODE_GENDER M M M
FLAG_OWN_CAR Y Y Y
FLAG_OWN_REALTY Y Y Y
CNT_CHILDREN 0 0 0
AMT_INCOME_TOTAL 427500.0 427500.0 112500.0
NAME_INCOME_TYPE Working Working Working
NAME_EDUCATION_TYPE Higher education Higher education Secondary / secondary special
NAME_FAMILY_STATUS Civil marriage Civil marriage Married
NAME_HOUSING_TYPE Rented apartment Rented apartment House / apartment
DAYS_BIRTH -12005 -12005 -21474
DAYS_EMPLOYED -4542 -4542 -1134
FLAG_MOBIL 1 1 1
FLAG_WORK_PHONE 1 1 0
FLAG_PHONE 0 0 0
FLAG_EMAIL 0 0 0
OCCUPATION_TYPE NaN NaN Security staff
CNT_FAM_MEMBERS 2 2 2
CLASSIFICAO bom bom bom

Tips

!wget https://raw.githubusercontent.com/plotly/datasets/master/tips.csv -P datasets
tips_df = pd.read_csv('datasets/tips.csv')
tips_df.head(3).transpose()
0 1 2
total_bill 16.99 10.34 21.01
tip 1.01 1.66 3.5
sex Female Male Male
smoker No No No
day Sun Sun Sun
time Dinner Dinner Dinner
size 2 3 3

Scatter Plots

# Create the canvas, then switch the seaborn theme (same order as before).
plt.figure(figsize=(10, 7))
sns.set(style='darkgrid')

# Categorical encodings: colour = education level, marker shape = division.
ax = sns.scatterplot(
    data=office_sales_df,
    x='salary',
    y='sales',
    hue='level of education',
    style='division',
    palette='nipy_spectral',
    alpha=0.6,
    s=40
)
ax.set_title('Salary vs Sales')

# Write the figure to disk, trimming surrounding whitespace.
plt.savefig('assets/Seaborn_Cheat_Sheet_01.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

plt.figure(figsize=(10, 6))

# Numeric encodings: colour = work experience, marker size = training level.
ax = sns.scatterplot(
    data=office_sales_df,
    x='salary',
    y='sales',
    hue='work experience',
    size='training level',
    palette='cool'
)
ax.set_title('Salary vs Sales')

# Write the figure to disk, trimming surrounding whitespace.
plt.savefig('assets/Seaborn_Cheat_Sheet_02.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

plt.figure(figsize=(10, 6))

# DAYS_BIRTH / DAYS_EMPLOYED are negative in the preview rows; flip the sign so the
# axes read as positive day counts. Unary minus is vectorised, unlike a per-row apply().
credit_approv_df['DAYS_BIRTH_INV'] = -credit_approv_df['DAYS_BIRTH']
credit_approv_df['DAYS_EMPLOYED_INV'] = -credit_approv_df['DAYS_EMPLOYED']

# hue by categorical column (gender), size by continuous column (total income)
plot = sns.scatterplot(
    x='DAYS_BIRTH_INV',
    y='DAYS_EMPLOYED_INV',
    data=credit_approv_df,
    hue='CODE_GENDER',
    palette='winter',
    size='AMT_INCOME_TOTAL',
    alpha=0.3
)

plot.set_title('Age vs Days Employed by Gender and Total Income')
# Restrict the visible y-range to 0..17500 days; points outside it are not shown.
plot.set_ylim(0, 17500)

plt.savefig('assets/Seaborn_Cheat_Sheet_20.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Continuous Distribution Plots

Rug Plot

# Wide, short canvas: a rug plot only needs one row of ticks.
plt.figure(figsize=(10, 3))
plt.title('Salary Distribution based on Training')

# One tick per salary value, coloured by training level;
# height=0.75 makes each tick span 75% of the axes height.
sns.rugplot(
    data=office_sales_df,
    x='salary',
    hue='training level',
    palette='gist_rainbow',
    height=0.75
)

plt.savefig('assets/Seaborn_Cheat_Sheet_03.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Histogram

plt.figure(figsize=(10, 5))
plt.title('Salary Distribution based on Training')

# 50-bin salary histogram per training level, with a KDE curve drawn on top (kde=True).
sns.histplot(
    data=office_sales_df,
    x='salary',
    hue='training level',
    palette='winter',
    bins=50,
    kde=True
)

plt.savefig('assets/Seaborn_Cheat_Sheet_04.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

# Convert DAYS_BIRTH (negative in the preview rows) to a whole-year age.
# Vectorised divide/round replaces the per-row apply(); the result stays an integer column.
credit_approv_df['AGE_YEARS'] = (
    (credit_approv_df['DAYS_BIRTH'] / -365).round().astype('int64')
)

plt.figure(figsize=(10, 5))
plt.title('Age in Years Distribution')

# Step-outline histogram of age, one outline per gender.
sns.histplot(
    data=credit_approv_df,
    x='AGE_YEARS',
    bins=45,
    element='step',
    hue='CODE_GENDER',
    palette='tab20'
)

plt.savefig('assets/Seaborn_Cheat_Sheet_21.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Kernel Density Estimation

plt.figure(figsize=(10, 5))
plt.title('Salary Distribution based on Training')

# Clip the density estimate to the observed salary range so the curves
# are not evaluated beyond the smallest / largest salary in the data.
salary = office_sales_df['salary']
salary_range = [salary.min(), salary.max()]

# Filled density curve per training level.
sns.kdeplot(
    data=office_sales_df,
    x='salary',
    hue='training level',
    palette='viridis',
    clip=salary_range,
    fill=True
)

plt.savefig('assets/Seaborn_Cheat_Sheet_05.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Categorical Plots

Count Plot

plt.figure(figsize=(10, 5))
# Title spelling fixed: "Personnel" (staff), not "Personal".
plt.title('Sales Personnel per Division by Training')

# Number of rows per division, split into one bar per training level.
sns.countplot(
    data=office_sales_df,
    x='division',
    hue='training level',
    palette='seismic'
)

plt.savefig('assets/Seaborn_Cheat_Sheet_06.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Barplot

plt.figure(figsize=(10, 5))
plt.title('Mean Salary based on Education and Division')
sns.set(style='darkgrid')

# Bar height = mean salary per education level and division;
# error bars show the standard deviation (errorbar='sd').
sns.barplot(
    data=office_sales_df,
    x='level of education',
    y='salary',
    hue='division',
    palette='magma',
    estimator=np.mean,
    errorbar='sd'
)

# Anchor the legend at axes coordinates (1.01, 1.01).
plt.legend(bbox_to_anchor=(1.01,1.01))
plt.savefig('assets/Seaborn_Cheat_Sheet_07.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Categorical Distribution Plots

Boxplot

plt.figure(figsize=(10, 5))
plt.title('Math Score by Gender and Prep Course Attendance')

# Horizontal boxes: math score on x, one pair of boxes per gender,
# coloured by whether the preparation course was taken.
sns.boxplot(
    data=students_performance_df,
    x='math score',
    y='gender',
    hue='test preparation course',
    orient='h',
    palette='cool'
)
plt.savefig('assets/Seaborn_Cheat_Sheet_08.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

# Row count corresponding to 10% of the dataset.
ten_percent = round((len(credit_approv_df)/100*10))

# Take the subset once instead of re-slicing the frame for the plot and both axis limits.
# NOTE(review): tail() returns the LAST 10% of rows in file order, not the rows with the
# lowest AMT_INCOME_TOTAL -- confirm this matches the "Lower 10%" wording in the title.
income_subset_df = credit_approv_df.tail(ten_percent)
subset_income = income_subset_df['AMT_INCOME_TOTAL']

plt.figure(figsize=(10, 5))
plt.title('Lower 10% Total Income by Family Status and House Ownership')

# Vertical boxes per family status, split by realty ownership; thin lines and
# small outlier markers keep the plot readable.
plot = sns.boxplot(
    data=income_subset_df,
    y='AMT_INCOME_TOTAL',
    x='NAME_FAMILY_STATUS',
    hue='FLAG_OWN_REALTY',
    palette='winter',
    orient='v',
    linewidth=0.5,
    fliersize=1
)

# Pad the y-axis by 10000 beyond the subset's income range on both sides.
plot.set_ylim(
    subset_income.min() - 10000,
    subset_income.max() + 10000
)

plt.savefig('assets/Seaborn_Cheat_Sheet_22.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Violinplot

plt.figure(figsize=(12, 5))
plt.title('Tips Distribution')

# Single horizontal violin over the tip amounts (the Series is passed directly as x).
tip_amounts = tips_df['tip']
sns.violinplot(x=tip_amounts, color='mediumspringgreen')

plt.savefig('assets/Seaborn_Cheat_Sheet_25.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

plt.figure(figsize=(10, 5))
plt.title('Math Score by Gender and Prep Course Attendance')

# Split violins: each gender gets one violin whose two halves are the two
# preparation-course groups; quartiles are drawn as lines inside.
# NOTE(review): `bw` is reportedly deprecated in newer seaborn releases in favour of
# bw_method / bw_adjust -- check against the installed version.
sns.violinplot(
    data=students_performance_df,
    x='gender',
    y='math score',
    hue='test preparation course',
    split=True,
    inner='quartile',
    bw=0.3,
    orient='v',
    palette='cool'
)

plt.legend(loc='lower right')
plt.savefig('assets/Seaborn_Cheat_Sheet_09.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Swarmplot

plt.figure(figsize=(10, 5))
plt.title('Math Score by Gender and Parental Education')

# One point per student along the math-score axis, one swarm per gender,
# coloured by the parents' education level.
sns.swarmplot(
    data=students_performance_df,
    y='gender',
    x='math score',
    hue='parental level of education',
    palette='tab10'
)

plt.legend(loc='lower left')
plt.savefig('assets/Seaborn_Cheat_Sheet_10.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

# Explicit colours, one per value of the hue column ('time').
colour_palette = ['dodgerblue', 'mediumspringgreen']

plt.figure(figsize=(12, 5))
plt.title('Tips by Day and Time of the Day')

# One point per tip, one swarm per weekday, coloured by time of day.
sns.swarmplot(
    data=tips_df,
    y='tip',
    x='day',
    hue='time',
    palette=colour_palette
)

plt.legend(loc='upper right')
plt.savefig('assets/Seaborn_Cheat_Sheet_24.webp', bbox_inches='tight')
![Seaborn Cheat Sheet 2023](https://github.com/mpolinowski/python-seaborn-2023/raw/master/assets/Seaborn_Cheat_Sheet_24.webp)

Boxenplot

plt.figure(figsize=(10, 5))
plt.title('Math Score by Gender and Prep Course Attendance')

# Vertical boxen (letter-value) plot: math score per gender,
# split by preparation-course attendance.
sns.boxenplot(
    data=students_performance_df,
    y='math score',
    x='gender',
    hue='test preparation course',
    orient='v',
    palette='seismic'
)
plt.savefig('assets/Seaborn_Cheat_Sheet_11.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Comparison Plots

Jointplot

# Scatter of reading vs math score with marginal distributions, coloured by gender.
# jointplot builds its own figure, so no plt.figure() call is needed.
sns.jointplot(
    data=students_performance_df,
    kind='scatter',
    x='reading score',
    y='math score',
    hue='gender',
    palette='winter',
    alpha=0.4
)

plt.savefig('assets/Seaborn_Cheat_Sheet_12.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

# Filled 2-D density of reading vs math score with marginal densities.
plot = sns.jointplot(
    data=students_performance_df,
    kind='kde',
    x='reading score',
    y='math score',
    color='dodgerblue',
    fill=True
)

# Figure-level title on the JointGrid's figure.
# NOTE(review): the title says "by Gender" but no hue is passed above -- confirm intent.
title_style = {"weight": "normal"}
plot.fig.suptitle(
    'Math Score vs Reading Score by Gender',
    fontsize=6,
    fontdict=title_style
)

plt.savefig('assets/Seaborn_Cheat_Sheet_13.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Pairplot

# Pairwise relationships between the columns of the student scores frame,
# coloured by gender.
sns.pairplot(
    students_performance_df,
    palette='terrain',
    hue='gender'
)

plt.savefig('assets/Seaborn_Cheat_Sheet_14.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Grid Plots

Catplot

# Grid of boxen plots: rows = gender, columns = preparation course;
# within each facet, math score is plotted against lunch type.
sns.catplot(
    data=students_performance_df,
    kind='boxen',
    x='lunch',
    y='math score',
    row='gender',
    col='test preparation course',
    hue='test preparation course',
    palette='mako'
)

plt.savefig('assets/Seaborn_Cheat_Sheet_15.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Pairgrid

# Build the pair grid and assign a different plot type to each region:
# scatter above the diagonal, KDE contours below it, histograms on it.
grid = (
    sns.PairGrid(students_performance_df)
    .map_upper(sns.scatterplot, color='dodgerblue')
    .map_lower(sns.kdeplot, cmap='winter')
    .map_diag(sns.histplot, color='fuchsia')
)

plt.savefig('assets/Seaborn_Cheat_Sheet_16.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

# Columns used as extra point encodings in the off-diagonal scatter plots.
prep_course = students_performance_df["test preparation course"]
ethnicity = students_performance_df["race/ethnicity"]

# Gender-coloured pair grid: upper triangle sized by prep course, lower triangle
# sized and styled by race/ethnicity, KDE curves on the diagonal, then one legend.
grid = (
    sns.PairGrid(
        data=students_performance_df,
        hue='gender',
        palette='cool'
    )
    .map_upper(sns.scatterplot, size=prep_course, alpha=0.8)
    .map_lower(sns.scatterplot, size=ethnicity, style=ethnicity)
    .map_diag(sns.kdeplot)
    .add_legend(title="", adjust_subtitles=True)
)

plt.savefig('assets/Seaborn_Cheat_Sheet_17.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Matrix Plots

Heatmap

# Large, high-resolution canvas so the per-cell annotations stay legible.
plt.figure(figsize=(20, 8), dpi=200)
plt.title('Average daily temperature of Toronto, Canada in 2020')

# Month x Day temperature grid; annot=True prints each cell's value.
sns.heatmap(
    country_table_df,
    annot=True,
    cmap='coolwarm',
    linewidth=0.5
)

plt.savefig('assets/Seaborn_Cheat_Sheet_18.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

# Drop the text/categorical columns (corr() needs numeric input) plus two numeric
# columns the cell also leaves out (CNT_CHILDREN, FLAG_MOBIL).
# NOTE(review): FLAG_MOBIL is 1 in every preview row -- presumably constant; confirm.
credit_approv_df_only_numeric = credit_approv_df.drop(columns=[
    'CODE_GENDER',
    'FLAG_OWN_CAR',
    'FLAG_OWN_REALTY',
    'NAME_INCOME_TYPE',
    'NAME_EDUCATION_TYPE',
    'NAME_FAMILY_STATUS',
    'NAME_HOUSING_TYPE',
    'OCCUPATION_TYPE',
    'CLASSIFICAO',
    'CNT_CHILDREN',
    'FLAG_MOBIL'
])

# Remove rows in which every remaining column is NaN. This must reference the frame
# defined just above (a differently named, undefined variable raises NameError).
credit_approv_df_dropna = credit_approv_df_only_numeric.dropna(how='all')


plt.figure(figsize=(20, 8), dpi=200)
plt.title('Correlation Heatmap Credit Card Approval Dataset')

# Pairwise correlation matrix of the remaining columns, annotated per cell.
sns.heatmap(
    credit_approv_df_dropna.corr(),
    linewidth=0.5,
    cmap='seismic',
    annot=True
)

plt.savefig('assets/Seaborn_Cheat_Sheet_23.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Clustermap

# Clustered heatmap of the Month x Day temperature grid.
# col_cluster=False keeps the day columns in their original order; rows are still clustered.
sns.clustermap(
    country_table_df,
    col_cluster=False,
    annot=True,
    cmap='coolwarm',
    linewidth=0.5
)

plt.savefig('assets/Seaborn_Cheat_Sheet_19.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

Facet Grid

# Facets: columns = time of day, rows = smoker flag, colour = weekday.
# Each facet gets a histogram of the tip amounts; a shared legend is added last.
plot = (
    sns.FacetGrid(
        tips_df,
        row='smoker',
        col='time',
        hue='day',
        palette='plasma_r',
        sharex=True
    )
    .map(plt.hist, 'tip')
    .add_legend()
)

plt.savefig('assets/Seaborn_Cheat_Sheet_26.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023

# Facets: columns = weekday, rows = sex, colour = time of day.
# Each facet gets a scatter of total bill (x) against tip (y); a shared legend is added last.
plot = (
    sns.FacetGrid(
        tips_df,
        row='sex',
        col='day',
        hue='time',
        palette='plasma'
    )
    .map(plt.scatter, 'total_bill', 'tip')
    .add_legend()
)

plt.savefig('assets/Seaborn_Cheat_Sheet_27.webp', bbox_inches='tight')

Seaborn Cheat Sheet 2023