#!/usr/bin/env python
# coding: utf-8
# ---

# Department of Data Science

#

# Course: Tools and Techniques for Data Science

# # --- #

# Instructor: Muhammad Arif Butt, Ph.D.

#

# Lecture 3.25 (Data Visualization-V)

# Open In Colab
# ## _Data Visualization with Seaborn_
#
# **Read Documentation for details:**
# https://seaborn.pydata.org
#
# ## Learning agenda of this notebook
# 1. Overview of Seaborn Library
# 2. Download and Install Seaborn
# 3. Built-in Datasets of Seaborn Library
# 4. Plotting with Seaborn
#     - The `relplot()` method
#     - The `displot()` method
#     - The `catplot()` method

# ## 2. Download and Install Seaborn Library

# In[ ]:

# To install this library in Jupyter notebook
import sys
#!{sys.executable} -m pip install --upgrade pip
# NOTE: `get_ipython()` exists only inside an IPython/Jupyter session; IPython
# expands `{sys.executable}` before handing the command to the shell.
get_ipython().system('{sys.executable} -m pip install seaborn --quiet')

# In[1]:

import seaborn as sns
sns.__version__, sns.__path__

# ## 3. Built-in Datasets of Seaborn Library

# In[4]:

# To handle URLError:
# NOTE(security): this disables HTTPS certificate verification for every
# default-context HTTPS request made by this process. It is a classroom
# workaround only; prefer fixing the local certificate store.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
print(sns.get_dataset_names())

# ### a. CAR_CRASHES Dataset

# In[6]:

import seaborn as sns
df_cc = sns.load_dataset('car_crashes')
df_cc.head()

# In[7]:

df_cc.shape

# ### b. FLIGHTS Dataset

# In[8]:

df_flights = sns.load_dataset('flights')
df_flights

# ### c. TIPS Dataset

# In[9]:

df_tips = sns.load_dataset('tips')
df_tips

# ### d. IRIS Dataset
#

# In[10]:

df_iris = sns.load_dataset('iris')
df_iris

# In[11]:

df_iris['species'].value_counts()

# ### e.
# TITANIC Dataset
#

# In[14]:

df_titanic = sns.load_dataset('titanic')
df_titanic.head()

# In[13]:

df_titanic.shape

# ## Programming with Seaborn
# #### Option 1: (Use Axes-Level Functions)

# In[15]:

import seaborn as sns
from matplotlib import pyplot as plt

# An axes-level function draws onto the Matplotlib Axes passed via `ax`.
# The trailing semicolon only suppresses the echoed return value in a notebook.
fig, ax = plt.subplots()
sns.boxplot(x='sex', y='age', data=df_titanic, ax=ax);

# #### Option 2: (Use Figure-Level Functions)

# In[16]:

import seaborn as sns

# A figure-level function creates and manages its own figure (FacetGrid).
sns.catplot(x='sex', y='age', kind='box', data=df_titanic);

# In[ ]:

sns.set_context(context='paper')

# ## 4. Plotting Graphs with Seaborn
#

# In[17]:

import warnings

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
#plt.style.use('fivethirtyeight')

# Silences every warning for the rest of the session (keeps cell output clean).
warnings.filterwarnings('ignore')

# In[18]:

sns.set_style(style='white')  # other styles: 'dark', 'darkgrid', 'white', 'whitegrid'
sns.set_context(context='paper', font_scale=1.5)  # other contexts: 'talk', 'poster'

# ### a.
# The `sns.relplot()` Method
# - Line Plot
# - Scatter Plot

# NOTE: the trailing semicolons below only suppress the echoed return value
# when a cell is run in a notebook; they have no effect on the plots.

# **Example: Line Plot**

# In[19]:

df_iris.head()

# In[20]:

df_iris.describe()

# In[21]:

sns.relplot(x="sepal_width", y="sepal_length", data=df_iris, kind='line');

# In[22]:

sns.relplot(x="sepal_width", y="sepal_length", data=df_iris, kind='line', hue='species');

# In[23]:

sns.relplot(x="sepal_width", y="sepal_length", data=df_iris, kind='line', hue='species', style='species');

# **Example: Scatter Plot**

# In[24]:

df_tips.head()

# In[26]:

df_tips.shape

# In[25]:

sns.relplot(x='total_bill', y='tip', data=df_tips, kind='scatter');

# In[27]:

sns.relplot(x='total_bill', y='tip', data=df_tips, kind='scatter', hue='sex');

# In[28]:

sns.relplot(x='total_bill', y='tip', data=df_tips, kind='scatter', hue='sex', style='sex');

# In[29]:

sns.relplot(x='total_bill', y='tip', data=df_tips, kind='scatter', hue='sex', style='sex', col='sex');

# **Example: Sub-Plots using FacetGrid**

# In[31]:

sns.relplot(x='total_bill', y='tip', data=df_tips, kind='scatter', hue='day', col='day', col_wrap=2);

# ### b.
# The `sns.catplot()` Method
# - Categorical estimate plots:
#     - `pointplot` (with ``kind="point"``)
#     - `barplot` (with ``kind="bar"``)
#     - `countplot` (with ``kind="count"``)
#
# - Categorical distribution plots:
#     - `boxplot` (with ``kind="box"``)
#     - `violinplot` (with ``kind="violin"``)
#     - `boxenplot` (with ``kind="boxen"``)
#
# - Categorical scatterplots:
#     - `stripplot` (with ``kind="strip"``; the default)
#     - `swarmplot` (with ``kind="swarm"``)

# NOTE: the trailing semicolons below only suppress the echoed return value
# when a cell is run in a notebook; they have no effect on the plots.

# **Example: Bar Plot**

# In[32]:

df_titanic

# In[33]:

sns.catplot(x='sex', y='survived', kind='bar', data=df_titanic);

# In[34]:

sns.catplot(x='sex', y='tip', kind='bar', data=df_tips);

# In[35]:

sns.catplot(x='size', y='tip', kind='bar', data=df_tips);

# In[36]:

sns.catplot(x='day', y='tip', kind='bar', data=df_tips);

# **Example: Count Plot**

# In[37]:

sns.catplot(x='sex', kind='count', data=df_titanic);

# In[38]:

sns.catplot(x='day', kind='count', data=df_tips);

# In[39]:

sns.catplot(x='sex', kind='count', data=df_titanic, hue='survived');

# **Example: Box Plot**

# In[40]:

sns.catplot(x='sex', y='age', kind='box', data=df_titanic);

# In[41]:

sns.catplot(x='sex', y='age', kind='box', data=df_titanic, hue='survived');

# **Example: Violin Plot**

# In[42]:

sns.catplot(x='sex', y='age', kind='violin', data=df_titanic);

# In[43]:

sns.catplot(x='sex', y='age', kind='violin', data=df_titanic, hue='survived');

# In[ ]:

sns.catplot(x='sex', y='age', kind='violin', data=df_titanic, hue='survived', col='survived');

# **Example: Strip Plot**

# In[44]:

sns.catplot(y='age', kind='strip', data=df_titanic);

# In[ ]:

sns.catplot(x='sex', y='age', kind='strip', data=df_titanic);

# In[45]:

sns.catplot(x='sex', y='age', kind='strip', data=df_titanic, hue='survived');

# **Example: Swarm Plot**

# In[46]:

sns.catplot(x='sex', y='age', kind='swarm', data=df_titanic, hue='survived');

# **Example: Sub-Plots using FacetGrid**

# In[47]:

sns.catplot(x='sex', y='age', kind='box', data=df_titanic, hue='survived', col='survived');

# In[48]:

sns.catplot(x='sex', y='age', kind='box', data=df_titanic, col='survived');

# ### c. The `sns.displot()` Method
# - Distribution plots:
#     - `histplot` (with ``kind="hist"``)
#     - `kdeplot` (with ``kind="kde"``)
#     - `ecdfplot` (with ``kind="ecdf"``)
#

# **Example: Histogram**

# In[49]:

df_tips

# In[ ]:

df_tips.total_bill.min()

# In[ ]:

df_tips.total_bill.max()

# In[ ]:

df_tips.total_bill.mode()

# In[50]:

sns.displot(x='total_bill', data=df_tips);

# In[51]:

sns.displot(x='total_bill', data=df_tips, kind='hist');

# In[52]:

sns.displot(x='total_bill', data=df_tips, kind='hist', bins=30);

# In[53]:

sns.displot(x='total_bill', data=df_tips, kind='hist', bins=30, hue='day');

# **Example: KDE**

# In[54]:

sns.displot(x='total_bill', data=df_tips, kind='kde');

# In[55]:

sns.displot(x='total_bill', data=df_tips, kind='kde', fill=True)

# **Example: Histogram + KDE**

# In[ ]:

sns.displot(x='total_bill', data=df_tips, kind='hist', kde=True);

# In[ ]:

sns.displot(x='total_bill', data=df_tips, hue='day');

# In[ ]:

sns.displot(x='total_bill', data=df_tips, hue='day', col='day');

# **Example: Adding hue**

# In[ ]:

sns.displot(x='total_bill', data=df_tips, kind='kde', fill=True, hue='day');

# In[ ]:

sns.displot(x='total_bill', data=df_tips, kind='kde', fill=True, hue='day')

# In[ ]:

sns.displot(x='total_bill', data=df_tips, kind='kde', fill=True, hue='day')

# In[ ]:

df_tips

# In[ ]:

sns.displot(x='tip', data=df_tips, kind='kde', fill=True)

# In[ ]:

sns.displot(x='total_bill', data=df_tips, kind='kde', fill=True)

# **Example: ECDF**
# >**Binning Bias** is a pitfall of histograms where you will get different
# representations of the same data as you change the number of bins of a
# histogram plot. Note that the values along the y-axis change as you change
# the number of bins.

# In[ ]:

# Same column drawn with four bin counts on a 2x2 grid; `ax.flat` walks the
# grid row by row, i.e. ax[0][0], ax[0][1], ax[1][0], ax[1][1].
fig, ax = plt.subplots(2, 2)
for panel, n_bins in zip(ax.flat, (5, 25, 50, 100)):
    panel.hist(df_tips['total_bill'], bins=n_bins)

# In[ ]:

sns.displot(x='total_bill', data=df_tips, kind='ecdf');

# In[ ]:

sns.displot(x='tip', data=df_tips, kind='ecdf');

# In[ ]:

sns.displot(x='tip', data=df_tips, kind='ecdf', hue='time');

# In[ ]:

df_tips.tip.value_counts()

# **Example: Bivariate Analysis**

# In[ ]:

sns.displot(x='total_bill', y='tip', data=df_tips, kind='hist', cbar=True)

# In[ ]:

sns.displot(x='total_bill', y='tip', data=df_tips, kind='kde')

# In[ ]:

sns.displot(x='total_bill', y='tip', data=df_tips, kind='hist', hue='day', col='day')

# **Example: Sub-Plots using FacetGrid**

# In[ ]:

sns.displot(x='total_bill', data=df_tips, kind='hist', hue='day', col='day');