#!/usr/bin/env python
# coding: utf-8
# ---
#
#
#
# Department of Data Science
# Course: Tools and Techniques for Data Science
#
# ---
# Instructor: Muhammad Arif Butt, Ph.D.
# Lecture 3.25 (Data Visualization-V)
#
# ## _Data Visualization with Seaborn_
#
# **Read Documentation for details:**
# https://seaborn.pydata.org
#
#
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# ## Learning agenda of this notebook
# 1. Overview of Seaborn Library
# 2. Download and Install Seaborn
# 3. Built-in Datasets of Seaborn Library
# 4. Plotting with Seaborn
# - The `relplot()` method
# - The `displot()` method
# - The `catplot()` method
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# ## 2. Download and Install Seaborn Library
# In[ ]:
# Install seaborn into the environment of the interpreter running this file.
# `sys.executable -m pip` targets the same environment as the running
# script/kernel. subprocess is used instead of the IPython-only
# get_ipython().system(...) so the file also runs under plain `python`
# (where get_ipython is undefined and raises NameError). Passing the command
# as a list also keeps interpreter paths containing spaces intact.
import subprocess
import sys

# Optional: upgrade pip first.
# subprocess.run([sys.executable, '-m', 'pip', 'install', '--upgrade', 'pip'])
subprocess.run(
    [sys.executable, '-m', 'pip', 'install', 'seaborn', '--quiet'],
    check=False,  # best effort, like the original shell escape; a failed
                  # install shows up as an ImportError on the import below
)
# In[1]:
import seaborn as sns
# Installed version and the package location on disk.
sns.__version__ , sns.__path__
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# ## 3. Built-in Datasets of Seaborn Library
# In[4]:
# To handle URLError:
# NOTE(review): the assignment below replaces ssl's default-HTTPS-context
# factory with the *unverified* one, using two private (underscore-prefixed)
# ssl names. Presumably this turns off certificate verification for every
# later HTTPS request made through that default context in this session --
# acceptable for a classroom demo, not for production code. Confirm whether
# fixing the local certificate store would make this unnecessary.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
# Print the names of the example datasets reported by seaborn.
print(sns.get_dataset_names())
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# ### a. CAR_CRASHES Dataset
# In[6]:
# CAR_CRASHES example dataset: load it by name and preview it.
import seaborn as sns

df_cc = sns.load_dataset(name='car_crashes')
# First rows of the frame (shown as cell output in a notebook).
df_cc.head()
# In[7]:
# (rows, columns) of the frame.
df_cc.shape
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# ### b. FLIGHTS Dataset
# In[8]:
# FLIGHTS example dataset, loaded by name.
df_flights = sns.load_dataset(name='flights')
# Bare name as the last expression: displays the frame in a notebook cell.
df_flights
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# ### c. TIPS Dataset
# In[9]:
# TIPS example dataset, loaded by name; reused by most plots further down.
df_tips = sns.load_dataset(name='tips')
# Bare name as the last expression: displays the frame in a notebook cell.
df_tips
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# ### d. IRIS Dataset
#
# In[ ]:
# In[10]:
# IRIS example dataset, loaded by name.
df_iris = sns.load_dataset(name='iris')
df_iris
# In[11]:
# How many rows carry each distinct value of the 'species' column.
species_column = 'species'
df_iris[species_column].value_counts()
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# ### e. TITANIC Dataset
#
# In[14]:
# TITANIC example dataset, loaded by name; used by the categorical plots.
df_titanic = sns.load_dataset(name='titanic')
# First rows of the frame (shown as cell output in a notebook).
df_titanic.head()
# In[13]:
# (rows, columns) of the frame.
df_titanic.shape
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# ## Programming with Seaborn
# #### Option 1: (Use Axes-Level Functions)
# In[15]:
# Axes-level route: create the matplotlib figure/axes ourselves and hand the
# target axes to seaborn through `ax=`.
import matplotlib.pyplot as plt
import seaborn as sns

fig, ax = plt.subplots(nrows=1, ncols=1)
sns.boxplot(data=df_titanic, x='sex', y='age', ax=ax);
# #### Option 2: (Use Figure-Level Functions)
# In[16]:
# Figure-level route: no axes are passed in; the plot type is chosen with
# `kind=`.
import seaborn as sns

sns.catplot(data=df_titanic, x='sex', y='age', kind='box');
# In[ ]:
# Select the 'paper' plotting context for subsequent plots.
sns.set_context('paper')
# In[ ]:
# In[ ]:
# ## 4. Plotting Graphs with Seaborn
#
# In[17]:
# Standard library first, then third-party packages.
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Optional alternative matplotlib theme (left disabled):
# plt.style.use('fivethirtyeight')

# Suppress all warnings so they do not clutter the cell output.
warnings.filterwarnings(action='ignore')
# In[18]:
# Other style presets: 'dark', 'darkgrid', 'whitegrid'.
sns.set_style('white')
# Other context presets: 'talk', 'poster'.
sns.set_context('paper', font_scale=1.5)
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# ### a. The `sns.relplot()` Method
# - Line Plot
# - Scatter Plot
# **Example: Line Plot**
# In[19]:
# Quick look at the iris data before plotting it.
df_iris.head()
# In[20]:
df_iris.describe()
# In[21]:
# Line plot of sepal_length against sepal_width for the whole dataset.
sns.relplot(
    data=df_iris,
    x='sepal_width',
    y='sepal_length',
    kind='line',
);
# In[22]:
# Same plot, with `hue` mapped to the 'species' column.
sns.relplot(
    data=df_iris,
    x='sepal_width',
    y='sepal_length',
    kind='line',
    hue='species',
);
# In[23]:
# 'species' mapped to both `hue` and `style`.
sns.relplot(
    data=df_iris,
    x='sepal_width',
    y='sepal_length',
    kind='line',
    hue='species',
    style='species',
);
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# **Example: Scatter Plot**
# In[24]:
# Quick look at the tips data before plotting it.
df_tips.head()
# In[26]:
df_tips.shape
# In[25]:
# Scatter plot of tip against total_bill.
sns.relplot(data=df_tips, x='total_bill', y='tip', kind='scatter');
# In[27]:
# `hue` mapped to the 'sex' column.
sns.relplot(data=df_tips, x='total_bill', y='tip', kind='scatter', hue='sex');
# In[28]:
# 'sex' mapped to both `hue` and `style`.
sns.relplot(
    data=df_tips,
    x='total_bill',
    y='tip',
    kind='scatter',
    hue='sex',
    style='sex',
);
# In[29]:
# Additionally facet on 'sex' through `col`.
sns.relplot(
    data=df_tips,
    x='total_bill',
    y='tip',
    kind='scatter',
    hue='sex',
    style='sex',
    col='sex',
);
# **Example: Sub-Plots using FacetGrid**
# In[31]:
# Facet on 'day' through `col`, with col_wrap=2.
sns.relplot(
    data=df_tips,
    x='total_bill',
    y='tip',
    kind='scatter',
    hue='day',
    col='day',
    col_wrap=2,
);
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# ### b. The `sns.catplot()` Method
# - Categorical estimate plots:
# - `pointplot` (with ``kind="point"``)
# - `barplot` (with ``kind="bar"``)
# - `countplot` (with ``kind="count"``)
#
# - Categorical distribution plots:
# - `boxplot` (with ``kind="box"``)
# - `violinplot` (with ``kind="violin"``)
# - `boxenplot` (with ``kind="boxen"``)
#
# - Categorical scatterplots:
# - `stripplot` (with ``kind="strip"``; the default)
# - `swarmplot` (with ``kind="swarm"``)
# **Example: Bar Plot**
# In[32]:
# Show the titanic frame that the first bar plot is built from.
df_titanic
# In[33]:
# Bar plot of 'survived' per 'sex' category.
sns.catplot(data=df_titanic, x='sex', y='survived', kind='bar');
# In[34]:
# Bar plots of 'tip' from the tips data, grouped by three different columns.
sns.catplot(data=df_tips, x='sex', y='tip', kind='bar');
# In[35]:
sns.catplot(data=df_tips, x='size', y='tip', kind='bar');
# In[36]:
sns.catplot(data=df_tips, x='day', y='tip', kind='bar');
# In[ ]:
# **Example: Count Plot**
# In[37]:
# Count plots take only a categorical `x`; no `y` is passed.
sns.catplot(data=df_titanic, x='sex', kind='count');
# In[38]:
sns.catplot(data=df_tips, x='day', kind='count');
# In[39]:
# `hue` mapped to the 'survived' column.
sns.catplot(data=df_titanic, x='sex', kind='count', hue='survived');
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# **Example: Box Plot**
# In[40]:
# Box plot of 'age' for each 'sex' category.
sns.catplot(data=df_titanic, x='sex', y='age', kind='box');
# In[41]:
# Same plot with `hue` mapped to the 'survived' column.
sns.catplot(data=df_titanic, x='sex', y='age', kind='box', hue='survived');
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# **Example: Violin Plot**
# In[42]:
# Violin plot of 'age' for each 'sex' category.
sns.catplot(data=df_titanic, x='sex', y='age', kind='violin');
# In[43]:
# `hue` mapped to the 'survived' column.
sns.catplot(data=df_titanic, x='sex', y='age', kind='violin', hue='survived');
# In[ ]:
# 'survived' mapped to both `hue` and `col`.
sns.catplot(
    data=df_titanic,
    x='sex',
    y='age',
    kind='violin',
    hue='survived',
    col='survived',
);
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# **Example: Strip Plot**
# In[44]:
# Strip plot of 'age' alone (no categorical `x`).
sns.catplot(data=df_titanic, y='age', kind='strip');
# In[ ]:
# Strip plot of 'age' for each 'sex' category.
sns.catplot(data=df_titanic, x='sex', y='age', kind='strip');
# In[45]:
# `hue` mapped to the 'survived' column.
sns.catplot(data=df_titanic, x='sex', y='age', kind='strip', hue='survived');
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# **Example: Swarm Plot**
# In[46]:
# Swarm plot of 'age' per 'sex', with `hue` mapped to 'survived'.
sns.catplot(data=df_titanic, x='sex', y='age', kind='swarm', hue='survived');
# **Example: Sub-Plots using FacetGrid**
# In[47]:
# Box plots with 'survived' mapped to both `hue` and `col`.
sns.catplot(
    data=df_titanic,
    x='sex',
    y='age',
    kind='box',
    hue='survived',
    col='survived',
);
# In[48]:
# Same facets without the `hue` mapping.
sns.catplot(data=df_titanic, x='sex', y='age', kind='box', col='survived');
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# ### c. The `sns.displot()` Method
# - Distribution plots:
# - `histplot` (with ``kind="hist"``)
# - `kdeplot` (with ``kind="kde"``)
# - `ecdfplot` (with ``kind="ecdf"``)
#
# **Example: Histogram**
# In[49]:
# Inspect the tips data and the range of 'total_bill' before plotting it.
df_tips
# In[ ]:
df_tips['total_bill'].min()
# In[ ]:
df_tips['total_bill'].max()
# In[ ]:
df_tips['total_bill'].mode()
# In[50]:
# displot without an explicit `kind`.
sns.displot(data=df_tips, x='total_bill');
# In[51]:
# Explicit kind='hist'.
sns.displot(data=df_tips, x='total_bill', kind='hist');
# In[52]:
# Histogram with bins=30.
sns.displot(data=df_tips, x='total_bill', kind='hist', bins=30);
# In[53]:
# Same binning, with `hue` mapped to the 'day' column.
sns.displot(data=df_tips, x='total_bill', kind='hist', bins=30, hue='day');
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# **Example: KDE**
# In[54]:
# KDE plot of 'total_bill'.
sns.displot(data=df_tips, x='total_bill', kind='kde');
# In[55]:
# Same KDE plot with fill=True.
sns.displot(data=df_tips, x='total_bill', kind='kde', fill=True)
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# **Example: Histogram + KDE**
# In[ ]:
# In[ ]:
# Histogram of 'total_bill' with kde=True.
sns.displot(data=df_tips, x='total_bill', kind='hist', kde=True);
# In[ ]:
# Default displot with `hue` mapped to 'day'.
sns.displot(data=df_tips, x='total_bill', hue='day');
# In[ ]:
# 'day' mapped to both `hue` and `col`.
sns.displot(data=df_tips, x='total_bill', hue='day', col='day');
# **Example: Adding hue**
# In[ ]:
# Filled KDE plot with `hue` mapped to 'day'.
sns.displot(data=df_tips, x='total_bill', kind='kde', hue='day', fill=True);
# In[ ]:
sns.displot(data=df_tips, x='total_bill', kind='kde', hue='day', fill=True)
# In[ ]:
sns.displot(data=df_tips, x='total_bill', kind='kde', hue='day', fill=True)
# In[ ]:
df_tips
# In[ ]:
# Filled KDE plots of 'tip' and of 'total_bill', without `hue`.
sns.displot(data=df_tips, x='tip', kind='kde', fill=True)
# In[ ]:
sns.displot(data=df_tips, x='total_bill', kind='kde', fill=True)
# In[ ]:
# **Example: ECDF**
# >**Binning Bias** is a pitfall of histograms where you get different representations of the same data as you change the number of bins of a histogram plot. Note that the values along the y-axis change as you change the number of bins.
# In[ ]:
# Histogram of the same 'total_bill' column with four different bin counts,
# one per panel of a 2x2 grid. ax.flat walks the grid row by row, so the
# panels receive 5, 25, 50 and 100 bins in that order.
fig, ax = plt.subplots(nrows=2, ncols=2)
for _panel, _n_bins in zip(ax.flat, (5, 25, 50, 100)):
    _panel.hist(df_tips['total_bill'], bins=_n_bins)
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# ECDF plots of 'total_bill' and of 'tip'.
sns.displot(data=df_tips, x='total_bill', kind='ecdf');
# In[ ]:
sns.displot(data=df_tips, x='tip', kind='ecdf');
# In[ ]:
# ECDF of 'tip' with `hue` mapped to the 'time' column.
sns.displot(data=df_tips, x='tip', kind='ecdf', hue='time');
# In[ ]:
# How often each distinct tip value occurs.
df_tips['tip'].value_counts()
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# **Example: Bivariate Analysis**
# In[ ]:
# Passing both `x` and `y` makes the plot bivariate; cbar=True is also set.
sns.displot(data=df_tips, x='total_bill', y='tip', kind='hist', cbar=True)
# In[ ]:
# Bivariate KDE of the same two columns.
sns.displot(data=df_tips, x='total_bill', y='tip', kind='kde')
# In[ ]:
# Bivariate histogram with 'day' mapped to both `hue` and `col`.
sns.displot(
    data=df_tips,
    x='total_bill',
    y='tip',
    kind='hist',
    hue='day',
    col='day',
)
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# **Example: Sub-Plots using FacetGrid**
# In[ ]:
# Histogram of 'total_bill' with 'day' mapped to both `hue` and `col`.
sns.displot(
    data=df_tips,
    x='total_bill',
    kind='hist',
    hue='day',
    col='day',
);
# In[ ]:
# In[ ]: