Read Documentation for details: https://seaborn.pydata.org
- relplot() method
- displot() method
- catplot() method
# Install seaborn from inside a Jupyter notebook.
# `sys.executable` is the Python interpreter running this kernel, so invoking
# pip through it installs into the environment this notebook imports from.
import sys
# Optional: uncomment the next line to upgrade pip itself first.
#!{sys.executable} -m pip install --upgrade pip
!{sys.executable} -m pip install seaborn --quiet
import seaborn as sns
sns.__version__ , sns.__path__
('0.11.2', ['/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/seaborn'])
# Workaround for "URLError: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] ...
# unable to get local issuer certificate>" when seaborn downloads over HTTPS.
# SECURITY NOTE(review): this replaces the default HTTPS context factory with an
# unverified one, so certificate checks are switched off for later standard-library
# HTTPS requests in this process, and it relies on private `ssl` names. Prefer
# repairing the local certificate store and deleting these lines.
import ssl
from urllib.error import URLError

ssl._create_default_https_context = ssl._create_unverified_context

# get_dataset_names() fetches the list over the network. Without a working
# connection it raises URLError (e.g. "[Errno 8] nodename nor servname provided,
# or not known"), so report the reason instead of aborting with a traceback.
try:
    print(sns.get_dataset_names())
except URLError as err:
    print(f"Could not fetch the seaborn dataset list: {err.reason}")
--------------------------------------------------------------------------- gaierror Traceback (most recent call last) /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py in do_open(self, http_class, req, **http_conn_args) 1353 try: -> 1354 h.request(req.get_method(), req.selector, req.data, headers, 1355 encode_chunked=req.has_header('Transfer-encoding')) /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py in request(self, method, url, body, headers, encode_chunked) 1251 """Send a complete request to the server.""" -> 1252 self._send_request(method, url, body, headers, encode_chunked) 1253 /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py in _send_request(self, method, url, body, headers, encode_chunked) 1297 body = _encode(body, 'body') -> 1298 self.endheaders(body, encode_chunked=encode_chunked) 1299 /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py in endheaders(self, message_body, encode_chunked) 1246 raise CannotSendHeader() -> 1247 self._send_output(message_body, encode_chunked=encode_chunked) 1248 /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py in _send_output(self, message_body, encode_chunked) 1006 del self._buffer[:] -> 1007 self.send(msg) 1008 /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py in send(self, data) 946 if self.auto_open: --> 947 self.connect() 948 else: /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py in connect(self) 1413 -> 1414 super().connect() 1415 /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py in connect(self) 917 """Connect to the host and port specified in __init__.""" --> 918 self.sock = self._create_connection( 919 (self.host,self.port), self.timeout, self.source_address) /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/socket.py in create_connection(address, timeout, 
source_address) 786 err = None --> 787 for res in getaddrinfo(host, port, 0, SOCK_STREAM): 788 af, socktype, proto, canonname, sa = res /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/socket.py in getaddrinfo(host, port, family, type, proto, flags) 917 addrlist = [] --> 918 for res in _socket.getaddrinfo(host, port, family, type, proto, flags): 919 af, socktype, proto, canonname, sa = res gaierror: [Errno 8] nodename nor servname provided, or not known During handling of the above exception, another exception occurred: URLError Traceback (most recent call last) /var/folders/1t/g3ylw8h50cjdqmk5d6jh1qmm0000gn/T/ipykernel_37492/4093529098.py in <module> 3 ssl._create_default_https_context = ssl._create_unverified_context 4 ----> 5 print(sns.get_dataset_names()) /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/seaborn/utils.py in get_dataset_names() 518 """ 519 url = "https://github.com/mwaskom/seaborn-data" --> 520 with urlopen(url) as resp: 521 html = resp.read() 522 /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context) 220 else: 221 opener = _opener --> 222 return opener.open(url, data, timeout) 223 224 def install_opener(opener): /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py in open(self, fullurl, data, timeout) 523 524 sys.audit('urllib.Request', req.full_url, req.data, req.headers, req.get_method()) --> 525 response = self._open(req, data) 526 527 # post-process response /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py in _open(self, req, data) 540 541 protocol = req.type --> 542 result = self._call_chain(self.handle_open, protocol, protocol + 543 '_open', req) 544 if result: /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args) 500 for handler in handlers: 501 func = 
getattr(handler, meth_name) --> 502 result = func(*args) 503 if result is not None: 504 return result /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py in https_open(self, req) 1395 1396 def https_open(self, req): -> 1397 return self.do_open(http.client.HTTPSConnection, req, 1398 context=self._context, check_hostname=self._check_hostname) 1399 /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py in do_open(self, http_class, req, **http_conn_args) 1355 encode_chunked=req.has_header('Transfer-encoding')) 1356 except OSError as err: # timeout error -> 1357 raise URLError(err) 1358 r = h.getresponse() 1359 except: URLError: <urlopen error [Errno 8] nodename nor servname provided, or not known>
import seaborn as sns
df_cc = sns.load_dataset('car_crashes')
df_cc.head()
total | speeding | alcohol | not_distracted | no_previous | ins_premium | ins_losses | abbrev | |
---|---|---|---|---|---|---|---|---|
0 | 18.8 | 7.332 | 5.640 | 18.048 | 15.040 | 784.55 | 145.08 | AL |
1 | 18.1 | 7.421 | 4.525 | 16.290 | 17.014 | 1053.48 | 133.93 | AK |
2 | 18.6 | 6.510 | 5.208 | 15.624 | 17.856 | 899.47 | 110.35 | AZ |
3 | 22.4 | 4.032 | 5.824 | 21.056 | 21.280 | 827.34 | 142.39 | AR |
4 | 12.0 | 4.200 | 3.360 | 10.920 | 10.680 | 878.41 | 165.63 | CA |
df_cc.shape
(51, 8)
df_flights = sns.load_dataset('flights')
df_flights
year | month | passengers | |
---|---|---|---|
0 | 1949 | Jan | 112 |
1 | 1949 | Feb | 118 |
2 | 1949 | Mar | 132 |
3 | 1949 | Apr | 129 |
4 | 1949 | May | 121 |
... | ... | ... | ... |
139 | 1960 | Aug | 606 |
140 | 1960 | Sep | 508 |
141 | 1960 | Oct | 461 |
142 | 1960 | Nov | 390 |
143 | 1960 | Dec | 432 |
144 rows × 3 columns
df_tips = sns.load_dataset('tips')
df_tips
total_bill | tip | sex | smoker | day | time | size | |
---|---|---|---|---|---|---|---|
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
... | ... | ... | ... | ... | ... | ... | ... |
239 | 29.03 | 5.92 | Male | No | Sat | Dinner | 3 |
240 | 27.18 | 2.00 | Female | Yes | Sat | Dinner | 2 |
241 | 22.67 | 2.00 | Male | Yes | Sat | Dinner | 2 |
242 | 17.82 | 1.75 | Male | No | Sat | Dinner | 2 |
243 | 18.78 | 3.00 | Female | No | Thur | Dinner | 2 |
244 rows × 7 columns
df_iris = sns.load_dataset('iris')
df_iris
sepal_length | sepal_width | petal_length | petal_width | species | |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
... | ... | ... | ... | ... | ... |
145 | 6.7 | 3.0 | 5.2 | 2.3 | virginica |
146 | 6.3 | 2.5 | 5.0 | 1.9 | virginica |
147 | 6.5 | 3.0 | 5.2 | 2.0 | virginica |
148 | 6.2 | 3.4 | 5.4 | 2.3 | virginica |
149 | 5.9 | 3.0 | 5.1 | 1.8 | virginica |
150 rows × 5 columns
df_iris['species'].value_counts()
setosa 50 versicolor 50 virginica 50 Name: species, dtype: int64
df_titanic = sns.load_dataset('titanic')
df_titanic.head()
survived | pclass | sex | age | sibsp | parch | fare | embarked | class | who | adult_male | deck | embark_town | alive | alone | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | S | Third | man | True | NaN | Southampton | no | False |
1 | 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C | First | woman | False | C | Cherbourg | yes | False |
2 | 1 | 3 | female | 26.0 | 0 | 0 | 7.9250 | S | Third | woman | False | NaN | Southampton | yes | True |
3 | 1 | 1 | female | 35.0 | 1 | 0 | 53.1000 | S | First | woman | False | C | Southampton | yes | False |
4 | 0 | 3 | male | 35.0 | 0 | 0 | 8.0500 | S | Third | man | True | NaN | Southampton | no | True |
df_titanic.shape
(891, 15)
import seaborn as sns
from matplotlib import pyplot as plt

# Build the figure and axes with matplotlib first, then have seaborn draw
# the box plot of age by sex onto that axes.
fig, ax = plt.subplots()
sns.boxplot(data=df_titanic, x='sex', y='age', ax=ax);

import seaborn as sns

# The same box plot requested through catplot with kind='box'; no axes is passed in.
sns.catplot(data=df_titanic, x='sex', y='age', kind='box');

# Switch seaborn's plotting context to 'paper'.
sns.set_context('paper')
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Optional: uncomment to switch matplotlib to the 'fivethirtyeight' style sheet.
#plt.style.use('fivethirtyeight')
import warnings
# NOTE(review): an 'ignore' filter with no category hides every warning raised
# after this point, including deprecation notices; consider a narrower filter.
warnings.filterwarnings('ignore')
sns.set_style(style='white') # alternatives: 'dark', 'darkgrid', 'whitegrid'
sns.set_context(context='paper', font_scale=1.5) # alternatives: 'talk', 'poster'
sns.relplot() Method
Example: Line Plot
df_iris.head()
sepal_length | sepal_width | petal_length | petal_width | species | |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
df_iris.describe()
sepal_length | sepal_width | petal_length | petal_width | |
---|---|---|---|---|
count | 150.000000 | 150.000000 | 150.000000 | 150.000000 |
mean | 5.843333 | 3.057333 | 3.758000 | 1.199333 |
std | 0.828066 | 0.435866 | 1.765298 | 0.762238 |
min | 4.300000 | 2.000000 | 1.000000 | 0.100000 |
25% | 5.100000 | 2.800000 | 1.600000 | 0.300000 |
50% | 5.800000 | 3.000000 | 4.350000 | 1.300000 |
75% | 6.400000 | 3.300000 | 5.100000 | 1.800000 |
max | 7.900000 | 4.400000 | 6.900000 | 2.500000 |
# Line plots of sepal_length against sepal_width for the iris data:
# plain first, then coloured by species, then with a per-species line style too.
iris_line = dict(x="sepal_width", y="sepal_length", data=df_iris, kind='line')
for species_semantics in ({}, {'hue': 'species'}, {'hue': 'species', 'style': 'species'}):
    sns.relplot(**iris_line, **species_semantics)
Example: Scatter Plot
df_tips.head()
total_bill | tip | sex | smoker | day | time | size | |
---|---|---|---|---|---|---|---|
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
df_tips.shape
(244, 7)
# Scatter plots of tip against total_bill. After the plain plot, 'sex' is mapped
# to one more parameter on each pass: hue, then style, then col.
tips_scatter = dict(x='total_bill', y='tip', data=df_tips, kind='scatter')
sns.relplot(**tips_scatter)
by_sex = {}
for semantic in ('hue', 'style', 'col'):
    by_sex[semantic] = 'sex'
    sns.relplot(**tips_scatter, **by_sex)
Example: Sub-Plots using FacetGrid
# Scatter of tip against total_bill with 'day' used for both hue and col;
# col_wrap=2 wraps the day panels after every second column.
sns.relplot(
    data=df_tips,
    kind='scatter',
    x='total_bill',
    y='tip',
    hue='day',
    col='day',
    col_wrap=2,
);
sns.catplot() Method

Categorical estimate plots:
- pointplot (with kind="point")
- barplot (with kind="bar")
- countplot (with kind="count")

Categorical distribution plots:
- boxplot (with kind="box")
- violinplot (with kind="violin")
- boxenplot (with kind="boxen")

Categorical scatterplots:
- stripplot (with kind="strip"; the default)
- swarmplot (with kind="swarm")

Example: Bar Plot
df_titanic
survived | pclass | sex | age | sibsp | parch | fare | embarked | class | who | adult_male | deck | embark_town | alive | alone | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | S | Third | man | True | NaN | Southampton | no | False |
1 | 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C | First | woman | False | C | Cherbourg | yes | False |
2 | 1 | 3 | female | 26.0 | 0 | 0 | 7.9250 | S | Third | woman | False | NaN | Southampton | yes | True |
3 | 1 | 1 | female | 35.0 | 1 | 0 | 53.1000 | S | First | woman | False | C | Southampton | yes | False |
4 | 0 | 3 | male | 35.0 | 0 | 0 | 8.0500 | S | Third | man | True | NaN | Southampton | no | True |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
886 | 0 | 2 | male | 27.0 | 0 | 0 | 13.0000 | S | Second | man | True | NaN | Southampton | no | True |
887 | 1 | 1 | female | 19.0 | 0 | 0 | 30.0000 | S | First | woman | False | B | Southampton | yes | True |
888 | 0 | 3 | female | NaN | 1 | 2 | 23.4500 | S | Third | woman | False | NaN | Southampton | no | False |
889 | 1 | 1 | male | 26.0 | 0 | 0 | 30.0000 | C | First | man | True | C | Cherbourg | yes | True |
890 | 0 | 3 | male | 32.0 | 0 | 0 | 7.7500 | Q | Third | man | True | NaN | Queenstown | no | True |
891 rows × 15 columns
# Bar plots; each entry is (x column, y column, DataFrame).
bar_specs = [
    ('sex', 'survived', df_titanic),
    ('sex', 'tip', df_tips),
    ('size', 'tip', df_tips),
    ('day', 'tip', df_tips),
]
for x_col, y_col, frame in bar_specs:
    sns.catplot(x=x_col, y=y_col, kind='bar', data=frame)
Example: Count Plot
# Count plots: titanic by sex, tips by day, then titanic by sex split by 'survived'.
count_specs = (
    dict(x='sex', data=df_titanic),
    dict(x='day', data=df_tips),
    dict(x='sex', data=df_titanic, hue='survived'),
)
for spec in count_specs:
    sns.catplot(kind='count', **spec)
Example: Box Plot
# Box plots of age by sex on the titanic data, first overall and then
# with 'survived' as the hue.
age_by_sex_box = dict(x='sex', y='age', data=df_titanic, kind='box')
sns.catplot(**age_by_sex_box);
sns.catplot(hue='survived', **age_by_sex_box);
Example: Violin Plot
# Violin plots of age by sex: plain, with 'survived' as hue, then with
# 'survived' as both hue and col.
age_by_sex_violin = dict(x='sex', y='age', kind='violin', data=df_titanic)
for survived_args in ({}, {'hue': 'survived'}, {'hue': 'survived', 'col': 'survived'}):
    sns.catplot(**age_by_sex_violin, **survived_args)
Example: Strip Plot
# Strip plots of age: all passengers together, then grouped by sex, then
# grouped by sex with 'survived' as hue.
age_strip = dict(y='age', kind='strip', data=df_titanic)
for grouping in ({}, {'x': 'sex'}, {'x': 'sex', 'hue': 'survived'}):
    sns.catplot(**age_strip, **grouping)
Example: Swarm Plot
# Swarm plot of age by sex with 'survived' as hue.
sns.catplot(
    data=df_titanic,
    kind='swarm',
    x='sex',
    y='age',
    hue='survived',
);
Example: Sub-Plots using FacetGrid
# Box plots of age by sex with one column of panels per 'survived' value;
# the first call also maps 'survived' to hue.
box_by_survival = dict(x='sex', y='age', kind='box', data=df_titanic, col='survived')
sns.catplot(hue='survived', **box_by_survival);
sns.catplot(**box_by_survival);
sns.displot() Method
- histplot (with kind="hist")
- kdeplot (with kind="kde")
- ecdfplot (with kind="ecdf")

Example: Histogram
df_tips
total_bill | tip | sex | smoker | day | time | size | |
---|---|---|---|---|---|---|---|
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
... | ... | ... | ... | ... | ... | ... | ... |
239 | 29.03 | 5.92 | Male | No | Sat | Dinner | 3 |
240 | 27.18 | 2.00 | Female | Yes | Sat | Dinner | 2 |
241 | 22.67 | 2.00 | Male | Yes | Sat | Dinner | 2 |
242 | 17.82 | 1.75 | Male | No | Sat | Dinner | 2 |
243 | 18.78 | 3.00 | Female | No | Thur | Dinner | 2 |
244 rows × 7 columns
# Smallest, largest and most frequent value(s) of total_bill.
df_tips['total_bill'].min()
df_tips['total_bill'].max()
df_tips['total_bill'].mode()

# Distribution plots of total_bill: no kind given, kind='hist', 30 bins,
# then 30 bins with 'day' as hue.
hist_steps = (
    {},
    {'kind': 'hist'},
    {'kind': 'hist', 'bins': 30},
    {'kind': 'hist', 'bins': 30, 'hue': 'day'},
)
for step in hist_steps:
    sns.displot(x='total_bill', data=df_tips, **step)
Example: KDE
# KDE plots of total_bill: without fill first, then with fill=True.
bill_kde = dict(x='total_bill', data=df_tips, kind='kde')
sns.displot(**bill_kde);
# No trailing ';' here, so the notebook also echoes the returned FacetGrid.
sns.displot(fill=True, **bill_kde)
<seaborn.axisgrid.FacetGrid at 0x7fd1443938b0>
Example: Histogram + KDE
# total_bill with kind='hist' and kde=True, then with 'day' as hue,
# then with 'day' as both hue and col.
hist_kde_steps = (
    {'kind': 'hist', 'kde': True},
    {'hue': 'day'},
    {'hue': 'day', 'col': 'day'},
)
for step in hist_kde_steps:
    sns.displot(x='total_bill', data=df_tips, **step)
Example: Adding hue
# Filled KDE plots on the tips data.
filled_kde = dict(data=df_tips, kind='kde', fill=True)

# total_bill with 'day' as hue; the call is made three times, and only the
# first ends in ';' (which keeps the notebook from echoing the returned object).
sns.displot(x='total_bill', hue='day', **filled_kde);
sns.displot(x='total_bill', hue='day', **filled_kde)
sns.displot(x='total_bill', hue='day', **filled_kde)
df_tips
# Filled KDE of tip and of total_bill, without hue.
sns.displot(x='tip', **filled_kde)
sns.displot(x='total_bill', **filled_kde)
Example: ECDF
Binning bias is a pitfall of histograms: the same data can look different depending on the number of bins you choose. Note that the values along the y-axis change as you change the number of bins. An ECDF plot avoids this problem because it does not bin the data at all.
# total_bill drawn as four histograms on a 2x2 grid, using 5, 25, 50 and
# 100 bins (filled left to right, top row first).
fig, ax = plt.subplots(2, 2)
for panel, n_bins in zip(ax.flat, (5, 25, 50, 100)):
    panel.hist(df_tips['total_bill'], bins=n_bins)
# ECDF plots: total_bill, tip, then tip with 'time' as hue.
for ecdf_args in ({'x': 'total_bill'}, {'x': 'tip'}, {'x': 'tip', 'hue': 'time'}):
    sns.displot(data=df_tips, kind='ecdf', **ecdf_args)

# How often each tip amount occurs.
df_tips['tip'].value_counts()
Example: Bivariate Analysis
# Bivariate plots of tip against total_bill: kind='hist' with cbar=True,
# kind='kde', then kind='hist' with 'day' as both hue and col.
bill_vs_tip = dict(x='total_bill', y='tip', data=df_tips)
sns.displot(kind='hist', cbar=True, **bill_vs_tip)
sns.displot(kind='kde', **bill_vs_tip)
sns.displot(kind='hist', hue='day', col='day', **bill_vs_tip)
Example: Sub-Plots using FacetGrid
# Distribution of total_bill with kind='hist', using 'day' for both hue and col.
sns.displot(
    data=df_tips,
    kind='hist',
    x='total_bill',
    hue='day',
    col='day',
);