#!/usr/bin/env python
# coding: utf-8
# ---
#
#
#
# Department of Data Science
# Course: Tools and Techniques for Data Science
#
# ---
# Instructor: Muhammad Arif Butt, Ph.D.
# Lecture 3.21 (Data Visualization-I)
#
# ## _Data Visualization with Matplotlib_
#
# **Read Documentation for details:**
# https://matplotlib.org/stable/users/index.html
#
#
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# # Chart Chooser
#
#
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# # Data Visualization using Matplotlib and Seaborn
#
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# # Anatomy of a Figure
#
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# ## Learning agenda of this notebook
# 1. Introduction to Data Visualization
# 2. Chart Chooser
# 3. Anatomy of a Figure
# 4. Download and Install Matplotlib
# 5. Programming with Matplotlib
# 6. How to Draw a Line Chart
# 7. Enhance the Graph Step by Step
# 8. Sub-Plots
# In[ ]:
# In[ ]:
# In[ ]:
# ## 4. Download and Install Matplotlib
# To begin, let's install the Matplotlib library. We'll use the `matplotlib.pyplot` module for basic plots like line & bar charts. It is often imported with the alias `plt`.
# In[ ]:
# Install Matplotlib into the environment of the interpreter running this code.
# pip is invoked through `sys.executable -m pip` with an argument list (no shell),
# so the call also works outside IPython/Jupyter and does not break when the
# interpreter path contains spaces.
import subprocess
import sys

_pip_result = subprocess.run(
    [sys.executable, "-m", "pip", "install", "matplotlib", "--quiet"],
    check=False,  # best effort: a failed install is reported below, not raised
)
if _pip_result.returncode != 0:
    print("pip install matplotlib exited with status", _pip_result.returncode)
# In[1]:
import matplotlib
# Evaluate to a (version, package path) tuple; as the last expression of a
# notebook cell it is displayed as the cell's result.
matplotlib.__version__ , matplotlib.__path__
# In[2]:
from matplotlib import pyplot as plt
# In[3]:
# Print the attribute names available on the pyplot module.
print(dir(plt))
# In[ ]:
# In[ ]:
# In[ ]:
# ## 5. Programming with Matplotlib
# #### Option 1: Stateful Approach
# In[4]:
import numpy as np

# Sample the cubic y = x**3 at 20 evenly spaced points on [-2, 2].
x = np.linspace(-2, 2, num=20)
y = x ** 3

# State-based interface: pyplot draws onto its implicit "current" figure/axes.
plt.plot(x, y)
plt.show()
# In[ ]:
# In[ ]:
# In[ ]:
# #### Option 2: Stateless (Object Oriented) Approach
# In[7]:
# Variant 1: create an empty figure, then let plt.subplot() add an Axes to the
# current figure. (1, 1, 1) is the grid position used when no arguments are given.
fig = plt.figure()
ax = plt.subplot(1, 1, 1)
ax.plot(x, y)
plt.show()
# In[8]:
# Variant 2: call add_subplot() on the Figure object itself, so the Axes is
# attached to exactly that figure. (1, 1, 1) is the no-argument default position.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(x, y)
plt.show()
# In[9]:
# Variant 3: plt.subplots() creates the figure and its grid of subplots
# (here the default 1x1 grid, i.e. a single Axes) in one call.
fig, ax = plt.subplots(nrows=1, ncols=1)
ax.plot(x, y)
plt.show()
# In[ ]:
# In[ ]:
# ## 6. How to Draw a Line Chart
# In[10]:
# Twelve yearly chemical-export values, drawn as a plain line chart.
years = list(range(2010, 2022))
chemical_exports = [
    0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
    0.945, 0.931, 0.919, 0.921, 0.920, 0.919,
]

fig, ax = plt.subplots()
ax.plot(years, chemical_exports)  # x = year, y = export value
plt.show()
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# ## 7. Enhance the Graph Step by Step
# ### a. Adding Chart Title
# In[11]:
# Same line chart as before, now with a title on the Axes.
years = list(range(2010, 2022))
chemical_exports = [
    0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
    0.945, 0.931, 0.919, 0.921, 0.920, 0.919,
]

fig, ax = plt.subplots()
ax.plot(years, chemical_exports)
ax.set_title("LCI Exports in last 12 years")  # ax is the current Axes, so this matches plt.title()
plt.show()
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# ### b. Setting x/y-Labels
# In[12]:
# Line chart with a title and descriptive labels on both axes.
years = list(range(2010, 2022))
chemical_exports = [
    0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
    0.945, 0.931, 0.919, 0.921, 0.920, 0.919,
]

fig, ax = plt.subplots()
ax.plot(years, chemical_exports)
ax.set(xlabel="Years", ylabel="Amount (Million US$)")  # both axis labels in one call
ax.set_title("LCI Exports in last 12 years")
plt.show()
# ### c. Modifying Ticks
# In[13]:
# Line chart with explicitly chosen tick positions on both axes.
years = list(range(2010, 2022))
chemical_exports = [
    0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
    0.945, 0.931, 0.919, 0.921, 0.920, 0.919,
]

# Equivalent two-step creation: fig = plt.figure(); ax = fig.add_subplot()
fig, ax = plt.subplots()
ax.plot(years, chemical_exports)
ax.set(xlabel="Years", ylabel="Amount (Million US$)")

# x ticks: every second year from 2011 to 2021; y ticks: 10 evenly spaced values in [0.80, 0.98].
xvals = np.arange(2011, 2022, 2)
yvals = np.linspace(0.80, 0.98, 10)
ax.set_xticks(xvals)
ax.set_yticks(yvals)

ax.set_title("LCI Exports in last 12 years")
plt.show()
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# ### d. Adding Multiple Lines in a Graph
# In[14]:
# Two data series sharing one Axes: each ax.plot() call adds another line.
years = list(range(2010, 2022))
chemical_exports = [
    0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
    0.945, 0.931, 0.919, 0.921, 0.920, 0.919,
]
medicine_exports = [
    0.791, 0.818, 0.832, 0.816, 0.840, 0.833,
    0.835, 0.838, 0.842, 0.910, 0.930, 0.940,
]

fig = plt.figure()
ax = fig.add_subplot()
for series in (chemical_exports, medicine_exports):
    ax.plot(years, series)

xvals = np.arange(2011, 2022, 2)
yvals = np.linspace(0.80, 0.98, 10)
ax.set(
    xlabel="Years",
    ylabel="Amount (Million US$)",
    xticks=xvals,
    yticks=yvals,
    title="LCI Exports in last 12 years",
)
plt.show()
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# ### e. Adding Chart Legend
# **Option1:**
# In[17]:
# Legend, option 1: pass the labels as a list, in the same order as the lines were plotted.
years = list(range(2010, 2022))
chemical_exports = [
    0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
    0.945, 0.931, 0.919, 0.921, 0.920, 0.919,
]
medicine_exports = [
    0.791, 0.818, 0.832, 0.816, 0.840, 0.833,
    0.835, 0.838, 0.842, 0.910, 0.930, 0.940,
]

fig = plt.figure()
ax = fig.add_subplot()
ax.plot(years, chemical_exports)   # first line  -> first legend entry
ax.plot(years, medicine_exports)   # second line -> second legend entry

ax.set(xlabel="Years", ylabel="Amount (Million US$)")
xvals = np.arange(2011, 2022, 2)
yvals = np.linspace(0.80, 0.98, 10)
ax.set_xticks(xvals)
ax.set_yticks(yvals)
ax.set_title("LCI Exports in last 12 years")
ax.legend(["Chemicals", "Medicines"])
plt.show()
# **Limitations of the above way of adding a Legend to your graph:**
# >- This technique expects the labels passed to the `plt.legend()` method to be in exactly the same order in which the lines were added by the two calls to the `ax.plot()` method.
# >- In other words, the strings passed to `plt.legend()` are assigned to the lines in the order in which they were plotted, so reordering the `ax.plot()` calls silently mislabels the lines.
# >- The solution is to pass a `label` argument to each `ax.plot()` call instead of passing a list of strings to the `plt.legend()` method.
# In[ ]:
# In[ ]:
# **Option2:**
# In[23]:
# Legend, option 2: attach a label to each line when plotting it, then let
# legend() collect those labels; the order of the plot calls no longer matters.
years = list(range(2010, 2022))
chemical_exports = [
    0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
    0.945, 0.931, 0.919, 0.921, 0.920, 0.919,
]
medicine_exports = [
    0.791, 0.818, 0.832, 0.816, 0.840, 0.833,
    0.835, 0.838, 0.842, 0.910, 0.930, 0.940,
]

fig = plt.figure()
ax = fig.add_subplot()
ax.plot(years, chemical_exports, label="Chemicals")
ax.plot(years, medicine_exports, label="Medicines")

ax.set(xlabel="Years", ylabel="Amount (Million US$)")
xvals = np.arange(2011, 2022, 2)
yvals = np.linspace(0.80, 0.98, 10)
ax.set_xticks(xvals)
ax.set_yticks(yvals)
ax.set_title("LCI Exports in last 12 years")
ax.legend(loc='best')  # 'best' lets Matplotlib choose the legend position
plt.show()
# In[ ]:
# In[ ]:
# ### f. Use of Markers
# - Matplotlib provides many different markers, like a circle, cross, square, diamond, etc. You can find the full list of marker types here: https://matplotlib.org/3.1.1/api/markers_api.html .
# - The `plt.plot` function supports many arguments for markers:
# - `markersize` or `ms`: Set the size of markers
# - `markeredgecolor` or `mec`: Set the edge color for markers
# - `markeredgewidth` or `mew`: Set the edge width for markers
# - `markerfacecolor` or `mfc`: Set the fill color for markers
# In[24]:
# Mark every data point: circles for chemicals, crosses for medicines.
years = list(range(2010, 2022))
chemical_exports = [
    0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
    0.945, 0.931, 0.919, 0.921, 0.920, 0.919,
]
medicine_exports = [
    0.791, 0.818, 0.832, 0.816, 0.840, 0.833,
    0.835, 0.838, 0.842, 0.910, 0.930, 0.940,
]

fig = plt.figure()
ax = fig.add_subplot()
ax.plot(years, chemical_exports, marker='o', label="Chemicals")
ax.plot(years, medicine_exports, marker='x', label="Medicines")

xvals = np.arange(2011, 2022, 2)
yvals = np.linspace(0.80, 0.98, 10)
ax.set(
    xlabel="Years",
    ylabel="Amount (Million US$)",
    xticks=xvals,
    yticks=yvals,
    title="LCI Exports in last 12 years",
)
ax.legend(loc='best')
plt.show()
# In[ ]:
# ### g. Styling the Lines (Line Attributes)
# - The `plt.plot` function supports many arguments for styling lines
# - `color` or `c`: Set the color of the line
# - `linestyle` or `ls`: Choose between a solid or dashed line
# - `linewidth` or `lw`: Set the width of a line
#
#
# Checkout the supported colors: https://matplotlib.org/3.1.0/tutorials/colors/colors.html
#
# Check out the documentation for `plt.plot` to learn more: [https://matplotlib.org/api/_as_gen/matplotlib.pyplot.plot.html#matplotlib.pyplot.plot]
# In[26]:
# Style each line explicitly: colour, line style and line width.
# The long keyword names used here are the spelled-out forms of c / ls / lw.
years = list(range(2010, 2022))
chemical_exports = [
    0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
    0.945, 0.931, 0.919, 0.921, 0.920, 0.919,
]
medicine_exports = [
    0.791, 0.818, 0.832, 0.816, 0.840, 0.833,
    0.835, 0.838, 0.842, 0.910, 0.930, 0.940,
]

fig = plt.figure()
ax = fig.add_subplot()
ax.plot(years, chemical_exports, label="Chemicals", marker='o',
        color='b', linestyle='-', linewidth=2)   # solid blue line
ax.plot(years, medicine_exports, label="Medicines", marker='x',
        color='r', linestyle=':', linewidth=2)   # dotted red line

ax.set(xlabel="Years", ylabel="Amount (Million US$)")
xvals = np.arange(2011, 2022, 2)
yvals = np.linspace(0.80, 0.98, 10)
ax.set_xticks(xvals)
ax.set_yticks(yvals)
ax.set_title("LCI Exports in last 12 years")
ax.legend(loc='best')
plt.show()
# In[ ]:
# ### h. Using Format String to Style the Lines
# **Most of the time we just want to set the marker, line style, and line color. The plt.plot() method can be passed a single format string for this purpose: `fmt='[marker][line][color]'`**
#
# ```plt.plot(years, chemical_exports, 'o-g')```
#
# This format string cannot be passed as a keyword argument; it must be given positionally, right after the x and y arguments of the plt.plot() method.
# In[ ]:
# Style the lines with a compact format string ([marker][line][color])
# passed positionally right after the x and y data.
years = list(range(2010, 2022))
chemical_exports = [
    0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
    0.945, 0.931, 0.919, 0.921, 0.920, 0.919,
]
medicine_exports = [
    0.791, 0.818, 0.832, 0.816, 0.840, 0.833,
    0.835, 0.838, 0.842, 0.910, 0.930, 0.940,
]

fig = plt.figure()
ax = fig.add_subplot()
ax.plot(years, chemical_exports, 'o-g', label="Chemicals")  # circles, solid line, green
ax.plot(years, medicine_exports, 'x-r', label="Medicines")  # crosses, solid line, red

xvals = np.arange(2011, 2022, 2)
yvals = np.linspace(0.80, 0.98, 10)
ax.set(
    xlabel="Years",
    ylabel="Amount (Million US$)",
    xticks=xvals,
    yticks=yvals,
    title="LCI Exports in last 12 years",
)
ax.legend(loc='best')
plt.show()
# In[ ]:
# In[ ]:
# ### i. Changing Figure Size and Adding Grid
# In[27]:
# Wider canvas (12 x 6 inches) with grid lines behind the two series.
years = list(range(2010, 2022))
chemical_exports = [
    0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
    0.945, 0.931, 0.919, 0.921, 0.920, 0.919,
]
medicine_exports = [
    0.791, 0.818, 0.832, 0.816, 0.840, 0.833,
    0.835, 0.838, 0.842, 0.910, 0.930, 0.940,
]

fig = plt.figure(figsize=(12, 6))
ax = fig.add_subplot()
ax.plot(years, chemical_exports, label="Chemicals", marker='o',
        color='b', linestyle='-', linewidth=2)
ax.plot(years, medicine_exports, label="Medicines", marker='x',
        color='r', linestyle=':', linewidth=2)

ax.set(xlabel="Years", ylabel="Amount (Million US$)")
xvals = np.arange(2011, 2022, 2)
yvals = np.linspace(0.80, 0.98, 10)
ax.set_xticks(xvals)
ax.set_yticks(yvals)
ax.set_title("LCI Exports in last 12 years")
ax.legend(loc='best')
ax.grid(True)  # draw grid lines at the tick positions
plt.show()
# ### j. Using Annotation
# In[28]:
# Same chart, with a text annotation placed next to the 2016 chemical-exports point.
years = list(range(2010, 2022))
chemical_exports = [
    0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
    0.945, 0.931, 0.919, 0.921, 0.920, 0.919,
]
medicine_exports = [
    0.791, 0.818, 0.832, 0.816, 0.840, 0.833,
    0.835, 0.838, 0.842, 0.910, 0.930, 0.940,
]

fig = plt.figure(figsize=(12, 6))
ax = fig.add_subplot()
ax.plot(years, chemical_exports, label="Chemicals", marker='o',
        color='b', linestyle='-', linewidth=2)
ax.plot(years, medicine_exports, label="Medicines", marker='x',
        color='r', linestyle=':', linewidth=2)

xvals = np.arange(2011, 2022, 2)
yvals = np.linspace(0.80, 0.98, 10)
ax.set(
    xlabel="Years",
    ylabel="Amount (Million US$)",
    xticks=xvals,
    yticks=yvals,
    title="LCI Exports in last 12 years",
)
ax.legend(loc='best')
# xy is given in data coordinates: (year, amount).
ax.annotate("Peak of Chemical Exports", xy=(2016, 0.95))
ax.grid(True)
plt.show()
# ### k. Using Built-in Styles
# In[29]:
# Evaluate to the list of style names that can be passed to plt.style.use();
# as the last expression of a notebook cell it is displayed as the cell's result.
plt.style.available
# In[35]:
# Select the style sheet BEFORE creating the figure. Most style settings are
# read when the figure and its artists are created, so switching styles after
# plotting (as this cell used to do) leaves the already-drawn chart unchanged.
# Other built-in styles to try: 'dark_background', 'fivethirtyeight', 'ggplot'
plt.style.use('default')

chemical_exports = [0.810, 0.831, 0.895, 0.91, 0.915, 0.926, 0.945, 0.931, 0.919, 0.921, 0.920, 0.919]
years = [2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021]
medicine_exports = [0.791, 0.818, 0.832, 0.816, 0.840, 0.833, 0.835, 0.838, 0.842, 0.910, 0.930, 0.940]

fig = plt.figure(figsize=(12,6))
ax = fig.add_subplot()
ax.plot(years, chemical_exports, label="Chemicals", marker='o', c='b', ls='-', lw=2)
ax.plot(years, medicine_exports, label="Medicines", marker='x', c='r', ls=':', lw=2)
ax.set_xlabel("Years")
ax.set_ylabel("Amount (Million US$)")

# x ticks: every second year from 2011 to 2021; y ticks: 10 evenly spaced values in [0.80, 0.98].
xvals = np.arange(2011, 2022, 2)
yvals = np.linspace(0.80, 0.98, 10)
ax.set_xticks(xvals)
ax.set_yticks(yvals)

plt.title("LCI Exports in last 12 years")
plt.legend(loc='best')
plt.annotate("Peak of Chemical Exports", xy=(2016,0.95))
plt.grid(True)
plt.show()
# In[ ]:
# In[ ]:
# ### l. Saving the Graph
# In[ ]:
# Inspect the x-tick positions array left over from the previously run plotting cell.
xvals
# In[ ]:
# Select the style sheet before the figure is created so that it actually
# applies to this chart (most style settings are read at creation time).
plt.style.use('default')

chemical_exports = [0.810, 0.831, 0.895, 0.91, 0.915, 0.926, 0.945, 0.931, 0.919, 0.921, 0.920, 0.919]
years = [2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021]
medicine_exports = [0.791, 0.818, 0.832, 0.816, 0.840, 0.833, 0.835, 0.838, 0.842, 0.910, 0.930, 0.940]

fig = plt.figure(figsize=(12,6))
ax = fig.add_subplot()
ax.plot(years,chemical_exports, label="Chemicals", marker='o', c='b', ls='-', lw=2)
ax.plot(years,medicine_exports, label="Medicines", marker='x', c='r', ls=':', lw=2)
ax.set_xlabel("Years")
ax.set_ylabel("Amount (Million US$)")

xvals = np.arange(2011, 2022, 2)
yvals = np.linspace(0.80, 0.98, 10)
ax.set_xticks(xvals)
ax.set_yticks(yvals)

plt.title("LCI Exports in last 12 years")
plt.legend(loc='best')
plt.annotate("Peak of Chemical Exports", xy=(2016,0.95))
plt.grid(True)

# Write the chart to disk BEFORE plt.show(): once the figure has been shown
# and closed, saving may produce an empty image.
plt.savefig("mychart.png") # can use .pdf, .ps, .eps, .svg formats
plt.show()
# In[ ]:
# List the saved chart file with the shell's `ls -l` via IPython's system call.
# NOTE(review): only works inside IPython/Jupyter on a system that provides `ls`,
# and assumes mychart.png has already been written to the working directory.
get_ipython().system('ls -l mychart.png')
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# ## 8. Sub-Plots
# In[ ]:
# Ask for a 3x3 grid of subplots; `ax` is then an array of Axes objects
# rather than a single Axes, which the print makes visible.
fig, ax = plt.subplots(3, 3)
print(ax)
# In[ ]:
# In[ ]:
# In[ ]:
# ### a. Multiple Axes Objects within a Figure Object
# In[ ]:
from matplotlib import pyplot as plt

# One figure holding two vertically stacked Axes, one data series in each.
years = list(range(2010, 2022))
chemical_exports = [
    0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
    0.945, 0.931, 0.919, 0.921, 0.920, 0.919,
]
medicine_exports = [
    0.791, 0.818, 0.832, 0.816, 0.840, 0.833,
    0.835, 0.838, 0.842, 0.910, 0.930, 0.940,
]

fig, (ax1, ax2) = plt.subplots(2, 1)  # 2 rows, 1 column

# Upper Axes: chemicals
ax1.plot(years, chemical_exports, label='Chemicals', marker='o',
         color='b', linestyle='-', linewidth=2)
ax1.set(
    title="LCI Chemical exports in last 12 years",
    xlabel="Years",
    ylabel="Amount (Million Rs.)",
)

# Lower Axes: medicines
ax2.plot(years, medicine_exports, label='Medicines', marker='x',
         color='r', linestyle=':', linewidth=2)
ax2.set(
    title="LCI Medicine exports in last 12 years",
    xlabel="Years",
    ylabel="Amount (Million Rs.)",
)

# Adjust the spacing so titles and axis labels of the two Axes do not overlap.
fig.tight_layout()
plt.show()
# In[ ]:
# In[ ]:
# In[ ]:
# ### b. Multiple Figure Objects Having Single Axes
# In[ ]:
from matplotlib import pyplot as plt

# Two independent figures, each with a single Axes holding one data series.
chemical_exports = [0.810, 0.831, 0.895, 0.91, 0.915, 0.926, 0.945, 0.931, 0.919, 0.921, 0.920, 0.919]
years = [2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021]
medicine_exports = [0.791, 0.818, 0.832, 0.816, 0.840, 0.833, 0.835, 0.838, 0.842, 0.910, 0.930, 0.940]

fig1, ax1 = plt.subplots()
fig2, ax2 = plt.subplots()

# Figure 1: chemicals
ax1.plot(years, chemical_exports, label='Chemicals', marker='o', c='b', ls='-', lw=2)
ax1.set_title("LCI Chemical exports in last 12 years")
ax1.set_xlabel("Years")
ax1.set_ylabel("Amount (Million Rs.)")

# Figure 2: medicines
ax2.plot(years, medicine_exports, label='Medicines', marker='x', c='r', ls=':', lw=2)
ax2.set_title("LCI Medicine exports in last 12 years")
ax2.set_xlabel("Years")
ax2.set_ylabel("Amount (Million Rs.)")

# plt.tight_layout() only adjusts the current figure (fig2, the one created
# last), so the layout is applied to each figure explicitly.
fig1.tight_layout()
fig2.tight_layout()
plt.show()
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]: