#!/usr/bin/env python
# coding: utf-8

# ---
#
# Department of Data Science
#
# Course: Tools and Techniques for Data Science
#
# ---
#
# Instructor: Muhammad Arif Butt, Ph.D.
#
# Lecture 3.21 (Data Visualization-I)

# Open In Colab

# ## _Data Visualization with Matplotlib_
#
# **Read Documentation for details:**
# https://matplotlib.org/stable/users/index.html

# # Chart Chooser

# # Data Visualization using Matplotlib and Seaborn

# # Anatomy of a Figure

# ## Learning agenda of this notebook
# 1. Introduction to Data Visualization
# 2. Chart Chooser
# 3. Anatomy of a Figure
# 4. Download and Install Matplotlib
# 5. How to draw a Line Chart
# 6. Enhance the Graph Step by Step

# ## 4. Download and Install Matplotlib
# To begin, let's install the Matplotlib library. We'll use the
# `matplotlib.pyplot` module for basic plots like line & bar charts.
# It is often imported with the alias `plt`.

# In[ ]:

# Install the library from inside a Jupyter notebook, using the interpreter
# that is running this kernel (get_ipython() exists only under IPython).
import sys

get_ipython().system('{sys.executable} -m pip install matplotlib --quiet')

# In[1]:

import matplotlib

# Show which Matplotlib version is installed and where it lives.
matplotlib.__version__, matplotlib.__path__

# In[2]:

from matplotlib import pyplot as plt

# In[3]:

# List every name exposed by the pyplot module.
print(dir(plt))

# ## 5. Programming with Matplotlib

# #### Option 1: Stateful Approach

# In[4]:

import numpy as np

# 20 evenly spaced samples of the cubic y = x^3 on [-2, 2].
x = np.linspace(-2, 2, 20)
y = x ** 3

# pyplot-level call: draws on the implicit "current" figure and axes.
plt.plot(x, y)
plt.show()

# #### Option 2: Stateless (Object Oriented) Approach

# In[7]:

# Use plt.subplot() method to add an Axes to the current figure
fig = plt.figure()
ax = plt.subplot()
ax.plot(x, y)
plt.show()

# In[8]:

# Use fig.add_subplot() method to add an Axes to the figure on which it is called
fig = plt.figure()
ax = fig.add_subplot()
ax.plot(x, y)
plt.show()

# In[9]:

# Use plt.subplots() method to create a figure and add a set of subplots in a single call
fig, ax = plt.subplots()
ax.plot(x, y)
plt.show()

# ## 6.
# How to Draw a Line Chart

# In[10]:

# Twelve yearly data points, 2010 through 2021.
years = list(range(2010, 2022))
chemical_exports = [0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
                    0.945, 0.931, 0.919, 0.921, 0.920, 0.919]

fig, ax = plt.subplots()
ax.plot(years, chemical_exports)
plt.show()

# ## 7. Enhance the Graph Step by Step

# ### a. Adding Chart Title

# In[11]:

years = list(range(2010, 2022))
chemical_exports = [0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
                    0.945, 0.931, 0.919, 0.921, 0.920, 0.919]

fig, ax = plt.subplots()
ax.plot(years, chemical_exports)

# plt.title() puts the title on the current Axes, i.e. the one just created.
plt.title("LCI Exports in last 12 years")
plt.show()

# ### b. Setting x/y-Labels

# In[12]:

years = list(range(2010, 2022))
chemical_exports = [0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
                    0.945, 0.931, 0.919, 0.921, 0.920, 0.919]

fig, ax = plt.subplots()
ax.plot(years, chemical_exports)

# Describe both axes so the reader knows what is being measured.
ax.set_xlabel("Years")
ax.set_ylabel("Amount (Million US$)")
plt.title("LCI Exports in last 12 years")
plt.show()

# ### c. Modifying Ticks

# In[13]:

years = list(range(2010, 2022))
chemical_exports = [0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
                    0.945, 0.931, 0.919, 0.921, 0.920, 0.919]

# Equivalent alternative: fig = plt.figure() followed by ax = fig.add_subplot()
fig, ax = plt.subplots()
ax.plot(years, chemical_exports)

# Explicit tick positions: every second year on x, ten even steps on y.
xvals = np.arange(2011, 2022, 2)
yvals = np.linspace(0.80, 0.98, 10)
ax.set_xticks(xvals)
ax.set_yticks(yvals)

ax.set_xlabel("Years")
ax.set_ylabel("Amount (Million US$)")
plt.title("LCI Exports in last 12 years")
plt.show()

# ### d.
# Adding Multiple Lines in a Graph

# In[14]:

years = list(range(2010, 2022))
chemical_exports = [0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
                    0.945, 0.931, 0.919, 0.921, 0.920, 0.919]
medicine_exports = [0.791, 0.818, 0.832, 0.816, 0.840, 0.833,
                    0.835, 0.838, 0.842, 0.910, 0.930, 0.940]

fig = plt.figure()
ax = fig.add_subplot()

# Every ax.plot() call adds one more line to the same Axes.
ax.plot(years, chemical_exports)
ax.plot(years, medicine_exports)

xvals = np.arange(2011, 2022, 2)
yvals = np.linspace(0.80, 0.98, 10)
ax.set_xticks(xvals)
ax.set_yticks(yvals)

ax.set_xlabel("Years")
ax.set_ylabel("Amount (Million US$)")
plt.title("LCI Exports in last 12 years")
plt.show()

# ### e. Adding Chart Legend

# **Option1:**

# In[17]:

years = list(range(2010, 2022))
chemical_exports = [0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
                    0.945, 0.931, 0.919, 0.921, 0.920, 0.919]
medicine_exports = [0.791, 0.818, 0.832, 0.816, 0.840, 0.833,
                    0.835, 0.838, 0.842, 0.910, 0.930, 0.940]

fig = plt.figure()
ax = fig.add_subplot()
ax.plot(years, chemical_exports)
ax.plot(years, medicine_exports)

xvals = np.arange(2011, 2022, 2)
yvals = np.linspace(0.80, 0.98, 10)
ax.set_xticks(xvals)
ax.set_yticks(yvals)

ax.set_xlabel("Years")
ax.set_ylabel("Amount (Million US$)")
plt.title("LCI Exports in last 12 years")

# Labels are matched to the lines purely by position (first label -> first line).
plt.legend(["Chemicals", "Medicines"])
plt.show()

# **Limitations of the above way of adding a Legend to your graph:**
# >- This technique expects the sequence of arguments passed to the
#    `plt.legend()` method to be exactly the same as the sequence in which the
#    plots are added above using the two calls to the `ax.plot()` method.
# >- That is, the strings/labels that you pass to the `plt.legend()` method are
#    assigned to the two lines in the exact sequence in which the lines were
#    added by the two `ax.plot()` calls.
# >- The solution to this problem is to use the `label` argument of the
#    `ax.plot()` method instead of passing a list of strings to the
#    `plt.legend()` method.

# **Option2:**

# In[23]:

years = list(range(2010, 2022))
chemical_exports = [0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
                    0.945, 0.931, 0.919, 0.921, 0.920, 0.919]
medicine_exports = [0.791, 0.818, 0.832, 0.816, 0.840, 0.833,
                    0.835, 0.838, 0.842, 0.910, 0.930, 0.940]

fig = plt.figure()
ax = fig.add_subplot()

# Attach the legend text to each line at the moment it is drawn.
ax.plot(years, chemical_exports, label="Chemicals")
ax.plot(years, medicine_exports, label="Medicines")

xvals = np.arange(2011, 2022, 2)
yvals = np.linspace(0.80, 0.98, 10)
ax.set_xticks(xvals)
ax.set_yticks(yvals)

ax.set_xlabel("Years")
ax.set_ylabel("Amount (Million US$)")
plt.title("LCI Exports in last 12 years")

# No label list needed: the legend reads the label stored on each line.
plt.legend(loc='best')
plt.show()

# ### f. Use of Markers
# - Matplotlib provides many different markers, like a circle, cross, square,
#   diamond, etc. You can find the full list of marker types here:
#   https://matplotlib.org/3.1.1/api/markers_api.html .
# - The `plt.plot` function supports many arguments for markers:
#   - `markersize` or `ms`: Set the size of markers
#   - `markeredgecolor` or `mec`: Set the edge color for markers
#   - `markeredgewidth` or `mew`: Set the edge width for markers
#   - `markerfacecolor` or `mfc`: Set the fill color for markers

# In[24]:

years = list(range(2010, 2022))
chemical_exports = [0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
                    0.945, 0.931, 0.919, 0.921, 0.920, 0.919]
medicine_exports = [0.791, 0.818, 0.832, 0.816, 0.840, 0.833,
                    0.835, 0.838, 0.842, 0.910, 0.930, 0.940]

fig = plt.figure()
ax = fig.add_subplot()

# A distinct marker per series: circles for chemicals, crosses for medicines.
ax.plot(years, chemical_exports, label="Chemicals", marker='o')
ax.plot(years, medicine_exports, label="Medicines", marker='x')

xvals = np.arange(2011, 2022, 2)
yvals = np.linspace(0.80, 0.98, 10)
ax.set_xticks(xvals)
ax.set_yticks(yvals)

ax.set_xlabel("Years")
ax.set_ylabel("Amount (Million US$)")
plt.title("LCI Exports in last 12 years")
plt.legend(loc='best')
plt.show()

# ### g.
# Styling the Lines (Line Attributes)
# - The `plt.plot` function supports many arguments for styling lines
#   - `color` or `c`: Set the color of the line
#   - `linestyle` or `ls`: Choose between a solid or dashed line
#   - `linewidth` or `lw`: Set the width of a line
#
# Check out the supported colors:
# https://matplotlib.org/3.1.0/tutorials/colors/colors.html
#
# Check out the documentation for `plt.plot` to learn more:
# [https://matplotlib.org/api/_as_gen/matplotlib.pyplot.plot.html#matplotlib.pyplot.plot]

# In[26]:

years = list(range(2010, 2022))
chemical_exports = [0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
                    0.945, 0.931, 0.919, 0.921, 0.920, 0.919]
medicine_exports = [0.791, 0.818, 0.832, 0.816, 0.840, 0.833,
                    0.835, 0.838, 0.842, 0.910, 0.930, 0.940]

fig = plt.figure()
ax = fig.add_subplot()

# Chemicals: solid blue line; medicines: dotted red line; both 2 points wide.
ax.plot(years, chemical_exports, label="Chemicals",
        marker='o', c='b', ls='-', lw=2)
ax.plot(years, medicine_exports, label="Medicines",
        marker='x', c='r', ls=':', lw=2)

xvals = np.arange(2011, 2022, 2)
yvals = np.linspace(0.80, 0.98, 10)
ax.set_xticks(xvals)
ax.set_yticks(yvals)

ax.set_xlabel("Years")
ax.set_ylabel("Amount (Million US$)")
plt.title("LCI Exports in last 12 years")
plt.legend(loc='best')
plt.show()

# ### h. Using Format String to Style the Lines
# **Most of the time we just want to set the marker, line style, and line color.
# The plt.plot() method can be passed a single string for this purpose:
# `fmt='[marker][line][color]'`**
#
# ```plt.plot(years, sports_export, 'o-g')```
#
# This format string cannot be passed as a named argument; it is a positional
# argument that follows the x, y arguments of the plt.plot() method.

# In[ ]:

years = list(range(2010, 2022))
chemical_exports = [0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
                    0.945, 0.931, 0.919, 0.921, 0.920, 0.919]
medicine_exports = [0.791, 0.818, 0.832, 0.816, 0.840, 0.833,
                    0.835, 0.838, 0.842, 0.910, 0.930, 0.940]

fig = plt.figure()
ax = fig.add_subplot()

# 'o-g' = circle markers, solid line, green; 'x-r' = crosses, solid line, red.
ax.plot(years, chemical_exports, 'o-g', label="Chemicals")
ax.plot(years, medicine_exports, 'x-r', label="Medicines")

xvals = np.arange(2011, 2022, 2)
yvals = np.linspace(0.80, 0.98, 10)
ax.set_xticks(xvals)
ax.set_yticks(yvals)

ax.set_xlabel("Years")
ax.set_ylabel("Amount (Million US$)")
plt.title("LCI Exports in last 12 years")
plt.legend(loc='best')
plt.show()

# ### i. Changing Figure Size and Adding Grid

# In[27]:

years = list(range(2010, 2022))
chemical_exports = [0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
                    0.945, 0.931, 0.919, 0.921, 0.920, 0.919]
medicine_exports = [0.791, 0.818, 0.832, 0.816, 0.840, 0.833,
                    0.835, 0.838, 0.842, 0.910, 0.930, 0.940]

# figsize is (width, height) in inches.
fig = plt.figure(figsize=(12, 6))
ax = fig.add_subplot()
ax.plot(years, chemical_exports, label="Chemicals",
        marker='o', c='b', ls='-', lw=2)
ax.plot(years, medicine_exports, label="Medicines",
        marker='x', c='r', ls=':', lw=2)

xvals = np.arange(2011, 2022, 2)
yvals = np.linspace(0.80, 0.98, 10)
ax.set_xticks(xvals)
ax.set_yticks(yvals)

ax.set_xlabel("Years")
ax.set_ylabel("Amount (Million US$)")
plt.title("LCI Exports in last 12 years")
plt.legend(loc='best')

# Grid lines are drawn at the tick positions chosen above.
plt.grid(True)
plt.show()

# ### j.
# Using Annotation

# In[28]:

chemical_exports = [0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
                    0.945, 0.931, 0.919, 0.921, 0.920, 0.919]
years = [2010, 2011, 2012, 2013, 2014, 2015,
         2016, 2017, 2018, 2019, 2020, 2021]
medicine_exports = [0.791, 0.818, 0.832, 0.816, 0.840, 0.833,
                    0.835, 0.838, 0.842, 0.910, 0.930, 0.940]

fig = plt.figure(figsize=(12, 6))
ax = fig.add_subplot()
ax.plot(years, chemical_exports, label="Chemicals",
        marker='o', c='b', ls='-', lw=2)
ax.plot(years, medicine_exports, label="Medicines",
        marker='x', c='r', ls=':', lw=2)
ax.set_xlabel("Years")
ax.set_ylabel("Amount (Million US$)")

xvals = np.arange(2011, 2022, 2)
yvals = np.linspace(0.80, 0.98, 10)
ax.set_xticks(xvals)
ax.set_yticks(yvals)

plt.title("LCI Exports in last 12 years")
plt.legend(loc='best')

# Place the text just above the highest chemical value (0.945 in 2016).
plt.annotate("Peak of Chemical Exports", xy=(2016, 0.95))
plt.grid(True)
plt.show()

# ### k. Using Built-in Styles

# In[29]:

# Names of all style sheets shipped with (or registered in) Matplotlib.
plt.style.available

# In[35]:

# A style sheet only affects artists created AFTER it is applied, so select it
# before building the figure. Calling plt.style.use() just before plt.show()
# would leave the already-drawn figure unchanged.
# Swap 'default' for 'dark_background', 'fivethirtyeight' or 'ggplot' to
# compare the looks.
plt.style.use('default')

chemical_exports = [0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
                    0.945, 0.931, 0.919, 0.921, 0.920, 0.919]
years = [2010, 2011, 2012, 2013, 2014, 2015,
         2016, 2017, 2018, 2019, 2020, 2021]
medicine_exports = [0.791, 0.818, 0.832, 0.816, 0.840, 0.833,
                    0.835, 0.838, 0.842, 0.910, 0.930, 0.940]

fig = plt.figure(figsize=(12, 6))
ax = fig.add_subplot()
ax.plot(years, chemical_exports, label="Chemicals",
        marker='o', c='b', ls='-', lw=2)
ax.plot(years, medicine_exports, label="Medicines",
        marker='x', c='r', ls=':', lw=2)
ax.set_xlabel("Years")
ax.set_ylabel("Amount (Million US$)")

xvals = np.arange(2011, 2022, 2)
yvals = np.linspace(0.80, 0.98, 10)
ax.set_xticks(xvals)
ax.set_yticks(yvals)

plt.title("LCI Exports in last 12 years")
plt.legend(loc='best')
plt.annotate("Peak of Chemical Exports", xy=(2016, 0.95))
plt.grid(True)
plt.show()

# ### l.
# Saving the Graph

# In[ ]:

# Tick positions left over from the previous cell.
xvals

# In[ ]:

# Apply the style before the figure is created; styles do not restyle
# artists that already exist.
plt.style.use('default')

chemical_exports = [0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
                    0.945, 0.931, 0.919, 0.921, 0.920, 0.919]
years = [2010, 2011, 2012, 2013, 2014, 2015,
         2016, 2017, 2018, 2019, 2020, 2021]
medicine_exports = [0.791, 0.818, 0.832, 0.816, 0.840, 0.833,
                    0.835, 0.838, 0.842, 0.910, 0.930, 0.940]

fig = plt.figure(figsize=(12, 6))
ax = fig.add_subplot()
ax.plot(years, chemical_exports, label="Chemicals",
        marker='o', c='b', ls='-', lw=2)
ax.plot(years, medicine_exports, label="Medicines",
        marker='x', c='r', ls=':', lw=2)
ax.set_xlabel("Years")
ax.set_ylabel("Amount (Million US$)")

xvals = np.arange(2011, 2022, 2)
yvals = np.linspace(0.80, 0.98, 10)
ax.set_xticks(xvals)
ax.set_yticks(yvals)

plt.title("LCI Exports in last 12 years")
plt.legend(loc='best')
plt.annotate("Peak of Chemical Exports", xy=(2016, 0.95))
plt.grid(True)

# Write the chart to disk BEFORE plt.show(); the next cell lists this file,
# so the save must actually run. The extension picks the output format.
plt.savefig("mychart.png")  # can use .pdf, .ps, .eps, .svg formats
plt.show()

# In[ ]:

# Confirm the image file was written (shell escape; IPython/Jupyter only).
get_ipython().system('ls -l mychart.png')

# ## 8. Sub-Plots

# In[ ]:

# A 3x3 grid: `ax` is a 2-D NumPy array holding the nine Axes objects.
fig, ax = plt.subplots(nrows=3, ncols=3)
print(ax)

# ### a.
# Multiple Axes Objects within a Figure Object

# In[ ]:

from matplotlib import pyplot as plt

chemical_exports = [0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
                    0.945, 0.931, 0.919, 0.921, 0.920, 0.919]
years = [2010, 2011, 2012, 2013, 2014, 2015,
         2016, 2017, 2018, 2019, 2020, 2021]
medicine_exports = [0.791, 0.818, 0.832, 0.816, 0.840, 0.833,
                    0.835, 0.838, 0.842, 0.910, 0.930, 0.940]

# One figure holding two Axes stacked vertically (2 rows x 1 column).
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1)

# NOTE(review): these y-labels say "Million Rs." while the earlier charts of
# the same data say "Million US$" -- confirm which unit is intended.
ax1.plot(years, chemical_exports, label='Chemicals',
         marker='o', c='b', ls='-', lw=2)
ax1.set_title("LCI Chemical exports in last 12 years")
ax1.set_xlabel("Years")
ax1.set_ylabel("Amount (Million Rs.)")

ax2.plot(years, medicine_exports, label='Medicines',
         marker='x', c='r', ls=':', lw=2)
ax2.set_title("LCI Medicine exports in last 12 years")
ax2.set_xlabel("Years")
ax2.set_ylabel("Amount (Million Rs.)")

# Re-space the two Axes so titles and axis labels do not overlap.
plt.tight_layout()
plt.show()

# ### b. Multiple Figure Objects Having Single Axes

# In[ ]:

from matplotlib import pyplot as plt

chemical_exports = [0.810, 0.831, 0.895, 0.91, 0.915, 0.926,
                    0.945, 0.931, 0.919, 0.921, 0.920, 0.919]
years = [2010, 2011, 2012, 2013, 2014, 2015,
         2016, 2017, 2018, 2019, 2020, 2021]
medicine_exports = [0.791, 0.818, 0.832, 0.816, 0.840, 0.833,
                    0.835, 0.838, 0.842, 0.910, 0.930, 0.940]

# Two independent figures, each with a single Axes.
fig1, ax1 = plt.subplots()
fig2, ax2 = plt.subplots()

ax1.plot(years, chemical_exports, label='Chemicals',
         marker='o', c='b', ls='-', lw=2)
ax1.set_title("LCI Chemical exports in last 12 years")
ax1.set_xlabel("Years")
ax1.set_ylabel("Amount (Million Rs.)")

ax2.plot(years, medicine_exports, label='Medicines',
         marker='x', c='r', ls=':', lw=2)
ax2.set_title("LCI Medicine exports in last 12 years")
ax2.set_xlabel("Years")
ax2.set_ylabel("Amount (Million Rs.)")

# plt.tight_layout() adjusts only the current figure (fig2, the last one
# created), so lay out each figure explicitly.
fig1.tight_layout()
fig2.tight_layout()
plt.show()