#!/usr/bin/env python
# coding: utf-8

# # Loading Necessary Libraries

# In[2]:


import matplotlib.pyplot as plt
import numpy as np
from numpy.random import randint
import pandas as pd
from matplotlib import ticker

# The inline backend is only available inside IPython/Jupyter. When this
# export is run as a plain script `get_ipython` is undefined, so skip the
# magic instead of failing with NameError.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass


# # Data Visualization

# ### Types of Graphs covered:
# - **Line Graph**
#     - **Basics**
#     - **Subplots**
# - **Bar Chart**
# - **Pie Chart**
# - **Box & Whiskers Plot**

# # Line Graph (Basics)

# In[5]:


x = [1, 2, 3, 8, 9, 23]
y = [5, 13, 10, 11, 45, 27]

plt.style.use('dark_background')
plt.rcParams['figure.figsize'] = (10, 4)

# Line 1
# Keyword-argument notation:
# plt.plot(x, y, label='2x', color='red', linewidth=2, marker='.',
#          linestyle='--', markersize=10, markeredgecolor='blue')
# Shorthand notation: fmt = '[color][marker][line]'
plt.plot(x, y, 'g^--', label='A simple line', linewidth=2)

# Line 2
# Select the interval we want to plot points at.
x2 = np.arange(0, 8.5, 0.5)

# Draw the first part of the curve solid and the rest dashed. Both slices
# include index 9, so the two segments join.
plt.plot(x2[:10], x2[:10] ** 2, 'r', label='X^2')
plt.plot(x2[9:], x2[9:] ** 2, 'r--')

# Add a title to the graph.
plt.title(
    'Line Graph',
    loc='left',
    fontdict={'fontname': 'Times New Roman', 'color': 'white',
              'fontsize': '22'},
)

# Add titles to the axes.
plt.xlabel('X-axis', fontdict={'color': 'white', 'fontsize': '16'})
plt.ylabel('Y-axis', fontdict={'color': 'white', 'fontsize': '16'})

# Ticks are the markers denoting data points on axes.
# plt.xticks([0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24])
# plt.yticks([5, 10, 15, 20, 25, 30, 35, 40, 45])

# Add the legend.
plt.legend(loc=0)

# Save the graph (before show, while the figure is still current).
plt.savefig('mygraph.png', dpi=200)

# Show the graph.
plt.show()


# # Multiple Plots in the same Canvas : Subplots

# ## Two Plots Together

# In[141]:


x = np.linspace(0, 10, 20)
x


# In[142]:


y = randint(1, 50, 20)
y


# In[6]:


x = np.linspace(0, 10, 20)
y = randint(1, 50, 20)

plt.style.use('dark_background')
plt.rcParams['figure.figsize'] = (10, 4)

# Plot 1
plt.subplot(1, 2, 1)
plt.plot(x, y, 'c:o')
plt.title('A Cyan Plot',
          fontdict={'fontname': 'Times New Roman', 'color': 'cyan'})

# Plot 2
plt.subplot(1, 2, 2)
plt.plot(x, y, 'r--_')
plt.title('A Red Plot',
          fontdict={'fontname': 'Times New Roman', 'color': 'red'})

# Title at the top of the whole figure.
plt.suptitle('Two Plots Together',
             fontdict={'fontname': 'Times New Roman', 'color': 'white'})

plt.show()


# ## Three Plots Together

# In[17]:


x = np.linspace(0, 10, 20)
y = randint(1, 50, 20)

plt.style.use('dark_background')
plt.rcParams['figure.figsize'] = (12, 4)

# Plot 1
plt.subplot(1, 3, 1)
plt.plot(x, y, 'c:o')
plt.title('A Cyan Plot', fontdict={'fontname': 'fantasy', 'color': 'cyan'})

# Plot 2
# `y` holds integers up to 49, and an integer power such as 49**49 does not
# fit in a 64-bit integer; compute the power in floating point so the values
# do not silently wrap around.
plt.subplot(1, 3, 2)
plt.plot(x, y.astype(float) ** y, 'r--_')
plt.title('A Red Plot', fontdict={'fontname': 'fantasy', 'color': 'red'})

# Plot 3
plt.subplot(1, 3, 3)
plt.plot(x, y * x, 'g-^')
plt.title('A Green Plot', fontdict={'fontname': 'fantasy', 'color': 'green'})

# Labeling
plt.suptitle('Three Plots Together',
             fontdict={'fontname': 'fantasy', 'color': 'white'})

plt.savefig('threeplots.png')

plt.show()


# ## Different Methods of Creating Subplots

# ### Object Oriented Method

# In[199]:


fig = plt.figure(figsize=(10, 4))

# Each rectangle is [left, bottom, width, height] in figure fractions; ax2 is
# a smaller axes drawn inside ax1.
ax1 = fig.add_axes([0, 0, 1, 1])
ax2 = fig.add_axes([0.1, 0.1, 0.4, 0.3])

ax1.plot(x, y)
ax2.plot(x, y * 2)

plt.show()


# ### Brute Force Method to plot more than one graph

# In[200]:


fig, ax = plt.subplots(1, 2, figsize=(10, 4))

ax[0].plot(x, y, 'b')
ax[1].plot(x, y * x, 'r-.')

plt.show()


# ### For Loop Method

# In[201]:


fig, ax = plt.subplots(1, 2, figsize=(10, 4))

col = ['g', 'm']
# Float power for the same overflow reason as in "Three Plots Together".
data = [y, y.astype(float) ** y]

for i, axes in enumerate(ax):
    axes.plot(x, data[i], col[i])

fig.tight_layout()

# Explicit show so the figure is also rendered outside a notebook.
plt.show()


# ## **Setting X and Y limits of subplots**

# In[162]:


# Get three plots simultaneously.
fig, ax = plt.subplots(1, 3, figsize=(12, 4))

ax[0].plot(x, y, x, y * x)

ax[1].plot(x, y ** 2, 'r')
# Setting limits on the y axis.
ax[1].set_ylim([0, 1000])

ax[2].plot(x, y, x, y * x)
# Setting limits on both the x and y axes.
ax[2].set_ylim([0, 300])
ax[2].set_xlim([0, 5])

plt.show()


# ## **Plotting Log(x) on a subplot**

# In[180]:


fig, ax = plt.subplots(1, 2, figsize=(10, 4))

ax[0].plot(x, y, x, y ** 2)

ax[1].plot(x, np.exp(x), 'w')
# Use a logarithmic scale on the y axis.
ax[1].set_yscale('log')

plt.show()


# ## Setting Xticks and Yticks and Changing their Labels

# In[193]:


fig, ax = plt.subplots(figsize=(10, 4))

ax.plot(x, y)

ax.set_xticks([1, 2, 5, 10])
ax.set_xticklabels([r'a', r'B', r'$\alpha$', r'$\delta$'], fontsize=20)

ax.set_yticks([1, 15, 25, 50])

plt.show()


# ## Setting Y-axis Label as Scientific Notation

# In[198]:


# `ticker` is imported together with the other libraries at the top.
fig, ax = plt.subplots(figsize=(10, 4))

ax.plot(x, y)
ax.set_title('Scientific Notation')

formatter = ticker.ScalarFormatter(useMathText=True)
formatter.set_scientific(True)
formatter.set_powerlimits((-1, -1))
ax.yaxis.set_major_formatter(formatter)

plt.show()


# # Bar Chart (Basics)

# In[8]:


labels = ['A', 'B', 'C']
values = [6, 8, 3]

plt.style.use('dark_background')
plt.rcParams['figure.figsize'] = (10, 4)

bars = plt.bar(labels, values, align='edge', edgecolor='indigo',
               facecolor='pink')

# Adding the title.
plt.title(
    'Bar Graph',
    loc='right',
    fontdict={'fontname': 'Times New Roman', 'color': 'white',
              'fontsize': '30'},
)

# Adding labels to the axes.
plt.xlabel('X-axis', fontdict={'color': 'white', 'fontsize': '16'})
plt.ylabel('Y-axis', fontdict={'color': 'white', 'fontsize': '16'})

# Add a different hatch pattern to each bar.
for bar, hatch in zip(bars, ('O', '*', '+')):
    bar.set_hatch(hatch)

plt.show()


# # Real World Examples

# ## Line Chart

# - **Data selected for analysis**

# In[6]:


gas = pd.read_csv('gas_prices.csv')
gas


# - **Line Chart showing changes in the prices of gas (USD/gallon)**

# In[7]:


# Select the style before the first drawing call so it applies to this figure.
plt.style.use('dark_background')

# Add a title to the graph.
plt.title(
    'Change in Gas Prices',
    loc='center',
    fontdict={'fontname': 'century', 'color': 'white', 'fontsize': '25'},
)

# Add titles to the axes.
plt.xlabel(
    'Change over Years',
    fontdict={'fontname': 'century', 'color': 'white', 'fontsize': '17'},
)
plt.ylabel(
    'Prices (USD/Gallon)',
    fontdict={'fontname': 'century', 'color': 'white', 'fontsize': '17'},
)

# Plot every column except 'Year' against the year.
for country in gas:
    if country != 'Year':
        plt.plot(gas.Year, gas[country], marker='.')

# [::2] leaves a two-year gap between x ticks.
plt.xticks(gas.Year[::2])

# Adding the legend; bbox_to_anchor=(1, 1) puts the legend outside the graph.
plt.legend(
    ['Australia', 'Canada', 'France', 'Germany', 'Italy', 'Japan', 'Mexico',
     'South Korea', 'UK', 'USA'],
    loc=0,
    bbox_to_anchor=(1, 1),
)

# bbox_inches='tight' prevents the graph from getting cropped.
plt.savefig('gas_prices.png', dpi=300, bbox_inches='tight')

plt.show()


# - **Line Chart showing changes in the prices of gas (USD/gallon) in USA,
#   Italy, France and Germany only**

# In[8]:


# Select the style before the first drawing call so it applies to this figure.
plt.style.use('dark_background')

# Add a title to the graph.
plt.title(
    'Change in Gas Prices',
    loc='center',
    fontdict={'fontname': 'century', 'color': 'white', 'fontsize': '25'},
)

# Add titles to the axes.
plt.xlabel(
    'Change over Years',
    fontdict={'fontname': 'century', 'color': 'white', 'fontsize': '17'},
)
plt.ylabel(
    'Prices (USD/Gallon)',
    fontdict={'fontname': 'century', 'color': 'white', 'fontsize': '17'},
)

# Method 1: one explicit call per country.
# plt.plot(gas.Year, gas.USA, 'b.--')
# plt.plot(gas.Year, gas.Italy, 'r.-')
# plt.plot(gas.Year, gas.France, 'y.:')
# plt.plot(gas.Year, gas.Germany, 'g.:')

# Method 2: loop over the selected countries only, drawing each series
# exactly once and labelling it so the legend matches the line it describes.
specific_countries = ['USA', 'France', 'Germany', 'Italy']
for country in specific_countries:
    plt.plot(gas.Year, gas[country], marker='.', label=country)

# Customize the x ticks and add an extra tick for 2010.
plt.xticks(gas.Year[::2].tolist() + [2010])

# Adding the legend (entries come from the `label` of each line).
plt.legend(loc=0, bbox_to_anchor=(1, 1))

# NOTE(review): the title and axis labels above are drawn in white, so they
# will not stand out against this white saved background - confirm that
# facecolor='white' is intended.
plt.savefig('gas_prices_specific.png', dpi=300, facecolor='white',
            bbox_inches='tight')

plt.show()


# # Histogram

# - **Data selected for Analysis**

# In[10]:


fifa = pd.read_csv('fifa_data.csv')
fifa.head()


# - **Distribution of Players Overall Skills in FIFA 2018**

# In[11]:


# Style and figure size must be set before the first drawing call, otherwise
# they do not affect the figure created for this chart.
plt.style.use('dark_background')
plt.rcParams['figure.figsize'] = (10, 4)

bins = [70, 80, 90, 100]

# Add a title to the graph.
plt.title(
    'Distribution of Players Overall Skills',
    loc='center',
    fontdict={'fontname': 'century', 'color': 'white', 'fontsize': '22'},
)

# Add titles to the axes.
plt.xlabel(
    'Overall Skill Points',
    fontdict={'fontname': 'century', 'color': 'white', 'fontsize': '15'},
)
plt.ylabel(
    'Number of Players',
    fontdict={'fontname': 'century', 'color': 'white', 'fontsize': '15'},
)

plt.hist(fifa.Overall, bins=bins, color='cyan')

plt.show()


# # Pie Chart

# - **Preferred Foot by FIFA Players**

# In[13]:


fifa['Preferred Foot']


# In[12]:


# Style and figure size must be set before the first drawing call.
plt.style.use('dark_background')
plt.rcParams['figure.figsize'] = (10, 4)

# Count rows per foot by summing the boolean masks.
preferred_foot = fifa['Preferred Foot']
left = (preferred_foot == 'Left').sum()
right = (preferred_foot == 'Right').sum()

foot_counts = [right, left]
# Name each wedge and keep the raw count visible next to the name.
labels = [f'Right ({right})', f'Left ({left})']

plt.pie(foot_counts, labeldistance=1.3, labels=labels,
        colors=['crimson', 'lightcoral'], autopct='%.2f %%')

plt.title(
    'Preferred Foot by FIFA Players',
    loc='center',
    fontdict={'fontname': 'century', 'color': 'white', 'fontsize': '20'},
)

plt.show()


# - **Weight Distribution of FIFA Players**

# In[275]:


fifa.Weight


# In[14]:


# Turn strings such as '159lbs' into integers; values that are not strings
# are left untouched, which also makes this cell safe to re-run.
fifa['Weight'] = [
    int(weight.strip('lbs')) if isinstance(weight, str) else weight
    for weight in fifa['Weight']
]
fifa.Weight


# In[15]:


# The seaborn style sheets were renamed with a 'seaborn-v0_8-' prefix in
# newer matplotlib releases; use whichever name this installation provides.
ticks_style = (
    'seaborn-v0_8-ticks'
    if 'seaborn-v0_8-ticks' in plt.style.available
    else 'seaborn-ticks'
)
plt.style.use(ticks_style)
plt.rcParams['figure.figsize'] = (10, 4)

# Number of players in each weight band (lower bound inclusive).
player_weight = fifa['Weight']
light = (player_weight < 125).sum()
light_medium = ((player_weight >= 125) & (player_weight < 150)).sum()
medium = ((player_weight >= 150) & (player_weight < 175)).sum()
medium_heavy = ((player_weight >= 175) & (player_weight < 200)).sum()
heavy = (player_weight >= 200).sum()

weights = [light, light_medium, medium, medium_heavy, heavy]
label = ['under 125', '125-150', '150-175', '175-200', 'over 200']
# Offset three of the wedges from the centre of the pie.
explode = (.3, .2, 0, 0, .3)

plt.title(
    'Weight of Professional Soccer Players (lbs)',
    loc='center',
    fontdict={'fontname': 'century', 'color': 'navy', 'fontsize': '20'},
)

plt.pie(weights, labeldistance=1.3, wedgeprops={'linewidth': 5},
        labels=label, explode=explode, pctdistance=0.9, autopct='%.2f %%')

plt.show()


# # Box and Whiskers Chart

# - **Comparing FIFA teams with one another**

# In[18]:


plt.style.use('dark_background')

plt.figure(figsize=(6, 8))

barcelona = fifa.loc[fifa.Club == 'FC Barcelona']['Overall']
madrid = fifa.loc[fifa.Club == 'Real Madrid']['Overall']
juventus = fifa.loc[fifa.Club == 'Juventus']['Overall']
manchester = fifa.loc[fifa.Club == 'Manchester United']['Overall']

club_names = ['FC Barcelona', 'Real Madrid', 'Juventus', 'Manchester United']

plt.title(
    'FIFA Football Teams Comparison',
    loc='center',
    fontdict={'fontname': 'century', 'color': 'white', 'fontsize': '20'},
)

plt.boxplot([barcelona, madrid, juventus, manchester])
# Boxes are drawn at positions 1..N by default; label them through xticks,
# which works regardless of the name of boxplot's label keyword.
plt.xticks(range(1, len(club_names) + 1), club_names)

plt.show()