Matplotlib uses a special configuration file called matplotlibrc.
This file is usually located at ~/.matplotlib/matplotlibrc.
You can check the location of this file by typing:
# Importing modules
%matplotlib inline
import matplotlib
# Printing out `fname` for `matplotlibrc`
matplotlib.matplotlib_fname()
'/Users/victor2/.matplotlib/matplotlibrc'
import numpy as np
import matplotlib.pyplot as plt
You can also edit the different parameters of the matplotlibrc
file.
# List of possible values to be changed
import matplotlib as mpl
matplotlib.rcParams
print('Linewidth: {0} Color: {1}'.format(
mpl.rcParams['lines.linewidth'],
mpl.rcParams['lines.color']))
Linewidth: 1.5 Color: k
# Changing line properties
mpl.rc('lines', linewidth=10, color='g')
print('Linewidth: {0} Color: {1}'.format(
mpl.rcParams['lines.linewidth'],
mpl.rcParams['lines.color']))
Linewidth: 10.0 Color: g
Newer versions of matplotlib offer the option to set up a style sheet.
For example, one can make a plot look like it was taken from ggplot or SuperMongo.
def plotting(stylename='classic'):
    """Plot a small random dataset using the given matplotlib style sheet.

    The x values are the integers 0..9 and the y values are random integers
    drawn from [20, 30). The current figure is cleared, the data are drawn
    as a red line with circle markers, and the figure is shown.

    Parameters
    ----------
    stylename : str, optional
        Name of the style sheet handed to ``plt.style.use``.
        Defaults to ``'classic'``.

    Raises
    ------
    IOError
        If ``plt.style.use`` raises ``IOError`` for ``stylename``. The
        message is ``'<stylename> not found'`` and the original error is
        chained as the cause.
    """
    # Defining data
    x = np.arange(0, 10)
    y = np.random.randint(20, 30, x.size)
    # Defining style sheet
    try:
        plt.style.use(stylename)
    except IOError as err:
        msg = '{0} not found'.format(stylename)
        # Chain the original error so the underlying cause is not lost
        raise IOError(msg) from err
    # Plotting
    plt.clf()
    plt.plot(x, y, '-ro', label=stylename)
    plt.xlabel('X label')
    # The y-axis was previously mislabelled as 'X label'
    plt.ylabel('Y label')
    plt.legend(loc=1)
    plt.title('Plot using "{0}" Style'.format(stylename), fontsize=20)
    plt.show()
plotting(stylename='classic')
Now with a new style sheet
plotting(stylename='ggplot')
for style in plt.style.available[0:5]:
plotting(stylename=style)
To get a list of all the available styles:
plt.style.available
['seaborn-dark', 'seaborn-darkgrid', 'seaborn-ticks', 'fivethirtyeight', 'seaborn-whitegrid', 'classic', '_classic_test', 'fast', 'seaborn-talk', 'seaborn-dark-palette', 'seaborn-bright', 'seaborn-pastel', 'grayscale', 'seaborn-notebook', 'ggplot', 'seaborn-colorblind', 'seaborn-muted', 'seaborn', 'Solarize_Light2', 'seaborn-paper', 'bmh', 'tableau-colorblind10', 'seaborn-white', 'dark_background', 'seaborn-poster', 'seaborn-deep']
For more information on style sheets, see: http://www.delaytolerantnetworks.com/Customizing-plots-with-style-sheets-in-matplotlib/
Seaborn is a visualization library based on matplotlib. It provides a high-level interface for drawing attractive statistical graphics.
Some of the data here was taken from: http://blog.insightdatalabs.com/advanced-functionality-in-seaborn/
We'll be using the UCI "Auto MPG" data for the purpose of this module.
We'll be using pandas along with Seaborn.
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline
import seaborn as sns
sns.set_style('darkgrid')
Reading in the data that we'll be using:
# Column names assigned to the data table (one per column, in order)
names = [
    'mpg',
    'cylinders',
    'displacement',
    'horsepower',
    'weight',
    'acceleration',
    'model_year',
    'origin',
    'car_name',
]
# Reading in ASCII file with motor data.
# The separator is a raw string so that `\s+` is a valid regex without an
# invalid-escape warning, and `na_values='?'` converts the '?' placeholders
# to NaN while the file is being parsed.
df = pd.read_csv(
    "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data",
    sep=r'\s+',
    names=names,
    na_values='?',
)
# Creating new column named `maker` (the first word of `car_name`)
df['maker'] = df['car_name'].map(lambda x: x.split()[0])
# Assigning continents to the corresponding `origin` codes
df['origin'] = df['origin'].map({1: 'America', 2: 'Europe', 3: 'Asia'})
# Dropping rows that contain `NaN` values
df = df.dropna()
# Making sure `horsepower` is stored as float
df['horsepower'] = df['horsepower'].astype(float)
df.head()
mpg | cylinders | displacement | horsepower | weight | acceleration | model_year | origin | car_name | maker | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 18.0 | 8 | 307.0 | 130.0 | 3504.0 | 12.0 | 70 | America | chevrolet chevelle malibu | chevrolet |
1 | 15.0 | 8 | 350.0 | 165.0 | 3693.0 | 11.5 | 70 | America | buick skylark 320 | buick |
2 | 18.0 | 8 | 318.0 | 150.0 | 3436.0 | 11.0 | 70 | America | plymouth satellite | plymouth |
3 | 16.0 | 8 | 304.0 | 150.0 | 3433.0 | 12.0 | 70 | America | amc rebel sst | amc |
4 | 17.0 | 8 | 302.0 | 140.0 | 3449.0 | 10.5 | 70 | America | ford torino | ford |
sns.set_context('notebook')
# `factorplot` has been renamed to `catplot`; `kind="point"` keeps the
# point-plot style that `factorplot` drew by default.
sns.catplot(data=df, x="model_year", y="mpg", kind="point")
/Users/victor2/anaconda3/envs/vandy_computational_workshop/lib/python3.7/site-packages/seaborn/categorical.py:3666: UserWarning: The `factorplot` function has been renamed to `catplot`. The original name will be removed in a future release. Please update your code. Note that the default `kind` in `factorplot` (`'point'`) has changed `'strip'` in `catplot`. warnings.warn(msg)
<seaborn.axisgrid.FacetGrid at 0x1a2353ae80>
We can start off by visualizing 'model_year' vs 'mpg' for each type of 'origin' class.
We can do this by using the factorplot command (renamed to catplot in newer versions of Seaborn):
# `catplot` is the new name of `factorplot`; `kind="point"` reproduces the
# point plot that `factorplot` drew by default.
sns.catplot(data=df, x="model_year", y="mpg", col="origin", kind="point")
<seaborn.axisgrid.FacetGrid at 0x1a23483080>
You can easily change the type of graph that you're plotting
g = sns.FacetGrid(df, col="origin")
g.map(sns.distplot, "mpg")
<seaborn.axisgrid.FacetGrid at 0x1a242b56a0>
Or look at a scatter plot of the data
g = sns.FacetGrid(df, col="origin")
g.map(plt.scatter, "horsepower", "mpg")
<seaborn.axisgrid.FacetGrid at 0x1a2461f400>
You can easily compute and plot a regression of the data
g = sns.FacetGrid(df, col="origin")
g.map(sns.regplot, "horsepower", "mpg")
plt.xlim(0, 250)
plt.ylim(0, 60)
(0, 60)
Let's say you want to visualize the "Kernel Density Estimation" for each type
# Define new variable `tons`
df['tons'] = (df.weight/2000).astype(int)
# Create grid to plot your data
g = sns.FacetGrid(df, col="origin", row="tons")
# 1) Specify type of function
# 2) Specify 'x' and 'y' for each plot
g.map(sns.kdeplot, "horsepower", "mpg")
# Define the x- and y-limits for each subplot
plt.xlim(0, 250)
plt.ylim(0, 60)
(0, 60)
These functions allow you to plot pairwise relations in a dataset.
Let's say we want to plot the relation between mpg, horsepower, weight, and origin.
And we also want to separate them based on the origin.
No types
g = sns.PairGrid(df[["mpg", "horsepower", "weight", "origin"]])
g.map_upper(sns.regplot)
g.map_lower(sns.residplot)
g.map_diag(plt.hist)
for ax in g.axes.flat:
plt.setp(ax.get_xticklabels(), rotation=45)
g.set(alpha=0.5)
<seaborn.axisgrid.PairGrid at 0x1a25013dd8>
We can specify the types of origin
g = sns.PairGrid(df[["mpg", "horsepower", "weight", "origin"]], hue="origin")
g.map_upper(sns.regplot)
g.map_lower(sns.residplot)
g.map_diag(plt.hist)
for ax in g.axes.flat:
plt.setp(ax.get_xticklabels(), rotation=45)
g.add_legend()
g.set(alpha=0.5)
<seaborn.axisgrid.PairGrid at 0x1a25610588>
Or maker
g = sns.PairGrid(df[["mpg", "horsepower", "weight", "origin","maker"]], hue="maker")
g.map_upper(sns.regplot)
g.map_lower(sns.residplot)
g.map_diag(plt.hist)
for ax in g.axes.flat:
plt.setp(ax.get_xticklabels(), rotation=45)
g.add_legend()
g.set(alpha=0.5)
<seaborn.axisgrid.PairGrid at 0x1a247b44a8>
As you can see, you have some freedom when it comes to what you want to plot.
g = sns.PairGrid(df[["mpg", "horsepower", "weight", "origin"]])
g.map_diag(sns.kdeplot)
g.map_offdiag(sns.kdeplot, cmap="Blues_d", n_levels=6);
You can also visualize 2D-data in different ways:
You can estimate the Pearson correlation factor, along with the p-value of a linear fitting.
# Pass `x` and `y` by keyword, as the other `jointplot` calls in this
# notebook do; newer seaborn releases no longer accept them positionally.
sns.jointplot(x="mpg", y="horsepower", data=df, kind='kde')
<seaborn.axisgrid.JointGrid at 0x1a272c2240>
g = sns.jointplot(x="mpg", y="horsepower", data=df, kind="kde", color="r")
g.plot_joint(plt.scatter, c="w", s=30, linewidth=1, marker="+")
g.ax_joint.collections[0].set_alpha(0)
Or show the linear regression of data
# Pass `x` and `y` by keyword, as the other `jointplot` calls in this
# notebook do; newer seaborn releases no longer accept them positionally.
sns.jointplot(x="horsepower", y="mpg", data=df, kind="reg")
<seaborn.axisgrid.JointGrid at 0x1a276dfa20>
Or a 2nd-order fitting to the data:
g = sns.JointGrid(x="horsepower", y="mpg", data=df)
g.plot_joint(sns.regplot, order=2)
g.plot_marginals(sns.distplot)
<seaborn.axisgrid.JointGrid at 0x1a27899588>
You can also plot 2D-Histograms
with sns.axes_style("white"):
sns.jointplot(x="horsepower", y="mpg", data=df, kind="hex");
sns.boxplot(x="origin", y="horsepower",data=df, palette="PRGn")
sns.despine(offset=10, trim=True)
sns.boxplot(x="origin", y="horsepower", data=df, palette="PRGn")
sns.despine(offset=10, trim=True)
Plot one or more timeseries with flexible representation of uncertainty.
# Load the example "gammas" dataset and draw one time series per ROI.
# NOTE: `tsplot` is deprecated (see the UserWarning in the output below);
# seaborn recommends the newer `lineplot` function instead.
gammas = sns.load_dataset("gammas")
ax = sns.tsplot(time="timepoint", value="BOLD signal",
unit="subject", condition="ROI",
data=gammas)
/Users/victor2/anaconda3/envs/vandy_computational_workshop/lib/python3.7/site-packages/seaborn/timeseries.py:183: UserWarning: The `tsplot` function is deprecated and will be removed in a future release. Please update your code to use the new `lineplot` function. warnings.warn(msg, UserWarning)
x = np.linspace(0, 15, 31)
data = np.sin(x) + np.random.rand(10, 31) + np.random.randn(10, 1)
ax = sns.tsplot(data=data)
ax = sns.tsplot(data=data, err_style="unit_traces")
ax = sns.tsplot(data=data, err_style="boot_traces", n_boot=500)
sns.set(style="whitegrid", palette="muted")
# Load the Dataset
iris = sns.load_dataset("iris")
# Melt the dataset to long-form, with one row per (species, measurement) value
iris = pd.melt(iris, "species", var_name="measurement")
# Draw a categorical scatterplot to show each observation
sns.swarmplot(x="measurement", y="value", hue="species", data=iris)
<matplotlib.axes._subplots.AxesSubplot at 0x1a24927dd8>
sns.set(context="paper", font="monospace")
# Load the dataset of correlations between cortical brain networks
df = sns.load_dataset("brain_networks", header=[0, 1, 2], index_col=0)
corrmat = df.corr()
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(12, 9))
# Draw the heatmap using seaborn
sns.heatmap(corrmat, vmax=.8, square=True)
# Use matplotlib directly to emphasize known networks
networks = corrmat.columns.get_level_values("network")
for i, network in enumerate(networks):
if i and network != networks[i - 1]:
ax.axhline(len(networks) - i, c="w")
ax.axvline(i, c="w")
f.tight_layout()