Plotting and Visualization¶

In [ ]:

import numpy as np
import pandas as pd
PREVIOUS_MAX_ROWS = pd.options.display.max_rows
pd.options.display.max_rows = 20
np.random.seed(12345)
import matplotlib.pyplot as plt
import matplotlib
plt.rc('figure', figsize=(10, 6))
np.set_printoptions(precision=4, suppress=True)

%matplotlib notebook

A Brief matplotlib API Primer¶

In [ ]:

import matplotlib.pyplot as plt

In [ ]:

import numpy as np
data = np.arange(10)
data
plt.plot(data)

Figures and Subplots¶

In [ ]:

fig = plt.figure()

In [ ]:

ax1 = fig.add_subplot(2, 2, 1)

In [ ]:

ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)

fig = plt.figure() ax1 = fig.add_subplot(2, 2, 1) ax2 = fig.add_subplot(2, 2, 2) ax3 = fig.add_subplot(2, 2, 3)

In [ ]:

plt.plot(np.random.randn(50).cumsum(), 'k--')

In [ ]:

_ = ax1.hist(np.random.randn(100), bins=20, color='k', alpha=0.3)
ax2.scatter(np.arange(30), np.arange(30) + 3 * np.random.randn(30))

In [ ]:

plt.close('all')

In [ ]:

fig, axes = plt.subplots(2, 3)
axes

Adjusting the spacing around subplots¶

subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

fig, axes = plt.subplots(2, 2, sharex=True, sharey=True) for i in range(2): for j in range(2): axes[i, j].hist(np.random.randn(500), bins=50, color='k', alpha=0.5) plt.subplots_adjust(wspace=0, hspace=0)

In [ ]:

fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)
for i in range(2):
    for j in range(2):
        axes[i, j].hist(np.random.randn(500), bins=50, color='k', alpha=0.5)
plt.subplots_adjust(wspace=0, hspace=0)

Colors, Markers, and Line Styles¶

ax.plot(x, y, 'g--')

ax.plot(x, y, linestyle='--', color='g')

In [ ]:

plt.figure()

In [ ]:

from numpy.random import randn
plt.plot(randn(30).cumsum(), 'ko--')

plot(randn(30).cumsum(), color='k', linestyle='dashed', marker='o')

In [ ]:

plt.close('all')

In [ ]:

data = np.random.randn(30).cumsum()
plt.plot(data, 'k--', label='Default')
plt.plot(data, 'k-', drawstyle='steps-post', label='steps-post')
plt.legend(loc='best')

Ticks, Labels, and Legends¶

Setting the title, axis labels, ticks, and ticklabels¶

In [ ]:

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(np.random.randn(1000).cumsum())

In [ ]:

ticks = ax.set_xticks([0, 250, 500, 750, 1000])
labels = ax.set_xticklabels(['one', 'two', 'three', 'four', 'five'],
                            rotation=30, fontsize='small')

In [ ]:

ax.set_title('My first matplotlib plot')
ax.set_xlabel('Stages')

props = { 'title': 'My first matplotlib plot', 'xlabel': 'Stages' } ax.set(**props)

Adding legends¶

In [ ]:

from numpy.random import randn
fig = plt.figure(); ax = fig.add_subplot(1, 1, 1)
ax.plot(randn(1000).cumsum(), 'k', label='one')
ax.plot(randn(1000).cumsum(), 'k--', label='two')
ax.plot(randn(1000).cumsum(), 'k.', label='three')

In [ ]:

ax.legend(loc='best')

Annotations and Drawing on a Subplot¶

ax.text(x, y, 'Hello world!', family='monospace', fontsize=10)

In [ ]:

from datetime import datetime

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

data = pd.read_csv('examples/spx.csv', index_col=0, parse_dates=True)
spx = data['SPX']

spx.plot(ax=ax, style='k-')

crisis_data = [
    (datetime(2007, 10, 11), 'Peak of bull market'),
    (datetime(2008, 3, 12), 'Bear Stearns Fails'),
    (datetime(2008, 9, 15), 'Lehman Bankruptcy')
]

for date, label in crisis_data:
    ax.annotate(label, xy=(date, spx.asof(date) + 75),
                xytext=(date, spx.asof(date) + 225),
                arrowprops=dict(facecolor='black', headwidth=4, width=2,
                                headlength=4),
                horizontalalignment='left', verticalalignment='top')

# Zoom in on 2007-2010
ax.set_xlim(['1/1/2007', '1/1/2011'])
ax.set_ylim([600, 1800])

ax.set_title('Important dates in the 2008-2009 financial crisis')

In [ ]:

ax.set_title('Important dates in the 2008–2009 financial crisis')

fig = plt.figure() ax = fig.add_subplot(1, 1, 1)

rect = plt.Rectangle((0.2, 0.75), 0.4, 0.15, color='k', alpha=0.3) circ = plt.Circle((0.7, 0.2), 0.15, color='b', alpha=0.3) pgon = plt.Polygon([[0.15, 0.15], [0.35, 0.4], [0.2, 0.6]], color='g', alpha=0.5)

ax.add_patch(rect) ax.add_patch(circ) ax.add_patch(pgon)

In [ ]:

fig = plt.figure(figsize=(12, 6)); ax = fig.add_subplot(1, 1, 1)
rect = plt.Rectangle((0.2, 0.75), 0.4, 0.15, color='k', alpha=0.3)
circ = plt.Circle((0.7, 0.2), 0.15, color='b', alpha=0.3)
pgon = plt.Polygon([[0.15, 0.15], [0.35, 0.4], [0.2, 0.6]],
                   color='g', alpha=0.5)
ax.add_patch(rect)
ax.add_patch(circ)
ax.add_patch(pgon)

Saving Plots to File¶

plt.savefig('figpath.svg')

plt.savefig('figpath.png', dpi=400, bbox_inches='tight')

from io import BytesIO buffer = BytesIO() plt.savefig(buffer) plot_data = buffer.getvalue()

matplotlib Configuration¶

plt.rc('figure', figsize=(10, 10))

font_options = {'family' : 'monospace', 'weight' : 'bold', 'size' : 'small'} plt.rc('font', **font_options)

Plotting with pandas and seaborn¶

Line Plots¶

In [ ]:

plt.close('all')

In [ ]:

s = pd.Series(np.random.randn(10).cumsum(), index=np.arange(0, 100, 10))
s.plot()

In [ ]:

df = pd.DataFrame(np.random.randn(10, 4).cumsum(0),
                  columns=['A', 'B', 'C', 'D'],
                  index=np.arange(0, 100, 10))
df.plot()

Bar Plots¶

In [ ]:

fig, axes = plt.subplots(2, 1)
data = pd.Series(np.random.rand(16), index=list('abcdefghijklmnop'))
data.plot.bar(ax=axes[0], color='k', alpha=0.7)
data.plot.barh(ax=axes[1], color='k', alpha=0.7)

In [ ]:

np.random.seed(12348)

In [ ]:

df = pd.DataFrame(np.random.rand(6, 4),
                  index=['one', 'two', 'three', 'four', 'five', 'six'],
                  columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'))
df
df.plot.bar()

In [ ]:

plt.figure()

In [ ]:

df.plot.barh(stacked=True, alpha=0.5)

In [ ]:

plt.close('all')

In [ ]:

tips = pd.read_csv('examples/tips.csv')
party_counts = pd.crosstab(tips['day'], tips['size'])
party_counts
# Not many 1- and 6-person parties
party_counts = party_counts.loc[:, 2:5]

In [ ]:

# Normalize to sum to 1
party_pcts = party_counts.div(party_counts.sum(1), axis=0)
party_pcts
party_pcts.plot.bar()

In [ ]:

plt.close('all')

In [ ]:

import seaborn as sns
tips['tip_pct'] = tips['tip'] / (tips['total_bill'] - tips['tip'])
tips.head()
sns.barplot(x='tip_pct', y='day', data=tips, orient='h')

In [ ]:

plt.close('all')

In [ ]:

sns.barplot(x='tip_pct', y='day', hue='time', data=tips, orient='h')

In [ ]:

plt.close('all')

In [ ]:

sns.set(style="whitegrid")

Histograms and Density Plots¶

In [ ]:

plt.figure()

In [ ]:

tips['tip_pct'].plot.hist(bins=50)

In [ ]:

plt.figure()

In [ ]:

tips['tip_pct'].plot.density()

In [ ]:

plt.figure()

In [ ]:

comp1 = np.random.normal(0, 1, size=200)
comp2 = np.random.normal(10, 2, size=200)
values = pd.Series(np.concatenate([comp1, comp2]))
sns.distplot(values, bins=100, color='k')

Scatter or Point Plots¶

In [ ]:

macro = pd.read_csv('examples/macrodata.csv')
data = macro[['cpi', 'm1', 'tbilrate', 'unemp']]
trans_data = np.log(data).diff().dropna()
trans_data[-5:]

In [ ]:

plt.figure()

In [ ]:

sns.regplot('m1', 'unemp', data=trans_data)
plt.title('Changes in log %s versus log %s' % ('m1', 'unemp'))

In [ ]:

sns.pairplot(trans_data, diag_kind='kde', plot_kws={'alpha': 0.2})

In [ ]:

sns.factorplot(x='day', y='tip_pct', hue='time', col='smoker',
               kind='bar', data=tips[tips.tip_pct < 1])

In [ ]:

sns.factorplot(x='day', y='tip_pct', row='time',
               col='smoker',
               kind='bar', data=tips[tips.tip_pct < 1])

In [ ]:

sns.factorplot(x='tip_pct', y='day', kind='box',
               data=tips[tips.tip_pct < 0.5])

Other Python Visualization Tools¶

In [ ]:

pd.options.display.max_rows = PREVIOUS_MAX_ROWS

Plotting and Visualization¶

A Brief matplotlib API Primer¶

Figures and Subplots¶

Adjusting the spacing around subplots¶

Colors, Markers, and Line Styles¶

Ticks, Labels, and Legends¶

Setting the title, axis labels, ticks, and ticklabels¶

Adding legends¶

Annotations and Drawing on a Subplot¶

Saving Plots to File¶

matplotlib Configuration¶

Plotting with pandas and seaborn¶

Line Plots¶

Bar Plots¶

Histograms and Density Plots¶

Scatter or Point Plots¶

Facet Grids and Categorical Data¶

Other Python Visualization Tools¶

Conclusion¶