# %load ../../standard_import.txt
import pandas as pd
import numpy as np
import pymc3 as pm
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import gridspec
from IPython.display import Image
%matplotlib inline
plt.style.use('seaborn-white')
color = '#87ceeb'
f_dict = {'size':16}
df = pd.read_csv('data/Guber1999data.csv')
df.info()
df.head()
X = df[['Spend', 'PrcntTake']]
y = df['SATT']
meanx = X.mean().values
scalex = X.std().values
zX = ((X-meanx)/scalex).values
meany = y.mean()
scaley = y.std()
zy = ((y-meany)/scaley).values
Image('images/fig18_4.png', width=400)
with pm.Model() as model:
zbeta0 = pm.Normal('zbeta0', mu=0, sd=2)
zbetaj = pm.Normal('zbetaj', mu=0, sd=2, shape=(2))
zmu = zbeta0 + pm.math.dot(zbetaj, zX.T)
nu = pm.Exponential('nu', 1/29.)
zsigma = pm.Uniform('zsigma', 10**-5, 10)
likelihood = pm.StudentT('likelihood', nu=nu, mu=zmu, lam=1/zsigma**2, observed=zy)
with model:
trace = pm.sample(10000)
pm.traceplot(trace);
# Transform parameters back to original scale
burnin = 100
beta0 = trace['zbeta0']*scaley + meany - np.sum(trace['zbetaj']*meanx/scalex, axis=1)*scaley
betaj = (trace['zbetaj']/scalex)*scaley
scale = (trace['zsigma']*scaley)[burnin:]
intercept = beta0[burnin:]
spend = betaj[:,0][burnin:]
prcnttake = betaj[:,1][burnin:]
normality = np.log10(trace['nu'][burnin:])
fig, ([ax1, ax2, ax3], [ax4, ax5, ax6]) = plt.subplots(2,3, figsize=(12,6))
for ax, estimate, title, xlabel in zip(fig.axes,
[intercept, spend, prcnttake, scale, normality],
['Intercept', 'Spend', 'PrcntTake', 'Scale', 'Normality'],
[r'$\beta_0$', r'$\beta_1$', r'$\beta_2$', r'$\sigma$', r'log10($\nu$)']):
pm.plot_posterior(estimate, point_estimate='mode', ax=ax, color=color)
ax.set_title(title, fontdict=f_dict)
ax.set_xlabel(xlabel, fontdict=f_dict)
fig.tight_layout()
ax6.set_visible(False)
Below we create the scatterplots of figure 18.5 using pairplot()
in seaborn and then tweak the lower triangle.
# DataFrame with the columns in correct order
pair_plt = pd.DataFrame({'Intercept':intercept,
'Spend':spend,
'PrcntTake': prcnttake,
'Scale':scale,
'Normality': normality},
columns=['Intercept', 'Spend', 'PrcntTake', 'Scale', 'Normality'])
# Correlation coefficients
corr = np.round(np.corrcoef(pair_plt, rowvar=0), decimals=3)
# Indexes of the lower triangle, below the diagonal
lower_idx = np.tril(corr, -1).nonzero()
# The seaborn pairplot
pgrid = sns.pairplot(pair_plt, plot_kws={'edgecolor':color, 'facecolor':'none'})
# Replace the plots on the diagonal with the parameter names
for i, ax in enumerate(pgrid.diag_axes):
ax.clear()
ax.xaxis.set_visible(False)
ax.yaxis.set_visible(False)
ax.text(.5,.5, pair_plt.columns[i], transform=ax.transAxes, fontdict={'size':16, 'weight':'bold'}, ha='center')
# Replace the lower triangle with the correlation coefficients
for i, ax in enumerate(pgrid.axes[lower_idx]):
ax.clear()
ax.xaxis.set_visible(False)
ax.yaxis.set_visible(False)
ax.text(.5,.5, corr[lower_idx][i], transform=ax.transAxes, fontdict=f_dict, ha='center')
X2 = X.assign(PropNotTake = lambda x: (100-x.PrcntTake)/100)
meanx2 = X2.mean().values
scalex2 = X2.std().values
zX2 = ((X2-meanx2)/scalex2).values
with pm.Model() as model2:
zbeta0 = pm.Normal('zbeta0', mu=0, sd=2)
zbetaj = pm.Normal('zbetaj', mu=0, sd=2, shape=(3))
zmu = zbeta0 + pm.math.dot(zbetaj, zX2.T)
nu = pm.Exponential('nu', 1/29.)
zsigma = pm.Uniform('zsigma', 10**-5, 10)
likelihood = pm.StudentT('likelihood', nu=nu, mu=zmu, lam=1/zsigma**2, observed=zy)
with model2:
trace2 = pm.sample(5000)
pm.traceplot(trace2);
# Transform parameters back to original scale
burnin = 200
beta0 = trace2['zbeta0']*scaley + meany - np.sum(trace2['zbetaj']*meanx2/scalex2, axis=1)*scaley
betaj = (trace2['zbetaj']/scalex2)*scaley
scale = (trace2['zsigma']*scaley)[burnin:]
intercept = beta0[burnin:]
spend = betaj[:,0][burnin:]
prcnttake = betaj[:,1][burnin:]
propnottake = betaj[:,2][burnin:]
normality = np.log10(trace2['nu'][burnin:])
fig, ([ax1, ax2, ax3], [ax4, ax5, ax6]) = plt.subplots(2,3, figsize=(12,6))
for ax, estimate, title, xlabel in zip(fig.axes,
[intercept, spend, prcnttake, propnottake, scale, normality],
['Intercept', 'Spend', 'PrcntTake', 'PropNotTake', 'Scale', 'Normality'],
[r'$\beta_0$', r'$\beta_1$', r'$\beta_2$', r'$\beta_3$', r'$\sigma$', r'log10($\nu$)']):
pm.plot_posterior(estimate, point_estimate='mode', ax=ax, color=color)
ax.set_title(title, fontdict=f_dict)
ax.set_xlabel(xlabel, fontdict=f_dict)
plt.tight_layout()
# DataFrame with the columns in correct order
pair_plt = pd.DataFrame({'Intercept':intercept,
'Spend':spend,
'PrcntTake': prcnttake,
'PropNotTake': propnottake,
'Scale':scale,
'Normality': normality},
columns=['Intercept', 'Spend', 'PrcntTake', 'PropNotTake', 'Scale', 'Normality'])
# Correlation coefficients
corr = np.round(np.corrcoef(pair_plt, rowvar=0), decimals=3)
# Indexes of the lower triangle, below the diagonal
lower_idx = np.tril(corr, -1).nonzero()
# The seaborn pairplot
pgrid = sns.pairplot(pair_plt, plot_kws={'edgecolor':color, 'facecolor':'none'})
# Replace the plots on the diagonal with the parameter names
for i, ax in enumerate(pgrid.diag_axes):
ax.clear()
ax.xaxis.set_visible(False)
ax.yaxis.set_visible(False)
ax.text(.5,.5, pair_plt.columns[i], transform=ax.transAxes, fontdict={'size':16, 'weight':'bold'}, ha='center')
# Replace the lower triangle with the correlation coefficients
for i, ax in enumerate(pgrid.axes[lower_idx]):
ax.clear()
ax.xaxis.set_visible(False)
ax.yaxis.set_visible(False)
ax.text(.5,.5, corr[lower_idx][i], transform=ax.transAxes, fontdict=f_dict, ha='center')