# %load std_ipython_import.txt
import pandas as pd
import numpy as np
import numpy.ma as ma
import matplotlib.pyplot as plt
import seaborn as sns
import pymc3 as pm
import theano.tensor as tt
from scipy.stats import norm
from IPython.display import Image
%matplotlib inline
plt.style.use('seaborn-white')
color = '#87ceeb'
# Calculate Gamma shape and rate from mode and sd.
def gammaShRaFromModeSD(mode, sd):
    rate = (mode + np.sqrt(mode**2 + 4 * sd**2)) / (2 * sd**2)
    shape = 1 + mode * rate
    return(shape, rate)
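As a quick sanity check (not a cell of the original notebook): a Gamma(shape, rate) has mode (shape − 1)/rate and SD sqrt(shape)/rate, so the returned parameters should reproduce the requested mode and SD.

# Sanity check (illustrative): recover the requested mode and sd from the Gamma parameters.
shape, rate = gammaShRaFromModeSD(10., 4.)
print((shape - 1) / rate)     # ~10.0, the requested mode
print(np.sqrt(shape) / rate)  # ~4.0, the requested sd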
def plot_mustache(var, sd, j, width=.75, ax=None):
    for i in np.arange(0, len(var), int(len(var)*.1)):
        rv = norm(loc=var[i], scale=sd[i])
        yrange = np.linspace(rv.ppf(0.01), rv.ppf(0.99), 100)
        xrange = rv.pdf(yrange)
        # When the SD of a group is large compared to the others, the top of its mustache is
        # relatively low and does not plot well together with low-SD groups.
        # Scale the xrange so that the 'height' of all mustaches equals `width` (0.75 by default).
        xrange_scaled = xrange*(width/xrange.max())
        # Use the negative value to flip the mustache in the right direction.
        ax.plot(-xrange_scaled+j, yrange, color=color, alpha=.6, zorder=99)
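To see what plot_mustache draws (a sideways normal density per group, one curve per retained posterior draw), here is a minimal sketch on made-up synthetic "posterior" draws; the means and SDs below are purely illustrative.

# Illustrative only: mustaches for two hypothetical groups at x-positions 0 and 1.
fig, ax = plt.subplots()
fake_mu = np.random.normal(100, 5, size=500)   # pretend posterior draws of a group mean
fake_sd = np.random.uniform(8, 12, size=500)   # pretend posterior draws of the group SD
plot_mustache(fake_mu, fake_sd, 0, ax=ax)
plot_mustache(fake_mu + 20, fake_sd, 1, ax=ax)
ax.set_xlim(-1, 2);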
df = pd.read_csv('data/Salary.csv', usecols=[0,3,5], dtype={'Org': 'category', 'Pos': 'category'})
# Reorder the Pos categories and rename categories
df.Pos.cat.reorder_categories(['FT3', 'FT2', 'FT1', 'NDW', 'DST'], ordered=True, inplace=True)
df.Pos.cat.rename_categories(['Assis', 'Assoc', 'Full', 'Endow', 'Disting'], inplace=True)
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1080 entries, 0 to 1079
Data columns (total 3 columns):
Org       1080 non-null category
Pos       1080 non-null category
Salary    1080 non-null int64
dtypes: category(2), int64(1)
memory usage: 13.8 KB
df.groupby('Pos').apply(lambda x: x.head(2))
| Pos | | Org | Pos | Salary |
|---|---|---|---|---|
| Assis | 4 | LGED | Assis | 63796 |
| | 6 | INFO | Assis | 98814 |
| Assoc | 0 | PL | Assoc | 72395 |
| | 1 | MUTH | Assoc | 61017 |
| Full | 7 | CRIN | Full | 107745 |
| | 9 | PSY | Full | 173302 |
| Endow | 5 | MGMT | Endow | 219600 |
| | 8 | CRIN | Endow | 114275 |
| Disting | 29 | SPEA | Disting | 285000 |
| | 128 | MUHI | Disting | 114189 |
Image('images/fig20_2.png')
Reparameterizing hierarchical models (the non-centered parameterization used in the models below) generally results in much more efficient and faster sampling.
See http://twiecki.github.io/blog/2017/02/08/bayesian-hierchical-non-centered/ and
http://pymc3.readthedocs.io/en/latest/notebooks/Diagnosing_biased_Inference_with_Divergences.html
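The idea, as a minimal sketch (the variable names here are illustrative and not part of the salary model below): instead of drawing a group effect directly from Normal(0, groupSD), draw a standard-normal "tilde" variable and scale it deterministically, which removes the funnel-shaped coupling between the effect and its standard deviation.

# Minimal sketch of the non-centered trick (illustrative only):
# centered:      effect ~ Normal(0, groupSD)
# non-centered:  effect_tilde ~ Normal(0, 1);  effect = groupSD * effect_tilde
with pm.Model() as noncentered_sketch:
    groupSD = pm.HalfNormal('groupSD', sd=1.)
    effect_tilde = pm.Normal('effect_tilde', mu=0., sd=1., shape=5)
    effect = pm.Deterministic('effect', groupSD * effect_tilde)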
y = df.Salary
yMean = y.mean()
ySD = y.std()
x1 = df.Pos.cat.codes.values
Nx1Lvl = len(df.Pos.cat.categories)
x2 = df.Org.cat.codes.values
Nx2Lvl = len(df.Org.cat.categories)
agammaShRa = gammaShRaFromModeSD(ySD/2 , 2*ySD)
with pm.Model() as model1:
    # Baseline salary (non-centered version of the commented-out prior)
    #a0 = pm.Normal('a0', yMean, tau=1/(ySD*5)**2)
    a0_tilde = pm.Normal('a0_tilde', mu=0, sd=1)
    a0 = pm.Deterministic('a0', yMean + ySD*5*a0_tilde)

    # Deflection from baseline for position (x1)
    a1SD = pm.Gamma('a1SD', agammaShRa[0], agammaShRa[1])
    #a1 = pm.Normal('a1', 0.0, tau=1/a1SD**2, shape=Nx1Lvl)
    a1_tilde = pm.Normal('a1_tilde', mu=0, sd=1, shape=Nx1Lvl)
    a1 = pm.Deterministic('a1', 0.0 + a1SD*a1_tilde)

    # Deflection from baseline for organization (x2)
    a2SD = pm.Gamma('a2SD', agammaShRa[0], agammaShRa[1])
    #a2 = pm.Normal('a2', 0.0, tau=1/a2SD**2, shape=Nx2Lvl)
    a2_tilde = pm.Normal('a2_tilde', mu=0, sd=1, shape=Nx2Lvl)
    a2 = pm.Deterministic('a2', 0.0 + a2SD*a2_tilde)

    # Interaction deflection for each position-organization combination
    a1a2SD = pm.Gamma('a1a2SD', agammaShRa[0], agammaShRa[1])
    #a1a2 = pm.Normal('a1a2', 0.0, 1/a1a2SD**2, shape=(Nx1Lvl, Nx2Lvl))
    a1a2_tilde = pm.Normal('a1a2_tilde', mu=0, sd=1, shape=(Nx1Lvl, Nx2Lvl))
    a1a2 = pm.Deterministic('a1a2', 0.0 + a1a2SD*a1a2_tilde)

    mu = a0 + a1[x1] + a2[x2] + a1a2[x1, x2]

    # Homogeneous noise SD and normal likelihood
    ySigma = pm.Uniform('ySigma', ySD/100, ySD*10)
    like = pm.Normal('like', mu, sd=ySigma, observed=y)
n_samples = 10000
with model1:
    trace1 = pm.sample(n_samples, nuts_kwargs={'target_accept':.95})
Auto-assigning NUTS sampler...
Initializing NUTS using ADVI...
Average Loss = 12,355: 15%|█▍ | 29604/200000 [00:11<01:06, 2554.59it/s]
Convergence archived at 29700
Interrupted at 29,700 [14%]: Average Loss = 13,120
100%|██████████| 10500/10500 [02:44<00:00, 63.86it/s]
pm.traceplot(trace1, varnames=['a0', 'a1', 'a1SD', 'a2', 'a2SD', 'a1a2', 'a1a2SD', 'ySigma']);
# Transforming the trace data to sum-to-zero values
m = np.zeros((Nx1Lvl,Nx2Lvl, len(trace1)))
b1b2 = m.copy()
for (j1,j2) in np.ndindex(Nx1Lvl,Nx2Lvl):
    m[j1,j2] = (trace1['a0'] +
                trace1['a1'][:,j1] +
                trace1['a2'][:,j2] +
                trace1['a1a2'][:,j1,j2])
b0 = np.mean(m, axis=(0,1))
b1 = np.mean(m, axis=1) - b0
b2 = np.mean(m, axis=0) - b0
for (j1,j2) in np.ndindex(Nx1Lvl,Nx2Lvl):
    b1b2[j1,j2] = m[j1,j2] - (b0 + b1[j1] + b2[j2])
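As a quick check (not in the original notebook), the recentred deflections should satisfy the sum-to-zero constraints at every step of the trace:

# Each of these should print a value very close to zero:
print(np.abs(b1.sum(axis=0)).max())    # position deflections sum to zero
print(np.abs(b2.sum(axis=0)).max())    # organization deflections sum to zero
print(np.abs(b1b2.sum(axis=0)).max())  # interaction sums to zero over positions...
print(np.abs(b1b2.sum(axis=1)).max())  # ...and over organizations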
# Below are the values corresponding to the first column of table 20.2
print('b0: {}'.format(np.round(np.mean(b0))))
print('b1: {}'.format(np.round(np.mean(b1, axis=1))))
print('b2: {}'.format(np.round(np.mean(b2, axis=1))[[20,48,12,7]]))
print('b1b2: {}'.format(np.round(np.mean(b1b2, axis=2))[0,48]))
print('b1b2: {}'.format(np.round(np.mean(b1b2, axis=2))[2,48]))
print('b1b2: {}'.format(np.round(np.mean(b1b2, axis=2))[0,12]))
print('b1b2: {}'.format(np.round(np.mean(b1b2, axis=2))[2,12]))
print('ySigma: {}'.format(np.round(np.mean(trace1['ySigma']))))
b0: 127106.0
b1: [-46398. -33094.  -3145.  26997.  55640.]
b2: [ -19362.   6687.  19206. 109196.]
b1b2: -3264.0
b1b2: -15098.0
b1b2: -12821.0
b1b2: 12978.0
ySigma: 17990.0
burnin = 200
scale = trace1['ySigma']
# Figure 20.8 in the book shows the Salary data for four of the sixty departments.
# Let's create the subset for plotting.
subset_org = ['BFIN', 'CHEM', 'PSY', 'ENG']
subset_df = df[df.Org.isin(subset_org)]
fg = sns.FacetGrid(subset_df, col='Org', col_order=subset_org,
                   col_wrap=2, size=3.5, aspect=1.3, despine=False, sharex=False)
fg.map(sns.swarmplot, 'Pos', 'Salary', data=subset_df, color='r')
fg.fig.suptitle('Data with Posterior Prediction', y=1.05, fontsize=16)
for ax in fg.axes:
    ax.set_xlim(xmin=-1)
    ax.set_ylim((0,350000))

for i, org_idx in enumerate([7,12,48,20]):
    for pos_idx in np.arange(5):
        plot_mustache(b0[burnin:]+
                      b1[pos_idx, burnin:]+
                      b2[org_idx, burnin:]+
                      b1b2[pos_idx,org_idx,burnin:], scale[burnin:], pos_idx, ax=fg.axes.flatten()[i])
contrasts = [b1[1,:]-b1[0,:],
             b2[12,:]-b2[48,:],
             b2[7,:]-np.mean([b2[48,:], b2[12,:], b2[20,:]], axis=0)]
titles = ['Assoc\n vs\n Assis',
          'CHEM\n vs\n PSY',
          'BFIN\n vs\n PSY.CHEM.ENG']
fig, axes = plt.subplots(1,3, figsize=(20,4))
for c, t, ax in zip(contrasts, titles, axes):
    pm.plot_posterior(c, point_estimate='mode', round_to=0, ref_val=0, color=color, ax=ax)
    ax.set_title(t)
    ax.set_xlabel('Difference')
# The posteriors for the main contrasts can be calculated directly from the parameters.
# The posterior for the interaction contrast (rightmost plot), however, is calculated as follows.
# Full vs Assis
P = np.zeros(Nx1Lvl, dtype=np.int)
P[df.Pos.cat.categories == 'Full'] = 1
P[df.Pos.cat.categories == 'Assis'] = -1
# CHEM vs PSY
O = np.zeros(Nx2Lvl, dtype=np.int)
O[df.Org.cat.categories == 'CHEM'] = 1
O[df.Org.cat.categories == 'PSY'] = -1
# The signs in the above vectors can be flipped; the end result (the posterior) will be the same.
# The outer product of these two vectors gives the matrix of weights to multiply
# with the trace values of b1b2.
ic_factors = np.outer(P,O)
pd.DataFrame(ic_factors, index=df.Pos.cat.categories, columns=df.Org.cat.categories)
The resulting contrast matrix has 5 rows × 60 columns and is zero everywhere except at the four cells involved in the contrast: +1 at (Full, CHEM), −1 at (Full, PSY), −1 at (Assis, CHEM) and +1 at (Assis, PSY). (Those columns fall in the truncated middle of the default DataFrame display.)
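As an illustrative check (not a cell of the original notebook) of why np.tensordot produces the interaction contrast: contracting this weight matrix against b1b2 over its (Pos, Org) axes yields exactly the double difference, (Full − Assis) at CHEM minus (Full − Assis) at PSY.

# Illustrative check: tensordot over the (Pos, Org) axes equals the explicit
# difference of differences of the interaction deflections.
i_full  = (df.Pos.cat.categories == 'Full').argmax()
i_assis = (df.Pos.cat.categories == 'Assis').argmax()
j_chem  = (df.Org.cat.categories == 'CHEM').argmax()
j_psy   = (df.Org.cat.categories == 'PSY').argmax()
explicit = ((b1b2[i_full, j_chem] - b1b2[i_assis, j_chem]) -
            (b1b2[i_full, j_psy] - b1b2[i_assis, j_psy]))
print(np.allclose(np.tensordot(ic_factors, b1b2), explicit))   # True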
contrasts = [m[(df.Pos.cat.categories == 'Full').argmax(),
               (df.Org.cat.categories == 'PSY').argmax(),:] -
             m[(df.Pos.cat.categories == 'Assis').argmax(),
               (df.Org.cat.categories == 'PSY').argmax(),:],
             m[(df.Pos.cat.categories == 'Full').argmax(),
               (df.Org.cat.categories == 'CHEM').argmax(),:] -
             m[(df.Pos.cat.categories == 'Assis').argmax(),
               (df.Org.cat.categories == 'CHEM').argmax(),:],
             np.tensordot(ic_factors, b1b2)]
titles = ['Full - Assis @ PSY',
          'Full - Assis @ CHEM',
          'Full.v.Assis\n(x)\nCHEM.v.PSY']
xlabels = ['Difference in Salary']*2 + ['Difference of Differences']
fig, axes = plt.subplots(1,3, figsize=(20,4))
for c, t, l, ax in zip(contrasts, titles, xlabels, axes):
    pm.plot_posterior(c, point_estimate='mode', round_to=0, ref_val=0, color=color, ax=ax)
    ax.set_title(t)
    ax.set_xlabel(l)
Image('images/fig20_7.png')
y = df.Salary
yMean = y.mean()
ySD = y.std()
agammaShRa = gammaShRaFromModeSD(ySD/2 , 2*ySD )
x1 = df.Pos.cat.codes.values
Nx1Lvl = len(df.Pos.cat.categories)
x2 = df.Org.cat.codes.values
Nx2Lvl = len(df.Org.cat.categories)
cellSDs = df.groupby(['Org','Pos']).std().dropna()
medianCellSD = cellSDs.median().squeeze()
sdCellSD = cellSDs.std().squeeze()
sgammaShRa = gammaShRaFromModeSD(medianCellSD, 2*sdCellSD)
with pm.Model() as model2:
    # Baseline salary (non-centered)
    #a0 = pm.Normal('a0', yMean, tau=1/(ySD*5)**2)
    a0_tilde = pm.Normal('a0_tilde', mu=0, sd=1)
    a0 = pm.Deterministic('a0', yMean + ySD*5*a0_tilde)

    # Deflection from baseline for position (x1)
    a1SD = pm.Gamma('a1SD', agammaShRa[0], agammaShRa[1])
    #a1 = pm.Normal('a1', 0.0, tau=1/a1SD**2, shape=Nx1Lvl)
    a1_tilde = pm.Normal('a1_tilde', mu=0, sd=1, shape=Nx1Lvl)
    a1 = pm.Deterministic('a1', 0.0 + a1SD*a1_tilde)

    # Deflection from baseline for organization (x2)
    a2SD = pm.Gamma('a2SD', agammaShRa[0], agammaShRa[1])
    #a2 = pm.Normal('a2', 0.0, tau=1/a2SD**2, shape=Nx2Lvl)
    a2_tilde = pm.Normal('a2_tilde', mu=0, sd=1, shape=Nx2Lvl)
    a2 = pm.Deterministic('a2', 0.0 + a2SD*a2_tilde)

    # Interaction deflection for each position-organization combination
    a1a2SD = pm.Gamma('a1a2SD', agammaShRa[0], agammaShRa[1])
    #a1a2 = pm.Normal('a1a2', 0.0, 1/a1a2SD**2, shape=(Nx1Lvl, Nx2Lvl))
    a1a2_tilde = pm.Normal('a1a2_tilde', mu=0, sd=1, shape=(Nx1Lvl, Nx2Lvl))
    a1a2 = pm.Deterministic('a1a2', 0.0 + a1a2SD*a1a2_tilde)

    mu = a0 + a1[x1] + a2[x2] + a1a2[x1, x2]

    # Heterogeneous cell SDs: hierarchical Gamma prior on the per-cell sigma, with the
    # mode/SD hyperparameters converted to shape/rate (same formula as gammaShRaFromModeSD).
    sigmaMode = pm.Gamma('sigmaMode', sgammaShRa[0], sgammaShRa[1])
    sigmaSD = pm.Gamma('sigmaSD', sgammaShRa[0], sgammaShRa[1])
    sigmaRa = pm.Deterministic('sigmaRa', (sigmaMode + tt.sqrt(sigmaMode**2 + 4*sigmaSD**2)) / (2*sigmaSD**2))
    sigmaSh = pm.Deterministic('sigmaSh', 1 + sigmaMode*sigmaRa)
    sigma = pm.Gamma('sigma', sigmaSh, sigmaRa, shape=(Nx1Lvl, Nx2Lvl))
    ySigma = pm.Deterministic('ySigma', tt.maximum(sigma, medianCellSD/1000))

    # Robust (Student-t) likelihood with a cell-specific scale
    nu = pm.Exponential('nu', 1/30.)
    like = pm.StudentT('like', nu, mu, lam=1/(ySigma[x1, x2])**2, observed=y)
n_samples = 1000
with model2:
    trace2 = pm.sample(n_samples, nuts_kwargs={'target_accept':.95})
Auto-assigning NUTS sampler...
Initializing NUTS using ADVI...
Average Loss = 12,097: 23%|██▎ | 45384/200000 [00:32<01:49, 1406.27it/s]
Convergence archived at 45500
Interrupted at 45,500 [22%]: Average Loss = 13,081
100%|██████████| 1500/1500 [22:06<00:00, 1.15it/s]
/Users/jordi/anaconda/envs/probalistic/lib/python3.5/site-packages/pymc3/step_methods/hmc/nuts.py:448: UserWarning: Chain 0 reached the maximum tree depth. Increase max_treedepth, increase target_accept or reparameterize.
  'reparameterize.' % self._chain_id)
pm.traceplot(trace2, varnames=['a0', 'a1', 'a1SD', 'a2', 'a2SD', 'a1a2', 'a1a2SD', 'sigma', 'nu']);
# Transforming the trace data to sum-to-zero values
m = np.zeros((Nx1Lvl,Nx2Lvl, len(trace2)))
b1b2 = m.copy()
sigma = m.copy()
for (j1,j2) in np.ndindex(Nx1Lvl,Nx2Lvl):
    m[j1,j2] = (trace2['a0'] +
                trace2['a1'][:,j1] +
                trace2['a2'][:,j2] +
                trace2['a1a2'][:,j1,j2])
    sigma[j1,j2] = trace2['ySigma'][:,j1,j2]
b0 = np.mean(m, axis=(0,1))
b1 = np.mean(m, axis=1) - b0
b2 = np.mean(m, axis=0) - b0
for (j1,j2) in np.ndindex(Nx1Lvl,Nx2Lvl):
    b1b2[j1,j2] = m[j1,j2] - (b0 + b1[j1] + b2[j2])
print('b0: {}'.format(np.round(np.mean(b0))))
print('b1: {}'.format(np.round(np.mean(b1, axis=1))))
print('b2: {}'.format(np.round(np.mean(b2, axis=1), decimals=3)[[20,48,12,7]]))
print('b1b2: {}'.format(np.round(np.mean(b1b2, axis=2), decimals=3)[0,48]))
print('b1b2: {}'.format(np.round(np.mean(b1b2, axis=2), decimals=3)[2,48]))
print('b1b2: {}'.format(np.round(np.mean(b1b2, axis=2), decimals=3)[0,12]))
print('b1b2: {}'.format(np.round(np.mean(b1b2, axis=2), decimals=3)[2,12]))
b0: 122968.0
b1: [-41736. -29269.  -1864.  22626.  50243.]
b2: [ -15403.08    -2064.643   10327.325  115091.975]
b1b2: 2444.448
b1b2: -3179.127
b1b2: -8653.669
b1b2: 4731.509
burnin = 20
scale = sigma
# Figure 20.8 in the book shows the Salary data for four of the sixty departments.
# Let's create the subset for plotting.
subset_org = ['BFIN', 'CHEM', 'PSY', 'ENG']
subset_df = df[df.Org.isin(subset_org)]
fg = sns.FacetGrid(subset_df, col='Org', col_order=subset_org,
                   col_wrap=2, size=3.5, aspect=1.3, despine=False, sharex=False)
fg.map(sns.swarmplot, 'Pos', 'Salary', data=subset_df, color='r')
fg.fig.suptitle('Data with Posterior Prediction', y=1.05, fontsize=16)
for ax in fg.axes:
    ax.set_xlim(xmin=-1)
    ax.set_ylim((0,350000))

for i, org_idx in enumerate([7,12,48,20]):
    for pos_idx in np.arange(5):
        plot_mustache(b0[burnin:]+
                      b1[pos_idx, burnin:]+
                      b2[org_idx, burnin:]+
                      b1b2[pos_idx,org_idx,burnin:], sigma[pos_idx, org_idx, burnin:], pos_idx, ax=fg.axes.flatten()[i])
data = [np.log10(trace2['nu']),
        trace2['sigmaMode'],
        trace2['sigmaSD']]
titles = ['Normality',
          'Mode of Cell Sigmas',
          'SD of Cell Sigmas']
xlabels = [r'log10($\nu$)'] + ['param. value']*2
fig, axes = plt.subplots(1,3, figsize=(20,4))
for d, t, l, ax in zip(data, titles, xlabels, axes):
    pm.plot_posterior(d, point_estimate='mode', color=color, ax=ax)
    ax.set_title(t)
    ax.set_xlabel(l)
For the interaction contrast we can reuse the ic_factors matrix defined above.
contrasts = [m[(df.Pos.cat.categories == 'Full').argmax(),
               (df.Org.cat.categories == 'PSY').argmax(),:] -
             m[(df.Pos.cat.categories == 'Assis').argmax(),
               (df.Org.cat.categories == 'PSY').argmax(),:],
             m[(df.Pos.cat.categories == 'Full').argmax(),
               (df.Org.cat.categories == 'CHEM').argmax(),:] -
             m[(df.Pos.cat.categories == 'Assis').argmax(),
               (df.Org.cat.categories == 'CHEM').argmax(),:],
             np.tensordot(ic_factors, b1b2)]
titles = ['Full - Assis @ PSY',
          'Full - Assis @ CHEM',
          'Full.v.Assis\n(x)\nCHEM.v.PSY']
xlabels = ['Difference in Salary']*2 + ['Difference of Differences']
fig, axes = plt.subplots(1,3, figsize=(20,4))
for c, t, l, ax in zip(contrasts, titles, xlabels, axes):
    pm.plot_posterior(c, point_estimate='mode', round_to=0, ref_val=0, color=color, ax=ax)
    ax.set_title(t)
    ax.set_xlabel(l)
df2 = pd.read_csv('data/SplitPlotAgriData.csv')
df2.Field = df2.Field.astype('category')
df2.Till = df2.Till.astype('category')
df2.Fert = df2.Fert.astype('category')
df2.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99 entries, 0 to 98
Data columns (total 4 columns):
Field    99 non-null category
Till     99 non-null category
Fert     99 non-null category
Yield    99 non-null int64
dtypes: category(3), int64(1)
memory usage: 2.9 KB
df2.head()
| | Field | Till | Fert | Yield |
|---|---|---|---|---|
| 0 | 1 | Chisel | Broad | 119 |
| 1 | 1 | Chisel | Deep | 130 |
| 2 | 1 | Chisel | Surface | 123 |
| 3 | 2 | Chisel | Broad | 135 |
| 4 | 2 | Chisel | Deep | 148 |
g = sns.FacetGrid(df2, col='Till', hue='Field', size=4)
g.map(sns.pointplot, 'Fert', 'Yield', color='k', scale=0.6)
for ax in g.axes.flatten():
    ax.set_xlabel('Fertilization method')
g.fig.suptitle('Data points', y=1.05, fontsize=16);
# Tilling method
xBetween = df2.Till.cat.codes.values
xBetweenLvl = df2.Till.cat.categories
NxBetweenLvl = len(xBetweenLvl)
# Fertilization method
xWithin = df2.Fert.cat.codes.values
xWithinLvl = df2.Fert.cat.categories
NxWithinLvl = len(xWithinLvl)
# Individual fields
xSubject = df2.Field.cat.codes.values
xSubjectLvl = df2.Field.cat.categories
NxSubjectLvl = len(xSubjectLvl)
# Yield
y = df2.Yield
yMean = y.mean()
ySD = y.std()
agammaShRa = gammaShRaFromModeSD(ySD/2, 2*ySD)
with pm.Model() as model3:
    # Baseline Yield
    #a0 = pm.Normal('a0', mu=yMean, tau=1/(ySD*5)**2)
    a0_tilde = pm.Normal('a0_tilde', mu=0.0, sd=1)
    a0 = pm.Deterministic('a0', yMean + ySD*5*a0_tilde)

    # Yield deflection from baseline for tilling method
    sigmaB = pm.Gamma('sigmaB', agammaShRa[0], agammaShRa[1])
    #aB = pm.Normal('aB', mu=0.0, tau=1/sigmaB**2, shape=NxBetweenLvl)
    aB_tilde = pm.Normal('aB_tilde', mu=0.0, sd=1, shape=NxBetweenLvl)
    aB = pm.Deterministic('aB', 0.0 + sigmaB*aB_tilde)

    # Yield deflection from baseline for fertilization method
    sigmaW = pm.Gamma('sigmaW', agammaShRa[0], agammaShRa[1])
    #aW = pm.Normal('aW', mu=0.0, tau=1/sigmaW**2, shape=NxWithinLvl)
    aW_tilde = pm.Normal('aW_tilde', mu=0.0, sd=1, shape=NxWithinLvl)
    aW = pm.Deterministic('aW', 0.0 + sigmaW*aW_tilde)

    # Yield deflection from baseline for combination of tilling and fertilization method
    sigmaBxW = pm.Gamma('sigmaBxW', agammaShRa[0], agammaShRa[1])
    #aBxW = pm.Normal('aBxW', mu=0.0, tau=1/sigmaBxW**2, shape=(NxBetweenLvl, NxWithinLvl))
    aBxW_tilde = pm.Normal('aBxW_tilde', mu=0.0, sd=1, shape=(NxBetweenLvl, NxWithinLvl))
    aBxW = pm.Deterministic('aBxW', 0.0 + sigmaBxW*aBxW_tilde)

    # Yield deflection from baseline for individual fields
    sigmaS = pm.Gamma('sigmaS', agammaShRa[0], agammaShRa[1])
    #aS = pm.Normal('aS', mu=0.0, tau=1/sigmaS**2, shape=NxSubjectLvl)
    aS_tilde = pm.Normal('aS_tilde', mu=0.0, sd=1, shape=NxSubjectLvl)
    aS = pm.Deterministic('aS', 0.0 + sigmaS*aS_tilde)

    mu = a0 + aB[xBetween] + aW[xWithin] + aBxW[xBetween, xWithin] + aS[xSubject]

    sigma = pm.Uniform('sigma', ySD/100, ySD*10)
    like = pm.Normal('like', mu=mu, tau=1/sigma**2, observed=y)
with model3:
    trace3 = pm.sample(10000, nuts_kwargs={'target_accept':.97})
Auto-assigning NUTS sampler...
Initializing NUTS using ADVI...
Average Loss = 429.15: 11%|█ | 22282/200000 [00:06<00:48, 3665.83it/s]
Convergence archived at 22600
Interrupted at 22,600 [11%]: Average Loss = 516.13
100%|█████████▉| 10499/10500 [12:52<00:00, 12.17it/s]
/Users/jordi/anaconda/envs/probalistic/lib/python3.5/site-packages/pymc3/step_methods/hmc/nuts.py:456: UserWarning: Chain 0 contains 8 diverging samples after tuning. If increasing `target_accept` does not help try to reparameterize.
  % (self._chain_id, n_diverging))
100%|██████████| 10500/10500 [12:52<00:00, 13.58it/s]
pm.traceplot(trace3, varnames=['a0', 'aB', 'sigmaB', 'aW', 'sigmaW', 'aBxW', 'sigmaBxW', 'aS', 'sigmaS', 'sigma']);
a0 = trace3['a0']
aB = trace3['aB']
aW = trace3['aW']
aBxW = trace3['aBxW']
aS = trace3['aS']
sigma = trace3['sigma']
In contrast to the JAGS code from the book, we recenter the deflections after sampling. That is why there is an extra trailing dimension holding all trace values.
Let's put all the cell values in a multi-dimensional array.
We use NumPy masked arrays, with the value 0 masked. This helps when calculating means along the different axes: masked values (empty cells) are not taken into account, which matters because each tilling method has a different number of fields.
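A tiny illustration of the masked-array behaviour relied on here (the numbers are made up for the example): with a plain mean, the structural zeros of empty cells distort the result, whereas ma.masked_equal excludes them.

# Made-up illustration of why masked arrays are used:
demo = np.array([[120., 130.], [0., 140.]])    # 0. marks an empty cell
print(demo.mean(axis=0))                       # [ 60. 135.]   -- the zero drags the first mean down
print(ma.masked_equal(demo, 0.).mean(axis=0))  # [120.0 135.0] -- the empty cell is ignored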
# Initialize the array with zeros
mSxBxW = np.zeros((NxSubjectLvl, NxBetweenLvl, NxWithinLvl, len(trace3)))
# Fill the array
for k, i, j in zip(xSubject, xBetween, xWithin):
    mu = a0 + aB[:,i] + aW[:,j] + aBxW[:,i,j] + aS[:,k]
    mSxBxW[k,i,j,:] = mu
# Convert to masked array that masks value '0'.
mSxBxW_ma = ma.masked_equal(mSxBxW, 0)
mSxBxW_ma.shape
(33, 3, 3, 10000)
# Mean for subject S across levels of W, within the level of B
mS = ma.mean(mSxBxW_ma, axis=(2))
mS.data.shape
(33, 3, 10000)
# Mean for treatment combination BxW, across subjects S
mBxW = mSxBxW_ma.mean(axis=(0))
mBxW.data.shape
(3, 3, 10000)
# Mean for level B, across W and S
# Keep the dimension of axis 1 so that broadcasting works correctly when calculating bBxW.
mB = ma.mean(mBxW, axis=(1), keepdims=True)
mB.data.shape
(3, 1, 10000)
# Mean for level W, across B and S
# Keep the dimension of axis 0 so that broadcasting works correctly when calculating bBxW.
mW = ma.mean(mBxW, axis=(0), keepdims=True)
mW.shape
(1, 3, 10000)
# Equation 20.3
m = ma.mean(mBxW, axis=(0,1))
b0 = m
print('Mean baseline yield: {}'.format(b0.mean()))
Mean baseline yield: 140.41117824070224
# Equation 20.4
# Drop the size-one dimension of mB so that broadcasting works properly.
bB = mB.squeeze() - m
print('Mean yield deflection from baseline for tilling method:')
pd.DataFrame(bB.data.mean(1), index=xBetweenLvl, columns=['Deflection'])
Mean yield deflection from baseline for tilling method:
| | Deflection |
|---|---|
| Chisel | -10.545193 |
| Moldbrd | 5.460927 |
| Ridge | 5.084267 |
# Equation 20.5
# Drop the size-one dimension of mW so that broadcasting works properly.
bW = mW.squeeze() - m
print('Mean yield deflection from baseline for fertilization method:')
pd.DataFrame(bW.data.mean(1), index=xWithinLvl, columns=['Deflection'])
Mean yield deflection from baseline for fertilization method:
| | Deflection |
|---|---|
| Broad | -9.334563 |
| Deep | 6.251056 |
| Surface | 3.083507 |
# Equation 20.6
bBxW = mBxW - mB - mW + m
print('Mean yield deflection from baseline for tilling-fertilization combination:')
pd.DataFrame(bBxW.data.mean(2), index=xBetweenLvl, columns=xWithinLvl)
Mean yield deflection from baseline for tilling-fertilization combination:
| | Broad | Deep | Surface |
|---|---|---|---|
| Chisel | 3.632557 | -2.217952 | -1.414606 |
| Moldbrd | 4.012968 | -1.164794 | -2.848173 |
| Ridge | -7.645525 | 3.382746 | 4.262779 |
# Equation 20.7
bS = mS - mB.squeeze()
print('Mean yield deflection from baseline for individual fields:')
pd.DataFrame(bS.mean(2).compressed(), index=xSubjectLvl, columns=['Deflection'])
Mean yield deflection from baseline for individual fields:
| Field | Deflection |
|---|---|
1 | -5.346461 |
2 | 8.483606 |
3 | 11.881251 |
4 | -0.158229 |
5 | 9.968372 |
6 | -5.931021 |
7 | -12.163323 |
8 | -14.255159 |
9 | -5.046309 |
10 | 10.649053 |
11 | 8.499510 |
12 | -6.581290 |
13 | 3.751797 |
14 | 8.084458 |
15 | -9.236416 |
16 | 19.778292 |
17 | -2.397911 |
18 | -9.480090 |
19 | -0.508869 |
20 | 7.108654 |
21 | -13.173166 |
22 | -3.926749 |
23 | -9.743558 |
24 | 20.715688 |
25 | -8.486975 |
26 | -15.889261 |
27 | 0.018739 |
28 | -0.778729 |
29 | -11.563559 |
30 | -8.802619 |
31 | 15.816185 |
32 | 1.041675 |
33 | 17.672413 |
g = sns.FacetGrid(df2, col='Till', hue='Field', size=4)
g.map(sns.pointplot, 'Fert', 'Yield', color='k', scale=0.6)
# Above we found the mean values for the different deflections. Here we plot the
# full posteriors of the yield given tilling and fertilization method.
for i, j in np.ndindex(NxBetweenLvl, NxWithinLvl):
    plot_mustache(b0+
                  bB[i,:]+
                  bW[j,:]+
                  bBxW[i,j,:],
                  sigma,
                  j, width=0.4, ax=g.axes.flatten()[i])

for ax in g.axes.flatten():
    ax.set_xlabel('Fertilization method')
g.fig.suptitle('Data with Post. Pred.', y=1.05, fontsize=16);
# [Chisel & Moldbrd] vs [Ridge]
B = np.asarray([-.5, -.5, 1])
# [Broad] vs [Deep & Surface]
W = np.asarray([-1, .5, .5])
ic_factors = np.outer(B,W)
pd.DataFrame(ic_factors, index=xBetweenLvl, columns=xWithinLvl)
| | Broad | Deep | Surface |
|---|---|---|---|
| Chisel | 0.5 | -0.25 | -0.25 |
| Moldbrd | 0.5 | -0.25 | -0.25 |
| Ridge | -1.0 | 0.50 | 0.50 |
contrasts = [bB[1,:]-bB[2,:],
             ma.mean(bB[1:,:], axis=0) - bB[0,:],
             ma.mean(bW[1:,:], axis=0) - bW[0,:],
             np.tensordot(ic_factors, bBxW)]
titles = ['Moldbrd\n vs\n Ridge',
          'Moldbrd.Ridge\n vs\n Chisel',
          'Deep.Surface\n vs\n Broad',
          'Chisel.Moldbrd.v.Ridge\n (x)\n Broad.v.Deep.Surface']
xlabels = ['Difference']*3 + ['Difference of Differences']
fig, axes = plt.subplots(1,4, figsize=(18,3))
for c, t, l, ax in zip(contrasts, titles, xlabels, axes):
    pm.plot_posterior(c, point_estimate='mean', ref_val=0, color=color, ax=ax)
    ax.set_title(t)
    ax.set_xlabel(l)