## Chapter 19 - Metric Predicted Variable with One Nominal Predictor

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pymc3 as pm
import theano.tensor as tt
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

from scipy.stats import norm
from IPython.display import Image
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

%matplotlib inline
plt.style.use('seaborn-white')

color = '#87ceeb'

In [2]:
%load_ext watermark
%watermark -p pandas,numpy,pymc3,theano,matplotlib,seaborn,scipy

pandas 0.23.4
numpy 1.15.1
pymc3 3.5
theano 1.0.2
matplotlib 2.2.3
seaborn 0.9.0
scipy 1.1.0

In [3]:
def gammaShRaFromModeSD(mode, sd):
    """Calculate Gamma shape and rate from mode and sd."""
    rate = (mode + np.sqrt(mode**2 + 4 * sd**2)) / (2 * sd**2)
    shape = 1 + mode * rate
    return shape, rate
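
A Gamma(shape, rate) distribution has mode (shape − 1)/rate and standard deviation √shape/rate, so the conversion can be sanity-checked by inverting it. A quick check (an addition, not in the original notebook):

In [ ]:
# Hypothetical sanity check: the returned parameters should reproduce the requested mode and sd.
shape, rate = gammaShRaFromModeSD(mode=10, sd=4)
print((shape - 1) / rate)      # mode -> 10.0
print(np.sqrt(shape) / rate)   # sd   -> 4.0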

In [4]:
def plot_mustache(var, sd, j, axis, width=.75):
    for i in np.arange(start=0, stop=len(var), step=int(len(var)*.1)):
        rv = norm(loc=var[i], scale=sd[i])
        yrange = np.linspace(rv.ppf(0.01), rv.ppf(0.99), 100)
        xrange = rv.pdf(yrange)

        # When a group's SD is large compared to the others, the peak of its mustache is
        # relatively low and does not plot well next to the low-SD groups.
        # Scale xrange so that the 'height' of every mustache equals `width`.
        xrange_scaled = xrange*(width/xrange.max())

        # Negate the values to flip the mustache in the right direction.
        axis.plot(-xrange_scaled+j, yrange, color=color, alpha=.6)

In [5]:
def plot_cred_lines(b0, bj, bcov, x, ax):
    """Plot credible posterior prediction lines for the model in section 19.4."""
    B = pd.DataFrame(np.c_[b0, bj, bcov], columns=['beta0', 'betaj', 'betacov'])

    # Keep only the samples that fall jointly inside the 95% HPD interval of each parameter.
    hpd_interval = pm.hpd(B.values, alpha=0.05)
    B_hpd = B[B.beta0.between(*hpd_interval[0,:]) &
              B.betaj.between(*hpd_interval[1,:]) &
              B.betacov.between(*hpd_interval[2,:])]
    xrange = np.linspace(x.min()*.95, x.max()*1.05)

    # Draw 10 randomly chosen credible regression lines.
    for i in np.random.randint(0, len(B_hpd), 10):
        ax.plot(xrange, B_hpd.iloc[i,0]+B_hpd.iloc[i,1]+B_hpd.iloc[i,2]*xrange,
                c=color, alpha=.6, zorder=0)


### 19.3 - Hierarchical Bayesian Approach

In [6]:
df = pd.read_csv('data/FruitflyDataReduced.csv', dtype={'CompanionNumber':'category'})
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 125 entries, 0 to 124
Data columns (total 3 columns):
Longevity          125 non-null int64
CompanionNumber    125 non-null category
Thorax             125 non-null float64
dtypes: category(1), float64(1), int64(1)
memory usage: 2.3 KB

In [7]:
df.groupby('CompanionNumber').head(2)

Out[7]:
     Longevity CompanionNumber  Thorax
0           35       Pregnant8    0.64
1           37       Pregnant8    0.68
25          40           None0    0.64
26          37           None0    0.70
50          46       Pregnant1    0.64
51          42       Pregnant1    0.68
75          21         Virgin1    0.68
76          40         Virgin1    0.68
100         16         Virgin8    0.64
101         19         Virgin8    0.64

In [8]:
# Count the number of records per nominal group
df.CompanionNumber.value_counts()

Out[8]:
Virgin8      25
Virgin1      25
Pregnant8    25
Pregnant1    25
None0        25
Name: CompanionNumber, dtype: int64

#### Model (Kruschke, 2015)

In [9]:
Image('images/fig19_2.png')

Out[9]:
In [10]:
x = df.CompanionNumber.cat.codes.values
y = df.Longevity
yMean = y.mean()
ySD = y.std()

NxLvl = len(df.CompanionNumber.cat.categories)

agammaShRa = gammaShRaFromModeSD(ySD/2, 2*ySD)

with pm.Model() as model1:
    aSigma = pm.Gamma('aSigma', agammaShRa[0], agammaShRa[1])
    a0 = pm.Normal('a0', yMean, tau=1/(ySD*5)**2)
    a = pm.Normal('a', 0.0, tau=1/aSigma**2, shape=NxLvl)

    ySigma = pm.Uniform('ySigma', ySD/100, ySD*10)
    y = pm.Normal('y', a0 + a[x], tau=1/ySigma**2, observed=y)

    # Convert a0, a to the equivalent sum-to-zero parameterization b0, b
    m = pm.Deterministic('m', a0 + a)
    b0 = pm.Deterministic('b0', tt.mean(m))
    b = pm.Deterministic('b', m - b0)

pm.model_to_graphviz(model1)

Out[10]:
In [11]:
with model1:
    trace1 = pm.sample(3000, cores=4, nuts_kwargs={'target_accept': 0.95})

Auto-assigning NUTS sampler...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [ySigma, a, a0, aSigma]
Sampling 4 chains: 100%|██████████| 14000/14000 [00:14<00:00, 983.22draws/s]
The number of effective samples is smaller than 25% for some parameters.
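
The sampler warns about low effective sample sizes; these can be inspected per parameter (an added check, not in the original notebook):

In [ ]:
# Effective sample sizes per parameter; small values flag strong autocorrelation.
pm.effective_n(trace1)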

In [12]:
pm.traceplot(trace1);
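
Since b is defined as the deviation of each group mean from their average, the deflections must sum to zero in every posterior draw. A quick verification (an added check, not in the original notebook):

In [ ]:
# The sum-to-zero deflections should vanish (up to floating point) in every draw.
print(np.allclose(trace1['b'].sum(axis=1), 0))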


#### Figure 19.3 (top)

In [13]:
# Plot the metric predicted variable for each group and superimpose
# the posterior predictive distribution.

None0 = trace1['m'][:,0]
Pregnant1 = trace1['m'][:,1]
Pregnant8 = trace1['m'][:,2]
Virgin1 = trace1['m'][:,3]
Virgin8 = trace1['m'][:,4]
scale = trace1['ySigma'][:]

fig, ax = plt.subplots(1,1, figsize=(8,5))
ax.set_title('Data with Posterior Predictive Distribution')

sns.swarmplot('CompanionNumber', 'Longevity', data=df, ax=ax);
ax.set_xlim(xmin=-1)

for i, grp in enumerate([None0, Pregnant1, Pregnant8, Virgin1, Virgin8]):
    plot_mustache(grp, scale, i, ax)


#### Contrasts

In [14]:
fig, axes = plt.subplots(2,4, figsize=(15,6))

contrasts = [np.mean([Pregnant1, Pregnant8], axis=0) - None0,
             np.mean([Pregnant1, Pregnant8, None0], axis=0) - Virgin1,
             Virgin1 - Virgin8,
             np.mean([Pregnant1, Pregnant8, None0], axis=0) - np.mean([Virgin1, Virgin8], axis=0)]

contrast_titles = ['Pregnant1.Pregnant8 \n vs \n None0',
                   'Pregnant1.Pregnant8.None0 \n vs \n Virgin1',
                   'Virgin1 \n vs \n Virgin8',
                   'Pregnant1.Pregnant8.None0 \n vs \n Virgin1.Virgin8']

for contr, ctitle, ax_top, ax_bottom in zip(contrasts, contrast_titles, fig.axes[:4], fig.axes[4:]):
    pm.plot_posterior(contr, ref_val=0, color=color, ax=ax_top)
    pm.plot_posterior(contr/scale, ref_val=0, color=color, ax=ax_bottom)
    ax_top.set_title(ctitle)
    ax_bottom.set_title(ctitle)
    ax_top.set_xlabel('Difference')
    ax_bottom.set_xlabel('Effect Size')
fig.tight_layout()
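
Besides inspecting the HDIs, the posterior samples can be summarized directly; for example, the posterior probability that the last contrast is positive (an added check, not in the original notebook):

In [ ]:
# Fraction of posterior draws in which the pregnant/none groups outlive the virgin groups.
print((contrasts[3] > 0).mean())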


### 19.4 - Adding a Metric Predictor

#### Model (Kruschke, 2015)

In [15]:
Image('images/fig19_4.png')

Out[15]:
In [16]:
y = df.Longevity
yMean = y.mean()
ySD = y.std()
xNom = df.CompanionNumber.cat.categories
xMet = df.Thorax
xMetMean = df.Thorax.mean()
xMetSD = df.Thorax.std()
NxNomLvl = len(df.CompanionNumber.cat.categories)

X = pd.concat([df.Thorax, pd.get_dummies(df.CompanionNumber, drop_first=True)], axis=1)
lmInfo = LinearRegression().fit(X, y)
residSD = np.sqrt(mean_squared_error(y, lmInfo.predict(X)))

agammaShRa = gammaShRaFromModeSD(ySD/2, 2*ySD)

with pm.Model() as model2:
    aSigma = pm.Gamma('aSigma', agammaShRa[0], agammaShRa[1])
    a0 = pm.Normal('a0', yMean, tau=1/(ySD*5)**2)
    a = pm.Normal('a', 0.0, tau=1/aSigma**2, shape=NxNomLvl)
    aMet = pm.Normal('aMet', 0, tau=1/(2*ySD/xMetSD)**2)

    # The lower bound of the noise prior is based on the residual SD of the classical fit above.
    ySigma = pm.Uniform('ySigma', residSD/100, ySD*10)

    mu = a0 + a[x] + aMet*(xMet - xMetMean)
    y = pm.Normal('y', mu, tau=1/ySigma**2, observed=y)

    # Convert a0, a to the equivalent sum-to-zero parameterization b0, b
    b0 = pm.Deterministic('b0', a0 + tt.mean(a) + aMet*(-xMetMean))
    b = pm.Deterministic('b', a - tt.mean(a))

pm.model_to_graphviz(model2)

Out[16]:
In [17]:
with model2:
    trace2 = pm.sample(3000, cores=4, nuts_kwargs={'target_accept': 0.95})

Auto-assigning NUTS sampler...

In [18]:
pm.traceplot(trace2);
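
The plot_cred_lines helper from In [5] can then superimpose credible regression lines on the data, one panel per group as in Figure 19.5. A minimal sketch for the None0 group (an illustration, not from the original notebook, assuming trace2 has converged):

In [ ]:
# Sketch: scatter the None0 flies and overlay 10 credible regression lines.
fig, ax = plt.subplots(figsize=(6,5))
grp = df[df.CompanionNumber == 'None0']
ax.scatter(grp.Thorax, grp.Longevity, color='k')
plot_cred_lines(trace2['b0'], trace2['b'][:,0], trace2['aMet'], grp.Thorax, ax)
ax.set(xlabel='Thorax', ylabel='Longevity', title='None0');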