In [1]:
import pymc3 as pm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline
%config InlineBackend.figure_format = 'retina'
%qtconsole --colors=linux
plt.style.use('ggplot')

from matplotlib import gridspec
from theano import tensor as tt
from scipy import stats


### Note from Junpeng Lao¶

In PyMC3, a discrete latent variable could be very easily expressed as a discrete random variable. PyMC3 will assign this discrete variable with a sampler (usually a Metropolis sampler), while the rest of the (continous) RVs sample with NUTS. However, care must be taken, as mixing different sampler is in general not a good idea. The standard treatment is integrate out the latent variables, as done in Stan.

# Chapter 6 - Latent-mixture models¶

## 6.1 Exam scores¶

Exam result consistent of 2 group of students - one group is purely guess and the other with a >.5 accuracy rate

$$z_{i} \sim \text{Bernoulli}(0.5)$$$$\phi \sim \text{Uniform}(0.5, 1)$$$$\psi = 0.5$$

$$\theta_{i} \sim \begin{cases} \phi & \text{if z_{i} = 1} \\ \psi & \text{if z_{i} = 0} \end{cases}$$ $$k_{i} \sim \text{Binomial}(\theta_{i}, n)$$

(It is also possible to estimate this model using a EM algorthim.)

In [2]:
# pymc3
k = np.array([21,17,21,18,22,31,31,34,34,35,35,36,39,36,35])
p = len(k) #number of people
n = 40 # number of questions

with pm.Model() as model1:
# group prior
zi = pm.Bernoulli('zi', p=.5, shape=p)
# accuracy prior
phi = pm.Uniform('phi', upper=1, lower=.5)
psi = .5
theta = pm.Deterministic('theta', phi*tt.eq(zi, 1) + psi*tt.eq(zi, 0))

# observed
ki = pm.Binomial('ki', p=theta, n=n, observed=k)

# step=pm.NUTS()
start = pm.find_MAP()
step = pm.NUTS(scaling=start)

trace1=pm.sample(3e4, step, init=None)

pm.traceplot(trace1[50:], ['zi', 'phi']);

ztrace = trace1['zi'][50:]
print('Grouping', ztrace[-1, :])

logp = -162.37, ||grad|| = 0: 100%|██████████| 2/2 [00:00<00:00, 555.87it/s]
Assigned BinaryGibbsMetropolis to zi
100%|██████████| 30500/30500.0 [01:17<00:00, 394.67it/s]

Grouping [0 0 0 0 0 1 1 1 1 1 1 1 1 1 1]


## 6.2 Exam scores with individual differences¶

$$z_{i} \sim \text{Bernoulli}(0.5)$$$$\mu \sim \text{Uniform}(0.5, 1)$$$$\lambda \sim \text{Gamma}(.001, .001)$$

$$\phi_{i} \sim \text{Gaussian}(\mu, \lambda)_{\mathcal I(0,1)}$$ $$\psi = 0.5$$
$$\theta_{i} \sim \begin{cases} \phi_{i} & \text{if z_{i} = 1} \\ \psi & \text{if z_{i} = 0} \end{cases}$$ $$k_{i} \sim \text{Binomial}(\theta_{i}, n)$$

In [3]:
# pymc3 - need some tuning to get the same result as in JAGS
k = np.array([21,17,21,18,22,31,31,34,34,35,35,36,39,36,35])
p = len(k) #number of people
n = 40 # number of questions

with pm.Model() as model2:
# group prior
zi = pm.Bernoulli('zi', p=.5, shape=p)
# accuracy prior
psi = .5
mu = pm.Uniform('mu', upper=1, lower=.5)
lambda_ = pm.Gamma('lambda_', alpha=.001, beta=.001)
phi = pm.Normal('phi', mu=mu, tau=lambda_, shape=p)

theta = pm.Deterministic('theta', phi*tt.eq(zi, 1) + psi*tt.eq(zi, 0))

# observed
ki = pm.Binomial('ki', p=theta, n=n, observed=k)

# step=pm.NUTS()
start = pm.find_MAP()
step = pm.NUTS(scaling=start)

trace2=pm.sample(3e4, step=step, njobs=2)

pm.traceplot(trace2[:], ['theta']);

ztrace = trace2['zi'][:]
print('Grouping', ztrace[-1, :])

logp = -136.63, ||grad|| = 10.317: 100%|██████████| 17/17 [00:00<00:00, 2333.22it/s]
Assigned BinaryGibbsMetropolis to zi
100%|█████████▉| 30499/30500.0 [03:42<00:00, 145.63it/s]/Users/jlao/Documents/Github/pymc3/pymc3/step_methods/hmc/nuts.py:468: UserWarning: Chain 0 contains 1249 diverging samples after tuning. If increasing target_accept does not help try to reparameterize.
% (self._chain_id, n_diverging))
100%|██████████| 30500/30500.0 [03:42<00:00, 137.24it/s]
/Users/jlao/Documents/Github/pymc3/pymc3/step_methods/hmc/nuts.py:468: UserWarning: Chain 1 contains 1194 diverging samples after tuning. If increasing target_accept does not help try to reparameterize.
% (self._chain_id, n_diverging))

Grouping [0 0 0 0 0 1 1 1 1 1 1 1 1 1 1]

In [4]:
plt.figure()
thetapost = trace2['theta'][25000:]
plt.boxplot(thetapost)


## 6.3 Twenty questions¶

Suppose a group of 10 people attend a lecture, and are asked a set of 20 questions afterwards, with every answer being either correct or incorrect.
$$p_{i},q_{j} \sim \text{Beta}(1,1)$$ $$\theta_{ij} = p_{i}q_{j}$$ $$k_{ij} \sim \text{Bernoulli}(\theta_{ij})$$

In [5]:
dset = 2
if dset==1:
k = np.array([1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,1,0,1,0,0,
0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,
0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,
1,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,0,
1,1,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,1,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,1,
1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0]).reshape(10,-1)
elif dset==2:
0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,
0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,
1,0,1,1,0,1,1,1,0,1,0,0,1,0,0,0,0,1,0,0,
1,1,0,1,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
0,0,0,0,-999,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,1,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,1,
1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,-999,0,0], value=-999).reshape(10,-1)
# print(k)
Np, Nq = k.shape

with pm.Model() as model3:
# prior
pi = pm.Beta('pi', alpha=1, beta=1, shape=Np)
qi = pm.Beta('qi', alpha=1, beta=1, shape=Nq)
# accuracy prior
theta = pm.Deterministic('theta', tt.outer(pi, qi))
# observed
kij = pm.Bernoulli('kij', p=theta, observed=k)

trace3=pm.sample(1e4, tune=10000)

pm.traceplot(trace3[:], ['pi', 'qi']);

fig = plt.figure(figsize=(20, 5))
gs = gridspec.GridSpec(1, 3)
ax0 = plt.subplot(gs[0])
pipost = trace3['pi'][5000:]
ax0.boxplot(pipost)

ax1 = plt.subplot(gs[1:])
qipost = trace3['qi'][5000:]
ax1.boxplot(qipost)

Assigned NUTS to pi_logodds__
Assigned NUTS to qi_logodds__
Assigned BinaryGibbsMetropolis to kij_missing
100%|██████████| 20000/20000.0 [00:55<00:00, 358.76it/s]


## 6.4 The two-country quiz¶

$$\alpha \sim \text{Uniform}(0,1)$$$$\beta \sim \text{Uniform}(0,\alpha)$$$$x_{i} \sim \text{Bernoulli}(0.5)$$$$z_{j} \sim \text{Bernoulli}(0.5)$$$$\theta_{ij} \sim \begin{cases} \alpha & \text{if x_{i} = z_{j}} \\ \beta & \text{if x_{i} \neq z_{j}} \end{cases}$$$$k_{ij} \sim \text{Bernoulli}(\theta_{ij})$$

In [6]:
dset = 3
if dset==1:
k = np.array([1,0,0,1,1,0,0,1,
1,0,0,1,1,0,0,1,
0,1,1,0,0,1,0,0,
0,1,1,0,0,1,1,0,
1,0,0,1,1,0,0,1,
0,0,0,1,1,0,0,1,
0,1,0,0,0,1,1,0,
0,1,1,1,0,1,1,0]).reshape(8,-1)
elif dset==2:
1,0,0,1,1,0,0,1,
0,1,1,0,0,1,0,0,
0,1,1,0,0,1,1,0,
1,0,0,1,1,0,0,1,
0,0,0,1,1,0,0,1,
0,1,0,0,0,1,1,0,
0,1,1,1,0,1,1,0,
1,0,0,1,-999,-999,-999,-999,
0,-999,-999,-999,-999,-999,-999,-999,
-999,-999,-999,-999,-999,-999,-999,-999], value=-999).reshape(11,-1)
elif dset==3:
1,0,0,1,1,0,0,1,
1,0,0,1,1,0,0,1,
1,0,0,1,1,0,0,1,
1,0,0,1,1,0,0,1,
1,0,0,1,1,0,0,1,
1,0,0,1,1,0,0,1,
1,0,0,1,1,0,0,1,
1,0,0,1,1,0,0,1,
1,0,0,1,1,0,0,1,
1,0,0,1,1,0,0,1,
1,0,0,1,1,0,0,1,
0,1,1,0,0,1,0,0,
0,1,1,0,0,1,1,0,
1,0,0,1,1,0,0,1,
0,0,0,1,1,0,0,1,
0,1,0,0,0,1,1,0,
0,1,1,1,0,1,1,0,
1,0,0,1,-999,-999,-999,-999,
0,-999,-999,-999,-999,-999,-999,-999,
-999,-999,-999,-999,-999,-999,-999,-999], value=-999).reshape(21,-1)

Nx, Nz = k.shape

with pm.Model() as model4:
# prior
alpha = pm.Uniform('alpha', lower=0, upper=1)
beta = pm.Uniform('beta', lower=0, upper=alpha)

xi = pm.Bernoulli('xi', p=.5, shape=Nx)
zj = pm.Bernoulli('zj', p=.5, shape=Nz)

# accuracy prior
theta = pm.Deterministic('theta',
alpha*(tt.outer(tt.eq(xi, 0), tt.eq(zj, 0)) +
tt.outer(tt.eq(xi, 1), tt.eq(zj, 1))) +
beta*(tt.outer(tt.eq(xi, 0), tt.eq(zj, 1)) +
tt.outer(tt.eq(xi, 0), tt.eq(zj, 1))))

# observed
kij = pm.Bernoulli('kij', p=theta, observed=k)

trace4=pm.sample(3e3)

pm.traceplot(trace4[:], ['alpha', 'beta']);

xtrace = trace4['xi'][:]
print('xi', xtrace[-1,:])
ztrace = trace4['zj'][:]
print('zj', ztrace[-1,:])

Assigned NUTS to alpha_interval__
Assigned NUTS to beta_interval__
Assigned BinaryGibbsMetropolis to xi
Assigned BinaryGibbsMetropolis to zj
Assigned BinaryGibbsMetropolis to kij_missing
100%|██████████| 3500/3500.0 [00:56<00:00, 64.36it/s]