%matplotlib inline
import pymc3 as pm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from theano import tensor as tt
%config InlineBackend.figure_format = 'retina'
plt.style.use(['seaborn-colorblind', 'seaborn-darkgrid'])
a = 3.5 # average morning wait time
b = -1. # average difference afternoon wait time
sigma_a = 1. # std dev in intercepts
sigma_b = 0.5 # std dev in slopes
rho = -0.7 # correlation between intercepts and slopes
Mu = [a, b]
cov_ab = sigma_a * sigma_b * rho
Sigma = np.array([[sigma_a**2, cov_ab], [cov_ab, sigma_b**2]])
Sigma
array([[ 1.  , -0.35],
       [-0.35,  0.25]])
Code 13.4 and the related comment in the book are specific to R and not relevant for Python.
sigmas = [sigma_a, sigma_b]
Rho = np.matrix([[1, rho], [rho, 1]])
Sigma = np.diag(sigmas) * Rho * np.diag(sigmas)
Sigma
matrix([[ 1.  , -0.35],
        [-0.35,  0.25]])
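A quick sanity check (not in the book): both ways of building the covariance matrix agree.

np.allclose(np.array([[sigma_a**2, cov_ab], [cov_ab, sigma_b**2]]),
            np.asarray(np.diag(sigmas) * Rho * np.diag(sigmas)))  # returns True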
N_cafes = 20
np.random.seed(42)
vary_effects = np.random.multivariate_normal(mean=Mu, cov=Sigma, size=N_cafes)
a_cafe = vary_effects[:, 0]
b_cafe = vary_effects[:, 1]
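As another quick check (not in the book), the empirical correlation of the simulated effects should be in the neighborhood of the assumed rho = -0.7:

np.corrcoef(a_cafe, b_cafe)[0, 1]  # sample correlation of intercepts and slopes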
from matplotlib.patches import Ellipse
from scipy.stats import chi2
def Gauss2d(mu, cov, ci, ax=None):
    """Plot 2d Gaussian confidence ellipses (adapted from statsmodels)."""
    if ax is None:
        _, ax = plt.subplots(figsize=(6, 6))

    v_, w = np.linalg.eigh(cov)
    u = w[0] / np.linalg.norm(w[0])
    angle = np.arctan(u[1] / u[0])
    angle = 180 * angle / np.pi  # convert to degrees

    for level in ci:
        v = 2 * np.sqrt(v_ * chi2.ppf(level, 2))  # get size corresponding to level
        ell = Ellipse(mu[:2], v[0], v[1], 180 + angle, facecolor='None',
                      edgecolor='k',
                      alpha=(1 - level) * .5,  # fainter ellipses for wider intervals
                      lw=1.5)
        ell.set_clip_box(ax.bbox)
        ax.add_artist(ell)
    return ax
_, ax = plt.subplots(1, 1, figsize=(5, 5))
Gauss2d(Mu, np.asarray(Sigma), [0.1, 0.3, 0.5, 0.8, 0.99], ax=ax)
ax.scatter(a_cafe, b_cafe)
ax.set_xlim(1.5, 6.1)
ax.set_ylim(-2, 0)
ax.set_xlabel('intercepts (a_cafe)')
ax.set_ylabel('slopes (b_cafe)');
N_visits = 10
afternoon = np.tile([0, 1], N_visits * N_cafes // 2)  # alternate 0/1 for morning/afternoon visits
cafe_id = np.repeat(np.arange(0, N_cafes), N_visits)  # cafe index 0-19 (zero-based for Python indexing)
mu = a_cafe[cafe_id] + b_cafe[cafe_id]*afternoon
sigma = 0.5 # std dev within cafes
wait = np.random.normal(loc=mu, scale=sigma, size=N_visits*N_cafes)
d = pd.DataFrame(dict(cafe=cafe_id, afternoon=afternoon, wait=wait))
R = pm.LKJCorr.dist(n=2, eta=2).random(size=10000)
_, ax = plt.subplots(1, 1, figsize=(5, 5))
pm.kdeplot(R, ax=ax);
ax.set_xlabel('correlation')
ax.set_ylabel('Density');
_, ax = plt.subplots(1, 1, figsize=(5, 5))
textloc = [[0, .5], [0, .8], [.5, .9]]
for eta, loc in zip([1, 2, 4], textloc):
    R = pm.LKJCorr.dist(n=2, eta=eta).random(size=10000)
    pm.kdeplot(R, ax=ax)
    ax.text(loc[0], loc[1], 'eta = %s' % (eta), horizontalalignment='center')
ax.set_ylim(0, 1.1)
ax.set_xlabel('correlation')
ax.set_ylabel('Density');
cafe_idx = d['cafe'].values
with pm.Model() as m_13_1:
    sd_dist = pm.HalfCauchy.dist(beta=2)  # this is the same as sigma_cafe ~ dcauchy(0, 2)
    packed_chol = pm.LKJCholeskyCov('chol_cov', eta=2, n=2, sd_dist=sd_dist)
    # compute the covariance matrix
    chol = pm.expand_packed_triangular(2, packed_chol, lower=True)
    cov = tt.dot(chol, chol.T)

    # extract the standard deviations and rho
    sigma_ab = pm.Deterministic('sigma_cafe', tt.sqrt(tt.diag(cov)))
    corr = tt.diag(sigma_ab**-1).dot(cov.dot(tt.diag(sigma_ab**-1)))
    r = pm.Deterministic('Rho', corr[np.triu_indices(2, k=1)])

    ab = pm.Normal('ab', mu=0, sd=10, shape=2)  # prior for average intercept and slope
    # shape needs to be (N_cafes, 2) because we're getting back both a and b for each cafe
    ab_cafe = pm.MvNormal('ab_cafe', mu=ab, chol=chol, shape=(N_cafes, 2))  # population of varying effects

    mu = ab_cafe[:, 0][cafe_idx] + ab_cafe[:, 1][cafe_idx] * d['afternoon'].values  # linear model
    sd = pm.HalfCauchy('sigma', beta=2)  # prior stddev within cafes
    wait = pm.Normal('wait', mu=mu, sd=sd, observed=d['wait'])  # likelihood
    trace_13_1 = pm.sample(5000, tune=2000)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████| 7000/7000 [01:41<00:00, 77.07it/s]
/home/osvaldo/Documentos/Proyectos/01_PyMC3/pymc3/pymc3/step_methods/hmc/nuts.py:468: UserWarning: Chain 0 contains 1 diverging samples after tuning. If increasing `target_accept` does not help try to reparameterize.
For complicated models it is always good to do more checks:
pm.traceplot(trace_13_1, varnames=['ab', 'Rho', 'sigma', 'sigma_cafe'],
lines={"ab": Mu,
"Rho": rho,
"sigma": sigma,
"sigma_cafe": sigmas});
post = pm.trace_to_dataframe(trace_13_1)
_, ax = plt.subplots(1, 1, figsize=(5, 5))
R = pm.LKJCorr.dist(n=2, eta=2).random(size=10000)
pm.kdeplot(R, ax=ax, color='k', linestyle='--')
ax.text(0, .8, 'prior', horizontalalignment='center')
pm.kdeplot(trace_13_1['Rho'], ax=ax, color='C0')
ax.text(-.15, 1.5, 'posterior', color='C0', horizontalalignment='center')
ax.set_ylim(-.025, 2.5)
ax.set_xlabel('correlation', fontsize=14)
ax.set_ylabel('Density', fontsize=14);
# compute unpooled estimates directly from data
a1b1 = (d.groupby(['afternoon', 'cafe'])
.agg('mean')
.unstack(level=0)
.values)
a1 = a1b1[:, 0]
b1 = a1b1[:, 1] - a1
# extract posterior means of partially pooled estimates
a2b2 = trace_13_1['ab_cafe'].mean(axis=0)
a2 = a2b2[:, 0]
b2 = a2b2[:, 1]
# plot both and connect with lines
_, ax = plt.subplots(1, 1, figsize=(5, 5))
ax.scatter(a1, b1)
ax.scatter(a2, b2,
facecolors='none', edgecolors='k', lw=1)
ax.plot([a1, a2], [b1, b2], 'k-', alpha=.5)
ax.set_xlabel('intercept', fontsize=14)
ax.set_ylabel('slope', fontsize=14);
# compute posterior mean bivariate Gaussian
Mu_est = trace_13_1['ab'].mean(axis=0)
Chol_cov = trace_13_1['chol_cov'].mean(axis=0)
L_chol = np.zeros((2, 2))
L_chol[np.tril_indices(2, 0)] = Chol_cov  # the packed Cholesky factor fills the lower triangle row by row
Sigma_est = np.dot(L_chol, L_chol.T)
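As a sanity check (not in the book), the standard deviations implied by Sigma_est should roughly match the posterior mean of the sigma_cafe Deterministic tracked in the model:

print(np.sqrt(np.diag(Sigma_est)), trace_13_1['sigma_cafe'].mean(axis=0))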
# draw contours
_, ax = plt.subplots(1, 1, figsize=(5, 5))
Gauss2d(Mu_est, np.asarray(Sigma_est), [0.1, 0.3, 0.5, 0.8, 0.99], ax=ax)
ax.scatter(a1, b1)
ax.scatter(a2, b2,
facecolors='none', edgecolors='k', lw=1)
ax.plot([a1, a2], [b1, b2], 'k-', alpha=.5)
ax.set_xlabel('intercept', fontsize=14)
ax.set_ylabel('slope', fontsize=14)
ax.set_xlim(1.5, 6.1)
ax.set_ylim(-2.5, 0);
# convert varying effects to waiting times
wait_morning_1 = a1
wait_afternoon_1 = a1 + b1
wait_morning_2 = a2
wait_afternoon_2 = a2 + b2
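The book follows this with the same shrinkage plot on the outcome scale; a sketch of that figure, reusing the quantities just computed:

_, ax = plt.subplots(1, 1, figsize=(5, 5))
ax.scatter(wait_morning_1, wait_afternoon_1)
ax.scatter(wait_morning_2, wait_afternoon_2,
           facecolors='none', edgecolors='k', lw=1)
ax.plot([wait_morning_1, wait_morning_2], [wait_afternoon_1, wait_afternoon_2], 'k-', alpha=.5)
ax.set_xlabel('morning wait', fontsize=14)
ax.set_ylabel('afternoon wait', fontsize=14);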
d_ad = pd.read_csv('./Data/UCBadmit.csv', sep=';')
d_ad['male'] = (d_ad['applicant.gender'] == 'male').astype(int)
d_ad['dept_id'] = pd.Categorical(d_ad['dept']).codes
Dept_id = d_ad['dept_id'].values
Ndept = len(d_ad['dept_id'].unique())
with pm.Model() as m_13_2:
    a = pm.Normal('a', 0, 10)
    bm = pm.Normal('bm', 0, 1)
    sigma_dept = pm.HalfCauchy('sigma_dept', 2)
    a_dept = pm.Normal('a_dept', a, sigma_dept, shape=Ndept)
    p = pm.math.invlogit(a_dept[Dept_id] + bm * d_ad['male'])
    admit = pm.Binomial('admit', p=p, n=d_ad.applications, observed=d_ad.admit)
    trace_13_2 = pm.sample(4500, tune=500)
pm.summary(trace_13_2, alpha=.11).round(2)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████| 5000/5000 [00:15<00:00, 331.41it/s]
|            | mean  | sd   | mc_error | hpd_5.5 | hpd_94.5 | n_eff  | Rhat |
|------------|-------|------|----------|---------|----------|--------|------|
| a          | -0.59 | 0.64 | 0.01     | -1.61   | 0.38     | 6877.0 | 1.0  |
| bm         | -0.10 | 0.08 | 0.00     | -0.22   | 0.04     | 5429.0 | 1.0  |
| a_dept__0  | 0.67  | 0.10 | 0.00     | 0.52    | 0.83     | 6476.0 | 1.0  |
| a_dept__1  | 0.63  | 0.11 | 0.00     | 0.44    | 0.80     | 6950.0 | 1.0  |
| a_dept__2  | -0.58 | 0.07 | 0.00     | -0.70   | -0.47    | 9000.0 | 1.0  |
| a_dept__3  | -0.62 | 0.09 | 0.00     | -0.76   | -0.48    | 8912.0 | 1.0  |
| a_dept__4  | -1.06 | 0.10 | 0.00     | -1.21   | -0.90    | 9000.0 | 1.0  |
| a_dept__5  | -2.61 | 0.16 | 0.00     | -2.86   | -2.36    | 9000.0 | 1.0  |
| sigma_dept | 1.48  | 0.58 | 0.01     | 0.72    | 2.20     | 6156.0 | 1.0  |
with pm.Model() as m_13_3:
    a = pm.Normal('a', 0, 10)
    bm = pm.Normal('bm', 0, 1)

    sd_dist = pm.HalfCauchy.dist(beta=2)
    packed_chol = pm.LKJCholeskyCov('chol_cov', eta=2, n=2, sd_dist=sd_dist)
    # compute the covariance matrix
    chol = pm.expand_packed_triangular(2, packed_chol, lower=True)
    cov = tt.dot(chol, chol.T)

    # extract the standard deviations and rho
    sigma_ab = pm.Deterministic('sigma_dept', tt.sqrt(tt.diag(cov)))
    corr = tt.diag(sigma_ab**-1).dot(cov.dot(tt.diag(sigma_ab**-1)))
    r = pm.Deterministic('Rho', corr[np.triu_indices(2, k=1)])

    ab_dept = pm.MvNormal('ab_dept', mu=tt.stack([a, bm]), chol=chol, shape=(Ndept, 2))
    a_dept = pm.Deterministic('a_dept', ab_dept[:, 0])
    bm_dept = pm.Deterministic('bm_dept', ab_dept[:, 1])

    p = pm.math.invlogit(ab_dept[Dept_id, 0] + ab_dept[Dept_id, 1] * d_ad['male'])
    admit = pm.Binomial('admit', p=p, n=d_ad.applications, observed=d_ad.admit)
    trace_13_3 = pm.sample(5000, tune=1000, njobs=4)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
 91%|█████████ | 5453/6000 [02:51<00:12, 43.66it/s]
/home/osvaldo/Documentos/Proyectos/01_PyMC3/pymc3/pymc3/step_methods/hmc/nuts.py:468: UserWarning: Chain 3 contains 11 diverging samples after tuning. If increasing `target_accept` does not help try to reparameterize.
 91%|█████████ | 5473/6000 [02:52<00:16, 32.46it/s]
/home/osvaldo/Documentos/Proyectos/01_PyMC3/pymc3/pymc3/step_methods/hmc/nuts.py:468: UserWarning: Chain 1 contains 86 diverging samples after tuning. If increasing `target_accept` does not help try to reparameterize.
 94%|█████████▍| 5651/6000 [02:55<00:05, 59.88it/s]
/home/osvaldo/Documentos/Proyectos/01_PyMC3/pymc3/pymc3/step_methods/hmc/nuts.py:468: UserWarning: Chain 2 contains 1 diverging samples after tuning. If increasing `target_accept` does not help try to reparameterize.
100%|██████████| 6000/6000 [03:01<00:00, 33.06it/s]
/home/osvaldo/Documentos/Proyectos/01_PyMC3/pymc3/pymc3/step_methods/hmc/nuts.py:468: UserWarning: Chain 0 contains 64 diverging samples after tuning. If increasing `target_accept` does not help try to reparameterize.
pm.forestplot(trace_13_3, varnames=['bm_dept', 'a_dept'], alpha=.11);
with pm.Model() as m_13_4:
    a = pm.Normal('a', 0, 10)
    sigma_dept = pm.HalfCauchy('sigma_dept', 2)
    a_dept = pm.Normal('a_dept', a, sigma_dept, shape=Ndept)
    p = pm.math.invlogit(a_dept[Dept_id])
    admit = pm.Binomial('admit', p=p, n=d_ad.applications, observed=d_ad.admit)
    trace_13_4 = pm.sample(4500, tune=500)
comp_df = pm.compare([trace_13_2, trace_13_3, trace_13_4],
[m_13_2, m_13_3, m_13_4])
comp_df.loc[:,'model'] = pd.Series(['m13.2', 'm13.3', 'm13.4'])
comp_df = comp_df.set_index('model')
comp_df
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████| 5000/5000 [00:12<00:00, 415.96it/s]
| model | WAIC   | pWAIC | dWAIC | weight | SE    | dSE   | warning |
|-------|--------|-------|-------|--------|-------|-------|---------|
| m13.3 | 91.43  | 7.01  | 0     | 0.88   | 4.75  | 0     | 1       |
| m13.4 | 105.47 | 6.68  | 14.04 | 0.12   | 17.33 | 14.01 | 1       |
| m13.2 | 108.13 | 9.19  | 16.69 | 0      | 15.74 | 12.22 | 1       |
d = pd.read_csv('Data/chimpanzees.csv', sep=";")
# we change "actor" and "block" to zero-index
actor = (d['actor'] - 1).values
block = (d['block'] - 1).values
Nactor = len(np.unique(actor))
Nblock = len(np.unique(block))
with pm.Model() as model_13_6:
    sd_dist = pm.HalfCauchy.dist(beta=2)
    pchol1 = pm.LKJCholeskyCov('pchol_actor', eta=4, n=3, sd_dist=sd_dist)
    pchol2 = pm.LKJCholeskyCov('pchol_block', eta=4, n=3, sd_dist=sd_dist)
    chol1 = pm.expand_packed_triangular(3, pchol1, lower=True)
    chol2 = pm.expand_packed_triangular(3, pchol2, lower=True)

    Intercept = pm.Normal('intercept', 0., 1., shape=3)
    beta_actor = pm.MvNormal('beta_actor', mu=0., chol=chol1, shape=(Nactor, 3))
    beta_block = pm.MvNormal('beta_block', mu=0., chol=chol2, shape=(Nblock, 3))

    A = Intercept[0] + beta_actor[actor, 0] + beta_block[block, 0]
    BP = Intercept[1] + beta_actor[actor, 1] + beta_block[block, 1]
    BPC = Intercept[2] + beta_actor[actor, 2] + beta_block[block, 2]

    p = pm.math.invlogit(A + (BP + BPC * d['condition']) * d['prosoc_left'])
    pulled_left = pm.Binomial('pulled_left', 1, p, observed=d['pulled_left'])
    trace_13_6 = pm.sample(5000, tune=1000)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
 51%|█████▏ | 3085/6000 [06:02<04:42, 10.31it/s]
/home/osvaldo/Documentos/Proyectos/01_PyMC3/pymc3/pymc3/step_methods/hmc/nuts.py:452: UserWarning: The acceptance probability in chain 1 does not match the target. It is 0.434980363024, but should be close to 0.8. Try to increase the number of tuning steps.
/home/osvaldo/Documentos/Proyectos/01_PyMC3/pymc3/pymc3/step_methods/hmc/nuts.py:468: UserWarning: Chain 1 contains 1597 diverging samples after tuning. If increasing `target_accept` does not help try to reparameterize.
100%|██████████| 6000/6000 [11:13<00:00, 7.16it/s]
/home/osvaldo/Documentos/Proyectos/01_PyMC3/pymc3/pymc3/step_methods/hmc/nuts.py:468: UserWarning: Chain 0 contains 218 diverging samples after tuning. If increasing `target_accept` does not help try to reparameterize.
# pm.summary(trace_13_6).round(2)
with pm.Model() as model_13_6NC:
    sd_dist = pm.HalfCauchy.dist(beta=2)
    pchol1 = pm.LKJCholeskyCov('pchol_actor', eta=4, n=3, sd_dist=sd_dist)
    pchol2 = pm.LKJCholeskyCov('pchol_block', eta=4, n=3, sd_dist=sd_dist)
    chol1 = pm.expand_packed_triangular(3, pchol1, lower=True)
    chol2 = pm.expand_packed_triangular(3, pchol2, lower=True)

    Intercept = pm.Normal('intercept', 0., 1., shape=3)
    # non-centered parameterization: sample standard Normals, then scale by the Cholesky factors
    b1 = pm.Normal('b1', 0., 1., shape=(3, Nactor))
    b2 = pm.Normal('b2', 0., 1., shape=(3, Nblock))
    beta_actor = tt.dot(chol1, b1)
    beta_block = tt.dot(chol2, b2)

    A = Intercept[0] + beta_actor[0, actor] + beta_block[0, block]
    BP = Intercept[1] + beta_actor[1, actor] + beta_block[1, block]
    BPC = Intercept[2] + beta_actor[2, actor] + beta_block[2, block]

    p = pm.math.invlogit(A + (BP + BPC * d['condition']) * d['prosoc_left'])
    pulled_left = pm.Binomial('pulled_left', 1, p, observed=d['pulled_left'])
    trace_13_6NC = pm.sample(5000, tune=1000)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... 100%|██████████| 6000/6000 [06:22<00:00, 13.32it/s]
# extract n_eff values for each model
neff_c = pm.summary(trace_13_6)['n_eff'].values
neff_nc = pm.summary(trace_13_6NC)['n_eff'].values
# plot distributions
_, ax = plt.subplots(1, 1, figsize=(5, 5))
ax.boxplot([neff_c, neff_nc], labels=['m13.6', 'm13.6NC']);
ax.set_xlabel('model', fontsize=14)
ax.set_ylabel('effective samples', fontsize=14);
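To put a number on the visual difference (not in the book):

# median effective sample size for each parameterization
print('centered: %.0f, non-centered: %.0f' % (np.median(neff_c), np.median(neff_nc)))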
# We didn't track sigma as a Deterministic inside the model above, so we have to
# recover it from the packed Cholesky samples after the fact.
def unpack_sigma(pack_chol):
    idxs = np.tril_indices(3)
    chol_ = np.zeros((3, 3, pack_chol.shape[0]))
    chol_[idxs] = pack_chol.T
    chol = np.transpose(chol_, (2, 0, 1))
    cholt = np.transpose(chol, (0, 2, 1))
    sigma = np.matmul(chol, cholt)  # rebuild the covariance matrix for every sample
    return np.sqrt(np.diagonal(sigma, axis1=1, axis2=2))
sigmadict = dict(Sigma_actor=unpack_sigma(trace_13_6NC.get_values('pchol_actor', combine=True)),
Sigma_block=unpack_sigma(trace_13_6NC.get_values('pchol_block', combine=True)))
trace_13_6NC.add_values(sigmadict)
pm.summary(trace_13_6NC, varnames=['Sigma_actor', 'Sigma_block']).round(2)
|                | mean | sd   | mc_error | hpd_2.5 | hpd_97.5 | n_eff  | Rhat |
|----------------|------|------|----------|---------|----------|--------|------|
| Sigma_actor__0 | 2.34 | 0.90 | 0.02     | 0.98    | 4.07     | 2809.0 | 1.0  |
| Sigma_actor__1 | 0.47 | 0.38 | 0.01     | 0.02    | 1.16     | 2083.0 | 1.0  |
| Sigma_actor__2 | 0.54 | 0.49 | 0.01     | 0.01    | 1.47     | 1324.0 | 1.0  |
| Sigma_block__0 | 0.23 | 0.20 | 0.00     | 0.00    | 0.60     | 3687.0 | 1.0  |
| Sigma_block__1 | 0.57 | 0.40 | 0.01     | 0.01    | 1.28     | 1063.0 | 1.0  |
| Sigma_block__2 | 0.50 | 0.40 | 0.01     | 0.02    | 1.26     | 1375.0 | 1.0  |
This part of the book is specific to R and the rethinking package; we skip it for now.
with pm.Model() as m_12_5:
    bp = pm.Normal('bp', 0, 10)
    bpC = pm.Normal('bpC', 0, 10)

    a = pm.Normal('a', 0, 10)
    sigma_actor = pm.HalfCauchy('sigma_actor', 1.)
    a_actor = pm.Normal('a_actor', 0., sigma_actor, shape=Nactor)

    sigma_block = pm.HalfCauchy('sigma_block', 1.)
    a_block = pm.Normal('a_block', 0., sigma_block, shape=Nblock)

    p = pm.math.invlogit(a + a_actor[actor] + a_block[block]
                         + (bp + bpC * d['condition']) * d['prosoc_left'])
    pulled_left = pm.Binomial('pulled_left', 1, p, observed=d['pulled_left'])
    trace_12_5 = pm.sample(6000, tune=1000)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|█████████▉| 6997/7000 [02:12<00:00, 66.38it/s]
/home/osvaldo/Documentos/Proyectos/01_PyMC3/pymc3/pymc3/step_methods/hmc/nuts.py:468: UserWarning: Chain 0 contains 11 diverging samples after tuning. If increasing `target_accept` does not help try to reparameterize.
100%|██████████| 7000/7000 [02:12<00:00, 52.88it/s]
/home/osvaldo/Documentos/Proyectos/01_PyMC3/pymc3/pymc3/step_methods/hmc/nuts.py:468: UserWarning: Chain 1 contains 28 diverging samples after tuning. If increasing `target_accept` does not help try to reparameterize.
comp_df = pm.compare(traces=[trace_13_6NC, trace_12_5],
models=[model_13_6NC, m_12_5],
method='pseudo-BMA')
comp_df.loc[:,'model'] = pd.Series(['m13.6NC', 'm12.5'])
comp_df = comp_df.set_index('model')
comp_df
| model   | WAIC   | pWAIC | dWAIC | weight | SE    | dSE  | warning |
|---------|--------|-------|-------|--------|-------|------|---------|
| m12.5   | 532.73 | 10.44 | 0     | 0.73   | 19.65 | 0    | 0       |
| m13.6NC | 534.71 | 18.43 | 1.97  | 0.27   | 19.89 | 4.13 | 0       |
Note that model m_13_6NC already uses the Cholesky decomposition of the covariance matrix. To strictly follow the parameterization of m13.6NC and m13.6nc1 in the book, we would instead model Rho with pm.LKJCorr and multiply it by standard deviations drawn from pm.HalfCauchy.
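A minimal sketch of that book-style parameterization for the actor cluster (hypothetical, not run here). pm.LKJCorr returns the flattened upper triangle of the correlation matrix, so we rebuild the full matrix with an index trick before combining it with the standard deviations:

n = 3
tri_idx = np.zeros((n, n), dtype=int)
tri_idx[np.triu_indices(n, k=1)] = np.arange(n * (n - 1) // 2)
tri_idx[np.triu_indices(n, k=1)[::-1]] = np.arange(n * (n - 1) // 2)  # mirror into the lower triangle

with pm.Model() as m_13_6nc1_sketch:
    sigma_actor = pm.HalfCauchy('sigma_actor', 2., shape=n)
    corr_flat = pm.LKJCorr('corr_actor', eta=4, n=n)      # flattened upper triangle
    Rho_actor = tt.fill_diagonal(corr_flat[tri_idx], 1.)  # full correlation matrix
    cov_actor = tt.diag(sigma_actor).dot(Rho_actor).dot(tt.diag(sigma_actor))
    beta_actor = pm.MvNormal('beta_actor', mu=0., cov=cov_actor, shape=(Nactor, n))
    # ... the block cluster, linear model, and likelihood would follow as in model_13_6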
Dmat = pd.read_csv('Data/islandsDistMatrix.csv', sep=",", index_col=0)
Dmat.round(1)
|            | Ml  | Ti  | SC  | Ya  | Fi  | Tr  | Ch  | Mn  | To  | Ha  |
|------------|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|
| Malekula   | 0.0 | 0.5 | 0.6 | 4.4 | 1.2 | 2.0 | 3.2 | 2.8 | 1.9 | 5.7 |
| Tikopia    | 0.5 | 0.0 | 0.3 | 4.2 | 1.2 | 2.0 | 2.9 | 2.7 | 2.0 | 5.3 |
| Santa Cruz | 0.6 | 0.3 | 0.0 | 3.9 | 1.6 | 1.7 | 2.6 | 2.4 | 2.3 | 5.4 |
| Yap        | 4.4 | 4.2 | 3.9 | 0.0 | 5.4 | 2.5 | 1.6 | 1.6 | 6.1 | 7.2 |
| Lau Fiji   | 1.2 | 1.2 | 1.6 | 5.4 | 0.0 | 3.2 | 4.0 | 3.9 | 0.8 | 4.9 |
| Trobriand  | 2.0 | 2.0 | 1.7 | 2.5 | 3.2 | 0.0 | 1.8 | 0.8 | 3.9 | 6.7 |
| Chuuk      | 3.2 | 2.9 | 2.6 | 1.6 | 4.0 | 1.8 | 0.0 | 1.2 | 4.8 | 5.8 |
| Manus      | 2.8 | 2.7 | 2.4 | 1.6 | 3.9 | 0.8 | 1.2 | 0.0 | 4.6 | 6.7 |
| Tonga      | 1.9 | 2.0 | 2.3 | 6.1 | 0.8 | 3.9 | 4.8 | 4.6 | 0.0 | 5.0 |
| Hawaii     | 5.7 | 5.3 | 5.4 | 7.2 | 4.9 | 6.7 | 5.8 | 6.7 | 5.0 | 0.0 |
_, ax = plt.subplots(1, 1, figsize=(5, 5))
xrange = np.linspace(0, 4, 100)
ax.plot(xrange, np.exp(-1*xrange), 'k--')
ax.plot(xrange, np.exp(-1*xrange**2), 'k')
ax.set_xlabel('distance', fontsize=14)
ax.set_ylabel('correlation', fontsize=14);
dk = pd.read_csv('Data/Kline2.csv', sep=",")
Nsociety = dk.shape[0]
dk.loc[:, 'society'] = np.arange(Nsociety)
Dmat_ = Dmat.values
Dmatsq = np.power(Dmat_, 2)
with pm.Model() as m_13_7:
    etasq = pm.HalfCauchy('etasq', 1)
    rhosq = pm.HalfCauchy('rhosq', 1)
    Kij = etasq * (tt.exp(-rhosq * Dmatsq) + np.diag([.01] * Nsociety))

    g = pm.MvNormal('g', mu=np.zeros(Nsociety), cov=Kij, shape=Nsociety)

    a = pm.Normal('a', 0, 10)
    bp = pm.Normal('bp', 0, 1)
    lam = pm.math.exp(a + g[dk.society.values] + bp * dk.logpop)
    obs = pm.Poisson('total_tools', lam, observed=dk.total_tools)
    trace_13_7 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... 100%|██████████| 2000/2000 [02:35<00:00, 13.02it/s]
pm.traceplot(trace_13_7, varnames=['g', 'a', 'bp', 'etasq', 'rhosq']);
A non-centered reparameterization of m13.7: instead of drawing g from the MvNormal directly, we draw standard Normals and multiply them by the Cholesky factor of Kij.
with pm.Model() as m_13_7_:
    etasq = pm.HalfCauchy('etasq', 1)
    rhosq = pm.HalfCauchy('rhosq', 1)
    Kij = etasq * (tt.exp(-rhosq * Dmatsq) + np.diag([.01] * Nsociety))

    # g = pm.MvNormal('g', mu=np.zeros(Nsociety), cov=Kij, shape=Nsociety)
    gmu = pm.Normal('gmu', 0., 1., shape=Nsociety)
    g = pm.Deterministic('g', tt.dot(tt.slinalg.cholesky(Kij), gmu))

    a = pm.Normal('a', 0, 10)
    bp = pm.Normal('bp', 0, 1)
    lam = pm.math.exp(a + g[dk.society.values] + bp * dk.logpop)
    obs = pm.Poisson('total_tools', lam, observed=dk.total_tools)
    trace_13_7_ = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████| 2000/2000 [02:14<00:00, 12.39it/s]
/home/osvaldo/Documentos/Proyectos/01_PyMC3/pymc3/pymc3/step_methods/hmc/nuts.py:452: UserWarning: The acceptance probability in chain 0 does not match the target. It is 0.709777131845, but should be close to 0.8. Try to increase the number of tuning steps.
/home/osvaldo/Documentos/Proyectos/01_PyMC3/pymc3/pymc3/step_methods/hmc/nuts.py:468: UserWarning: Chain 0 contains 17 diverging samples after tuning. If increasing `target_accept` does not help try to reparameterize.
/home/osvaldo/Documentos/Proyectos/01_PyMC3/pymc3/pymc3/step_methods/hmc/nuts.py:468: UserWarning: Chain 1 contains 1 diverging samples after tuning. If increasing `target_accept` does not help try to reparameterize.
pm.traceplot(trace_13_7_, varnames=['g', 'a', 'bp', 'etasq', 'rhosq']);
pm.summary(trace_13_7, varnames=['g', 'a', 'bp', 'etasq', 'rhosq']).round(2)
|       | mean  | sd    | mc_error | hpd_2.5 | hpd_97.5 | n_eff | Rhat |
|-------|-------|-------|----------|---------|----------|-------|------|
| g__0  | -0.26 | 0.39  | 0.02     | -1.00   | 0.49     | 325.0 | 1.00 |
| g__1  | -0.13 | 0.37  | 0.02     | -0.92   | 0.50     | 299.0 | 1.00 |
| g__2  | -0.16 | 0.36  | 0.02     | -0.89   | 0.49     | 314.0 | 1.01 |
| g__3  | 0.29  | 0.33  | 0.02     | -0.33   | 0.98     | 359.0 | 1.00 |
| g__4  | 0.03  | 0.32  | 0.02     | -0.65   | 0.62     | 330.0 | 1.00 |
| g__5  | -0.46 | 0.34  | 0.02     | -1.10   | 0.18     | 347.0 | 1.00 |
| g__6  | 0.09  | 0.33  | 0.02     | -0.56   | 0.75     | 364.0 | 1.00 |
| g__7  | -0.26 | 0.33  | 0.02     | -0.88   | 0.41     | 360.0 | 1.00 |
| g__8  | 0.22  | 0.31  | 0.02     | -0.42   | 0.80     | 356.0 | 1.00 |
| g__9  | -0.12 | 0.44  | 0.02     | -1.03   | 0.72     | 334.0 | 1.00 |
| a     | 1.30  | 1.03  | 0.06     | -0.86   | 3.40     | 216.0 | 1.01 |
| bp    | 0.25  | 0.11  | 0.01     | 0.03    | 0.46     | 255.0 | 1.00 |
| etasq | 0.30  | 0.35  | 0.01     | 0.01    | 0.90     | 506.0 | 1.00 |
| rhosq | 3.09  | 33.12 | 1.30     | 0.01    | 4.33     | 635.0 | 1.00 |
pm.summary(trace_13_7_, varnames=['g', 'a', 'bp', 'etasq', 'rhosq']).round(2)
|       | mean  | sd   | mc_error | hpd_2.5 | hpd_97.5 | n_eff | Rhat |
|-------|-------|------|----------|---------|----------|-------|------|
| g__0  | -0.26 | 0.40 | 0.02     | -1.11   | 0.54     | 616.0 | 1.0  |
| g__1  | -0.14 | 0.40 | 0.02     | -1.02   | 0.61     | 579.0 | 1.0  |
| g__2  | -0.17 | 0.38 | 0.02     | -0.99   | 0.57     | 546.0 | 1.0  |
| g__3  | 0.30  | 0.34 | 0.01     | -0.33   | 1.06     | 746.0 | 1.0  |
| g__4  | 0.04  | 0.32 | 0.01     | -0.62   | 0.68     | 949.0 | 1.0  |
| g__5  | -0.46 | 0.34 | 0.01     | -1.17   | 0.16     | 700.0 | 1.0  |
| g__6  | 0.10  | 0.33 | 0.01     | -0.60   | 0.72     | 867.0 | 1.0  |
| g__7  | -0.26 | 0.33 | 0.01     | -0.99   | 0.39     | 928.0 | 1.0  |
| g__8  | 0.23  | 0.31 | 0.01     | -0.42   | 0.84     | 977.0 | 1.0  |
| g__9  | -0.11 | 0.43 | 0.01     | -0.95   | 0.78     | 946.0 | 1.0  |
| a     | 1.33  | 1.13 | 0.04     | -1.04   | 3.63     | 621.0 | 1.0  |
| bp    | 0.24  | 0.11 | 0.00     | 0.03    | 0.49     | 667.0 | 1.0  |
| etasq | 0.31  | 0.34 | 0.01     | 0.01    | 0.91     | 575.0 | 1.0  |
| rhosq | 1.41  | 4.55 | 0.14     | 0.01    | 5.59     | 969.0 | 1.0  |
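The non-centered version yields clearly better effective sample sizes; a quick numeric comparison (not in the book):

print('m13.7 median n_eff:  %.0f' % np.median(pm.summary(trace_13_7)['n_eff']))
print('m13.7_ median n_eff: %.0f' % np.median(pm.summary(trace_13_7_)['n_eff']))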
post = pm.trace_to_dataframe(trace_13_7, varnames=['g', 'a', 'bp', 'etasq', 'rhosq'])
post_etasq = post['etasq'].values
post_rhosq = post['rhosq'].values
_, ax = plt.subplots(1, 1, figsize=(8, 5))
xrange = np.linspace(0, 10, 200)
ax.plot(xrange, np.median(post_etasq) * np.exp(-np.median(post_rhosq) * xrange**2), 'k')
ax.plot(xrange, (post_etasq[:100][:, None] * np.exp(-post_rhosq[:100][:, None] * xrange**2)).T,
'k', alpha=.1)
ax.set_ylim(0, 1)
ax.set_xlabel('distance')
ax.set_ylabel('correlation');
# compute posterior median covariance among societies
Kij_post = np.median(post_etasq) * (np.exp(-np.median(post_rhosq) * Dmatsq) + np.diag([.01] * Nsociety))
# convert to correlation matrix
sigma_post = np.sqrt(np.diag(Kij_post))
Rho = np.diag(sigma_post**-1).dot(Kij_post.dot(np.diag(sigma_post**-1)))
# add row/col names for convenience
Rho = pd.DataFrame(Rho, index=["Ml","Ti","SC","Ya","Fi","Tr","Ch","Mn","To","Ha"],
columns=["Ml","Ti","SC","Ya","Fi","Tr","Ch","Mn","To","Ha"])
Rho.round(2)
|    | Ml   | Ti   | SC   | Ya   | Fi   | Tr   | Ch   | Mn   | To   | Ha  |
|----|------|------|------|------|------|------|------|------|------|-----|
| Ml | 1.00 | 0.90 | 0.83 | 0.00 | 0.51 | 0.16 | 0.01 | 0.03 | 0.22 | 0.0 |
| Ti | 0.90 | 1.00 | 0.95 | 0.00 | 0.50 | 0.17 | 0.03 | 0.04 | 0.18 | 0.0 |
| SC | 0.83 | 0.95 | 1.00 | 0.00 | 0.34 | 0.27 | 0.05 | 0.09 | 0.10 | 0.0 |
| Ya | 0.00 | 0.00 | 0.00 | 1.00 | 0.00 | 0.07 | 0.34 | 0.31 | 0.00 | 0.0 |
| Fi | 0.51 | 0.50 | 0.34 | 0.00 | 1.00 | 0.01 | 0.00 | 0.00 | 0.77 | 0.0 |
| Tr | 0.16 | 0.17 | 0.27 | 0.07 | 0.01 | 1.00 | 0.24 | 0.72 | 0.00 | 0.0 |
| Ch | 0.01 | 0.03 | 0.05 | 0.34 | 0.00 | 0.24 | 1.00 | 0.52 | 0.00 | 0.0 |
| Mn | 0.03 | 0.04 | 0.09 | 0.31 | 0.00 | 0.72 | 0.52 | 1.00 | 0.00 | 0.0 |
| To | 0.22 | 0.18 | 0.10 | 0.00 | 0.77 | 0.00 | 0.00 | 0.00 | 1.00 | 0.0 |
| Ha | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 1.0 |
# scale point size to logpop
logpop = np.copy(dk['logpop'].values)
logpop /= logpop.max()
psize = np.exp(logpop*5.5)
_, ax = plt.subplots(1, 1, figsize=(5, 5))
ax.scatter(dk['lon2'], dk['lat'], psize);
labels = dk['culture'].values
for i, itext in enumerate(labels):
    ax.text(dk['lon2'][i] + 1, dk['lat'][i] + 1, itext)
# overlay lines shaded by Rho
for i in range(10):
    for j in np.arange(i + 1, 10):
        ax.plot([dk['lon2'][i], dk['lon2'][j]],
                [dk['lat'][i], dk['lat'][j]], 'k-',
                alpha=Rho.iloc[i, j]**2, lw=2.5)
ax.set_xlabel('longitude')
ax.set_ylabel('latitude');
Nsamp, Nbin = 1000, 30
log_pop_seq = np.linspace(6, 14, Nbin)
a_post = trace_13_7.get_values(varname='a', combine=True)[:, None]
bp_post = trace_13_7.get_values(varname='bp', combine=True)[:, None]
lambda_post = np.exp(a_post + bp_post*log_pop_seq)
# plot the posterior median and HPD interval of the expected tool count
_, axes = plt.subplots(1, 1, figsize=(5, 5))
axes.plot(log_pop_seq, np.median(lambda_post, axis=0), '--', color='k')
alpha = .2
mu_hpd = pm.hpd(lambda_post, alpha=alpha)
axes.fill_between(log_pop_seq,
                  mu_hpd[:, 0], mu_hpd[:, 1], alpha=alpha * .5 + .1, color='k')
# plot raw data
axes.scatter(dk['logpop'], dk['total_tools'], psize)
labels = dk['culture'].values
for i, itext in enumerate(labels):
    axes.text(dk['logpop'][i] + .1, dk['total_tools'][i] - 2.5, itext)
# overlay correlations
for i in range(10):
    for j in np.arange(i + 1, 10):
        axes.plot([dk['logpop'][i], dk['logpop'][j]],
                  [dk['total_tools'][i], dk['total_tools'][j]], 'k-',
                  alpha=Rho.iloc[i, j]**2, lw=2.5)
axes.set_xlabel('log-population')
axes.set_ylabel('total tools')
axes.set_xlim(6.8, 12.8)
axes.set_ylim(10, 73);
The practice problems are skipped for now.
import sys, IPython, scipy, matplotlib, platform
print("This notebook was createad on a computer %s running %s and using:\nPython %s\nIPython %s\nPyMC3 %s\nNumPy %s\nSciPy %s\nMatplotlib %s\n" % (platform.machine(), ' '.join(platform.linux_distribution()[:2]), sys.version[:5], IPython.__version__, pm.__version__, np.__version__, scipy.__version__, matplotlib.__version__))
This notebook was createad on a computer x86_64 running debian stretch/sid and using: Python 3.6.2 IPython 6.1.0 PyMC3 3.2 NumPy 1.13.3 SciPy 0.19.1 Matplotlib 2.1.0