from __future__ import division, unicode_literals
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib import animation as ani
import matplotlib.cm as cm
import seaborn as sns
sns.set(style="whitegrid", palette="muted", color_codes=True)
import numpy as np
import scipy.stats as st
from time import time
import pystan
from pystan import StanModel
/Users/matsuken/anaconda/lib/python2.7/site-packages/matplotlib/__init__.py:872: UserWarning: axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter. warnings.warn(self.msg_depr % (key, alt_key))
pystan.__version__
'2.8.0.0'
schools_dat = {'J': 8, # number of schools
'y': [28, 8, -3, 7, -1, 1, 18, 12], # estimated treatment effects
'sigma': [15, 10, 16, 11, 9, 11, 10, 18]} # s.e. of effect estimates
headers = "A,B,C,D,E,F,G,H".split(",")
headers
#print tabulate(titanic[0:5], headers, tablefmt="pipe")
[u'A', u'B', u'C', u'D', u'E', u'F', u'G', u'H']
# グラフ描画
plt.figure(figsize=(10,6))
xx = np.linspace(-50, 80, 301)
plt.ylim(0, 0.05)
for m, s in zip(schools_dat['y'], schools_dat['sigma']):
plt.plot(xx, st.norm.pdf(xx, m,s))
code = """data {
int<lower=0> J; // number of schools
real y[J]; //
real<lower=0> sigma[J]; // s.e. of effect estimates
} parameters {
real mu;
real<lower=0> tau;
real eta[J];
} transformed parameters {
real theta[J];
for (j in 1:J)
theta[j] <- mu + tau * eta[j];
}
model {
eta ~ normal(0, 1);
y ~ normal(theta, sigma);}
"""
%time stm = StanModel(model_code=code)
CPU times: user 1.5 s, sys: 124 ms, total: 1.62 s Wall time: 21.6 s
n_itr = 3000
n_warmup = 200
chains = 2
%time fit = stm.sampling(data=schools_dat, iter=n_itr, chains=chains, n_jobs=-1, warmup=n_warmup, algorithm="NUTS", verbose=False)
la = fit.extract(permuted=True) # return a dictionary of arrays
names = fit.model_pars
n_param = np.sum([1 if len(x) == 0 else x[0] for x in fit.par_dims])
CPU times: user 18.4 ms, sys: 30.3 ms, total: 48.6 ms Wall time: 1.32 s
/Users/matsuken/anaconda/lib/python2.7/multiprocessing/queues.py:392: UserWarning: Pickling fit objects is an experimental feature! The relevant StanModel instance must be pickled along with this fit object. When unpickling the StanModel must be unpickled first. return send(obj) /Users/matsuken/anaconda/lib/python2.7/multiprocessing/queues.py:392: UserWarning: Pickling fit objects is an experimental feature! The relevant StanModel instance must be pickled along with this fit object. When unpickling the StanModel must be unpickled first. return send(obj)
print fit
Inference for Stan model: anon_model_e2054dad35ba70aa7957970c3954d10a. 2 chains, each with iter=3000; warmup=200; thin=1; post-warmup draws per chain=2800, total post-warmup draws=5600. mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat mu 7.95 0.07 5.26 -2.76 4.63 7.94 11.32 18.38 5600.0 1.0 tau 6.56 0.07 5.52 0.18 2.46 5.23 9.04 20.65 5600.0 1.0 eta[0] 0.39 0.01 0.96 -1.54 -0.23 0.42 1.04 2.18 5600.0 1.0 eta[1] -5.1e-3 0.01 0.89 -1.84 -0.58 -0.01 0.57 1.77 5600.0 1.0 eta[2] -0.21 0.01 0.92 -2.0 -0.83 -0.22 0.39 1.62 5600.0 1.0 eta[3] -0.04 0.01 0.88 -1.74 -0.64 -0.05 0.53 1.72 5600.0 1.0 eta[4] -0.33 0.01 0.9 -2.08 -0.94 -0.35 0.26 1.52 5600.0 1.0 eta[5] -0.18 0.01 0.9 -1.93 -0.8 -0.19 0.4 1.61 5600.0 1.0 eta[6] 0.35 0.01 0.91 -1.47 -0.25 0.35 0.94 2.11 5600.0 1.0 eta[7] 0.06 0.01 0.94 -1.84 -0.55 0.07 0.69 1.87 5600.0 1.0 theta[0] 11.39 0.11 8.59 -2.44 5.84 10.2 15.57 32.56 5600.0 1.0 theta[1] 7.94 0.09 6.47 -4.71 3.88 7.87 11.85 21.5 5600.0 1.0 theta[2] 6.11 0.1 7.66 -11.42 1.87 6.65 10.91 20.48 5600.0 1.0 theta[3] 7.54 0.09 6.64 -6.22 3.58 7.5 11.63 20.86 5600.0 1.0 theta[4] 5.21 0.09 6.49 -8.95 1.21 5.72 9.58 16.71 5600.0 1.0 theta[5] 6.28 0.09 6.74 -8.79 2.32 6.67 10.58 18.69 5600.0 1.0 theta[6] 10.77 0.09 6.84 -0.7 6.21 10.12 14.54 26.44 5600.0 1.0 theta[7] 8.44 0.11 7.87 -7.23 3.83 8.24 12.93 25.43 5600.0 1.0 lp__ -5.0 0.04 2.68 -10.87 -6.69 -4.75 -3.09 -0.4 5600.0 1.0 Samples were drawn using NUTS(diag_e) at Sun Jan 31 10:09:38 2016. For each parameter, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence, Rhat=1).
# 各パラメーターのEAP推定量リスト抽出
mean_list = np.array(fit.summary()['summary'])[:,0]
mean_list
array([ 7.94565412e+00, 6.55509999e+00, 3.94574749e-01, -5.10483276e-03, -2.12850806e-01, -4.49955157e-02, -3.28768301e-01, -1.84253181e-01, 3.45437948e-01, 6.45413381e-02, 1.13877096e+01, 7.94463199e+00, 6.10815486e+00, 7.53633511e+00, 5.20923860e+00, 6.27744954e+00, 1.07704181e+01, 8.44478385e+00, -5.00329452e+00])
mu = mean_list[0]
tau = mean_list[1]
eta = mean_list[2:10]
theta = mean_list[10:18]
plt.figure(figsize=(12,10))
plt.xlim(-.4, .5)
plt.scatter(eta, theta, s=20)
xxx = np.linspace(-.4,.5,201)
plt.plot(xxx, mu + xxx*tau)
plt.xlabel("eta_j")
plt.ylabel("theta_j")
<matplotlib.text.Text at 0x1101ce610>
plt.figure(figsize=(14,6))
xx = np.linspace(-20, 40, 301)
plt.ylim(-0.001, 0.05)
plt.xlim(-20, 40)
#for y in schools_dat['y']:
# plt.scatter([y],[0], s=40)
color = "orange,blue,red,yellow,purple,green,brown,green".split(",")
plt.scatter(schools_dat['y'],np.zeros(len(schools_dat['y'])), s=40, c=color)
for t, s, c in zip(theta, schools_dat['sigma'], color):
plt.plot(xx, st.norm.pdf(xx, t,s), c=c)
for e, t in zip(eta, mean_list[10:18]):
#plt.plot(xx, st.norm.pdf(xx, m,s))
print t, mu + tau*e
11.3877096353 10.5321310504 7.94463199009 7.91219143087 6.10815486388 6.55039580471 7.53633511354 7.65070401566 5.2092385997 5.79054503536 6.27744954248 6.73785609861 10.7704181068 10.2100344107 8.44478385353 8.3687290444
f, axes = plt.subplots(n_param, 2, figsize=(15, 4*n_param))
cnt = 0
for name in names:
dat = la[name]
if dat.ndim == 2:
for j in range(dat.shape[1]):
d = dat[:,j]
sns.distplot(d, hist=False, rug=True, ax=axes[cnt, 0])
sns.tsplot(d, alpha=0.8, lw=.5, ax=axes[cnt, 1])
cnt += 1
else:
# Intercept
sns.distplot(dat, hist=False, rug=True, ax=axes[cnt, 0])
sns.tsplot(dat, alpha=0.8, lw=.5, ax=axes[cnt, 1])
cnt += 1
name_list = []
for name in names:
if la[name].ndim == 2:
for i in range(dat.shape[1]):
name_list.append("{}{}".format(name,i+1))
else:
name_list.append(name)
for i in range(2):
for j, t in enumerate(name_list):
axes[j, i].set_title(t)
plt.show()
# Likelihood
f, axes = plt.subplots(1, 2, figsize=(15, 4))
sns.distplot(la['lp__'], hist=False, rug=True, ax=axes[0])
sns.tsplot(la['lp__'], alpha=0.8, lw=.5, ax=axes[1])
axes[0].set_title("Histgram of likelihood.")
axes[1].set_title("Traceplot of likelihood.")
plt.show()