Notebook

In [1]:

import pymc3 as pm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline
%config InlineBackend.figure_format = 'retina'
%qtconsole --colors=linux
plt.style.use('ggplot')

from matplotlib import gridspec
from theano import tensor as tt
from scipy import stats

Chapter 8 - Comparing Gaussian means¶

8.1 One-sample comparison¶

$$ \delta \sim \text{Cauchy} (0, 1)$$$$ \sigma \sim \text{Cauchy} (0, 1)_{\mathcal I(0,∞)}$$$$ \mu = \delta\sigma $$$$ x_{i} \sim \text{Gaussian}(\mu,1/\sigma^2)$$

In [2]:

# Read data Dr. Smith
Winter = np.array([-0.05,0.41,0.17,-0.13,0.00,-0.05,0.00,0.17,0.29,0.04,0.21,0.08,0.37,
            0.17,0.08,-0.04,-0.04,0.04,-0.13,-0.12,0.04,0.21,0.17,0.17,0.17,
            0.33,0.04,0.04,0.04,0.00,0.21,0.13,0.25,-0.05,0.29,0.42,-0.05,0.12,
            0.04,0.25,0.12])

Summer = np.array([0.00,0.38,-0.12,0.12,0.25,0.12,0.13,0.37,0.00,0.50,0.00,0.00,-0.13,
            -0.37,-0.25,-0.12,0.50,0.25,0.13,0.25,0.25,0.38,0.25,0.12,0.00,0.00,
            0.00,0.00,0.25,0.13,-0.25,-0.38,-0.13,-0.25,0.00,0.00,-0.12,0.25,
            0.00,0.50,0.00])
x = Winter - Summer  # allowed because it is a within-subjects design
x = x / np.std(x)  

with pm.Model() as model1:
    delta = pm.Cauchy('delta', alpha=0, beta=1)
    sigma = pm.HalfCauchy('sigma', beta=1)
    miu = delta*sigma
    xi = pm.Normal('xi', mu=miu, sd=sigma, observed=x)
    trace1=pm.sample(3e3, njobs=2)

burnin=0    
pm.traceplot(trace1[burnin:], varnames=['delta']);
plt.show()

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████| 3500/3500.0 [00:01<00:00, 2141.79it/s]

In [3]:

def display_delta(trace, x):
    # BFs based on density estimation (using kernel smoothing instead of spline)
    from scipy.stats.kde import gaussian_kde
    from scipy.stats import cauchy

    pm.summary(trace, varnames=['delta'])
    tmp = pm.df_summary(trace, varnames=['delta'])
    # 95% confidence interval:
    x0 = tmp.values[0, 3]
    x1 = tmp.values[0, 4]

    t_delt = trace['delta'][:]
    my_pdf = gaussian_kde(t_delt)
    plt.plot(x, my_pdf(x), '--', lw=2.5, alpha=0.6, label='Posterior') # distribution function
    plt.plot(x, cauchy.pdf(x), 'r-', lw=2.5, alpha=0.6, label='Prior')
    posterior = my_pdf(0)             # this gives the pdf at point delta = 0
    prior     = cauchy.pdf(0)         # height of order-restricted prior at delta = 0
    BF01      = posterior/prior
    print ('the Bayes Factor is %.5f' %(BF01))
    plt.plot([0, 0], [posterior, prior], 'k-', 
             [0, 0], [posterior, prior], 'ko', lw=1.5, alpha=1)
    plt.xlabel('Delta')
    plt.ylabel('Density')
    plt.legend(loc='upper left')
    plt.show()

In [4]:

x = np.linspace(-3, 3, 100)
display_delta(trace1, x)

delta:

  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  
  0.117            0.156            0.002            [-0.187, 0.413]

  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  
  -0.186         0.011          0.120          0.226          0.416

the Bayes Factor is 5.92097

8.2 Order-restricted one-sample comparison¶

$$ \delta \sim \text{Cauchy} (0, 1)_{\mathcal I(-∞,0)}$$$$ \sigma \sim \text{Cauchy} (0, 1)_{\mathcal I(0,∞)}$$$$ \mu = \delta\sigma $$$$ x_{i} \sim \text{Gaussian}(\mu,1/\sigma^2)$$

In [5]:

# Read data Dr. Smith
Winter = np.array([-0.05,0.41,0.17,-0.13,0.00,-0.05,0.00,0.17,0.29,0.04,0.21,0.08,0.37,
            0.17,0.08,-0.04,-0.04,0.04,-0.13,-0.12,0.04,0.21,0.17,0.17,0.17,
            0.33,0.04,0.04,0.04,0.00,0.21,0.13,0.25,-0.05,0.29,0.42,-0.05,0.12,
            0.04,0.25,0.12])

Summer = np.array([0.00,0.38,-0.12,0.12,0.25,0.12,0.13,0.37,0.00,0.50,0.00,0.00,-0.13,
            -0.37,-0.25,-0.12,0.50,0.25,0.13,0.25,0.25,0.38,0.25,0.12,0.00,0.00,
            0.00,0.00,0.25,0.13,-0.25,-0.38,-0.13,-0.25,0.00,0.00,-0.12,0.25,
            0.00,0.50,0.00])
x = Winter - Summer  # allowed because it is a within-subjects design
x = x / np.std(x)  

with pm.Model() as model2:
    delta1 = pm.HalfCauchy('delta1', beta=1)
    delta = pm.Deterministic('delta', -delta1)
    sigma = pm.HalfCauchy('sigma', beta=1)
    miu = delta*sigma
    xi = pm.Normal('xi', mu=miu, sd=sigma, observed=x)
    trace2=pm.sample(3e3, njobs=2)

burnin=0
pm.traceplot(trace2[burnin:], varnames=['delta']);
plt.show()

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
 99%|█████████▉| 3479/3500.0 [00:02<00:00, 1354.61it/s]/home/laoj/Documents/Github/pymc3/pymc3/step_methods/hmc/nuts.py:467: UserWarning: Chain 0 contains 17 diverging samples after tuning. If increasing `target_accept` does not help try to reparameterize.
  % (self._chain_id, n_diverging))
100%|██████████| 3500/3500.0 [00:02<00:00, 1425.97it/s]
/home/laoj/Documents/Github/pymc3/pymc3/step_methods/hmc/nuts.py:467: UserWarning: Chain 1 contains 3 diverging samples after tuning. If increasing `target_accept` does not help try to reparameterize.
  % (self._chain_id, n_diverging))

In [6]:

x = np.linspace(-3, 0, 100)
display_delta(trace2, x)

delta:

  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  
  -0.089           0.074            0.001            [-0.236, -0.000]

  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  
  -0.275         -0.127         -0.071         -0.030         -0.003

the Bayes Factor is 13.16856

8.3 Two-sample comparison¶

$$ \delta \sim \text{Cauchy} (0, 1)$$$$ \mu \sim \text{Cauchy} (0, 1)$$$$ \sigma \sim \text{Cauchy} (0, 1)_{\mathcal I(0,∞)}$$$$ \alpha = \delta\sigma $$$$ x_{i} \sim \text{Gaussian}(\mu+\frac{\alpha}{2},1/\sigma^2)$$$$ y_{i} \sim \text{Gaussian}(\mu-\frac{\alpha}{2},1/\sigma^2)$$

In [7]:

# Read data 
x =np.array([70,80,79,83,77,75,84,78,75,75,78,82,74,81,72,70,75,72,76,77])
y =np.array([56,80,63,62,67,71,68,76,79,67,76,74,67,70,62,65,72,72,69,71])

n1 = len(x)
n2 = len(y)

# Rescale
y = y - np.mean(x)
y = y / np.std(x)
x = (x - np.mean(x)) / np.std(x)

with pm.Model() as model3:
    delta = pm.Cauchy('delta', alpha=0, beta=1)
    mu = pm.Cauchy('mu', alpha=0, beta=1)
    sigma = pm.HalfCauchy('sigma', beta=1)
    alpha = delta*sigma
    xi = pm.Normal('xi', mu=mu+alpha/2, sd=sigma, observed=x)
    yi = pm.Normal('yi', mu=mu-alpha/2, sd=sigma, observed=y)
    trace3=pm.sample(3e3, njobs=2)

burnin=0    
pm.traceplot(trace3[burnin:], varnames=['delta']);
plt.show()

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████| 3500/3500.0 [00:02<00:00, 1319.23it/s]

In [8]:

x = np.linspace(-3, 3, 100)
display_delta(trace3, x)

delta:

  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  
  1.303            0.357            0.005            [0.623, 2.011]

  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  
  0.611          1.059          1.302          1.549          2.006

the Bayes Factor is 0.00467