# The 1919 Eclipse: parameter inference and model comparison¶

Florent Leclercq
Imperial Centre for Inference and Cosmology, Imperial College London
[email protected]

In [1]:
import numpy as np
import scipy
import scipy.stats as ss
import matplotlib.pyplot as plt
np.random.seed(3)

In [2]:
# Some plotting tools and routines

nBins = 30
nContourLevels = 3
# 2d contour levels: (68%, 95%, 99%)
confLevels = [.3173, .0455, .0027]
smoothingKernel = 1

def get_marginal(samples):
# create 1d histogram
hist1d, edges = np.histogram(samples, weights=np.ones_like(samples),
density=True, bins=nBins)

# Bin center between histogram edges
centers = (edges[1:]+edges[:-1])/2

# Filter data
pdf = np.array((centers, hist1d))
pdf = scipy.ndimage.gaussian_filter1d(pdf, sigma=smoothingKernel)

# Clip the pdf to zero out of the bins
centers, hist = pdf[0], pdf[1]
centers = np.insert(centers, 0, centers[0]-(centers[1]-centers[0]))
hist = np.insert(hist, 0, 0.)
centers = np.insert(centers, len(centers), centers[len(centers)-1]-(centers[len(centers)-2]-centers[len(centers)-1]))
hist = np.insert(hist, len(hist), 0.)

# Normalize all the pdfs to the same height
hist /= hist.max()

pdf = np.array((centers, hist))

return pdf

def get_contours(samples_x, samples_y):
# Empty arrays needed below
chainLevels = np.ones(nContourLevels+1)
extents = np.empty(4)

# These are needed to compute the contour levels
nBinsFlat = np.linspace(0., nBins**2, nBins**2)

# Create 2d histogram
hist2d, xedges, yedges = np.histogram2d(
samples_x, samples_y, weights=np.ones_like(samples_x), bins=nBins)
# image extent, needed below for contour lines
extents = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
# Normalize
hist2d = hist2d/np.sum(hist2d)
# Cumulative 1d distribution
histOrdered = np.sort(hist2d.flat)
histCumulative = np.cumsum(histOrdered)

# Compute contour levels (from low to high for technical reasons)
for l in range(nContourLevels):
# Find location of contour level in 1d histCumulative
temp = np.interp(confLevels[l], histCumulative, nBinsFlat)
# Find "height" of contour level
chainLevels[nContourLevels-1-l] = np.interp(temp, nBinsFlat, histOrdered)

# Apply Gaussian smoothing
contours = scipy.ndimage.gaussian_filter(hist2d.T, sigma=smoothingKernel)

xbins = (xedges[1:]+xedges[:-1])/2
ybins = (yedges[1:]+yedges[:-1])/2

return xbins, ybins, contours, chainLevels


## Data model¶

In [3]:
def Dx_11(alpha,a,b,c):
return c-0.160*b-1.261*a-0.587*alpha/19.8
def Dx_5(alpha,a,b,c):
return c-1.107*b-0.160*a-0.557*alpha/19.8
def Dx_4(alpha,a,b,c):
return c+0.472*b+0.334*a-0.186*alpha/19.8
def Dx_3(alpha,a,b,c):
return c+0.360*b+0.348*a-0.222*alpha/19.8
def Dx_6(alpha,a,b,c):
return c+1.099*b+0.587*a+0.080*alpha/19.8
def Dx_10(alpha,a,b,c):
return c+1.321*b+0.860*a+0.158*alpha/19.8
def Dx_2(alpha,a,b,c):
return c-0.328*b+1.079*a+1.540*alpha/19.8

def Dy_11(alpha,d,e,f):
return f-1.261*d-0.160*e+0.036*alpha/19.8
def Dy_5(alpha,d,e,f):
return f-0.160*d-1.107*e-0.789*alpha/19.8
def Dy_4(alpha,d,e,f):
return f+0.334*d+0.472*e+1.336*alpha/19.8
def Dy_3(alpha,d,e,f):
return f+0.348*d+0.360*e+1.574*alpha/19.8
def Dy_6(alpha,d,e,f):
return f+0.587*d+1.099*e+0.726*alpha/19.8
def Dy_10(alpha,d,e,f):
return f+0.860*d+1.321*e+0.589*alpha/19.8
def Dy_2(alpha,d,e,f):
return f+1.079*d-0.328*e-0.156*alpha/19.8

stars = np.array([11,5,4,3,6,10,2])
alpha_GR=1.75
alpha_Newton=0.9


## Simulations¶

In [4]:
nsims=10
a=ss.uniform(-1.,2.).rvs(nsims)
b=ss.uniform(-1.,2.).rvs(nsims)
c=ss.uniform(-1.,2.).rvs(nsims)
d=ss.uniform(-1.,2.).rvs(nsims)
e=ss.uniform(-1.,2.).rvs(nsims)
f=ss.uniform(-1.,2.).rvs(nsims)
alpha=ss.uniform(-0.75,2+0.75).rvs(nsims)

sims_x = np.array([
Dx_11(alpha,a,b,c),
Dx_5(alpha,a,b,c),
Dx_4(alpha,a,b,c),
Dx_3(alpha,a,b,c),
Dx_6(alpha,a,b,c),
Dx_10(alpha,a,b,c),
Dx_2(alpha,a,b,c)
])
sims_y = np.array([
Dy_11(alpha,d,e,f),
Dy_5(alpha,d,e,f),
Dy_4(alpha,d,e,f),
Dy_3(alpha,d,e,f),
Dy_6(alpha,d,e,f),
Dy_10(alpha,d,e,f),
Dy_2(alpha,d,e,f)
])


## Data vectors¶

In [5]:
# Using comparison plate 14_2a
dx_I = np.array([
-1.411+1.500 - (-0.478+0.552),
-1.048+1.500 - (-0.544+0.552),
-1.216+1.500 - (-0.368+0.552),
-1.237+1.500 - (-0.350+0.552),
-1.342+1.500 - (-0.317+0.552),
-1.289+1.500 - (-0.272+0.552),
-0.789+1.500 - (-0.396+0.552),
])
dy_I = np.array([
-0.554+0.554 - (-0.109+0.206),
-0.338+0.554 - (-0.204+0.206),
+0.114+0.554 - (-0.136+0.206),
+0.150+0.554 - (-0.073+0.206),
+0.124+0.554 - (-0.144+0.206),
+0.205+0.554 - (-0.146+0.206),
+0.109+0.554 - (-0.182+0.206),
])
dx_II = np.array([
-1.416+1.500 - (-0.478+0.552),
-1.221+1.500 - (-0.544+0.552),
-1.054+1.500 - (-0.368+0.552),
-1.079+1.500 - (-0.350+0.552),
-1.012+1.500 - (-0.317+0.552),
-0.999+1.500 - (-0.272+0.552),
-0.733+1.500 - (-0.396+0.552),
])
dy_II = np.array([
-1.324+1.324 - (-0.109+0.206),
-1.312+1.324 - (-0.204+0.206),
-0.944+1.324 - (-0.136+0.206),
-0.862+1.324 - (-0.073+0.206),
-0.932+1.324 - (-0.144+0.206),
-0.948+1.324 - (-0.146+0.206),
-1.019+1.324 - (-0.182+0.206),
])
dx_III = np.array([
+0.592-0.500 - (-0.478+0.552),
+0.756-0.500 - (-0.544+0.552),
+0.979-0.500 - (-0.368+0.552),
+0.958-0.500 - (-0.350+0.552),
+1.052-0.500 - (-0.317+0.552),
+1.157-0.500 - (-0.272+0.552),
+1.256-0.500 - (-0.396+0.552),
])
dy_III = np.array([
+0.956-0.843 - (-0.109+0.206),
+0.843-0.843 - (-0.204+0.206),
+1.172-0.843 - (-0.136+0.206),
+1.244-0.843 - (-0.073+0.206),
+1.197-0.843 - (-0.144+0.206),
+1.211-0.843 - (-0.146+0.206),
+0.924-0.843 - (-0.182+0.206),
])
dx_IV = np.array([
+0.563-0.500 - (-0.478+0.552),
+0.683-0.500 - (-0.544+0.552),
+0.849-0.500 - (-0.368+0.552),
+0.861-0.500 - (-0.350+0.552),
+0.894-0.500 - (-0.317+0.552),
+0.934-0.500 - (-0.272+0.552),
+1.177-0.500 - (-0.396+0.552),
])
dy_IV = np.array([
+1.238-1.226 - (-0.109+0.206),
+1.226-1.226 - (-0.204+0.206),
+1.524-1.226 - (-0.136+0.206),
+1.587-1.226 - (-0.073+0.206),
+1.564-1.226 - (-0.144+0.206),
+1.522-1.226 - (-0.146+0.206),
+1.373-1.226 - (-0.182+0.206),
])
dx_V = np.array([
+0.406-0.400 - (-0.478+0.552),
+0.468-0.400 - (-0.544+0.552),
+0.721-0.400 - (-0.368+0.552),
+0.733-0.400 - (-0.350+0.552),
+0.798-0.400 - (-0.317+0.552),
+0.864-0.400 - (-0.272+0.552),
+0.995-0.400 - (-0.396+0.552),
])
dy_V = np.array([
+0.970-0.861 - (-0.109+0.206),
+0.861-0.861 - (-0.204+0.206),
+1.167-0.861 - (-0.136+0.206),
+1.234-0.861 - (-0.073+0.206),
+1.130-0.861 - (-0.144+0.206),
+1.119-0.861 - (-0.146+0.206),
+0.935-0.861 - (-0.182+0.206),
])
# Plate VI was not used!
dx_VII = np.array([
-1.456+1.500 - (-0.478+0.552),
-1.267+1.500 - (-0.544+0.552),
-1.028+1.500 - (-0.368+0.552),
-1.010+1.500 - (-0.350+0.552),
-0.888+1.500 - (-0.317+0.552),
-0.820+1.500 - (-0.272+0.552),
-0.768+1.500 - (-0.396+0.552),
])
dy_VII = np.array([
+0.964-0.777 - (-0.109+0.206),
+0.777-0.777 - (-0.204+0.206),
+1.142-0.777 - (-0.136+0.206),
+1.185-0.777 - (-0.073+0.206),
+1.125-0.777 - (-0.144+0.206),
+1.072-0.777 - (-0.146+0.206),
+0.892-0.777 - (-0.182+0.206),
])
dx_VIII = np.array([
-1.285+1.300 - (-0.478+0.552),
-1.152+1.300 - (-0.544+0.552),
-0.927+1.300 - (-0.368+0.552),
-0.897+1.300 - (-0.350+0.552),
-0.838+1.300 - (-0.317+0.552),
-0.768+1.300 - (-0.272+0.552),
-0.585+1.300 - (-0.396+0.552),
])
dy_VIII = np.array([
-1.195+1.322 - (-0.109+0.206),
-1.332+1.322 - (-0.204+0.206),
-0.930+1.322 - (-0.136+0.206),
-0.894+1.322 - (-0.073+0.206),
-0.937+1.322 - (-0.144+0.206),
-0.964+1.322 - (-0.146+0.206),
-1.166+1.322 - (-0.182+0.206),
])

# Assumed error on the measurements
sigma_d = 0.05

In [6]:
from matplotlib.lines import Line2D

fig, (ax0, ax1) = plt.subplots(1, 2, figsize=(20,8))

for i, star in enumerate(stars):
ax0.scatter(star*np.ones(nsims),sims_x[i],marker="o",color="black")
for dx in [dx_I,dx_II,dx_III,dx_IV,dx_V,dx_VII,dx_VIII]:
ax0.scatter(stars,dx,color="C0",marker="D")
ax0.set_xlabel("Star index")
ax0.set_ylabel("Dx")
legend_elements = [Line2D([0], [0], lw=0, marker="o", color="black", label="simulations"),
Line2D([0], [0], lw=0, marker="D", color="C0", label="data"),
]
ax0.legend(handles=legend_elements)

for i, star in enumerate(stars):
ax1.scatter(star*np.ones(nsims),sims_y[i],marker="o",color="black")
for dy in [dy_I,dy_II,dy_III,dy_IV,dy_V,dy_VII,dy_VIII]:
ax1.scatter(stars,dy,color="C0",marker="D")
ax1.set_xlabel("Star index")
ax1.set_ylabel("Dy")
ax1.legend(handles=legend_elements)

Out[6]:
<matplotlib.legend.Legend at 0x146e42f450d0>

## Log-likelihood¶

In [7]:
def lh_platex_logpdf(theta,data,sigma_d):
alpha,a,b,c = theta[0],theta[1],theta[2],theta[3]
theory = np.array([
Dx_11(alpha,a,b,c),
Dx_5(alpha,a,b,c),
Dx_4(alpha,a,b,c),
Dx_3(alpha,a,b,c),
Dx_6(alpha,a,b,c),
Dx_10(alpha,a,b,c),
Dx_2(alpha,a,b,c)
])
return -1/2.*np.sum((theory-data)**2/sigma_d**2)

def lh_platey_logpdf(theta,data,sigma_d):
alpha,d,e,f = theta[0],theta[1],theta[2],theta[3]
theory = np.array([
Dy_11(alpha,d,e,f),
Dy_5(alpha,d,e,f),
Dy_4(alpha,d,e,f),
Dy_3(alpha,d,e,f),
Dy_6(alpha,d,e,f),
Dy_10(alpha,d,e,f),
Dy_2(alpha,d,e,f)
])
# Note that the second term is not relevant for MCMC, only for model comparison
return -1/2.*np.sum((theory-data)**2/sigma_d**2) - (np.sqrt(2*np.pi)*sigma_d)**theory.shape[0]

def target_logpdf(theta,data,sigma_d):
return lh_platex_logpdf(theta,data,sigma_d)

In [8]:
lh_platex_logpdf([1.7,0.,0.,0.],dx_II,sigma_d), lh_platey_logpdf([0.9,0.1,-0.1,0.05],dy_V,sigma_d)

Out[8]:
(-114.41481733496576, -25.490621014683164)

## Metropolis-Hastings sampler¶

In [9]:
def uniform_proposal_pdf(proposal_cov):
sigmas=np.sqrt(np.diag(proposal_cov))
return ss.uniform(loc=-sigmas*np.sqrt(3), scale=2*sigmas*np.sqrt(3))

def gaussian_proposal_pdf(proposal_cov):
return ss.multivariate_normal(np.zeros(proposal_cov.shape[0]),proposal_cov)

def proposal_pdf(proposal_cov):
#     return uniform_proposal_pdf(proposal_cov)
return gaussian_proposal_pdf(proposal_cov)

In [10]:
def MH_sampler(Ntries,theta_start,data,sigma_d,proposal_cov):
Naccepted=0
samples=list()
samples.append(theta_start)
theta=theta_start
for i in range(Ntries):
theta_p = theta + proposal_pdf(proposal_cov).rvs()
# the uniform/Gaussian proposal pdf satisfies the detailed balance equation, so the
# acceptance ratio simplifies to the Metropolis ratio
a = min(1, np.exp(target_logpdf(theta_p,data,sigma_d) - target_logpdf(theta,data,sigma_d)))
u = ss.uniform().rvs()
if u < a:
Naccepted+=1
theta=theta_p
samples.append(theta)
return Naccepted, np.array(samples)

In [11]:
Ntries1=1000
Nburnin=100
proposal_sigma=np.array([8e-1,4e-2,2e-2,2e-2])
proposal_cov=np.diag(proposal_sigma**2)
theta_start=np.array([1.75,0.,0.,0.])
Naccepted, samples = MH_sampler(Ntries1,theta_start,dx_II,sigma_d,proposal_cov)
fraction_accepted=float(Naccepted)/Ntries1

In [12]:
fig, (ax0, ax1, ax2, ax3) = plt.subplots(4,1,figsize=(10,12))

ax0.set_xlim(0,Ntries1+1)
ax0.set_title("Trace plot")
ax0.set_ylabel("$\\alpha$")
ax0.plot(np.arange(Ntries1+1),samples.T[0],marker='.',color='C0')
ax0.axhline(alpha_Newton,color='black',linestyle='--')
ax0.axhline(alpha_GR,color='black',linestyle='--')
ax0.axvline(Nburnin,color='black',linestyle=':')

ax1.set_xlim(0,Ntries1+1)
ax1.set_ylabel("$a$")
ax1.plot(np.arange(Ntries1+1),samples.T[1],marker='.',color='C1')
ax1.axvline(Nburnin,color='black',linestyle=':')

ax2.set_xlim(0,Ntries1+1)
ax2.set_ylabel("$b$")
ax2.plot(np.arange(Ntries1+1),samples.T[2],marker='.',color='C2')
ax2.axvline(Nburnin,color='black',linestyle=':')

ax3.set_xlim(0,Ntries1+1)
ax3.set_ylabel("$c$")
ax3.plot(np.arange(Ntries1+1),samples.T[3],marker='.',color='C3')
ax3.set_xlabel("sample number")
ax3.axvline(Nburnin,color='black',linestyle=':')

Out[12]:
<matplotlib.lines.Line2D at 0x146e42e27610>
In [13]:
fraction_accepted

Out[13]:
0.319

## Markov chain diagnostics¶

### 1- Step size¶

In [14]:
Ntries2=1000
theta_start=np.array([1.75,0.,0.,0.])

In [15]:
# Suitable step size
proposal_sigma_1=np.array([8e-1,4e-2,2e-2,2e-2])
proposal_cov=np.diag(proposal_sigma_1**2)
Naccepted_1, samples_1 = MH_sampler(Ntries2,theta_start,dx_II,sigma_d,proposal_cov)
fraction_accepted_1=float(Naccepted_1)/Ntries2

In [16]:
# Step size too large
proposal_sigma_2=np.array([1.5,0.2,0.1,0.1])
proposal_cov=np.diag(proposal_sigma_2**2)
Naccepted_2, samples_2 = MH_sampler(Ntries2,theta_start,dx_II,sigma_d,proposal_cov)
fraction_accepted_2=float(Naccepted_2)/Ntries2

In [17]:
# Step size too small
proposal_sigma_3=np.array([1e-2,2e-3,1e-3,1e-3])
proposal_cov=np.diag(proposal_sigma_3**2)
Naccepted_3, samples_3 = MH_sampler(Ntries2,theta_start,dx_II,sigma_d,proposal_cov)
fraction_accepted_3=float(Naccepted_3)/Ntries2

In [18]:
f, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(2, 3, sharex='col', sharey='row', figsize=(15,10))

ax1.set_xlim(0,Ntries2)
ax1.set_ylabel("$\\alpha$")
ax1.plot(np.arange(Ntries2+1),samples_1.T[0],marker='.',color='C0')
ymin, ymax = ax1.get_ylim()
ax1.set_title("sigma={}, ar={:.3f}".format(proposal_sigma_1,fraction_accepted_1))
ax2.set_xlim(0,Ntries2)
ax2.set_ylim(ymin,ymax)
ax2.plot(np.arange(Ntries2+1),samples_2.T[0],marker='.',color='C0')
ax2.set_title("sigma={}, ar={:.3f}".format(proposal_sigma_2,fraction_accepted_2))
ax3.set_xlim(0,Ntries2)
ax3.set_ylim(ymin,ymax)
ax3.plot(np.arange(Ntries2+1),samples_3.T[0],marker='.',color='C0')
ax3.set_title("sigma={}, ar={:.3f}".format(proposal_sigma_3,fraction_accepted_3))

ax4.set_xlim(0,Ntries2)
ax4.set_xlabel("sample number")
ax4.set_ylabel("$a$")
ax4.plot(np.arange(Ntries2+1),samples_1.T[1],marker='.',color='C1')
ymin, ymax = ax4.get_ylim()
ax4.set_title("sigma={}, ar={:.3f}".format(proposal_sigma_1,fraction_accepted_1))
ax5.set_xlim(0,Ntries2)
ax5.set_ylim(ymin,ymax)
ax5.set_xlabel("sample number")
ax5.plot(np.arange(Ntries2+1),samples_2.T[1],marker='.',color='C1')
ax5.set_title("sigma={}, ar={:.3f}".format(proposal_sigma_2,fraction_accepted_2))
ax6.set_xlim(0,Ntries2)
ax6.set_ylim(ymin,ymax)
ax6.set_xlabel("sample number")
ax6.plot(np.arange(Ntries2+1),samples_3.T[1],marker='.',color='C1')
ax6.set_title("sigma={}, ar={:.3f}".format(proposal_sigma_3,fraction_accepted_3))

ymin, ymax = ax1.get_ylim()
ax2.text(200,ymax-(ymax-ymin)/10., "step size too large",fontsize=14)
ax3.text(200,ymax-(ymax-ymin)/10., "step size too small",fontsize=14)


### 2- Multiple chains, different starting points¶

In [19]:
Ntries3=6000
Nburnin=350
proposal_sigma=np.array([8e-1,4e-2,2e-2,2e-2])
proposal_cov=np.diag(proposal_sigma**2)
Nchains=5

In [20]:
# Run Nchains different chains starting at different positions in parameter space
chains_dx_II = [MH_sampler(Ntries3,theta_start,dx_II,sigma_d,proposal_cov)
for theta_start in np.stack((np.linspace(0.,3.,Nchains),
np.linspace(-1.,1.,Nchains),
np.linspace(-1.,1.,Nchains),
np.linspace(-1.,1.,Nchains)),axis=1)]
chain_dx_II = np.array([chains_dx_II[j][1] for j in range(Nchains)])

In [21]:
fig, (ax0, ax1, ax2, ax3) = plt.subplots(4,1,figsize=(10,12))

ax0.set_xlim(0,700)
ax0.set_ylabel("$\\alpha$")
for samples in chains_dx_II:
ax0.plot(np.arange(Ntries3+1),samples[1].T[0],marker='.')
ax0.axvline(Nburnin,color='black',linestyle=':')
ax0.set_title("Trace plot for different chains")

ax1.set_xlim(0,700)
ax1.set_ylabel("$a$")
for samples in chains_dx_II:
ax1.plot(np.arange(Ntries3+1),samples[1].T[1],marker='.')
ax1.axvline(Nburnin,color='black',linestyle=':')

ax2.set_xlim(0,700)
ax2.set_ylabel("$b$")
for samples in chains_dx_II:
ax2.plot(np.arange(Ntries3+1),samples[1].T[2],marker='.')
ax2.axvline(Nburnin,color='black',linestyle=':')

ax3.set_xlim(0,700)
ax3.set_ylabel("$c$")
ax3.set_xlabel("sample number")
for samples in chains_dx_II:
ax3.plot(np.arange(Ntries3+1),samples[1].T[3],marker='.')
ax3.axvline(Nburnin,color='black',linestyle=':')

Out[21]:
<matplotlib.lines.Line2D at 0x146e42032eb0>

### 3-Gelman-Rubin test¶

Gelman et al., "Bayesian Data Analysis" (third edition), p. 284-285

Parameters:

• $m$: number of chains
• $n$: length of chains

Definitions:

• "between" chains variance: $$B \equiv \frac{n}{m-1} \sum_{j=1}^m \left( \bar{\psi}_{. j} - \bar{\psi}_{..} \right)^2 \quad \mathrm{where} \quad \bar{\psi}_{. j} = \frac{1}{n} \sum_{i=1}^n \psi_{ij} \quad \mathrm{and} \quad \bar{\psi}_{..} = \frac{1}{m} \sum_{j=1}^m \bar{\psi}_{.j}$$
• "within" chains variance: $$W \equiv \frac{1}{m} \sum_{j=1}^m s_j^2 \quad \mathrm{where} \quad s_j^2 = \frac{1}{n-1} \sum_{i=1}^n \left( \psi_{ij} - \bar{\psi}_{.j} \right)^2$$

Estimators: Estimators of the marginal posterior variance of the estimand:

• $\widehat{\mathrm{var}}^- \equiv W$: underestimates the variance
• $\widehat{\mathrm{var}}^+ \equiv \frac{n}{n-1}W + \frac{1}{n} B$: overestimates the variance

Test:

• Potential scale reduction factor: $\widehat{R} \equiv \sqrt{\frac{\widehat{\mathrm{var}}^+}{\widehat{\mathrm{var}}^-}}$
• Test: $\widehat{R} \rightarrow 1$ as $n \rightarrow \infty$
In [22]:
def gelman_rubin(chain):
# between chains variance
Psi_dotj = np.mean(chain, axis=1)
Psi_dotdot = np.mean(Psi_dotj, axis=0)
m = chain.shape[0]
n = chain.shape[1]
B = n / (m - 1.) * np.sum((Psi_dotj - Psi_dotdot)**2, axis=0)

# within chains variance
sj2 = np.var(chain, axis=1, ddof=1)
W = np.mean(sj2, axis=0)

# estimators
var_minus = W
var_plus = (n - 1.) / n * W + 1. / n * B
R_hat = np.sqrt(var_plus / var_minus)
return R_hat

# The input array must have dimensions (nchains, nsamp, npars) = (m, n, npars).

In [23]:
gelman_rubin(chain_dx_II)

Out[23]:
array([1.00010273, 1.00116675, 1.01472867, 1.01219264])

## Likelihood surface¶

In [24]:
# Remove burn-in phase
chain_dx_II_burn = np.array([chains_dx_II[j][1][Nburnin:] for j in range(Nchains)])
chain_dx_II_burn_flat = np.concatenate(chain_dx_II_burn, axis=0)

In [25]:
# Means of parameters (alpha, a, b, c)
chain_dx_II_burn.mean(axis=(0,1))

Out[25]:
array([ 2.21274596,  0.14546414, -0.10294555,  0.2467706 ])
In [26]:
# Marginal standard deviations of parameters (alpha, a, b, c)
chain_dx_II_burn.std(axis=(0,1))

Out[26]:
array([0.88682913, 0.04556606, 0.02903384, 0.02093263])
In [27]:
# Covariance matrix of parameters (alpha, a, b, c)
np.cov(chain_dx_II_burn_flat.T)

Out[27]:
array([[ 7.86493733e-01, -3.11913471e-02,  9.67890826e-03,
4.59158279e-03],
[-3.11913471e-02,  2.07633923e-03, -7.20143274e-04,
-3.04753140e-04],
[ 9.67890826e-03, -7.20143274e-04,  8.42993590e-04,
-3.91585523e-05],
[ 4.59158279e-03, -3.04753140e-04, -3.91585523e-05,
4.38190310e-04]])
In [28]:
fig = plt.figure(figsize=(20,20))

# Plot 1d marginal distributions
pdf_alpha = get_marginal(chain_dx_II_burn_flat.T[0])
ax0.fill_between(pdf_alpha[0],pdf_alpha[1], 0, color='#52aae7', alpha=0.4)
ax0.set_ylim(0.,pdf_alpha[1].max()*1.05)
ax0.axvline(alpha_GR,color='black',linestyle='--')
ax0.axvline(alpha_Newton,color='black',linestyle='--')
ax0.set_xlabel("$\\alpha$")

pdf_a = get_marginal(chain_dx_II_burn_flat.T[1])
ax1.fill_between(pdf_a[0],pdf_a[1], 0, color='#52aae7', alpha=0.4)
ax1.set_ylim(0.,pdf_a[1].max()*1.05)
ax1.set_xlabel("$a$")

pdf_b = get_marginal(chain_dx_II_burn_flat.T[2])
ax2.fill_between(pdf_b[0],pdf_a[1], 0, color='#52aae7', alpha=0.4)
ax2.set_ylim(0.,pdf_b[1].max()*1.05)
ax2.set_xlabel("$b$")

pdf_c = get_marginal(chain_dx_II_burn_flat.T[3])
ax3.fill_between(pdf_c[0],pdf_c[1], 0, color='#52aae7', alpha=0.4)
ax3.set_ylim(0.,pdf_c[1].max()*1.05)
ax3.set_xlabel("$c$")

# Plot 2d contours
ax0a.scatter(chain_dx_II_burn_flat.T[0], chain_dx_II_burn_flat.T[1], color="black", s=2)
xbins, ybins, contours, chainLevels = get_contours(chain_dx_II_burn_flat.T[0], chain_dx_II_burn_flat.T[1])
ax0a.contourf(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), alpha=0.5)
ax0a.contour(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), linewidths=2)
ax0a.axvline(alpha_GR,color='black',linestyle='--')
ax0a.axvline(alpha_Newton,color='black',linestyle='--')
ax0a.set_xlabel("$\\alpha$")
ax0a.set_ylabel("$a$")

ax1a.scatter(chain_dx_II_burn_flat.T[0], chain_dx_II_burn_flat.T[2], color="black", s=2)
xbins, ybins, contours, chainLevels = get_contours(chain_dx_II_burn_flat.T[0], chain_dx_II_burn_flat.T[2])
ax1a.contourf(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), alpha=0.5)
ax1a.contour(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), linewidths=2)
ax1a.axvline(alpha_GR,color='black',linestyle='--')
ax1a.axvline(alpha_Newton,color='black',linestyle='--')
ax1a.set_xlabel("$\\alpha$")
ax1a.set_ylabel("$b$")

ax1b.scatter(chain_dx_II_burn_flat.T[1], chain_dx_II_burn_flat.T[2], color="black", s=2)
xbins, ybins, contours, chainLevels = get_contours(chain_dx_II_burn_flat.T[1], chain_dx_II_burn_flat.T[2])
ax1b.contourf(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), alpha=0.5)
ax1b.contour(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), linewidths=2)
ax1b.set_xlabel("$a$")

ax2a.scatter(chain_dx_II_burn_flat.T[0], chain_dx_II_burn_flat.T[3], color="black", s=2)
xbins, ybins, contours, chainLevels = get_contours(chain_dx_II_burn_flat.T[0], chain_dx_II_burn_flat.T[3])
ax2a.contourf(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), alpha=0.5)
ax2a.contour(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), linewidths=2)
ax2a.axvline(alpha_GR,color='black',linestyle='--')
ax2a.axvline(alpha_Newton,color='black',linestyle='--')
ax2a.set_xlabel("$\\alpha$")
ax2a.set_ylabel("$c$")

ax2b.scatter(chain_dx_II_burn_flat.T[1], chain_dx_II_burn_flat.T[3], color="black", s=2)
xbins, ybins, contours, chainLevels = get_contours(chain_dx_II_burn_flat.T[1], chain_dx_II_burn_flat.T[3])
ax2b.contourf(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), alpha=0.5)
ax2b.contour(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), linewidths=2)
ax2b.set_xlabel("$a$")

ax2c.scatter(chain_dx_II_burn_flat.T[2], chain_dx_II_burn_flat.T[3], color="black", s=2)
xbins, ybins, contours, chainLevels = get_contours(chain_dx_II_burn_flat.T[2], chain_dx_II_burn_flat.T[3])
ax2c.contourf(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), alpha=0.5)
ax2c.contour(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), linewidths=2)
ax2c.set_xlabel("$b$")

plt.show()


## Effect of nuisance parameters¶

In [29]:
# We now include a strong prior that nuisance parameters are close to zero
def prior_logpdf(theta):
alpha,a,b,c = theta[0],theta[1],theta[2],theta[3]
return ss.multivariate_normal([0.,0.,0.],np.diag(np.array([1e-2,1e-2,1e-2])**2)).logpdf([a,b,c])

def target_logpdf(theta,data,sigma_d):
return prior_logpdf(theta) + lh_platex_logpdf(theta,data,sigma_d)

In [30]:
Ntries3=5000
Nburnin=100
proposal_sigma=np.array([8e-1,1e-2,1e-2,1e-2])
proposal_cov=np.diag(proposal_sigma**2)
Nchains=5

In [31]:
# Run Nchains different chains starting at different positions in parameter space
chains_nuisance = [MH_sampler(Ntries3,theta_start,[dx_II],sigma_d,proposal_cov)
for theta_start in np.stack((np.linspace(0.,3.,Nchains),
np.linspace(-1e-2,1e-2,Nchains),
np.linspace(-1e-2,1e-2,Nchains),
np.linspace(-1e-2,1e-2,Nchains)),axis=1)]
chain_nuisance = np.array([chains_nuisance[j][1] for j in range(Nchains)])

In [32]:
fig, (ax0, ax1, ax2, ax3) = plt.subplots(4,1,figsize=(10,12))

ax0.set_xlim(0,700)
ax0.set_ylabel("$\\alpha$")
for samples in chains_nuisance:
ax0.plot(np.arange(Ntries3+1),samples[1].T[0],marker='.')
ax0.axvline(Nburnin,color='black',linestyle=':')
ax0.set_title("Trace plot for different chains")

ax1.set_xlim(0,700)
ax1.set_ylabel("$a$")
for samples in chains_nuisance:
ax1.plot(np.arange(Ntries3+1),samples[1].T[1],marker='.')
ax1.axvline(Nburnin,color='black',linestyle=':')

ax2.set_xlim(0,700)
ax2.set_ylabel("$b$")
for samples in chains_nuisance:
ax2.plot(np.arange(Ntries3+1),samples[1].T[2],marker='.')
ax2.axvline(Nburnin,color='black',linestyle=':')

ax3.set_xlim(0,700)
ax3.set_ylabel("$c$")
ax3.set_xlabel("sample number")
for samples in chains_nuisance:
ax3.plot(np.arange(Ntries3+1),samples[1].T[3],marker='.')
ax3.axvline(Nburnin,color='black',linestyle=':')

Out[32]:
<matplotlib.lines.Line2D at 0x146e40095e80>