Florent Leclercq
Imperial Centre for Inference and Cosmology, Imperial College London
florent.leclercq@polytechnique.org
import numpy as np
import scipy.ndimage  # needed explicitly: "import scipy" alone does not expose scipy.ndimage
import scipy.stats as ss
import matplotlib.pyplot as plt
np.random.seed(3)
# Some plotting tools and routines
nBins = 30
nContourLevels = 3
# 2d contour levels: (68%, 95%, 99.7%)
confLevels = [.3173, .0455, .0027]
smoothingKernel = 1
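As a quick sanity check (an addition, not in the original notebook), these levels are the complements of the 1, 2 and 3 sigma probability masses of a one-dimensional Gaussian:
# Optional check: confLevels[k-1] equals 1 - P(|x| < k*sigma) for a unit Gaussian
for k, level in zip((1, 2, 3), confLevels):
    mass = ss.norm.cdf(k) - ss.norm.cdf(-k)
    print(k, 1.-mass, level)  # e.g. 1 - 0.6827 = 0.3173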
def get_marginal(samples):
    # Create 1d histogram
    hist1d, edges = np.histogram(samples, density=True, bins=nBins)
    # Bin centers between histogram edges
    centers = (edges[1:]+edges[:-1])/2
    # Smooth the bin contents (the bin centers are left untouched)
    hist = scipy.ndimage.gaussian_filter1d(hist1d, sigma=smoothingKernel)
    # Pad the pdf with zeros just outside the binned range
    centers = np.insert(centers, 0, centers[0]-(centers[1]-centers[0]))
    hist = np.insert(hist, 0, 0.)
    centers = np.append(centers, centers[-1]+(centers[-1]-centers[-2]))
    hist = np.append(hist, 0.)
    # Normalize all the pdfs to the same (unit) height
    hist /= hist.max()
    pdf = np.array((centers, hist))
    return pdf
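A minimal smoke test of get_marginal on synthetic Gaussian samples (an illustrative addition, not part of the original notebook):
# The returned pdf should peak near zero and be normalised to unit height
test_pdf = get_marginal(np.random.normal(size=10000))
assert np.isclose(test_pdf[1].max(), 1.)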
def get_contours(samples_x, samples_y):
    # Contour levels, filled from low to high below
    chainLevels = np.ones(nContourLevels+1)
    # Abscissa needed to locate the contour levels in the cumulative histogram
    nBinsFlat = np.linspace(0., nBins**2, nBins**2)
    # Create 2d histogram
    hist2d, xedges, yedges = np.histogram2d(samples_x, samples_y, bins=nBins)
    # Normalize
    hist2d = hist2d/np.sum(hist2d)
    # Cumulative 1d distribution of the sorted bin contents
    histOrdered = np.sort(hist2d.flat)
    histCumulative = np.cumsum(histOrdered)
    # Compute contour levels (from low to high for technical reasons)
    for l in range(nContourLevels):
        # Find location of contour level in 1d histCumulative
        temp = np.interp(confLevels[l], histCumulative, nBinsFlat)
        # Find "height" of contour level
        chainLevels[nContourLevels-1-l] = np.interp(temp, nBinsFlat, histOrdered)
    # Apply Gaussian smoothing
    contours = scipy.ndimage.gaussian_filter(hist2d.T, sigma=smoothingKernel)
    # Bin centers, needed for the contour plots
    xbins = (xedges[1:]+xedges[:-1])/2
    ybins = (yedges[1:]+yedges[:-1])/2
    return xbins, ybins, contours, chainLevels
# Model for the measured deflection of each star: plate nuisance parameters
# plus the light-deflection term, proportional to alpha
def Dx_11(alpha,a,b,c):
    return c-0.160*b-1.261*a-0.587*alpha/19.8
def Dx_5(alpha,a,b,c):
    return c-1.107*b-0.160*a-0.557*alpha/19.8
def Dx_4(alpha,a,b,c):
    return c+0.472*b+0.334*a-0.186*alpha/19.8
def Dx_3(alpha,a,b,c):
    return c+0.360*b+0.348*a-0.222*alpha/19.8
def Dx_6(alpha,a,b,c):
    return c+1.099*b+0.587*a+0.080*alpha/19.8
def Dx_10(alpha,a,b,c):
    return c+1.321*b+0.860*a+0.158*alpha/19.8
def Dx_2(alpha,a,b,c):
    return c-0.328*b+1.079*a+1.540*alpha/19.8
def Dy_11(alpha,d,e,f):
    return f-1.261*d-0.160*e+0.036*alpha/19.8
def Dy_5(alpha,d,e,f):
    return f-0.160*d-1.107*e-0.789*alpha/19.8
def Dy_4(alpha,d,e,f):
    return f+0.334*d+0.472*e+1.336*alpha/19.8
def Dy_3(alpha,d,e,f):
    return f+0.348*d+0.360*e+1.574*alpha/19.8
def Dy_6(alpha,d,e,f):
    return f+0.587*d+1.099*e+0.726*alpha/19.8
def Dy_10(alpha,d,e,f):
    return f+0.860*d+1.321*e+0.589*alpha/19.8
def Dy_2(alpha,d,e,f):
    return f+1.079*d-0.328*e-0.156*alpha/19.8
stars = np.array([11,5,4,3,6,10,2])
alpha_GR=1.75
alpha_Newton=0.9
nsims=10
a=ss.uniform(-1.,2.).rvs(nsims)
b=ss.uniform(-1.,2.).rvs(nsims)
c=ss.uniform(-1.,2.).rvs(nsims)
d=ss.uniform(-1.,2.).rvs(nsims)
e=ss.uniform(-1.,2.).rvs(nsims)
f=ss.uniform(-1.,2.).rvs(nsims)
alpha=ss.uniform(-0.75,2+0.75).rvs(nsims)
sims_x = np.array([
Dx_11(alpha,a,b,c),
Dx_5(alpha,a,b,c),
Dx_4(alpha,a,b,c),
Dx_3(alpha,a,b,c),
Dx_6(alpha,a,b,c),
Dx_10(alpha,a,b,c),
Dx_2(alpha,a,b,c)
])
sims_y = np.array([
Dy_11(alpha,d,e,f),
Dy_5(alpha,d,e,f),
Dy_4(alpha,d,e,f),
Dy_3(alpha,d,e,f),
Dy_6(alpha,d,e,f),
Dy_10(alpha,d,e,f),
Dy_2(alpha,d,e,f)
])
# Using comparison plate 14_2a
dx_I = np.array([
-1.411+1.500 - (-0.478+0.552),
-1.048+1.500 - (-0.544+0.552),
-1.216+1.500 - (-0.368+0.552),
-1.237+1.500 - (-0.350+0.552),
-1.342+1.500 - (-0.317+0.552),
-1.289+1.500 - (-0.272+0.552),
-0.789+1.500 - (-0.396+0.552),
])
dy_I = np.array([
-0.554+0.554 - (-0.109+0.206),
-0.338+0.554 - (-0.204+0.206),
+0.114+0.554 - (-0.136+0.206),
+0.150+0.554 - (-0.073+0.206),
+0.124+0.554 - (-0.144+0.206),
+0.205+0.554 - (-0.146+0.206),
+0.109+0.554 - (-0.182+0.206),
])
dx_II = np.array([
-1.416+1.500 - (-0.478+0.552),
-1.221+1.500 - (-0.544+0.552),
-1.054+1.500 - (-0.368+0.552),
-1.079+1.500 - (-0.350+0.552),
-1.012+1.500 - (-0.317+0.552),
-0.999+1.500 - (-0.272+0.552),
-0.733+1.500 - (-0.396+0.552),
])
dy_II = np.array([
-1.324+1.324 - (-0.109+0.206),
-1.312+1.324 - (-0.204+0.206),
-0.944+1.324 - (-0.136+0.206),
-0.862+1.324 - (-0.073+0.206),
-0.932+1.324 - (-0.144+0.206),
-0.948+1.324 - (-0.146+0.206),
-1.019+1.324 - (-0.182+0.206),
])
dx_III = np.array([
+0.592-0.500 - (-0.478+0.552),
+0.756-0.500 - (-0.544+0.552),
+0.979-0.500 - (-0.368+0.552),
+0.958-0.500 - (-0.350+0.552),
+1.052-0.500 - (-0.317+0.552),
+1.157-0.500 - (-0.272+0.552),
+1.256-0.500 - (-0.396+0.552),
])
dy_III = np.array([
+0.956-0.843 - (-0.109+0.206),
+0.843-0.843 - (-0.204+0.206),
+1.172-0.843 - (-0.136+0.206),
+1.244-0.843 - (-0.073+0.206),
+1.197-0.843 - (-0.144+0.206),
+1.211-0.843 - (-0.146+0.206),
+0.924-0.843 - (-0.182+0.206),
])
dx_IV = np.array([
+0.563-0.500 - (-0.478+0.552),
+0.683-0.500 - (-0.544+0.552),
+0.849-0.500 - (-0.368+0.552),
+0.861-0.500 - (-0.350+0.552),
+0.894-0.500 - (-0.317+0.552),
+0.934-0.500 - (-0.272+0.552),
+1.177-0.500 - (-0.396+0.552),
])
dy_IV = np.array([
+1.238-1.226 - (-0.109+0.206),
+1.226-1.226 - (-0.204+0.206),
+1.524-1.226 - (-0.136+0.206),
+1.587-1.226 - (-0.073+0.206),
+1.564-1.226 - (-0.144+0.206),
+1.522-1.226 - (-0.146+0.206),
+1.373-1.226 - (-0.182+0.206),
])
dx_V = np.array([
+0.406-0.400 - (-0.478+0.552),
+0.468-0.400 - (-0.544+0.552),
+0.721-0.400 - (-0.368+0.552),
+0.733-0.400 - (-0.350+0.552),
+0.798-0.400 - (-0.317+0.552),
+0.864-0.400 - (-0.272+0.552),
+0.995-0.400 - (-0.396+0.552),
])
dy_V = np.array([
+0.970-0.861 - (-0.109+0.206),
+0.861-0.861 - (-0.204+0.206),
+1.167-0.861 - (-0.136+0.206),
+1.234-0.861 - (-0.073+0.206),
+1.130-0.861 - (-0.144+0.206),
+1.119-0.861 - (-0.146+0.206),
+0.935-0.861 - (-0.182+0.206),
])
# Plate VI was not used!
dx_VII = np.array([
-1.456+1.500 - (-0.478+0.552),
-1.267+1.500 - (-0.544+0.552),
-1.028+1.500 - (-0.368+0.552),
-1.010+1.500 - (-0.350+0.552),
-0.888+1.500 - (-0.317+0.552),
-0.820+1.500 - (-0.272+0.552),
-0.768+1.500 - (-0.396+0.552),
])
dy_VII = np.array([
+0.964-0.777 - (-0.109+0.206),
+0.777-0.777 - (-0.204+0.206),
+1.142-0.777 - (-0.136+0.206),
+1.185-0.777 - (-0.073+0.206),
+1.125-0.777 - (-0.144+0.206),
+1.072-0.777 - (-0.146+0.206),
+0.892-0.777 - (-0.182+0.206),
])
dx_VIII = np.array([
-1.285+1.300 - (-0.478+0.552),
-1.152+1.300 - (-0.544+0.552),
-0.927+1.300 - (-0.368+0.552),
-0.897+1.300 - (-0.350+0.552),
-0.838+1.300 - (-0.317+0.552),
-0.768+1.300 - (-0.272+0.552),
-0.585+1.300 - (-0.396+0.552),
])
dy_VIII = np.array([
-1.195+1.322 - (-0.109+0.206),
-1.332+1.322 - (-0.204+0.206),
-0.930+1.322 - (-0.136+0.206),
-0.894+1.322 - (-0.073+0.206),
-0.937+1.322 - (-0.144+0.206),
-0.964+1.322 - (-0.146+0.206),
-1.166+1.322 - (-0.182+0.206),
])
# Assumed error on the measurements
sigma_d = 0.05
from matplotlib.lines import Line2D
fig, (ax0, ax1) = plt.subplots(1, 2, figsize=(20,8))
for i, star in enumerate(stars):
    ax0.scatter(star*np.ones(nsims), sims_x[i], marker="o", color="black")
for dx in [dx_I, dx_II, dx_III, dx_IV, dx_V, dx_VII, dx_VIII]:
    ax0.scatter(stars, dx, color="C0", marker="D")
ax0.set_xlabel("Star index")
ax0.set_ylabel("Dx")
legend_elements = [Line2D([0], [0], lw=0, marker="o", color="black", label="simulations"),
                   Line2D([0], [0], lw=0, marker="D", color="C0", label="data")]
ax0.legend(handles=legend_elements)
for i, star in enumerate(stars):
    ax1.scatter(star*np.ones(nsims), sims_y[i], marker="o", color="black")
for dy in [dy_I, dy_II, dy_III, dy_IV, dy_V, dy_VII, dy_VIII]:
    ax1.scatter(stars, dy, color="C0", marker="D")
ax1.set_xlabel("Star index")
ax1.set_ylabel("Dy")
ax1.legend(handles=legend_elements)
def lh_platex_logpdf(theta,data,sigma_d):
    alpha,a,b,c = theta[0],theta[1],theta[2],theta[3]
    theory = np.array([
        Dx_11(alpha,a,b,c),
        Dx_5(alpha,a,b,c),
        Dx_4(alpha,a,b,c),
        Dx_3(alpha,a,b,c),
        Dx_6(alpha,a,b,c),
        Dx_10(alpha,a,b,c),
        Dx_2(alpha,a,b,c)
    ])
    return -1/2.*np.sum((theory-data)**2/sigma_d**2)
def lh_platey_logpdf(theta,data,sigma_d):
    alpha,d,e,f = theta[0],theta[1],theta[2],theta[3]
    theory = np.array([
        Dy_11(alpha,d,e,f),
        Dy_5(alpha,d,e,f),
        Dy_4(alpha,d,e,f),
        Dy_3(alpha,d,e,f),
        Dy_6(alpha,d,e,f),
        Dy_10(alpha,d,e,f),
        Dy_2(alpha,d,e,f)
    ])
    # Note that the second (normalisation) term is not relevant for MCMC, only for model comparison
    return -1/2.*np.sum((theory-data)**2/sigma_d**2) - theory.shape[0]*np.log(np.sqrt(2*np.pi)*sigma_d)
def target_logpdf(theta,data,sigma_d):
    return lh_platex_logpdf(theta,data,sigma_d)
lh_platex_logpdf([1.7,0.,0.,0.],dx_II,sigma_d), lh_platey_logpdf([0.9,0.1,-0.1,0.05],dy_V,sigma_d)
(-114.41481733496576, -25.490621014683164)
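As a cross-check of the Gaussian log-likelihood (an illustrative addition, not in the original notebook), the chi-squared term can be compared against scipy's logpdf:
# The chi-squared term equals the sum of scipy's Gaussian log-pdfs once the
# normalisation constant n*log(sqrt(2*pi)*sigma_d) is added back
theory_test, data_test = np.zeros(7), 0.1*np.ones(7)
manual = -1/2.*np.sum((theory_test-data_test)**2/sigma_d**2)
via_scipy = np.sum(ss.norm(theory_test, sigma_d).logpdf(data_test)) + 7*np.log(np.sqrt(2*np.pi)*sigma_d)
assert np.isclose(manual, via_scipy)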
def uniform_proposal_pdf(proposal_cov):
    sigmas = np.sqrt(np.diag(proposal_cov))
    return ss.uniform(loc=-sigmas*np.sqrt(3), scale=2*sigmas*np.sqrt(3))
def gaussian_proposal_pdf(proposal_cov):
    return ss.multivariate_normal(np.zeros(proposal_cov.shape[0]),proposal_cov)
def proposal_pdf(proposal_cov):
    # return uniform_proposal_pdf(proposal_cov)
    return gaussian_proposal_pdf(proposal_cov)
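The $\sqrt{3}$ factors ensure that the uniform proposal has the same componentwise standard deviation as the Gaussian one; a quick check (illustrative, not in the original notebook):
# A U(-s*sqrt(3), s*sqrt(3)) variable has standard deviation s
cov_test = np.diag(np.array([0.1, 0.2])**2)
print(uniform_proposal_pdf(cov_test).std())  # -> [0.1 0.2]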
def MH_sampler(Ntries,theta_start,data,sigma_d,proposal_cov):
    Naccepted = 0
    samples = [theta_start]
    theta = theta_start
    # Cache the log-target of the current state to avoid recomputing it at each step
    logpdf = target_logpdf(theta,data,sigma_d)
    for i in range(Ntries):
        theta_p = theta + proposal_pdf(proposal_cov).rvs()
        logpdf_p = target_logpdf(theta_p,data,sigma_d)
        # the uniform/Gaussian proposal pdf is symmetric, so the
        # acceptance ratio simplifies to the Metropolis ratio
        a = min(1, np.exp(logpdf_p - logpdf))
        u = ss.uniform().rvs()
        if u < a:
            Naccepted += 1
            theta, logpdf = theta_p, logpdf_p
        samples.append(theta)
    return Naccepted, np.array(samples)
Ntries1=1000
Nburnin=100
proposal_sigma=np.array([8e-1,4e-2,2e-2,2e-2])
proposal_cov=np.diag(proposal_sigma**2)
theta_start=np.array([1.75,0.,0.,0.])
Naccepted, samples = MH_sampler(Ntries1,theta_start,dx_II,sigma_d,proposal_cov)
fraction_accepted=float(Naccepted)/Ntries1
fig, (ax0, ax1, ax2, ax3) = plt.subplots(4,1,figsize=(10,12))
ax0.set_xlim(0,Ntries1+1)
ax0.set_title("Trace plot")
ax0.set_ylabel("$\\alpha$")
ax0.plot(np.arange(Ntries1+1),samples.T[0],marker='.',color='C0')
ax0.axhline(alpha_Newton,color='black',linestyle='--')
ax0.axhline(alpha_GR,color='black',linestyle='--')
ax0.axvline(Nburnin,color='black',linestyle=':')
ax1.set_xlim(0,Ntries1+1)
ax1.set_ylabel("$a$")
ax1.plot(np.arange(Ntries1+1),samples.T[1],marker='.',color='C1')
ax1.axvline(Nburnin,color='black',linestyle=':')
ax2.set_xlim(0,Ntries1+1)
ax2.set_ylabel("$b$")
ax2.plot(np.arange(Ntries1+1),samples.T[2],marker='.',color='C2')
ax2.axvline(Nburnin,color='black',linestyle=':')
ax3.set_xlim(0,Ntries1+1)
ax3.set_ylabel("$c$")
ax3.plot(np.arange(Ntries1+1),samples.T[3],marker='.',color='C3')
ax3.set_xlabel("sample number")
ax3.axvline(Nburnin,color='black',linestyle=':')
fraction_accepted
0.319
Ntries2=1000
theta_start=np.array([1.75,0.,0.,0.])
# Suitable step size
proposal_sigma_1=np.array([8e-1,4e-2,2e-2,2e-2])
proposal_cov=np.diag(proposal_sigma_1**2)
Naccepted_1, samples_1 = MH_sampler(Ntries2,theta_start,dx_II,sigma_d,proposal_cov)
fraction_accepted_1=float(Naccepted_1)/Ntries2
# Step size too large
proposal_sigma_2=np.array([1.5,0.2,0.1,0.1])
proposal_cov=np.diag(proposal_sigma_2**2)
Naccepted_2, samples_2 = MH_sampler(Ntries2,theta_start,dx_II,sigma_d,proposal_cov)
fraction_accepted_2=float(Naccepted_2)/Ntries2
# Step size too small
proposal_sigma_3=np.array([1e-2,2e-3,1e-3,1e-3])
proposal_cov=np.diag(proposal_sigma_3**2)
Naccepted_3, samples_3 = MH_sampler(Ntries2,theta_start,dx_II,sigma_d,proposal_cov)
fraction_accepted_3=float(Naccepted_3)/Ntries2
f, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(2, 3, sharex='col', sharey='row', figsize=(15,10))
ax1.set_xlim(0,Ntries2)
ax1.set_ylabel("$\\alpha$")
ax1.plot(np.arange(Ntries2+1),samples_1.T[0],marker='.',color='C0')
ymin, ymax = ax1.get_ylim()
ax1.set_title("sigma={}, ar={:.3f}".format(proposal_sigma_1,fraction_accepted_1))
ax2.set_xlim(0,Ntries2)
ax2.set_ylim(ymin,ymax)
ax2.plot(np.arange(Ntries2+1),samples_2.T[0],marker='.',color='C0')
ax2.set_title("sigma={}, ar={:.3f}".format(proposal_sigma_2,fraction_accepted_2))
ax3.set_xlim(0,Ntries2)
ax3.set_ylim(ymin,ymax)
ax3.plot(np.arange(Ntries2+1),samples_3.T[0],marker='.',color='C0')
ax3.set_title("sigma={}, ar={:.3f}".format(proposal_sigma_3,fraction_accepted_3))
ax4.set_xlim(0,Ntries2)
ax4.set_xlabel("sample number")
ax4.set_ylabel("$a$")
ax4.plot(np.arange(Ntries2+1),samples_1.T[1],marker='.',color='C1')
ymin, ymax = ax4.get_ylim()
ax4.set_title("sigma={}, ar={:.3f}".format(proposal_sigma_1,fraction_accepted_1))
ax5.set_xlim(0,Ntries2)
ax5.set_ylim(ymin,ymax)
ax5.set_xlabel("sample number")
ax5.plot(np.arange(Ntries2+1),samples_2.T[1],marker='.',color='C1')
ax5.set_title("sigma={}, ar={:.3f}".format(proposal_sigma_2,fraction_accepted_2))
ax6.set_xlim(0,Ntries2)
ax6.set_ylim(ymin,ymax)
ax6.set_xlabel("sample number")
ax6.plot(np.arange(Ntries2+1),samples_3.T[1],marker='.',color='C1')
ax6.set_title("sigma={}, ar={:.3f}".format(proposal_sigma_3,fraction_accepted_3))
ymin, ymax = ax1.get_ylim()
ax1.text(200,ymax-(ymax-ymin)/10., "adequate step size",fontsize=14)
ax2.text(200,ymax-(ymax-ymin)/10., "step size too large",fontsize=14)
ax3.text(200,ymax-(ymax-ymin)/10., "step size too small",fontsize=14)
f.subplots_adjust(wspace=0.1)
Ntries3=6000
Nburnin=350
proposal_sigma=np.array([8e-1,4e-2,2e-2,2e-2])
proposal_cov=np.diag(proposal_sigma**2)
Nchains=5
# Run Nchains different chains starting at different positions in parameter space
chains_dx_II = [MH_sampler(Ntries3,theta_start,dx_II,sigma_d,proposal_cov)
for theta_start in np.stack((np.linspace(0.,3.,Nchains),
np.linspace(-1.,1.,Nchains),
np.linspace(-1.,1.,Nchains),
np.linspace(-1.,1.,Nchains)),axis=1)]
chain_dx_II = np.array([chains_dx_II[j][1] for j in range(Nchains)])
fig, (ax0, ax1, ax2, ax3) = plt.subplots(4,1,figsize=(10,12))
ax0.set_xlim(0,700)
ax0.set_ylabel("$\\alpha$")
for samples in chains_dx_II:
    ax0.plot(np.arange(Ntries3+1),samples[1].T[0],marker='.')
ax0.axvline(Nburnin,color='black',linestyle=':')
ax0.set_title("Trace plot for different chains")
ax1.set_xlim(0,700)
ax1.set_ylabel("$a$")
for samples in chains_dx_II:
    ax1.plot(np.arange(Ntries3+1),samples[1].T[1],marker='.')
ax1.axvline(Nburnin,color='black',linestyle=':')
ax2.set_xlim(0,700)
ax2.set_ylabel("$b$")
for samples in chains_dx_II:
    ax2.plot(np.arange(Ntries3+1),samples[1].T[2],marker='.')
ax2.axvline(Nburnin,color='black',linestyle=':')
ax3.set_xlim(0,700)
ax3.set_ylabel("$c$")
ax3.set_xlabel("sample number")
for samples in chains_dx_II:
    ax3.plot(np.arange(Ntries3+1),samples[1].T[3],marker='.')
ax3.axvline(Nburnin,color='black',linestyle=':')
Convergence check: the Gelman-Rubin test (Gelman et al., "Bayesian Data Analysis", third edition, pp. 284-285).
Parameters: $m$ chains of length $n$, containing samples $\psi_{ij}$ of the estimand ($i = 1, \dots, n$; $j = 1, \dots, m$).
Definitions: the between-chains and within-chains variances are $B = \dfrac{n}{m-1} \sum_{j=1}^{m} \left(\bar{\psi}_{\cdot j} - \bar{\psi}_{\cdot\cdot}\right)^2$ and $W = \dfrac{1}{m} \sum_{j=1}^{m} s_j^2$, where $\bar{\psi}_{\cdot j} = \dfrac{1}{n} \sum_{i=1}^{n} \psi_{ij}$, $\bar{\psi}_{\cdot\cdot} = \dfrac{1}{m} \sum_{j=1}^{m} \bar{\psi}_{\cdot j}$, and $s_j^2 = \dfrac{1}{n-1} \sum_{i=1}^{n} \left(\psi_{ij} - \bar{\psi}_{\cdot j}\right)^2$.
Estimators: two estimators of the marginal posterior variance of the estimand are $\widehat{\mathrm{var}}^{-} = W$ (an underestimate) and $\widehat{\mathrm{var}}^{+} = \dfrac{n-1}{n} W + \dfrac{1}{n} B$ (an overestimate).
Test: $\hat{R} = \sqrt{\widehat{\mathrm{var}}^{+}/\widehat{\mathrm{var}}^{-}}$ should be close to 1 for converged chains.
def gelman_rubin(chain):
    # The input array must have dimensions (nchains, nsamp, npars) = (m, n, npars)
    # between-chains variance
    Psi_dotj = np.mean(chain, axis=1)
    Psi_dotdot = np.mean(Psi_dotj, axis=0)
    m = chain.shape[0]
    n = chain.shape[1]
    B = n / (m - 1.) * np.sum((Psi_dotj - Psi_dotdot)**2, axis=0)
    # within-chains variance
    sj2 = np.var(chain, axis=1, ddof=1)
    W = np.mean(sj2, axis=0)
    # estimators
    var_minus = W
    var_plus = (n - 1.) / n * W + 1. / n * B
    R_hat = np.sqrt(var_plus / var_minus)
    return R_hat
gelman_rubin(chain_dx_II)
array([1.00010273, 1.00116675, 1.01472867, 1.01219264])
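Beyond $\hat{R}$, one can gauge how correlated successive samples are within a chain. A minimal sketch (an addition to the original notebook; autocorr_time is a hypothetical helper, and truncating at the first negative autocorrelation is a common heuristic, not a rigorous estimator):
# Rough integrated autocorrelation time of a 1d chain; the effective sample
# size is then approximately len(x)/tau
def autocorr_time(x, max_lag=200):
    x = x - x.mean()
    acf = np.array([np.mean(x[:len(x)-lag]*x[lag:]) for lag in range(max_lag)])
    acf /= acf[0]
    cut = np.argmax(acf < 0) if np.any(acf < 0) else max_lag
    return 1. + 2.*np.sum(acf[1:cut])
print(autocorr_time(chain_dx_II[0][:,0]))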
# Remove burn-in phase
chain_dx_II_burn = np.array([chains_dx_II[j][1][Nburnin:] for j in range(Nchains)])
chain_dx_II_burn_flat = np.concatenate(chain_dx_II_burn, axis=0)
# Means of parameters (alpha, a, b, c)
chain_dx_II_burn.mean(axis=(0,1))
array([ 2.21274596, 0.14546414, -0.10294555, 0.2467706 ])
# Marginal standard deviations of parameters (alpha, a, b, c)
chain_dx_II_burn.std(axis=(0,1))
array([0.88682913, 0.04556606, 0.02903384, 0.02093263])
# Covariance matrix of parameters (alpha, a, b, c)
np.cov(chain_dx_II_burn_flat.T)
array([[ 7.86493733e-01, -3.11913471e-02,  9.67890826e-03,  4.59158279e-03],
       [-3.11913471e-02,  2.07633923e-03, -7.20143274e-04, -3.04753140e-04],
       [ 9.67890826e-03, -7.20143274e-04,  8.42993590e-04, -3.91585523e-05],
       [ 4.59158279e-03, -3.04753140e-04, -3.91585523e-05,  4.38190310e-04]])
fig = plt.figure(figsize=(20,20))
# Plot 1d marginal distributions
ax0 = fig.add_subplot(4,4,1)
pdf_alpha = get_marginal(chain_dx_II_burn_flat.T[0])
ax0.fill_between(pdf_alpha[0],pdf_alpha[1], 0, color='#52aae7', alpha=0.4)
ax0.set_ylim(0.,pdf_alpha[1].max()*1.05)
ax0.axvline(alpha_GR,color='black',linestyle='--')
ax0.axvline(alpha_Newton,color='black',linestyle='--')
ax0.set_xlabel("$\\alpha$")
ax1 = fig.add_subplot(4,4,6)
pdf_a = get_marginal(chain_dx_II_burn_flat.T[1])
ax1.fill_between(pdf_a[0],pdf_a[1], 0, color='#52aae7', alpha=0.4)
ax1.set_ylim(0.,pdf_a[1].max()*1.05)
ax1.set_xlabel("$a$")
ax2 = fig.add_subplot(4,4,11)
pdf_b = get_marginal(chain_dx_II_burn_flat.T[2])
ax2.fill_between(pdf_b[0],pdf_b[1], 0, color='#52aae7', alpha=0.4)
ax2.set_ylim(0.,pdf_b[1].max()*1.05)
ax2.set_xlabel("$b$")
ax3 = fig.add_subplot(4,4,16)
pdf_c = get_marginal(chain_dx_II_burn_flat.T[3])
ax3.fill_between(pdf_c[0],pdf_c[1], 0, color='#52aae7', alpha=0.4)
ax3.set_ylim(0.,pdf_c[1].max()*1.05)
ax3.set_xlabel("$c$")
# Plot 2d contours
ax0a = fig.add_subplot(4,4,5)
ax0a.scatter(chain_dx_II_burn_flat.T[0], chain_dx_II_burn_flat.T[1], color="black", s=2)
xbins, ybins, contours, chainLevels = get_contours(chain_dx_II_burn_flat.T[0], chain_dx_II_burn_flat.T[1])
ax0a.contourf(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), alpha=0.5)
ax0a.contour(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), linewidths=2)
ax0a.axvline(alpha_GR,color='black',linestyle='--')
ax0a.axvline(alpha_Newton,color='black',linestyle='--')
ax0a.set_xlabel("$\\alpha$")
ax0a.set_ylabel("$a$")
ax1a = fig.add_subplot(4,4,9)
ax1a.scatter(chain_dx_II_burn_flat.T[0], chain_dx_II_burn_flat.T[2], color="black", s=2)
xbins, ybins, contours, chainLevels = get_contours(chain_dx_II_burn_flat.T[0], chain_dx_II_burn_flat.T[2])
ax1a.contourf(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), alpha=0.5)
ax1a.contour(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), linewidths=2)
ax1a.axvline(alpha_GR,color='black',linestyle='--')
ax1a.axvline(alpha_Newton,color='black',linestyle='--')
ax1a.set_xlabel("$\\alpha$")
ax1a.set_ylabel("$b$")
ax1b = fig.add_subplot(4,4,10)
ax1b.scatter(chain_dx_II_burn_flat.T[1], chain_dx_II_burn_flat.T[2], color="black", s=2)
xbins, ybins, contours, chainLevels = get_contours(chain_dx_II_burn_flat.T[1], chain_dx_II_burn_flat.T[2])
ax1b.contourf(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), alpha=0.5)
ax1b.contour(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), linewidths=2)
ax1b.set_xlabel("$a$")
ax2a = fig.add_subplot(4,4,13)
ax2a.scatter(chain_dx_II_burn_flat.T[0], chain_dx_II_burn_flat.T[3], color="black", s=2)
xbins, ybins, contours, chainLevels = get_contours(chain_dx_II_burn_flat.T[0], chain_dx_II_burn_flat.T[3])
ax2a.contourf(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), alpha=0.5)
ax2a.contour(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), linewidths=2)
ax2a.axvline(alpha_GR,color='black',linestyle='--')
ax2a.axvline(alpha_Newton,color='black',linestyle='--')
ax2a.set_xlabel("$\\alpha$")
ax2a.set_ylabel("$c$")
ax2b = fig.add_subplot(4,4,14)
ax2b.scatter(chain_dx_II_burn_flat.T[1], chain_dx_II_burn_flat.T[3], color="black", s=2)
xbins, ybins, contours, chainLevels = get_contours(chain_dx_II_burn_flat.T[1], chain_dx_II_burn_flat.T[3])
ax2b.contourf(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), alpha=0.5)
ax2b.contour(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), linewidths=2)
ax2b.set_xlabel("$a$")
ax2c = fig.add_subplot(4,4,15)
ax2c.scatter(chain_dx_II_burn_flat.T[2], chain_dx_II_burn_flat.T[3], color="black", s=2)
xbins, ybins, contours, chainLevels = get_contours(chain_dx_II_burn_flat.T[2], chain_dx_II_burn_flat.T[3])
ax2c.contourf(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), alpha=0.5)
ax2c.contour(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), linewidths=2)
ax2c.set_xlabel("$b$")
plt.show()
# We now include a strong prior that nuisance parameters are close to zero
def prior_logpdf(theta):
    alpha,a,b,c = theta[0],theta[1],theta[2],theta[3]
    return ss.multivariate_normal([0.,0.,0.],np.diag(np.array([1e-2,1e-2,1e-2])**2)).logpdf([a,b,c])
def target_logpdf(theta,data,sigma_d):
    return prior_logpdf(theta) + lh_platex_logpdf(theta,data,sigma_d)
Ntries3=5000
Nburnin=100
proposal_sigma=np.array([8e-1,1e-2,1e-2,1e-2])
proposal_cov=np.diag(proposal_sigma**2)
Nchains=5
# Run Nchains different chains starting at different positions in parameter space
chains_nuisance = [MH_sampler(Ntries3,theta_start,dx_II,sigma_d,proposal_cov)
for theta_start in np.stack((np.linspace(0.,3.,Nchains),
np.linspace(-1e-2,1e-2,Nchains),
np.linspace(-1e-2,1e-2,Nchains),
np.linspace(-1e-2,1e-2,Nchains)),axis=1)]
chain_nuisance = np.array([chains_nuisance[j][1] for j in range(Nchains)])
fig, (ax0, ax1, ax2, ax3) = plt.subplots(4,1,figsize=(10,12))
ax0.set_xlim(0,700)
ax0.set_ylabel("$\\alpha$")
for samples in chains_nuisance:
    ax0.plot(np.arange(Ntries3+1),samples[1].T[0],marker='.')
ax0.axvline(Nburnin,color='black',linestyle=':')
ax0.set_title("Trace plot for different chains")
ax1.set_xlim(0,700)
ax1.set_ylabel("$a$")
for samples in chains_nuisance:
    ax1.plot(np.arange(Ntries3+1),samples[1].T[1],marker='.')
ax1.axvline(Nburnin,color='black',linestyle=':')
ax2.set_xlim(0,700)
ax2.set_ylabel("$b$")
for samples in chains_nuisance:
    ax2.plot(np.arange(Ntries3+1),samples[1].T[2],marker='.')
ax2.axvline(Nburnin,color='black',linestyle=':')
ax3.set_xlim(0,700)
ax3.set_ylabel("$c$")
ax3.set_xlabel("sample number")
for samples in chains_nuisance:
    ax3.plot(np.arange(Ntries3+1),samples[1].T[3],marker='.')
ax3.axvline(Nburnin,color='black',linestyle=':')
gelman_rubin(chain_nuisance)
array([1.00056404, 1.00171343, 1.00351002, 1.00075282])
# Remove burn-in phase
chain_nuisance_burn = np.array([chains_nuisance[j][1][Nburnin:] for j in range(Nchains)])
chain_nuisance_burn_flat = np.concatenate(chain_nuisance_burn, axis=0)
fig = plt.figure(figsize=(20,20))
# Plot 1d marginal distributions
ax0 = fig.add_subplot(4,4,1)
pdf_alpha = get_marginal(chain_nuisance_burn_flat.T[0])
ax0.fill_between(pdf_alpha[0],pdf_alpha[1], 0, color='#52aae7', alpha=0.4)
ax0.set_ylim(0.,pdf_alpha[1].max()*1.05)
ax0.axvline(alpha_GR,color='black',linestyle='--')
ax0.axvline(alpha_Newton,color='black',linestyle='--')
ax0.set_xlabel("$\\alpha$")
ax1 = fig.add_subplot(4,4,6)
pdf_a = get_marginal(chain_nuisance_burn_flat.T[1])
ax1.fill_between(pdf_a[0],pdf_a[1], 0, color='#52aae7', alpha=0.4)
ax1.set_ylim(0.,pdf_a[1].max()*1.05)
ax1.set_xlabel("$a$")
ax2 = fig.add_subplot(4,4,11)
pdf_b = get_marginal(chain_nuisance_burn_flat.T[2])
ax2.fill_between(pdf_b[0],pdf_b[1], 0, color='#52aae7', alpha=0.4)
ax2.set_ylim(0.,pdf_b[1].max()*1.05)
ax2.set_xlabel("$b$")
ax3 = fig.add_subplot(4,4,16)
pdf_c = get_marginal(chain_nuisance_burn_flat.T[3])
ax3.fill_between(pdf_c[0],pdf_c[1], 0, color='#52aae7', alpha=0.4)
ax3.set_ylim(0.,pdf_c[1].max()*1.05)
ax3.set_xlabel("$c$")
# Plot 2d contours
ax0a = fig.add_subplot(4,4,5)
ax0a.scatter(chain_nuisance_burn_flat.T[0], chain_nuisance_burn_flat.T[1], color="black", s=2)
xbins, ybins, contours, chainLevels = get_contours(chain_nuisance_burn_flat.T[0], chain_nuisance_burn_flat.T[1])
ax0a.contourf(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), alpha=0.5)
ax0a.contour(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), linewidths=2)
ax0a.axvline(alpha_GR,color='black',linestyle='--')
ax0a.axvline(alpha_Newton,color='black',linestyle='--')
ax0a.set_xlabel("$\\alpha$")
ax0a.set_ylabel("$a$")
ax1a = fig.add_subplot(4,4,9)
ax1a.scatter(chain_nuisance_burn_flat.T[0], chain_nuisance_burn_flat.T[2], color="black", s=2)
xbins, ybins, contours, chainLevels = get_contours(chain_nuisance_burn_flat.T[0], chain_nuisance_burn_flat.T[2])
ax1a.contourf(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), alpha=0.5)
ax1a.contour(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), linewidths=2)
ax1a.axvline(alpha_GR,color='black',linestyle='--')
ax1a.axvline(alpha_Newton,color='black',linestyle='--')
ax1a.set_xlabel("$\\alpha$")
ax1a.set_ylabel("$b$")
ax1b = fig.add_subplot(4,4,10)
ax1b.scatter(chain_nuisance_burn_flat.T[1], chain_nuisance_burn_flat.T[2], color="black", s=2)
xbins, ybins, contours, chainLevels = get_contours(chain_nuisance_burn_flat.T[1], chain_nuisance_burn_flat.T[2])
ax1b.contourf(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), alpha=0.5)
ax1b.contour(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), linewidths=2)
ax1b.set_xlabel("$a$")
ax2a = fig.add_subplot(4,4,13)
ax2a.scatter(chain_nuisance_burn_flat.T[0], chain_nuisance_burn_flat.T[3], color="black", s=2)
xbins, ybins, contours, chainLevels = get_contours(chain_nuisance_burn_flat.T[0], chain_nuisance_burn_flat.T[3])
ax2a.contourf(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), alpha=0.5)
ax2a.contour(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), linewidths=2)
ax2a.axvline(alpha_GR,color='black',linestyle='--')
ax2a.axvline(alpha_Newton,color='black',linestyle='--')
ax2a.set_xlabel("$\\alpha$")
ax2a.set_ylabel("$c$")
ax2b = fig.add_subplot(4,4,14)
ax2b.scatter(chain_nuisance_burn_flat.T[1], chain_nuisance_burn_flat.T[3], color="black", s=2)
xbins, ybins, contours, chainLevels = get_contours(chain_nuisance_burn_flat.T[1], chain_nuisance_burn_flat.T[3])
ax2b.contourf(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), alpha=0.5)
ax2b.contour(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), linewidths=2)
ax2b.set_xlabel("$a$")
ax2c = fig.add_subplot(4,4,15)
ax2c.scatter(chain_nuisance_burn_flat.T[2], chain_nuisance_burn_flat.T[3], color="black", s=2)
xbins, ybins, contours, chainLevels = get_contours(chain_nuisance_burn_flat.T[2], chain_nuisance_burn_flat.T[3])
ax2c.contourf(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), alpha=0.5)
ax2c.contour(xbins, ybins, contours, levels=chainLevels,
colors=('#85ddff','#52aae7','#1f77b4'), linewidths=2)
ax2c.set_xlabel("$b$")
plt.show()
The GR value of $\alpha=1.75$ is excluded with this prior on nuisance parameters!
Also, the posterior for $c$ largely excludes the prior mean ($c=0$), which is usually a sign of an issue in the analysis.
We therefore go back to sampling from the likelihood alone (i.e. with an implicit flat prior on all parameters). We run several chains for each data vector and check convergence using the Gelman-Rubin test.
def target_logpdf(theta,data,sigma_d):
return lh_platex_logpdf(theta,data,sigma_d)
Ntries3=8000
Nburnin=500
proposal_sigma=np.array([8e-1,1e-2,1e-2,1e-2])
proposal_cov=np.diag(proposal_sigma**2)
Nchains=5
# Run Nchains different chains starting at different positions in parameter space
chains_dx_I = [MH_sampler(Ntries3,theta_start,dx_I,sigma_d,proposal_cov)
for theta_start in np.stack((np.linspace(0.,3.,Nchains),
np.linspace(-1.,1.,Nchains),
np.linspace(-1.,1.,Nchains),
np.linspace(-1.,1.,Nchains)),axis=1)]
chain_dx_I = np.array([chains_dx_I[j][1] for j in range(Nchains)])
chains_dx_III = [MH_sampler(Ntries3,theta_start,dx_III,sigma_d,proposal_cov)
for theta_start in np.stack((np.linspace(0.,3.,Nchains),
np.linspace(-1.,1.,Nchains),
np.linspace(-1.,1.,Nchains),
np.linspace(-1.,1.,Nchains)),axis=1)]
chain_dx_III = np.array([chains_dx_III[j][1] for j in range(Nchains)])
chains_dx_IV = [MH_sampler(Ntries3,theta_start,dx_IV,sigma_d,proposal_cov)
for theta_start in np.stack((np.linspace(0.,3.,Nchains),
np.linspace(-1.,1.,Nchains),
np.linspace(-1.,1.,Nchains),
np.linspace(-1.,1.,Nchains)),axis=1)]
chain_dx_IV = np.array([chains_dx_IV[j][1] for j in range(Nchains)])
chains_dx_V = [MH_sampler(Ntries3,theta_start,dx_V,sigma_d,proposal_cov)
for theta_start in np.stack((np.linspace(0.,3.,Nchains),
np.linspace(-1.,1.,Nchains),
np.linspace(-1.,1.,Nchains),
np.linspace(-1.,1.,Nchains)),axis=1)]
chain_dx_V = np.array([chains_dx_V[j][1] for j in range(Nchains)])
chains_dx_VII = [MH_sampler(Ntries3,theta_start,dx_VII,sigma_d,proposal_cov)
for theta_start in np.stack((np.linspace(0.,3.,Nchains),
np.linspace(-1.,1.,Nchains),
np.linspace(-1.,1.,Nchains),
np.linspace(-1.,1.,Nchains)),axis=1)]
chain_dx_VII = np.array([chains_dx_VII[j][1] for j in range(Nchains)])
chains_dx_VIII = [MH_sampler(Ntries3,theta_start,dx_VIII,sigma_d,proposal_cov)
for theta_start in np.stack((np.linspace(0.,3.,Nchains),
np.linspace(-1.,1.,Nchains),
np.linspace(-1.,1.,Nchains),
np.linspace(-1.,1.,Nchains)),axis=1)]
chain_dx_VIII = np.array([chains_dx_VIII[j][1] for j in range(Nchains)])
for chain_dx in [chain_dx_I, chain_dx_III, chain_dx_IV, chain_dx_V, chain_dx_VII, chain_dx_VIII]:
    print(gelman_rubin(chain_dx))
[1.02746077 1.03104548 1.01887537 1.01466666]
[1.0182821  1.02147594 1.03138797 1.01855649]
[1.02434891 1.02748291 1.02044756 1.02271191]
[1.02566818 1.02993095 1.02483649 1.0176498 ]
[1.02879326 1.03080267 1.02275608 1.02036373]
[1.02879326 1.03080267 1.02275608 1.02036373]
# Remove burn-in phase and flatten
chain_dx_burn_flat = {}
for i, chain in enumerate([chain_dx_I, chain_dx_II, chain_dx_III, chain_dx_IV, chain_dx_V, chain_dx_VII, chain_dx_VIII]):
    chain_dx_burn_flat[i] = np.concatenate(chain[:,Nburnin:,:], axis=0)
The nuisance parameters are different for each plate. We therefore combine the marginal pdfs $p(\alpha|d_i)$ (or $p(d_i|\alpha)$) only.
The difficulty is that these marginal distributions are sampled, and it is generally difficult to multiply pdfs for which we only have samples. To circumvent this issue, we use a simple one-dimensional kernel density estimator provided by scikit-learn.
from sklearn.neighbors import KernelDensity
alpha_plot = np.linspace(-2, 6, 1000)[:,np.newaxis]
log_dens = np.empty([7,1000])
for i in range(7):
    alpha_training = chain_dx_burn_flat[i][:,0,np.newaxis]
    kde = KernelDensity(kernel='gaussian', bandwidth=0.25).fit(alpha_training)
    log_dens[i,:] = kde.score_samples(alpha_plot)
combined_pdf = np.exp(np.sum(log_dens,axis=0))
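One numerical caution (an addition, not in the original notebook): the sum of seven log-densities can underflow when exponentiated directly; since only the shape of the combined pdf matters here, it is safer to subtract the maximum first:
# Numerically safer variant of the line above
log_sum = np.sum(log_dens, axis=0)
combined_pdf = np.exp(log_sum - log_sum.max())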
fig = plt.figure(figsize=(6,6))
# Plot 1d marginal distributions
ax0 = fig.add_subplot(1,1,1)
ax0.set_xlim(alpha_plot.min(), alpha_plot.max())
# Plot the first plate with legend labels, then the remaining plates without
for i in [0]:
    pdf_alpha = get_marginal(chain_dx_burn_flat[i].T[0])
    ax0.fill_between(pdf_alpha[0],pdf_alpha[1], 0, color='C'+str(i), alpha=0.4, label="histogram")
    ax0.plot(alpha_plot, np.exp(log_dens[i])/np.exp(log_dens[i]).max(), color='C'+str(i), label="kernel density estimate")
for i in range(1,7):
    pdf_alpha = get_marginal(chain_dx_burn_flat[i].T[0])
    ax0.fill_between(pdf_alpha[0],pdf_alpha[1], 0, color='C'+str(i), alpha=0.4)
    ax0.plot(alpha_plot, np.exp(log_dens[i])/np.exp(log_dens[i]).max(), color='C'+str(i))
ax0.plot(alpha_plot, combined_pdf/combined_pdf.max(), lw=2.5, color='black', label="combined likelihood")
ax0.set_ylim(0.,pdf_alpha[1].max()*1.05)
ax0.axvline(alpha_GR,color='black',linestyle='--')
ax0.axvline(alpha_Newton,color='black',linestyle='--')
ax0.set_xlabel("$\\alpha$")
handles, labels = plt.gca().get_legend_handles_labels()
order = [0,2,1]
ax0.legend([handles[idx] for idx in order],[labels[idx] for idx in order], loc="lower left")
The full data now clearly prefer the GR value $\alpha_\mathrm{GR}$ versus the Newtonian value $\alpha_\mathrm{Newton}$.
We will compute the Bayes factor (for non-committal priors on the models):
$ \mathcal{B} = \dfrac{p(\alpha_\mathrm{GR}|d)}{p(\alpha_\mathrm{Newton}|d)} = \dfrac{p(d|\alpha_\mathrm{GR})}{p(d|\alpha_\mathrm{Newton})} $
This is linked to the Savage-Dickey density ratio for nested models.
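For reference (an addition to the original text): for nested models with a prior that is separable in $\alpha$ and the nuisance parameters, the Savage-Dickey density ratio evaluated at the nested value $\alpha_0$ reads $\mathcal{B} = \left. \dfrac{p(\alpha|d)}{p(\alpha)} \right|_{\alpha=\alpha_0}$, i.e. the marginal posterior of $\alpha$ at $\alpha_0$, divided by the prior density there, directly gives the Bayes factor.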
The first possible idea is to estimate $p(\alpha_\mathrm{GR}|d)$ and $p(\alpha_\mathrm{Newton}|d)$ via approximate Bayesian computation: we simply count the number of $\alpha$ samples that are close to $\alpha_\mathrm{GR}$ (i.e. such that $|\alpha-\alpha_\mathrm{GR}|<\varepsilon$ for some $\varepsilon$), and similarly for $\alpha_\mathrm{Newton}$. The estimate of the Bayes factor is the ratio of these counts.
def get_BayesFactor_ABC(chain, epsilon):
    # Count the samples falling within epsilon of each value of alpha
    GR_samples = np.sum(np.abs(chain[:,:,0]-alpha_GR) < epsilon)
    Newton_samples = np.sum(np.abs(chain[:,:,0]-alpha_Newton) < epsilon)
    return GR_samples/Newton_samples
# We get an estimate of the Bayes factor (which strongly depends on epsilon)
get_BayesFactor_ABC(chain_dx_II, 0.01), get_BayesFactor_ABC(chain_dx_II, 0.005), get_BayesFactor_ABC(chain_dx_II, 0.002)
(1.8441849440616902, 2.013566452323308, 4.1560379536559955)
To compute the evidence $p(d|\alpha)$ in each case, we can marginalise the likelihood over the nuisance parameters $\psi = (a,b,c)$:
$ p(d|\alpha) = \int p(d,\psi|\alpha) \; \mathrm{d}\psi = \int p(d|\alpha,\psi) p(\psi) \; \mathrm{d}\psi = \left\langle p(d|\alpha,\psi )\right\rangle_{p(\psi)}. $
Since the integral is sufficiently low-dimensional (3d), it can be evaluated by direct numerical quadrature (or on a grid).
def integrand(a,b,c,alpha,data,sigma_d):
    return np.exp(target_logpdf([alpha,a,b,c],data,sigma_d))
def get_evidence_exact(alpha,data,sigma_d):
    from scipy.integrate import tplquad
    # tplquad integrates the first argument of integrand innermost; all bounds
    # are constant here, so the lambdas simply return -5 and 5
    evidence = tplquad(integrand, -5, 5, lambda c: -5, lambda c: 5,
                       lambda c, b: -5, lambda c, b: 5, args=(alpha,data,sigma_d))[0]
    return evidence
def get_BayesFactor_exact(data,sigma_d):
    evidence_GR = get_evidence_exact(alpha_GR,data,sigma_d)
    evidence_Newton = get_evidence_exact(alpha_Newton,data,sigma_d)
    return evidence_GR/evidence_Newton
get_BayesFactor_exact(dx_II,sigma_d)
/home/leclercq/.local/apps/anaconda/python3.8_2020.11/install/lib/python3.8/site-packages/scipy/integrate/quadpack.py:879: IntegrationWarning: The integral is probably divergent, or slowly convergent. quad_r = quad(f, low, high, args=args, full_output=self.full_output,
2.6514608716997525
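Since the text above mentions a grid, here is a brute-force evaluation of the same integral as a cross-check (an illustrative sketch; the grid bounds and resolution are choices made here, not taken from the original notebook):
# Riemann sum on a regular grid; [-1, 1]^3 covers the posterior bulk seen in
# the corner plots, and 41 points per axis keeps this reasonably fast
grid = np.linspace(-1., 1., 41)
step = grid[1] - grid[0]
values = np.array([[[integrand(a, b, c, alpha_GR, dx_II, sigma_d)
                     for c in grid] for b in grid] for a in grid])
print(values.sum() * step**3)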
An alternative to explicit integration (which also works in higher dimensions) is to evaluate the evidence by Monte Carlo integration: for a set of $N$ samples $\psi_i$ drawn from the prior $p(\psi)$, we have
$ \left\langle p(d|\alpha,\psi )\right\rangle_{p(\psi)} \approx \dfrac{1}{N} \sum_{i=1}^N p(d|\alpha,\psi_i). $
Contrary to the previous cases (where we implicitly assumed a flat prior on $\psi$), here we have to specify the prior on $\psi = (a,b,c)$ explicitly:
def psi_prior_pdf():
    return ss.multivariate_normal([0.,0.,0.],np.diag(np.array([0.2,0.1,0.2])**2))
def get_evidence_MC(alpha,data,sigma_d,Nsamp):
    # Draw a set of samples of nuisance parameters from the prior
    samples = psi_prior_pdf().rvs(Nsamp)
    # Compute the Monte Carlo estimate of the integral
    target_pdf = np.exp([lh_platex_logpdf([alpha,a,b,c],data,sigma_d) for (a,b,c) in samples])
    MonteCarloI = np.average(target_pdf)
    return MonteCarloI
def get_BayesFactor_MC(data,sigma_d,Nsamp):
    evidence_GR = get_evidence_MC(alpha_GR,data,sigma_d,Nsamp)
    evidence_Newton = get_evidence_MC(alpha_Newton,data,sigma_d,Nsamp)
    return evidence_GR/evidence_Newton
# We need enough samples to get a good estimate of the integrals
get_evidence_MC(alpha_GR,dx_II,sigma_d,500),\
get_evidence_MC(alpha_GR,dx_II,sigma_d,20000),\
get_evidence_MC(alpha_GR,dx_II,sigma_d,50000),\
get_evidence_MC(alpha_GR,dx_II,sigma_d,100000)
(0.00027765622342792263, 0.0002483169041026646, 0.0003019567503862281, 0.00033232170597620426)
# We then get an estimate of the Bayes factor (which still scatters quite a lot)
get_BayesFactor_MC(dx_II,sigma_d,50000), get_BayesFactor_MC(dx_II,sigma_d,50000), get_BayesFactor_MC(dx_II,sigma_d,50000)
(3.1191638567274924, 3.5891040329680095, 3.304929017933173)
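Finally, if the plates are treated as independent (an extrapolating sketch, not part of the original notebook), the per-plate Bayes factors multiply, so accumulating their logarithms is numerically safer:
# Hypothetical combination over all plates: sum the per-plate log Bayes factors
logB_total = 0.
for data in [dx_I, dx_II, dx_III, dx_IV, dx_V, dx_VII, dx_VIII]:
    logB_total += np.log(get_BayesFactor_MC(data, sigma_d, 50000))
print(np.exp(logB_total))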