#!/usr/bin/env python
# coding: utf-8

# # The 1919 Eclipse: parameter inference and model comparison
# Florent Leclercq
# Imperial Centre for Inference and Cosmology, Imperial College London
# florent.leclercq@polytechnique.org

# In[1]:

import numpy as np
import scipy
import scipy.ndimage  # needed explicitly for the smoothing routines below
import scipy.stats as ss
import matplotlib.pyplot as plt
np.random.seed(3)

# In[2]:

# Some plotting tools and routines
nBins = 30
nContourLevels = 3
# 2d contour levels: (68%, 95%, 99%)
confLevels = [.3173, .0455, .0027]
smoothingKernel = 1

def get_marginal(samples):
    # create 1d histogram
    hist1d, edges = np.histogram(samples, weights=np.ones_like(samples), density=True, bins=nBins)
    # Bin center between histogram edges
    centers = (edges[1:]+edges[:-1])/2
    # Filter data
    pdf = np.array((centers, hist1d))
    pdf = scipy.ndimage.gaussian_filter1d(pdf, sigma=smoothingKernel)
    # Clip the pdf to zero out of the bins
    centers, hist = pdf[0], pdf[1]
    centers = np.insert(centers, 0, centers[0]-(centers[1]-centers[0]))
    hist = np.insert(hist, 0, 0.)
    centers = np.insert(centers, len(centers), centers[len(centers)-1]-(centers[len(centers)-2]-centers[len(centers)-1]))
    hist = np.insert(hist, len(hist), 0.)
    # Normalize all the pdfs to the same height
    hist /= hist.max()
    pdf = np.array((centers, hist))
    return pdf

def get_contours(samples_x, samples_y):
    # Empty arrays needed below
    chainLevels = np.ones(nContourLevels+1)
    extents = np.empty(4)
    # These are needed to compute the contour levels
    nBinsFlat = np.linspace(0., nBins**2, nBins**2)
    # Create 2d histogram
    hist2d, xedges, yedges = np.histogram2d(samples_x, samples_y,
                                            weights=np.ones_like(samples_x), bins=nBins)
    # image extent, needed below for contour lines
    extents = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
    # Normalize
    hist2d = hist2d/np.sum(hist2d)
    # Cumulative 1d distribution
    histOrdered = np.sort(hist2d.flat)
    histCumulative = np.cumsum(histOrdered)
    # Compute contour levels (from low to high for technical reasons)
    for l in range(nContourLevels):
        # Find location of contour level in 1d histCumulative
        temp = np.interp(confLevels[l], histCumulative, nBinsFlat)
        # Find "height" of contour level
        chainLevels[nContourLevels-1-l] = np.interp(temp, nBinsFlat, histOrdered)
    # Apply Gaussian smoothing
    contours = scipy.ndimage.gaussian_filter(hist2d.T, sigma=smoothingKernel)
    xbins = (xedges[1:]+xedges[:-1])/2
    ybins = (yedges[1:]+yedges[:-1])/2
    return xbins, ybins, contours, chainLevels

# ## Data model

# In[3]:

def Dx_11(alpha,a,b,c): return c-0.160*b-1.261*a-0.587*alpha/19.8
def Dx_5(alpha,a,b,c):  return c-1.107*b-0.160*a-0.557*alpha/19.8
def Dx_4(alpha,a,b,c):  return c+0.472*b+0.334*a-0.186*alpha/19.8
def Dx_3(alpha,a,b,c):  return c+0.360*b+0.348*a-0.222*alpha/19.8
def Dx_6(alpha,a,b,c):  return c+1.099*b+0.587*a+0.080*alpha/19.8
def Dx_10(alpha,a,b,c): return c+1.321*b+0.860*a+0.158*alpha/19.8
def Dx_2(alpha,a,b,c):  return c-0.328*b+1.079*a+1.540*alpha/19.8

def Dy_11(alpha,d,e,f): return f-1.261*d-0.160*e+0.036*alpha/19.8
def Dy_5(alpha,d,e,f):  return f-0.160*d-1.107*e-0.789*alpha/19.8
def Dy_4(alpha,d,e,f):  return f+0.334*d+0.472*e+1.336*alpha/19.8
def Dy_3(alpha,d,e,f):  return f+0.348*d+0.360*e+1.574*alpha/19.8
def Dy_6(alpha,d,e,f):  return f+0.587*d+1.099*e+0.726*alpha/19.8
def Dy_10(alpha,d,e,f): return f+0.860*d+1.321*e+0.589*alpha/19.8
def Dy_2(alpha,d,e,f):  return f+1.079*d-0.328*e-0.156*alpha/19.8

stars = np.array([11,5,4,3,6,10,2])
alpha_GR = 1.75
alpha_Newton = 0.9
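# As a quick illustration of the data model (an aside; `alpha_test` and `predictions` are local names introduced only for this example): with all plate constants set to zero, the predicted x-displacements for the GR and Newtonian deflection values differ most for star 2, which carries the largest $\alpha$ coefficient.

# In[ ]:

# Illustrative only: model predictions with zero plate constants
for alpha_test in (alpha_GR, alpha_Newton):
    predictions = [Dx(alpha_test, 0., 0., 0.) for Dx in (Dx_11, Dx_5, Dx_4, Dx_3, Dx_6, Dx_10, Dx_2)]
    print(alpha_test, np.round(predictions, 3))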
# ## Simulations

# In[4]:

nsims = 10
a = ss.uniform(-1.,2.).rvs(nsims)
b = ss.uniform(-1.,2.).rvs(nsims)
c = ss.uniform(-1.,2.).rvs(nsims)
d = ss.uniform(-1.,2.).rvs(nsims)
e = ss.uniform(-1.,2.).rvs(nsims)
f = ss.uniform(-1.,2.).rvs(nsims)
alpha = ss.uniform(-0.75,2+0.75).rvs(nsims)
sims_x = np.array([ Dx_11(alpha,a,b,c), Dx_5(alpha,a,b,c), Dx_4(alpha,a,b,c), Dx_3(alpha,a,b,c),
                    Dx_6(alpha,a,b,c), Dx_10(alpha,a,b,c), Dx_2(alpha,a,b,c) ])
sims_y = np.array([ Dy_11(alpha,d,e,f), Dy_5(alpha,d,e,f), Dy_4(alpha,d,e,f), Dy_3(alpha,d,e,f),
                    Dy_6(alpha,d,e,f), Dy_10(alpha,d,e,f), Dy_2(alpha,d,e,f) ])

# ## Data vectors

# In[5]:

# Using comparison plate 14_2a
dx_I = np.array([ -1.411+1.500 - (-0.478+0.552), -1.048+1.500 - (-0.544+0.552), -1.216+1.500 - (-0.368+0.552),
                  -1.237+1.500 - (-0.350+0.552), -1.342+1.500 - (-0.317+0.552), -1.289+1.500 - (-0.272+0.552),
                  -0.789+1.500 - (-0.396+0.552) ])
dy_I = np.array([ -0.554+0.554 - (-0.109+0.206), -0.338+0.554 - (-0.204+0.206), +0.114+0.554 - (-0.136+0.206),
                  +0.150+0.554 - (-0.073+0.206), +0.124+0.554 - (-0.144+0.206), +0.205+0.554 - (-0.146+0.206),
                  +0.109+0.554 - (-0.182+0.206) ])
dx_II = np.array([ -1.416+1.500 - (-0.478+0.552), -1.221+1.500 - (-0.544+0.552), -1.054+1.500 - (-0.368+0.552),
                   -1.079+1.500 - (-0.350+0.552), -1.012+1.500 - (-0.317+0.552), -0.999+1.500 - (-0.272+0.552),
                   -0.733+1.500 - (-0.396+0.552) ])
dy_II = np.array([ -1.324+1.324 - (-0.109+0.206), -1.312+1.324 - (-0.204+0.206), -0.944+1.324 - (-0.136+0.206),
                   -0.862+1.324 - (-0.073+0.206), -0.932+1.324 - (-0.144+0.206), -0.948+1.324 - (-0.146+0.206),
                   -1.019+1.324 - (-0.182+0.206) ])
dx_III = np.array([ +0.592-0.500 - (-0.478+0.552), +0.756-0.500 - (-0.544+0.552), +0.979-0.500 - (-0.368+0.552),
                    +0.958-0.500 - (-0.350+0.552), +1.052-0.500 - (-0.317+0.552), +1.157-0.500 - (-0.272+0.552),
                    +1.256-0.500 - (-0.396+0.552) ])
dy_III = np.array([ +0.956-0.843 - (-0.109+0.206), +0.843-0.843 - (-0.204+0.206), +1.172-0.843 - (-0.136+0.206),
                    +1.244-0.843 - (-0.073+0.206), +1.197-0.843 - (-0.144+0.206), +1.211-0.843 - (-0.146+0.206),
                    +0.924-0.843 - (-0.182+0.206) ])
dx_IV = np.array([ +0.563-0.500 - (-0.478+0.552), +0.683-0.500 - (-0.544+0.552), +0.849-0.500 - (-0.368+0.552),
                   +0.861-0.500 - (-0.350+0.552), +0.894-0.500 - (-0.317+0.552), +0.934-0.500 - (-0.272+0.552),
                   +1.177-0.500 - (-0.396+0.552) ])
dy_IV = np.array([ +1.238-1.226 - (-0.109+0.206), +1.226-1.226 - (-0.204+0.206), +1.524-1.226 - (-0.136+0.206),
                   +1.587-1.226 - (-0.073+0.206), +1.564-1.226 - (-0.144+0.206), +1.522-1.226 - (-0.146+0.206),
                   +1.373-1.226 - (-0.182+0.206) ])
dx_V = np.array([ +0.406-0.400 - (-0.478+0.552), +0.468-0.400 - (-0.544+0.552), +0.721-0.400 - (-0.368+0.552),
                  +0.733-0.400 - (-0.350+0.552), +0.798-0.400 - (-0.317+0.552), +0.864-0.400 - (-0.272+0.552),
                  +0.995-0.400 - (-0.396+0.552) ])
dy_V = np.array([ +0.970-0.861 - (-0.109+0.206), +0.861-0.861 - (-0.204+0.206), +1.167-0.861 - (-0.136+0.206),
                  +1.234-0.861 - (-0.073+0.206), +1.130-0.861 - (-0.144+0.206), +1.119-0.861 - (-0.146+0.206),
                  +0.935-0.861 - (-0.182+0.206) ])
# Plate VI was not used!
dx_VII = np.array([ -1.456+1.500 - (-0.478+0.552), -1.267+1.500 - (-0.544+0.552), -1.028+1.500 - (-0.368+0.552),
                    -1.010+1.500 - (-0.350+0.552), -0.888+1.500 - (-0.317+0.552), -0.820+1.500 - (-0.272+0.552),
                    -0.768+1.500 - (-0.396+0.552) ])
dy_VII = np.array([ +0.964-0.777 - (-0.109+0.206), +0.777-0.777 - (-0.204+0.206), +1.142-0.777 - (-0.136+0.206),
                    +1.185-0.777 - (-0.073+0.206), +1.125-0.777 - (-0.144+0.206), +1.072-0.777 - (-0.146+0.206),
                    +0.892-0.777 - (-0.182+0.206) ])
dx_VIII = np.array([ -1.285+1.300 - (-0.478+0.552), -1.152+1.300 - (-0.544+0.552), -0.927+1.300 - (-0.368+0.552),
                     -0.897+1.300 - (-0.350+0.552), -0.838+1.300 - (-0.317+0.552), -0.768+1.300 - (-0.272+0.552),
                     -0.585+1.300 - (-0.396+0.552) ])
dy_VIII = np.array([ -1.195+1.322 - (-0.109+0.206), -1.332+1.322 - (-0.204+0.206), -0.930+1.322 - (-0.136+0.206),
                     -0.894+1.322 - (-0.073+0.206), -0.937+1.322 - (-0.144+0.206), -0.964+1.322 - (-0.146+0.206),
                     -1.166+1.322 - (-0.182+0.206) ])
# Assumed error on the measurements
sigma_d = 0.05

# In[6]:

from matplotlib.lines import Line2D
fig, (ax0, ax1) = plt.subplots(1, 2, figsize=(20,8))
for i, star in enumerate(stars):
    ax0.scatter(star*np.ones(nsims),sims_x[i],marker="o",color="black")
for dx in [dx_I,dx_II,dx_III,dx_IV,dx_V,dx_VII,dx_VIII]:
    ax0.scatter(stars,dx,color="C0",marker="D")
ax0.set_xlabel("Star index")
ax0.set_ylabel("Dx")
legend_elements = [Line2D([0], [0], lw=0, marker="o", color="black", label="simulations"),
                   Line2D([0], [0], lw=0, marker="D", color="C0", label="data"),
                   ]
ax0.legend(handles=legend_elements)
for i, star in enumerate(stars):
    ax1.scatter(star*np.ones(nsims),sims_y[i],marker="o",color="black")
for dy in [dy_I,dy_II,dy_III,dy_IV,dy_V,dy_VII,dy_VIII]:
    ax1.scatter(stars,dy,color="C0",marker="D")
ax1.set_xlabel("Star index")
ax1.set_ylabel("Dy")
ax1.legend(handles=legend_elements)

# ## Log-likelihood

# In[7]:

def lh_platex_logpdf(theta,data,sigma_d):
    alpha,a,b,c = theta[0],theta[1],theta[2],theta[3]
    theory = np.array([ Dx_11(alpha,a,b,c), Dx_5(alpha,a,b,c), Dx_4(alpha,a,b,c), Dx_3(alpha,a,b,c),
                        Dx_6(alpha,a,b,c), Dx_10(alpha,a,b,c), Dx_2(alpha,a,b,c) ])
    return -1/2.*np.sum((theory-data)**2/sigma_d**2)

def lh_platey_logpdf(theta,data,sigma_d):
    alpha,d,e,f = theta[0],theta[1],theta[2],theta[3]
    theory = np.array([ Dy_11(alpha,d,e,f), Dy_5(alpha,d,e,f), Dy_4(alpha,d,e,f), Dy_3(alpha,d,e,f),
                        Dy_6(alpha,d,e,f), Dy_10(alpha,d,e,f), Dy_2(alpha,d,e,f) ])
    # Note that the second term (the log of the Gaussian normalization) is a constant:
    # it is not relevant for MCMC, only for model comparison
    return -1/2.*np.sum((theory-data)**2/sigma_d**2) - theory.shape[0]*np.log(np.sqrt(2*np.pi)*sigma_d)

def target_logpdf(theta,data,sigma_d):
    return lh_platex_logpdf(theta,data,sigma_d)

# In[8]:

lh_platex_logpdf([1.7,0.,0.,0.],dx_II,sigma_d), lh_platey_logpdf([0.9,0.1,-0.1,0.05],dy_V,sigma_d)

# ## Metropolis-Hastings sampler

# In[9]:

def uniform_proposal_pdf(proposal_cov):
    sigmas = np.sqrt(np.diag(proposal_cov))
    return ss.uniform(loc=-sigmas*np.sqrt(3), scale=2*sigmas*np.sqrt(3))

def gaussian_proposal_pdf(proposal_cov):
    return ss.multivariate_normal(np.zeros(proposal_cov.shape[0]),proposal_cov)

def proposal_pdf(proposal_cov):
    # return uniform_proposal_pdf(proposal_cov)
    return gaussian_proposal_pdf(proposal_cov)
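# For a symmetric proposal, $q(\theta'|\theta)=q(\theta|\theta')$, the Metropolis-Hastings acceptance
# probability reduces to the Metropolis ratio
# \begin{equation}
# a(\theta'|\theta) = \min\left(1, \frac{\pi(\theta'|d)}{\pi(\theta|d)}\right) = \min\left(1, \exp\left[\log\pi(\theta'|d)-\log\pi(\theta|d)\right]\right),
# \end{equation}
# which is what the sampler below evaluates in log space.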
# In[10]:

def MH_sampler(Ntries,theta_start,data,sigma_d,proposal_cov):
    Naccepted = 0
    samples = list()
    samples.append(theta_start)
    theta = theta_start
    for i in range(Ntries):
        theta_p = theta + proposal_pdf(proposal_cov).rvs()
        # the uniform/Gaussian proposal pdf is symmetric, so the acceptance
        # ratio simplifies to the Metropolis ratio
        a = min(1, np.exp(target_logpdf(theta_p,data,sigma_d) - target_logpdf(theta,data,sigma_d)))
        u = ss.uniform().rvs()
        if u < a:
            Naccepted += 1
            theta = theta_p
        samples.append(theta)
    return Naccepted, np.array(samples)

# In[11]:

Ntries1 = 1000
Nburnin = 100
proposal_sigma = np.array([8e-1,4e-2,2e-2,2e-2])
proposal_cov = np.diag(proposal_sigma**2)
theta_start = np.array([1.75,0.,0.,0.])
Naccepted, samples = MH_sampler(Ntries1,theta_start,dx_II,sigma_d,proposal_cov)
fraction_accepted = float(Naccepted)/Ntries1

# In[12]:

fig, (ax0, ax1, ax2, ax3) = plt.subplots(4,1,figsize=(10,12))
ax0.set_xlim(0,Ntries1+1)
ax0.set_title("Trace plot")
ax0.set_ylabel("$\\alpha$")
ax0.plot(np.arange(Ntries1+1),samples.T[0],marker='.',color='C0')
ax0.axhline(alpha_Newton,color='black',linestyle='--')
ax0.axhline(alpha_GR,color='black',linestyle='--')
ax0.axvline(Nburnin,color='black',linestyle=':')
ax1.set_xlim(0,Ntries1+1)
ax1.set_ylabel("$a$")
ax1.plot(np.arange(Ntries1+1),samples.T[1],marker='.',color='C1')
ax1.axvline(Nburnin,color='black',linestyle=':')
ax2.set_xlim(0,Ntries1+1)
ax2.set_ylabel("$b$")
ax2.plot(np.arange(Ntries1+1),samples.T[2],marker='.',color='C2')
ax2.axvline(Nburnin,color='black',linestyle=':')
ax3.set_xlim(0,Ntries1+1)
ax3.set_ylabel("$c$")
ax3.plot(np.arange(Ntries1+1),samples.T[3],marker='.',color='C3')
ax3.set_xlabel("sample number")
ax3.axvline(Nburnin,color='black',linestyle=':')

# In[13]:

fraction_accepted

# ## Markov chain diagnostics

# ### 1- Step size

# In[14]:

Ntries2 = 1000
theta_start = np.array([1.75,0.,0.,0.])

# In[15]:

# Suitable step size
proposal_sigma_1 = np.array([8e-1,4e-2,2e-2,2e-2])
proposal_cov = np.diag(proposal_sigma_1**2)
Naccepted_1, samples_1 = MH_sampler(Ntries2,theta_start,dx_II,sigma_d,proposal_cov)
fraction_accepted_1 = float(Naccepted_1)/Ntries2

# In[16]:

# Step size too large
proposal_sigma_2 = np.array([1.5,0.2,0.1,0.1])
proposal_cov = np.diag(proposal_sigma_2**2)
Naccepted_2, samples_2 = MH_sampler(Ntries2,theta_start,dx_II,sigma_d,proposal_cov)
fraction_accepted_2 = float(Naccepted_2)/Ntries2

# In[17]:

# Step size too small
proposal_sigma_3 = np.array([1e-2,2e-3,1e-3,1e-3])
proposal_cov = np.diag(proposal_sigma_3**2)
Naccepted_3, samples_3 = MH_sampler(Ntries2,theta_start,dx_II,sigma_d,proposal_cov)
fraction_accepted_3 = float(Naccepted_3)/Ntries2

# In[18]:

f, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(2, 3, sharex='col', sharey='row', figsize=(15,10))
ax1.set_xlim(0,Ntries2)
ax1.set_ylabel("$\\alpha$")
ax1.plot(np.arange(Ntries2+1),samples_1.T[0],marker='.',color='C0')
ymin, ymax = ax1.get_ylim()
ax1.set_title("sigma={}, ar={:.3f}".format(proposal_sigma_1,fraction_accepted_1))
ax2.set_xlim(0,Ntries2)
ax2.set_ylim(ymin,ymax)
ax2.plot(np.arange(Ntries2+1),samples_2.T[0],marker='.',color='C0')
ax2.set_title("sigma={}, ar={:.3f}".format(proposal_sigma_2,fraction_accepted_2))
ax3.set_xlim(0,Ntries2)
ax3.set_ylim(ymin,ymax)
ax3.plot(np.arange(Ntries2+1),samples_3.T[0],marker='.',color='C0')
ax3.set_title("sigma={}, ar={:.3f}".format(proposal_sigma_3,fraction_accepted_3))
ax4.set_xlim(0,Ntries2)
ax4.set_xlabel("sample number")
ax4.set_ylabel("$a$")
ax4.plot(np.arange(Ntries2+1),samples_1.T[1],marker='.',color='C1')
ymin, ymax = ax4.get_ylim()
ax4.set_title("sigma={}, ar={:.3f}".format(proposal_sigma_1,fraction_accepted_1))
ax5.set_xlim(0,Ntries2)
ax5.set_ylim(ymin,ymax)
ax5.set_xlabel("sample number")
ax5.plot(np.arange(Ntries2+1),samples_2.T[1],marker='.',color='C1')
ax5.set_title("sigma={}, ar={:.3f}".format(proposal_sigma_2,fraction_accepted_2))
ax6.set_xlim(0,Ntries2)
ax6.set_ylim(ymin,ymax)
ax6.set_xlabel("sample number")
ax6.plot(np.arange(Ntries2+1),samples_3.T[1],marker='.',color='C1')
ax6.set_title("sigma={}, ar={:.3f}".format(proposal_sigma_3,fraction_accepted_3))
ymin, ymax = ax1.get_ylim()
ax1.text(200,ymax-(ymax-ymin)/10., "adequate step size",fontsize=14)
ax2.text(200,ymax-(ymax-ymin)/10., "step size too large",fontsize=14)
ax3.text(200,ymax-(ymax-ymin)/10., "step size too small",fontsize=14)
f.subplots_adjust(wspace=0.1)
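# The lag-one autocorrelation of the $\alpha$ trace quantifies what the trace plots show
# (an illustrative aside, using the chains run above): it is typically close to 1 both when
# the step size is too large (most proposals are rejected) and when it is too small
# (accepted moves are tiny).

# In[ ]:

for label, s in zip(("adequate", "too large", "too small"), (samples_1, samples_2, samples_3)):
    print(label, np.corrcoef(s[:-1,0], s[1:,0])[0,1])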
# ### 2- Multiple chains, different starting points

# In[19]:

Ntries3 = 6000
Nburnin = 350
proposal_sigma = np.array([8e-1,4e-2,2e-2,2e-2])
proposal_cov = np.diag(proposal_sigma**2)
Nchains = 5

# In[20]:

# Run Nchains different chains starting at different positions in parameter space
chains_dx_II = [MH_sampler(Ntries3,theta_start,dx_II,sigma_d,proposal_cov)
                for theta_start in np.stack((np.linspace(0.,3.,Nchains), np.linspace(-1.,1.,Nchains),
                                             np.linspace(-1.,1.,Nchains), np.linspace(-1.,1.,Nchains)),axis=1)]
chain_dx_II = np.array([chains_dx_II[j][1] for j in range(Nchains)])

# In[21]:

fig, (ax0, ax1, ax2, ax3) = plt.subplots(4,1,figsize=(10,12))
ax0.set_xlim(0,700)
ax0.set_ylabel("$\\alpha$")
for samples in chains_dx_II:
    ax0.plot(np.arange(Ntries3+1),samples[1].T[0],marker='.')
ax0.axvline(Nburnin,color='black',linestyle=':')
ax0.set_title("Trace plot for different chains")
ax1.set_xlim(0,700)
ax1.set_ylabel("$a$")
for samples in chains_dx_II:
    ax1.plot(np.arange(Ntries3+1),samples[1].T[1],marker='.')
ax1.axvline(Nburnin,color='black',linestyle=':')
ax2.set_xlim(0,700)
ax2.set_ylabel("$b$")
for samples in chains_dx_II:
    ax2.plot(np.arange(Ntries3+1),samples[1].T[2],marker='.')
ax2.axvline(Nburnin,color='black',linestyle=':')
ax3.set_xlim(0,700)
ax3.set_ylabel("$c$")
ax3.set_xlabel("sample number")
for samples in chains_dx_II:
    ax3.plot(np.arange(Ntries3+1),samples[1].T[3],marker='.')
ax3.axvline(Nburnin,color='black',linestyle=':')

# ### 3- Gelman-Rubin test

# Gelman *et al.*, "*Bayesian Data Analysis*" (third edition), p. 284-285
#
# **Parameters**:
# * $m$: number of chains
# * $n$: length of chains
#
# **Definitions**:
# * "between" chains variance:
# \begin{equation}
# B \equiv \frac{n}{m-1} \sum_{j=1}^m \left( \bar{\psi}_{. j} - \bar{\psi}_{..} \right)^2 \quad \mathrm{where} \quad \bar{\psi}_{. j} = \frac{1}{n} \sum_{i=1}^n \psi_{ij} \quad \mathrm{and} \quad \bar{\psi}_{..} = \frac{1}{m} \sum_{j=1}^m \bar{\psi}_{.j}
# \end{equation}
# * "within" chains variance:
# \begin{equation}
# W \equiv \frac{1}{m} \sum_{j=1}^m s_j^2 \quad \mathrm{where} \quad s_j^2 = \frac{1}{n-1} \sum_{i=1}^n \left( \psi_{ij} - \bar{\psi}_{.j} \right)^2
# \end{equation}
#
# **Estimators**:
# Estimators of the marginal posterior variance of the estimand:
# * $\widehat{\mathrm{var}}^- \equiv W$: underestimates the variance
# * $\widehat{\mathrm{var}}^+ \equiv \frac{n-1}{n}W + \frac{1}{n} B$: overestimates the variance
#
# **Test**:
# * Potential scale reduction factor: $\widehat{R} \equiv \sqrt{\frac{\widehat{\mathrm{var}}^+}{\widehat{\mathrm{var}}^-}}$
# * Test: $\widehat{R} \rightarrow 1$ as $n \rightarrow \infty$

# In[22]:

def gelman_rubin(chain):
    # between chains variance
    Psi_dotj = np.mean(chain, axis=1)
    Psi_dotdot = np.mean(Psi_dotj, axis=0)
    m = chain.shape[0]
    n = chain.shape[1]
    B = n / (m - 1.) * np.sum((Psi_dotj - Psi_dotdot)**2, axis=0)
    # within chains variance
    sj2 = np.var(chain, axis=1, ddof=1)
    W = np.mean(sj2, axis=0)
    # estimators
    var_minus = W
    var_plus = (n - 1.) / n * W + 1. / n * B
    R_hat = np.sqrt(var_plus / var_minus)
    return R_hat

# The input array must have dimensions (nchains, nsamp, npars) = (m, n, npars).
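# As a quick sanity check of the implementation (an illustrative aside): independent chains
# drawn from the same distribution should give $\widehat{R} \approx 1$ for every parameter.

# In[ ]:

gelman_rubin(ss.norm().rvs(size=(4, 2000, 3)))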
# In[23]:

gelman_rubin(chain_dx_II)

# ## Likelihood surface

# In[24]:

# Remove burn-in phase
chain_dx_II_burn = np.array([chains_dx_II[j][1][Nburnin:] for j in range(Nchains)])
chain_dx_II_burn_flat = np.concatenate(chain_dx_II_burn, axis=0)

# In[25]:

# Means of parameters (alpha, a, b, c)
chain_dx_II_burn.mean(axis=(0,1))

# In[26]:

# Marginal standard deviations of parameters (alpha, a, b, c)
chain_dx_II_burn.std(axis=(0,1))

# In[27]:

# Covariance matrix of parameters (alpha, a, b, c)
np.cov(chain_dx_II_burn_flat.T)
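# The corresponding correlation matrix (an illustrative aside; `cov` is a local name introduced
# only for this example) makes the degeneracies between $\alpha$ and the plate constants easier
# to read off than the raw covariance.

# In[ ]:

cov = np.cov(chain_dx_II_burn_flat.T)
cov/np.sqrt(np.outer(np.diag(cov), np.diag(cov)))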
# In[28]:

def triangle_plot(chain_flat):
    # Corner plot of the (alpha, a, b, c) samples: 1d marginal distributions on the
    # diagonal, scatter points and 2d contours below the diagonal.
    labels = ["$\\alpha$", "$a$", "$b$", "$c$"]
    colors = ('#85ddff','#52aae7','#1f77b4')
    fig = plt.figure(figsize=(20,20))
    # Plot 1d marginal distributions
    for i in range(4):
        ax = fig.add_subplot(4,4,5*i+1)
        pdf = get_marginal(chain_flat.T[i])
        ax.fill_between(pdf[0], pdf[1], 0, color='#52aae7', alpha=0.4)
        ax.set_ylim(0., pdf[1].max()*1.05)
        if i == 0:
            ax.axvline(alpha_GR,color='black',linestyle='--')
            ax.axvline(alpha_Newton,color='black',linestyle='--')
        ax.set_xlabel(labels[i])
    # Plot 2d contours
    for i in range(4):
        for j in range(i):
            ax = fig.add_subplot(4,4,4*i+j+1)
            ax.scatter(chain_flat.T[j], chain_flat.T[i], color="black", s=2)
            xbins, ybins, contours, chainLevels = get_contours(chain_flat.T[j], chain_flat.T[i])
            ax.contourf(xbins, ybins, contours, levels=chainLevels, colors=colors, alpha=0.5)
            ax.contour(xbins, ybins, contours, levels=chainLevels, colors=colors, linewidths=2)
            if j == 0:
                ax.axvline(alpha_GR,color='black',linestyle='--')
                ax.axvline(alpha_Newton,color='black',linestyle='--')
                ax.set_ylabel(labels[i])
            ax.set_xlabel(labels[j])
    plt.show()

triangle_plot(chain_dx_II_burn_flat)

# ## Effect of nuisance parameters

# In[29]:

# We now include a strong prior that nuisance parameters are close to zero
def prior_logpdf(theta):
    alpha,a,b,c = theta[0],theta[1],theta[2],theta[3]
    return ss.multivariate_normal([0.,0.,0.],np.diag(np.array([1e-2,1e-2,1e-2])**2)).logpdf([a,b,c])

def target_logpdf(theta,data,sigma_d):
    return prior_logpdf(theta) + lh_platex_logpdf(theta,data,sigma_d)
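# To see how strong this prior is (an illustrative aside): a plate constant of 0.05 is already
# penalised by $-\frac{1}{2}(0.05/0.01)^2 = -12.5$ in log-prior relative to zero.

# In[ ]:

prior_logpdf([1.75, 0., 0., 0.05]) - prior_logpdf([1.75, 0., 0., 0.])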
# In[30]:

Ntries3 = 5000
Nburnin = 100
proposal_sigma = np.array([8e-1,1e-2,1e-2,1e-2])
proposal_cov = np.diag(proposal_sigma**2)
Nchains = 5

# In[31]:

# Run Nchains different chains starting at different positions in parameter space
chains_nuisance = [MH_sampler(Ntries3,theta_start,dx_II,sigma_d,proposal_cov)
                   for theta_start in np.stack((np.linspace(0.,3.,Nchains), np.linspace(-1e-2,1e-2,Nchains),
                                                np.linspace(-1e-2,1e-2,Nchains), np.linspace(-1e-2,1e-2,Nchains)),axis=1)]
chain_nuisance = np.array([chains_nuisance[j][1] for j in range(Nchains)])

# In[32]:

fig, (ax0, ax1, ax2, ax3) = plt.subplots(4,1,figsize=(10,12))
ax0.set_xlim(0,700)
ax0.set_ylabel("$\\alpha$")
for samples in chains_nuisance:
    ax0.plot(np.arange(Ntries3+1),samples[1].T[0],marker='.')
ax0.axvline(Nburnin,color='black',linestyle=':')
ax0.set_title("Trace plot for different chains")
ax1.set_xlim(0,700)
ax1.set_ylabel("$a$")
for samples in chains_nuisance:
    ax1.plot(np.arange(Ntries3+1),samples[1].T[1],marker='.')
ax1.axvline(Nburnin,color='black',linestyle=':')
ax2.set_xlim(0,700)
ax2.set_ylabel("$b$")
for samples in chains_nuisance:
    ax2.plot(np.arange(Ntries3+1),samples[1].T[2],marker='.')
ax2.axvline(Nburnin,color='black',linestyle=':')
ax3.set_xlim(0,700)
ax3.set_ylabel("$c$")
ax3.set_xlabel("sample number")
for samples in chains_nuisance:
    ax3.plot(np.arange(Ntries3+1),samples[1].T[3],marker='.')
ax3.axvline(Nburnin,color='black',linestyle=':')

# In[33]:

gelman_rubin(chain_nuisance)

# In[34]:

# Remove burn-in phase
chain_nuisance_burn = np.array([chains_nuisance[j][1][Nburnin:] for j in range(Nchains)])
chain_nuisance_burn_flat = np.concatenate(chain_nuisance_burn, axis=0)

# In[35]:

triangle_plot(chain_nuisance_burn_flat)

# The GR value of $\alpha=1.75$ is excluded with this prior on nuisance parameters!
# Also, the posterior for $c$ largely excludes the prior mean ($c=0$), which usually signals a problem in the analysis.

# ## Use of more data

# We go back to sampling from the likelihood, without an explicit prior on the nuisance parameters. We run several chains for each data vector and check convergence using the Gelman-Rubin test.

# In[36]:

def target_logpdf(theta,data,sigma_d):
    return lh_platex_logpdf(theta,data,sigma_d)

# In[37]:

Ntries3 = 8000
Nburnin = 500
proposal_sigma = np.array([8e-1,1e-2,1e-2,1e-2])
proposal_cov = np.diag(proposal_sigma**2)
Nchains = 5

# In[38]:

# Run Nchains different chains starting at different positions in parameter space
starting_points = np.stack((np.linspace(0.,3.,Nchains), np.linspace(-1.,1.,Nchains),
                            np.linspace(-1.,1.,Nchains), np.linspace(-1.,1.,Nchains)),axis=1)

chains_dx_I = [MH_sampler(Ntries3,theta_start,dx_I,sigma_d,proposal_cov) for theta_start in starting_points]
chain_dx_I = np.array([chains_dx_I[j][1] for j in range(Nchains)])
chains_dx_III = [MH_sampler(Ntries3,theta_start,dx_III,sigma_d,proposal_cov) for theta_start in starting_points]
chain_dx_III = np.array([chains_dx_III[j][1] for j in range(Nchains)])
chains_dx_IV = [MH_sampler(Ntries3,theta_start,dx_IV,sigma_d,proposal_cov) for theta_start in starting_points]
chain_dx_IV = np.array([chains_dx_IV[j][1] for j in range(Nchains)])
chains_dx_V = [MH_sampler(Ntries3,theta_start,dx_V,sigma_d,proposal_cov) for theta_start in starting_points]
chain_dx_V = np.array([chains_dx_V[j][1] for j in range(Nchains)])
chains_dx_VII = [MH_sampler(Ntries3,theta_start,dx_VII,sigma_d,proposal_cov) for theta_start in starting_points]
chain_dx_VII = np.array([chains_dx_VII[j][1] for j in range(Nchains)])
chains_dx_VIII = [MH_sampler(Ntries3,theta_start,dx_VIII,sigma_d,proposal_cov) for theta_start in starting_points]
chain_dx_VIII = np.array([chains_dx_VIII[j][1] for j in range(Nchains)])

# In[39]:

for chain_dx in [chain_dx_I, chain_dx_III, chain_dx_IV, chain_dx_V, chain_dx_VII, chain_dx_VIII]:
    print(gelman_rubin(chain_dx))

# In[40]:

# Remove burn-in phase and flatten
chain_dx_burn_flat = {}
for i, chain in enumerate([chain_dx_I, chain_dx_II, chain_dx_III, chain_dx_IV,
                           chain_dx_V, chain_dx_VII, chain_dx_VIII]):
    chain_dx_burn_flat[i] = np.concatenate(chain[:,Nburnin:,:], axis=0)

# The nuisance parameters are different for each plate. We therefore combine the marginal pdfs $p(\alpha|d_i)$ (or $p(d_i|\alpha)$) only.
# The difficulty is that these marginal distributions are *sampled*, and it is generally difficult to multiply pdfs for which we only have samples. To circumvent this issue, we use a simple one-dimensional kernel density estimator provided by scikit-learn. Assuming independent plates and a flat prior on $\alpha$, the combined posterior is $p(\alpha|d_1,\ldots,d_7) \propto \prod_i p(d_i|\alpha)$, which is why the log-densities are summed below.

# In[41]:

from sklearn.neighbors import KernelDensity

# In[42]:

alpha_plot = np.linspace(-2, 6, 1000)[:,np.newaxis]
log_dens = np.empty([7,1000])
for i in range(7):
    alpha_training = chain_dx_burn_flat[i][:,0,np.newaxis]
    kde = KernelDensity(kernel='gaussian', bandwidth=0.25).fit(alpha_training)
    log_dens[i,:] = kde.score_samples(alpha_plot)
combined_pdf = np.exp(np.sum(log_dens,axis=0))

# In[43]:

fig = plt.figure(figsize=(6,6))
# Plot 1d marginal distributions
ax0 = fig.add_subplot(1,1,1)
ax0.set_xlim(alpha_plot.min(), alpha_plot.max())
for i in {0}:
    pdf_alpha = get_marginal(chain_dx_burn_flat[i].T[0])
    ax0.fill_between(pdf_alpha[0],pdf_alpha[1], 0, color='C'+str(i), alpha=0.4, label="histogram")
    ax0.plot(alpha_plot, np.exp(log_dens[i])/np.exp(log_dens[i]).max(), color='C'+str(i), label="kernel density estimate")
for i in range(1,7):
    pdf_alpha = get_marginal(chain_dx_burn_flat[i].T[0])
    ax0.fill_between(pdf_alpha[0],pdf_alpha[1], 0, color='C'+str(i), alpha=0.4)
    ax0.plot(alpha_plot, np.exp(log_dens[i])/np.exp(log_dens[i]).max(), color='C'+str(i))
ax0.plot(alpha_plot, combined_pdf/combined_pdf.max(), lw=2.5, color='black', label="combined likelihood")
ax0.set_ylim(0.,pdf_alpha[1].max()*1.05)
ax0.axvline(alpha_GR,color='black',linestyle='--')
ax0.axvline(alpha_Newton,color='black',linestyle='--')
ax0.set_xlabel("$\\alpha$")
handles, labels = plt.gca().get_legend_handles_labels()
order = [0,2,1]
ax0.legend([handles[idx] for idx in order],[labels[idx] for idx in order], loc="lower left")

# The full data now clearly prefer the GR value $\alpha_\mathrm{GR}$ over the Newtonian value $\alpha_\mathrm{Newton}$.

# ## Model comparison

# We will compute the Bayes factor (for non-committal priors on the models):
#
# $
# \mathcal{B} = \dfrac{p(\alpha_\mathrm{GR}|d)}{p(\alpha_\mathrm{Newton}|d)} = \dfrac{p(d|\alpha_\mathrm{GR})}{p(d|\alpha_\mathrm{Newton})}
# $
#
# (the second equality holds because the prior on $\alpha$ is flat, so it cancels in the ratio).
# This is linked to the Savage-Dickey density ratio for nested models.

# ### 1- Approximate Bayesian computation

# A first possible idea is to estimate $p(\alpha_\mathrm{GR}|d)$ and $p(\alpha_\mathrm{Newton}|d)$ via approximate Bayesian computation. To do so, we simply count the number of $\alpha$ samples that are close to $\alpha_\mathrm{GR}$ (i.e. for some $\varepsilon$, $|\alpha-\alpha_\mathrm{GR}|<\varepsilon$) and $\alpha_\mathrm{Newton}$. The estimate of the Bayes factor is the ratio of these numbers.

# In[44]:

def get_BayesFactor_ABC(chain, epsilon):
    GR_samples = np.sum(np.where(np.abs(chain[:,:,0]-alpha_GR)
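# A minimal sketch of the counting estimate described above (illustrative only:
# `get_BayesFactor_ABC_sketch` is a hypothetical helper, not the notebook's own cell;
# the result depends on the choice of $\varepsilon$ and is only meaningful when both
# counts are non-zero).

# In[ ]:

def get_BayesFactor_ABC_sketch(chain, epsilon):
    # Count alpha samples within epsilon of each reference value and take the ratio
    alpha_samples = chain[:,:,0]
    N_GR = np.sum(np.abs(alpha_samples - alpha_GR) < epsilon)
    N_Newton = np.sum(np.abs(alpha_samples - alpha_Newton) < epsilon)
    return N_GR / N_Newton

get_BayesFactor_ABC_sketch(chain_dx_II, 0.1)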