Florent Leclercq,
Imperial Centre for Inference and Cosmology, Imperial College London,
florent.leclercq@polytechnique.org
import numpy as np
from scipy.integrate import quad
from scipy.stats import norm
from matplotlib import pyplot as plt
from math import sin, pi
np.random.seed(123456)
%matplotlib inline
plt.rcParams.update({'lines.linewidth': 2})
Nsamp=200
def target_pdf(x):
return sin(x)*sin(x)
target_pdf=np.vectorize(target_pdf)
a=0.
b=pi
x_arr=np.linspace(a,b,100)
f_arr=target_pdf(x_arr)
plt.figure(figsize=(10,6))
plt.xlim(a,b)
plt.ylim(0,1.05)
plt.plot(x_arr,f_arr)
plt.title("Target pdf")
plt.show()
trueI=quad(target_pdf,a,b)[0]
trueI
1.5707963267948966
We directly draw samples from the target pdf.
randoms=np.random.uniform(a,b,Nsamp)
samples=target_pdf(randoms)
plt.figure(figsize=(10,6))
plt.xlim(a,b)
plt.ylim(0,1.05)
plt.plot(x_arr,f_arr)
markerline, stemlines, baseline = plt.stem(randoms,samples,linefmt='-k',markerfmt='k.')
baseline.set_visible(False)
plt.title("Standard Monte Carlo integration")
plt.show()
StandardMonteCarloI=np.sum(samples)*(b-a)/Nsamp
StandardMonteCarloI
1.660402945484072
We draw samples from a proposal pdf, designed to be as close as possible to the target pdf. We then assign each sample a weight proportional to its likelihood divided by its prior probability.
proposal=norm(loc=(b-a)/2,scale=0.5)
proposal_pdf=proposal.pdf
plt.figure(figsize=(10,6))
plt.xlim(a,b)
plt.ylim(0,1.05)
plt.plot(x_arr,f_arr,label="target")
plt.plot(x_arr,proposal_pdf(x_arr),label="proposal")
plt.title("Importance sampling: target pdf and proposal pdf")
plt.legend(loc='best')
plt.show()
samples=proposal.rvs(size=Nsamp)
weights=target_pdf(samples)/proposal_pdf(samples)
f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(10,12))
ax1.set_xlim(a,b)
ax1.set_ylim(0,1.05)
ax1.plot(x_arr,f_arr,label="target")
ax1.plot(x_arr,proposal_pdf(x_arr),label="proposal")
markerline, stemlines, baseline = ax1.stem(samples,proposal_pdf(samples),linefmt='-k',markerfmt='k.')
baseline.set_visible(False)
ax1.legend(loc='best')
ax1.set_title("Importance sampling: samples and weights")
markerline, stemlines, baseline = ax2.stem(samples,weights,linefmt='-k',markerfmt='k.')
baseline.set_visible(False)
ax2.plot(x_arr,f_arr/proposal_pdf(x_arr),color='C2',label="target/proposal")
ax2.set_ylim([0,weights.max()+0.5])
ax2.legend(loc='best')
f.subplots_adjust(hspace=0)
plt.show()
ImportanceI=np.average(samples,weights=weights)
ImportanceI
1.5100957559182684
A problem with importance sampling is the situation in which all but one of the weights are close to zero. To avoid with situation, we can do importance resampling. We draw Nresamp new samples from the current sample set with probabilities proportional to their weights. We replace the current samples with this new set, and the the current weights by 1/Nresamp (drawing according to the importance weight replaces likelihoods by frequencies).
Nresamp=100
normalizedweights=weights/np.sum(weights)
resamples=np.random.choice(samples, size=Nresamp, replace=True, p=normalizedweights)
reweights=1./Nresamp*np.ones(Nresamp)
Weights are then updated given their likelihood, as in the previous importance sampling step.
reweights*=target_pdf(resamples)/(1./Nresamp)
plt.figure(figsize=(10,6))
plt.xlim(a,b)
plt.ylim(0,1.05)
plt.plot(x_arr,f_arr)
markerline, stemlines, baseline = plt.stem(resamples,reweights,linefmt='-k',markerfmt='k.')
baseline.set_visible(False)
plt.title("Importance resampling")
plt.show()
ImportanceReI=np.average(resamples,weights=reweights)
ImportanceReI
1.531675529534044
Iterating this procedure yields the Sequential Importance Resampling (SIR) algorithm, which is a simple "particle filter" or "Sequential Monte Carlo" algorithm.
upperbound=1
xs=np.random.uniform(a,b,Nsamp)
ys=np.random.uniform(0,upperbound,Nsamp)
plt.figure(figsize=(10,6))
plt.xlim(a-0.1,b+0.1)
plt.ylim(0,1.05)
plt.plot(x_arr,f_arr)
plt.scatter(xs,ys,s=15,marker='o',color='black')
plt.fill_between(x_arr,np.zeros_like(x_arr),upperbound*np.ones_like(x_arr),color='grey',alpha=0.25)
plt.plot([a,b],[upperbound,upperbound],color='black')
plt.plot([a,a],[0,upperbound],color='black')
plt.plot([b,b],[0,upperbound],color='black')
plt.title("Rejection sampling")
plt.show()
accepted=np.where(ys<=target_pdf(xs))
rejected=np.where(ys>target_pdf(xs))
accepted_randoms=xs[accepted]
accepted_samples=ys[accepted]
rejected_randoms=xs[rejected]
rejected_samples=ys[rejected]
plt.figure(figsize=(10,6))
plt.xlim(a-0.1,b+0.1)
plt.ylim(0,1.05)
plt.plot(x_arr,f_arr)
plt.scatter(accepted_randoms,accepted_samples,s=15,marker='o',color='green')
plt.scatter(rejected_randoms,rejected_samples,s=15,marker='o',color='red')
plt.fill_between(x_arr,np.zeros_like(x_arr),f_arr,color='green',alpha=0.25)
plt.fill_between(x_arr,upperbound*np.ones_like(x_arr),f_arr,color='red',alpha=0.25)
plt.plot([a,b],[upperbound,upperbound],color='black')
plt.plot([a,a],[0,upperbound],color='black')
plt.plot([b,b],[0,upperbound],color='black')
plt.title("Rejection sampling")
plt.show()
fraction=float(len(accepted_samples))/Nsamp
fraction
0.465
fraction=float(len(accepted_samples))/Nsamp
RejectionI=fraction*upperbound*(b-a)
RejectionI
1.460840583919254