%load_ext line_profiler
import matplotlib.pyplot as plt
import numba
import numpy as np
import scipy.stats
Markov Chain Monte Carlo is a good example of a non-trivial algorithm we'd like to compute. To keep this tractable, we will just implement an MCMC sampler for the mean of a Gaussian. Don't worry about the details of this code; we'll look at a much simpler Metropolis sampler at the end.
def sampler(
    data, samples=4, mu_init=0.5, proposal_width=0.5, mu_prior_mu=0, mu_prior_sd=1.0
):
    """Metropolis sampler for the posterior of a Gaussian mean.

    Proposes `samples` new means around the current one and accepts each with
    probability p_proposal / p_current (likelihood times prior). Returns the
    list of positions, starting with `mu_init`.
    """
    mu_current = mu_init
    trace = [mu_current]
    for _ in range(samples):
        # Propose a new mean near the current one.
        mu_proposal = scipy.stats.norm(mu_current, proposal_width).rvs()
        # Likelihood of the data under each mean: product of per-point densities.
        likelihood_current = scipy.stats.norm(mu_current, 1).pdf(data).prod()
        likelihood_proposal = scipy.stats.norm(mu_proposal, 1).pdf(data).prod()
        # Prior density evaluated at each mean.
        prior_current = scipy.stats.norm(mu_prior_mu, mu_prior_sd).pdf(mu_current)
        prior_proposal = scipy.stats.norm(mu_prior_mu, mu_prior_sd).pdf(mu_proposal)
        p_current = likelihood_current * prior_current
        p_proposal = likelihood_proposal * prior_proposal
        # Metropolis acceptance: move with probability p_proposal / p_current.
        p_accept = p_proposal / p_current
        if np.random.rand() < p_accept:
            mu_current = mu_proposal
        trace.append(mu_current)
    return trace
%%time
# Generate toy data and run the (slow) scipy-based sampler.
np.random.seed(123)
data = np.random.randn(20)
posterior = sampler(data, samples=15_000, mu_init=1.0)
# Trace plot of the chain, plus a histogram of the samples
# (first 500 dropped as burn-in).
fig, axs = plt.subplots(1, 2, figsize=(12, 5))
vals = posterior
axs[0].plot(vals)
axs[0].set_xlabel("Raw data")
axs[1].hist(vals[500:], bins=np.linspace(-1, 1, 100))
axs[1].set_xlabel("Histogram")
plt.show()
I made a big performance mistake here — and it may not be the one you expect. If I profile the code above, I can see it:
%lprun -f sampler sampler(data, samples=1_500, mu_init=1.)
Now do you see it?
Here's a much lighter weight norm:
def norm_pdf(loc, scale, x):
    """Gaussian probability density at `x` for mean `loc` and std `scale`.

    A lightweight replacement for scipy.stats.norm(loc, scale).pdf(x);
    works on scalars and numpy arrays alike.
    """
    # Standardize, then evaluate the standard normal density and rescale.
    z = (x - loc) / scale
    return np.exp(-(z**2) / 2) / np.sqrt(2 * np.pi) / scale
# Sanity checks against scipy. Use np.isclose rather than exact float
# equality: two independent implementations need not agree to the last bit,
# so `==` on floats here is brittle even when both are correct.
assert np.isclose(norm_pdf(0.4, 0.7, 0.2), scipy.stats.norm(0.4, 0.7).pdf(0.2))
assert np.isclose(
    scipy.stats.norm(0.3, 1).pdf(data).prod(), np.prod(norm_pdf(0.3, 1, data))
)
Here's a new version. Let's remove the list appending while we are at it:
def sampler(
    data, samples=4, mu_init=0.5, proposal_width=0.5, mu_prior_mu=0, mu_prior_sd=1.0
):
    """Metropolis sampler for the posterior of a Gaussian mean.

    Same algorithm as before, but uses the lightweight `norm_pdf` and a
    preallocated numpy array instead of scipy distributions and list appends.
    Returns an array of length samples + 1 whose first entry is `mu_init`.
    """
    mu_current = mu_init
    chain = np.empty(samples + 1)
    chain[0] = mu_current
    for step in range(samples):
        # Propose a new mean near the current one.
        mu_proposal = np.random.normal(mu_current, proposal_width)
        # Likelihood of the data under each mean: product of per-point densities.
        likelihood_current = np.prod(norm_pdf(mu_current, 1, data))
        likelihood_proposal = np.prod(norm_pdf(mu_proposal, 1, data))
        # Prior density evaluated at each mean.
        prior_current = norm_pdf(mu_prior_mu, mu_prior_sd, mu_current)
        prior_proposal = norm_pdf(mu_prior_mu, mu_prior_sd, mu_proposal)
        p_current = likelihood_current * prior_current
        p_proposal = likelihood_proposal * prior_proposal
        # Metropolis acceptance: move with probability p_proposal / p_current.
        if np.random.rand() < p_proposal / p_current:
            mu_current = mu_proposal
        chain[step + 1] = mu_current
    return chain
%%time
# Same run as before, now using the numpy-based norm_pdf: much faster.
np.random.seed(123)
data = np.random.randn(20)
posterior = sampler(data, samples=15_000, mu_init=1.0)
%lprun -f sampler sampler(data, samples=15_000, mu_init=1.)
Much better. Instancing scipy distributions is very costly. But we'd love to be able to produce massive amounts of MC. Can we try Numba?
# Numba-compiled elementwise version: same formula, compiled to machine code.
# The explicit float64 signature makes it compile eagerly (at definition time).
@numba.vectorize([numba.float64(numba.float64, numba.float64, numba.float64)])
def norm_pdf(loc, scale, x):
    # Standardized distance from the mean.
    y = (x - loc) / scale
    return np.exp(-(y**2) / 2) / np.sqrt(2 * np.pi) / scale
Python note: functions look up names from their surrounding (module) scope each time they are called. This means `sampler` now uses the new `norm_pdf`, even though we have not changed `sampler` itself. You probably should not normally do this — it makes the notebook depend on the order in which cells are executed, which is not ideal.
%%time
# Re-run: sampler now picks up the numba-vectorized norm_pdf from the
# global scope, with no change to sampler itself.
np.random.seed(123)
data = np.random.randn(20)
posterior = sampler(data, samples=15000, mu_init=1.0)
%lprun -f sampler sampler(data, samples=15_000, mu_init=1.)
Let's go all in and do Numba start to finish:
# njit version: a plain compiled function (compiled lazily, on first call)
# that can be called from other njit-compiled code such as the sampler below.
@numba.njit
def norm_pdf(loc, scale, x):
    # Standardized distance from the mean.
    y = (x - loc) / scale
    return np.exp(-(y**2) / 2) / np.sqrt(2 * np.pi) / scale
Note: this is mostly redefined to show that it could be done with `njit` instead of `vectorize`; `vectorize` is actually a bit simpler and faster here.
@numba.njit
def sampler(
    data,
    samples=4,
    mu_init=0.5,
    proposal_width=0.5,
    mu_prior_mu=0,
    mu_prior_sd=1.0,
):
    """Metropolis sampler for the posterior of a Gaussian mean, JIT-compiled.

    Identical algorithm to the numpy version above; `norm_pdf` here resolves
    to the njit-compiled version, so the whole loop runs as machine code.
    Returns an array of length samples + 1 whose first entry is mu_init.
    """
    mu_current = mu_init
    posterior = np.empty(samples + 1)
    posterior[0] = mu_current
    for i in range(samples):
        # suggest new position
        mu_proposal = np.random.normal(mu_current, proposal_width)
        # Compute likelihood by multiplying probabilities of each data point
        likelihood_current = np.prod(norm_pdf(mu_current, 1, data))
        likelihood_proposal = np.prod(norm_pdf(mu_proposal, 1, data))
        # Compute prior probability of current and proposed mu
        prior_current = norm_pdf(mu_prior_mu, mu_prior_sd, mu_current)
        prior_proposal = norm_pdf(mu_prior_mu, mu_prior_sd, mu_proposal)
        p_current = likelihood_current * prior_current
        p_proposal = likelihood_proposal * prior_proposal
        # Metropolis acceptance ratio (prior is included via p_* above).
        p_accept = p_proposal / p_current
        accept = np.random.rand() < p_accept
        if accept:
            # Update position
            mu_current = mu_proposal
        posterior[i + 1] = mu_current
    return posterior
%%time
# First call: triggers JIT compilation of sampler (and norm_pdf), so this
# timing includes compile time.
np.random.seed(123)
data = np.random.randn(20)
posterior = sampler(data, samples=15_000, mu_init=1.0)
Ouch! That first call included the JIT compilation time. Let's try that again:
%%time
# Second call: already compiled, so this measures pure run time.
np.random.seed(123)
data = np.random.randn(20)
posterior = sampler(data, samples=15_000, mu_init=1.0)
Sweet.
Let's look at a simpler example, and one that you might find more useful/instructive.
## Metropolis sampler
def p(x):
    """Target density to sample. The plot looks better if it is normalized."""
    density = 1 / (1 + x**2) / np.pi
    return density
    # Alternative target to try:
    # return 1/(1 + x**2) * np.sin(x)**2 / (np.pi * np.sinh(1) / np.exp(1))
def metropolis(p, samples=50_000, sigma=1):
    """Metropolis sampler for an arbitrary 1-D density `p`.

    Proposals are normal steps of width `sigma` around the current point.
    Returns an array of samples + 1 positions; the first is drawn uniformly
    from [0, 1).
    """
    chain = np.zeros(samples + 1)
    chain[0] = np.random.rand()
    for i in range(samples):
        # Propose a step away from the current position.
        proposal = np.random.normal(chain[i], sigma)
        # Acceptance probability: ratio of target densities.
        alpha = p(proposal) / p(chain[i])
        # Keep the proposal with probability alpha, otherwise stay put.
        if alpha > np.random.rand():
            chain[i + 1] = proposal
        else:
            chain[i + 1] = chain[i]
    return chain
# Run once to get samples for plotting, then time the sampler.
vals = metropolis(p)
%%timeit
vals = metropolis(p)
# Trace plot (first 500 samples, the burn-in, in red) and a normalized
# histogram of the remainder overlaid with the target density.
x = np.linspace(-10, 10, 200)
fig, axs = plt.subplots(2, figsize=(10, 6))
axs[0].plot(vals[:500], "r")
axs[0].plot(np.arange(500, len(vals)), vals[500:], "g")
axs[1].hist(vals[500:], bins=400, range=(-20, 20), density=True)
axs[1].plot(x, p(x), lw=3)
axs[1].set_xlim(-10, 10);
Try the other (commented-out) target function in `p`, and try adding `@numba.njit` above `p` and `metropolis`.