In [1]:
import numpy as np
import pandas as pd
from lets_plot import *

LetsPlot.setup_html()
In [2]:
from scipy.stats import binom, beta

Beta-Binomial model

$\displaystyle \boxed{\begin{array}{l} X|\theta\sim B_n(\theta) \text{ - likelihood} \\ \theta \sim B(\alpha,\beta) \text{ - prior} \end{array} \Rightarrow \theta|(X=x) \sim B(\alpha+x,\beta+n-x) \text{ - posterior}}$

The model:

  • Likelihood - Binomial distribution: $\displaystyle{\mathbb{P}(X=x|\theta)=\frac{n!}{(n-k)!k!}\theta^x(1-\theta)^{n-x}}$
  • Prior - Beta distribution: $\displaystyle{p(\theta)=\frac{\Gamma(\alpha+\beta)}{\Gamma(\alpha)\Gamma(\beta)}\theta^{\alpha-1}(1-\theta)^{\beta-1}}$
  • Posterior - Beta distribution: $\displaystyle{p(\theta|X=x)\sim B(\alpha+x,\beta+n-x)}$
In [3]:
class BetaBinomial(object):

    def __init__(self, prior_params=(1., 1.), dist=beta, x=np.linspace(0., 1., 100)):
        self.prior_params = prior_params
        self.dist = dist
        self.x = x

    def posterior(self, trials = None, observations = None):
        a_prior, b_prior = self.prior_params
        self.posterior_prob =[]
        self.ci = []
        for i, n in enumerate (trials):
            y = observations[i]
            post = self.dist(a_prior + y, b_prior + n - y)
            self.posterior_prob.append(post.pdf(self.x))
            self.ci.append(post.interval(0.95))
        return self.posterior_prob
In [4]:
a_prior = 1.
b_prior = 1.
n = 50
prior_params = (a_prior, b_prior)
bb = BetaBinomial(prior_params)
true_theta = 0.25
trials = np.arange(n)
observations = np.array([binom(n=trials[i], p=true_theta).rvs(size=1)[0] for i in range(n)])
posterior = bb.posterior(trials, observations)
In [5]:
columns = ['x']
columns.extend(['p_' + str(i) for i in np.arange(n)])
post = columns[-1]
prior = columns[1]
data = pd.DataFrame(np.hstack((bb.x[:, np.newaxis], np.array(posterior).T)), columns=columns)
In [6]:
data_melt = data.melt(id_vars='x')
In [7]:
p = ggplot(data_melt) \
        + geom_area(aes(x='x', y='value', group='variable'),  
                    position='identity', 
                    size=0, alpha=0.1)
p
Out[7]:
In [8]:
p += geom_vline(xintercept=true_theta, color='blue', linetype='dashed')
p += geom_text(x=true_theta, y=14, label='Actual', color='blue', hjust=1)
p += scale_y_continuous(limits=[0, 15], expand=[0, 0])
p
Out[8]:
In [9]:
where_mode = bb.x[np.where(posterior[n - 1] == np.max(posterior[n - 1]))[0][0]]
In [10]:
p += geom_vline(xintercept=where_mode, color='red', linetype='dashed')
p += geom_text(x=where_mode, y=13, label='Posterior', color='red', hjust=1)
p
Out[10]:
In [11]:
p += geom_line(aes(x='x', y=post), data=data, color='red')
p
Out[11]:
In [12]:
p += geom_path(aes(x='x', y=prior), data=data, color='black')
p
Out[12]: