#!/usr/bin/env python
# coding: utf-8

# ## Bayesian Statistics Made Simple
#
# Code and exercises from my workshop on Bayesian statistics in Python.
#
# Copyright 2019 Allen Downey
#
# MIT License: https://opensource.org/licenses/MIT

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')

import numpy as np
import pandas as pd

import seaborn as sns
sns.set_style('white')
sns.set_context('talk')

import matplotlib.pyplot as plt

from distribution import Pmf


# Suppose you have two urns full of marbles:
#
# * Urn 1 has 20 blue and 10 red marbles
#
# * Urn 2 has 10 blue and 20 red marbles.
#
# You choose an urn at random and draw two marbles. They are both blue.
# What is the probability that you chose Urn 1?

# In[2]:


# Version 1: the two draws are treated as independent, so the likelihood
# of "blue, blue" is the single-draw probability of blue, squared.
pmf = Pmf.from_seq(['Urn 1', 'Urn 2'])
for urn, p_blue in [('Urn 1', 2/3), ('Urn 2', 1/3)]:
    pmf[urn] *= p_blue**2
pmf.normalize()
pmf


# In[3]:


# Version 2: the first marble is not put back, so the second draw sees
# one fewer blue marble among 29 remaining marbles.
pmf = Pmf.from_seq(['Urn 1', 'Urn 2'])
for urn, blue in [('Urn 1', 20), ('Urn 2', 10)]:
    pmf[urn] *= blue/30 * (blue-1)/29
pmf.normalize()
pmf


# Suppose I have a box of dice with one each of 4-sided, 6-sided, 8-sided,
# 10-sided, 12-sided, and 20-sided dice.
#
# I choose a die at random, roll it, and get a 9. What is the probability
# that I chose the 12-sided die?

# In[4]:


# Only the 10-, 12- and 20-sided dice can show a 9; the smaller dice have
# likelihood zero, so they are left out of the hypotheses altogether.
dice = Pmf.from_seq([10, 12, 20])
for sides in (10, 12, 20):
    # Each face of an n-sided die comes up with probability 1/n.
    dice[sides] /= sides
dice.normalize()
dice


# Write a few lines of code that use the Pmf class to compute the answer to
# the following question.
#
# Suppose I roll two six-sided dice and tell you (honestly) that the sum is
# not 7.
#
# What is the probability that the sum is 2 or 12?

# In[5]:


d6 = Pmf.from_seq(list(range(1, 7)))
# NOTE(review): presumably Pmf addition yields the distribution of the sum
# of two independent rolls -- confirm against distribution.Pmf.
twice = d6 + d6
# We are told the sum is not 7: rule it out, then renormalize.
twice[7] = 0
twice.normalize()
twice


# In[6]:


twice[2] + twice[12]


# Suppose I flip the same coin twice and tell you only whether the outcomes
# are the same or different.
#
# Write a likelihood function that takes data and hypo and returns the
# probability of the data under the hypothesis, where:
#
# 1. data is a string, either 'S' for the same outcome or 'D' for different,
#    and
#
# 2.
#    hypo is the hypothetical probability of heads, from 0-1

# In[7]:


def likelihood_flip(data, hypo):
    """Likelihood of a same/different report for two flips of one coin.

    data: string, either 'S' (both flips matched) or 'D' (they differed)
    hypo: hypothetical probability of heads (0-1)

    returns: float probability of `data` given `hypo`

    raises: ValueError if `data` is neither 'S' nor 'D'
    """
    x = hypo
    if data == 'S':
        # Same outcome: heads-heads or tails-tails.
        return x*x + (1-x)*(1-x)
    if data == 'D':
        # Different outcome: heads-tails or tails-heads.
        return 2 * x * (1-x)
    # Anything else is a caller error; do not silently score it as 'D'.
    raise ValueError("data must be 'S' or 'D', got %r" % (data,))


# In[8]:


def decorate_flip(title):
    """Labels the axes of the current plot for the coin-flip problem.

    title: string
    """
    plt.xlabel('Probability of heads')
    plt.ylabel('PMF')
    plt.title(title)


# In[9]:


# Prior over the probability of heads: np.linspace(0, 1) gives 50 evenly
# spaced values (NumPy's default `num`), passed to Pmf.from_seq.
same = Pmf.from_seq(np.linspace(0, 1))
same.update(likelihood_flip, 'S')
same.plot()
decorate_flip('Same outcome')


# In[10]:


diff = Pmf.from_seq(np.linspace(0, 1))
diff.update(likelihood_flip, 'D')
diff.plot()
decorate_flip('Different outcome')


# Suppose you have a fixed but unknown probability, `p`, of answering a
# question on this quiz correctly.
#
# Out of six questions, the number you get correct, `k`, is well modeled by
# the binomial distribution with parameters `n` and `p`, where `n` is the
# number of questions.
#
# Write a likelihood function that takes `data` and `hypo` and returns the
# probability of the data under the hypothesis, where:
#
# 1. `data` is a list of two values, `k` and `n`
#
# 2. `hypo` is your probability of getting a question right, `p`, from 0-1

# In[11]:


from scipy.stats import binom

def likelihood_quiz(data, hypo):
    """Binomial likelihood of a quiz score.

    data: k, n (number of correct answers, number of questions)
    hypo: p, probability of answering one question correctly (0-1)

    returns: float probability of exactly k correct out of n, given p
    """
    k, n = data
    p = hypo
    return binom.pmf(k, n, p)


# In[12]:


def decorate_quiz(title):
    """Labels the axes of the current plot for the quiz problem.

    title: string
    """
    plt.xlabel('Probability of correct')
    plt.ylabel('PMF')
    plt.title(title)


# In[13]:


# NOTE(review): the problem statement above says six questions, but this
# example updates with 7 correct out of 9 -- confirm which is intended.
quiz = Pmf.from_seq(np.linspace(0, 1))
quiz.update(likelihood_quiz, (7, 9))
quiz.plot()
decorate_quiz('7 out of 9')