#!/usr/bin/env python # coding: utf-8 # # Bayes' theorem # $$ P(A \mid B) = \frac{P(B \mid A) P(A)}{P(B)}$$ # ## Problem statement # # - A drug test where the probability of a user testing positive is 0.99 and the probability of a non-user testing negative is also 0.99. # - If someone tests positive, what's the probability of them being a user? # - Need to know the level of users in the population, suppose it's 0.5%. # ## Events, outcomes, probabilities # # ![Deck of cards](https://i.imgur.com/1THqdvh.png) # - A probability is a number between zero and one inclusive: $p \in [0,1]$. # - Start with a set of elements called possible outcomes. # - Experiment is the selection of one outcome. # - Event is a subset of possible outcomes. # - An event occurs if the selected outcome is in the subset. # - Probability of an event is number of possible outcomes in event divided by the total number. # - Watch out, sets can be infinite and/or uncountable. # ## Simulation # In[1]: import numpy as np print(np.random.binomial(1, 0.01)) # https://docs.scipy.org/doc/numpy-1.15.1/reference/generated/numpy.random.binomial.html x = np.random.binomial(1, 0.01, 1000) print(np.sum(x)) # In[2]: # Helper function, returns True with probability P, False otherwise. def true_with_prob_p(p): return True if np.random.binomial(1, p) == 1 else False # Simulate the selection of a random person from the population. # Return True if they are a drug user, False otherwise. # True is returned with probability 0.005. def select_random_person(): return true_with_prob_p(0.005) # Simulate the testing of a person from the population. # Return True if they test positive, False otherwise. # Non-users test positive with probability 0.01. # Users test positive with probability 0.99. def test_person(user): if user: return true_with_prob_p(0.99) else: return true_with_prob_p(0.01) # Run an experiment - take a random person from the population # and test whether or not they are positive. def run_experiment(): user = select_random_person() test = test_person(user) return (user, test) # In[3]: # Run the experiemnt 10,000 times. y = [run_experiment() for i in range(10000)] # Count the number of users who tested positive. user_and_positive = [True for i in y if i[0] == True and i[1] == True] # Count the number of non-users who tested positive. nonuser_and_positive = [True for i in y if i[0] == False and i[1] == True] # In[4]: np.sum(user_and_positive) # In[5]: np.sum(nonuser_and_positive) # In[6]: import matplotlib.pyplot as plt plt.bar([0, 1], [np.sum(user_and_positive), np.sum(nonuser_and_positive)]) plt.xticks([0, 1], ('Users', ('Non-Users'))) plt.title("People who tested positive") # ## Analysis # $$ P(User \mid Positive) = \frac{P(Positive \mid User) P(User)}{P(Positive)} = \frac{P(Positive \mid User) P(User)}{P(Positive \mid User)P(User) + P(Positive \mid Nonuser)P(Nonuser)}$$ # In[7]: # Probability that you're a user. p_user = 0.005 # Probability that you're a non-user. p_nonuser = 1 - p_user # Probability that a user tests positive. p_positive_user = 0.99 # Probability that a non-user tests negative. p_positive_nonuser = 1.0 - 0.99 # Probability that you test positive. p_positive = p_positive_user * p_user + p_positive_nonuser * p_nonuser # Bayes' theorem. top_line = p_positive_user * p_user bottom_line = p_positive p_user_positive = top_line / bottom_line # Show result. print(p_user_positive) # ## End