#!/usr/bin/env python
# coding: utf-8

# # Bayes' theorem

# $$ P(A \mid B) = \frac{P(B \mid A) P(A)}{P(B)}$$

# ## Problem statement
# 
# - A drug test where the probability of a user testing positive is 0.99 and the probability of a non-user testing negative is also 0.99.
# - If someone tests positive, what's the probability of them being a user?
# - Need to know the level of users in the population, suppose it's 0.5%.

# ## Events, outcomes, probabilities
# 
# ![Deck of cards](https://i.imgur.com/1THqdvh.png)

# - A probability is a number between zero and one inclusive: $p \in [0,1]$.
# - Start with a set of elements called possible outcomes.
# - Experiment is the selection of one outcome.
# - Event is a subset of possible outcomes.
# - An event occurs if the selected outcome is in the subset.
# - Probability of an event is number of possible outcomes in event divided by the total number.
# - Watch out, sets can be infinite and/or uncountable.

# ## Simulation

# In[1]:


import numpy as np

print(np.random.binomial(1, 0.01))

# https://docs.scipy.org/doc/numpy-1.15.1/reference/generated/numpy.random.binomial.html
x = np.random.binomial(1, 0.01, 1000)
print(np.sum(x))


# In[2]:


# Helper function, returns True with probability P, False otherwise.
def true_with_prob_p(p):
    return True if np.random.binomial(1, p) == 1 else False

# Simulate the selection of a random person from the population.
# Return True if they are a drug user, False otherwise.
# True is returned with probability 0.005.
def select_random_person():
    return true_with_prob_p(0.005)

# Simulate the testing of a person from the population.
# Return True if they test positive, False otherwise.
# Non-users test positive with probability 0.01.
# Users test positive with probability 0.99.
def test_person(user):
    if user:
        return true_with_prob_p(0.99)
    else:
        return true_with_prob_p(0.01)
    
# Run an experiment - take a random person from the population
# and test whether or not they are positive.
def run_experiment():
    user = select_random_person()
    test = test_person(user)
    return (user, test)


# In[3]:


# Run the experiemnt 10,000 times.
y = [run_experiment() for i in range(10000)]

# Count the number of users who tested positive.
user_and_positive = [True for i in y if i[0] == True and i[1] == True]

# Count the number of non-users who tested positive.
nonuser_and_positive = [True  for i in y if i[0] == False and i[1] == True]


# In[4]:


np.sum(user_and_positive)


# In[5]:


np.sum(nonuser_and_positive)


# In[6]:


import matplotlib.pyplot as plt

plt.bar([0, 1], [np.sum(user_and_positive), np.sum(nonuser_and_positive)])
plt.xticks([0, 1], ('Users', ('Non-Users')))
plt.title("People who tested positive")


# ## Analysis

# $$ P(User \mid Positive) = \frac{P(Positive \mid User) P(User)}{P(Positive)} =  \frac{P(Positive \mid User) P(User)}{P(Positive \mid User)P(User) + P(Positive \mid Nonuser)P(Nonuser)}$$

# In[7]:


# Probability that you're a user.
p_user = 0.005

# Probability that you're a non-user.
p_nonuser = 1 - p_user

# Probability that a user tests positive.
p_positive_user = 0.99

# Probability that a non-user tests negative.
p_positive_nonuser = 1.0 - 0.99

# Probability that you test positive.
p_positive = p_positive_user * p_user + p_positive_nonuser * p_nonuser

# Bayes' theorem.
top_line = p_positive_user * p_user
bottom_line = p_positive
p_user_positive = top_line / bottom_line

# Show result.
print(p_user_positive)


# ## End