#!/usr/bin/env python
# coding: utf-8
# # Introduction to Simulations #
#
# In this notebook we will learn:
#
# - Comparisons
# - For-loops
# - Basic structure of a simulation by for-loops
# - Use of sum with comparison to count successes in a simulation
#
# In[ ]:
from datascience import *
import numpy as np
get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')
# ## Comparison ##
# In[ ]:
3 > 1
# In[ ]:
type(3 > 1)
# In[ ]:
True
# In[ ]:
true
# In[ ]:
3 = 3
# In[ ]:
3 == 3.0
# In[ ]:
10 != 2
# In[ ]:
x = 14
y = 3
# In[ ]:
x > 15
# In[ ]:
12 < x
# In[ ]:
x < 20
# In[ ]:
12 < x < 20
# In[ ]:
10 < x-y < 13
# In[ ]:
x > 13 and y < 3.14159
# ## Comparisons with arrays
# In[ ]:
pets = make_array('cat', 'cat', 'dog', 'cat', 'dog', 'rabbit')
# In[ ]:
pets == 'cat'
# In[ ]:
1 + 1 + 0 + 1 + 0 + 0
# In[ ]:
sum(make_array(True, True, False, True, False, False))
# In[ ]:
sum(pets == 'dog')
# In[ ]:
np.count_nonzero(pets == 'dog')
# In[ ]:
x = np.arange(20, 31)
# In[ ]:
x > 28
# ## For-loops##
#
# Python has a `for`. The stucture is like this:
#
# for variable in list or array:
#
# body of loop
#
#
# In[ ]:
rainbow = make_array('red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet')
for color in rainbow:
print(color)
# In[ ]:
for thing in rainbow:
print(thing)
# In[ ]:
num_array = np.arange(1, 3.25, 0.25)
## This for-loop is meaningless, don't try to figure out what's being computed
## we just want to demonstrate that a for-loop can involve multiple steps
for i in num_array:
i2 = i**2
i3 = i2 - 1
i4 = i3*(1.09)
print(i4)
# In[ ]:
for k in np.arange(11):
print(k**3)
# In[ ]:
num_list = [1, 2, 3, 4, 5, 6, 7, 8]
for k in num_list:
print((k - 1)**0.5)
# ### Appending Arrays
#
# We'll see that appending an array can be a good way to keep track to the results of multiple simulations.
# In[ ]:
first = np.arange(4)
second = np.arange(10, 17)
second
# In[ ]:
np.append(first, 6)
# In[ ]:
first
# In[ ]:
np.append(first, second)
# In[ ]:
first
# In[ ]:
second
# In[ ]:
squares = make_array() # an empty array
num_array = np.arange(11)
for i in num_array:
squares = np.append(squares, i**2)
squares
# In[ ]:
# ## Simulation
# Let's play a game: we each roll a die.
#
# If my number is bigger: you pay me a dollar.
#
# If they're the same: we do nothing.
#
# If your number is bigger: I pay you a dollar.
# Steps:
# 1. Find a way to simulate the roll of a die, then generalize to two dice.
# 2. Compute how much money we win/lose based on the result.
# 3. Do steps 1 and 2 10,000 times.
# ### Random Selection
#
# The `np.random.choice` function can help here.
# In[ ]:
die_faces = np.arange(1, 7)
die_faces
# In[ ]:
np.random.choice(die_faces)
# In[ ]:
np.random.choice(die_faces, 10)
# In[ ]:
# ### Conditional Statements
# In[ ]:
# Work in progress
def one_round(my_roll, your_roll):
if my_roll > your_roll:
return 1
# In[ ]:
one_round(4, 3)
# In[ ]:
one_round(2, 6)
# In[ ]:
# Final correct version
def one_round(my_roll, your_roll):
if my_roll > your_roll:
return 1
elif your_roll > my_roll:
return -1
elif your_roll == my_roll:
return 0
# In[ ]:
one_round(1, 1)
# In[ ]:
one_round(6, 5)
# In[ ]:
one_round(7, -1)
# In[ ]:
def simulate_one_round():
my_roll = np.random.choice(die_faces)
your_roll = np.random.choice(die_faces)
return one_round(my_roll, your_roll)
# In[ ]:
simulate_one_round()
# ### Repeated Betting ###
# In[ ]:
results = make_array()
results
# In[ ]:
results = np.append(results, simulate_one_round())
results
# In[ ]:
game_outcomes = make_array()
for i in np.arange(5):
game_outcomes = np.append(game_outcomes, simulate_one_round())
game_outcomes
# In[ ]:
game_outcomes = make_array()
for i in np.arange(10000):
game_outcomes = np.append(game_outcomes, simulate_one_round())
game_outcomes
# In[ ]:
len(game_outcomes)
# In[ ]:
results = Table().with_column('My winnings', game_outcomes)
# In[ ]:
results
# In[ ]:
results.group('My winnings').barh('My winnings')
# In[ ]:
game_outcomes = make_array()
for i in np.arange(10000):
game_outcomes = np.append(game_outcomes, simulate_one_round())
results = Table().with_column('My winnings', game_outcomes)
results.group('My winnings').barh('My winnings')
# ## Would this game be a good way to make money? ##
# In[ ]:
sum(results.column(0))
# In[ ]:
# Bonus question: This simulation is relatively simple.
# Can you find a way to run it without using a for loop?
my_rolls = np.random.choice(np.arange(1,7), size = 10000)
your_rolls = np.random.choice(np.arange(1,7), size = 10000)
results = Table().with_columns("Mine", my_rolls, "Yours", your_rolls)
results = results.with_column("Results", results.apply(one_round, "Mine", "Yours"))
results.group("Results")
results.group("Results").barh("Results")
# ### Another example: simulating heads in 100 coin tosses
#
# If 100 people individually flipped their own fair coin at the same time (or one very bored person flipped a fair coin 100 times), would it be reasonable if 40 or fewer of them came up heads?
#
#
# In[ ]:
coin = make_array('heads', 'tails')
# In[ ]:
sum(np.random.choice(coin, 100) == 'heads')
# In[ ]:
# Simulate one outcome
def num_heads():
return sum(np.random.choice(coin, 100) == 'heads')
# In[ ]:
# Decide how many times you want to repeat the experiment
repetitions = 10000
# In[ ]:
# Simulate that many outcomes
outcomes = make_array()
for i in np.arange(repetitions):
outcomes = np.append(outcomes, num_heads())
heads = Table().with_column('Heads', outcomes)
heads.hist(bins = np.arange(29.5, 70.6), right_end = 40)
# In[ ]:
heads = Table().with_column('Heads', outcomes)
heads.hist(bins = np.arange(29.5, 70.6), right_end = 40)
# They yellow section; how many is that?
# In[ ]:
sum(heads.column(0)<=40)
# In[ ]:
sum(outcomes <=40)
# Then what proportion is that?
# In[ ]:
290/10000
# In[ ]:
# What interval captures the middle 95% of these outcomes?
# In[ ]:
np.percentile(outcomes, make_array(2.5, 97.5))
# ## Famous Monty Hall Problem ##
#
# On the game show, Let's Make a Deal, one of the more popular games was a simple guessing game involving three doors. One door would hide a desireable prize (an expensive vacation, a new car, or something of similar value). The other two doors would hide a fake prize, often a goat. The way the game was played was simple:
#
# 1. The player picks a door
# 2. Monty Hall (the show's host) would ask that *a different* door be opened, revealing one of the two goats.
# 3. Monty would offer the player the opportunity to switch to the *other* unopened door.
#
# The mathematical/probability/statistical question is this: should the player switch doors?
#
# To put it another way, which player strategy has the higher likelihood of winning, picking a door and sticking with it, or picking a door and automatically switching once another door has been opened?
#
# **Strategy 1:** The pick & stick (pick a door and don't switch when given the change)
#
# **Strategy 2:** The pick & switch (pick a door, but automatically switch to the other when it's offered)
#
# Let's use simulations to decide which strategy is better.
#
# In[ ]:
doors = make_array('car', 'first goat', 'second goat')
goats = make_array('first goat', 'second goat')
def other_goat(a_goat):
if a_goat == 'first goat':
return 'second goat'
elif a_goat == 'second goat':
return 'first goat'
# In[ ]:
def monty_hall():
contestant_choice = np.random.choice(doors)
if contestant_choice == 'first goat':
monty_choice = 'second goat'
remaining_door = 'car'
elif contestant_choice == 'second goat':
monty_choice = 'first goat'
remaining_door = 'car'
elif contestant_choice == 'car':
monty_choice = np.random.choice(goats)
remaining_door = other_goat(monty_choice)
return [contestant_choice, monty_choice, remaining_door]
# In[ ]:
# In[ ]:
games = Table(['Strategy 1 Prize', 'Revealed', 'Strategy 2 Prize'])
reps = 10000
for i in range(reps):
games.append(monty_hall())
games
# In[ ]:
sum(games.column('Strategy 1 Prize')=='car')/reps
# In[ ]:
sum(games.column('Strategy 2 Prize')=='car')/reps
# In[ ]:
# In[ ]: