#!/usr/bin/env python # coding: utf-8 # # Introduction to Simulations # # # In this notebook we will learn: # # - Comparisons # - For-loops # - Basic structure of a simulation by for-loops # - Use of sum with comparison to count successes in a simulation # # In[ ]: from datascience import * import numpy as np get_ipython().run_line_magic('matplotlib', 'inline') import matplotlib.pyplot as plots plots.style.use('fivethirtyeight') # ## Comparison ## # In[ ]: 3 > 1 # In[ ]: type(3 > 1) # In[ ]: True # In[ ]: true # In[ ]: 3 = 3 # In[ ]: 3 == 3.0 # In[ ]: 10 != 2 # In[ ]: x = 14 y = 3 # In[ ]: x > 15 # In[ ]: 12 < x # In[ ]: x < 20 # In[ ]: 12 < x < 20 # In[ ]: 10 < x-y < 13 # In[ ]: x > 13 and y < 3.14159 # ## Comparisons with arrays # In[ ]: pets = make_array('cat', 'cat', 'dog', 'cat', 'dog', 'rabbit') # In[ ]: pets == 'cat' # In[ ]: 1 + 1 + 0 + 1 + 0 + 0 # In[ ]: sum(make_array(True, True, False, True, False, False)) # In[ ]: sum(pets == 'dog') # In[ ]: np.count_nonzero(pets == 'dog') # In[ ]: x = np.arange(20, 31) # In[ ]: x > 28 # ## For-loops## # # Python has a `for`. The stucture is like this: # # for variable in list or array: # # body of loop # # # In[ ]: rainbow = make_array('red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet') for color in rainbow: print(color) # In[ ]: for thing in rainbow: print(thing) # In[ ]: num_array = np.arange(1, 3.25, 0.25) ## This for-loop is meaningless, don't try to figure out what's being computed ## we just want to demonstrate that a for-loop can involve multiple steps for i in num_array: i2 = i**2 i3 = i2 - 1 i4 = i3*(1.09) print(i4) # In[ ]: for k in np.arange(11): print(k**3) # In[ ]: num_list = [1, 2, 3, 4, 5, 6, 7, 8] for k in num_list: print((k - 1)**0.5) # ### Appending Arrays # # We'll see that appending an array can be a good way to keep track to the results of multiple simulations. # In[ ]: first = np.arange(4) second = np.arange(10, 17) second # In[ ]: np.append(first, 6) # In[ ]: first # In[ ]: np.append(first, second) # In[ ]: first # In[ ]: second # In[ ]: squares = make_array() # an empty array num_array = np.arange(11) for i in num_array: squares = np.append(squares, i**2) squares # In[ ]: # ## Simulation # Let's play a game: we each roll a die. # # If my number is bigger: you pay me a dollar. # # If they're the same: we do nothing. # # If your number is bigger: I pay you a dollar. # Steps: # 1. Find a way to simulate the roll of a die, then generalize to two dice. # 2. Compute how much money we win/lose based on the result. # 3. Do steps 1 and 2 10,000 times. # ### Random Selection # # The `np.random.choice` function can help here. # In[ ]: die_faces = np.arange(1, 7) die_faces # In[ ]: np.random.choice(die_faces) # In[ ]: np.random.choice(die_faces, 10) # In[ ]: # ### Conditional Statements # In[ ]: # Work in progress def one_round(my_roll, your_roll): if my_roll > your_roll: return 1 # In[ ]: one_round(4, 3) # In[ ]: one_round(2, 6) # In[ ]: # Final correct version def one_round(my_roll, your_roll): if my_roll > your_roll: return 1 elif your_roll > my_roll: return -1 elif your_roll == my_roll: return 0 # In[ ]: one_round(1, 1) # In[ ]: one_round(6, 5) # In[ ]: one_round(7, -1) # In[ ]: def simulate_one_round(): my_roll = np.random.choice(die_faces) your_roll = np.random.choice(die_faces) return one_round(my_roll, your_roll) # In[ ]: simulate_one_round() # ### Repeated Betting ### # In[ ]: results = make_array() results # In[ ]: results = np.append(results, simulate_one_round()) results # In[ ]: game_outcomes = make_array() for i in np.arange(5): game_outcomes = np.append(game_outcomes, simulate_one_round()) game_outcomes # In[ ]: game_outcomes = make_array() for i in np.arange(10000): game_outcomes = np.append(game_outcomes, simulate_one_round()) game_outcomes # In[ ]: len(game_outcomes) # In[ ]: results = Table().with_column('My winnings', game_outcomes) # In[ ]: results # In[ ]: results.group('My winnings').barh('My winnings') # In[ ]: game_outcomes = make_array() for i in np.arange(10000): game_outcomes = np.append(game_outcomes, simulate_one_round()) results = Table().with_column('My winnings', game_outcomes) results.group('My winnings').barh('My winnings') # ## Would this game be a good way to make money? ## # In[ ]: sum(results.column(0)) # In[ ]: # Bonus question: This simulation is relatively simple. # Can you find a way to run it without using a for loop? my_rolls = np.random.choice(np.arange(1,7), size = 10000) your_rolls = np.random.choice(np.arange(1,7), size = 10000) results = Table().with_columns("Mine", my_rolls, "Yours", your_rolls) results = results.with_column("Results", results.apply(one_round, "Mine", "Yours")) results.group("Results") results.group("Results").barh("Results") # ### Another example: simulating heads in 100 coin tosses # # If 100 people individually flipped their own fair coin at the same time (or one very bored person flipped a fair coin 100 times), would it be reasonable if 40 or fewer of them came up heads? # # # In[ ]: coin = make_array('heads', 'tails') # In[ ]: sum(np.random.choice(coin, 100) == 'heads') # In[ ]: # Simulate one outcome def num_heads(): return sum(np.random.choice(coin, 100) == 'heads') # In[ ]: # Decide how many times you want to repeat the experiment repetitions = 10000 # In[ ]: # Simulate that many outcomes outcomes = make_array() for i in np.arange(repetitions): outcomes = np.append(outcomes, num_heads()) heads = Table().with_column('Heads', outcomes) heads.hist(bins = np.arange(29.5, 70.6), right_end = 40) # In[ ]: heads = Table().with_column('Heads', outcomes) heads.hist(bins = np.arange(29.5, 70.6), right_end = 40) # They yellow section; how many is that? # In[ ]: sum(heads.column(0)<=40) # In[ ]: sum(outcomes <=40) # Then what proportion is that? # In[ ]: 290/10000 # In[ ]: # What interval captures the middle 95% of these outcomes? # In[ ]: np.percentile(outcomes, make_array(2.5, 97.5)) # ## Famous Monty Hall Problem ## # # On the game show, Let's Make a Deal, one of the more popular games was a simple guessing game involving three doors. One door would hide a desireable prize (an expensive vacation, a new car, or something of similar value). The other two doors would hide a fake prize, often a goat. The way the game was played was simple: # # 1. The player picks a door # 2. Monty Hall (the show's host) would ask that *a different* door be opened, revealing one of the two goats. # 3. Monty would offer the player the opportunity to switch to the *other* unopened door. # # The mathematical/probability/statistical question is this: should the player switch doors? # # To put it another way, which player strategy has the higher likelihood of winning, picking a door and sticking with it, or picking a door and automatically switching once another door has been opened? # # **Strategy 1:** The pick & stick (pick a door and don't switch when given the change) # # **Strategy 2:** The pick & switch (pick a door, but automatically switch to the other when it's offered) # # Let's use simulations to decide which strategy is better. # # In[ ]: doors = make_array('car', 'first goat', 'second goat') goats = make_array('first goat', 'second goat') def other_goat(a_goat): if a_goat == 'first goat': return 'second goat' elif a_goat == 'second goat': return 'first goat' # In[ ]: def monty_hall(): contestant_choice = np.random.choice(doors) if contestant_choice == 'first goat': monty_choice = 'second goat' remaining_door = 'car' elif contestant_choice == 'second goat': monty_choice = 'first goat' remaining_door = 'car' elif contestant_choice == 'car': monty_choice = np.random.choice(goats) remaining_door = other_goat(monty_choice) return [contestant_choice, monty_choice, remaining_door] # In[ ]: # In[ ]: games = Table(['Strategy 1 Prize', 'Revealed', 'Strategy 2 Prize']) reps = 10000 for i in range(reps): games.append(monty_hall()) games # In[ ]: sum(games.column('Strategy 1 Prize')=='car')/reps # In[ ]: sum(games.column('Strategy 2 Prize')=='car')/reps # In[ ]: # In[ ]: