#!/usr/bin/env python
# coding: utf-8

# # Computational checking of the girl/boy probability problem
# Here I replay the classical [two child problem](https://en.wikipedia.org/wiki/Boy_or_Girl_paradox) and code it up in Python.
#
# In a nutshell, the solution changes depending on if and how we differentiate between the children (for example, whether we talk about the 'older' or 'younger' child or just refer to them as 'either').
#
# I also chart the solutions while I run the simulation to see how the probabilities tend to converge to their values.

# In[1]:


import enum
import random


# In[2]:


class Kid(enum.Enum):
    """Sex of one simulated child."""

    Boy = 0
    Girl = 1


# In[3]:


def random_kid() -> Kid:
    """Return ``Kid.Boy`` or ``Kid.Girl``, chosen by ``random.choice``."""
    return random.choice([Kid.Boy, Kid.Girl])


# In[4]:


# Fixed seed so every run of the script replays the same sequence of families.
random.seed(42)


# In[5]:


# Number of simulated two-child families.
N_TRIALS = 1000

# Running counters, updated after every simulated family.
both_girls = 0
older_girl = 0
either_girl = 0

# One row per trial: the two children plus the counters and the running
# conditional-probability estimates *after* that trial.
results = []

for _ in range(N_TRIALS):
    # Draw order matters for reproducibility under the fixed seed:
    # the younger child is drawn first, then the older one.
    younger = random_kid()
    older = random_kid()

    if older == Kid.Girl:
        older_girl += 1
    if older == Kid.Girl and younger == Kid.Girl:
        both_girls += 1
    if older == Kid.Girl or younger == Kid.Girl:
        either_girl += 1

    # Until the conditioning event has been seen at least once its count is
    # zero; report 0 for the estimate instead of dividing by zero.
    p_both_older = both_girls / older_girl if older_girl else 0
    p_both_either = both_girls / either_girl if either_girl else 0

    results.append([
        younger.name,
        older.name,
        both_girls,
        older_girl,
        either_girl,
        p_both_either,
        p_both_older,
    ])


# In[6]:


import altair as alt
import pandas as pd


# In[7]:


# reset_index() turns the trial number into an explicit 'index' column, which
# the charts below use as their x axis.
df_results = pd.DataFrame(
    results,
    columns=[
        'younger',
        'older',
        'both girls',
        'older girl',
        'either girl',
        'P(Both|Either)',
        'P(Both|Older)',
    ],
).reset_index()


# In[8]:


df_results


# In[9]:


# Long format: one row per (trial, variable). Each variable is then tagged as
# a running count or a running probability so the two groups can be plotted
# separately. Rows coming from the 'younger' / 'older' columns match neither
# mask, so they get no 'type' value and are left out of both charts.
to_plot = df_results.melt(id_vars='index')
to_plot.loc[
    to_plot['variable'].isin(['both girls', 'older girl', 'either girl']),
    'type',
] = 'count'
to_plot.loc[
    to_plot['variable'].isin(['P(Both|Either)', 'P(Both|Older)']),
    'type',
] = 'probability'
to_plot


# In[10]:


# NOTE(review): `chart` is not referenced by any later statement in this file.
chart = alt.Chart(to_plot).encode(alt.X('index:Q'))

# Selection that follows the mouse along the x axis; with empty='none' nothing
# is selected until the pointer is over the chart.
# NOTE(review): `selection_single` / `add_selection` appear to be deprecated
# in newer Altair releases - confirm against the installed version.
label = alt.selection_single(
    encodings=['x'],
    on='mouseover',
    nearest=True,
    empty='none',
)

# Running counters, one line per counter.
count_chart = alt.Chart(data=to_plot[to_plot['type'] == 'count']).mark_line().encode(
    alt.X('index:Q'),
    alt.Y('value:Q'),
    color=alt.Color('variable:N'),
)

# Running probability estimates, one line per estimate.
probablity_chart = alt.Chart(to_plot[to_plot['type'] == 'probability']).mark_line().encode(
    alt.X('index:Q'),
    alt.Y('value:Q'),
    color=alt.Color('variable:N')
)

# Layer the probability lines, a gray rule at the selected trial, and the
# count lines. Counts and probabilities get independent y scales.
values_chart = alt.layer(
    probablity_chart.add_selection(label),
    probablity_chart.mark_rule(color='gray').encode(alt.X('index:Q')).transform_filter(label),
    count_chart,
).resolve_scale(y='independent').properties(width=600)

values_chart