# Computational checking of the girl/boy probability problem

Here I replay the classical two child problem and code it up in Python.

In a nutshell, the solution changes depending on whether and how we differentiate between the children (for example, whether we talk about the 'older' or 'younger' child, or just refer to 'either' of them).

I also chart the solutions while I run the simulation to see how the probabilities converge to their values.

In [1]:
import enum, random

In [2]:
class Kid(enum.Enum):
    """Sex of a simulated child; the two members are the only outcomes."""

    Boy = 0
    Girl = 1

In [3]:
def random_kid() -> Kid:
    """Return ``Kid.Boy`` or ``Kid.Girl``, chosen uniformly at random.

    The draw uses the module-level ``random`` generator, so the sequence of
    results is reproducible once ``random.seed`` has been called.
    """
    return random.choice([Kid.Boy, Kid.Girl])

In [4]:
# Seed the global RNG so every run of the simulation draws the same sequence of kids.
random.seed(42)

In [5]:
# Number of two-child families to simulate.
N_FAMILIES = 1000

# Running tallies over all families simulated so far.
both_girls = 0   # both children are girls
older_girl = 0   # the older child is a girl
either_girl = 0  # at least one of the two children is a girl

# One row per family: the two kids, the tallies after that family, and the
# running conditional-probability estimates.
results = []

for _ in range(N_FAMILIES):
    # Draw order matters for reproducibility under a fixed seed:
    # younger first, then older.
    younger = random_kid()
    older = random_kid()

    if older is Kid.Girl:
        older_girl += 1
        if younger is Kid.Girl:
            both_girls += 1

    if older is Kid.Girl or younger is Kid.Girl:
        either_girl += 1

    # Running estimates of P(both girls | older is a girl) and
    # P(both girls | at least one is a girl). Until the conditioning event
    # has been observed the denominator is zero, so report 0.0 instead.
    p_both_older = both_girls / older_girl if older_girl else 0.0
    p_both_either = both_girls / either_girl if either_girl else 0.0

    results.append([
        younger.name,
        older.name,
        both_girls,
        older_girl,
        either_girl,
        p_both_either,
        p_both_older,
    ])

In [6]:
import altair as alt
import pandas as pd

In [7]:
# Tabulate the simulation rows; reset_index() turns the row number into an
# explicit 'index' column.
df_results = pd.DataFrame(
    results,
    columns=[
        'younger',
        'older',
        'both girls',
        'older girl',
        'either girl',
        'P(Both|Either)',
        'P(Both|Older)',
    ],
).reset_index()

In [8]:
# Bare expression: in a notebook this renders the table as the cell output.
df_results

Out[8]:
index younger older both girls older girl either girl P(Both|Either) P(Both|Older)
0 0 Boy Boy 0 0 0 0.000000 0.000000
1 1 Girl Boy 0 0 1 0.000000 0.000000
2 2 Boy Boy 0 0 1 0.000000 0.000000
3 3 Boy Boy 0 0 1 0.000000 0.000000
4 4 Girl Boy 0 0 2 0.000000 0.000000
... ... ... ... ... ... ... ... ...
995 995 Girl Boy 263 513 763 0.344692 0.512671
996 996 Girl Girl 264 514 764 0.345550 0.513619
997 997 Girl Girl 265 515 765 0.346405 0.514563
998 998 Girl Boy 265 515 766 0.345953 0.514563
999 999 Girl Girl 266 516 767 0.346806 0.515504

1000 rows × 8 columns

In [9]:
# Reshape to long form: one row per (index, variable) pair.
to_plot = df_results.melt(id_vars='index')

# Tag each numeric series as a running count or a running probability.
# Rows for 'younger' / 'older' match neither list and keep a missing 'type'.
for kind, variables in (
    ('count', ['both girls', 'older girl', 'either girl']),
    ('probability', ['P(Both|Either)', 'P(Both|Older)']),
):
    to_plot.loc[to_plot['variable'].isin(variables), 'type'] = kind

to_plot

Out[9]:
index variable value type
0 0 younger Boy NaN
1 1 younger Girl NaN
2 2 younger Boy NaN
3 3 younger Boy NaN
4 4 younger Girl NaN
... ... ... ... ...
6995 995 P(Both|Older) 0.512671 probability
6996 996 P(Both|Older) 0.513619 probability
6997 997 P(Both|Older) 0.514563 probability
6998 998 P(Both|Older) 0.514563 probability
6999 999 P(Both|Older) 0.515504 probability

7000 rows × 4 columns

In [10]:
# Base chart over the long-form data with only the x channel bound.
chart = alt.Chart(to_plot).encode(x=alt.X('index:Q'))

# Selection that follows the mouse along x and snaps to the nearest point;
# empty='none' means nothing is selected until the pointer is over the chart.
# NOTE(review): selection_single is deprecated in newer Altair releases in
# favour of selection_point; confirm against the installed version.
label = alt.selection_single(
    encodings=['x'],
    on='mouseover',
    nearest=True,
    empty='none',
)

# One line per running tally ('both girls', 'older girl', 'either girl').
count_chart = alt.Chart(
    data=to_plot.loc[to_plot['type'].eq('count')],
).mark_line().encode(
    x=alt.X('index:Q'),
    y=alt.Y('value:Q'),
    color=alt.Color('variable:N'),
)

# One line per running probability estimate.
probablity_chart = alt.Chart(
    to_plot.loc[to_plot['type'].eq('probability')],
).mark_line().encode(
    x=alt.X('index:Q'),
    y=alt.Y('value:Q'),
    color=alt.Color('variable:N'),
)

values_chart = alt.layer(