Here I revisit the classic two-child problem and code it up in Python.
In a nutshell, the solution changes depending on whether and how we differentiate between the children (for example, whether we talk about the 'older' or 'younger' child, or just refer to them as 'either').
I also chart the solutions as the simulation runs to see how the probabilities converge to their values.
import enum, random
class Kid(enum.Enum):
    """Sex of one child in the two-child simulation.

    The numeric values are arbitrary labels; only identity comparisons
    against ``Kid.Boy`` / ``Kid.Girl`` are used by the simulation.
    """

    Boy = 0
    Girl = 1
def random_kid() -> Kid:
    """Return ``Kid.Boy`` or ``Kid.Girl``, picked with ``random.choice``.

    The draw uses the module-level ``random`` generator, so the sequence of
    results is reproducible after a call to ``random.seed(...)``.
    """
    return random.choice([Kid.Boy, Kid.Girl])
# Fix the seed so every run replays the same sequence of simulated families.
random.seed(42)

# Running tallies over all families simulated so far.
both_girls = 0   # both children are girls
older_girl = 0   # the older child is a girl
either_girl = 0  # at least one of the two children is a girl

# One row per simulated family: the two draws, the running tallies, and the
# running estimates of P(both | either) and P(both | older).
results = []

for _ in range(1000):
    # Keep this draw order (younger first): it determines which random
    # numbers each child consumes under the fixed seed.
    younger = random_kid()
    older = random_kid()

    if older == Kid.Girl:
        older_girl += 1
    if older == Kid.Girl and younger == Kid.Girl:
        both_girls += 1
    if older == Kid.Girl or younger == Kid.Girl:
        either_girl += 1

    # Until the conditioning event has occurred at least once the ratio is
    # undefined; report 0 in that case instead of dividing by zero.
    p_both_older = both_girls / older_girl if older_girl else 0
    p_both_either = both_girls / either_girl if either_girl else 0

    results.append([
        younger.name,
        older.name,
        both_girls,
        older_girl,
        either_girl,
        p_both_either,
        p_both_older,
    ])
import altair as alt
import pandas as pd
# Turn the per-family rows into a DataFrame. reset_index() adds the row
# number as an explicit 'index' column, which the later melt/plot steps use.
df_results = pd.DataFrame(
    data=results,
    columns=[
        'younger',
        'older',
        'both girls',
        'older girl',
        'either girl',
        'P(Both|Either)',
        'P(Both|Older)',
    ],
).reset_index()
df_results
| | index | younger | older | both girls | older girl | either girl | P(Both\|Either) | P(Both\|Older) |
---|---|---|---|---|---|---|---|---|
0 | 0 | Boy | Boy | 0 | 0 | 0 | 0.000000 | 0.000000 |
1 | 1 | Girl | Boy | 0 | 0 | 1 | 0.000000 | 0.000000 |
2 | 2 | Boy | Boy | 0 | 0 | 1 | 0.000000 | 0.000000 |
3 | 3 | Boy | Boy | 0 | 0 | 1 | 0.000000 | 0.000000 |
4 | 4 | Girl | Boy | 0 | 0 | 2 | 0.000000 | 0.000000 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
995 | 995 | Girl | Boy | 263 | 513 | 763 | 0.344692 | 0.512671 |
996 | 996 | Girl | Girl | 264 | 514 | 764 | 0.345550 | 0.513619 |
997 | 997 | Girl | Girl | 265 | 515 | 765 | 0.346405 | 0.514563 |
998 | 998 | Girl | Boy | 265 | 515 | 766 | 0.345953 | 0.514563 |
999 | 999 | Girl | Girl | 266 | 516 | 767 | 0.346806 | 0.515504 |
1000 rows × 8 columns
# Reshape to long format: one row per (index, variable) pair.
to_plot = df_results.melt(id_vars='index')

# Tag each variable as a running count or a running probability. Variables
# in neither group ('younger', 'older') are left without a type (NaN).
for type_label, variables in (
    ('count', ['both girls', 'older girl', 'either girl']),
    ('probability', ['P(Both|Either)', 'P(Both|Older)']),
):
    to_plot.loc[to_plot['variable'].isin(variables), 'type'] = type_label

to_plot
| | index | variable | value | type |
---|---|---|---|---|
0 | 0 | younger | Boy | NaN |
1 | 1 | younger | Girl | NaN |
2 | 2 | younger | Boy | NaN |
3 | 3 | younger | Boy | NaN |
4 | 4 | younger | Girl | NaN |
... | ... | ... | ... | ... |
6995 | 995 | P(Both|Older) | 0.512671 | probability |
6996 | 996 | P(Both|Older) | 0.513619 | probability |
6997 | 997 | P(Both|Older) | 0.514563 | probability |
6998 | 998 | P(Both|Older) | 0.514563 | probability |
6999 | 999 | P(Both|Older) | 0.515504 | probability |
7000 rows × 4 columns
# Base chart over the long-format data with only the x channel bound.
chart = alt.Chart(to_plot).encode(alt.X('index:Q'))

# Hover selection keyed on x: picks the nearest point under the mouse.
# empty='none' presumably means nothing is selected before the first hover
# (confirm against the Altair version in use).
label = alt.selection_single(
    encodings=['x'],
    on='mouseover',
    nearest=True,
    empty='none',
)

# Running counts (both girls / older girl / either girl), one line each.
count_chart = (
    alt.Chart(data=to_plot[to_plot['type'] == 'count'])
    .mark_line()
    .encode(
        alt.X('index:Q'),
        alt.Y('value:Q'),
        color=alt.Color('variable:N'),
    )
)

# Running probability estimates, one line each.
probablity_chart = (
    alt.Chart(to_plot[to_plot['type'] == 'probability'])
    .mark_line()
    .encode(
        alt.X('index:Q'),
        alt.Y('value:Q'),
        color=alt.Color('variable:N'),
    )
)

# Layer order: probability lines (carrying the hover selection), a gray rule
# filtered by that selection, then the count lines. Counts and probabilities
# get independent y scales.
values_chart = (
    alt.layer(
        probablity_chart.add_selection(label),
        probablity_chart.mark_rule(color='gray')
        .encode(alt.X('index:Q'))
        .transform_filter(label),
        count_chart,
    )
    .resolve_scale(y='independent')
    .properties(width=600)
)
values_chart