#!/usr/bin/env python
# coding: utf-8

# # Computational checking of the girl/boy probability problem

# Here I replay the classical [two child problem](https://en.wikipedia.org/wiki/Boy_or_Girl_paradox) and code it up in Python.
# 
# In a nutshell, the solution changes depending on if and how we differentiate between the children (for example, whether we talk about the 'older' or 'younger' child or just refer to them as 'either').
# 
# I also chart the solutions while I run the simulation to see how the probabilities tend to converge to their values.

# In[1]:


import enum, random


# In[2]:


@enum.unique
class Kid(enum.Enum):
    """The two possible outcomes when a child is drawn in the simulation."""

    # Declaration order is also the iteration order of the enum.
    Boy = 0
    Girl = 1


# In[3]:


def random_kid() -> Kid:
    """Return one member of ``Kid`` picked with ``random.choice``.

    The candidates are the enum's members in declaration order
    (``Kid.Boy``, ``Kid.Girl``), drawn from the module-level ``random``
    generator.
    """
    candidates = list(Kid)
    return random.choice(candidates)


# In[4]:


# Fix the seed of the shared `random` generator so that every run of the
# script draws the same sequence of children.
random.seed(42)


# In[5]:


# Running tallies over all simulated families so far.
both_girls = older_girl = either_girl = 0

# One row per simulated family: the two draws, the running tallies and the
# running estimates of both conditional probabilities.
results = []

for _ in range(1000):
    # The younger child is drawn first, then the older one; keeping this
    # order keeps the random sequence (and thus the results) unchanged.
    younger, older = random_kid(), random_kid()

    older_is_girl = older is Kid.Girl
    younger_is_girl = younger is Kid.Girl

    if older_is_girl:
        older_girl += 1
        if younger_is_girl:
            both_girls += 1

    if older_is_girl or younger_is_girl:
        either_girl += 1

    # Until the conditioning event has been seen at least once the ratio is
    # undefined; it is recorded as 0 in that case.
    p_both_older = both_girls / older_girl if older_girl else 0
    p_both_either = both_girls / either_girl if either_girl else 0

    results.append([
        younger.name,
        older.name,
        both_girls,
        older_girl,
        either_girl,
        p_both_either,
        p_both_older,
    ])


# In[6]:


import altair as alt
import pandas as pd


# In[7]:


# Column labels, in the same order as the fields of each row in `results`.
result_columns = [
    'younger',
    'older',
    'both girls',
    'older girl',
    'either girl',
    'P(Both|Either)',
    'P(Both|Older)',
]

# reset_index() turns the row number (i.e. the trial number) into an explicit
# 'index' column.
df_results = pd.DataFrame(results, columns=result_columns)
df_results = df_results.reset_index()


# In[8]:


# Bare expression, presumably kept from the notebook export so that the cell
# displays the frame; it has no effect when the file runs as a plain script.
df_results


# In[9]:


# Which melted series belong to which kind of measurement. Series that are not
# listed here (the 'younger' / 'older' draws) get no 'type' value.
series_by_type = {
    'count': ['both girls', 'older girl', 'either girl'],
    'probability': ['P(Both|Either)', 'P(Both|Older)'],
}

# Long format: one row per (trial, series) pair with 'variable' / 'value' columns.
to_plot = df_results.melt(id_vars='index')
for series_type, series_names in series_by_type.items():
    belongs = to_plot['variable'].isin(series_names)
    to_plot.loc[belongs, 'type']  = series_type
to_plot


# In[10]:


# Base chart over the full long-format frame, with only the x channel set.
chart = alt.Chart(to_plot).encode(x=alt.X('index:Q'))

# Selection that follows the mouse: picks the nearest x value and is empty
# (selects nothing) until the pointer is over the chart.
label = alt.selection_single(
    encodings=['x'],
    on='mouseover',
    nearest=True,
    empty='none',
)

# Split the long frame into the two families of series.
count_rows = to_plot[to_plot['type'] == 'count']
probability_rows = to_plot[to_plot['type'] == 'probability']

# Running tallies, one line per series.
count_chart = alt.Chart(data=count_rows).mark_line().encode(
    x=alt.X('index:Q'),
    y=alt.Y('value:Q'),
    color=alt.Color('variable:N'),
)

# Running probability estimates, one line per series.
probablity_chart = alt.Chart(probability_rows).mark_line().encode(
    x=alt.X('index:Q'),
    y=alt.Y('value:Q'),
    color=alt.Color('variable:N'),
)

# Gray rule derived from the probability chart, filtered down to the
# currently selected x value.
hover_rule = (
    probablity_chart
    .mark_rule(color='gray')
    .encode(x=alt.X('index:Q'))
    .transform_filter(label)
)

# Layer order: probabilities (carrying the selection), hover rule, counts.
# Each layer gets its own y scale via resolve_scale(y='independent').
layers = [
    probablity_chart.add_selection(label),
    hover_rule,
    count_chart,
]
values_chart = (
    alt.layer(*layers)
    .resolve_scale(y='independent')
    .properties(width=600)
)

values_chart