#!/usr/bin/env python
# coding: utf-8

# ## Chapter 9 - Hierarchical Models
# - 9.2.4 - Example: Therapeutic touch
# - Shrinkage
# - 9.5.1 - Example: Baseball batting abilities by position (subjects within categories)

# In[1]:
import pandas as pd
import numpy as np
import pymc3 as pm
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
from IPython.display import Image
from matplotlib import gridspec

get_ipython().run_line_magic('matplotlib', 'inline')
plt.style.use('seaborn-white')
color = '#87ceeb'  # fill color used for all posterior plots in this notebook

# In[2]:
get_ipython().run_line_magic('load_ext', 'watermark')
get_ipython().run_line_magic('watermark', '-p pandas,numpy,pymc3,matplotlib,seaborn')

# ### 9.2.4 - Example: Therapeutic touch

# In[3]:
# One row per trial: practitioner id `s` (categorical) and binary outcome `y`.
df = pd.read_csv('data/TherapeuticTouchData.csv', dtype={'s': 'category'})
df.info()

# In[4]:
df.head()

# #### Figure 9.9

# In[5]:
# Proportion of correct responses per practitioner (y is 0/1, so sum/len = mean).
df_proportions = df.groupby('s')['y'].apply(lambda x: x.sum() / len(x))

# NOTE(review): sns.distplot is deprecated in seaborn >= 0.11; sns.histplot is
# the modern equivalent if this notebook is run against a newer seaborn.
ax = sns.distplot(df_proportions, bins=8, kde=False, color='gray')
ax.set(xlabel='Proportion Correct', ylabel='# Practitioners')
sns.despine(ax=ax)

# #### Model (Kruschke, 2015)

# In[6]:
Image('images/fig9_7.png', width=200)

# In[7]:
practitioner_idx = df.s.cat.codes.values       # integer index of each trial's practitioner
practitioner_codes = df.s.cat.categories       # category labels, in code order
n_practitioners = practitioner_codes.size

with pm.Model() as hierarchical_model:
    # Group-level mode (omega) and concentration (kappa) of practitioner abilities.
    # kappa is shifted by 2 so the Beta parameters below stay >= 1.
    omega = pm.Beta('omega', 1., 1.)
    kappa_minus2 = pm.Gamma('kappa_minus2', 0.01, 0.01)
    kappa = pm.Deterministic('kappa', kappa_minus2 + 2)

    # Per-practitioner probability of a correct response, parameterized by
    # mode/concentration (Kruschke eq. 9.4).
    theta = pm.Beta('theta',
                    alpha=omega * (kappa - 2) + 1,
                    beta=(1 - omega) * (kappa - 2) + 1,
                    shape=n_practitioners)

    y = pm.Bernoulli('y', theta[practitioner_idx], observed=df.y)

pm.model_to_graphviz(hierarchical_model)

# In[8]:
with hierarchical_model:
    trace = pm.sample(5000, cores=4, nuts_kwargs={'target_accept': 0.95})

# In[9]:
pm.traceplot(trace, ['omega', 'kappa', 'theta'])

# In[10]:
pm.summary(trace)

# Note that theta is indexed starting with 0 and not 1, as is the case in
# Kruschke (2015).

# #### Figure 9.10 - Marginal posterior distributions

# In[11]:
plt.figure(figsize=(10, 12))

# Define gridspec: one 4x6 grid holding 11 axes of mixed widths.
gs = gridspec.GridSpec(4, 6)
ax1 = plt.subplot(gs[0, :3])
ax2 = plt.subplot(gs[0, 3:])
ax3 = plt.subplot(gs[1, :2])
ax4 = plt.subplot(gs[1, 2:4])
ax5 = plt.subplot(gs[1, 4:6])
ax6 = plt.subplot(gs[2, :2])
ax7 = plt.subplot(gs[2, 2:4])
ax8 = plt.subplot(gs[2, 4:6])
ax9 = plt.subplot(gs[3, :2])
ax10 = plt.subplot(gs[3, 2:4])
ax11 = plt.subplot(gs[3, 4:6])

# thetas and theta pairs to plot (first, middle, last practitioner)
thetas = (0, 13, 27)
theta_pairs = ((0, 13), (0, 27), (13, 27))
font_d = {'size': 14}

# kappa & omega posterior plots.  Raw string avoids the invalid '\k'/'\o'
# escape-sequence warning while still producing '$\kappa$' / '$\omega$'.
for var, ax in zip(['kappa', 'omega'], [ax1, ax2]):
    pm.plot_posterior(trace[var], point_estimate='mode', ax=ax, color=color, round_to=2)
    ax.set_xlabel(r'$\{}$'.format(var), fontdict={'size': 20, 'weight': 'bold'})
ax1.set(xlim=(0, 500))

# theta posterior plots
for var, ax in zip(thetas, [ax3, ax7, ax11]):
    pm.plot_posterior(trace['theta'][:, var], point_estimate='mode', ax=ax, color=color)
    ax.set_xlabel('theta[{}]'.format(var), fontdict=font_d)

# theta scatter plots (every 10th sample, to keep the plot light)
for var, ax in zip(theta_pairs, [ax6, ax9, ax10]):
    ax.scatter(trace['theta'][::10, var[0]], trace['theta'][::10, var[1]],
               alpha=0.75, color=color, facecolor='none')
    ax.plot([0, 1], [0, 1], ':k', transform=ax.transAxes, alpha=0.5)
    ax.set_xlabel('theta[{}]'.format(var[0]), fontdict=font_d)
    ax.set_ylabel('theta[{}]'.format(var[1]), fontdict=font_d)
    ax.set(xlim=(0, 1), ylim=(0, 1), aspect='equal')

# theta posterior differences plots
for var, ax in zip(theta_pairs, [ax4, ax5, ax8]):
    pm.plot_posterior(trace['theta'][:, var[0]] - trace['theta'][:, var[1]],
                      point_estimate='mode', ax=ax, color=color)
    ax.set_xlabel('theta[{}] - theta[{}]'.format(*var), fontdict=font_d)

plt.tight_layout()

# ### Shrinkage

# Let's create a model with just the theta estimations per practitioner,
# without the influence of a higher level distribution.  Then we can compare
# the theta values with the hierarchical model above.

# In[12]:
with pm.Model() as unpooled_model:
    theta = pm.Beta('theta', 1, 1, shape=n_practitioners)
    y = pm.Bernoulli('y', theta[practitioner_idx], observed=df.y)

pm.model_to_graphviz(unpooled_model)

# In[13]:
with unpooled_model:
    unpooled_trace = pm.sample(5000, cores=4)

# Here we concatenate the trace results (thetas) from both models into a
# dataframe.  Next we shape the data into a format that we can use with
# Seaborn's pointplot.

# In[14]:
# iloc[3:, 0] skips the omega/kappa_minus2/kappa rows of the hierarchical
# summary so only the theta means remain — NOTE(review): this relies on the
# summary's row order; verify if the model's variable order changes.
df_shrinkage = (pd.concat([pm.summary(unpooled_trace).iloc[:, 0],
                           pm.summary(trace).iloc[3:, 0]],
                          axis=1)
                .reset_index())
df_shrinkage.columns = ['theta', 'unpooled', 'hierarchical']
df_shrinkage = pd.melt(df_shrinkage, 'theta', ['unpooled', 'hierarchical'],
                       var_name='Model')
df_shrinkage.head()

# The below plot shows that the theta estimates on practitioner level are
# pulled towards the group mean of the hierarchical model.

# In[15]:
plt.figure(figsize=(10, 9))
plt.scatter(1, pm.summary(trace).iloc[0, 0], s=100, c='r', marker='x',
            zorder=999, label='Group mean')
sns.pointplot(x='Model', y='value', hue='theta', data=df_shrinkage)

# ### 9.5.1 - Example: Baseball batting abilities by position

# In[16]:
df2 = pd.read_csv('data/BattingAverage.csv', usecols=[0, 1, 2, 3],
                  dtype={'PriPos': 'category'})
df2.info()

# The DataFrame contains records for 948 players in the 2012 regular season
# of Major League Baseball.
# - One record per player
# - 9 primary field positions

# In[17]:
# Raw batting average per player: hits divided by at-bats.
df2['BatAv'] = df2.Hits.divide(df2.AtBats)
df2.head(10)

# In[18]:
# Batting average by primary field positions calculated from the data.
# A list of column labels is required here: the old tuple form
# ['Hits','AtBats'] is deprecated and removed in pandas >= 2.0.
df2.groupby('PriPos')[['Hits', 'AtBats']].sum().pipe(lambda x: x.Hits / x.AtBats)

# #### Model (Kruschke, 2015)

# In[19]:
Image('images/fig9_13.png', width=300)

# In[20]:
pripos_idx = df2.PriPos.cat.codes.values   # integer position index per player
pripos_codes = df2.PriPos.cat.categories   # the 9 primary field positions
n_pripos = pripos_codes.size

# df2 contains one entry per player
n_players = df2.index.size

with pm.Model() as hierarchical_model2:
    # Hyper parameters: overall mode and concentration across all positions.
    # kappa is shifted by 2 so the derived Beta parameters stay >= 1.
    omega = pm.Beta('omega', 1, 1)
    kappa_minus2 = pm.Gamma('kappa_minus2', 0.01, 0.01)
    kappa = pm.Deterministic('kappa', kappa_minus2 + 2)

    # Parameters for categories (Primary field positions): per-position mode
    # and concentration, drawn from the overall hyperprior.
    omega_c = pm.Beta('omega_c',
                      omega * (kappa - 2) + 1,
                      (1 - omega) * (kappa - 2) + 1,
                      shape=n_pripos)
    kappa_c_minus2 = pm.Gamma('kappa_c_minus2', 0.01, 0.01, shape=n_pripos)
    kappa_c = pm.Deterministic('kappa_c', kappa_c_minus2 + 2)

    # Parameter for individual players: batting ability, centred on the
    # player's position-level mode/concentration.
    theta = pm.Beta('theta',
                    omega_c[pripos_idx] * (kappa_c[pripos_idx] - 2) + 1,
                    (1 - omega_c[pripos_idx]) * (kappa_c[pripos_idx] - 2) + 1,
                    shape=n_players)

    y2 = pm.Binomial('y2', n=df2.AtBats.values, p=theta, observed=df2.Hits)

pm.model_to_graphviz(hierarchical_model2)

# In[21]:
with hierarchical_model2:
    trace2 = pm.sample(3000, cores=4)

# In[22]:
pm.traceplot(trace2, ['omega', 'kappa', 'omega_c', 'kappa_c'])

# #### Figure 9.17

# #### Posterior distribution of hyper parameter omega after sampling.

# In[23]:
pm.plot_posterior(trace2['omega'], point_estimate='mode', color=color)
plt.title('Overall', fontdict={'fontsize': 16, 'fontweight': 'bold'})
plt.xlabel('omega', fontdict={'fontsize': 14})

# #### Posterior distributions of the omega_c parameters after sampling.
# In[24]:
# 3x3 grid of marginal posteriors, one panel per primary field position.
# axes.T.flatten() fills the grid column-by-column to match the book's layout.
fig, axes = plt.subplots(3, 3, figsize=(14, 8))
for i, ax in enumerate(axes.T.flatten()):
    pm.plot_posterior(trace2['omega_c'][:, i], ax=ax,
                      point_estimate='mode', color=color)
    ax.set_title(pripos_codes[i], fontdict={'fontsize': 16, 'fontweight': 'bold'})
    ax.set_xlabel('omega_c__{}'.format(i), fontdict={'fontsize': 14})
    # Shared x-limits make the per-position modes directly comparable.
    ax.set_xlim(0.10, 0.30)
plt.tight_layout(h_pad=3)