#!/usr/bin/env python # coding: utf-8 #
# #
# # # Beetle Mortality Data # Author(s): Paul Miles | Date Created: June 18, 2018 # # This is a binomial logistic regression example with dose-response data. # # A. Dobson, "An Introduction to Generalized Linear Models", Chapman & Hall/CRC, 2002. # In[1]: # import required packages import numpy as np import math from pymcmcstat.MCMC import MCMC import matplotlib.pyplot as plt import pymcmcstat print(pymcmcstat.__version__) # # Define mortality data # In[2]: # Beetle mortality data dose = np.array([1.6907, 1.7242, 1.7552, 1.7842, 1.8113, 1.8369, 1.8610, 1.8839]) number_of_beetles = np.array([59, 60, 62, 56, 63, 59, 62, 60]) number_of_beetles_killed = np.array([6, 13, 18, 28, 52, 53, 61, 60]) x = np.array([dose, number_of_beetles]).T y = number_of_beetles_killed # # Define model options # The user is given several modeling options. The user chooses the desired options by defining the appropriate keywork to `beetle_link`. # # - `logitmodelfun`: # $$y(d,\theta) = \frac{1}{1 + \exp(\theta_0 + \theta_1d)}$$ # - `loglogmodelfun`: # $$y(d,\theta) = 1 - \exp(-\exp(\theta_0 + \theta_1d))$$ # - `probitmodelfun`: # $$y(d,\theta) = \frac{1}{2}\Big[1 + \text{erf}\Big(\frac{\theta_0 + \theta_1d -\mu}{\sigma\sqrt{2}}\Big)\Big], \quad \mu = 0, \quad \sigma^2 = 1$$ # In[3]: def logitmodelfun(d, t): return 1/(1+np.exp(t[0]+t[1]*d)) def loglogmodelfun(d, t): return 1 - np.exp(-np.exp(t[0] + t[1]*d)) def nordf(x, mu = 0, sigma2 = 1): # NORDF the standard normal (Gaussian) cumulative distribution. # NORPF(x,mu,sigma2) x quantile, mu mean, sigma2 variance # Marko Laine # $Revision: 1.5 $ $Date: 2007/12/04 08:57:00 $ # adapted for Python by Paul Miles, November 8, 2017 return 0.5 + 0.5*math.erf((x-mu)/math.sqrt(sigma2)/math.sqrt(2)) def probitmodelfun(d, t): tmp = np.vectorize(nordf) return tmp(t[0] + t[1]*d) beetle_link_dictionary = dict( logit={'theta0': [60, -35], 'modelfun': logitmodelfun, 'label': 'Beetle data with logit link'}, loglog={'theta0': [-40, 22], 'modelfun': loglogmodelfun, 'label': 'Beetle data with loglog link'}, probit={'theta0': [-35, 20], 'modelfun': probitmodelfun, 'label': 'Beetle data with loglog link'}, ) # specify model type beetle_link = 'loglog' beetle_model_object = beetle_link_dictionary[beetle_link] # # Setup MCMC Simulation # Note, $\theta_0 = b_0$ and $\theta_1 = b_1$. # In[4]: # Initialize MCMC object mcstat = MCMC() # initialize data structure mcstat.data.add_data_set( x, y, user_defined_object=beetle_model_object) # initialize parameter array theta0 = beetle_model_object['theta0'] mcstat.parameters.add_model_parameter(name='$b_0$', theta0=theta0[0]) mcstat.parameters.add_model_parameter(name='$b_1$', theta0=theta0[1]) # Generate options mcstat.simulation_options.define_simulation_options(nsimu=5.0e3) # The sum-of-squares function is somewhat involved as you must make sure the data component are the appropriate sizes. # In[5]: # define sum-of-squares model function def ssfun(theta,data): # unpack data ss = np.zeros([1]) y = data.ydata[0] ndp = len(y) dose = data.xdata[0][:,0].reshape(ndp, 1) n = data.xdata[0][:,1].reshape(ndp, 1) obj = data.user_defined_object[0] model = obj['modelfun'] # evaluate model p = model(dose, theta) # calculate loglikelihood ss = -2*(y*np.log(p) + (n-y)*np.log(1-p)).sum() return ss # Define model object: mcstat.model_settings.define_model_settings(sos_function = ssfun) # # Run simulation # We observe several error messages associated with numerical overflow. This is acceptable in light of those points being rejected during the simulation. # In[6]: # Run mcmcrun mcstat.run_simulation() # # Extract results and display chain statistics # In[7]: # Extract results results = mcstat.simulation_results.results names = results['names'] chain = results['chain'] s2chain = results['s2chain'] names = results['names'] # parameter names # display chain stats mcstat.chainstats(chain, results) # # Plot the chain and pairwise correlation panel # In[8]: from pymcmcstat import mcmcplot as mcp # plot chain panel mcp.plot_chain_panel(chain, names) # pairwise correlation settings = dict(fig=dict(figsize=(4, 4))) f = mcp.plot_pairwise_correlation_panel(chain, names, settings=settings) # # Generate credible intervals # ## Option 1 # In[9]: # Define function to generate credible intervals def predmodelfun(data, theta): dose = data.xdata[0][:, 0] obj = data.user_defined_object[0] model = obj['modelfun'] # evaluate model p = model(dose, theta) return p # define data structure for prediction predmcmc = MCMC() xmod = np.linspace(1.5,2) predmcmc.data.add_data_set(x=xmod, y=xmod, user_defined_object=beetle_model_object) mcstat.PI.setup_prediction_interval_calculation( results=results, data=predmcmc.data, modelfunction=predmodelfun); mcstat.PI.generate_prediction_intervals( nsample=500, calc_pred_int=False); # plot credible intervals mcstat.PI.plot_prediction_intervals(adddata=False) plt.plot(dose, number_of_beetles_killed/number_of_beetles, 'ok', label = 'data'); # add data points to the plot plt.xlabel('log(dose)', Fontsize=20) plt.xticks(Fontsize=20) plt.ylabel('proportion killed', Fontsize=20) plt.yticks(Fontsize=20) plt.title('Beetle data with loglog link', Fontsize=20) ax = plt.gca() handles, labels = ax.get_legend_handles_labels() plt.legend(handles, labels, loc='upper left') plt.tight_layout() # ## Option 2 # In[25]: # Define function to generate credible intervals def predmodelfun2(q, data): dose = data.xdata[0][:, 0] obj = data.user_defined_object[0] model = obj['modelfun'] # evaluate model p = model(dose, q) return p # define data structure for prediction from pymcmcstat import propagation as up from pymcmcstat.MCMC import DataStructure pdata = DataStructure() xmod = np.linspace(1.5,2) pdata.add_data_set(x=xmod, y=xmod, user_defined_object=beetle_model_object) intervals = up.calculate_intervals(chain=chain, results=results, data=pdata, model=predmodelfun2, nsample=500) ciset = dict( limits=[50, 90, 95, 99], ) data_display = dict( color='k', marker='o') fig, ax = up.plot_intervals(intervals, xmod, ydata=number_of_beetles_killed/number_of_beetles, xdata=dose, adddata=True, addprediction=False, ciset=ciset, data_display=data_display) for tick in ax.xaxis.get_major_ticks(): tick.label.set_fontsize(20) for tick in ax.yaxis.get_major_ticks(): tick.label.set_fontsize(20) ax.set_xlabel('log(dose)', Fontsize=20) ax.set_ylabel('proportion killed', Fontsize=20) ax.set_title('Beetle data with loglog link', Fontsize=20) fig.tight_layout()