#!/usr/bin/env python
# coding: utf-8
#
#
# # Beetle Mortality Data
# Author(s): Paul Miles | Date Created: June 18, 2018
#
# This is a binomial logistic regression example with dose-response data.
#
# A. Dobson, "An Introduction to Generalized Linear Models", Chapman & Hall/CRC, 2002.
# In[1]:
# import required packages
import numpy as np
import math
from pymcmcstat.MCMC import MCMC
import matplotlib.pyplot as plt
import pymcmcstat
print(pymcmcstat.__version__)
# # Define mortality data
# In[2]:
# Beetle mortality data
dose = np.array([1.6907, 1.7242, 1.7552, 1.7842,
1.8113, 1.8369, 1.8610, 1.8839])
number_of_beetles = np.array([59, 60, 62, 56, 63, 59, 62, 60])
number_of_beetles_killed = np.array([6, 13, 18, 28, 52, 53, 61, 60])
x = np.array([dose, number_of_beetles]).T
y = number_of_beetles_killed
# # Define model options
# The user is given several modeling options. The user chooses the desired options by defining the appropriate keywork to `beetle_link`.
#
# - `logitmodelfun`:
# $$y(d,\theta) = \frac{1}{1 + \exp(\theta_0 + \theta_1d)}$$
# - `loglogmodelfun`:
# $$y(d,\theta) = 1 - \exp(-\exp(\theta_0 + \theta_1d))$$
# - `probitmodelfun`:
# $$y(d,\theta) = \frac{1}{2}\Big[1 + \text{erf}\Big(\frac{\theta_0 + \theta_1d -\mu}{\sigma\sqrt{2}}\Big)\Big], \quad \mu = 0, \quad \sigma^2 = 1$$
# In[3]:
def logitmodelfun(d, t):
return 1/(1+np.exp(t[0]+t[1]*d))
def loglogmodelfun(d, t):
return 1 - np.exp(-np.exp(t[0] + t[1]*d))
def nordf(x, mu = 0, sigma2 = 1):
# NORDF the standard normal (Gaussian) cumulative distribution.
# NORPF(x,mu,sigma2) x quantile, mu mean, sigma2 variance
# Marko Laine
# $Revision: 1.5 $ $Date: 2007/12/04 08:57:00 $
# adapted for Python by Paul Miles, November 8, 2017
return 0.5 + 0.5*math.erf((x-mu)/math.sqrt(sigma2)/math.sqrt(2))
def probitmodelfun(d, t):
tmp = np.vectorize(nordf)
return tmp(t[0] + t[1]*d)
beetle_link_dictionary = dict(
logit={'theta0': [60, -35], 'modelfun': logitmodelfun,
'label': 'Beetle data with logit link'},
loglog={'theta0': [-40, 22], 'modelfun': loglogmodelfun,
'label': 'Beetle data with loglog link'},
probit={'theta0': [-35, 20], 'modelfun': probitmodelfun,
'label': 'Beetle data with loglog link'},
)
# specify model type
beetle_link = 'loglog'
beetle_model_object = beetle_link_dictionary[beetle_link]
# # Setup MCMC Simulation
# Note, $\theta_0 = b_0$ and $\theta_1 = b_1$.
# In[4]:
# Initialize MCMC object
mcstat = MCMC()
# initialize data structure
mcstat.data.add_data_set(
x, y, user_defined_object=beetle_model_object)
# initialize parameter array
theta0 = beetle_model_object['theta0']
mcstat.parameters.add_model_parameter(name='$b_0$',
theta0=theta0[0])
mcstat.parameters.add_model_parameter(name='$b_1$',
theta0=theta0[1])
# Generate options
mcstat.simulation_options.define_simulation_options(nsimu=5.0e3)
# The sum-of-squares function is somewhat involved as you must make sure the data component are the appropriate sizes.
# In[5]:
# define sum-of-squares model function
def ssfun(theta,data):
# unpack data
ss = np.zeros([1])
y = data.ydata[0]
ndp = len(y)
dose = data.xdata[0][:,0].reshape(ndp, 1)
n = data.xdata[0][:,1].reshape(ndp, 1)
obj = data.user_defined_object[0]
model = obj['modelfun']
# evaluate model
p = model(dose, theta)
# calculate loglikelihood
ss = -2*(y*np.log(p) + (n-y)*np.log(1-p)).sum()
return ss
# Define model object:
mcstat.model_settings.define_model_settings(sos_function = ssfun)
# # Run simulation
# We observe several error messages associated with numerical overflow. This is acceptable in light of those points being rejected during the simulation.
# In[6]:
# Run mcmcrun
mcstat.run_simulation()
# # Extract results and display chain statistics
# In[7]:
# Extract results
results = mcstat.simulation_results.results
names = results['names']
chain = results['chain']
s2chain = results['s2chain']
names = results['names'] # parameter names
# display chain stats
mcstat.chainstats(chain, results)
# # Plot the chain and pairwise correlation panel
# In[8]:
from pymcmcstat import mcmcplot as mcp
# plot chain panel
mcp.plot_chain_panel(chain, names)
# pairwise correlation
settings = dict(fig=dict(figsize=(4, 4)))
f = mcp.plot_pairwise_correlation_panel(chain, names, settings=settings)
# # Generate credible intervals
# ## Option 1
# In[9]:
# Define function to generate credible intervals
def predmodelfun(data, theta):
dose = data.xdata[0][:, 0]
obj = data.user_defined_object[0]
model = obj['modelfun']
# evaluate model
p = model(dose, theta)
return p
# define data structure for prediction
predmcmc = MCMC()
xmod = np.linspace(1.5,2)
predmcmc.data.add_data_set(x=xmod,
y=xmod,
user_defined_object=beetle_model_object)
mcstat.PI.setup_prediction_interval_calculation(
results=results,
data=predmcmc.data,
modelfunction=predmodelfun);
mcstat.PI.generate_prediction_intervals(
nsample=500,
calc_pred_int=False);
# plot credible intervals
mcstat.PI.plot_prediction_intervals(adddata=False)
plt.plot(dose, number_of_beetles_killed/number_of_beetles,
'ok', label = 'data'); # add data points to the plot
plt.xlabel('log(dose)', Fontsize=20)
plt.xticks(Fontsize=20)
plt.ylabel('proportion killed', Fontsize=20)
plt.yticks(Fontsize=20)
plt.title('Beetle data with loglog link', Fontsize=20)
ax = plt.gca()
handles, labels = ax.get_legend_handles_labels()
plt.legend(handles, labels, loc='upper left')
plt.tight_layout()
# ## Option 2
# In[25]:
# Define function to generate credible intervals
def predmodelfun2(q, data):
dose = data.xdata[0][:, 0]
obj = data.user_defined_object[0]
model = obj['modelfun']
# evaluate model
p = model(dose, q)
return p
# define data structure for prediction
from pymcmcstat import propagation as up
from pymcmcstat.MCMC import DataStructure
pdata = DataStructure()
xmod = np.linspace(1.5,2)
pdata.add_data_set(x=xmod, y=xmod,
user_defined_object=beetle_model_object)
intervals = up.calculate_intervals(chain=chain, results=results, data=pdata,
model=predmodelfun2, nsample=500)
ciset = dict(
limits=[50, 90, 95, 99],
)
data_display = dict(
color='k',
marker='o')
fig, ax = up.plot_intervals(intervals, xmod,
ydata=number_of_beetles_killed/number_of_beetles,
xdata=dose,
adddata=True,
addprediction=False,
ciset=ciset,
data_display=data_display)
for tick in ax.xaxis.get_major_ticks():
tick.label.set_fontsize(20)
for tick in ax.yaxis.get_major_ticks():
tick.label.set_fontsize(20)
ax.set_xlabel('log(dose)', Fontsize=20)
ax.set_ylabel('proportion killed', Fontsize=20)
ax.set_title('Beetle data with loglog link', Fontsize=20)
fig.tight_layout()