#!/usr/bin/env python # coding: utf-8 # # # # Specifying Sample Variables # Author(s): Paul Miles | Date Created: July 19, 2019 # # Many models require a set of parameters as input; however, it is not always of interest to estimate those parameter's posterior distribution. The [pymcmcstat](https://github.com/prmiles/pymcmcstat/wiki) package will allow you to send static parameters to the model along with the parameters being sampled. We demonstrated this feature by considering a simple linear model. # In[1]: # Import required packages import numpy as np from pymcmcstat.MCMC import MCMC import matplotlib.pyplot as plt import pymcmcstat print(pymcmcstat.__version__) np.seterr(over = 'ignore'); # We define a linear model function and corresponding sum-of-squares function. # In[2]: # define test model function def linear_model(xdata, theta): m = theta[0] b = theta[1] nrow, ncol = xdata.shape y = np.zeros([nrow, 1]) y[:, 0] = m*xdata.reshape(nrow,) + b return y def ssfun(theta, data): xdata = data.xdata[0] ydata = data.ydata[0] # eval model ymodel = linear_model(xdata, theta) # calc sos ss = sum((ymodel[:, 0] - ydata[:, 0])**2) return ss # # Generate Synthetic Data # We generate sythetic data using the linear model and adding observation errors # $$y_i = 2x_i + 3 + \epsilon_i, \;\; \epsilon_i \sim N(0, 0.1)$$ # We plot the data and linear model and see randomly distributed noise with respect to the model response. # In[3]: nds = 100 x = np.linspace(2, 3, num=nds).reshape(nds, 1) theta_data = np.array([2.0, 3.0]) y = linear_model(x, theta_data) + 0.1*np.random.standard_normal(x.shape) plt.plot(x, y, '.b'); plt.plot(x, linear_model(x, theta_data), '-r'); # # Initialize MCMC Object # - initialize data structure # - define simulation options # In[4]: # Initialize MCMC object mcstat = MCMC() mcstat.data.add_data_set(x, y) mcstat.simulation_options.define_simulation_options( nsimu=int(5.0e3), updatesigma=True, method='dram') mcstat.model_settings.define_model_settings(sos_function=ssfun) # # Specify Sample # When defining the model parameters, we can turn the sampling on or off by specifying the value of the `sample` argument. # - `sample = True`: Parameter will be treated as a random variable and be included in the sampling chain. This is the default behavior. # - `sample = False`: Parameter will be held constant at the initial value defined by the user. # In[5]: mcstat.parameters.add_model_parameter( name='m', theta0=2., sample=False) mcstat.parameters.add_model_parameter( name='b', theta0=5., minimum=-10, maximum=100, sample=True) # # Run Simulation # Observe that the only parameters displayed are the ones where `sample = True`. # In[12]: mcstat.run_simulation() # Extract results results = mcstat.simulation_results.results burnin = int(results['nsimu']/2) chain = results['chain'][burnin:, :] s2chain = results['s2chain'][burnin:, :] sschain = results['sschain'][burnin:, :] names = results['names'] # display chain statistics mcstat.chainstats(chain, results) # # MCMC Plots # The plots only show the results of the sampled parameters `b`. # In[14]: from pymcmcstat import mcmcplot as mcp # generate mcmc plots settings = dict( fig=dict( figsize=(3, 3) ) ) mcp.plot_density_panel(chain, names, settings) mcp.plot_chain_panel(chain, names, settings); # # Plot Prediction/Credible Intervals # In[15]: from pymcmcstat import propagation as up def predmodel(q, data): return linear_model(data.xdata[0], q) pdata = mcstat.data # generate prediction intervals intervals = up.calculate_intervals( chain, results, pdata, predmodel, waitbar=True, s2chain=s2chain) # plot prediction intervals data_display = dict( marker='o', markersize=5, markeredgecolor='k', markeredgewidth=1, markerfacecolor='w') model_display = dict(color='r', linestyle='--') up.plot_intervals( intervals, pdata.xdata[0], pdata.ydata[0], figsize=(6, 6), model_display=model_display, data_display=data_display);