import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import pickle
import arviz as az
import pymc3 as pm
from matplotlib.colors import to_rgb
import scipy.stats as stats
from IPython.display import display
import matplotlib as mpl
%load_ext autoreload
%autoreload 2
import plotting_lib
writeOut = True
outPathPlots = "../plots/statistical_model_neweplsar/"
outPathData = "../derived_data/statistical_model_neweplsar/"
widthMM = 190
widthInch = widthMM / 25.4
ratio = 0.66666
heigthInch = ratio*widthInch
SMALL_SIZE = 8
MEDIUM_SIZE = 10
BIGGER_SIZE = 12
plt.rc('font', size=SMALL_SIZE) # controls default text sizes
plt.rc('axes', titlesize=SMALL_SIZE) # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE) # fontsize of the x and y labels
plt.rc('xtick', labelsize=SMALL_SIZE) # fontsize of the tick labels
plt.rc('ytick', labelsize=SMALL_SIZE) # fontsize of the tick labels
plt.rc('legend', fontsize=SMALL_SIZE) # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE) # fontsize of the figure title
sns.set_style("ticks")
dpi = 300
sizes = [SMALL_SIZE,MEDIUM_SIZE,BIGGER_SIZE]
numSamples = 500
numCores = 10
numTune = 1000
numPredSamples = 2000
random_seed=3651456135
datafile = "../derived_data/preprocessing/preprocessed.dat"
with open(datafile, "rb") as f:
    x1, x2, _, df, dataZ, dictMeanStd, dictTreatment, dictSoftware = pickle.load(f)
Check that the data loaded correctly:
display(pd.DataFrame.from_dict({'x1':x1,'x2':x2}))
|   | x1 | x2 |
|---|---|---|
0 | 0 | 5 |
1 | 1 | 5 |
2 | 0 | 5 |
3 | 1 | 5 |
4 | 0 | 5 |
... | ... | ... |
275 | 1 | 9 |
276 | 0 | 9 |
277 | 1 | 9 |
278 | 0 | 9 |
279 | 1 | 9 |
280 rows × 2 columns
x1 indicates the software used; x2 indicates the treatment applied.
for surfaceParam, (mean, std) in dictMeanStd.items():
    print("Surface parameter {} has mean {} and standard deviation {}".format(surfaceParam, mean, std))
Surface parameter epLsar has mean 0.0028221512571428575 and standard deviation 0.0019000504797019124
Surface parameter R² has mean 0.998765273042857 and standard deviation 0.0015328558023807836
Surface parameter Asfc has mean 18.13129049385357 and standard deviation 16.348381888991312
Surface parameter Smfc has mean 4.938492967075 and standard deviation 38.106353908569346
Surface parameter HAsfc9 has mean 0.3085220006979256 and standard deviation 0.2211140516395699
Surface parameter HAsfc81 has mean 0.5639598041398011 and standard deviation 0.40668126467296095
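All downstream modeling happens on these z-standardized values; a z-score can be mapped back to the original scale with the stored mean and standard deviation. A minimal sketch (the helper destandardize is ours, not part of the pipeline):

def destandardize(z, param, dictMeanStd):
    # map a z-score back to the original scale of a surface parameter
    mean, std = dictMeanStd[param]
    return z * std + mean

destandardize(1.0, 'epLsar', dictMeanStd)  # approx. 0.0047 = 0.00282 + 1.0 * 0.00190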
for k, v in dictTreatment.items():
    print("Number {} encodes treatment {}".format(k, v))
Number 5 encodes treatment Dry Bamboo
Number 6 encodes treatment Dry Grass
Number 7 encodes treatment Dry Lucerne
Number 0 encodes treatment BrushDirt
Number 1 encodes treatment BrushNoDirt
Number 4 encodes treatment Control
Number 10 encodes treatment RubDirt
Number 2 encodes treatment Clover
Number 3 encodes treatment Clover+Dust
Number 8 encodes treatment Grass
Number 9 encodes treatment Grass+Dust
for k, v in dictSoftware.items():
    print("Number {} encodes software {}".format(k, v))
Number 0 encodes software ConfoMap
Number 1 encodes software Toothfrax
display(dataZ)
|   | index | TreatmentNumber | SoftwareNumber | DatasetNumber | NameNumber | epLsar_z | R²_z | Asfc_z | Smfc_z | HAsfc9_z | HAsfc81_z |
|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 5 | 0 | 0 | 116 | 0.839414 | -0.017104 | -0.448128 | -0.120476 | -0.806963 | -0.512247 |
1 | 1 | 5 | 1 | 0 | 116 | 0.999368 | 0.518462 | -0.477757 | -0.126469 | -0.782634 | -0.497016 |
2 | 2 | 5 | 0 | 0 | 117 | 1.601888 | -0.509024 | -0.276513 | -0.119325 | -0.584158 | -0.662886 |
3 | 3 | 5 | 1 | 0 | 117 | 1.596720 | 0.457791 | -0.301685 | -0.126469 | -0.629947 | -0.744422 |
4 | 4 | 5 | 0 | 0 | 118 | 1.168099 | -0.221668 | -0.393502 | -0.121498 | -0.269712 | -0.370958 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
275 | 275 | 9 | 1 | 2 | 51 | 0.843056 | 0.387986 | -0.997743 | -0.092631 | 2.388080 | 1.346868 |
276 | 276 | 9 | 0 | 2 | 52 | 0.305544 | -0.791837 | -0.967607 | -0.104937 | 2.014963 | 2.573677 |
277 | 277 | 9 | 1 | 2 | 52 | 0.166758 | 0.635237 | -0.940337 | -0.126098 | 2.926894 | 3.117333 |
278 | 278 | 9 | 0 | 2 | 53 | -0.843412 | 0.042974 | -1.022523 | -0.085082 | 0.280534 | 0.543577 |
279 | 279 | 9 | 1 | 2 | 53 | -1.115313 | 0.712218 | -1.021455 | -0.118879 | 0.169999 | 0.678630 |
280 rows × 11 columns
display(df)
|   | Dataset | Name | Software | Diet | Treatment | Before.after | epLsar | R² | Asfc | Smfc | HAsfc9 | HAsfc81 | NewEplsar | TreatmentNumber | SoftwareNumber | DatasetNumber | NameNumber |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | GuineaPigs | capor_2CC6B1_txP4_#1_1_100xL_1 | ConfoMap | Dry Bamboo | Dry Bamboo | NaN | 0.004417 | 0.998739 | 10.805118 | 0.347586 | 0.130091 | 0.355639 | 0.019460 | 5 | 0 | 0 | 116 |
1 | GuineaPigs | capor_2CC6B1_txP4_#1_1_100xL_1 | Toothfrax | Dry Bamboo | Dry Bamboo | NaN | 0.004721 | 0.999560 | 10.320730 | 0.119219 | 0.135471 | 0.361833 | NaN | 5 | 1 | 0 | 116 |
2 | GuineaPigs | capor_2CC6B1_txP4_#1_1_100xL_2 | ConfoMap | Dry Bamboo | Dry Bamboo | NaN | 0.005866 | 0.997985 | 13.610750 | 0.391436 | 0.179356 | 0.294377 | 0.020079 | 5 | 0 | 0 | 117 |
3 | GuineaPigs | capor_2CC6B1_txP4_#1_1_100xL_2 | Toothfrax | Dry Bamboo | Dry Bamboo | NaN | 0.005856 | 0.999467 | 13.199232 | 0.119219 | 0.169232 | 0.261217 | NaN | 5 | 1 | 0 | 117 |
4 | GuineaPigs | capor_2CC6B1_txP4_#1_1_100xL_3 | ConfoMap | Dry Bamboo | Dry Bamboo | NaN | 0.005042 | 0.998425 | 11.698166 | 0.308648 | 0.248885 | 0.413098 | 0.019722 | 5 | 0 | 0 | 118 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
275 | Sheeps | L8-Ovis-90730-lm2sin-a | Toothfrax | Grass+Dust | Grass+Dust | NaN | 0.004424 | 0.999360 | 1.819802 | 1.408678 | 0.836560 | 1.111706 | NaN | 9 | 1 | 2 | 51 |
276 | Sheeps | L8-Ovis-90764-lm2sin-a | ConfoMap | Grass+Dust | Grass+Dust | NaN | 0.003403 | 0.997552 | 2.312486 | 0.939718 | 0.754059 | 1.610626 | 0.018978 | 9 | 0 | 2 | 52 |
277 | Sheeps | L8-Ovis-90764-lm2sin-a | Toothfrax | Grass+Dust | Grass+Dust | NaN | 0.003139 | 0.999739 | 2.758297 | 0.133366 | 0.955699 | 1.831721 | NaN | 9 | 1 | 2 | 52 |
278 | Sheeps | L8-Ovis-90814-lm2sin-a | ConfoMap | Grass+Dust | Grass+Dust | NaN | 0.001220 | 0.998831 | 1.414701 | 1.696316 | 0.370552 | 0.785022 | 0.017498 | 9 | 0 | 2 | 53 |
279 | Sheeps | L8-Ovis-90814-lm2sin-a | Toothfrax | Grass+Dust | Grass+Dust | NaN | 0.000703 | 0.999857 | 1.432148 | 0.408433 | 0.346111 | 0.839946 | NaN | 9 | 1 | 2 | 53 |
280 rows × 17 columns
dfNewAvail = df[~df.NewEplsar.isna()].copy()
We look at the overall relationship between both epLsar variants on ConfoMap.
yrange = [0.015, 0.022]
xrange = [-0.0005, 0.0085]
ax = sns.scatterplot(data=dfNewAvail,x='epLsar',y='NewEplsar');
ax.set_xlim(xrange);
ax.set_ylim(yrange);
The relationship could be linear, but there is also a lot of noise.
Maybe different treatments behave differently?
ax = sns.scatterplot(data=dfNewAvail,x='epLsar',y='NewEplsar',hue="Treatment");
ax.set_xlim(xrange);
ax.set_ylim(yrange);
Too crowded; let's look at each dataset separately.
ax = sns.scatterplot(data=dfNewAvail[dfNewAvail.Dataset == "Sheeps"],x='epLsar',y='NewEplsar',hue="Treatment");
ax.set_xlim(xrange);
ax.set_ylim(yrange);
ax = sns.scatterplot(data=dfNewAvail[dfNewAvail.Dataset == "GuineaPigs"],x='epLsar',y='NewEplsar',hue="Treatment");
ax.set_xlim(xrange);
ax.set_ylim(yrange);
ax = sns.scatterplot(data=dfNewAvail[dfNewAvail.Dataset == "Lithics"],x='epLsar',y='NewEplsar',hue="Treatment");
ax.set_xlim(xrange);
ax.set_ylim(yrange);
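The three per-dataset scatterplots can also be drawn as one faceted figure; a minimal sketch using seaborn's relplot (not part of the original analysis):

g = sns.relplot(data=dfNewAvail, x='epLsar', y='NewEplsar', hue='Treatment', col='Dataset')
g.set(xlim=xrange, ylim=yrange)  # apply the common axis ranges to every facet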
dfNewAvail.NewEplsar
0      0.019460
2      0.020079
4      0.019722
6      0.020694
8      0.019841
         ...
270    0.018765
272    0.018581
274    0.017697
276    0.018978
278    0.017498
Name: NewEplsar, Length: 140, dtype: float64
mu = dfNewAvail.NewEplsar.mean()
mu
0.018350751528571425
sig = dfNewAvail.NewEplsar.std()
sig
0.0009354858792128117
dictMeanStd['NewEplsar'] = (mu,sig)
newZ = (dfNewAvail.NewEplsar.values-mu) / sig
newIndex = df[~df.NewEplsar.isna()].index.values
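As a quick sanity check (ours, not part of the original pipeline), the standardized values should have mean ≈ 0 and sample standard deviation ≈ 1, with one entry per ConfoMap row:

assert np.isclose(newZ.mean(), 0.0)
assert np.isclose(newZ.std(ddof=1), 1.0)  # pandas' std() above uses ddof=1
assert len(newZ) == len(newIndex) == 140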
class Model_NewEplsar(pm.Model):
    """
    Hierarchical model for NewEplsar with treatment as the only factor.
    """
    def getParams(self, x2, y):
        """Compute parameters of priors and hyperpriors from the data."""
        # get number of levels
        Nx2Lvl = np.unique(x2).size
        dims = Nx2Lvl
        ### get standard deviations
        # convert to a pandas dataframe to reuse its groupby logic
        df = pd.DataFrame.from_dict({'x2': x2, 'y': y})
        s2 = df.groupby('x2').std()['y'].max()
        stdSingle = s2
        return (dims, stdSingle)

    def printParams(self, x2, y):
        dims, stdSingle = self.getParams(x2, y)
        print("The number of levels of the x variables is {}".format(dims))
        print("The standard deviation used for the beta prior is {}".format(stdSingle))

    def __init__(self, name, x2, y, model=None):
        # call super's init first, passing model and name
        super().__init__(name, model)
        # get parameters of hyperpriors
        dims, stdSingle = self.getParams(x2, y)
        Nx2Lvl = dims
        s2 = stdSingle
        ### hyperpriors ###
        # observation hyperpriors
        lamY = 1/30.
        muGamma = 0.5
        sigmaGamma = 2.
        # prediction hyperpriors
        sigma0 = pm.HalfNormal('sigma0', sd=1)
        sigma2 = pm.HalfNormal('sigma2', sd=s2, shape=Nx2Lvl)
        mu_b0 = pm.Normal('mu_b0', mu=0., sd=1)
        mu_b2 = pm.Normal('mu_b2', mu=0., sd=1, shape=Nx2Lvl)
        # scale for the Gumbel-based slopes: a Gumbel(0, beta) has standard
        # deviation beta*pi/sqrt(6), so this targets about 1.5*sigma2
        beta2 = 1.5 * (np.sqrt(6) * sigma2) / np.pi
        ### priors ###
        # observation priors
        nuY = pm.Exponential('nuY', lam=lamY)
        sigmaY = pm.Gamma('sigmaY', mu=muGamma, sigma=sigmaGamma)
        # prediction priors (non-centered parametrization)
        b0_dist = pm.Normal('b0_dist', mu=0, sd=1)
        b0 = pm.Deterministic("b0", mu_b0 + b0_dist * sigma0)
        b2_beta = pm.HalfNormal('b2_beta', sd=beta2, shape=Nx2Lvl)
        b2_dist = pm.Gumbel('b2_dist', mu=0, beta=1)
        b2 = pm.Deterministic("b2", mu_b2 + b2_beta * b2_dist)
        ### prediction ###
        mu = pm.Deterministic('mu', b0 + b2[x2])
        ### observation ###
        y = pm.StudentT('y', nu=nuY, mu=mu, sd=sigmaY, observed=y)
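In compact form (our reading of the class above), the non-centered priors and the likelihood are:

$$
\begin{aligned}
b_0 &= \mu_{b_0} + \sigma_0\,\tilde{b}_0, & \tilde{b}_0 &\sim \mathcal{N}(0,1),\\
b_2 &= \mu_{b_2} + \beta_{b_2}\,\tilde{b}_2, & \tilde{b}_2 &\sim \mathrm{Gumbel}(0,1),\quad \beta_{b_2} \sim \mathrm{HalfNormal}\!\left(\tfrac{1.5\sqrt{6}}{\pi}\,\sigma_2\right),\\
\mu &= b_0 + b_2[x_2], & y &\sim \mathrm{StudentT}(\nu_Y,\,\mu,\,\sigma_Y).
\end{aligned}
$$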
with pm.Model() as model:
    new_epLsarModel = Model_NewEplsar('NewEplsar', x2[newIndex], newZ)
new_epLsarModel.printParams(x2[newIndex], newZ)
The number of levels of the x variables is 11
The standard deviation used for the beta prior is 1.1510253614970998
try:
    graph_new_epLsar = pm.model_to_graphviz(new_epLsarModel)
except Exception:
    graph_new_epLsar = "Could not make graph"
graph_new_epLsar
with new_epLsarModel as model:
    prior_pred_new_epLsar = pm.sample_prior_predictive(samples=numPredSamples, random_seed=random_seed)
plotting_lib.plotPriorPredictive(widthInch,heigthInch,dpi,writeOut,outPathPlots,dfNewAvail.reset_index(),dictMeanStd,prior_pred_new_epLsar,newZ,'NewEplsar')
The prior choice is as intended: broad over the data range.
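plotting_lib.plotPriorPredictive is project-specific, so as an independent cross-check the same picture can be sketched directly from the prior samples, assuming the observed variable is stored under the prefixed key 'NewEplsar_y' (the name ArviZ also reports further down):

fig, ax = plt.subplots(figsize=(widthInch, heigthInch), dpi=dpi)
for draw in prior_pred_new_epLsar['NewEplsar_y'][:50]:
    sns.kdeplot(draw, ax=ax, color='gray', alpha=0.1)  # prior predictive draws
sns.kdeplot(newZ, ax=ax, color='black')  # observed, standardized data
ax.set_xlabel('NewEplsar (z-scale)')
plt.show()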
with new_epLsarModel as model:
    trace_new_epLsar = pm.sample(numSamples, cores=numCores, tune=numTune, max_treedepth=20, init='auto', target_accept=0.99, random_seed=random_seed)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (10 chains in 10 jobs)
NUTS: [NewEplsar_b2_dist, NewEplsar_b2_beta, NewEplsar_b0_dist, NewEplsar_sigmaY, NewEplsar_nuY, NewEplsar_mu_b2, NewEplsar_mu_b0, NewEplsar_sigma2, NewEplsar_sigma0]
Sampling 10 chains for 1_000 tune and 500 draw iterations (10_000 + 5_000 draws total) took 106 seconds.
The number of effective samples is smaller than 25% for some parameters.
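The effective-sample-size warning can be followed up directly; a minimal sketch with ArviZ that lists the parameters whose minimum ESS falls below 25% of the 5,000 posterior draws:

with new_epLsarModel:
    ess = az.ess(az.from_pymc3(trace=trace_new_epLsar))
lowEss = {name: float(da.min()) for name, da in ess.data_vars.items()
          if float(da.min()) < 0.25 * 5000}  # 10 chains x 500 draws
lowEss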
with new_epLsarModel as model:
    if writeOut:
        with open(outPathData + 'model_{}.pkl'.format('NewEplsar'), 'wb') as buff:
            pickle.dump({'model': new_epLsarModel, 'trace': trace_new_epLsar}, buff)
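The pickled bundle can be restored later without re-sampling (assuming the same pymc3/theano environment); a minimal sketch:

with open(outPathData + 'model_NewEplsar.pkl', 'rb') as buff:
    saved = pickle.load(buff)
model_loaded, trace_loaded = saved['model'], saved['trace']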
with new_epLsarModel as model:
    dataTrace_new_epLsar = az.from_pymc3(trace=trace_new_epLsar)
pm.summary(dataTrace_new_epLsar, hdi_prob=0.95).round(2)
|   | mean | sd | hdi_2.5% | hdi_97.5% | mcse_mean | mcse_sd | ess_mean | ess_sd | ess_bulk | ess_tail | r_hat |
|---|---|---|---|---|---|---|---|---|---|---|---|
NewEplsar_mu_b0 | -0.21 | 0.63 | -1.42 | 1.05 | 0.01 | 0.01 | 1973.0 | 1973.0 | 1943.0 | 2814.0 | 1.0 |
NewEplsar_mu_b2[0] | -0.18 | 0.48 | -1.12 | 0.72 | 0.01 | 0.01 | 2369.0 | 2369.0 | 2375.0 | 2973.0 | 1.0 |
NewEplsar_mu_b2[1] | -0.13 | 0.44 | -1.02 | 0.72 | 0.01 | 0.01 | 2180.0 | 2180.0 | 2126.0 | 3004.0 | 1.0 |
NewEplsar_mu_b2[2] | 0.07 | 0.47 | -0.83 | 0.96 | 0.01 | 0.01 | 2384.0 | 1978.0 | 2410.0 | 2600.0 | 1.0 |
NewEplsar_mu_b2[3] | -0.29 | 0.46 | -1.16 | 0.61 | 0.01 | 0.01 | 2076.0 | 2022.0 | 2083.0 | 2874.0 | 1.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
NewEplsar_mu[135] | -0.67 | 0.22 | -1.09 | -0.24 | 0.00 | 0.00 | 5740.0 | 5636.0 | 5753.0 | 4350.0 | 1.0 |
NewEplsar_mu[136] | -0.67 | 0.22 | -1.09 | -0.24 | 0.00 | 0.00 | 5740.0 | 5636.0 | 5753.0 | 4350.0 | 1.0 |
NewEplsar_mu[137] | -0.67 | 0.22 | -1.09 | -0.24 | 0.00 | 0.00 | 5740.0 | 5636.0 | 5753.0 | 4350.0 | 1.0 |
NewEplsar_mu[138] | -0.67 | 0.22 | -1.09 | -0.24 | 0.00 | 0.00 | 5740.0 | 5636.0 | 5753.0 | 4350.0 | 1.0 |
NewEplsar_mu[139] | -0.67 | 0.22 | -1.09 | -0.24 | 0.00 | 0.00 | 5740.0 | 5636.0 | 5753.0 | 4350.0 | 1.0 |
191 rows × 11 columns
az.plot_forest(dataTrace_new_epLsar, var_names=['b0', 'b2'], filter_vars='like', figsize=(widthInch, 5*heigthInch), hdi_prob=0.95, ess=True, r_hat=True);
if writeOut:
    plt.savefig(outPathPlots + "posterior_forest_{}.pdf".format('NewEplsar'), dpi=dpi)
with new_epLsarModel as model:
    plotting_lib.plotTracesB(widthInch, heigthInch, dpi, writeOut, outPathPlots, trace_new_epLsar, 'NewEplsar')
with new_epLsarModel as model:
    plotting_lib.pm.energyplot(trace_new_epLsar)
with new_epLsarModel as model:
    posterior_pred_new_epLsar = pm.sample_posterior_predictive(trace_new_epLsar, samples=numPredSamples, random_seed=random_seed)
/home/bob/.local/lib/python3.8/site-packages/pymc3/sampling.py:1707: UserWarning: samples parameter is smaller than nchains times ndraws, some draws and/or chains may not be represented in the returned posterior predictive sample
plotting_lib.plotPriorPosteriorPredictive(widthInch,heigthInch,dpi,writeOut,outPathPlots,dfNewAvail.reset_index(),dictMeanStd,prior_pred_new_epLsar,posterior_pred_new_epLsar,newZ,'NewEplsar')
with new_epLsarModel as model:
    pm_data_new_epLsar = az.from_pymc3(trace=trace_new_epLsar, prior=prior_pred_new_epLsar, posterior_predictive=posterior_pred_new_epLsar)
arviz.data.io_pymc3 - WARNING - posterior predictive variable NewEplsar_y's shape not compatible with number of chains and draws. This can mean that some draws or even whole chains are not represented.
plotting_lib.plotPosterior(widthInch,heigthInch,dpi,writeOut,outPathPlots,dictMeanStd,pm_data_new_epLsar,'NewEplsar')
b1P_Old = np.load("../derived_data/statistical_model_two_factors/epLsar_oldb1.npy")
b2P_Old = np.load("../derived_data/statistical_model_two_factors/epLsar_oldb2.npy")
M12P_Old = np.load("../derived_data/statistical_model_two_factors/epLsar_oldM12.npy")
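Before reusing these posterior draws from the two-factor epLsar model, their shapes should line up with the current encodings; a quick check (the expected shapes are inferred from how the arrays are indexed below):

print(b1P_Old.shape, b2P_Old.shape, M12P_Old.shape)
assert M12P_Old.shape[1] == 2   # software levels, indexed as M12P_Old[:, software, treatment]
assert b2P_Old.shape[1] == M12P_Old.shape[2] == 11  # treatment levels, as printed above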
from collections import defaultdict
def plotTreatmentPosterior(widthInch, heigthInch, dpi, sizes, writeOut, path, dictMeanStd, dictTreatment, dictSoftware, trace, yname, x1, x3):
    SMALL_SIZE, MEDIUM_SIZE, BIGGER_SIZE = sizes
    mu_Val, sig_Val = dictMeanStd[yname]
    # get posterior samples, rescaled to the original units
    b2P = sig_Val * trace['{}_b2'.format(yname)]
    # prepare color dict for treatments
    # use groups of 4 colors, as in tab20c
    colorIndex = dict({5: 0, 6: 1, 7: 2, 0: 4, 1: 5, 4: 6, 10: 7, 2: 8, 3: 9, 8: 10, 9: 11})
    # map each treatment number to its dataset number
    dictDataset = dict({5: 0, 6: 0, 7: 0, 0: 1, 1: 1, 4: 1, 10: 1, 2: 2, 3: 2, 8: 2, 9: 2})
    # invert the mapping: dataset number -> list of treatment numbers
    inv_dictDataset = defaultdict(list)
    for key, val in dictDataset.items():
        inv_dictDataset[val].append(key)
    # get number of datasets
    numDatasets = len(np.unique(list(dictDataset.values())))
    # get number of treatments per dataset
    dictDataset2NumberTreats = dict()
    for numDataset in range(numDatasets):
        dictDataset2NumberTreats[numDataset] = len(inv_dictDataset[numDataset])
    # get maximum number of treatments per dataset
    tmax = np.max(list(dictDataset2NumberTreats.values()))
    # compute maximal number of treatment pairs
    maxpair = int(tmax * (tmax - 1) / 2)
    fig = plt.subplots(squeeze=False, figsize=(numDatasets * widthInch, maxpair * heigthInch), dpi=dpi)
    # store list for hdi
    hdiList = []
    for indexDataset in np.arange(numDatasets):
        # counter for row
        rowCounter = 0
        # first treatment
        for treatmentNum_i, lvl2_i in enumerate(inv_dictDataset[indexDataset]):
            # second treatment
            for treatmentNum_j, lvl2_j in enumerate(inv_dictDataset[indexDataset]):
                if treatmentNum_i > treatmentNum_j:
                    # set subplot
                    curr_ax = plt.subplot2grid((maxpair, numDatasets), (rowCounter, indexDataset))
                    # compute difference between treatments for each software on the old epLsar model
                    diffS0 = sig_Val * ((M12P_Old[:, 0, lvl2_i] + b2P_Old[:, lvl2_i]) - (M12P_Old[:, 0, lvl2_j] + b2P_Old[:, lvl2_j]))
                    diffS1 = sig_Val * ((M12P_Old[:, 1, lvl2_i] + b2P_Old[:, lvl2_i]) - (M12P_Old[:, 1, lvl2_j] + b2P_Old[:, lvl2_j]))
                    # plot posteriors
                    sns.kdeplot(diffS1, ax=curr_ax, label="epLsar on {}".format(dictSoftware[1]), color='gray', alpha=0.3, ls='--')
                    sns.kdeplot(diffS0, ax=curr_ax, label="epLsar on {}".format(dictSoftware[0]), color='gray', alpha=0.3, ls='dotted')
                    sns.kdeplot(b2P[:, lvl2_i] - b2P[:, lvl2_j], ax=curr_ax, label="NewEplsar on {}".format(dictSoftware[0]), color='C0', alpha=0.3, ls='dotted')
                    # plot reference value zero
                    curr_ax.axvline(x=0, color="C1")
                    # get 95% HDIs
                    hdi_new = az.hdi(az.convert_to_inference_data(b2P[:, lvl2_i] - b2P[:, lvl2_j]), hdi_prob=0.95)['x'].values
                    hdiS0 = az.hdi(az.convert_to_inference_data(diffS0), hdi_prob=0.95)['x'].values
                    hdiS1 = az.hdi(az.convert_to_inference_data(diffS1), hdi_prob=0.95)['x'].values
                    # a difference counts as significant when its HDI excludes zero
                    isSignificant = lambda x: (x[0] > 0.0) or (x[1] < 0.0)
                    # store hdi
                    hdiList.append([dictTreatment[lvl2_i], dictTreatment[lvl2_j],
                                    hdi_new[0], hdi_new[1], isSignificant(hdi_new),
                                    hdiS0[0], hdiS0[1], isSignificant(hdiS0),
                                    hdiS1[0], hdiS1[1], isSignificant(hdiS1)])
                    # set title
                    nameFirst = dictTreatment[lvl2_i]
                    nameSecond = dictTreatment[lvl2_j]
                    title = "{} vs. {}".format(nameFirst, nameSecond)
                    if isSignificant(hdi_new):
                        title += ": Significant on NewEplsar"
                    curr_ax.set_title(title)
                    # add legend
                    curr_ax.legend()
                    # set x label
                    curr_ax.set_xlabel('Delta')
                    # remove y label decoration
                    curr_ax.tick_params(left=False)
                    curr_ax.set(yticklabels=[])
                    # increment counter
                    rowCounter += 1
    #plt.suptitle('Estimated differences between treatments on {}'.format(yname))
    plt.tight_layout()
    if writeOut:
        plt.savefig(path + "treatment_pairs_{}.pdf".format(yname), dpi=dpi)
    plt.show()
    # convert hdi list to a dataframe
    df = pd.DataFrame(hdiList, columns=["Treatment_i", "Treatment_j",
                                        "hdi_NewEplsar_2.5%", "hdi_NewEplsar_97.5%", "isSignificant_NewEplsar",
                                        "hdi_{}_2.5%".format(dictSoftware[0]), "hdi_{}_97.5%".format(dictSoftware[0]), "isSignificant_on_{}".format(dictSoftware[0]),
                                        "hdi_{}_2.5%".format(dictSoftware[1]), "hdi_{}_97.5%".format(dictSoftware[1]), "isSignificant_on_{}".format(dictSoftware[1])])
    return df
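For reference, the significance flag computed inside the function is simply whether the 95% HDI excludes zero; two illustrative intervals (made-up numbers):

isSignificant = lambda hdi: (hdi[0] > 0.0) or (hdi[1] < 0.0)
isSignificant((0.0002, 0.0009))   # True: the interval lies entirely above zero
isSignificant((-0.0005, 0.0007))  # False: the interval contains zero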
dfHDI = plotTreatmentPosterior(widthInch,heigthInch,dpi,sizes,writeOut,outPathPlots,dictMeanStd,dictTreatment,dictSoftware,trace_new_epLsar,'NewEplsar',x1[newIndex],x2[newIndex])
dfHDI
|   | Treatment_i | Treatment_j | hdi_NewEplsar_2.5% | hdi_NewEplsar_97.5% | isSignificant_NewEplsar | hdi_ConfoMap_2.5% | hdi_ConfoMap_97.5% | isSignificant_on_ConfoMap | hdi_Toothfrax_2.5% | hdi_Toothfrax_97.5% | isSignificant_on_Toothfrax |
|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Dry Grass | Dry Bamboo | -0.002142 | -0.001471 | True | -0.002002 | -0.001351 | True | -0.001957 | -0.001303 | True |
1 | Dry Lucerne | Dry Bamboo | -0.001520 | -0.000917 | True | -0.001620 | -0.000971 | True | -0.001706 | -0.001088 | True |
2 | Dry Lucerne | Dry Grass | 0.000239 | 0.000892 | True | 0.000106 | 0.000752 | True | -0.000082 | 0.000555 | False |
3 | BrushNoDirt | BrushDirt | -0.000522 | 0.000663 | False | -0.000651 | 0.000496 | False | -0.000745 | 0.000278 | False |
4 | Control | BrushDirt | -0.000967 | 0.000241 | False | -0.001048 | 0.000116 | False | -0.000962 | 0.000105 | False |
5 | Control | BrushNoDirt | -0.000965 | 0.000131 | False | -0.000850 | 0.000188 | False | -0.000681 | 0.000328 | False |
6 | RubDirt | BrushDirt | -0.001011 | 0.000149 | False | -0.000843 | 0.000264 | False | -0.000676 | 0.000359 | False |
7 | RubDirt | BrushNoDirt | -0.000979 | 0.000024 | False | -0.000702 | 0.000303 | False | -0.000446 | 0.000526 | False |
8 | RubDirt | Control | -0.000593 | 0.000470 | False | -0.000347 | 0.000679 | False | -0.000276 | 0.000733 | False |
9 | Clover+Dust | Clover | -0.000912 | 0.000170 | False | -0.000529 | 0.000561 | False | -0.000377 | 0.000802 | False |
10 | Grass | Clover | -0.000695 | 0.000519 | False | -0.000095 | 0.001442 | False | 0.000136 | 0.001852 | True |
11 | Grass | Clover+Dust | -0.000258 | 0.000920 | False | -0.000065 | 0.001494 | False | -0.000046 | 0.001692 | False |
12 | Grass+Dust | Clover | -0.001025 | 0.000078 | False | 0.000259 | 0.001605 | True | 0.000421 | 0.001720 | True |
13 | Grass+Dust | Clover+Dust | -0.000643 | 0.000438 | False | 0.000257 | 0.001669 | True | 0.000231 | 0.001516 | True |
14 | Grass+Dust | Grass | -0.000993 | 0.000166 | False | -0.000617 | 0.001148 | False | -0.000891 | 0.000924 | False |
if writeOut:
    dfHDI.to_csv(outPathData + 'hdi_{}.csv'.format('NewEplsar'))
Show where NewEplsar yields different results than epLsar:
df_summary = dfHDI[(dfHDI.isSignificant_NewEplsar != dfHDI.isSignificant_on_ConfoMap) | (dfHDI.isSignificant_NewEplsar != dfHDI.isSignificant_on_Toothfrax) ][["Treatment_i","Treatment_j","isSignificant_NewEplsar","isSignificant_on_Toothfrax","isSignificant_on_ConfoMap","hdi_NewEplsar_2.5%","hdi_NewEplsar_97.5%","hdi_ConfoMap_2.5%","hdi_ConfoMap_97.5%","hdi_Toothfrax_2.5%","hdi_Toothfrax_97.5%"]]
df_summary
|   | Treatment_i | Treatment_j | isSignificant_NewEplsar | isSignificant_on_Toothfrax | isSignificant_on_ConfoMap | hdi_NewEplsar_2.5% | hdi_NewEplsar_97.5% | hdi_ConfoMap_2.5% | hdi_ConfoMap_97.5% | hdi_Toothfrax_2.5% | hdi_Toothfrax_97.5% |
|---|---|---|---|---|---|---|---|---|---|---|---|
2 | Dry Lucerne | Dry Grass | True | False | True | 0.000239 | 0.000892 | 0.000106 | 0.000752 | -0.000082 | 0.000555 |
10 | Grass | Clover | False | True | False | -0.000695 | 0.000519 | -0.000095 | 0.001442 | 0.000136 | 0.001852 |
12 | Grass+Dust | Clover | False | True | True | -0.001025 | 0.000078 | 0.000259 | 0.001605 | 0.000421 | 0.001720 |
13 | Grass+Dust | Clover+Dust | False | True | True | -0.000643 | 0.000438 | 0.000257 | 0.001669 | 0.000231 | 0.001516 |
if writeOut:
    df_summary.to_csv(outPathData + 'summary.csv')
!jupyter nbconvert --to html Statistical_Model_NewEplsar.ipynb
[NbConvertApp] Converting notebook Statistical_Model_NewEplsar.ipynb to html
[NbConvertApp] Writing 4926172 bytes to Statistical_Model_NewEplsar.html
!jupyter nbconvert --to markdown Statistical_Model_NewEplsar.ipynb
[NbConvertApp] Converting notebook Statistical_Model_NewEplsar.ipynb to markdown
[NbConvertApp] Support files will be in Statistical_Model_NewEplsar_files/
[NbConvertApp] Making directory Statistical_Model_NewEplsar_files
[NbConvertApp] Writing 44425 bytes to Statistical_Model_NewEplsar.md