#!/usr/bin/env python # coding: utf-8 # # Histogrammar # In[1]: get_ipython().run_line_magic('pylab', 'inline') import histogrammar as hg import numpy as np # ## Some Datasets form which we create Histograms # # generally will be ROOT files / panda dataframes, etc... we have multiple files for each MC sample and data # In[2]: background_sample = np.random.normal(loc=0.0, scale=1.0, size=30) signal_sample = np.random.normal(loc=0.5, scale=1.0, size=15) data_sample = np.random.normal(loc=0.1, scale=1.0, size=45) # ## Creating Histograms with hg # In[3]: signal_histogram = hg.Bin(3, -1.5, 1.5, lambda d: d, hg.Count()) background_histogram = hg.Bin(3, -1.5, 1.5, lambda d: d, hg.Count()) data_histogram = hg.Bin(3, -1.5, 1.5, lambda d: d, hg.Count()) # In[4]: for d in background_sample: background_histogram.fill(d) for d in signal_sample: signal_histogram.fill(d) for d in data_sample: data_histogram.fill(d) # In[5]: print(background_histogram.toJson()['data']['values']) print(signal_histogram.toJson()['data']['values']) print(data_histogram.toJson()['data']['values']) # ## Creating a Statistical model based on the Histograms (HistFactory) # In[6]: import pyhf # in this simple model, we just take the histograms for each sample. For the background normalization we assign a 10% normalization uncertainty # In[7]: spec = { 'singlechannel': { 'signal': { 'data': signal_histogram.toJson()['data']['values'], 'mods': [{'name': 'mu', 'type': 'normfactor', 'data': None}], }, 'background': { 'data': background_histogram.toJson()['data']['values'], 'mods': [ { 'name': 'bkg_norm', 'type': 'normsys', 'data': {'lo': 0.90, 'hi': 1.10}, } ], }, } } # In[8]: p = pyhf.Model(spec) data = data_histogram.toJson()['data']['values'] + p.config.auxdata # ## Interval Estimation / Limit Setting # # let's set a limit on the signal strength ยต # In[9]: def invert_interval(testmus, cls_obs, cls_exp, test_size=0.05): point05cross = {'exp': [], 'obs': None} for cls_exp_sigma in cls_exp: yvals = [x for x in cls_exp_sigma] point05cross['exp'].append( np.interp(test_size, list(reversed(yvals)), list(reversed(testmus))) ) yvals = cls_obs point05cross['obs'] = np.interp( test_size, list(reversed(yvals)), list(reversed(testmus)) ) return point05cross def plot_results(testmus, cls_obs, cls_exp, test_size=0.05): plt.plot(mutests, cls_obs, c='k') for i, c in zip(range(5), ['grey', 'grey', 'grey', 'grey', 'grey']): plt.plot(mutests, cls_exp[i], c=c) plt.plot(testmus, [test_size] * len(testmus), c='r') plt.ylim(0, 1) # In[10]: mutests = np.linspace(0, 5, 51) tests = [ runOnePoint( muTest, data, p, p.config.suggested_init(), p.config.suggested_bounds() )[-2:] for muTest in mutests ] cls_obs = [test[0] for test in tests] cls_exp = [[test[1][i] for test in tests] for i in range(5)] plot_results(mutests, cls_obs, cls_exp) invert_interval(mutests, cls_obs, cls_exp)