Notebook

In [1]:

# Code based on https://github.com/alexander-held/PyHEP-2021-cabinetry/blob/main/talk.ipynb
import cabinetry
cabinetry.set_logging()

In [2]:

# Make sure we have all the data files

import os
import requests

# If any of the necessary folders don't exist, make them
if not os.path.isdir('4lep'):
    os.mkdir('4lep')
if not os.path.isdir('4lep/Data'):
    os.mkdir('4lep/Data')
if not os.path.isdir('4lep/MC'):
    os.mkdir('4lep/MC')
    
# List of all of the files we need
filelist = ['4lep/Data/data_A.4lep.root',
            '4lep/Data/data_B.4lep.root',
            '4lep/Data/data_C.4lep.root',
            '4lep/Data/data_D.4lep.root',
            '4lep/MC/mc_361106.Zee.4lep.root',
            '4lep/MC/mc_361107.Zmumu.4lep.root',
            '4lep/MC/mc_410000.ttbar_lep.4lep.root',
            '4lep/MC/mc_363490.llll.4lep.root',
            '4lep/MC/mc_345060.ggH125_ZZ4lep.4lep.root',
            '4lep/MC/mc_344235.VBFH125_ZZ4lep.4lep.root',
            '4lep/MC/mc_341964.WH125_ZZ4lep.4lep.root',
            '4lep/MC/mc_341947.ZH125_ZZ4lep.4lep.root']

# For each of these files
for filepath in filelist:

    # Check if the file exists locally
    if not os.path.isfile(filepath):

        #If the file doesn't exist, download it
        data_download = requests.get('https://atlas-opendata.web.cern.ch/atlas-opendata/samples/2020/' + filepath)
        new_file = open(filepath, 'wb')
        new_file.write(data_download.content)
        new_file.close()

In [3]:

config = {
   "General":{
      "Measurement": "CabinetryHZZAnalysis",
      "POI": "Signal_norm",
      "InputPath": "4lep/{SamplePath}",
      "HistogramFolder": "histograms/"
   }
}

In [4]:

bin_list = list(range(80, 255, 5))

In [5]:

config.update({
   "Regions":[
      {
         "Name": "Signal_region",
         "Filter": "(lep_charge[:,0] + lep_charge[:,1] + lep_charge[:,2] + lep_charge[:,3] == 0) & ((lep_type[:,0] + lep_type[:,1] + lep_type[:,2] + lep_type[:,3] == 44) | (lep_type[:,0] + lep_type[:,1] + lep_type[:,2] + lep_type[:,3] == 48) | (lep_type[:,0] + lep_type[:,1] + lep_type[:,2] + lep_type[:,3] == 52))",
         "Variable": "sqrt((lep_E[:,0] + lep_E[:,1] + lep_E[:,2] + lep_E[:,3])**2 - (lep_pt[:,0]*cos(lep_phi[:,0]) + lep_pt[:,1]*cos(lep_phi[:,1]) + lep_pt[:,2]*cos(lep_phi[:,2]) + lep_pt[:,3]*cos(lep_phi[:,3]))**2 - (lep_pt[:,0]*sin(lep_phi[:,0]) + lep_pt[:,1]*sin(lep_phi[:,1]) + lep_pt[:,2]*sin(lep_phi[:,2]) + lep_pt[:,3]*sin(lep_phi[:,3]))**2 - (lep_pt[:,0]*sinh(lep_eta[:,0]) + lep_pt[:,1]*sinh(lep_eta[:,1]) + lep_pt[:,2]*sinh(lep_eta[:,2]) + lep_pt[:,3]*sinh(lep_eta[:,3]))**2)/1000",
         "Binning": bin_list
      }
   ]
})

In [6]:

config.update({
   "Samples":[
      {
         "Name": "Data",
         "Tree": "mini",
         "SamplePath": ["Data/data_A.4lep.root",
                         "Data/data_B.4lep.root",
                         "Data/data_C.4lep.root",
                         "Data/data_D.4lep.root"],
         "Data": True
      },
      {
         "Name": "Signal",
         "Tree": "mini",
         "SamplePath": ['MC/mc_345060.ggH125_ZZ4lep.4lep.root',
                         'MC/mc_344235.VBFH125_ZZ4lep.4lep.root',
                         'MC/mc_341964.WH125_ZZ4lep.4lep.root',
                         'MC/mc_341947.ZH125_ZZ4lep.4lep.root'],
         "Weight": "((channelNumber == 345060)*2.1605e-6 + (channelNumber == 344235)*1.2588e-6 + (channelNumber == 341964)*2.5228e-5 + (channelNumber == 341947)*1.4283e-7)*mcWeight*scaleFactor_PILEUP*scaleFactor_ELE*scaleFactor_MUON*scaleFactor_LepTRIGGER"
      },
      {
         "Name": "Background $ZZ^{star}$",
         "Tree": "mini",
         "SamplePath": "MC/mc_363490.llll.4lep.root",
         "Weight": "0.0016685*mcWeight*scaleFactor_PILEUP*scaleFactor_ELE*scaleFactor_MUON*scaleFactor_LepTRIGGER"
      },
      {
         "Name": "Background $Z,tt^{bar}$",
         "Tree": "mini",
         "SamplePath": ['MC/mc_361106.Zee.4lep.root',
                         'MC/mc_361107.Zmumu.4lep.root',
                         'MC/mc_410000.ttbar_lep.4lep.root'],
         "Weight": "((channelNumber == 361106)*1.2980e-4 + (channelNumber == 361107)*1.3239e-4 + (channelNumber == 410000)*0.091663)*mcWeight*scaleFactor_PILEUP*scaleFactor_ELE*scaleFactor_MUON*scaleFactor_LepTRIGGER"
      }
   ]
})

In [7]:

config.update({"Systematics": []})

In [8]:

config.update({
   "NormFactors":[
      {
         "Name": "Signal_norm",
         "Samples": "Signal",
         "Nominal": 1,
         "Bounds": [0, 10]
      },
      {
         "Name": "ZZ_norm",
         "Samples": "Background $ZZ^{star}$",
         "Nominal": 1,
         "Bounds": [0, 10]
      },
      {
         "Name": "Ztt_norm",
         "Samples": "Background $Z,tt^{bar}$",
         "Nominal": 1,
         "Bounds": [0, 10]
      }
   ]
})

In [9]:

import time

start_time = time.time()
cabinetry.templates.build(config, method="uproot")
# cabinetry.template_builder.create_histograms(config, method="coffea")
# Using the coffea backend requires this branch of cabinetry: https://github.com/alexander-held/cabinetry/pull/216
# It also further requires the modified coffea_wrapper.py found at https://github.com/stormsomething/CoffeaHZZAnalysis
finish_time = time.time()

print("Total runtime in seconds: " + str(finish_time - start_time))

DEBUG - cabinetry.route -   in region Signal_region
DEBUG - cabinetry.route -     reading sample Data
DEBUG - cabinetry.route -       variation Nominal
DEBUG - cabinetry.histo - saving histogram to histograms/Signal_region_Data.npz
DEBUG - cabinetry.route -     reading sample Signal
DEBUG - cabinetry.route -       variation Nominal
DEBUG - cabinetry.histo - saving histogram to histograms/Signal_region_Signal.npz
DEBUG - cabinetry.route -     reading sample Background $ZZ^{star}$
DEBUG - cabinetry.route -       variation Nominal
DEBUG - cabinetry.histo - saving histogram to histograms/Signal_region_Background-$ZZ^{star}$.npz
DEBUG - cabinetry.route -     reading sample Background $Z,tt^{bar}$
DEBUG - cabinetry.route -       variation Nominal
DEBUG - cabinetry.histo - saving histogram to histograms/Signal_region_Background-$Z,tt^{bar}$.npz

Total runtime in seconds: 114.2297842502594

In [10]:

plot_config = {
   "Regions":[
      {
         "Name": "Signal_region",
         "Variable": "4-lepton invariant mass $\mathrm{m_{4l}}$ [GeV]",
         "Binning": bin_list
      }
   ]
}

In [11]:

ws = cabinetry.workspace.build(config)
model, data = cabinetry.model_utils.model_and_data(ws)
model_prediction = cabinetry.model_utils.prediction(model)
cabinetry.tabulate.yields(model_prediction, data, per_bin=False, per_channel=True)
_ = cabinetry.visualize.data_mc(model_prediction, data, config=plot_config)

INFO - cabinetry.workspace - building workspace
WARNING - cabinetry.histo - the modified histogram histograms/Signal_region_Signal_modified.npz does not exist
WARNING - cabinetry.histo - loading the un-modified histogram instead!
DEBUG - cabinetry.workspace - adding NormFactor Signal_norm to sample Signal in region Signal_region
WARNING - cabinetry.histo - the modified histogram histograms/Signal_region_Background-$ZZ^{star}$_modified.npz does not exist
WARNING - cabinetry.histo - loading the un-modified histogram instead!
DEBUG - cabinetry.workspace - adding NormFactor ZZ_norm to sample Background $ZZ^{star}$ in region Signal_region
WARNING - cabinetry.histo - the modified histogram histograms/Signal_region_Background-$Z,tt^{bar}$_modified.npz does not exist
WARNING - cabinetry.histo - loading the un-modified histogram instead!
DEBUG - cabinetry.workspace - adding NormFactor Ztt_norm to sample Background $Z,tt^{bar}$ in region Signal_region
WARNING - cabinetry.histo - the modified histogram histograms/Signal_region_Data_modified.npz does not exist
WARNING - cabinetry.histo - loading the un-modified histogram instead!
INFO - pyhf.workspace - Validating spec against schema: workspace.json
INFO - pyhf.workspace - Validating spec against schema: workspace.json
INFO - pyhf.pdf - Validating spec against schema: model.json
INFO - pyhf.pdf - adding modifier staterror_Signal_region (34 new nuisance parameters)
INFO - pyhf.pdf - adding modifier Signal_norm (1 new nuisance parameters)
INFO - pyhf.pdf - adding modifier ZZ_norm (1 new nuisance parameters)
INFO - pyhf.pdf - adding modifier Ztt_norm (1 new nuisance parameters)
DEBUG - cabinetry.model_utils - total stdev is [... 0.718, 0.799, 0.552, 0.762, 0.577, 0.775, 0.65, 0.515, 0.683, 0.428, 0.585]]
DEBUG - cabinetry.model_utils - total stdev per channel is [5.28]
INFO - cabinetry.tabulate - yields per channel for pre-fit model prediction:
╒═════════════════════════╤═════════════════╕
│ sample                  │ Signal_region   │
╞═════════════════════════╪═════════════════╡
│ Background $Z,tt^{bar}$ │ 112.13          │
├─────────────────────────┼─────────────────┤
│ Background $ZZ^{star}$  │ 174.61          │
├─────────────────────────┼─────────────────┤
│ Signal                  │ 9.61            │
├─────────────────────────┼─────────────────┤
│ total                   │ 296.35 ± 5.28   │
├─────────────────────────┼─────────────────┤
│ data                    │ 371.00          │
╘═════════════════════════╧═════════════════╛
DEBUG - cabinetry.visualize.utils - saving figure as figures/Signal_region_prefit.pdf

In [12]:

fit_results = cabinetry.fit.fit(model, data)

INFO - cabinetry.fit - performing maximum likelihood fit
INFO - cabinetry.fit - MINUIT status:
┌─────────────────────────────────────────────────────────────────────────┐
│                                Migrad                                   │
├──────────────────────────────────┬──────────────────────────────────────┤
│ FCN = 68.27                      │             Nfcn = 2230              │
│ EDM = 4e-05 (Goal: 0.0002)       │                                      │
├──────────────────────────────────┼──────────────────────────────────────┤
│          Valid Minimum           │        No Parameters at limit        │
├──────────────────────────────────┼──────────────────────────────────────┤
│ Below EDM threshold (goal x 10)  │           Below call limit           │
├───────────────┬──────────────────┼───────────┬─────────────┬────────────┤
│  Covariance   │     Hesse ok     │ Accurate  │  Pos. def.  │ Not forced │
└───────────────┴──────────────────┴───────────┴─────────────┴────────────┘
DEBUG - cabinetry.fit - -2 log(L) = 68.265587 at best-fit point
INFO - cabinetry.fit - fit results (with symmetric uncertainties):
INFO - cabinetry.fit - staterror_Signal_region[0]  =  1.0008 +/- 0.0581
INFO - cabinetry.fit - staterror_Signal_region[1]  =  1.0011 +/- 0.0251
INFO - cabinetry.fit - staterror_Signal_region[2]  =  1.0038 +/- 0.0237
INFO - cabinetry.fit - staterror_Signal_region[3]  =  1.0069 +/- 0.1021
INFO - cabinetry.fit - staterror_Signal_region[4]  =  0.9656 +/- 0.1642
INFO - cabinetry.fit - staterror_Signal_region[5]  =  0.9701 +/- 0.1512
INFO - cabinetry.fit - staterror_Signal_region[6]  =  0.9808 +/- 0.1441
INFO - cabinetry.fit - staterror_Signal_region[7]  =  0.9370 +/- 0.1100
INFO - cabinetry.fit - staterror_Signal_region[8]  =  1.0300 +/- 0.0981
INFO - cabinetry.fit - staterror_Signal_region[9]  =  0.9755 +/- 0.1069
INFO - cabinetry.fit - staterror_Signal_region[10] =  1.0413 +/- 0.1270
INFO - cabinetry.fit - staterror_Signal_region[11] =  0.9920 +/- 0.1343
INFO - cabinetry.fit - staterror_Signal_region[12] =  1.0356 +/- 0.1345
INFO - cabinetry.fit - staterror_Signal_region[13] =  1.0659 +/- 0.1301
INFO - cabinetry.fit - staterror_Signal_region[14] =  1.0041 +/- 0.1315
INFO - cabinetry.fit - staterror_Signal_region[15] =  0.9831 +/- 0.1522
INFO - cabinetry.fit - staterror_Signal_region[16] =  1.1054 +/- 0.1350
INFO - cabinetry.fit - staterror_Signal_region[17] =  0.9837 +/- 0.1570
INFO - cabinetry.fit - staterror_Signal_region[18] =  1.0149 +/- 0.1476
INFO - cabinetry.fit - staterror_Signal_region[19] =  0.9593 +/- 0.1356
INFO - cabinetry.fit - staterror_Signal_region[20] =  1.0376 +/- 0.0924
INFO - cabinetry.fit - staterror_Signal_region[21] =  1.0159 +/- 0.0757
INFO - cabinetry.fit - staterror_Signal_region[22] =  0.9820 +/- 0.0589
INFO - cabinetry.fit - staterror_Signal_region[23] =  1.0090 +/- 0.0748
INFO - cabinetry.fit - staterror_Signal_region[24] =  0.9818 +/- 0.0715
INFO - cabinetry.fit - staterror_Signal_region[25] =  0.9953 +/- 0.0574
INFO - cabinetry.fit - staterror_Signal_region[26] =  0.9823 +/- 0.0770
INFO - cabinetry.fit - staterror_Signal_region[27] =  0.9866 +/- 0.0655
INFO - cabinetry.fit - staterror_Signal_region[28] =  0.9564 +/- 0.0859
INFO - cabinetry.fit - staterror_Signal_region[29] =  1.0151 +/- 0.0808
INFO - cabinetry.fit - staterror_Signal_region[30] =  0.9894 +/- 0.0707
INFO - cabinetry.fit - staterror_Signal_region[31] =  0.9851 +/- 0.0911
INFO - cabinetry.fit - staterror_Signal_region[32] =  1.0057 +/- 0.0684
INFO - cabinetry.fit - staterror_Signal_region[33] =  1.0036 +/- 0.0897
INFO - cabinetry.fit - Signal_norm                 =  1.7923 +/- 0.8458
INFO - cabinetry.fit - ZZ_norm                     =  1.5301 +/- 0.1292
INFO - cabinetry.fit - Ztt_norm                    =  0.7769 +/- 0.1917

In [13]:

model_prediction_postfit = cabinetry.model_utils.prediction(model, fit_results=fit_results)
_ = cabinetry.visualize.data_mc(model_prediction_postfit, data, config=plot_config)

DEBUG - cabinetry.model_utils - total stdev is [... 1.14, 1.36, 1.31, 1.17, 1.23, 1.1, 1.2, 1.08, 0.967, 1.04, 0.849, 0.914]]
DEBUG - cabinetry.model_utils - total stdev per channel is [19.2]
DEBUG - cabinetry.visualize.utils - saving figure as figures/Signal_region_postfit.pdf

In [14]:

significance_results = cabinetry.fit.significance(model, data)

INFO - cabinetry.fit - calculating discovery significance
INFO - cabinetry.fit - observed p-value: 0.950%
INFO - cabinetry.fit - observed significance: 2.345
INFO - cabinetry.fit - expected p-value: 9.661%
INFO - cabinetry.fit - expected significance: 1.301