# Code based on https://github.com/alexander-held/PyHEP-2021-cabinetry/blob/main/talk.ipynb
import cabinetry
cabinetry.set_logging()
# Make sure we have all the data files
import os
import requests
# If any of the necessary folders don't exist, make them
if not os.path.isdir('4lep'):
os.mkdir('4lep')
if not os.path.isdir('4lep/Data'):
os.mkdir('4lep/Data')
if not os.path.isdir('4lep/MC'):
os.mkdir('4lep/MC')
# List of all of the files we need
filelist = ['4lep/Data/data_A.4lep.root',
'4lep/Data/data_B.4lep.root',
'4lep/Data/data_C.4lep.root',
'4lep/Data/data_D.4lep.root',
'4lep/MC/mc_361106.Zee.4lep.root',
'4lep/MC/mc_361107.Zmumu.4lep.root',
'4lep/MC/mc_410000.ttbar_lep.4lep.root',
'4lep/MC/mc_363490.llll.4lep.root',
'4lep/MC/mc_345060.ggH125_ZZ4lep.4lep.root',
'4lep/MC/mc_344235.VBFH125_ZZ4lep.4lep.root',
'4lep/MC/mc_341964.WH125_ZZ4lep.4lep.root',
'4lep/MC/mc_341947.ZH125_ZZ4lep.4lep.root']
# For each of these files
for filepath in filelist:
# Check if the file exists locally
if not os.path.isfile(filepath):
#If the file doesn't exist, download it
data_download = requests.get('https://atlas-opendata.web.cern.ch/atlas-opendata/samples/2020/' + filepath)
new_file = open(filepath, 'wb')
new_file.write(data_download.content)
new_file.close()
config = {
"General":{
"Measurement": "CabinetryHZZAnalysis",
"POI": "Signal_norm",
"InputPath": "4lep/{SamplePath}",
"HistogramFolder": "histograms/"
}
}
bin_list = list(range(80, 255, 5))
config.update({
"Regions":[
{
"Name": "Signal_region",
"Filter": "(lep_charge[:,0] + lep_charge[:,1] + lep_charge[:,2] + lep_charge[:,3] == 0) & ((lep_type[:,0] + lep_type[:,1] + lep_type[:,2] + lep_type[:,3] == 44) | (lep_type[:,0] + lep_type[:,1] + lep_type[:,2] + lep_type[:,3] == 48) | (lep_type[:,0] + lep_type[:,1] + lep_type[:,2] + lep_type[:,3] == 52))",
"Variable": "sqrt((lep_E[:,0] + lep_E[:,1] + lep_E[:,2] + lep_E[:,3])**2 - (lep_pt[:,0]*cos(lep_phi[:,0]) + lep_pt[:,1]*cos(lep_phi[:,1]) + lep_pt[:,2]*cos(lep_phi[:,2]) + lep_pt[:,3]*cos(lep_phi[:,3]))**2 - (lep_pt[:,0]*sin(lep_phi[:,0]) + lep_pt[:,1]*sin(lep_phi[:,1]) + lep_pt[:,2]*sin(lep_phi[:,2]) + lep_pt[:,3]*sin(lep_phi[:,3]))**2 - (lep_pt[:,0]*sinh(lep_eta[:,0]) + lep_pt[:,1]*sinh(lep_eta[:,1]) + lep_pt[:,2]*sinh(lep_eta[:,2]) + lep_pt[:,3]*sinh(lep_eta[:,3]))**2)/1000",
"Binning": bin_list
}
]
})
config.update({
"Samples":[
{
"Name": "Data",
"Tree": "mini",
"SamplePath": ["Data/data_A.4lep.root",
"Data/data_B.4lep.root",
"Data/data_C.4lep.root",
"Data/data_D.4lep.root"],
"Data": True
},
{
"Name": "Signal",
"Tree": "mini",
"SamplePath": ['MC/mc_345060.ggH125_ZZ4lep.4lep.root',
'MC/mc_344235.VBFH125_ZZ4lep.4lep.root',
'MC/mc_341964.WH125_ZZ4lep.4lep.root',
'MC/mc_341947.ZH125_ZZ4lep.4lep.root'],
"Weight": "((channelNumber == 345060)*2.1605e-6 + (channelNumber == 344235)*1.2588e-6 + (channelNumber == 341964)*2.5228e-5 + (channelNumber == 341947)*1.4283e-7)*mcWeight*scaleFactor_PILEUP*scaleFactor_ELE*scaleFactor_MUON*scaleFactor_LepTRIGGER"
},
{
"Name": "Background $ZZ^{star}$",
"Tree": "mini",
"SamplePath": "MC/mc_363490.llll.4lep.root",
"Weight": "0.0016685*mcWeight*scaleFactor_PILEUP*scaleFactor_ELE*scaleFactor_MUON*scaleFactor_LepTRIGGER"
},
{
"Name": "Background $Z,tt^{bar}$",
"Tree": "mini",
"SamplePath": ['MC/mc_361106.Zee.4lep.root',
'MC/mc_361107.Zmumu.4lep.root',
'MC/mc_410000.ttbar_lep.4lep.root'],
"Weight": "((channelNumber == 361106)*1.2980e-4 + (channelNumber == 361107)*1.3239e-4 + (channelNumber == 410000)*0.091663)*mcWeight*scaleFactor_PILEUP*scaleFactor_ELE*scaleFactor_MUON*scaleFactor_LepTRIGGER"
}
]
})
config.update({"Systematics": []})
config.update({
"NormFactors":[
{
"Name": "Signal_norm",
"Samples": "Signal",
"Nominal": 1,
"Bounds": [0, 10]
},
{
"Name": "ZZ_norm",
"Samples": "Background $ZZ^{star}$",
"Nominal": 1,
"Bounds": [0, 10]
},
{
"Name": "Ztt_norm",
"Samples": "Background $Z,tt^{bar}$",
"Nominal": 1,
"Bounds": [0, 10]
}
]
})
import time
start_time = time.time()
cabinetry.templates.build(config, method="uproot")
# cabinetry.template_builder.create_histograms(config, method="coffea")
# Using the coffea backend requires this branch of cabinetry: https://github.com/alexander-held/cabinetry/pull/216
# It also further requires the modified coffea_wrapper.py found at https://github.com/stormsomething/CoffeaHZZAnalysis
finish_time = time.time()
print("Total runtime in seconds: " + str(finish_time - start_time))
DEBUG - cabinetry.route - in region Signal_region DEBUG - cabinetry.route - reading sample Data DEBUG - cabinetry.route - variation Nominal DEBUG - cabinetry.histo - saving histogram to histograms/Signal_region_Data.npz DEBUG - cabinetry.route - reading sample Signal DEBUG - cabinetry.route - variation Nominal DEBUG - cabinetry.histo - saving histogram to histograms/Signal_region_Signal.npz DEBUG - cabinetry.route - reading sample Background $ZZ^{star}$ DEBUG - cabinetry.route - variation Nominal DEBUG - cabinetry.histo - saving histogram to histograms/Signal_region_Background-$ZZ^{star}$.npz DEBUG - cabinetry.route - reading sample Background $Z,tt^{bar}$ DEBUG - cabinetry.route - variation Nominal DEBUG - cabinetry.histo - saving histogram to histograms/Signal_region_Background-$Z,tt^{bar}$.npz
Total runtime in seconds: 114.2297842502594
plot_config = {
"Regions":[
{
"Name": "Signal_region",
"Variable": "4-lepton invariant mass $\mathrm{m_{4l}}$ [GeV]",
"Binning": bin_list
}
]
}
ws = cabinetry.workspace.build(config)
model, data = cabinetry.model_utils.model_and_data(ws)
model_prediction = cabinetry.model_utils.prediction(model)
cabinetry.tabulate.yields(model_prediction, data, per_bin=False, per_channel=True)
_ = cabinetry.visualize.data_mc(model_prediction, data, config=plot_config)
INFO - cabinetry.workspace - building workspace WARNING - cabinetry.histo - the modified histogram histograms/Signal_region_Signal_modified.npz does not exist WARNING - cabinetry.histo - loading the un-modified histogram instead! DEBUG - cabinetry.workspace - adding NormFactor Signal_norm to sample Signal in region Signal_region WARNING - cabinetry.histo - the modified histogram histograms/Signal_region_Background-$ZZ^{star}$_modified.npz does not exist WARNING - cabinetry.histo - loading the un-modified histogram instead! DEBUG - cabinetry.workspace - adding NormFactor ZZ_norm to sample Background $ZZ^{star}$ in region Signal_region WARNING - cabinetry.histo - the modified histogram histograms/Signal_region_Background-$Z,tt^{bar}$_modified.npz does not exist WARNING - cabinetry.histo - loading the un-modified histogram instead! DEBUG - cabinetry.workspace - adding NormFactor Ztt_norm to sample Background $Z,tt^{bar}$ in region Signal_region WARNING - cabinetry.histo - the modified histogram histograms/Signal_region_Data_modified.npz does not exist WARNING - cabinetry.histo - loading the un-modified histogram instead! INFO - pyhf.workspace - Validating spec against schema: workspace.json INFO - pyhf.workspace - Validating spec against schema: workspace.json INFO - pyhf.pdf - Validating spec against schema: model.json INFO - pyhf.pdf - adding modifier staterror_Signal_region (34 new nuisance parameters) INFO - pyhf.pdf - adding modifier Signal_norm (1 new nuisance parameters) INFO - pyhf.pdf - adding modifier ZZ_norm (1 new nuisance parameters) INFO - pyhf.pdf - adding modifier Ztt_norm (1 new nuisance parameters) DEBUG - cabinetry.model_utils - total stdev is [... 0.718, 0.799, 0.552, 0.762, 0.577, 0.775, 0.65, 0.515, 0.683, 0.428, 0.585]] DEBUG - cabinetry.model_utils - total stdev per channel is [5.28] INFO - cabinetry.tabulate - yields per channel for pre-fit model prediction: ╒═════════════════════════╤═════════════════╕ │ sample │ Signal_region │ ╞═════════════════════════╪═════════════════╡ │ Background $Z,tt^{bar}$ │ 112.13 │ ├─────────────────────────┼─────────────────┤ │ Background $ZZ^{star}$ │ 174.61 │ ├─────────────────────────┼─────────────────┤ │ Signal │ 9.61 │ ├─────────────────────────┼─────────────────┤ │ total │ 296.35 ± 5.28 │ ├─────────────────────────┼─────────────────┤ │ data │ 371.00 │ ╘═════════════════════════╧═════════════════╛ DEBUG - cabinetry.visualize.utils - saving figure as figures/Signal_region_prefit.pdf
fit_results = cabinetry.fit.fit(model, data)
INFO - cabinetry.fit - performing maximum likelihood fit INFO - cabinetry.fit - MINUIT status: ┌─────────────────────────────────────────────────────────────────────────┐ │ Migrad │ ├──────────────────────────────────┬──────────────────────────────────────┤ │ FCN = 68.27 │ Nfcn = 2230 │ │ EDM = 4e-05 (Goal: 0.0002) │ │ ├──────────────────────────────────┼──────────────────────────────────────┤ │ Valid Minimum │ No Parameters at limit │ ├──────────────────────────────────┼──────────────────────────────────────┤ │ Below EDM threshold (goal x 10) │ Below call limit │ ├───────────────┬──────────────────┼───────────┬─────────────┬────────────┤ │ Covariance │ Hesse ok │ Accurate │ Pos. def. │ Not forced │ └───────────────┴──────────────────┴───────────┴─────────────┴────────────┘ DEBUG - cabinetry.fit - -2 log(L) = 68.265587 at best-fit point INFO - cabinetry.fit - fit results (with symmetric uncertainties): INFO - cabinetry.fit - staterror_Signal_region[0] = 1.0008 +/- 0.0581 INFO - cabinetry.fit - staterror_Signal_region[1] = 1.0011 +/- 0.0251 INFO - cabinetry.fit - staterror_Signal_region[2] = 1.0038 +/- 0.0237 INFO - cabinetry.fit - staterror_Signal_region[3] = 1.0069 +/- 0.1021 INFO - cabinetry.fit - staterror_Signal_region[4] = 0.9656 +/- 0.1642 INFO - cabinetry.fit - staterror_Signal_region[5] = 0.9701 +/- 0.1512 INFO - cabinetry.fit - staterror_Signal_region[6] = 0.9808 +/- 0.1441 INFO - cabinetry.fit - staterror_Signal_region[7] = 0.9370 +/- 0.1100 INFO - cabinetry.fit - staterror_Signal_region[8] = 1.0300 +/- 0.0981 INFO - cabinetry.fit - staterror_Signal_region[9] = 0.9755 +/- 0.1069 INFO - cabinetry.fit - staterror_Signal_region[10] = 1.0413 +/- 0.1270 INFO - cabinetry.fit - staterror_Signal_region[11] = 0.9920 +/- 0.1343 INFO - cabinetry.fit - staterror_Signal_region[12] = 1.0356 +/- 0.1345 INFO - cabinetry.fit - staterror_Signal_region[13] = 1.0659 +/- 0.1301 INFO - cabinetry.fit - staterror_Signal_region[14] = 1.0041 +/- 0.1315 INFO - cabinetry.fit - staterror_Signal_region[15] = 0.9831 +/- 0.1522 INFO - cabinetry.fit - staterror_Signal_region[16] = 1.1054 +/- 0.1350 INFO - cabinetry.fit - staterror_Signal_region[17] = 0.9837 +/- 0.1570 INFO - cabinetry.fit - staterror_Signal_region[18] = 1.0149 +/- 0.1476 INFO - cabinetry.fit - staterror_Signal_region[19] = 0.9593 +/- 0.1356 INFO - cabinetry.fit - staterror_Signal_region[20] = 1.0376 +/- 0.0924 INFO - cabinetry.fit - staterror_Signal_region[21] = 1.0159 +/- 0.0757 INFO - cabinetry.fit - staterror_Signal_region[22] = 0.9820 +/- 0.0589 INFO - cabinetry.fit - staterror_Signal_region[23] = 1.0090 +/- 0.0748 INFO - cabinetry.fit - staterror_Signal_region[24] = 0.9818 +/- 0.0715 INFO - cabinetry.fit - staterror_Signal_region[25] = 0.9953 +/- 0.0574 INFO - cabinetry.fit - staterror_Signal_region[26] = 0.9823 +/- 0.0770 INFO - cabinetry.fit - staterror_Signal_region[27] = 0.9866 +/- 0.0655 INFO - cabinetry.fit - staterror_Signal_region[28] = 0.9564 +/- 0.0859 INFO - cabinetry.fit - staterror_Signal_region[29] = 1.0151 +/- 0.0808 INFO - cabinetry.fit - staterror_Signal_region[30] = 0.9894 +/- 0.0707 INFO - cabinetry.fit - staterror_Signal_region[31] = 0.9851 +/- 0.0911 INFO - cabinetry.fit - staterror_Signal_region[32] = 1.0057 +/- 0.0684 INFO - cabinetry.fit - staterror_Signal_region[33] = 1.0036 +/- 0.0897 INFO - cabinetry.fit - Signal_norm = 1.7923 +/- 0.8458 INFO - cabinetry.fit - ZZ_norm = 1.5301 +/- 0.1292 INFO - cabinetry.fit - Ztt_norm = 0.7769 +/- 0.1917
model_prediction_postfit = cabinetry.model_utils.prediction(model, fit_results=fit_results)
_ = cabinetry.visualize.data_mc(model_prediction_postfit, data, config=plot_config)
DEBUG - cabinetry.model_utils - total stdev is [... 1.14, 1.36, 1.31, 1.17, 1.23, 1.1, 1.2, 1.08, 0.967, 1.04, 0.849, 0.914]] DEBUG - cabinetry.model_utils - total stdev per channel is [19.2] DEBUG - cabinetry.visualize.utils - saving figure as figures/Signal_region_postfit.pdf
significance_results = cabinetry.fit.significance(model, data)
INFO - cabinetry.fit - calculating discovery significance INFO - cabinetry.fit - observed p-value: 0.950% INFO - cabinetry.fit - observed significance: 2.345 INFO - cabinetry.fit - expected p-value: 9.661% INFO - cabinetry.fit - expected significance: 1.301