import numpy as np
%matplotlib inline
from coffea import hist
import coffea.processor as processor
import awkward as ak
# This program plots a per-event array (in this case, Jet pT). In Coffea, this is not very dissimilar from the event-level process.
class Processor(processor.ProcessorABC):
    """Coffea processor that histograms the pT of every jet in a dataset.

    The accumulator holds two entries:

    * ``'Jet_pt'``  -- a histogram with a categorical ``dataset`` axis and a
      binned ``Jet_pt`` axis.
    * ``'cutflow'`` -- integer counters for the number of events and the
      number of jets that were processed.
    """

    def __init__(self):
        # One category per dataset name, and 100 bins spanning 15-60 on the
        # axis labelled "Jet_pt [GeV]".
        sample_axis = hist.Cat("dataset", "")
        pt_axis = hist.Bin("Jet_pt", "Jet_pt [GeV]", 100, 15, 60)

        self._accumulator = processor.dict_accumulator({
            'Jet_pt': hist.Hist("Counts", sample_axis, pt_axis),
            'cutflow': processor.defaultdict_accumulator(int),
        })

    @property
    def accumulator(self):
        """Return the accumulator template built in ``__init__``."""
        return self._accumulator

    def process(self, events):
        """Fill the jet-pT histogram and cutflow counters for one chunk.

        ``events`` must expose ``metadata['dataset']`` and ``Jet.pt``; the
        latter is treated as a jagged array with one sub-list of jets per
        event. Returns a fresh accumulator filled from this chunk only.
        """
        result = self.accumulator.identity()
        sample = events.metadata['dataset']
        jet_pt = events.Jet.pt

        # The outer length of the array is the number of events. The number
        # of jets differs from event to event, so count the entries of each
        # sub-list and then add those per-event counts together.
        n_events = ak.size(jet_pt, axis=0)
        jets_per_event = ak.count(jet_pt, axis=1)
        result['cutflow']['all events'] += n_events
        result['cutflow']['all jets'] += ak.sum(jets_per_event)

        # A histogram fill needs a flat list of values, so drop the per-event
        # nesting and fill with every jet's pT.
        result['Jet_pt'].fill(dataset=sample, Jet_pt=ak.flatten(jet_pt))
        return result

    def postprocess(self, accumulator):
        """Return the merged accumulator unchanged."""
        return accumulator
from dask.distributed import Client
import time
# Connect to the Dask scheduler at the TLS endpoint on localhost, port 8786.
client = Client(address="tls://localhost:8786")
# Our file is missing some cross-references, so we have to make NanoAOD push warnings instead of erroring out.
# This ultimately isn't a problem, it's just a constraint of the public NanoAOD we're using.
def fix():
    """Make NanoAODSchema warn about missing cross-references.

    Takes no arguments and returns nothing; it only sets the class-level
    ``warn_missing_crossrefs`` flag in the process where it is called.
    """
    # Imported inside the function so the import happens in whichever
    # process executes the call.
    import coffea.nanoevents as nanoevents
    nanoevents.NanoAODSchema.warn_missing_crossrefs = True
# Set the warn-instead-of-error flag on the schema class used in this process,
# and register `fix` as a worker callback on the Dask client.
processor.NanoAODSchema.warn_missing_crossrefs = True
client.register_worker_callbacks(fix)

# Dataset name -> list of ROOT files to read.
fileset = {
    'SingleMu': [
        "root://eospublic.cern.ch//eos/root-eos/benchmark/Run2012B_SingleMu.root",
    ],
}

# Run the processor over the 'Events' tree of every file using the Dask
# executor; `output` is the resulting accumulator.
output = processor.run_uproot_job(
    fileset,
    treename='Events',
    processor_instance=Processor(),
    executor=processor.dask_executor,
    executor_args={'schema': processor.NanoAODSchema, 'client': client},
)
# Output:
# [########################################] | 100% Completed | 1min 30.2s
# Draw the jet-pT histogram, overlaying one curve per dataset.
hist.plot1d(
    output['Jet_pt'],
    overlay='dataset',
    fill_opts={'edgecolor': (0, 0, 0, 0.3), 'alpha': 0.8},
)
# Output:
# <AxesSubplot:xlabel='Jet_pt [GeV]', ylabel='Counts'>
# Print every cutflow counter as "<name> <count>", one per line.
for label, total in output['cutflow'].items():
    print(label, total)
# Output:
# all events 53446198
# all jets 170952895