%matplotlib inline
from coffea import hist
from coffea.analysis_objects import JaggedCandidateArray
import coffea.processor as processor
# This program plots a jagged per-event array, i.e. one with a variable number of entries per event (in this case, jet pT). In Coffea, this is not very different from plotting an event-level quantity.
class Processor(processor.ProcessorABC):
    """Coffea processor that histograms jet pT and keeps simple cutflow counts.

    The accumulator holds two entries:
      * 'Jet_pt'  -- a histogram with a categorical "dataset" axis and a
                     100-bin "Jet_pt" axis spanning 15 to 60 (labelled in GeV).
      * 'cutflow' -- integer counters keyed by a descriptive string.
    """

    def __init__(self):
        # Build the histogram in one expression: a category axis for the
        # dataset name plus the binned jet-pT axis.
        jet_pt_hist = hist.Hist(
            "Counts",
            hist.Cat("dataset", ""),
            hist.Bin("Jet_pt", "Jet_pt [GeV]", 100, 15, 60),
        )
        self._accumulator = processor.dict_accumulator(
            {
                'Jet_pt': jet_pt_hist,
                'cutflow': processor.defaultdict_accumulator(int),
            }
        )

    @property
    def accumulator(self):
        """Return the accumulator built in ``__init__``."""
        return self._accumulator

    def process(self, events):
        """Fill the jet-pT histogram and cutflow counters from ``events``.

        Args:
            events: object exposing ``metadata['dataset']`` and ``Jet.pt``;
                ``Jet.pt`` is used as a jagged array (one sub-array of jets
                per event).

        Returns:
            The filled accumulator obtained from ``self.accumulator.identity()``.
        """
        # Work on the accumulator's identity() rather than on
        # self._accumulator directly.
        result = self.accumulator.identity()
        sample_name = events.metadata['dataset']
        jet_pts = events.Jet.pt

        counters = result['cutflow']
        # The outer size of the jagged array is the number of events.
        counters['all events'] += jet_pts.size
        # The number of jets varies per event, so take the per-event
        # sub-array sizes and add them up to get the total jet count.
        counters['all jets'] += jet_pts.counts.sum()

        # Histogram filling wants a flat list of jets, so drop the per-event
        # structure with .flatten() before filling.
        result['Jet_pt'].fill(dataset=sample_name, Jet_pt=jet_pts.flatten())
        return result

    def postprocess(self, accumulator):
        """Return ``accumulator`` unchanged; no post-processing is applied."""
        return accumulator
# Map each dataset name to the list of ROOT files that belong to it.
fileset = {
    'SingleMu': [
        "root://eospublic.cern.ch//eos/root-eos/benchmark/Run2012B_SingleMu.root",
    ],
}
from dask.distributed import Client
from coffea_casa import CoffeaCasaCluster
# Connect to the Dask scheduler at the local TLS endpoint.
client = Client("tls://localhost:8786")

# Run the Processor over the "Events" tree of every file in `fileset`, using
# the Dask executor with the client created above.
output = processor.run_uproot_job(
    fileset=fileset,
    treename="Events",
    processor_instance=Processor(),
    executor=processor.dask_executor,
    executor_args={'client': client, 'nano': True},
    chunksize=250000,
)
[ ] | 2% Completed | 29.9s0s
distributed.comm.tcp - WARNING - Closing dangling stream in <TLS local=tls://127.0.0.1:43642 remote=tls://localhost:8786>
[########################################] | 100% Completed | 1min 10.3s
# Draw the jet-pT histogram, overlaying one curve per dataset category.
hist.plot1d(
    output['Jet_pt'],
    overlay='dataset',
    fill_opts={'edgecolor': (0, 0, 0, 0.3), 'alpha': 0.8},
)
<matplotlib.axes._subplots.AxesSubplot at 0x7fefd9325610>
# Print every cutflow counter as "<name> <count>", one per line.
for key, value in output['cutflow'].items():
    print(key, value)
all events 53446198 all jets 170952895