import numpy as np
%matplotlib inline
from coffea import hist
from coffea.analysis_objects import JaggedCandidateArray
import coffea.processor as processor
# This program plots a per-event array (jet_pt) that has been masked to meet certain conditions (in this case, abs(jet eta) < 1).
class Processor(processor.ProcessorABC):
    """Histogram the pt of jets that pass an |eta| cut and keep a cutflow.

    The accumulator holds two entries:

    * ``'Jet_pt'``  -- a histogram with a ``dataset`` category axis and a
      ``Jet_pt`` axis of 100 bins between 15 and 60 (labelled in GeV).
    * ``'cutflow'`` -- integer counters keyed by a short description of
      each selection step.

    Parameters
    ----------
    eta_cut : float, optional
        Jets are kept when ``abs(eta) < eta_cut``. Defaults to ``1.0``.
    """

    def __init__(self, eta_cut=1.0):
        self._eta_cut = eta_cut
        dataset_axis = hist.Cat("dataset", "")
        jet_pt_axis = hist.Bin("Jet_pt", "Jet_pt [GeV]", 100, 15, 60)
        self._accumulator = processor.dict_accumulator({
            'Jet_pt': hist.Hist("Counts", dataset_axis, jet_pt_axis),
            'cutflow': processor.defaultdict_accumulator(int),
        })

    @property
    def accumulator(self):
        """Return the template accumulator built in ``__init__``."""
        return self._accumulator

    def process(self, events):
        """Fill the histogram and cutflow for one chunk of events.

        ``events`` must expose ``metadata['dataset']`` and a ``Jet``
        collection with ``eta`` and ``pt`` fields (NanoEvents layout).
        Returns a fresh accumulator holding this chunk's contribution.
        """
        output = self.accumulator.identity()
        dataset = events.metadata['dataset']

        # NanoEvents allows easy access to our jets with the Jet column.
        jets = events.Jet
        output['cutflow']['all events'] += jets.size
        output['cutflow']['number of jets'] += jets.counts.sum()

        # Comparisons act element-wise on the jagged array, so this yields a
        # Boolean mask with the same structure as `jets`: True for every jet
        # that satisfies abs(eta) < eta_cut, False otherwise.
        central_mask = np.absolute(jets.eta) < self._eta_cut

        # Sum the Trues (=1) inside each event's sublist, then sum over the
        # events, to get the number of jets passing the abs(eta) cut.
        # With the default eta_cut the key is exactly 'abs(eta) < 1'.
        cut_label = 'abs(eta) < {:g}'.format(self._eta_cut)
        output['cutflow'][cut_label] += central_mask.sum().sum()

        # Apply the mask to obtain the jets we actually want to plot.
        good_jets = jets[central_mask]

        # good_jets holds jets rather than Booleans, so count the jets left
        # in each event and sum those counts. This should match the
        # mask-based count above.
        output['cutflow']['final good jets'] += good_jets.counts.sum()

        # The histogram expects a flat array, so drop the per-event nesting.
        output['Jet_pt'].fill(dataset=dataset, Jet_pt=good_jets.pt.flatten())
        return output

    def postprocess(self, accumulator):
        """Return the merged accumulator unchanged."""
        return accumulator
# Map each dataset label to the list of ROOT files that belong to it.
fileset = {
    'SingleMu': [
        "root://eospublic.cern.ch//eos/root-eos/benchmark/Run2012B_SingleMu.root",
    ],
}

from dask.distributed import Client
from coffea_casa import CoffeaCasaCluster

# Attach to the Dask scheduler at this TLS address.
client = Client("tls://localhost:8786")

# Run the processor over the "Events" tree of every file in `fileset` using
# the Dask executor; 'nano': True is passed through in the executor
# arguments, and the input is split into chunks of 250000.
output = processor.run_uproot_job(
    fileset=fileset,
    treename="Events",
    processor_instance=Processor(),
    executor=processor.dask_executor,
    executor_args={'client': client, 'nano': True},
    chunksize=250000,
)
[########################################] | 100% Completed | 4.1s
distributed.comm.tcp - WARNING - Closing dangling stream in <TLS local=tls://127.0.0.1:44122 remote=tls://localhost:8786>
[################################## ] | 85% Completed | 1min 9.2s
distributed.comm.tcp - WARNING - Closing dangling stream in <TLS local=tls://127.0.0.1:44444 remote=tls://localhost:8786> distributed.comm.tcp - WARNING - Closing dangling stream in <TLS local=tls://127.0.0.1:44446 remote=tls://localhost:8786>
[########################################] | 100% Completed | 1min 41.8s
# Draw the Jet_pt histogram, overlaying one curve per dataset, with a
# semi-transparent fill and a faint black edge.
hist.plot1d(
    output['Jet_pt'],
    overlay='dataset',
    fill_opts={'edgecolor': (0, 0, 0, 0.3), 'alpha': 0.8},
)
<matplotlib.axes._subplots.AxesSubplot at 0x7f867ff09dc0>
# Print every cutflow step together with its accumulated count.
for step, count in output['cutflow'].items():
    print(step, count)
all events 53446198 number of jets 170952895 abs(eta) < 1 39563735 final good jets 39563735