Limits: number counting experiment with uncertainty on both the background rate and signal efficiency.
The usage of a Confidence Interval Calculator to set a limit on the signal is illustrated
Author: Kyle Cranmer
This notebook tutorial was automatically generated with ROOTBOOK-izer from the macro found in the ROOT repository on Wednesday, April 17, 2024 at 11:20 AM.
%%cpp -d
#include "RooProfileLL.h"
#include "RooAbsPdf.h"
#include "RooStats/HypoTestResult.h"
#include "RooRealVar.h"
#include "RooPlot.h"
#include "RooDataSet.h"
#include "RooTreeDataStore.h"
#include "TTree.h"
#include "TCanvas.h"
#include "TLine.h"
#include "TStopwatch.h"
#include "RooStats/ProfileLikelihoodCalculator.h"
#include "RooStats/MCMCCalculator.h"
#include "RooStats/UniformProposal.h"
#include "RooStats/FeldmanCousins.h"
#include "RooStats/NumberCountingPdfFactory.h"
#include "RooStats/ConfInterval.h"
#include "RooStats/PointSetInterval.h"
#include "RooStats/LikelihoodInterval.h"
#include "RooStats/LikelihoodIntervalPlot.h"
#include "RooStats/RooStatsUtils.h"
#include "RooStats/ModelConfig.h"
#include "RooStats/MCMCInterval.h"
#include "RooStats/MCMCIntervalPlot.h"
#include "RooStats/ProposalFunction.h"
#include "RooStats/ProposalHelper.h"
#include "RooFitResult.h"
#include "TGraph2D.h"
#include <cassert>
using namespace RooFit;
using namespace RooStats;
An example of setting a limit in a number counting experiment with uncertainty on background and signal
to time the macro
TStopwatch t;
t.Start();
RooWorkspace wspace{};
wspace.factory("Poisson::countingModel(obs[150,0,300], "
"sum(s[50,0,120]*ratioSigEff[1.,0,3.],b[100]*ratioBkgEff[1.,0.,3.]))"); // counting model
wspace.factory("Gaussian::sigConstraint(ratioSigEff,1,0.05)"); // 5% signal efficiency uncertainty wspace.factory("Gaussian::bkgConstraint(ratioBkgEff,1,0.1)"); // 10% background efficiency uncertainty
wspace.factory("Gaussian::sigConstraint(gSigEff[1,0,3],ratioSigEff,0.05)"); // 5% signal efficiency uncertainty
wspace.factory("Gaussian::bkgConstraint(gSigBkg[1,0,3],ratioBkgEff,0.2)"); // 10% background efficiency uncertainty
wspace.factory("PROD::modelWithConstraints(countingModel,sigConstraint,bkgConstraint)"); // product of terms
wspace.Print();
RooAbsPdf *modelWithConstraints = wspace.pdf("modelWithConstraints"); // get the model
RooRealVar *obs = wspace.var("obs"); // get the observable
RooRealVar *s = wspace.var("s"); // get the signal we care about
RooRealVar *b =
wspace.var("b"); // get the background and set it to a constant. Uncertainty included in ratioBkgEff
b->setConstant();
RooRealVar *ratioSigEff = wspace.var("ratioSigEff"); // get uncertain parameter to constrain
RooRealVar *ratioBkgEff = wspace.var("ratioBkgEff"); // get uncertain parameter to constrain
RooArgSet constrainedParams(*ratioSigEff,
*ratioBkgEff); // need to constrain these in the fit (should change default behavior)
RooRealVar *gSigEff = wspace.var("gSigEff"); // global observables for signal efficiency
RooRealVar *gSigBkg = wspace.var("gSigBkg"); // global obs for background efficiency
gSigEff->setConstant();
gSigBkg->setConstant();
RooWorkspace() contents variables --------- (b,gSigBkg,gSigEff,obs,ratioBkgEff,ratioSigEff,s) p.d.f.s ------- RooGaussian::bkgConstraint[ x=gSigBkg mean=ratioBkgEff sigma=0.2 ] = 1 RooPoisson::countingModel[ x=obs mean=countingModel_2 ] = 0.0325554 RooProdPdf::modelWithConstraints[ countingModel * sigConstraint * bkgConstraint ] = 0.0325554 RooGaussian::sigConstraint[ x=gSigEff mean=ratioSigEff sigma=0.05 ] = 1 functions -------- RooAddition::countingModel_2[ countingModel_2_[s_x_ratioSigEff] + countingModel_2_[b_x_ratioBkgEff] ] = 150 RooProduct::countingModel_2_[b_x_ratioBkgEff][ b * ratioBkgEff ] = 100 RooProduct::countingModel_2_[s_x_ratioSigEff][ s * ratioSigEff ] = 50
Create an example dataset with 160 observed events
obs->setVal(160.);
RooDataSet dataOrig{"exampleData", "exampleData", *obs};
dataOrig.add(*obs);
RooArgSet all(*s, *ratioBkgEff, *ratioSigEff);
not necessary
modelWithConstraints->fitTo(dataOrig, Constrain({*ratioSigEff, *ratioBkgEff}), PrintLevel(-1));
[#1] INFO:Minimization -- Including the following constraint terms in minimization: (sigConstraint,bkgConstraint) [#1] INFO:Minimization -- The global observables are not defined , normalize constraints with respect to the parameters (ratioSigEff,ratioBkgEff) [#1] INFO:Fitting -- RooAbsPdf::fitTo(modelWithConstraints) fixing normalization set for coefficient determination to observables in data [#1] INFO:Fitting -- using CPU computation library compiled with -mavx2 [#1] INFO:Fitting -- RooAddition::defaultErrorLevel(nll_modelWithConstraints_exampleData) Summation contains a RooNLLVar, using its error level [#1] INFO:Minimization -- RooAbsMinimizerFcn::setOptimizeConst: activating const optimization [#1] INFO:Minimization -- RooAbsMinimizerFcn::setOptimizeConst: deactivating const optimization
Now let's make some confidence intervals for s, our parameter of interest
RooArgSet paramOfInterest(*s);
ModelConfig modelConfig(&wspace);
modelConfig.SetPdf(*modelWithConstraints);
modelConfig.SetParametersOfInterest(paramOfInterest);
modelConfig.SetNuisanceParameters(constrainedParams);
modelConfig.SetObservables(*obs);
modelConfig.SetGlobalObservables(RooArgSet(*gSigEff, *gSigBkg));
modelConfig.SetName("ModelConfig");
wspace.import(modelConfig);
wspace.import(dataOrig);
wspace.SetName("w");
wspace.writeToFile("rs101_ws.root");
[#1] INFO:ObjectHandling -- RooWorkspace::import() importing dataset exampleData
Make sure we reference the data in the workspace from now on
RooDataSet &data = static_cast<RooDataSet &>(*wspace.data(dataOrig.GetName()));
input_line_87:2:2: warning: 'data' shadows a declaration with the same name in the 'std' namespace; use '::data' to reference this declaration RooDataSet &data = static_cast<RooDataSet &>(*wspace.data(dataOrig.GetName())); ^
First, let's use a Calculator based on the Profile Likelihood Ratio ProfileLikelihoodCalculator plc(data, *modelWithConstraints, paramOfInterest);
ProfileLikelihoodCalculator plc(data, modelConfig);
plc.SetTestSize(.05);
std::unique_ptr<LikelihoodInterval> lrinterval{static_cast<LikelihoodInterval*>(plc.GetInterval())};
input_line_88:2:34: error: reference to 'data' is ambiguous ProfileLikelihoodCalculator plc(data, modelConfig); ^ input_line_87:2:14: note: candidate found by name lookup is 'data' RooDataSet &data = static_cast<RooDataSet &>(*wspace.data(dataOrig.GetName())); ^ /usr/include/c++/9/bits/range_access.h:318:5: note: candidate found by name lookup is 'std::data' data(initializer_list<_Tp> __il) noexcept ^ /usr/include/c++/9/bits/range_access.h:289:5: note: candidate found by name lookup is 'std::data' data(_Container& __cont) noexcept(noexcept(__cont.data())) ^ /usr/include/c++/9/bits/range_access.h:299:5: note: candidate found by name lookup is 'std::data' data(const _Container& __cont) noexcept(noexcept(__cont.data())) ^ /usr/include/c++/9/bits/range_access.h:309:5: note: candidate found by name lookup is 'std::data' data(_Tp (&__array)[_Nm]) noexcept ^ input_line_88:2:34: error: unknown type name 'data' ProfileLikelihoodCalculator plc(data, modelConfig); ^ input_line_88:2:40: error: unknown type name 'modelConfig' ProfileLikelihoodCalculator plc(data, modelConfig); ^
Let's make a plot
auto dataCanvas = new TCanvas("dataCanvas");
dataCanvas->Divide(2, 1);
dataCanvas->cd(1);
LikelihoodIntervalPlot plotInt(lrinterval.get());
plotInt.SetTitle("Profile Likelihood Ratio and Posterior for S");
plotInt.Draw();
[runStaticInitializersOnce]: Failed to materialize symbols: { (main, { _ZN12__cling_N52816__cling_Un1Qu339EPv, __cxx_global_var_initcling_module_442_.6, _ZN12__cling_N52824__dynamic__cling_Un1Qu30E, __cxx_global_var_initcling_module_442_, __cxx_global_var_initcling_module_442_.3, __orc_init_func.cling-module-442, _Z31__fd_init_order__cling_Un1Qu310v, _ZN5cling7runtime8internal15DynamicExprInfoC1EPKcPPvb, _ZNK5cling7runtime8internal15LifetimeHandler9getMemoryEv, __vd_init_order__cling_Un1Qu311, _ZN12__cling_N52810dataCanvasE, __cxx_global_var_initcling_module_442_.2, _GLOBAL__sub_I_cling_module_442, $.cling-module-442.__inits.0, _ZN5cling7runtime8internal15DynamicExprInfoC2EPKcPPvb, _ZN12__cling_N5287plotIntE }) } IncrementalExecutor::executeFunction: symbol '_ZN5cling7runtime8internal15LifetimeHandlerC1EPNS1_15DynamicExprInfoEPN5clang11DeclContextEPKcPNS_11InterpreterE' unresolved while linking [cling interface function]! You are probably missing the definition of cling::runtime::internal::LifetimeHandler::LifetimeHandler(cling::runtime::internal::DynamicExprInfo*, clang::DeclContext*, char const*, cling::Interpreter*) Maybe you need to load the corresponding shared library? IncrementalExecutor::executeFunction: symbol '_ZN5cling7runtime8internal15LifetimeHandlerD1Ev' unresolved while linking [cling interface function]! You are probably missing the definition of cling::runtime::internal::LifetimeHandler::~LifetimeHandler() Maybe you need to load the corresponding shared library?
Second, use a Calculator based on the Feldman Cousins technique
FeldmanCousins fc(data, modelConfig);
fc.UseAdaptiveSampling(true);
fc.FluctuateNumDataEntries(false); // number counting analysis: dataset always has 1 entry with N events observed
fc.SetNBins(100); // number of points to test per parameter
fc.SetTestSize(.05);
input_line_90:2:20: error: reference to 'data' is ambiguous FeldmanCousins fc(data, modelConfig); ^ input_line_87:2:14: note: candidate found by name lookup is 'data' RooDataSet &data = static_cast<RooDataSet &>(*wspace.data(dataOrig.GetName())); ^ /usr/include/c++/9/bits/range_access.h:318:5: note: candidate found by name lookup is 'std::data' data(initializer_list<_Tp> __il) noexcept ^ /usr/include/c++/9/bits/range_access.h:289:5: note: candidate found by name lookup is 'std::data' data(_Container& __cont) noexcept(noexcept(__cont.data())) ^ /usr/include/c++/9/bits/range_access.h:299:5: note: candidate found by name lookup is 'std::data' data(const _Container& __cont) noexcept(noexcept(__cont.data())) ^ /usr/include/c++/9/bits/range_access.h:309:5: note: candidate found by name lookup is 'std::data' data(_Tp (&__array)[_Nm]) noexcept ^ input_line_90:2:20: error: unknown type name 'data' FeldmanCousins fc(data, modelConfig); ^ input_line_90:2:26: error: unknown type name 'modelConfig' FeldmanCousins fc(data, modelConfig); ^
fc.SaveBeltToFile(true); // optional
std::unique_ptr<PointSetInterval> fcint{static_cast<PointSetInterval*>(fc.GetInterval())};
std::unique_ptr<RooFitResult> fit{modelWithConstraints->fitTo(data, Save(true), PrintLevel(-1))};
input_line_91:4:63: error: reference to 'data' is ambiguous std::unique_ptr<RooFitResult> fit{modelWithConstraints->fitTo(data, Save(true), PrintLevel(-1))}; ^ input_line_87:2:14: note: candidate found by name lookup is 'data' RooDataSet &data = static_cast<RooDataSet &>(*wspace.data(dataOrig.GetName())); ^ /usr/include/c++/9/bits/range_access.h:318:5: note: candidate found by name lookup is 'std::data' data(initializer_list<_Tp> __il) noexcept ^ /usr/include/c++/9/bits/range_access.h:289:5: note: candidate found by name lookup is 'std::data' data(_Container& __cont) noexcept(noexcept(__cont.data())) ^ /usr/include/c++/9/bits/range_access.h:299:5: note: candidate found by name lookup is 'std::data' data(const _Container& __cont) noexcept(noexcept(__cont.data())) ^ /usr/include/c++/9/bits/range_access.h:309:5: note: candidate found by name lookup is 'std::data' data(_Tp (&__array)[_Nm]) noexcept ^
Third, use a Calculator based on Markov Chain monte carlo Before configuring the calculator, let's make a ProposalFunction that will achieve a high acceptance rate
ProposalHelper ph;
ph.SetVariables((RooArgSet &)fit->floatParsFinal());
ph.SetCovMatrix(fit->covarianceMatrix());
ph.SetUpdateProposalParameters(true);
ph.SetCacheSize(100);
ProposalFunction *pdfProp = ph.GetProposalFunction();
MCMCCalculator mc(data, modelConfig);
mc.SetNumIters(20000); // steps to propose in the chain
mc.SetTestSize(.05); // 95% CL
mc.SetNumBurnInSteps(40); // ignore first N steps in chain as "burn in"
mc.SetProposalFunction(*pdfProp);
mc.SetLeftSideTailFraction(0.5); // find a "central" interval
std::unique_ptr<MCMCInterval> mcInt{static_cast<MCMCInterval *>(mc.GetInterval())};
input_line_92:9:19: error: reference to 'data' is ambiguous MCMCCalculator mc(data, modelConfig); ^ input_line_87:2:14: note: candidate found by name lookup is 'data' RooDataSet &data = static_cast<RooDataSet &>(*wspace.data(dataOrig.GetName())); ^ /usr/include/c++/9/bits/range_access.h:318:5: note: candidate found by name lookup is 'std::data' data(initializer_list<_Tp> __il) noexcept ^ /usr/include/c++/9/bits/range_access.h:289:5: note: candidate found by name lookup is 'std::data' data(_Container& __cont) noexcept(noexcept(__cont.data())) ^ /usr/include/c++/9/bits/range_access.h:299:5: note: candidate found by name lookup is 'std::data' data(const _Container& __cont) noexcept(noexcept(__cont.data())) ^ /usr/include/c++/9/bits/range_access.h:309:5: note: candidate found by name lookup is 'std::data' data(_Tp (&__array)[_Nm]) noexcept ^ input_line_92:9:19: error: unknown type name 'data' MCMCCalculator mc(data, modelConfig); ^ input_line_92:9:25: error: unknown type name 'modelConfig' MCMCCalculator mc(data, modelConfig); ^ input_line_92:10:1: error: cannot initialize an array element of type 'void *' with an rvalue of type 'MCMCCalculator (*)(int, int)' mc.SetNumIters(20000); // steps to propose in the chain ^~ input_line_92:11:1: error: cannot initialize an array element of type 'void *' with an rvalue of type 'MCMCCalculator (*)(int, int)' mc.SetTestSize(.05); // 95% CL ^~ input_line_92:12:1: error: cannot initialize an array element of type 'void *' with an rvalue of type 'MCMCCalculator (*)(int, int)' mc.SetNumBurnInSteps(40); // ignore first N steps in chain as "burn in" ^~ input_line_92:13:1: error: cannot initialize an array element of type 'void *' with an rvalue of type 'MCMCCalculator (*)(int, int)' mc.SetProposalFunction(*pdfProp); ^~ input_line_92:14:1: error: cannot initialize an array element of type 'void *' with an rvalue of type 'MCMCCalculator (*)(int, int)' mc.SetLeftSideTailFraction(0.5); // find a "central" interval ^~
Get Lower and Upper limits from Profile Calculator
std::cout << "Profile lower limit on s = " << lrinterval->LowerLimit(*s) << std::endl;
std::cout << "Profile upper limit on s = " << lrinterval->UpperLimit(*s) << std::endl;
[runStaticInitializersOnce]: Failed to materialize symbols: { (main, { __orc_init_func.cling-module-442 }) } cling JIT session error: Failed to materialize symbols: { (main, { _ZN5cling7runtime8internal15DynamicExprInfoC1EPKcPPvb }) }
Get Lower and Upper limits from FeldmanCousins with profile construction
if (fcint) {
double fcul = fcint->UpperLimit(*s);
double fcll = fcint->LowerLimit(*s);
std::cout << "FC lower limit on s = " << fcll << std::endl;
std::cout << "FC upper limit on s = " << fcul << std::endl;
auto fcllLine = new TLine(fcll, 0, fcll, 1);
auto fculLine = new TLine(fcul, 0, fcul, 1);
fcllLine->SetLineColor(kRed);
fculLine->SetLineColor(kRed);
fcllLine->Draw("same");
fculLine->Draw("same");
dataCanvas->Update();
}
cling JIT session error: Failed to materialize symbols: { (main, { _ZN12__cling_N52810dataCanvasE }) }
Plot MCMC interval and print some statistics
MCMCIntervalPlot mcPlot(*mcInt);
mcPlot.SetLineColor(kMagenta);
mcPlot.SetLineWidth(2);
mcPlot.Draw("same");
double mcul = mcInt->UpperLimit(*s);
double mcll = mcInt->LowerLimit(*s);
std::cout << "MCMC lower limit on s = " << mcll << std::endl;
std::cout << "MCMC upper limit on s = " << mcul << std::endl;
std::cout << "MCMC Actual confidence level: " << mcInt->GetActualConfidenceLevel() << std::endl;
cling JIT session error: Failed to materialize symbols: { (main, { _ZN5cling7runtime8internal15DynamicExprInfoC1EPKcPPvb }) } [runStaticInitializersOnce]: Failed to materialize symbols: { (main, { __orc_init_func.cling-module-448, __cxx_global_var_initcling_module_448_.7, _ZN12__cling_N5346mcPlotE, __cxx_global_var_initcling_module_448_, $.cling-module-448.__inits.0, _ZN8RooStats16MCMCIntervalPlot12SetLineWidthEi, __cxx_global_var_initcling_module_448_.4, _ZN8RooStats16MCMCIntervalPlot12SetLineColorEs, __cxx_global_var_initcling_module_448_.2, _ZN12__cling_N53416__cling_Un1Qu345EPv, _GLOBAL__sub_I_cling_module_448, _ZN12__cling_N5344mcllE, _ZN12__cling_N5344mculE, _ZN12__cling_N53424__dynamic__cling_Un1Qu33E, __vd_init_order__cling_Un1Qu317, _Z31__fd_init_order__cling_Un1Qu316v, __cxx_global_var_initcling_module_448_.5 }) }
3-d plot of the parameter points
dataCanvas->cd(2);
[runStaticInitializersOnce]: Failed to materialize symbols: { (main, { __orc_init_func.cling-module-448 }) } cling JIT session error: Failed to materialize symbols: { (main, { _ZN12__cling_N52810dataCanvasE }) }
also plot the points in the markov chain
std::unique_ptr<RooDataSet> chainData{mcInt->GetChainAsDataSet()};
assert(chainData);
std::cout << "plotting the chain data - nentries = " << chainData->numEntries() << std::endl;
TTree *chain = RooStats::GetAsTTree("chainTreeData", "chainTreeData", *chainData);
assert(chain);
chain->SetMarkerStyle(6);
chain->SetMarkerColor(kRed);
chain->Draw("s:ratioSigEff:ratioBkgEff", "nll_MarkovChain_local_", "box"); // 3-d box proportional to posterior
cling JIT session error: Failed to materialize symbols: { (main, { _ZN5cling7runtime8internal15DynamicExprInfoC1EPKcPPvb }) } [runStaticInitializersOnce]: Failed to materialize symbols: { (main, { __orc_init_func.cling-module-450, __cxx_global_var_initcling_module_450_.2, _ZNKSt10unique_ptrI10RooDataSetSt14default_deleteIS0_EEdeEv, _ZN12__cling_N53624__dynamic__cling_Un1Qu34E, _GLOBAL__sub_I_cling_module_450, _ZNSt11_Tuple_implILm0EJP10RooDataSetSt14default_deleteIS0_EEE7_M_headERKS4_, _ZN12__cling_N5369chainDataE, _ZNKSt10unique_ptrI10RooDataSetSt14default_deleteIS0_EEptEv, _ZN12__cling_N53616__cling_Un1Qu347EPv, _Z31__fd_init_order__cling_Un1Qu318v, $.cling-module-450.__inits.0, _ZN12__cling_N5365chainE, __cxx_global_var_initcling_module_450_.4, __cxx_global_var_initcling_module_450_.5, _ZSt3getILm0EJP10RooDataSetSt14default_deleteIS0_EEERKNSt13tuple_elementIXT_ESt5tupleIJDpT0_EEE4typeERKS8_, _ZNSt10_Head_baseILm0EP10RooDataSetLb0EE7_M_headERKS2_, _ZNKSt15__uniq_ptr_implI10RooDataSetSt14default_deleteIS0_EE6_M_ptrEv, _ZSt12__get_helperILm0EP10RooDataSetJSt14default_deleteIS0_EEERKT0_RKSt11_Tuple_implIXT_EJS4_DpT1_EE, _ZNKSt10unique_ptrI10RooDataSetSt14default_deleteIS0_EE3getEv, __vd_init_order__cling_Un1Qu319, __cxx_global_var_initcling_module_450_ }) }
the points used in the profile construction
RooDataSet *parScanData = (RooDataSet *)fc.GetPointsToScan();
assert(parScanData);
std::cout << "plotting the scanned points used in the frequentist construction - npoints = "
<< parScanData->numEntries() << std::endl;
cling JIT session error: Failed to materialize symbols: { (main, { _ZN5cling7runtime8internal15DynamicExprInfoC1EPKcPPvb }) } [runStaticInitializersOnce]: Failed to materialize symbols: { (main, { _ZN12__cling_N53711parScanDataE, _ZN5cling5Value7CastFwdIP10RooDataSetE4castERKS0_, _GLOBAL__sub_I_cling_module_451, _ZN12__cling_N53716__cling_Un1Qu348EPv, $.cling-module-451.__inits.0, __orc_init_func.cling-module-451, _ZN5cling7runtime8internal9EvaluateTIP10RooDataSetEET_PNS1_15DynamicExprInfoEPN5clang11DeclContextE, _ZNK5cling5Value6castAsIP10RooDataSetEET_v, __cxx_global_var_initcling_module_451_ }) }
getting the tree and drawing it -crashes (very strange....); TTree* parameterScan = RooStats::GetAsTTree("parScanTreeData","parScanTreeData",*parScanData); assert(parameterScan); parameterScan->Draw("s:ratioSigEff:ratioBkgEff","","goff");
auto gr = new TGraph2D(parScanData->numEntries());
for (int ievt = 0; ievt < parScanData->numEntries(); ++ievt) {
const RooArgSet *evt = parScanData->get(ievt);
double x = evt->getRealValue("ratioBkgEff");
double y = evt->getRealValue("ratioSigEff");
double z = evt->getRealValue("s");
gr->SetPoint(ievt, x, y, z);
// std::cout << ievt << " " << x << " " << y << " " << z << std::endl;
}
gr->SetMarkerStyle(24);
gr->Draw("P SAME");
cling JIT session error: Failed to materialize symbols: { (main, { _ZN12__cling_N53711parScanDataE }) } [runStaticInitializersOnce]: Failed to materialize symbols: { (main, { __orc_init_func.cling-module-452, __cxx_global_var_initcling_module_452_, _ZN12__cling_N53816__cling_Un1Qu349EPv, $.cling-module-452.__inits.0, _GLOBAL__sub_I_cling_module_452, _ZN12__cling_N5382grE }) }
print timing info
t.Stop();
t.Print();
Real time 0:00:07, CP time 3.700
[runStaticInitializersOnce]: Failed to materialize symbols: { (main, { __orc_init_func.cling-module-450 }) }
Draw all canvases
gROOT->GetListOfCanvases()->Draw()