Rs 1 0 1_Limitexample

Limits: number counting experiment with uncertainty on both the background rate and signal efficiency.

The usage of a Confidence Interval Calculator to set a limit on the signal is illustrated

Author: Kyle Cranmer
This notebook tutorial was automatically generated with ROOTBOOK-izer from the macro found in the ROOT repository on Thursday, June 30, 2022 at 09:50 AM.

In [ ]:
%%cpp -d
#include "RooProfileLL.h"
#include "RooAbsPdf.h"
#include "RooStats/HypoTestResult.h"
#include "RooRealVar.h"
#include "RooPlot.h"
#include "RooDataSet.h"
#include "RooTreeDataStore.h"
#include "TTree.h"
#include "TCanvas.h"
#include "TLine.h"
#include "TStopwatch.h"

#include "RooStats/ProfileLikelihoodCalculator.h"
#include "RooStats/MCMCCalculator.h"
#include "RooStats/UniformProposal.h"
#include "RooStats/FeldmanCousins.h"
#include "RooStats/NumberCountingPdfFactory.h"
#include "RooStats/ConfInterval.h"
#include "RooStats/PointSetInterval.h"
#include "RooStats/LikelihoodInterval.h"
#include "RooStats/LikelihoodIntervalPlot.h"
#include "RooStats/RooStatsUtils.h"
#include "RooStats/ModelConfig.h"
#include "RooStats/MCMCInterval.h"
#include "RooStats/MCMCIntervalPlot.h"
#include "RooStats/ProposalFunction.h"
#include "RooStats/ProposalHelper.h"
#include "RooFitResult.h"
#include "TGraph2D.h"

#include <cassert>

Use this order for safety on library loading

In [ ]:
%%cpp -d
// This is a workaround to make sure the namespace is used inside functions
using namespace RooFit;
using namespace RooStats;

An example of setting a limit in a number counting experiment with uncertainty on background and signal

To time the macro

In [ ]:
TStopwatch t;
t.Start();

The Model building stage

In [ ]:
RooWorkspace *wspace = new RooWorkspace();
wspace->factory("Poisson::countingModel(obs[150,0,300], "
                "sum(s[50,0,120]*ratioSigEff[1.,0,3.],b[100]*ratioBkgEff[1.,0.,3.]))"); // counting model

Wspace->factory("gaussian::sigconstraint(ratiosigeff,1,0.05)"); // 5% signal efficiency uncertainty wspace->factory("Gaussian::bkgConstraint(ratioBkgEff,1,0.1)"); // 10% background efficiency uncertainty

In [ ]:
wspace->factory("Gaussian::sigConstraint(gSigEff[1,0,3],ratioSigEff,0.05)"); // 5% signal efficiency uncertainty
wspace->factory("Gaussian::bkgConstraint(gSigBkg[1,0,3],ratioBkgEff,0.2)");  // 10% background efficiency uncertainty
wspace->factory("PROD::modelWithConstraints(countingModel,sigConstraint,bkgConstraint)"); // product of terms
wspace->Print();

RooAbsPdf *modelWithConstraints = wspace->pdf("modelWithConstraints"); // get the model
RooRealVar *obs = wspace->var("obs");                                  // get the observable
RooRealVar *s = wspace->var("s");                                      // get the signal we care about
RooRealVar *b =
   wspace->var("b"); // get the background and set it to a constant.  Uncertainty included in ratioBkgEff
b->setConstant();

RooRealVar *ratioSigEff = wspace->var("ratioSigEff"); // get uncertain parameter to constrain
RooRealVar *ratioBkgEff = wspace->var("ratioBkgEff"); // get uncertain parameter to constrain
RooArgSet constrainedParams(*ratioSigEff,
                            *ratioBkgEff); // need to constrain these in the fit (should change default behavior)

RooRealVar *gSigEff = wspace->var("gSigEff"); // global observables for signal efficiency
RooRealVar *gSigBkg = wspace->var("gSigBkg"); // global obs for background efficiency
gSigEff->setConstant();
gSigBkg->setConstant();

Create an example dataset with 160 observed events

In [ ]:
obs->setVal(160.);
RooDataSet *data = new RooDataSet("exampleData", "exampleData", RooArgSet(*obs));
data->add(*obs);

RooArgSet all(*s, *ratioBkgEff, *ratioSigEff);

Not necessary

In [ ]:
modelWithConstraints->fitTo(*data, RooFit::Constrain(RooArgSet(*ratioSigEff, *ratioBkgEff)));

Now let's make some confidence intervals for s, our parameter of interest

In [ ]:
RooArgSet paramOfInterest(*s);

ModelConfig modelConfig(wspace);
modelConfig.SetPdf(*modelWithConstraints);
modelConfig.SetParametersOfInterest(paramOfInterest);
modelConfig.SetNuisanceParameters(constrainedParams);
modelConfig.SetObservables(*obs);
modelConfig.SetGlobalObservables(RooArgSet(*gSigEff, *gSigBkg));
modelConfig.SetName("ModelConfig");
wspace->import(modelConfig);
wspace->import(*data);
wspace->SetName("w");
wspace->writeToFile("rs101_ws.root");

First, let's use a calculator based on the profile likelihood ratio ProfileLikelihoodCalculator plc(data, modelWithConstraints, paramOfInterest);

In [ ]:
ProfileLikelihoodCalculator plc(*data, modelConfig);
plc.SetTestSize(.05);
ConfInterval *lrinterval = plc.GetInterval(); // that was easy.

Let's make a plot

In [ ]:
TCanvas *dataCanvas = new TCanvas("dataCanvas");
dataCanvas->Divide(2, 1);

dataCanvas->cd(1);
LikelihoodIntervalPlot plotInt((LikelihoodInterval *)lrinterval);
plotInt.SetTitle("Profile Likelihood Ratio and Posterior for S");
plotInt.Draw();

Second, use a calculator based on the feldman cousins technique

In [ ]:
FeldmanCousins fc(*data, modelConfig);
fc.UseAdaptiveSampling(true);
fc.FluctuateNumDataEntries(false); // number counting analysis: dataset always has 1 entry with N events observed
fc.SetNBins(100);                  // number of points to test per parameter
fc.SetTestSize(.05);

Fc.savebelttofile(true); // optional

In [ ]:
ConfInterval *fcint = NULL;
fcint = fc.GetInterval(); // that was easy.

RooFitResult *fit = modelWithConstraints->fitTo(*data, Save(true));

Third, use a calculator based on markov chain monte carlo Before configuring the calculator, let's make a ProposalFunction that will achieve a high acceptance rate

In [ ]:
ProposalHelper ph;
ph.SetVariables((RooArgSet &)fit->floatParsFinal());
ph.SetCovMatrix(fit->covarianceMatrix());
ph.SetUpdateProposalParameters(true);
ph.SetCacheSize(100);
ProposalFunction *pdfProp = ph.GetProposalFunction(); // that was easy

MCMCCalculator mc(*data, modelConfig);
mc.SetNumIters(20000);    // steps to propose in the chain
mc.SetTestSize(.05);      // 95% CL
mc.SetNumBurnInSteps(40); // ignore first N steps in chain as "burn in"
mc.SetProposalFunction(*pdfProp);
mc.SetLeftSideTailFraction(0.5);                        // find a "central" interval
MCMCInterval *mcInt = (MCMCInterval *)mc.GetInterval(); // that was easy

Get lower and upper limits from profile calculator

In [ ]:
cout << "Profile lower limit on s = " << ((LikelihoodInterval *)lrinterval)->LowerLimit(*s) << endl;
cout << "Profile upper limit on s = " << ((LikelihoodInterval *)lrinterval)->UpperLimit(*s) << endl;

Get lower and upper limits from feldmancousins with profile construction

In [ ]:
if (fcint != NULL) {
   double fcul = ((PointSetInterval *)fcint)->UpperLimit(*s);
   double fcll = ((PointSetInterval *)fcint)->LowerLimit(*s);
   cout << "FC lower limit on s = " << fcll << endl;
   cout << "FC upper limit on s = " << fcul << endl;
   TLine *fcllLine = new TLine(fcll, 0, fcll, 1);
   TLine *fculLine = new TLine(fcul, 0, fcul, 1);
   fcllLine->SetLineColor(kRed);
   fculLine->SetLineColor(kRed);
   fcllLine->Draw("same");
   fculLine->Draw("same");
   dataCanvas->Update();
}

Plot mcmc interval and print some statistics

In [ ]:
MCMCIntervalPlot mcPlot(*mcInt);
mcPlot.SetLineColor(kMagenta);
mcPlot.SetLineWidth(2);
mcPlot.Draw("same");

double mcul = mcInt->UpperLimit(*s);
double mcll = mcInt->LowerLimit(*s);
cout << "MCMC lower limit on s = " << mcll << endl;
cout << "MCMC upper limit on s = " << mcul << endl;
cout << "MCMC Actual confidence level: " << mcInt->GetActualConfidenceLevel() << endl;

3-d plot of the parameter points

In [ ]:
dataCanvas->cd(2);

Also plot the points in the markov chain

In [ ]:
RooDataSet *chainData = mcInt->GetChainAsDataSet();

assert(chainData);
std::cout << "plotting the chain data - nentries = " << chainData->numEntries() << std::endl;
TTree *chain = RooStats::GetAsTTree("chainTreeData", "chainTreeData", *chainData);
assert(chain);
chain->SetMarkerStyle(6);
chain->SetMarkerColor(kRed);

chain->Draw("s:ratioSigEff:ratioBkgEff", "nll_MarkovChain_local_", "box"); // 3-d box proportional to posterior

The points used in the profile construction

In [ ]:
RooDataSet *parScanData = (RooDataSet *)fc.GetPointsToScan();
assert(parScanData);
std::cout << "plotting the scanned points used in the frequentist construction - npoints = "
          << parScanData->numEntries() << std::endl;

Getting the tree and drawing it -crashes (very strange....); TTree parameterScan = RooStats::GetAsTTree("parScanTreeData","parScanTreeData",parScanData); assert(parameterScan); parameterScan->Draw("s:ratioSigEff:ratioBkgEff","","goff");

In [ ]:
TGraph2D *gr = new TGraph2D(parScanData->numEntries());
for (int ievt = 0; ievt < parScanData->numEntries(); ++ievt) {
   const RooArgSet *evt = parScanData->get(ievt);
   double x = evt->getRealValue("ratioBkgEff");
   double y = evt->getRealValue("ratioSigEff");
   double z = evt->getRealValue("s");
   gr->SetPoint(ievt, x, y, z);
   // std::cout << ievt << "  " << x << "  " << y << "  " << z << std::endl;
}
gr->SetMarkerStyle(24);
gr->Draw("P SAME");

delete wspace;
delete lrinterval;
delete mcInt;
delete fcint;
delete data;

Print timing info

In [ ]:
t.Stop();
t.Print();

Draw all canvases

In [ ]:
gROOT->GetListOfCanvases()->Draw()