# TMVAMultipleBackgroundExample¶

This example shows the training of a signal against three different backgrounds. Then, in the application step, a tree is created with all signal and background events, to which the true class ID and the three classifier outputs are added. Finally, using the application tree, the significance is maximized with the help of the TMVA genetic algorithm.

• Project : TMVA - a Root-integrated toolkit for multivariate data analysis
• Package : TMVA
• Executable: TMVAGAexample

Author: Andreas Hoecker
This notebook tutorial was automatically generated with ROOTBOOK-izer from the macro found in the ROOT repository on Monday, August 15, 2022 at 09:30 AM.

In :
%%cpp -d
#include <iostream> // Stream declarations
#include <limits>
#include <string>
#include <vector>

#include "TChain.h"
#include "TCut.h"
#include "TDirectory.h"
#include "TFile.h"
#include "TH1.h"
#include "TH1F.h"
#include "TMath.h"
#include "TROOT.h"
#include "TStopwatch.h"
#include "TString.h"
#include "TSystem.h"
#include "TTree.h"

#include "TMVA/DataLoader.h"
#include "TMVA/Factory.h"
#include "TMVA/GeneticAlgorithm.h"
#include "TMVA/GeneticFitter.h"
#include "TMVA/IFitterTarget.h"
#include "TMVA/Interval.h"
#include "TMVA/Reader.h"
#include "TMVA/Types.h"

using namespace std;

using namespace TMVA;

input_line_42:27:1: error: expected unqualified-id
%%cpp -d
^


## Genetic Algorithm Fitness definition¶

In :
class MyFitness : public IFitterTarget {
public:
// constructor
MyFitness( TChain* _chain ) : IFitterTarget() {
chain = _chain;

hSignal = new TH1F("hsignal","hsignal",100,-1,1);
hFP = new TH1F("hfp","hfp",100,-1,1);
hTP = new TH1F("htp","htp",100,-1,1);

TString cutsAndWeightSignal  = "weight*(classID==0)";
nSignal = chain->Draw("Entry$/Entries$>>hsignal",cutsAndWeightSignal,"goff");
weightsSignal = hSignal->Integral();

}

// the output of this function will be minimized
Double_t EstimatorFunction( std::vector<Double_t> & factors ){

TString cutsAndWeightTruePositive  = Form("weight*((classID==0) && cls0>%f && cls1>%f && cls2>%f )",factors.at(0), factors.at(1), factors.at(2));
TString cutsAndWeightFalsePositive = Form("weight*((classID >0) && cls0>%f && cls1>%f && cls2>%f )",factors.at(0), factors.at(1), factors.at(2));

// Entry$/Entries$ just draws something reasonable. Could in principle anything
Float_t nTP = chain->Draw("Entry$/Entries$>>htp",cutsAndWeightTruePositive,"goff");
Float_t nFP = chain->Draw("Entry$/Entries$>>hfp",cutsAndWeightFalsePositive,"goff");

weightsTruePositive = hTP->Integral();
weightsFalsePositive = hFP->Integral();

efficiency = 0;
if( weightsSignal > 0 )
efficiency = weightsTruePositive/weightsSignal;

purity = 0;
if( weightsTruePositive+weightsFalsePositive > 0 )
purity = weightsTruePositive/(weightsTruePositive+weightsFalsePositive);

Float_t effTimesPur = efficiency*purity;

Float_t toMinimize = std::numeric_limits<float>::max(); // set to the highest existing number
if( effTimesPur > 0 ) // if larger than 0, take 1/x. This is the value to minimize
toMinimize = 1./(effTimesPur); // we want to minimize 1/efficiency*purity

// Print();

}

void Print(){
std::cout << std::endl;
std::cout << "======================" << std::endl
<< "Efficiency : " << efficiency << std::endl
<< "Purity     : " << purity << std::endl << std::endl
<< "True positive weights : " << weightsTruePositive << std::endl
<< "False positive weights: " << weightsFalsePositive << std::endl
<< "Signal weights        : " << weightsSignal << std::endl;
}

Float_t nSignal;

Float_t efficiency;
Float_t purity;
Float_t weightsTruePositive;
Float_t weightsFalsePositive;
Float_t weightsSignal;

private:
TChain* chain;
TH1F* hSignal;
TH1F* hFP;
TH1F* hTP;

};

input_line_43:1:26: error: expected class name
class MyFitness : public IFitterTarget {
^
input_line_43:4:34: error: member initializer 'IFitterTarget' does not name a non-static data member or base class
MyFitness( TChain* _chain ) : IFitterTarget() {
^~~~~~~~~~~~~~~


## Training¶

In :
%%cpp -d
void Training(){
std::string factoryOptions( "!V:!Silent:Transformations=I;D;P;G,D:AnalysisType=Classification" );
TString fname = "./tmva_example_multiple_background.root";

TFile *input(0);
input = TFile::Open( fname );

TTree *signal      = (TTree*)input->Get("TreeS");
TTree *background0 = (TTree*)input->Get("TreeB0");
TTree *background1 = (TTree*)input->Get("TreeB1");
TTree *background2 = (TTree*)input->Get("TreeB2");

/ global event weights per tree (see below for setting event-wise weights)
Double_t signalWeight      = 1.0;
Double_t background0Weight = 1.0;
Double_t background1Weight = 1.0;
Double_t background2Weight = 1.0;

Create a new root output file.
TString outfileName( "TMVASignalBackground0.root" );
TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

background 0
____________
TMVA::Factory *factory = new TMVA::Factory( "TMVAMultiBkg0", outputFile, factoryOptions );

factory->SetBackgroundWeightExpression("weight");
TCut mycuts = "";  for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
TCut mycutb = "";  for example: TCut mycutb = "abs(var1)<0.5";

tell the factory to use all remaining events in the trees after training for testing:
"nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

Boosted Decision Trees
factory->TrainAllMethods();
factory->TestAllMethods();
factory->EvaluateAllMethods();

outputFile->Close();

delete factory;

background 1
____________

outfileName = "TMVASignalBackground1.root";
outputFile = TFile::Open( outfileName, "RECREATE" );

factory = new TMVA::Factory( "TMVAMultiBkg1", outputFile, factoryOptions );

tell the factory to use all remaining events in the trees after training for testing:
"nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

Boosted Decision Trees
factory->TrainAllMethods();
factory->TestAllMethods();
factory->EvaluateAllMethods();

outputFile->Close();

delete factory;

background 2
____________

outfileName = "TMVASignalBackground2.root";
outputFile = TFile::Open( outfileName, "RECREATE" );

factory = new TMVA::Factory( "TMVAMultiBkg2", outputFile, factoryOptions );

tell the dataloader to use all remaining events in the trees after training for testing:
"nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

Boosted Decision Trees
factory->TrainAllMethods();
factory->TestAllMethods();
factory->EvaluateAllMethods();

outputFile->Close();

delete factory;

}

input_line_53:13:4: error: expected expression
/ global event weights per tree (see below for setting event-wise weights)
^
input_line_53:13:6: error: use of undeclared identifier 'global'
/ global event weights per tree (see below for setting event-wise weights)
^
input_line_53:19:5: error: unknown type name 'Create'
Create a new root output file.
^
input_line_53:19:13: error: expected ';' at end of declaration
Create a new root output file.
^
;
input_line_53:21:37: error: use of undeclared identifier 'outfileName'
TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
^
input_line_53:25:5: error: use of undeclared identifier 'background'
background 0
^
input_line_53:35:47: error: use of undeclared identifier 'signalWeight'
^
input_line_53:38:9: error: use of undeclared identifier 'factory'
factory->SetBackgroundWeightExpression("weight");
^
input_line_53:39:27: error: expected '(' after 'for'
TCut mycuts = "";  for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
^
input_line_53:40:27: error: expected '(' after 'for'
TCut mycutb = "";  for example: TCut mycutb = "abs(var1)<0.5";
^
input_line_53:42:5: error: unknown type name 'tell'
tell the factory to use all remaining events in the trees after training for testing:
^
input_line_53:42:13: error: expected ';' at end of declaration
tell the factory to use all remaining events in the trees after training for testing:
^
;
input_line_53:46:5: error: unknown type name 'Boosted'
Boosted Decision Trees
^
input_line_53:46:21: error: expected ';' at end of declaration
Boosted Decision Trees
^
;
input_line_53:49:4: error: use of undeclared identifier 'factory'
factory->TrainAllMethods();
^
input_line_53:50:4: error: use of undeclared identifier 'factory'
factory->TestAllMethods();
^
input_line_53:51:4: error: use of undeclared identifier 'factory'
factory->EvaluateAllMethods();
^
input_line_53:55:11: error: use of undeclared identifier 'factory'
delete factory;
^
input_line_53:60:5: error: use of undeclared identifier 'background'
background 1
^
fatal error: too many errors emitted, stopping now [-ferror-limit=]


## Application¶

Create a summary tree containing all signal and background events; for each event, store the three classifier values and the true classID.

In :
%%cpp -d
void ApplicationCreateCombinedTree(){

Create a new root output file.
TString outfileName( "tmva_example_multiple_backgrounds__applied.root" );
TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
TTree* outputTree = new TTree("multiBkg","multiple backgrounds tree");

Float_t var1, var2;
Float_t var3, var4;
Int_t   classID = 0;
Float_t weight = 1.f;

Float_t classifier0, classifier1, classifier2;

outputTree->Branch("classID", &classID, "classID/I");
outputTree->Branch("var1", &var1, "var1/F");
outputTree->Branch("var2", &var2, "var2/F");
outputTree->Branch("var3", &var3, "var3/F");
outputTree->Branch("var4", &var4, "var4/F");
outputTree->Branch("weight", &weight, "weight/F");
outputTree->Branch("cls0", &classifier0, "cls0/F");
outputTree->Branch("cls1", &classifier1, "cls1/F");
outputTree->Branch("cls2", &classifier2, "cls2/F");

create three readers for the three different signal/background classifications, .. one for each background

TString method =  "BDT method";

TFile *input(0);
TString fname = "./tmva_example_multiple_background.root";
input = TFile::Open( fname );

TTree* theTree = NULL;

loop through signal and all background trees
for( int treeNumber = 0; treeNumber < 4; ++treeNumber ) {
if( treeNumber == 0 ){
theTree = (TTree*)input->Get("TreeS");
std::cout << "--- Select signal sample" << std::endl;
weight = 1;
classID = 0;
}else if( treeNumber == 1 ){
theTree = (TTree*)input->Get("TreeB0");
std::cout << "--- Select background 0 sample" << std::endl;
weight = 1;
classID = 1;
}else if( treeNumber == 2 ){
theTree = (TTree*)input->Get("TreeB1");
std::cout << "--- Select background 1 sample" << std::endl;
weight = 1;
classID = 2;
}else if( treeNumber == 3 ){
theTree = (TTree*)input->Get("TreeB2");
std::cout << "--- Select background 2 sample" << std::endl;
weight = 1;
classID = 3;
}

std::cout << "--- Processing: " << theTree->GetEntries() << " events" << std::endl;
TStopwatch sw;
sw.Start();
Int_t nEvent = theTree->GetEntries();
Int_t nEvent = 100;
for (Long64_t ievt=0; ievt<nEvent; ievt++) {

if (ievt%1000 == 0){
std::cout << "--- ... Processing event: " << ievt << std::endl;
}

theTree->GetEntry(ievt);

get the classifiers for each of the signal/background classifications

outputTree->Fill();
}

get elapsed time
sw.Stop();
std::cout << "--- End of event loop: "; sw.Print();
}
input->Close();

write output tree
/*   outputTree->SetDirectory(outputFile);
outputTree->Write(); */
outputFile->Write();

outputFile->Close();

std::cout << "--- Created root file: \"" << outfileName.Data() << "\" containing the MVA output histograms" << std::endl;

std::cout << "==> Application of readers is done! combined tree created" << std::endl << std::endl;

}

input_line_54:3:5: error: unknown type name 'Create'
Create a new root output file.
^
input_line_54:3:13: error: expected ';' at end of declaration
Create a new root output file.
^
;
input_line_54:5:37: error: use of undeclared identifier 'outfileName'
TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
^
input_line_54:26:5: error: unknown type name 'create'
create three readers for the three different signal/background classifications, .. one for each background
^
input_line_54:26:17: error: expected ';' at end of declaration
create three readers for the three different signal/background classifications, .. one for each background
^
;
input_line_54:31:4: error: use of undeclared identifier 'reader0'
^
input_line_54:32:4: error: use of undeclared identifier 'reader0'
^
input_line_54:33:4: error: use of undeclared identifier 'reader0'
^
input_line_54:34:4: error: use of undeclared identifier 'reader0'
^
input_line_54:46:5: error: unknown type name 'load'
^
input_line_54:46:13: error: expected ';' at end of declaration
^
;
input_line_54:48:4: error: use of undeclared identifier 'reader0'
^
input_line_54:52:5: error: unknown type name 'load'
^
input_line_54:52:13: error: expected ';' at end of declaration
^
;
input_line_54:55:4: error: use of undeclared identifier 'input'
input = TFile::Open( fname );
^
input_line_54:59:5: error: unknown type name 'loop'
loop through signal and all background trees
^
input_line_54:59:17: error: expected ';' at end of declaration
loop through signal and all background trees
^
;
input_line_54:123:10: error: expected ';' after expression
write output tree
^
;
input_line_54:123:5: warning: expression result unused [-Wunused-value]
write output tree
^~~~~
input_line_54:123:11: error: unknown type name 'output'
write output tree
^
fatal error: too many errors emitted, stopping now [-ferror-limit=]


## Call of Genetic algorithm¶

In :
%%cpp -d
void MaximizeSignificance(){

define all the parameters by their minimum and maximum value
in this example 3 parameters (=cuts on the classifiers) are defined.
vector<Interval*> ranges;
ranges.push_back( new Interval(-1,1) );  for some classifiers (especially LD) the ranges have to be taken larger
ranges.push_back( new Interval(-1,1) );
ranges.push_back( new Interval(-1,1) );

std::cout << "Classifier ranges (defined by the user)" << std::endl;
for( std::vector<Interval*>::iterator it = ranges.begin(); it != ranges.end(); it++ ){
std::cout << " range: " << (*it)->GetMin() << "   " << (*it)->GetMax() << std::endl;
}

TChain* chain = new TChain("multiBkg");

IFitterTarget* myFitness = new MyFitness( chain );

prepare the genetic algorithm with an initial population size of 20
mind: big population sizes will help in searching the domain space of the solution
but you have to weight this out to the number of generations
the extreme case of 1 generation and populationsize n is equal to
a Monte Carlo calculation with n tries

const TString name( "multipleBackgroundGA" );
const TString opts( "PopSize=100:Steps=30" );

GeneticFitter mg( *myFitness, name, ranges, opts);
mg.SetParameters( 4, 30, 200, 10,5, 0.95, 0.001 );

std::vector<Double_t> result;
Double_t estimator = mg.Run(result);

dynamic_cast<MyFitness*>(myFitness)->Print();
std::cout << std::endl;

int n = 0;
for( std::vector<Double_t>::iterator it = result.begin(); it<result.end(); it++ ){
std::cout << "  cutValue[" << n << "] = " << (*it) << ";"<< std::endl;
n++;
}

}

input_line_55:3:10: error: unknown type name 'define'
define all the parameters by their minimum and maximum value
^
input_line_55:3:20: error: expected ';' at end of declaration
define all the parameters by their minimum and maximum value
^
;
input_line_55:6:9: error: use of undeclared identifier 'ranges'
ranges.push_back( new Interval(-1,1) );  for some classifiers (especially LD) the ranges have to be taken larger
^
input_line_55:6:31: error: unknown type name 'Interval'
ranges.push_back( new Interval(-1,1) );  for some classifiers (especially LD) the ranges have to be taken larger
^
input_line_55:6:54: error: expected '(' after 'for'
ranges.push_back( new Interval(-1,1) );  for some classifiers (especially LD) the ranges have to be taken larger
^
input_line_55:8:9: error: use of undeclared identifier 'ranges'
ranges.push_back( new Interval(-1,1) );
^
input_line_55:8:31: error: unknown type name 'Interval'
ranges.push_back( new Interval(-1,1) );
^
input_line_55:11:26: error: use of undeclared identifier 'Interval'
for( std::vector<Interval*>::iterator it = ranges.begin(); it != ranges.end(); it++ ){
^
input_line_55:11:35: error: expected expression
for( std::vector<Interval*>::iterator it = ranges.begin(); it != ranges.end(); it++ ){
^
input_line_55:11:52: error: use of undeclared identifier 'ranges'
for( std::vector<Interval*>::iterator it = ranges.begin(); it != ranges.end(); it++ ){
^
input_line_55:11:74: error: use of undeclared identifier 'ranges'
for( std::vector<Interval*>::iterator it = ranges.begin(); it != ranges.end(); it++ ){
^
input_line_55:18:9: error: unknown type name 'IFitterTarget'
IFitterTarget* myFitness = new MyFitness( chain );
^
input_line_55:18:40: error: unknown type name 'MyFitness'
IFitterTarget* myFitness = new MyFitness( chain );
^
input_line_55:20:10: error: unknown type name 'prepare'
prepare the genetic algorithm with an initial population size of 20
^
input_line_55:20:21: error: expected ';' at end of declaration
prepare the genetic algorithm with an initial population size of 20
^
;
input_line_55:29:9: error: unknown type name 'GeneticFitter'
GeneticFitter mg( *myFitness, name, ranges, opts);
^
input_line_55:29:39: error: use of undeclared identifier 'name'
GeneticFitter mg( *myFitness, name, ranges, opts);
^
input_line_55:29:45: error: use of undeclared identifier 'ranges'
GeneticFitter mg( *myFitness, name, ranges, opts);
^
input_line_55:35:17: error: unknown type name 'MyFitness'
dynamic_cast<MyFitness*>(myFitness)->Print();
^


## Run all¶

In :
cout << "Start Test TMVAGAexample" << endl
<< "========================" << endl
<< endl;

TString createDataMacro = gROOT->GetTutorialDir() + "/tmva/createData.C";
gROOT->ProcessLine(TString::Format(".L %s",createDataMacro.Data()));
gROOT->ProcessLine("create_MultipleBackground(200)");

cout << endl;
cout << "========================" << endl;
cout << "--- Training" << endl;
Training();

cout << endl;
cout << "========================" << endl;
cout << "--- Application & create combined tree" << endl;
ApplicationCreateCombinedTree();

cout << endl;
cout << "========================" << endl;
cout << "--- maximize significance" << endl;
MaximizeSignificance();

Start Test TMVAGAexample
========================

... event: 0 (200)
======> EVENT:0
var1            = -1.14361
var2            = -0.822373
var3            = -0.395426
var4            = -0.529427
created tree: TreeS
... event: 0 (200)
======> EVENT:0
var1            = -1.54361
var2            = -1.42237
var3            = -1.39543
var4            = -2.02943
created tree: TreeB0
... event: 0 (200)
======> EVENT:0
var1            = -1.54361
var2            = -0.822373
var3            = -0.395426
var4            = -2.02943
created tree: TreeB1
======> EVENT:0
var1            = 0.463304
var2            = 1.37192
var3            = -1.16769
var4            = -1.77551
created tree: TreeB2
created data file: tmva_example_multiple_background.root

========================
--- Training

input_line_79:2:3: error: use of undeclared identifier 'Training'
(Training())
^
Error in <HandleInterpreterException>: Error evaluating expression (Training())
Execution of your code was aborted.