Notebook

TMVA_SOFIE_Keras¶

This macro provides a simple example for the parsing of Keras .h5 file into RModel object and further generating the .hxx header files for inference.

Author: Sanjiban Sengupta
This notebook tutorial was automatically generated with ROOTBOOK-izer from the macro found in the ROOT repository on Wednesday, April 17, 2024 at 11:22 AM.

In [1]:

using namespace TMVA::Experimental;

TString pythonSrc = "\
import os\n\
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'\n\
\n\
import numpy as np\n\
from tensorflow.keras.models import Model\n\
from tensorflow.keras.layers import Input,Dense,Activation,ReLU\n\
from tensorflow.keras.optimizers import SGD\n\
\n\
input=Input(shape=(64,),batch_size=4)\n\
x=Dense(32)(input)\n\
x=Activation('relu')(x)\n\
x=Dense(16,activation='relu')(x)\n\
x=Dense(8,activation='relu')(x)\n\
x=Dense(4)(x)\n\
output=ReLU()(x)\n\
model=Model(inputs=input,outputs=output)\n\
\n\
randomGenerator=np.random.RandomState(0)\n\
x_train=randomGenerator.rand(4,64)\n\
y_train=randomGenerator.rand(4,4)\n\
\n\
model.compile(loss='mean_squared_error', optimizer=SGD(learning_rate=0.01))\n\
model.fit(x_train, y_train, epochs=5, batch_size=4)\n\
model.save('KerasModel.h5')\n";

Arguments are defined.

In [2]:

const char * modelFile = nullptr;
bool printModelInfo = true;

Running the Python script to generate Keras .h5 file

In [3]:

TMVA::PyMethodBase::PyInitialize();

if (modelFile == nullptr) {
    TMacro m;
    m.AddLine(pythonSrc);
    m.SaveSource("make_keras_model.py");
    gSystem->Exec(TMVA::Python_Executable() + " make_keras_model.py");
    modelFile = "KerasModel.h5";
}

Epoch 1/5
1/1 [==============================] - 1s 538ms/step - loss: 0.3474
Epoch 2/5
1/1 [==============================] - 0s 6ms/step - loss: 0.3470
Epoch 3/5
1/1 [==============================] - 0s 4ms/step - loss: 0.3466
Epoch 4/5
1/1 [==============================] - 0s 9ms/step - loss: 0.3462
Epoch 5/5
1/1 [==============================] - 0s 9ms/step - loss: 0.3458

/usr/local/lib/python3.8/dist-packages/keras/src/engine/training.py:3000: UserWarning: You are saving your model as an HDF5 file via `model.save()`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')`.
  saving_api.save_model(

Parsing the saved Keras .h5 file into RModel object

In [4]:

SOFIE::RModel model = SOFIE::PyKeras::Parse(modelFile);

TF/Keras Version: 2.13.0

2024-04-17 11:22:53.941973: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-17 11:22:54.017473: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-17 11:22:54.018618: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-04-17 11:22:55.788519: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT

Generating inference code

In [5]:

model.Generate();

generate output header. By default it will be modelName.hxx

In [6]:

model.OutputGenerated();

 if (!printModelInfo) return;

Printing required input tensors

In [7]:

std::cout<<"\n\n";
model.PrintRequiredInputTensors();


Model requires following inputs:
Fully Specified Tensor name: input1	type: float	shape: [4,64]

Printing initialized tensors (weights)

In [8]:

std::cout<<"\n\n";
model.PrintInitializedTensors();


Model initialized the following tensors:
Tensor name: "dense3bias0"	type: float	shape: [4]
Tensor name: "dense3kernel0"	type: float	shape: [8,4]
Tensor name: "dense2bias0"	type: float	shape: [8]
Tensor name: "dense2kernel0"	type: float	shape: [16,8]
Tensor name: "dense1bias0"	type: float	shape: [16]
Tensor name: "dense1kernel0"	type: float	shape: [32,16]
Tensor name: "densebias0"	type: float	shape: [32]
Tensor name: "densekernel0"	type: float	shape: [64,32]

Printing intermediate tensors

In [9]:

std::cout<<"\n\n";
model.PrintIntermediateTensors();


Model specify the following intermediate tensors:
Tensor name: "reluRelu0"	type: float	shape: [4,4]
Tensor name: "dense3BiasAdd0"	type: float	shape: [4,4]
Tensor name: "dense2Dense"	type: float	shape: [4,8]
Tensor name: "dense2Relu0"	type: float	shape: [4,8]
Tensor name: "dense1Dense"	type: float	shape: [4,16]
Tensor name: "dense1bias0bcast"	type: float	shape: [4,16]
Tensor name: "dense3bias0bcast"	type: float	shape: [4,4]
Tensor name: "dense1Relu0"	type: float	shape: [4,16]
Tensor name: "activationRelu0"	type: float	shape: [4,32]
Tensor name: "denseBiasAdd0"	type: float	shape: [4,32]
Tensor name: "dense2bias0bcast"	type: float	shape: [4,8]
Tensor name: "densebias0bcast"	type: float	shape: [4,32]

Checking if tensor already exist in model

In [10]:

std::cout<<"\n\nTensor \"dense2bias0\" already exist: "<<std::boolalpha<<model.CheckIfTensorAlreadyExist("dense2bias0")<<"\n\n";
std::vector<size_t> tensorShape = model.GetTensorShape("dense2bias0");
std::cout<<"Shape of tensor \"dense2bias0\": ";
for(auto& it:tensorShape){
    std::cout<<it<<",";
}
std::cout<<"\n\nData type of tensor \"dense2bias0\": ";
SOFIE::ETensorType tensorType = model.GetTensorType("dense2bias0");
std::cout<<SOFIE::ConvertTypeToString(tensorType);


Tensor "dense2bias0" already exist: true

Shape of tensor "dense2bias0": 8,

Data type of tensor "dense2bias0": float

Printing generated inference code

In [11]:

std::cout<<"\n\n";
model.PrintGenerated();


//Code generated automatically by TMVA for Inference of Model file [KerasModel.h5] at [Wed Apr 17 11:22:53 2024] 

#ifndef ROOT_TMVA_SOFIE_KERASMODEL
#define ROOT_TMVA_SOFIE_KERASMODEL

#include <algorithm>
#include <vector>
#include "TMVA/SOFIE_common.hxx"
#include <fstream>

namespace TMVA_SOFIE_KerasModel{
namespace BLAS{
	extern "C" void sgemv_(const char * trans, const int * m, const int * n, const float * alpha, const float * A,
	                       const int * lda, const float * X, const int * incx, const float * beta, const float * Y, const int * incy);
	extern "C" void sgemm_(const char * transa, const char * transb, const int * m, const int * n, const int * k,
	                       const float * alpha, const float * A, const int * lda, const float * B, const int * ldb,
	                       const float * beta, float * C, const int * ldc);
}//BLAS
struct Session {
std::vector<float> fTensor_dense3bias0 = std::vector<float>(4);
float * tensor_dense3bias0 = fTensor_dense3bias0.data();
std::vector<float> fTensor_dense3kernel0 = std::vector<float>(32);
float * tensor_dense3kernel0 = fTensor_dense3kernel0.data();
std::vector<float> fTensor_dense2bias0 = std::vector<float>(8);
float * tensor_dense2bias0 = fTensor_dense2bias0.data();
std::vector<float> fTensor_dense2kernel0 = std::vector<float>(128);
float * tensor_dense2kernel0 = fTensor_dense2kernel0.data();
std::vector<float> fTensor_dense1bias0 = std::vector<float>(16);
float * tensor_dense1bias0 = fTensor_dense1bias0.data();
std::vector<float> fTensor_dense1kernel0 = std::vector<float>(512);
float * tensor_dense1kernel0 = fTensor_dense1kernel0.data();
std::vector<float> fTensor_densebias0 = std::vector<float>(32);
float * tensor_densebias0 = fTensor_densebias0.data();
std::vector<float> fTensor_densekernel0 = std::vector<float>(2048);
float * tensor_densekernel0 = fTensor_densekernel0.data();

//--- declare and allocate the intermediate tensors
std::vector<float> fTensor_reluRelu0 = std::vector<float>(16);
float * tensor_reluRelu0 = fTensor_reluRelu0.data();
std::vector<float> fTensor_dense3BiasAdd0 = std::vector<float>(16);
float * tensor_dense3BiasAdd0 = fTensor_dense3BiasAdd0.data();
std::vector<float> fTensor_dense2Dense = std::vector<float>(32);
float * tensor_dense2Dense = fTensor_dense2Dense.data();
std::vector<float> fTensor_dense2Relu0 = std::vector<float>(32);
float * tensor_dense2Relu0 = fTensor_dense2Relu0.data();
std::vector<float> fTensor_dense1Dense = std::vector<float>(64);
float * tensor_dense1Dense = fTensor_dense1Dense.data();
std::vector<float> fTensor_dense1bias0bcast = std::vector<float>(64);
float * tensor_dense1bias0bcast = fTensor_dense1bias0bcast.data();
std::vector<float> fTensor_dense3bias0bcast = std::vector<float>(16);
float * tensor_dense3bias0bcast = fTensor_dense3bias0bcast.data();
std::vector<float> fTensor_dense1Relu0 = std::vector<float>(64);
float * tensor_dense1Relu0 = fTensor_dense1Relu0.data();
std::vector<float> fTensor_activationRelu0 = std::vector<float>(128);
float * tensor_activationRelu0 = fTensor_activationRelu0.data();
std::vector<float> fTensor_denseBiasAdd0 = std::vector<float>(128);
float * tensor_denseBiasAdd0 = fTensor_denseBiasAdd0.data();
std::vector<float> fTensor_dense2bias0bcast = std::vector<float>(32);
float * tensor_dense2bias0bcast = fTensor_dense2bias0bcast.data();
std::vector<float> fTensor_densebias0bcast = std::vector<float>(128);
float * tensor_densebias0bcast = fTensor_densebias0bcast.data();


Session(std::string filename ="KerasModel.dat") {

//--- reading weights from file
   std::ifstream f;
   f.open(filename);
   if (!f.is_open()) {
      throw std::runtime_error("tmva-sofie failed to open file for input weights");
   }
   std::string tensor_name;
   size_t length;
   f >> tensor_name >> length;
   if (tensor_name != "tensor_dense3bias0" ) {
      std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_dense3bias0 , read " + tensor_name;
      throw std::runtime_error(err_msg);
    }
   if (length != 4) {
      std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 4 , read " + std::to_string(length) ;
      throw std::runtime_error(err_msg);
    }
   for (size_t i = 0; i < length; ++i)
      f >> tensor_dense3bias0[i];
   f >> tensor_name >> length;
   if (tensor_name != "tensor_dense3kernel0" ) {
      std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_dense3kernel0 , read " + tensor_name;
      throw std::runtime_error(err_msg);
    }
   if (length != 32) {
      std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 32 , read " + std::to_string(length) ;
      throw std::runtime_error(err_msg);
    }
   for (size_t i = 0; i < length; ++i)
      f >> tensor_dense3kernel0[i];
   f >> tensor_name >> length;
   if (tensor_name != "tensor_dense2bias0" ) {
      std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_dense2bias0 , read " + tensor_name;
      throw std::runtime_error(err_msg);
    }
   if (length != 8) {
      std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 8 , read " + std::to_string(length) ;
      throw std::runtime_error(err_msg);
    }
   for (size_t i = 0; i < length; ++i)
      f >> tensor_dense2bias0[i];
   f >> tensor_name >> length;
   if (tensor_name != "tensor_dense2kernel0" ) {
      std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_dense2kernel0 , read " + tensor_name;
      throw std::runtime_error(err_msg);
    }
   if (length != 128) {
      std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 128 , read " + std::to_string(length) ;
      throw std::runtime_error(err_msg);
    }
   for (size_t i = 0; i < length; ++i)
      f >> tensor_dense2kernel0[i];
   f >> tensor_name >> length;
   if (tensor_name != "tensor_dense1bias0" ) {
      std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_dense1bias0 , read " + tensor_name;
      throw std::runtime_error(err_msg);
    }
   if (length != 16) {
      std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 16 , read " + std::to_string(length) ;
      throw std::runtime_error(err_msg);
    }
   for (size_t i = 0; i < length; ++i)
      f >> tensor_dense1bias0[i];
   f >> tensor_name >> length;
   if (tensor_name != "tensor_dense1kernel0" ) {
      std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_dense1kernel0 , read " + tensor_name;
      throw std::runtime_error(err_msg);
    }
   if (length != 512) {
      std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 512 , read " + std::to_string(length) ;
      throw std::runtime_error(err_msg);
    }
   for (size_t i = 0; i < length; ++i)
      f >> tensor_dense1kernel0[i];
   f >> tensor_name >> length;
   if (tensor_name != "tensor_densebias0" ) {
      std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_densebias0 , read " + tensor_name;
      throw std::runtime_error(err_msg);
    }
   if (length != 32) {
      std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 32 , read " + std::to_string(length) ;
      throw std::runtime_error(err_msg);
    }
   for (size_t i = 0; i < length; ++i)
      f >> tensor_densebias0[i];
   f >> tensor_name >> length;
   if (tensor_name != "tensor_densekernel0" ) {
      std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_densekernel0 , read " + tensor_name;
      throw std::runtime_error(err_msg);
    }
   if (length != 2048) {
      std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 2048 , read " + std::to_string(length) ;
      throw std::runtime_error(err_msg);
    }
   for (size_t i = 0; i < length; ++i)
      f >> tensor_densekernel0[i];
   f.close();

//---- allocate the intermediate dynamic tensors
//--- broadcast bias tensor densebias0for Gemm op
   {
      float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<float>(tensor_densebias0,{ 32 }, { 4 , 32 });
      std::copy(data, data + 128, tensor_densebias0bcast);
      delete [] data;
   }
//--- broadcast bias tensor dense1bias0for Gemm op
   {
      float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<float>(tensor_dense1bias0,{ 16 }, { 4 , 16 });
      std::copy(data, data + 64, tensor_dense1bias0bcast);
      delete [] data;
   }
//--- broadcast bias tensor dense2bias0for Gemm op
   {
      float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<float>(tensor_dense2bias0,{ 8 }, { 4 , 8 });
      std::copy(data, data + 32, tensor_dense2bias0bcast);
      delete [] data;
   }
//--- broadcast bias tensor dense3bias0for Gemm op
   {
      float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<float>(tensor_dense3bias0,{ 4 }, { 4 , 4 });
      std::copy(data, data + 16, tensor_dense3bias0bcast);
      delete [] data;
   }
}

std::vector<float> infer(float* tensor_input1){

//--------- Gemm
   char op_0_transA = 'n';
   char op_0_transB = 'n';
   int op_0_m = 4;
   int op_0_n = 32;
   int op_0_k = 64;
   float op_0_alpha = 1;
   float op_0_beta = 1;
   int op_0_lda = 64;
   int op_0_ldb = 32;
   std::copy(tensor_densebias0bcast, tensor_densebias0bcast + 128, tensor_denseBiasAdd0);
   BLAS::sgemm_(&op_0_transB, &op_0_transA, &op_0_n, &op_0_m, &op_0_k, &op_0_alpha, tensor_densekernel0, &op_0_ldb, tensor_input1, &op_0_lda, &op_0_beta, tensor_denseBiasAdd0, &op_0_n);

//------ RELU
   for (int id = 0; id < 128 ; id++){
      tensor_activationRelu0[id] = ((tensor_denseBiasAdd0[id] > 0 )? tensor_denseBiasAdd0[id] : 0);
   }

//--------- Gemm
   char op_2_transA = 'n';
   char op_2_transB = 'n';
   int op_2_m = 4;
   int op_2_n = 16;
   int op_2_k = 32;
   float op_2_alpha = 1;
   float op_2_beta = 1;
   int op_2_lda = 32;
   int op_2_ldb = 16;
   std::copy(tensor_dense1bias0bcast, tensor_dense1bias0bcast + 64, tensor_dense1Dense);
   BLAS::sgemm_(&op_2_transB, &op_2_transA, &op_2_n, &op_2_m, &op_2_k, &op_2_alpha, tensor_dense1kernel0, &op_2_ldb, tensor_activationRelu0, &op_2_lda, &op_2_beta, tensor_dense1Dense, &op_2_n);

//------ RELU
   for (int id = 0; id < 64 ; id++){
      tensor_dense1Relu0[id] = ((tensor_dense1Dense[id] > 0 )? tensor_dense1Dense[id] : 0);
   }

//--------- Gemm
   char op_4_transA = 'n';
   char op_4_transB = 'n';
   int op_4_m = 4;
   int op_4_n = 8;
   int op_4_k = 16;
   float op_4_alpha = 1;
   float op_4_beta = 1;
   int op_4_lda = 16;
   int op_4_ldb = 8;
   std::copy(tensor_dense2bias0bcast, tensor_dense2bias0bcast + 32, tensor_dense2Dense);
   BLAS::sgemm_(&op_4_transB, &op_4_transA, &op_4_n, &op_4_m, &op_4_k, &op_4_alpha, tensor_dense2kernel0, &op_4_ldb, tensor_dense1Relu0, &op_4_lda, &op_4_beta, tensor_dense2Dense, &op_4_n);

//------ RELU
   for (int id = 0; id < 32 ; id++){
      tensor_dense2Relu0[id] = ((tensor_dense2Dense[id] > 0 )? tensor_dense2Dense[id] : 0);
   }

//--------- Gemm
   char op_6_transA = 'n';
   char op_6_transB = 'n';
   int op_6_m = 4;
   int op_6_n = 4;
   int op_6_k = 8;
   float op_6_alpha = 1;
   float op_6_beta = 1;
   int op_6_lda = 8;
   int op_6_ldb = 4;
   std::copy(tensor_dense3bias0bcast, tensor_dense3bias0bcast + 16, tensor_dense3BiasAdd0);
   BLAS::sgemm_(&op_6_transB, &op_6_transA, &op_6_n, &op_6_m, &op_6_k, &op_6_alpha, tensor_dense3kernel0, &op_6_ldb, tensor_dense2Relu0, &op_6_lda, &op_6_beta, tensor_dense3BiasAdd0, &op_6_n);

//------ RELU
   for (int id = 0; id < 16 ; id++){
      tensor_reluRelu0[id] = ((tensor_dense3BiasAdd0[id] > 0 )? tensor_dense3BiasAdd0[id] : 0);
   }
   return fTensor_reluRelu0;
}
};
} //TMVA_SOFIE_KerasModel

#endif  // ROOT_TMVA_SOFIE_KERASMODEL