This macro provides a simple example of parsing a Keras .h5 file into an RModel object and then generating the .hxx header file for inference.
Author: Sanjiban Sengupta
This notebook tutorial was automatically generated with ROOTBOOK-izer from the macro found in the ROOT repository on Monday, March 27, 2023 at 09:58 AM.
using namespace TMVA::Experimental;
// Python source, kept as a string, that builds a small Keras model
// (input of shape (64,) with batch size 4, followed by Dense layers of
// 32, 16, 8 and 4 units with ReLU activations), fits it for 5 epochs on
// random data and saves it as 'KerasModel.h5'.
TString pythonSrc = "\
import os\n\
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'\n\
\n\
import numpy as np\n\
from tensorflow.keras.models import Model\n\
from tensorflow.keras.layers import Input,Dense,Activation,ReLU\n\
from tensorflow.keras.optimizers import SGD\n\
\n\
input=Input(shape=(64,),batch_size=4)\n\
x=Dense(32)(input)\n\
x=Activation('relu')(x)\n\
x=Dense(16,activation='relu')(x)\n\
x=Dense(8,activation='relu')(x)\n\
x=Dense(4)(x)\n\
output=ReLU()(x)\n\
model=Model(inputs=input,outputs=output)\n\
\n\
randomGenerator=np.random.RandomState(0)\n\
x_train=randomGenerator.rand(4,64)\n\
y_train=randomGenerator.rand(4,4)\n\
\n\
model.compile(loss='mean_squared_error', optimizer=SGD(learning_rate=0.01))\n\
model.fit(x_train, y_train, epochs=5, batch_size=4)\n\
model.save('KerasModel.h5')\n";
Arguments are defined.
// Path of the Keras model file to parse; null means no file was supplied.
const char *modelFile{nullptr};
// When true, the tensor summaries and the generated code are printed.
bool printModelInfo{true};
Running the Python script to generate the Keras .h5 file
TMVA::PyMethodBase::PyInitialize();

if (!modelFile) {
   // No model file was supplied: save the embedded Python source to disk,
   // run it with the configured Python executable, and use the file it saves.
   TMacro script;
   script.AddLine(pythonSrc);
   script.SaveSource("make_keras_model.py");

   const auto command = TMVA::Python_Executable() + " make_keras_model.py";
   gSystem->Exec(command);

   modelFile = "KerasModel.h5";
}
Epoch 1/5 1/1 [==============================] - 1s 737ms/step - loss: 0.3250 Epoch 2/5 1/1 [==============================] - 0s 4ms/step - loss: 0.3222 Epoch 3/5 1/1 [==============================] - 0s 4ms/step - loss: 0.3196 Epoch 4/5 1/1 [==============================] - 0s 4ms/step - loss: 0.3172 Epoch 5/5 1/1 [==============================] - 0s 3ms/step - loss: 0.3149
Parsing the saved Keras .h5 file into an RModel object
// Parse the Keras file named by modelFile into a SOFIE RModel.
SOFIE::RModel model = SOFIE::PyKeras::Parse(modelFile);
Keras Version: 2.11.0
2023-03-27 09:59:08.809090: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 2023-03-27 09:59:09.496557: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/sftnight/build/workspace/root-makedoc-master/rootspi/rdoc/src/master.build/lib 2023-03-27 09:59:09.496594: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 2023-03-27 09:59:10.843208: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/sftnight/build/workspace/root-makedoc-master/rootspi/rdoc/src/master.build/lib 2023-03-27 09:59:10.843319: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/sftnight/build/workspace/root-makedoc-master/rootspi/rdoc/src/master.build/lib 2023-03-27 09:59:10.843336: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
2023-03-27 09:59:12.609299: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/sftnight/build/workspace/root-makedoc-master/rootspi/rdoc/src/master.build/lib 2023-03-27 09:59:12.609335: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303) 2023-03-27 09:59:12.609361: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (root-ubuntu-2004-3): /proc/driver/nvidia/version does not exist 2023-03-27 09:59:12.609686: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
Generating inference code
// Generate the inference code for the parsed model.
model.Generate();
Generate the output header. By default it is named modelName.hxx.
// Write the generated code to the output header file.
model.OutputGenerated();

// Nothing further to do unless the model summary was requested.
if (printModelInfo == false)
   return;
Printing required input tensors
// Two newlines separate this section from the preceding output.
std::cout << '\n' << '\n';
model.PrintRequiredInputTensors();
Model requires following inputs: Fully Specified Tensor name: input1 type: float shape: [4,64]
Printing initialized tensors (weights)
// Two newlines separate this section from the preceding output.
std::cout << '\n' << '\n';
model.PrintInitializedTensors();
Model initialized the following tensors: Tensor name: "dense3bias0" type: float shape: [4] Tensor name: "dense3kernel0" type: float shape: [8,4] Tensor name: "dense2bias0" type: float shape: [8] Tensor name: "dense2kernel0" type: float shape: [16,8] Tensor name: "dense1bias0" type: float shape: [16] Tensor name: "dense1kernel0" type: float shape: [32,16] Tensor name: "densebias0" type: float shape: [32] Tensor name: "densekernel0" type: float shape: [64,32]
Printing intermediate tensors
// Two newlines separate this section from the preceding output.
std::cout << '\n' << '\n';
model.PrintIntermediateTensors();
Model specify the following intermediate tensors: Tensor name: "reluRelu0" type: float shape: [4,4] Tensor name: "dense3BiasAdd0" type: float shape: [4,4] Tensor name: "dense2Dense" type: float shape: [4,8] Tensor name: "dense2Relu0" type: float shape: [4,8] Tensor name: "dense1Dense" type: float shape: [4,16] Tensor name: "dense1bias0bcast" type: float shape: [4,16] Tensor name: "dense3bias0bcast" type: float shape: [4,4] Tensor name: "dense1Relu0" type: float shape: [4,16] Tensor name: "activationRelu0" type: float shape: [4,32] Tensor name: "denseBiasAdd0" type: float shape: [4,32] Tensor name: "dense2bias0bcast" type: float shape: [4,8] Tensor name: "densebias0bcast" type: float shape: [4,32]
Checking if a tensor already exists in the model
// Report whether the tensor "dense2bias0" is known to the model.
std::cout << "\n\nTensor \"dense2bias0\" already exist: " << std::boolalpha
          << model.CheckIfTensorAlreadyExist("dense2bias0") << "\n\n";

// Print its shape, one dimension at a time, each followed by a comma.
std::vector<size_t> tensorShape = model.GetTensorShape("dense2bias0");
std::cout << "Shape of tensor \"dense2bias0\": ";
for (const auto &dim : tensorShape)
   std::cout << dim << ",";

// Print its element type as a string.
std::cout << "\n\nData type of tensor \"dense2bias0\": ";
SOFIE::ETensorType tensorType = model.GetTensorType("dense2bias0");
std::cout << SOFIE::ConvertTypeToString(tensorType);
Tensor "dense2bias0" already exist: true Shape of tensor "dense2bias0": 8, Data type of tensor "dense2bias0": float
Printing generated inference code
// Two newlines separate the generated code from the preceding output.
std::cout << '\n' << '\n';
model.PrintGenerated();
//Code generated automatically by TMVA for Inference of Model file [KerasModel.h5] at [Mon Mar 27 09:59:08 2023] #ifndef TMVA_SOFIE_KERASMODEL #define TMVA_SOFIE_KERASMODEL #include<algorithm> #include<vector> #include "TMVA/SOFIE_common.hxx" #include <fstream> namespace TMVA_SOFIE_KerasModel{ namespace BLAS{ extern "C" void sgemv_(const char * trans, const int * m, const int * n, const float * alpha, const float * A, const int * lda, const float * X, const int * incx, const float * beta, const float * Y, const int * incy); extern "C" void sgemm_(const char * transa, const char * transb, const int * m, const int * n, const int * k, const float * alpha, const float * A, const int * lda, const float * B, const int * ldb, const float * beta, float * C, const int * ldc); }//BLAS struct Session { std::vector<float> fTensor_dense3bias0 = std::vector<float>(4); float * tensor_dense3bias0 = fTensor_dense3bias0.data(); std::vector<float> fTensor_dense3kernel0 = std::vector<float>(32); float * tensor_dense3kernel0 = fTensor_dense3kernel0.data(); std::vector<float> fTensor_dense2bias0 = std::vector<float>(8); float * tensor_dense2bias0 = fTensor_dense2bias0.data(); std::vector<float> fTensor_dense2kernel0 = std::vector<float>(128); float * tensor_dense2kernel0 = fTensor_dense2kernel0.data(); std::vector<float> fTensor_dense1bias0 = std::vector<float>(16); float * tensor_dense1bias0 = fTensor_dense1bias0.data(); std::vector<float> fTensor_dense1kernel0 = std::vector<float>(512); float * tensor_dense1kernel0 = fTensor_dense1kernel0.data(); std::vector<float> fTensor_densebias0 = std::vector<float>(32); float * tensor_densebias0 = fTensor_densebias0.data(); std::vector<float> fTensor_densekernel0 = std::vector<float>(2048); float * tensor_densekernel0 = fTensor_densekernel0.data(); std::vector<float> fTensor_reluRelu0 = std::vector<float>(16); float * tensor_reluRelu0 = fTensor_reluRelu0.data(); std::vector<float> fTensor_dense3BiasAdd0 = std::vector<float>(16); float * 
tensor_dense3BiasAdd0 = fTensor_dense3BiasAdd0.data(); std::vector<float> fTensor_dense2Dense = std::vector<float>(32); float * tensor_dense2Dense = fTensor_dense2Dense.data(); std::vector<float> fTensor_dense2Relu0 = std::vector<float>(32); float * tensor_dense2Relu0 = fTensor_dense2Relu0.data(); std::vector<float> fTensor_dense1Dense = std::vector<float>(64); float * tensor_dense1Dense = fTensor_dense1Dense.data(); std::vector<float> fTensor_dense1bias0bcast = std::vector<float>(64); float * tensor_dense1bias0bcast = fTensor_dense1bias0bcast.data(); std::vector<float> fTensor_dense3bias0bcast = std::vector<float>(16); float * tensor_dense3bias0bcast = fTensor_dense3bias0bcast.data(); std::vector<float> fTensor_dense1Relu0 = std::vector<float>(64); float * tensor_dense1Relu0 = fTensor_dense1Relu0.data(); std::vector<float> fTensor_activationRelu0 = std::vector<float>(128); float * tensor_activationRelu0 = fTensor_activationRelu0.data(); std::vector<float> fTensor_denseBiasAdd0 = std::vector<float>(128); float * tensor_denseBiasAdd0 = fTensor_denseBiasAdd0.data(); std::vector<float> fTensor_dense2bias0bcast = std::vector<float>(32); float * tensor_dense2bias0bcast = fTensor_dense2bias0bcast.data(); std::vector<float> fTensor_densebias0bcast = std::vector<float>(128); float * tensor_densebias0bcast = fTensor_densebias0bcast.data(); Session(std::string filename ="") { if (filename.empty()) filename = "KerasModel.dat"; std::ifstream f; f.open(filename); if (!f.is_open()){ throw std::runtime_error("tmva-sofie failed to open file for input weights"); } std::string tensor_name; int length; f >> tensor_name >> length; if (tensor_name != "tensor_dense3bias0" ) { std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_dense3bias0 , read " + tensor_name; throw std::runtime_error(err_msg); } if (length != 4) { std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 4 , read " + std::to_string(length) 
; throw std::runtime_error(err_msg); } for (int i =0; i < length; ++i) f >> tensor_dense3bias0[i]; f >> tensor_name >> length; if (tensor_name != "tensor_dense3kernel0" ) { std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_dense3kernel0 , read " + tensor_name; throw std::runtime_error(err_msg); } if (length != 32) { std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 32 , read " + std::to_string(length) ; throw std::runtime_error(err_msg); } for (int i =0; i < length; ++i) f >> tensor_dense3kernel0[i]; f >> tensor_name >> length; if (tensor_name != "tensor_dense2bias0" ) { std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_dense2bias0 , read " + tensor_name; throw std::runtime_error(err_msg); } if (length != 8) { std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 8 , read " + std::to_string(length) ; throw std::runtime_error(err_msg); } for (int i =0; i < length; ++i) f >> tensor_dense2bias0[i]; f >> tensor_name >> length; if (tensor_name != "tensor_dense2kernel0" ) { std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_dense2kernel0 , read " + tensor_name; throw std::runtime_error(err_msg); } if (length != 128) { std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 128 , read " + std::to_string(length) ; throw std::runtime_error(err_msg); } for (int i =0; i < length; ++i) f >> tensor_dense2kernel0[i]; f >> tensor_name >> length; if (tensor_name != "tensor_dense1bias0" ) { std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_dense1bias0 , read " + tensor_name; throw std::runtime_error(err_msg); } if (length != 16) { std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 16 , read " + std::to_string(length) ; throw 
std::runtime_error(err_msg); } for (int i =0; i < length; ++i) f >> tensor_dense1bias0[i]; f >> tensor_name >> length; if (tensor_name != "tensor_dense1kernel0" ) { std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_dense1kernel0 , read " + tensor_name; throw std::runtime_error(err_msg); } if (length != 512) { std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 512 , read " + std::to_string(length) ; throw std::runtime_error(err_msg); } for (int i =0; i < length; ++i) f >> tensor_dense1kernel0[i]; f >> tensor_name >> length; if (tensor_name != "tensor_densebias0" ) { std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_densebias0 , read " + tensor_name; throw std::runtime_error(err_msg); } if (length != 32) { std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 32 , read " + std::to_string(length) ; throw std::runtime_error(err_msg); } for (int i =0; i < length; ++i) f >> tensor_densebias0[i]; f >> tensor_name >> length; if (tensor_name != "tensor_densekernel0" ) { std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_densekernel0 , read " + tensor_name; throw std::runtime_error(err_msg); } if (length != 2048) { std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 2048 , read " + std::to_string(length) ; throw std::runtime_error(err_msg); } for (int i =0; i < length; ++i) f >> tensor_densekernel0[i]; f.close(); { float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<float>(tensor_densebias0,{ 32 }, { 4 , 32 }); std::copy(data, data + 128, tensor_densebias0bcast); delete [] data; } { float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<float>(tensor_dense1bias0,{ 16 }, { 4 , 16 }); std::copy(data, data + 64, tensor_dense1bias0bcast); delete [] data; } { float * data = 
TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<float>(tensor_dense2bias0,{ 8 }, { 4 , 8 }); std::copy(data, data + 32, tensor_dense2bias0bcast); delete [] data; } { float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<float>(tensor_dense3bias0,{ 4 }, { 4 , 4 }); std::copy(data, data + 16, tensor_dense3bias0bcast); delete [] data; } } std::vector<float> infer(float* tensor_input1){ //--------- Gemm char op_0_transA = 'n'; char op_0_transB = 'n'; int op_0_m = 4; int op_0_n = 32; int op_0_k = 64; float op_0_alpha = 1; float op_0_beta = 1; int op_0_lda = 64; int op_0_ldb = 32; std::copy(tensor_densebias0bcast, tensor_densebias0bcast + 128, tensor_denseBiasAdd0); BLAS::sgemm_(&op_0_transB, &op_0_transA, &op_0_n, &op_0_m, &op_0_k, &op_0_alpha, tensor_densekernel0, &op_0_ldb, tensor_input1, &op_0_lda, &op_0_beta, tensor_denseBiasAdd0, &op_0_n); //------ RELU for (int id = 0; id < 128 ; id++){ tensor_activationRelu0[id] = ((tensor_denseBiasAdd0[id] > 0 )? tensor_denseBiasAdd0[id] : 0); } //--------- Gemm char op_2_transA = 'n'; char op_2_transB = 'n'; int op_2_m = 4; int op_2_n = 16; int op_2_k = 32; float op_2_alpha = 1; float op_2_beta = 1; int op_2_lda = 32; int op_2_ldb = 16; std::copy(tensor_dense1bias0bcast, tensor_dense1bias0bcast + 64, tensor_dense1Dense); BLAS::sgemm_(&op_2_transB, &op_2_transA, &op_2_n, &op_2_m, &op_2_k, &op_2_alpha, tensor_dense1kernel0, &op_2_ldb, tensor_activationRelu0, &op_2_lda, &op_2_beta, tensor_dense1Dense, &op_2_n); //------ RELU for (int id = 0; id < 64 ; id++){ tensor_dense1Relu0[id] = ((tensor_dense1Dense[id] > 0 )? 
tensor_dense1Dense[id] : 0); } //--------- Gemm char op_4_transA = 'n'; char op_4_transB = 'n'; int op_4_m = 4; int op_4_n = 8; int op_4_k = 16; float op_4_alpha = 1; float op_4_beta = 1; int op_4_lda = 16; int op_4_ldb = 8; std::copy(tensor_dense2bias0bcast, tensor_dense2bias0bcast + 32, tensor_dense2Dense); BLAS::sgemm_(&op_4_transB, &op_4_transA, &op_4_n, &op_4_m, &op_4_k, &op_4_alpha, tensor_dense2kernel0, &op_4_ldb, tensor_dense1Relu0, &op_4_lda, &op_4_beta, tensor_dense2Dense, &op_4_n); //------ RELU for (int id = 0; id < 32 ; id++){ tensor_dense2Relu0[id] = ((tensor_dense2Dense[id] > 0 )? tensor_dense2Dense[id] : 0); } //--------- Gemm char op_6_transA = 'n'; char op_6_transB = 'n'; int op_6_m = 4; int op_6_n = 4; int op_6_k = 8; float op_6_alpha = 1; float op_6_beta = 1; int op_6_lda = 8; int op_6_ldb = 4; std::copy(tensor_dense3bias0bcast, tensor_dense3bias0bcast + 16, tensor_dense3BiasAdd0); BLAS::sgemm_(&op_6_transB, &op_6_transA, &op_6_n, &op_6_m, &op_6_k, &op_6_alpha, tensor_dense3kernel0, &op_6_ldb, tensor_dense2Relu0, &op_6_lda, &op_6_beta, tensor_dense3BiasAdd0, &op_6_n); //------ RELU for (int id = 0; id < 16 ; id++){ tensor_reluRelu0[id] = ((tensor_dense3BiasAdd0[id] > 0 )? tensor_dense3BiasAdd0[id] : 0); } std::vector<float> ret (tensor_reluRelu0, tensor_reluRelu0 + 16); return ret; } }; } //TMVA_SOFIE_KerasModel #endif // TMVA_SOFIE_KERASMODEL