#!/usr/bin/env python
# coding: utf-8

# In[1]:


get_ipython().run_line_magic('reload_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')
get_ipython().run_line_magic('matplotlib', 'inline')

import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = ""  # enforce CPU usage

from psutil import cpu_count  # run "pip install psutil" if not already installed
import tensorflow as tf
import numpy as np

# Thread-related environment variables for onnxruntime performance optimization.
# These must be set before onnxruntime is imported.
os.environ["OMP_NUM_THREADS"] = str(cpu_count(logical=True))
os.environ["OMP_WAIT_POLICY"] = 'ACTIVE'


# ## ONNX and TensorFlow Lite Support in `ktrain`
#
# As of v0.24.x, `predictors` in **ktrain** provide built-in support for exporting models to the [ONNX](https://github.com/onnx/onnx) and [TensorFlow Lite](https://www.tensorflow.org/lite) formats. This makes it easier to take a **ktrain**-trained model and use it to make predictions *outside* of **ktrain** (or even TensorFlow) in deployment scenarios. In this notebook, we illustrate this with a text classification example.
#
# Let us begin by loading a previously trained `Predictor` instance, which consists of both the **DistilBERT** model and its associated `Preprocessor` instance.

# In[2]:


import ktrain
predictor = ktrain.load_predictor('/tmp/my_distilbert_predictor')
print(predictor.model)
print(predictor.preproc)


# The cell above assumes that the model was previously trained on the 20 Newsgroups corpus using a GPU (e.g., on Google Colab). The files in question can be easily created with **ktrain**:
#
# ```python
# # install ktrain
# !pip install ktrain
#
# # load text data
# categories = ['alt.atheism', 'comp.graphics', 'sci.med', 'soc.religion.christian']
# from sklearn.datasets import fetch_20newsgroups
# train_b = fetch_20newsgroups(subset='train', categories=categories, shuffle=True)
# test_b = fetch_20newsgroups(subset='test', categories=categories, shuffle=True)
# (x_train, y_train) = (train_b.data, train_b.target)
# (x_test, y_test) = (test_b.data, test_b.target)
#
# # build, train, and validate the model (Transformer is a wrapper around the transformers library)
# import ktrain
# from ktrain import text
# MODEL_NAME = 'distilbert-base-uncased'
# t = text.Transformer(MODEL_NAME, maxlen=500, class_names=train_b.target_names)
# trn = t.preprocess_train(x_train, y_train)
# val = t.preprocess_test(x_test, y_test)
# model = t.get_classifier()
# learner = ktrain.get_learner(model, train_data=trn, val_data=val, batch_size=6)
# learner.fit_onecycle(5e-5, 1)
#
# # save the predictor
# predictor = ktrain.get_predictor(learner.model, t)
# predictor.save('/tmp/my_distilbert_predictor')
# ```

# ## TensorFlow Lite Inferences
#
# Here, we export our model to TensorFlow Lite and use it to make predictions *without* **ktrain**.
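# The preprocessing settings needed below (the maximum sequence length and the class names) can also be read off the loaded predictor instead of being re-typed by hand. The cell below is a minimal optional sketch of this; it assumes the loaded `Preprocessor` exposes a `maxlen` attribute and a `get_classes()` method (as recent **ktrain** versions do).

# In[ ]:


# OPTIONAL (sketch): recover preprocessing settings from the loaded predictor
# rather than hard-coding them (assumes `maxlen` and `get_classes()` exist on
# the Preprocessor, as in recent ktrain versions).
print('maxlen used during training: %s' % predictor.preproc.maxlen)
print('class names: %s' % predictor.preproc.get_classes())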
# In[3]:


# export the TensorFlow Lite model
tflite_model_path = '/tmp/model.tflite'
tflite_model_path = predictor.export_model_to_tflite(tflite_model_path)

# load the TFLite interpreter
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# set maxlen, class_names, and tokenizer (use the settings employed when training the model - see above)
maxlen = 500  # from above
class_names = ['alt.atheism', 'comp.graphics', 'sci.med', 'soc.religion.christian']  # from above
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')

# preprocess and predict outside of ktrain
doc = 'I received a chest x-ray at the hospital.'
inputs = tokenizer(doc, max_length=maxlen, padding='max_length', truncation=True, return_tensors="tf")
interpreter.set_tensor(input_details[0]['index'], inputs['attention_mask'])
interpreter.set_tensor(input_details[1]['index'], inputs['input_ids'])
interpreter.invoke()
output_tflite = interpreter.get_tensor(output_details[0]['index'])
print()
print('text input: %s' % (doc))
print()
print('predicted logits: %s' % (output_tflite))
print()
print('predicted class: %s' % (class_names[np.argmax(output_tflite[0])]))


# ## ONNX Inferences
#
# Here, we will export our trained model to ONNX and make predictions *outside* of both **ktrain** and **TensorFlow** using the ONNX runtime. Before proceeding, please ensure the ONNX libraries are installed:
# ```
# pip install -q --upgrade onnxruntime==1.5.1 onnxruntime-tools onnx keras2onnx
# ```
#
# It is possible to transform a TensorFlow model directly to ONNX with `predictor.export_model_to_onnx(onnx_model_path)`, similar to what was done for TFLite above (a minimal sketch of this direct route appears just below). However, for **transformers** models like the **DistilBERT** text classifier used in this example, it is recommended that the model first be converted to PyTorch and then to ONNX, which yields a better-performing ONNX model.
#
# Below, we use `AutoModelForSequenceClassification.from_pretrained` to load our classifier as a PyTorch model before converting it to ONNX. We then use our ONNX model to make predictions **without** the need for ktrain, TensorFlow, or PyTorch. This is well-suited for deployments that require smaller footprints (e.g., Heroku).
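# First, here is a minimal sketch of the *direct* TensorFlow-to-ONNX export mentioned above. It is optional, the output path name is illustrative, and the resulting file is not used in the rest of this notebook.

# In[ ]:


# OPTIONAL (sketch): direct TensorFlow-to-ONNX export with ktrain.
# For transformers-based classifiers, the PyTorch route in the next cell is
# recommended instead; the file written here is not used below.
onnx_model_path = '/tmp/model_tf.onnx'  # illustrative path
predictor.export_model_to_onnx(onnx_model_path)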
# In[4]:


# set maxlen, class_names, and tokenizer (use the settings employed when training the model - see above)
model_name = 'distilbert-base-uncased'
maxlen = 500  # from above
class_names = ['alt.atheism', 'comp.graphics', 'sci.med', 'soc.religion.christian']  # from above
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# imports
import numpy as np
from transformers.convert_graph_to_onnx import convert, optimize, quantize
from transformers import AutoModelForSequenceClassification
from pathlib import Path

# paths
predictor_path = '/tmp/my_distilbert_predictor'
pt_path = predictor_path + '_pt'
pt_onnx_path = pt_path + '_onnx/model.onnx'

# convert the TensorFlow model to PyTorch and then to ONNX (optimized and quantized)
AutoModelForSequenceClassification.from_pretrained(predictor_path, from_tf=True).save_pretrained(pt_path)
convert(framework='pt', model=pt_path, output=Path(pt_onnx_path), opset=11,
        tokenizer=model_name, pipeline_name='sentiment-analysis')
pt_onnx_quantized_path = quantize(optimize(Path(pt_onnx_path)))

# create an ONNX session
def create_onnx_session(onnx_model_path, provider='CPUExecutionProvider'):
    """
    Creates an ONNX inference session from the provided onnx_model_path.
    """
    from onnxruntime import GraphOptimizationLevel, InferenceSession, SessionOptions, get_all_providers
    assert provider in get_all_providers(), f"provider {provider} not found, {get_all_providers()}"

    # a few session options that can affect performance (suggested by Microsoft)
    options = SessionOptions()
    options.intra_op_num_threads = 0
    options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL

    # load the model as a graph and prepare the CPU backend
    session = InferenceSession(onnx_model_path, options, providers=[provider])
    session.disable_fallback()
    return session

sess = create_onnx_session(pt_onnx_quantized_path.as_posix())

# tokenize the document and make a prediction
tokens = tokenizer.encode_plus('I received a chest x-ray at the hospital.', max_length=maxlen, truncation=True)
tokens = {name: np.atleast_2d(value) for name, value in tokens.items()}
print()
print('predicted class: %s' % (class_names[np.argmax(sess.run(None, tokens)[0])]))


# In[ ]:
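# Finally, the pieces above can be wrapped into a single helper for deployment. The cell below is a minimal sketch: the function name `predict_onnx` and the softmax step are illustrative additions, and the helper relies only on the `tokenizer`, `sess`, `class_names`, and `maxlen` objects defined earlier.

# In[ ]:


# OPTIONAL (sketch): wrap tokenization, ONNX inference, and a softmax into one
# helper function for deployment.  `predict_onnx` is an illustrative name.
def predict_onnx(doc):
    """Return (predicted_class_name, class_probabilities) for a text document."""
    tokens = tokenizer.encode_plus(doc, max_length=maxlen, truncation=True)
    tokens = {name: np.atleast_2d(value) for name, value in tokens.items()}
    logits = sess.run(None, tokens)[0][0]
    # convert logits to probabilities with a numerically stable softmax
    exps = np.exp(logits - np.max(logits))
    probs = exps / exps.sum()
    return class_names[int(np.argmax(probs))], probs

pred_class, pred_probs = predict_onnx('I received a chest x-ray at the hospital.')
print('predicted class: %s' % pred_class)
print('class probabilities: %s' % pred_probs)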