#!/usr/bin/env python
# coding: utf-8

# In[1]:


get_ipython().run_line_magic('reload_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')
get_ipython().run_line_magic('matplotlib', 'inline')

import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = ""  # enforce CPU usage

from psutil import cpu_count  # run "pip install psutil" if not already installed
import tensorflow as tf
import numpy as np

# Thread-related environment variables for onnxruntime performance optimization.
# These must be set before onnxruntime is imported.
os.environ["OMP_NUM_THREADS"] = str(cpu_count(logical=True))
os.environ["OMP_WAIT_POLICY"] = 'ACTIVE'


# ## ONNX and TensorFlow Lite Support in `ktrain`
#
# As of v0.24.x, `predictors` in **ktrain** provide built-in support for exporting models to the [ONNX](https://github.com/onnx/onnx) and [TensorFlow Lite](https://www.tensorflow.org/lite) formats. This makes it easier to take a **ktrain**-trained model and use it to make predictions *outside* of **ktrain** (or even TensorFlow) in deployment scenarios. In this notebook, we illustrate this with a text classification example.
#
# Let us begin by loading a previously trained `Predictor` instance, which consists of both the **DistilBERT** model and its associated `Preprocessor` instance.

# In[2]:


import ktrain
predictor = ktrain.load_predictor('/tmp/my_distilbert_predictor')
print(predictor.model)
print(predictor.preproc)


# The cell above assumes that the model was previously trained on the 20 Newsgroups corpus using a GPU (e.g., on Google Colab). The files in question can be easily created with **ktrain**:
#
# ```python
# # install ktrain
# !pip install ktrain
#
# # load text data
# categories = ['alt.atheism', 'comp.graphics', 'sci.med', 'soc.religion.christian']
# from sklearn.datasets import fetch_20newsgroups
# train_b = fetch_20newsgroups(subset='train', categories=categories, shuffle=True)
# test_b = fetch_20newsgroups(subset='test', categories=categories, shuffle=True)
# (x_train, y_train) = (train_b.data, train_b.target)
# (x_test, y_test) = (test_b.data, test_b.target)
#
# # build, train, and validate the model (Transformer is a wrapper around the transformers library)
# import ktrain
# from ktrain import text
# MODEL_NAME = 'distilbert-base-uncased'
# t = text.Transformer(MODEL_NAME, maxlen=500, class_names=train_b.target_names)
# trn = t.preprocess_train(x_train, y_train)
# val = t.preprocess_test(x_test, y_test)
# model = t.get_classifier()
# learner = ktrain.get_learner(model, train_data=trn, val_data=val, batch_size=6)
# learner.fit_onecycle(5e-5, 1)
#
# # save the predictor
# predictor = ktrain.get_predictor(learner.model, t)
# predictor.save('/tmp/my_distilbert_predictor')
# ```

# ## TensorFlow Lite Inferences
#
# Here, we export our model to TensorFlow Lite and use it to make predictions *without* **ktrain**.
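# The preprocessing settings needed below (the maximum sequence length and the class names) can also be read off the loaded predictor instead of being re-typed by hand. The cell below is a minimal optional sketch of this; it assumes the loaded `Preprocessor` exposes a `maxlen` attribute and a `get_classes()` method (as recent **ktrain** versions do).

# In[ ]:


# OPTIONAL (sketch): recover preprocessing settings from the loaded predictor
# rather than hard-coding them (assumes `maxlen` and `get_classes()` exist on
# the Preprocessor, as in recent ktrain versions).
print('maxlen used during training: %s' % predictor.preproc.maxlen)
print('class names: %s' % predictor.preproc.get_classes())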
# In[3]:


# export the TensorFlow Lite model
tflite_model_path = '/tmp/model.tflite'
tflite_model_path = predictor.export_model_to_tflite(tflite_model_path)

# load the TFLite interpreter
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# set maxlen, class_names, and tokenizer (use the settings employed when training the model - see above)
maxlen = 500  # from above
class_names = ['alt.atheism', 'comp.graphics', 'sci.med', 'soc.religion.christian']  # from above
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')

# preprocess and predict outside of ktrain
doc = 'I received a chest x-ray at the hospital.'
inputs = tokenizer(doc, max_length=maxlen, padding='max_length', truncation=True, return_tensors="tf")
interpreter.set_tensor(input_details[0]['index'], inputs['attention_mask'])
interpreter.set_tensor(input_details[1]['index'], inputs['input_ids'])
interpreter.invoke()
output_tflite = interpreter.get_tensor(output_details[0]['index'])
print()
print('text input: %s' % (doc))
print()
print('predicted logits: %s' % (output_tflite))
print()
print('predicted class: %s' % (class_names[np.argmax(output_tflite[0])]))


# ## ONNX Inferences
#
# Here, we will export our trained model to ONNX and make predictions *outside* of both **ktrain** and **TensorFlow** using the ONNX runtime. Before proceeding, please ensure the ONNX libraries are installed:
# ```
# pip install -q --upgrade onnxruntime==1.5.1 onnxruntime-tools onnx keras2onnx
# ```
#
# It is possible to transform a TensorFlow model directly to ONNX with `predictor.export_model_to_onnx(onnx_model_path)`, similar to what was done for TFLite above (a minimal sketch of this direct route appears just below). However, for **transformers** models like the **DistilBERT** text classifier used in this example, it is recommended that the model first be converted to PyTorch and then to ONNX, which yields a better-performing ONNX model.
#
# Below, we use `AutoModelForSequenceClassification.from_pretrained` to load our classifier as a PyTorch model before converting it to ONNX. We then use our ONNX model to make predictions **without** the need for ktrain, TensorFlow, or PyTorch. This is well-suited for deployments that require smaller footprints (e.g., Heroku).
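# First, here is a minimal sketch of the *direct* TensorFlow-to-ONNX export mentioned above. It is optional, the output path name is illustrative, and the resulting file is not used in the rest of this notebook.

# In[ ]:


# OPTIONAL (sketch): direct TensorFlow-to-ONNX export with ktrain.
# For transformers-based classifiers, the PyTorch route in the next cell is
# recommended instead; the file written here is not used below.
onnx_model_path = '/tmp/model_tf.onnx'  # illustrative path
predictor.export_model_to_onnx(onnx_model_path)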
# In[4]:


# set maxlen, class_names, and tokenizer (use the settings employed when training the model - see above)
model_name = 'distilbert-base-uncased'
maxlen = 500  # from above
class_names = ['alt.atheism', 'comp.graphics', 'sci.med', 'soc.religion.christian']  # from above
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# imports
import numpy as np
from transformers.convert_graph_to_onnx import convert, optimize, quantize
from transformers import AutoModelForSequenceClassification
from pathlib import Path

# paths
predictor_path = '/tmp/my_distilbert_predictor'
pt_path = predictor_path + '_pt'
pt_onnx_path = pt_path + '_onnx/model.onnx'

# convert the TensorFlow model to PyTorch and then to ONNX (optimized and quantized)
AutoModelForSequenceClassification.from_pretrained(predictor_path, from_tf=True).save_pretrained(pt_path)
convert(framework='pt', model=pt_path, output=Path(pt_onnx_path), opset=11,
        tokenizer=model_name, pipeline_name='sentiment-analysis')
pt_onnx_quantized_path = quantize(optimize(Path(pt_onnx_path)))

# create an ONNX session
def create_onnx_session(onnx_model_path, provider='CPUExecutionProvider'):
    """
    Creates an ONNX inference session from the provided onnx_model_path.
    """
    from onnxruntime import GraphOptimizationLevel, InferenceSession, SessionOptions, get_all_providers
    assert provider in get_all_providers(), f"provider {provider} not found, {get_all_providers()}"

    # a few session options that can affect performance (suggested by Microsoft)
    options = SessionOptions()
    options.intra_op_num_threads = 0
    options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL

    # load the model as a graph and prepare the CPU backend
    session = InferenceSession(onnx_model_path, options, providers=[provider])
    session.disable_fallback()
    return session

sess = create_onnx_session(pt_onnx_quantized_path.as_posix())

# tokenize the document and make a prediction
tokens = tokenizer.encode_plus('I received a chest x-ray at the hospital.', max_length=maxlen, truncation=True)
tokens = {name: np.atleast_2d(value) for name, value in tokens.items()}
print()
print('predicted class: %s' % (class_names[np.argmax(sess.run(None, tokens)[0])]))


# In[ ]:
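# Finally, the pieces above can be wrapped into a single helper for deployment. The cell below is a minimal sketch: the function name `predict_onnx` and the softmax step are illustrative additions, and the helper relies only on the `tokenizer`, `sess`, `class_names`, and `maxlen` objects defined earlier.

# In[ ]:


# OPTIONAL (sketch): wrap tokenization, ONNX inference, and a softmax into one
# helper function for deployment.  `predict_onnx` is an illustrative name.
def predict_onnx(doc):
    """Return (predicted_class_name, class_probabilities) for a text document."""
    tokens = tokenizer.encode_plus(doc, max_length=maxlen, truncation=True)
    tokens = {name: np.atleast_2d(value) for name, value in tokens.items()}
    logits = sess.run(None, tokens)[0][0]
    # convert logits to probabilities with a numerically stable softmax
    exps = np.exp(logits - np.max(logits))
    probs = exps / exps.sum()
    return class_names[int(np.argmax(probs))], probs

pred_class, pred_probs = predict_onnx('I received a chest x-ray at the hospital.')
print('predicted class: %s' % pred_class)
print('class probabilities: %s' % pred_probs)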