#!/usr/bin/env python
# coding: utf-8

# # Line-level text detection with Surya
#
# [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/surya-line-level-text-detection/surya-line-level-text-detection.ipynb)
#
# In this tutorial we will perform line-level text detection using the [Surya](https://github.com/VikParuchuri/surya) toolkit and OpenVINO.
#
# ![line-level text detection](https://github.com/VikParuchuri/surya/blob/master/static/images/excerpt.png?raw=true)
#
# [*image source*](https://github.com/VikParuchuri/surya)
#
# The model used for line-level text detection is based on [SegFormer](https://arxiv.org/pdf/2105.15203.pdf). It has the following characteristics:
# * It is specialized for document OCR. It will likely not work on photos or other images.
# * It is for printed text, not handwriting.
# * It has been trained to ignore advertisements.
# * Languages with very different character sets may not work well.
#
#
# #### Table of contents:
#
# - [Fetch test image](#Fetch-test-image)
# - [Run PyTorch inference](#Run-PyTorch-inference)
# - [Convert model to OpenVINO Intermediate Representation (IR) format](#Convert-model-to-OpenVINO-Intermediate-Representation-(IR)-format)
# - [Run OpenVINO model](#Run-OpenVINO-model)
# - [Apply post-training quantization using NNCF](#Apply-post-training-quantization-using-NNCF)
#     - [Prepare dataset](#Prepare-dataset)
#     - [Quantize model](#Quantize-model)
# - [Run quantized OpenVINO model](#Run-quantized-OpenVINO-model)
# - [Interactive inference](#Interactive-inference)
#
#
# ### Installation Instructions
#
# This is a self-contained example that relies solely on its own code.
#
# We recommend running the notebook in a virtual environment. You only need a Jupyter server to start.
# For details, please refer to the [Installation Guide](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/README.md#-installation-guide).

# ## Fetch test image
# [back to top ⬆️](#Table-of-contents:)
#
# We will use an image from a randomly sampled subset of the [DocLayNet](https://github.com/DS4SD/DocLayNet) dataset.

# In[ ]:


import os

os.environ["GIT_CLONE_PROTECTION_ACTIVE"] = "false"

get_ipython().run_line_magic('pip', 'install -q "openvino>=2024.2.0" "nncf>=2.11.0"')
get_ipython().run_line_magic('pip', 'install -q --extra-index-url https://download.pytorch.org/whl/cpu "surya-ocr==0.4.0" torch datasets "gradio>=4.19" Pillow')


# In[2]:


from datasets import load_dataset
import requests
from pathlib import Path

if not Path("notebook_utils.py").exists():
    r = requests.get(
        url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py",
    )
    open("notebook_utils.py", "w").write(r.text)

# Read more about telemetry collection at https://github.com/openvinotoolkit/openvino_notebooks?tab=readme-ov-file#-telemetry
from notebook_utils import collect_telemetry

collect_telemetry("surya-line-level-text-detection.ipynb")


def fetch_image():
    dataset = load_dataset("vikp/doclaynet_bench", split="train", streaming=True)
    return next(iter(dataset))["image"]


test_image = fetch_image()
test_image


# ## Run PyTorch inference
# [back to top ⬆️](#Table-of-contents:)
#
# To perform line-level text detection we will use the `load_model` and `load_processor` functions from the `surya` package. We will also use the `batch_text_detection` function, which performs pre- and post-processing.

# In[3]:


# Predictions visualization function
from PIL import ImageDraw


def visualize_prediction(image, prediction):
    image = image.copy()
    draw = ImageDraw.Draw(image)
    for polygon_box in prediction.bboxes:
        draw.rectangle(polygon_box.bbox, width=1, outline="red")

    display(image)


# In[4]:


from surya.detection import batch_text_detection
from surya.model.detection.segformer import load_model, load_processor

model, processor = load_model(), load_processor()

predictions = batch_text_detection([test_image], model, processor)

visualize_prediction(test_image, predictions[0])


# ## Convert model to OpenVINO Intermediate Representation (IR) format
# [back to top ⬆️](#Table-of-contents:)
#
# For best results with OpenVINO, it is recommended to convert the model to OpenVINO IR format. OpenVINO supports PyTorch models via the model conversion API.
# To convert the PyTorch model to OpenVINO IR format we will use `ov.convert_model` from the [model conversion API](https://docs.openvino.ai/2024/openvino-workflow/model-preparation.html). The `ov.convert_model` Python function returns an OpenVINO Model object ready to be loaded on a device and used for making predictions.
#
# `ov.convert_model` requires a sample of the original model input. We will use the image pre-processing from the `surya` package to prepare an example input.

# In[5]:


# Build example input
from surya.input.processing import prepare_image
import torch


def build_example_input(image, processor):
    input_values = prepare_image(image.convert("RGB"), processor)

    return {"pixel_values": torch.unsqueeze(input_values, 0)}


example_input = build_example_input(test_image, processor)


# In[6]:


# Convert model
import openvino as ov
from pathlib import Path

ov_model = ov.convert_model(model, example_input=example_input)

FP_MODEL_PATH = Path("model.xml")
INT8_MODEL_PATH = Path("int8_model.xml")

ov.save_model(ov_model, FP_MODEL_PATH)


# ## Run OpenVINO model
# [back to top ⬆️](#Table-of-contents:)
#
# Select a device from the dropdown list for running inference with OpenVINO.

# In[7]:


from notebook_utils import device_widget

device = device_widget()

device


# We want to reuse the results post-processing implemented in the `batch_text_detection` function. To do that, we implement a simple wrapper for the OpenVINO model that exposes the interface `batch_text_detection` expects.

# In[8]:


core = ov.Core()

# Compile OpenVINO model for loading on device
compiled_ov_model = core.compile_model(ov_model, device.value)


class OVModelWrapperResult:
    def __init__(self, logits):
        self.logits = logits


class OVModelWrapper:
    dtype = torch.float32
    device = model.device
    config = model.config

    def __init__(self, ov_model) -> None:
        self.ov_model = ov_model

    def __call__(self, **kwargs):
        # Run inference on preprocessed data and wrap the detection logits
        # to mimic the PyTorch model output
        logits = self.ov_model(kwargs)[0]
        return OVModelWrapperResult(torch.from_numpy(logits))


ov_model_wrapper = OVModelWrapper(compiled_ov_model)

ov_predictions = batch_text_detection([test_image], ov_model_wrapper, processor)

visualize_prediction(test_image, ov_predictions[0])


# ## Apply post-training quantization using NNCF
# [back to top ⬆️](#Table-of-contents:)
#
# [NNCF](https://github.com/openvinotoolkit/nncf/) enables post-training quantization by adding quantization layers into the model graph and then using a subset of the training dataset to initialize the parameters of these additional quantization layers. The framework is designed so that modifications to your original training code are minor. Quantization is the simplest scenario and requires only a few modifications.
#
# The optimization process consists of the following steps:
#
# 1. Create a dataset for quantization.
# 2. Run `nncf.quantize` to obtain a quantized model.
#
# Please select below whether you would like to run quantization to improve model inference speed.
#
# > **NOTE**: Quantization is a time- and memory-consuming operation. Running the quantization code below may take a long time.

# In[9]:


from notebook_utils import quantization_widget

to_quantize = quantization_widget()

to_quantize


# In[10]:


import requests

r = requests.get(
    url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py",
)
open("skip_kernel_extension.py", "w").write(r.text)

get_ipython().run_line_magic('load_ext', 'skip_kernel_extension')


# Free resources before quantization.

# In[11]:


import gc

del model
del ov_model
del compiled_ov_model
del ov_model_wrapper
gc.collect();


# ### Prepare dataset
# [back to top ⬆️](#Table-of-contents:)
#
# We create a calibration dataset from a randomly sampled set of images from [DocLayNet](https://github.com/DS4SD/DocLayNet).

# In[12]:


get_ipython().run_cell_magic('skip', 'not $to_quantize.value', '\nfrom surya.input.processing import split_image\n\n\ndef prepare_calibration_dataset(size=1, buffer_size=1):\n\n    def collate_fn(data):\n        image = data[0]["image"].convert("RGB")\n        image_splits, _ = split_image(image, processor)\n        image_splits = prepare_image(image_splits[0], processor)\n\n        return image_splits\n\n    dataset = load_dataset("vikp/doclaynet_bench", split="train", streaming=True)\n    train_dataset = dataset.shuffle(seed=42, buffer_size=buffer_size)\n    dataloader = torch.utils.data.DataLoader(train_dataset, collate_fn=collate_fn, batch_size=1)\n\n    def prepare_calibration_data(dataloader, size):\n        data = []\n        counter = 0\n        for batch in dataloader:\n            if counter == size:\n                break\n            counter += 1\n            batch = batch.to(torch.float32)\n            batch = batch.to("cpu")\n            data.append({"pixel_values": torch.stack([batch])})\n        return data\n\n    return prepare_calibration_data(dataloader, size)\n\n\ncalibration_dataset = prepare_calibration_dataset()\n')


# ### Quantize model
# [back to top ⬆️](#Table-of-contents:)
#
# Create a quantized model from the `FP16` model.

# In[13]:


get_ipython().run_cell_magic('skip', 'not $to_quantize.value', '\nimport nncf\n\nquantized_ov_model = nncf.quantize(\n    model=core.read_model(FP_MODEL_PATH),\n    calibration_dataset=nncf.Dataset(calibration_dataset),\n    advanced_parameters=nncf.AdvancedQuantizationParameters(\n        activations_quantization_params=nncf.quantization.advanced_parameters.QuantizationParameters(per_channel=False)\n    ),\n)\n\nov.save_model(quantized_ov_model, INT8_MODEL_PATH)\n')


# ## Run quantized OpenVINO model
# [back to top ⬆️](#Table-of-contents:)
#
# Now we are ready to detect lines with the `int8` OpenVINO model.

# In[14]:


get_ipython().run_cell_magic('skip', 'not $to_quantize.value', '\n# Compile OpenVINO model for loading on device\ncompiled_int8_ov_model = core.compile_model(quantized_ov_model, device.value)\n\nint8_ov_model_wrapper = OVModelWrapper(compiled_int8_ov_model)\n\nint8_ov_predictions = batch_text_detection([test_image], int8_ov_model_wrapper, processor)\n\nvisualize_prediction(test_image, int8_ov_predictions[0])\n')


# ## Interactive inference
# [back to top ⬆️](#Table-of-contents:)
#
# Now, it is your turn! Feel free to upload an image using the file upload window.
#
# Below you can select which model to run: original or quantized.

# In[15]:


from pathlib import Path
import ipywidgets as widgets

quantized_model_present = Path(INT8_MODEL_PATH).exists()

use_quantized_model = widgets.Checkbox(
    value=True if quantized_model_present else False,
    description="Use quantized model",
    disabled=not quantized_model_present,
)

use_quantized_model


# In[ ]:


compiled_model = ov.compile_model(INT8_MODEL_PATH if use_quantized_model.value else FP_MODEL_PATH, device.value)

ov_model = OVModelWrapper(compiled_model)

if not Path("gradio_helper.py").exists():
    r = requests.get(
        url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/surya-line-level-text-detection/gradio_helper.py"
    )
    open("gradio_helper.py", "w").write(r.text)

from gradio_helper import make_demo

demo = make_demo(ov_model, processor, test_image)

try:
    demo.launch(debug=True, height=1000)
except Exception:
    demo.launch(share=True, debug=True, height=1000)
# If you are launching remotely, specify server_name and server_port
# EXAMPLE: `demo.launch(server_name='your server name', server_port='server port in int')`
# To learn more please refer to the Gradio docs: https://gradio.app/docs/
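
# Quantization is applied above to improve inference speed. As an optional sanity check, the cell below is a minimal latency-comparison sketch (not part of the original workflow): it reuses `core`, `device`, and `example_input` defined earlier, assumes the saved IR files exist on disk, and averages a handful of synchronous requests rather than performing a rigorous benchmark. Run it after stopping the Gradio demo above, since `demo.launch(debug=True)` blocks the kernel.

# In[ ]:


import time


def measure_latency(model_path, n_iter=10):
    # Compile the saved IR and average the latency of several synchronous requests
    compiled = core.compile_model(model_path, device.value)
    compiled(example_input)  # warm-up run, excluded from timing
    start = time.perf_counter()
    for _ in range(n_iter):
        compiled(example_input)
    return (time.perf_counter() - start) / n_iter


print(f"Original model latency: {measure_latency(FP_MODEL_PATH):.3f} s per image")
if Path(INT8_MODEL_PATH).exists():
    print(f"Quantized model latency: {measure_latency(INT8_MODEL_PATH):.3f} s per image")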