#!/usr/bin/env python
# coding: utf-8

# # Quantization of Image Classification Models
#
# This tutorial demonstrates how to apply `INT8` quantization to an image classification model using [NNCF](https://github.com/openvinotoolkit/nncf). It uses the MobileNet V2 model, trained on the CIFAR10 dataset. The code is designed to be extendable to custom models and datasets. The tutorial uses the OpenVINO backend for performing model quantization in NNCF. If you are interested in how to apply quantization to a PyTorch model, please check this [tutorial](../pytorch-post-training-quantization-nncf/pytorch-post-training-quantization-nncf.ipynb).
#
# This tutorial consists of the following steps:
#
# - Prepare the model for quantization.
# - Define a data loading functionality.
# - Perform quantization.
# - Compare accuracy of the original and quantized models.
# - Compare performance of the original and quantized models.
# - Compare results on four pictures.
#
#
# #### Table of contents:
#
# - [Prepare the Model](#Prepare-the-Model)
# - [Prepare Dataset](#Prepare-Dataset)
# - [Perform Quantization](#Perform-Quantization)
# - [Create Dataset for Validation](#Create-Dataset-for-Validation)
# - [Run nncf.quantize for Getting an Optimized Model](#Run-nncf.quantize-for-Getting-an-Optimized-Model)
# - [Serialize an OpenVINO IR model](#Serialize-an-OpenVINO-IR-model)
# - [Compare Accuracy of the Original and Quantized Models](#Compare-Accuracy-of-the-Original-and-Quantized-Models)
# - [Select inference device](#Select-inference-device)
# - [Compare Performance of the Original and Quantized Models](#Compare-Performance-of-the-Original-and-Quantized-Models)
# - [Compare results on four pictures](#Compare-results-on-four-pictures)
#
#
# ### Installation Instructions
#
# This is a self-contained example that relies solely on its own code.
#
# We recommend running the notebook in a virtual environment. You only need a Jupyter server to start.
# For details, please refer to [Installation Guide](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/README.md#-installation-guide).

# In[ ]:


# Install required packages
get_ipython().run_line_magic('pip', 'install -q "openvino>=2023.1.0" "nncf>=2.6.0" torch torchvision tqdm "matplotlib>=3.4" --extra-index-url https://download.pytorch.org/whl/cpu')


# In[ ]:


from pathlib import Path

# Set the data and model directories
DATA_DIR = Path("data")
MODEL_DIR = Path("model")

DATA_DIR.mkdir(exist_ok=True)
MODEL_DIR.mkdir(exist_ok=True)


# ## Prepare the Model
# [back to top ⬆️](#Table-of-contents:)
#
# Model preparation stage has the following steps:
#
# - Download a PyTorch model
# - Convert model to OpenVINO Intermediate Representation format (IR) using model conversion Python API
# - Serialize converted model on disk
#

# In[ ]:


import requests

# Both helper modules live in the same directory of the openvino_notebooks repository.
HELPER_BASE_URL = "https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils"

for helper_name in ("notebook_utils.py", "cmd_helper.py"):
    # A timeout keeps the cell from hanging forever on a stalled connection.
    r = requests.get(url=f"{HELPER_BASE_URL}/{helper_name}", timeout=60)
    # Fail loudly instead of saving an HTTP error page as a Python module.
    r.raise_for_status()
    # write_text() closes the file deterministically; UTF-8 matches the encoding
    # Python assumes when the module is imported later.
    Path(helper_name).write_text(r.text, encoding="utf-8")


# In[ ]:


from cmd_helper import clone_repo

clone_repo("https://github.com/chenyaofo/pytorch-cifar-models.git")


# In[3]:


from pytorch_cifar_models import cifar10_mobilenetv2_x1_0

model = cifar10_mobilenetv2_x1_0(pretrained=True)


# OpenVINO supports PyTorch models via conversion to OpenVINO Intermediate Representation format using model conversion Python API. `ov.convert_model` accepts a PyTorch model instance and converts it into the `openvino.runtime.Model` representation of the model in OpenVINO. Optionally, you may specify `example_input`, which serves as a helper for model tracing, and `input` for converting the model with a static shape. The converted model is ready to be loaded on a device for inference and can be saved on a disk for next usage via the `save_model` function.
# More details about model conversion Python API can be found on this [page](https://docs.openvino.ai/2024/openvino-workflow/model-preparation.html).

# In[6]:


import openvino as ov

# Switch the PyTorch model to inference mode before it is converted.
model.eval()

# Convert with a static input shape and store the resulting IR on disk.
ov_model = ov.convert_model(model, input=[1, 3, 32, 32])
fp_model_path = MODEL_DIR / "mobilenet_v2.xml"
ov.save_model(ov_model, fp_model_path)


# ## Prepare Dataset
# [back to top ⬆️](#Table-of-contents:)
#
# We will use the [CIFAR10](https://www.cs.toronto.edu/~kriz/cifar.html) dataset from [torchvision](https://pytorch.org/vision/stable/generated/torchvision.datasets.CIFAR10.html). The preprocessing for the model is taken from the training [config](https://github.com/chenyaofo/image-classification-codebase/blob/master/conf/cifar10.conf).

# In[7]:


import torch
from torchvision import transforms
from torchvision.datasets import CIFAR10

# Per-channel normalization statistics applied after conversion to a tensor.
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.247, 0.243, 0.261)

preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=CIFAR10_MEAN, std=CIFAR10_STD),
]
transform = transforms.Compose(preprocessing_steps)

# The test split (train=False) is downloaded into DATA_DIR on first use.
dataset = CIFAR10(root=DATA_DIR, train=False, transform=transform, download=True)

# One picture per batch, in dataset order.
val_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=1,
    shuffle=False,
    num_workers=0,
    pin_memory=True,
)


# ## Perform Quantization
# [back to top ⬆️](#Table-of-contents:)
#
# [NNCF](https://github.com/openvinotoolkit/nncf) provides a suite of advanced algorithms for Neural Networks inference optimization in OpenVINO with minimal accuracy drop.
# We will use 8-bit quantization in post-training mode (without the fine-tuning pipeline) to optimize MobileNetV2.
# The optimization process contains the following steps:
#
# 1. Create a Dataset for quantization.
# 2. Run `nncf.quantize` for getting an optimized model.
# 3. Serialize an OpenVINO IR model, using the `openvino.save_model` function.
#
#
# ### Create Dataset for Validation
# [back to top ⬆️](#Table-of-contents:)
#
# NNCF is compatible with `torch.utils.data.DataLoader` interface.
# For performing quantization, the data loader should be passed into an `nncf.Dataset` object together with a transformation function that prepares input data to fit into the model during quantization. In our case, it picks the input tensor from the (input tensor, label) pair and converts the PyTorch tensor to numpy.

# In[8]:


import nncf


def transform_fn(data_item):
    """Return the first element of ``data_item`` converted with ``.numpy()``.

    :param data_item: one item yielded by the data loader; only element 0
        (the input tensor) is used, the label is ignored.
    :return: result of calling ``.numpy()`` on the input tensor.
    """
    return data_item[0].numpy()


# Wrap the loader so that NNCF receives model inputs only.
quantization_dataset = nncf.Dataset(val_loader, transform_fn)


# ## Run nncf.quantize for Getting an Optimized Model
# [back to top ⬆️](#Table-of-contents:)
#
# The `nncf.quantize` function accepts a model and a prepared quantization dataset for performing basic quantization. Optionally, additional parameters like `subset_size`, `preset`, `ignored_scope` can be provided to improve the quantization result if applicable. More details about supported parameters can be found on this [page](https://docs.openvino.ai/2024/openvino-workflow/model-optimization-guide/quantizing-models-post-training/basic-quantization-flow.html#tune-quantization-parameters)

# In[9]:


quant_ov_model = nncf.quantize(ov_model, quantization_dataset)


# ## Serialize an OpenVINO IR model
# [back to top ⬆️](#Table-of-contents:)
#
# Similar to the result of `ov.convert_model`, the quantized model is an `ov.Model` object which is ready to be loaded into a device and can be serialized on disk using `ov.save_model`.
# In[10]:


ov.save_model(quant_ov_model, MODEL_DIR / "quantized_mobilenet_v2.xml")


# ## Compare Accuracy of the Original and Quantized Models
# [back to top ⬆️](#Table-of-contents:)
#

# In[11]:


from tqdm.notebook import tqdm
import numpy as np


def test_accuracy(ov_model, data_loader):
    """Compute top-1 accuracy of a compiled model over a data loader.

    :param ov_model: callable model; ``ov_model(batch)[0]`` must give the
        scores of the batch with the batch dimension first.
    :param data_loader: iterable yielding ``(images, labels)`` pairs where
        ``labels`` provides ``.numpy()``.
    :return: fraction of correctly classified samples as a plain ``float``;
        ``0.0`` when the loader yields nothing.
    """
    correct = 0
    total = 0
    for batch_imgs, batch_labels in tqdm(data_loader):
        labels = batch_labels.numpy().reshape(-1)
        scores = np.asarray(ov_model(batch_imgs)[0])
        # One row of scores per sample, so argmax is taken per sample rather
        # than over the flattened batch.
        predicted = scores.reshape(labels.size, -1).argmax(axis=1)
        # Count samples, not batches, so any batch size gives a correct ratio.
        correct += int(np.sum(predicted == labels))
        total += labels.size
    # Guard against an empty loader instead of dividing by zero.
    return correct / total if total else 0.0


# ### Select inference device
# [back to top ⬆️](#Table-of-contents:)
#
# Select a device from the dropdown list for running inference using OpenVINO.

# In[ ]:


from notebook_utils import device_widget

device = device_widget()

device


# In[13]:


core = ov.Core()
compiled_model = core.compile_model(ov_model, device.value)
optimized_compiled_model = core.compile_model(quant_ov_model, device.value)

orig_accuracy = test_accuracy(compiled_model, val_loader)
optimized_accuracy = test_accuracy(optimized_compiled_model, val_loader)


# In[14]:


# test_accuracy returns a plain float, so no indexing is needed here.
print(f"Accuracy of the original model: {orig_accuracy * 100:.2f}%")
print(f"Accuracy of the optimized model: {optimized_accuracy * 100:.2f}%")


# ## Compare Performance of the Original and Quantized Models
# [back to top ⬆️](#Table-of-contents:)
#
# Finally, measure the inference performance of the `FP32` and `INT8` models, using [Benchmark Tool](https://docs.openvino.ai/2024/learn-openvino/openvino-samples/benchmark-tool.html) - an inference performance measurement tool in OpenVINO.
#
# > **NOTE**: For more accurate performance, it is recommended to run benchmark_app in a terminal/command prompt after closing other applications. Run `benchmark_app -m model.xml -d CPU` to benchmark async inference on CPU for one minute. Change CPU to GPU to benchmark on GPU. Run `benchmark_app --help` to see an overview of all command-line options.
#

# In[15]:


# Inference FP16 model (OpenVINO IR)
get_ipython().system('benchmark_app -m "model/mobilenet_v2.xml" -d $device.value -api async -t 15')


# In[16]:


# Inference INT8 model (OpenVINO IR)
get_ipython().system('benchmark_app -m "model/quantized_mobilenet_v2.xml" -d $device.value -api async -t 15')


# ## Compare results on four pictures
# [back to top ⬆️](#Table-of-contents:)
#

# In[17]:


# Define all possible labels from the CIFAR10 dataset
labels_names = [
    "airplane",
    "automobile",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
]
all_pictures = []
all_labels = []

# Get all pictures and their labels.
for batch in val_loader:
    all_pictures.append(batch[0].numpy())
    all_labels.append(batch[1].item())


# In[18]:


import matplotlib.pyplot as plt


def plot_pictures(indexes: list, all_pictures=all_pictures, all_labels=all_labels):
    """Plot 4 pictures side by side, titled with their dataset labels.

    :param indexes: a list of exactly 4 indexes of pictures to be displayed.
    :param all_pictures: sequence of picture arrays; each selected picture is
        squeezed and its first axis is moved to the end before plotting.
    :param all_labels: sequence of integer class ids aligned with
        ``all_pictures``; used to look up names in ``labels_names``.
    """
    num_pics = len(indexes)
    assert num_pics == 4, f"Exactly 4 indexes are required, got {num_pics}"

    # Use the real collection size rather than a hard-coded dataset length.
    num_available = len(all_pictures)
    images, labels = [], []
    for idx in indexes:
        assert idx < num_available, f"Cannot get such index, there are only {num_available}"
        # Move the first (channel) axis to the end, the layout imshow expects.
        # NOTE(review): the pictures appear to be still normalized at this point,
        # so imshow may clip values outside its valid range - confirm if
        # de-normalization is wanted for display.
        images.append(np.transpose(all_pictures[idx].squeeze(), (1, 2, 0)))
        labels.append(labels_names[all_labels[idx]])

    _, axarr = plt.subplots(1, num_pics)
    for axis, picture, title in zip(axarr, images, labels):
        axis.imshow(picture)
        axis.set_title(title)


# In[19]:


def infer_on_pictures(model, indexes: list, all_pictures=all_pictures):
    """Run inference on a few pictures and return the predicted label names.

    :param model: callable model on which to do inference; it must provide
        ``output(0)`` and return results indexable by that output.
    :param indexes: list of indexes of the pictures to infer on.
    :param all_pictures: sequence of picture arrays passed to the model one
        at a time.
    :return: list with one name from ``labels_names`` per requested index.
    """
    output_key = model.output(0)
    # Use the real collection size rather than a hard-coded dataset length.
    num_available = len(all_pictures)
    predicted_labels = []
    for idx in indexes:
        assert idx < num_available, f"Cannot get such index, there are only {num_available}"
        result = model(all_pictures[idx])[output_key]
        # result[0] holds the scores of the single picture in the batch.
        predicted_labels.append(labels_names[np.argmax(result[0])])
    return predicted_labels


# In[20]:


indexes_to_infer = [7, 12, 15, 20]  # To plot, specify 4 indexes.

plot_pictures(indexes_to_infer)

results_float = infer_on_pictures(compiled_model, indexes_to_infer)
results_quanized = infer_on_pictures(optimized_compiled_model, indexes_to_infer)

print(f"Labels for picture from float model : {results_float}.")
print(f"Labels for picture from quantized model : {results_quanized}.")