This notebook demonstrates the use of the responsibleai
API to assess a multilabel image classification PyTorch model trained on the fridge dataset. It walks through the API calls necessary to create a widget with model analysis insights, then guides a visual analysis of the model.
The following section examines the code necessary to create datasets and a model. It then generates insights using the responsibleai
API that can be visually analyzed.
import os
import sys
from zipfile import ZipFile
import numpy as np
import pandas as pd
import datasets
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from responsibleai_vision.common.constants import ImageColumns
import json
from fastai.learner import load_learner
from raiutils.common.retries import retry_function
try:
from urllib import urlretrieve
except ImportError:
from urllib.request import urlretrieve
# Number of epochs and learning rate passed to the learner's fit() call in
# train_fastai_image_classifier.
EPOCHS = 10
LEARNING_RATE = 1e-4
# Size given to the Resize batch transform when building the data loaders.
IM_SIZE = 300
# Batch size given to the data loaders.
BATCH_SIZE = 16
# Name of the pre-trained model file on the remote store; also the local file
# name the model is saved to and loaded from.
FRIDGE_MODEL_NAME = 'multilabel_fridge_model'
# Remote file name used instead when running on Windows.
FRIDGE_MODEL_WINDOWS_NAME = 'multilabel_fridge_model_windows'
# Prefix matched against sys.platform to detect Windows.
WIN = 'win'
def load_fridge_dataset():
    """Download the multilabel fridge-objects dataset and load its labels.

    The archive is downloaded into ``./data``, extracted there and then
    deleted. ``labels.csv`` from the extracted folder is read, and its
    ``filename`` and ``labels`` columns are renamed to the names given by
    ``ImageColumns.IMAGE`` and ``ImageColumns.LABEL``.

    :return: The labels table, with every entry of the image column
        prefixed by ``./data/multilabelFridgeObjects/images/``.
    :rtype: pandas.DataFrame
    """
    # create data folder if it doesn't exist.
    os.makedirs("data", exist_ok=True)

    # download data
    download_url = ("https://cvbp-secondary.z19.web.core.windows.net/" +
                    "datasets/image_classification/multilabelFridgeObjects.zip")
    folder_path = './data/multilabelFridgeObjects'
    data_file = folder_path + '.zip'
    urlretrieve(download_url, filename=data_file)

    # extract files
    with ZipFile(data_file, "r") as zipfile:
        zipfile.extractall(path="./data")

    # delete zip file
    os.remove(data_file)

    data = pd.read_csv(folder_path + '/labels.csv')
    image_col_name = ImageColumns.IMAGE.value
    label_col_name = ImageColumns.LABEL.value
    data = data.rename(columns={'filename': image_col_name,
                                'labels': label_col_name})

    # Prefix every file name with one assignment on the frame itself.
    # Writing element by element through a Series taken out of the frame is
    # chained assignment: pandas warns about it, and with copy-on-write the
    # frame would be left unchanged.
    data[image_col_name] = folder_path + '/images/' + data[image_col_name]
    return data
import urllib.request as request_file
#download fine-tuned recycling model from url
def download_assets(filepath, force=False):
    """Download the model file from its hard-coded URL unless already present.

    :param filepath: Destination path of the downloaded file.
    :param force: When true, download even if ``filepath`` already exists.
    :return: ``filepath``, unchanged.
    """
    if force or not os.path.exists(filepath):
        # Download to a sibling file and move it into place only on success,
        # so an interrupted transfer cannot leave a partial file that a later
        # call would report as found.
        partial_path = filepath + '.part'
        try:
            request_file.urlretrieve(
                "https://publictestdatasets.blob.core.windows.net/"
                "models/fastrcnn.pt",
                partial_path)
            os.replace(partial_path, filepath)
        finally:
            # Only still present when the download or the move failed.
            if os.path.exists(partial_path):
                os.remove(partial_path)
    else:
        print('Found ' + filepath)

    return filepath
#Loading in our pretrained model
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torch
import os
def get_instance_segmentation_model(num_classes):
    """Build a Faster R-CNN model whose box head predicts ``num_classes``.

    Despite the function name, the model created here is torchvision's
    ``fasterrcnn_resnet50_fpn`` with ``pretrained=True``.

    :param num_classes: Number of classes for the replacement
        ``FastRCNNPredictor`` head.
    :return: The model with its ``roi_heads.box_predictor`` replaced.
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        pretrained=True)

    # Swap the stock box predictor for a fresh one that takes the same
    # number of input features but outputs num_classes classes.
    roi_heads = detector.roi_heads
    roi_heads.box_predictor = FastRCNNPredictor(
        roi_heads.box_predictor.cls_score.in_features, num_classes)
    return detector
# Use the GPU when torch reports one is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device: ', str(device))
# Number of classes for the replacement box-predictor head.
num_classes = 5
model = get_instance_segmentation_model(num_classes)
# Fetch the fine-tuned weights unless the file is already on disk.
_ = download_assets('Recycling_finetuned_FastRCNN.pt')
# NOTE(review): depending on the torch version, torch.load may unpickle
# arbitrary objects from the downloaded file - only use a trusted source.
model.load_state_dict(torch.load('Recycling_finetuned_FastRCNN.pt', map_location = device))
# To use the general torchvision pretrained model instead, comment out the
# lines above and uncomment the line below.
# NOTE(review): `detection` is not imported in the visible code - confirm the
# import before enabling it.
# model = detection.fasterrcnn_resnet50_fpn(pretrained=True)
# NOTE(review): `model` is rebound further down by
# retrieve_or_train_fridge_model(data), so this detection model does not
# appear to be the one passed to RAIVisionInsights - confirm it is needed.
model.to(device)
from fastai.data.transforms import Normalize
from fastai.metrics import accuracy_multi
from fastai.vision.data import ImageDataLoaders, imagenet_stats
from fastai.vision.augment import Resize
from fastai.vision import models as fastai_models
from fastai.vision.learner import vision_learner
from fastai.losses import BCEWithLogitsLossFlat
import torch.nn as nn
def train_fastai_image_classifier(df):
    """Train a resnet18 multilabel classifier on ``df`` with fastai.

    :param df: Table passed to ``ImageDataLoaders.from_df``; labels are
        split on a single space.
    :return: The fitted learner returned by ``vision_learner``.
    """
    batch_transforms = [
        Resize(IM_SIZE),
        Normalize.from_stats(*imagenet_stats),
    ]
    # 20% of the rows are held out for validation, with a fixed seed.
    loaders = ImageDataLoaders.from_df(
        df,
        valid_pct=0.2,
        seed=10,
        label_delim=' ',
        bs=BATCH_SIZE,
        batch_tfms=batch_transforms,
    )
    learner = vision_learner(
        loaders,
        fastai_models.resnet18,
        metrics=[accuracy_multi],
        loss_func=BCEWithLogitsLossFlat(),
    )
    learner.unfreeze()
    learner.fit(EPOCHS, LEARNING_RATE)
    return learner
class FetchModel(object):
    """Downloads the pre-trained fridge model for the current platform."""

    def __init__(self):
        """Create a fetcher; it keeps no state."""

    def fetch(self):
        """Download the platform's model file and save it locally.

        The Windows-specific remote file is used when ``sys.platform``
        starts with ``WIN``; in every case the download is stored under
        ``FRIDGE_MODEL_NAME``.
        """
        on_windows = sys.platform.startswith(WIN)
        remote_name = (FRIDGE_MODEL_WINDOWS_NAME if on_windows
                       else FRIDGE_MODEL_NAME)
        source_url = ('https://publictestdatasets.blob.core.windows.net/models/'
                      + remote_name)
        urlretrieve(source_url, FRIDGE_MODEL_NAME)
def retrieve_or_train_fridge_model(df, force_train=False):
    """Return a fastai learner for the fridge dataset.

    :param df: Table handed to ``train_fastai_image_classifier`` when
        training; unused when the pre-trained model is downloaded.
    :param force_train: When true, train a new model and export it to
        ``FRIDGE_MODEL_NAME``. Otherwise download the pre-trained model
        (with retries) and load it.
    :return: The trained or loaded learner.
    """
    if force_train:
        model = train_fastai_image_classifier(df)
        # Save model to disk
        model.export(FRIDGE_MODEL_NAME)
        return model

    # The retried action downloads the model, not the dataset, so the
    # action name and error message say so.
    retry_function(FetchModel().fetch, "Model download",
                   "Failed to download model",
                   max_retries=4,
                   retry_delay=60)
    # NOTE(review): load_learner presumably unpickles the downloaded file -
    # confirm the source is trusted.
    return load_learner(FRIDGE_MODEL_NAME)
# Download the fridge dataset and load its labels table.
data = load_fridge_dataset()
# Bare expression; presumably displays the table when run as a notebook cell.
data
# force_train keeps its default (False), so the pre-trained model is
# downloaded and loaded rather than trained here.
model = retrieve_or_train_fridge_model(data)
# Update the labels to be in grid format: one 0/1 indicator column per class
# replaces the single space-delimited label column.
CAN = 'can'
CARTON = 'carton'
MILK_BOTTLE = 'milk_bottle'
WATER_BOTTLE = 'water_bottle'
target_columns = [CAN, CARTON, MILK_BOTTLE, WATER_BOTTLE]

# Use the same column-name constant that load_fridge_dataset renames to and
# that the drop below relies on, rather than a hard-coded 'label'.
label_col_name = ImageColumns.LABEL.value

# Split each row's labels once, then derive every indicator column from the
# resulting sets.
label_sets = [set(row_labels.split(' ')) for row_labels in data[label_col_name]]
for target_column in target_columns:
    # dtype=float keeps the indicators as 0.0/1.0, as np.zeros produced.
    data[target_column] = np.array(
        [target_column in row_labels for row_labels in label_sets],
        dtype=float)

data.drop(columns=label_col_name, inplace=True)

# Both names refer to the same DataFrame object; no split is made here.
train_data = data
test_data = data
from raiwidgets import ResponsibleAIDashboard
from responsibleai_vision import ModelTask, RAIVisionInsights
To use Responsible AI Toolbox, initialize a RAIVisionInsights object upon which different components can be loaded.
RAIVisionInsights is given the model, the test dataset (here a 10-row sample of it), the list of target columns and the task type as its arguments.
# Analyse a 10-row sample of the test data; the fixed random_state makes the
# same rows come back on every run.
sampled_test_data = test_data.sample(10, random_state=42)
rai_insights = RAIVisionInsights(
    model,
    sampled_test_data,
    target_columns,
    ModelTask.MULTILABEL_IMAGE_CLASSIFICATION,
)
rai_insights
Add the components of the toolbox that are focused on model assessment.
# Interpretability: register the explainer component on the insights object.
rai_insights.explainer.add()
# Error Analysis: register the error analysis component.
rai_insights.error_analysis.add()
Once all the desired components have been loaded, compute insights on the test set.
# Compute insights for the components added to rai_insights.
rai_insights.compute()
Finally, visualize and explore the model insights. Use the resulting widget or follow the link to view this in a new tab.
# Build the dashboard widget from the computed insights.
ResponsibleAIDashboard(rai_insights)