Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
You built an image classification model, evaluated it on a validation set, and got decent accuracy. Now you deploy the model in a real-world scenario, and soon you may find that it performs worse than expected.
This is a common (and inevitable) scenario when building a machine learning model, because we cannot collect all possible samples. Your model is supposed to learn the features that best describe the target classes, but in practice it learns the best features to classify your dataset. For example, if our photos always show a butterfly on a flower, the model may learn flower shapes to classify butterflies.
Did our model learn a butterfly, or yellow flowers? (example images omitted)
Hard negative sampling (or hard negative mining) is a useful technique to address this pitfall. It explicitly adds falsely classified samples to your training set. The technique is widely used when you cannot simply add all available negative samples, because (i) training would become too slow with that many samples, and (ii) most negative images are trivial for the model, so it would not learn anything from them. Instead, we try to identify the negative images that make a difference when added to the training set.
In this notebook, we train our model on a training set as usual, test the model on unseen negative examples, and check whether the model classifies them correctly. If not, we introduce those samples into the training set and re-train the model.
Our goal is to train a classifier which can recognize fridge objects (water_bottle, carton, can, and milk_bottle), similar to the 01_training notebook. However, in a real use-case the input image might not contain any of these objects at all. Therefore, we also introduce a negative class.
The overall training process is as follows:

1. Prepare the datasets: a training set T, a negative-candidate set U, and a validation set V.
2. Load a pre-trained model.
3. Train the model on T.
4. Score the model on U and collect the falsely classified (hard) negative samples.
5. Add those hard negatives to T and repeat from step 3 until the model performs well on negative samples.
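To make the loop concrete, here is a minimal, self-contained toy sketch. Everything in it is illustrative (the file names and the string-based stand-in for training are not real); the actual fastai-based implementation follows in the rest of this notebook.

# Toy sketch of the hard-negative mining loop; all names here are illustrative.
NEG_PER_ROUND = 2
T = ["can_01.jpg", "carton_01.jpg", "neg_01.jpg"]  # training set T with one seed negative
U = ["neg_02.jpg", "neg_03.jpg", "neg_04.jpg"]     # candidate negative images U

def toy_train(train_set):
    # Stand-in for fitting a CNN: "learns" to recognize the negatives it has seen.
    return {x for x in train_set if x.startswith("neg")}

for _ in range(5):
    learned = toy_train(T)                     # 3. train the model on T
    hard = [x for x in U if x not in learned]  # 4. score U; keep misclassified negatives
    if not hard:
        break                                  # the model handles all negatives correctly
    T += hard[:NEG_PER_ROUND]                  # 5. add the hard negatives to T and repeat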
# Ensure edits to libraries are loaded and plotting is shown in the notebook.
%reload_ext autoreload
%autoreload 2
%matplotlib inline
from functools import partial
import os
from pathlib import Path
import sys
import shutil
from tempfile import TemporaryDirectory
import matplotlib.pyplot as plt
import numpy as np
import scrapbook as sb
import fastai
from fastai.metrics import accuracy
from fastai.vision import (
CategoryList, DatasetType, get_image_files, ImageList, imagenet_stats,
cnn_learner, models, ClassificationInterpretation,
)
sys.path.append("../../")
from utils_cv.classification.model import (
IMAGENET_IM_SIZE as IMAGE_SIZE,
TrainMetricsRecorder,
get_preds,
)
from utils_cv.classification.plot import plot_pr_roc_curves
from utils_cv.classification.widget import ResultsWidget
from utils_cv.classification.data import Urls
from utils_cv.common.data import unzip_url
from utils_cv.common.gpu import db_num_workers, which_processor
from utils_cv.common.misc import copy_files, set_random_seed
from utils_cv.common.plot import line_graph, show_ims
print(f"Fast.ai version = {fastai.__version__}")
which_processor()
Fast.ai version = 1.0.48
Fast.ai (Torch) is using GPU: Tesla K80
DATA_PATH = unzip_url(Urls.fridge_objects_path, exist_ok=True)
# Number of negative samples to add for each iteration of negative mining
NEGATIVE_NUM = 10
EPOCHS_HEAD = 4
EPOCHS_BODY = 12
LEARNING_RATE = 1e-4
BATCH_SIZE = 16
# Using fast_inference parameters from 02_training_accuracy_vs_speed notebook.
ARCHITECTURE = models.resnet18
IM_SIZE = 300
# Temporary folder to store datasets for hard-negative mining
NEGATIVE_MINING_DATA_DIR = TemporaryDirectory().name
We prepare our dataset in the following way:

* Training set T, which includes the fridge objects as well as some initial negative samples⁺.
* Negative-image set U, from which we mine hard negatives.
* Validation set V, which contains both fridge objects and negative samples. We evaluate our model on this set.

⁺ We add NEGATIVE_NUM negative samples to our initial training set. In a real use-case, you may want to include 100 or more negative images.
ori_datapath = Path(DATA_PATH)
neg_datapath = Path(unzip_url(Urls.fridge_objects_negatives_path, exist_ok=True))
# We split positive samples into 80% training and 20% validation
data_imlist = (
ImageList.from_folder(ori_datapath)
.split_by_rand_pct(valid_pct=0.2, seed=10)
.label_from_folder()
)
# We use 80% of the negative images for hard-negative mining (set U) and 20% for validation
neg_data = (
ImageList.from_folder(neg_datapath)
.split_by_rand_pct(valid_pct=0.2, seed=10)
.label_const() # We don't use labels for negative data
.transform(size=IMAGE_SIZE)
.databunch(bs=BATCH_SIZE, num_workers=db_num_workers())
.normalize(imagenet_stats)
)
# Do not shuffle U when we predict
neg_data.train_dl = neg_data.train_dl.new(shuffle=False)
neg_data
ImageDataBunch; Train: LabelList (52 items) x: ImageList Image (3, 224, 224),Image (3, 224, 224),Image (3, 224, 224),Image (3, 224, 224),Image (3, 224, 224) y: CategoryList 0,0,0,0,0 Path: /data/home/jumin/git/cvbp/data/fridgeObjectsNegative; Valid: LabelList (12 items) x: ImageList Image (3, 224, 224),Image (3, 224, 224),Image (3, 224, 224),Image (3, 224, 224),Image (3, 224, 224) y: CategoryList 0,0,0,0,0 Path: /data/home/jumin/git/cvbp/data/fridgeObjectsNegative; Test: None
datapath = Path(NEGATIVE_MINING_DATA_DIR)/'data'
# Training set T
copy_files(data_imlist.train.items, datapath/'train', infer_subdir=True)
# We add the first NEGATIVE_NUM negative images from U (neg_data.train_ds) to our initial training set T
copy_files(neg_data.train_ds.items[:NEGATIVE_NUM], datapath/'train'/'negative')
# Validation set V
copy_files(data_imlist.valid.items, datapath/'valid', infer_subdir=True)
copy_files(neg_data.valid_ds.items, datapath/'valid'/'negative')
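As a quick sanity check (this cell is an addition, not part of the original flow), we can count how many images landed in each split; get_image_files was imported from fastai.vision above.

# Sanity check (illustrative): image counts per split after the copies above.
n_train = len(get_image_files(datapath/'train', recurse=True))
n_valid = len(get_image_files(datapath/'valid', recurse=True))
print(f"T (train): {n_train} images, V (valid): {n_valid} images, "
      f"U (negative pool): {len(neg_data.train_ds)} images")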
set_random_seed(10)
data = (
ImageList.from_folder(datapath)
.split_by_folder()
.label_from_folder()
.transform(size=IMAGE_SIZE)
.databunch(bs=BATCH_SIZE, num_workers=db_num_workers())
.normalize(imagenet_stats)
)
data.show_batch()
print(f'number of classes: {data.c} = {data.classes}')
print(data.batch_stats)
number of classes: 5 = ['can', 'carton', 'milk_bottle', 'negative', 'water_bottle'] <bound method ImageDataBunch.batch_stats of ImageDataBunch; Train: LabelList (118 items) x: ImageList Image (3, 224, 224),Image (3, 224, 224),Image (3, 224, 224),Image (3, 224, 224),Image (3, 224, 224) y: CategoryList water_bottle,water_bottle,water_bottle,water_bottle,water_bottle Path: /tmp/tmpl7esf9hg/data; Valid: LabelList (38 items) x: ImageList Image (3, 224, 224),Image (3, 224, 224),Image (3, 224, 224),Image (3, 224, 224),Image (3, 224, 224) y: CategoryList milk_bottle,milk_bottle,milk_bottle,carton,carton Path: /tmp/tmpl7esf9hg/data; Test: None>
We use the fast-inference setup demonstrated in the 02_training_accuracy_vs_speed notebook: a ResNet18 model pre-trained on ImageNet. For details on the training concepts, please see the 01_training notebook.
learn = cnn_learner(data, ARCHITECTURE, metrics=accuracy)
learn.fit_one_cycle(EPOCHS_HEAD, 10 * LEARNING_RATE)
epoch | train_loss | valid_loss | accuracy | time |
---|---|---|---|---|
0 | 2.151283 | 1.767368 | 0.342105 | 00:01 |
1 | 2.057722 | 1.649740 | 0.342105 | 00:01 |
2 | 1.988815 | 1.598645 | 0.342105 | 00:01 |
3 | 1.981072 | 1.582510 | 0.315789 | 00:01 |
# Record train and valid accuracies with the TrainMetricsRecorder callback
learn.callbacks.append(TrainMetricsRecorder(learn, show_graph=True))
learn.unfreeze()
# We record train and valid accuracies for later analysis
train_acc = []
valid_acc = []
interpretations = []
From this section to the end, we perform training and negative mining. As described in the Overview section, you may need to repeat the negative mining steps several times to achieve good results.
# Show how many negative-mining repetitions have been run so far
print(f"Ran {len(interpretations)} time(s)")
Ran 3 time(s)
learn.fit_one_cycle(EPOCHS_BODY, LEARNING_RATE)
epoch | train_loss | valid_loss | train_accuracy | valid_accuracy | time |
---|---|---|---|---|---|
0 | 0.030573 | 0.300409 | 0.992188 | 0.973684 | 00:01 |
1 | 0.027056 | 0.299452 | 1.000000 | 0.973684 | 00:01 |
2 | 0.026036 | 0.307839 | 1.000000 | 0.921053 | 00:01 |
3 | 0.028060 | 0.291998 | 1.000000 | 0.947368 | 00:01 |
4 | 0.024069 | 0.284929 | 1.000000 | 0.947368 | 00:01 |
5 | 0.025142 | 0.262559 | 0.992188 | 1.000000 | 00:01 |
6 | 0.023296 | 0.249425 | 1.000000 | 1.000000 | 00:01 |
7 | 0.023795 | 0.264898 | 0.992188 | 1.000000 | 00:01 |
8 | 0.021493 | 0.284928 | 1.000000 | 0.973684 | 00:01 |
9 | 0.020484 | 0.278413 | 1.000000 | 0.973684 | 00:01 |
10 | 0.022188 | 0.266408 | 1.000000 | 1.000000 | 00:01 |
11 | 0.021493 | 0.250755 | 1.000000 | 1.000000 | 00:01 |
The following cell shows the confusion matrix for the validation set. If you are repeating the negative mining steps, you will see the confusion matrices from all repetitions.
interpretations.append(ClassificationInterpretation.from_learner(learn))
for i, interp in enumerate(interpretations):
interp.plot_confusion_matrix()
# Store train and valid accuracy
train_acc.extend(np.array(learn.train_metrics_recorder.train_metrics)[:, 0])
valid_acc.extend(np.array(learn.train_metrics_recorder.valid_metrics)[:, 0])
line_graph(
values=(train_acc, valid_acc),
labels=("Train", "Valid"),
x_guides=[i*EPOCHS_BODY for i in range(1, len(train_acc)//EPOCHS_BODY + 1)],
x_name="Epoch",
y_name="Accuracy",
)
If the model performs well enough, we can stop the training / negative sampling here.
If not, let's do hard negative sampling.
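One possible quantitative check, sketched below (the check and the 0.95 threshold are assumptions, not part of the original notebook): compute the recall of the negative class from the latest confusion matrix and stop mining once it is high enough.

# Illustrative stopping check: recall of the 'negative' class on V.
cm = interpretations[-1].confusion_matrix()   # rows = actual, columns = predicted
neg_idx = data.classes.index('negative')
neg_recall = cm[neg_idx, neg_idx] / cm[neg_idx].sum()
print(f"Recall on negative validation samples: {neg_recall:.2f}")
# e.g., stop mining once neg_recall >= 0.95 (the threshold is an assumption)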
pred_outs = np.array(get_preds(learn, neg_data.train_dl)[0].tolist())
print(f"Prediction results:\n{pred_outs[:10]}\n...")
Prediction results: [[5.636569e-04 7.190045e-04 1.893304e-04 9.984304e-01 9.756161e-05] [4.523375e-04 4.494532e-04 9.698982e-04 9.978563e-01 2.719614e-04] [1.549957e-04 2.096869e-04 8.716118e-04 9.986691e-01 9.449781e-05] [1.283643e-04 1.912243e-04 5.156257e-04 9.989849e-01 1.799864e-04] [6.468197e-04 1.271848e-03 1.257834e-03 9.958363e-01 9.872657e-04] [1.189604e-04 1.229729e-03 7.452155e-04 9.977199e-01 1.861462e-04] [4.616476e-04 9.085410e-04 3.231828e-04 9.978186e-01 4.879920e-04] [1.044702e-03 4.892807e-04 4.133865e-04 9.978685e-01 1.840416e-04] [1.986413e-03 2.160306e-03 1.787698e-03 9.926257e-01 1.439970e-03] [1.664896e-04 3.611505e-04 4.116161e-04 9.988049e-01 2.559099e-04]] ...
# Get the top-n falsely classified images, sorted by confidence (the most confident mistakes are the hardest negatives)
preds = np.argmax(pred_outs, axis=1)
wrong_ids = np.where(preds!=data.classes.index('negative'))[0]
wrong_ids_confs = [(i, pred_outs[i][preds[i]]) for i in wrong_ids]
wrong_ids_confs = sorted(wrong_ids_confs, key=lambda l:l[1], reverse=True)[:NEGATIVE_NUM]
negative_sample_ids = [w[0] for w in wrong_ids_confs]
negative_sample_labels = [f"Pred: {data.classes[preds[w[0]]]}\nConf: {w[1]:.3f}" for w in wrong_ids_confs]
show_ims(neg_data.train_ds.items[negative_sample_ids], negative_sample_labels, rows=NEGATIVE_NUM//5)
We add the hard negative samples to the training set.
copy_files(neg_data.train_ds.items[negative_sample_ids], datapath/'train'/'negative')
# Reload the dataset which includes more negative-samples
data = (ImageList.from_folder(datapath)
.split_by_folder()
.label_from_folder()
.transform(size=IMAGE_SIZE)
.databunch(bs=BATCH_SIZE, num_workers=db_num_workers())
.normalize(imagenet_stats))
print(data.batch_stats)
# Set the dataset to the learner
learn.data = data
<bound method ImageDataBunch.batch_stats of ImageDataBunch; Train: LabelList (132 items) x: ImageList Image (3, 224, 224),Image (3, 224, 224),Image (3, 224, 224),Image (3, 224, 224),Image (3, 224, 224) y: CategoryList water_bottle,water_bottle,water_bottle,water_bottle,water_bottle Path: /tmp/tmpl7esf9hg/data; Valid: LabelList (38 items) x: ImageList Image (3, 224, 224),Image (3, 224, 224),Image (3, 224, 224),Image (3, 224, 224),Image (3, 224, 224) y: CategoryList milk_bottle,milk_bottle,milk_bottle,carton,carton Path: /tmp/tmpl7esf9hg/data; Test: None>
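Note that assigning to learn.data swaps in the enlarged databunch while keeping the weights trained so far. As a quick illustrative check (an addition, not in the original notebook), we can confirm the training set grew:

# Illustrative check: the training set grew by up to NEGATIVE_NUM images.
print(f"Training set now has {len(learn.data.train_ds)} images across {learn.data.c} classes")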
Now, let's go back to step "3. Train the model on T" and repeat the training and negative mining steps until we get decent accuracy on the negative samples.
# Finally, show how many negative-mining repetitions were run in total
print(f"Ran {len(interpretations)} time(s)")
Ran 4 time(s)
# Preserve some of the notebook outputs
sb.glue("train_acc", train_acc)
sb.glue("valid_acc", valid_acc)
sb.glue("negative_sample_ids", negative_sample_ids)