Code and visualizations to test, debug, and evaluate the Mask R-CNN model.
import os
import sys
import random
import math
import re
import time
import numpy as np
import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches
# Root directory of the project
ROOT_DIR = os.path.abspath("../../")
# Import Mask RCNN
sys.path.append(ROOT_DIR) # To find local version of the library
from mrcnn import utils
from mrcnn import visualize
from mrcnn.visualize import display_images
import mrcnn.model as modellib
from mrcnn.model import log
%matplotlib inline
# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")
# Local path to trained weights file
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
# Download COCO trained weights from Releases if needed
if not os.path.exists(COCO_MODEL_PATH):
utils.download_trained_weights(COCO_MODEL_PATH)
# Path to Shapes trained weights
SHAPES_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_shapes.h5")
Using TensorFlow backend.
# Run one of the code blocks
# Shapes toy dataset
# import shapes
# config = shapes.ShapesConfig()
# MS COCO Dataset
import coco
config = coco.CocoConfig()
COCO_DIR = "path to COCO dataset" # TODO: enter value here
# Override the training configurations with a few
# changes for inferencing.
class InferenceConfig(config.__class__):
# Run detection on one image at a time
GPU_COUNT = 1
IMAGES_PER_GPU = 1
config = InferenceConfig()
config.display()
Configurations: BACKBONE_SHAPES [[256 256] [128 128] [ 64 64] [ 32 32] [ 16 16]] BACKBONE_STRIDES [4, 8, 16, 32, 64] BATCH_SIZE 1 BBOX_STD_DEV [ 0.1 0.1 0.2 0.2] DETECTION_MAX_INSTANCES 100 DETECTION_MIN_CONFIDENCE 0.5 DETECTION_NMS_THRESHOLD 0.3 GPU_COUNT 1 IMAGES_PER_GPU 1 IMAGE_MAX_DIM 1024 IMAGE_MIN_DIM 800 IMAGE_PADDING True IMAGE_SHAPE [1024 1024 3] LEARNING_MOMENTUM 0.9 LEARNING_RATE 0.002 MASK_POOL_SIZE 14 MASK_SHAPE [28, 28] MAX_GT_INSTANCES 100 MEAN_PIXEL [ 123.7 116.8 103.9] MINI_MASK_SHAPE (56, 56) NAME coco NUM_CLASSES 81 POOL_SIZE 7 POST_NMS_ROIS_INFERENCE 1000 POST_NMS_ROIS_TRAINING 2000 ROI_POSITIVE_RATIO 0.33 RPN_ANCHOR_RATIOS [0.5, 1, 2] RPN_ANCHOR_SCALES (32, 64, 128, 256, 512) RPN_ANCHOR_STRIDE 2 RPN_BBOX_STD_DEV [ 0.1 0.1 0.2 0.2] RPN_TRAIN_ANCHORS_PER_IMAGE 256 STEPS_PER_EPOCH 1000 TRAIN_ROIS_PER_IMAGE 128 USE_MINI_MASK True USE_RPN_ROIS True VALIDATION_STEPS 50 WEIGHT_DECAY 0.0001
# Device to load the neural network on.
# Useful if you're training a model on the same
# machine, in which case use CPU and leave the
# GPU for training.
DEVICE = "/cpu:0" # /cpu:0 or /gpu:0
# Inspect the model in training or inference modes
# values: 'inference' or 'training'
# TODO: code for 'training' test mode not ready yet
TEST_MODE = "inference"
def get_ax(rows=1, cols=1, size=16):
"""Return a Matplotlib Axes array to be used in
all visualizations in the notebook. Provide a
central point to control graph sizes.
Adjust the size attribute to control how big to render images
"""
_, ax = plt.subplots(rows, cols, figsize=(size*cols, size*rows))
return ax
# Build validation dataset
if config.NAME == 'shapes':
dataset = shapes.ShapesDataset()
dataset.load_shapes(500, config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1])
elif config.NAME == "coco":
dataset = coco.CocoDataset()
dataset.load_coco(COCO_DIR, "minival")
# Must call before using the dataset
dataset.prepare()
print("Images: {}\nClasses: {}".format(len(dataset.image_ids), dataset.class_names))
loading annotations into memory... Done (t=4.86s) creating index... index created! Images: 35185 Classes: ['BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
# Create model in inference mode
with tf.device(DEVICE):
model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR,
config=config)
# Set weights file path
if config.NAME == "shapes":
weights_path = SHAPES_MODEL_PATH
elif config.NAME == "coco":
weights_path = COCO_MODEL_PATH
# Or, uncomment to load the last model you trained
# weights_path = model.find_last()
# Load weights
print("Loading weights ", weights_path)
model.load_weights(weights_path, by_name=True)
image_id = random.choice(dataset.image_ids)
image, image_meta, gt_class_id, gt_bbox, gt_mask =\
modellib.load_image_gt(dataset, config, image_id, use_mini_mask=False)
info = dataset.image_info[image_id]
print("image ID: {}.{} ({}) {}".format(info["source"], info["id"], image_id,
dataset.image_reference(image_id)))
# Run object detection
results = model.detect([image], verbose=1)
# Display results
ax = get_ax(1)
r = results[0]
visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],
dataset.class_names, r['scores'], ax=ax,
title="Predictions")
log("gt_class_id", gt_class_id)
log("gt_bbox", gt_bbox)
log("gt_mask", gt_mask)
image ID: coco.392144 (34940) http://cocodataset.org/#explore?id=392144 Processing 1 images image shape: (1024, 1024, 3) min: 0.00000 max: 255.00000 molded_images shape: (1, 1024, 1024, 3) min: -123.70000 max: 151.10000 image_metas shape: (1, 89) min: 0.00000 max: 1024.00000 gt_class_id shape: (10,) min: 1.00000 max: 40.00000 gt_bbox shape: (10, 5) min: 0.00000 max: 1024.00000 gt_mask shape: (1024, 1024, 10) min: 0.00000 max: 1.00000