- 🤖 See full list of Machine Learning Experiments on GitHub
- ▶️ Interactive Demo: try this model and other machine learning experiments in action
In this experiment we will build a Convolutional Neural Network (CNN) model using TensorFlow to recognize Rock-Paper-Scissors signs (gestures) in photos.
Instead of training the model from scratch we will use a MobileNetV2 model pre-trained on ImageNet, a large dataset of ~1.4M web images spanning 1000 classes.
Inspired by Getting started with TensorFlow 2.0 article.
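As a rough preview of the transfer-learning idea (a minimal sketch only; the actual input size, classifier head and training setup are defined step by step in the sections below):
# Minimal transfer-learning sketch (illustrative only; the real model is assembled later).
import tensorflow as tf
base_model = tf.keras.applications.MobileNetV2(
    input_shape=(128, 128, 3),
    include_top=False,
    weights='imagenet'
)
base_model.trainable = False  # Freeze the pre-trained convolutional base.
model = tf.keras.Sequential([
    base_model,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(3, activation='softmax'),  # rock, paper, scissors
])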
# Select the TensorFlow 2.x version (this command is relevant for Colab only).
%tensorflow_version 2.x
import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import numpy as np
import platform
import datetime
import os
import math
import random
print('Python version:', platform.python_version())
print('Tensorflow version:', tf.__version__)
print('Keras version:', tf.keras.__version__)
We will use TensorBoard as a helper to debug the model training process.
# Load the TensorBoard notebook extension.
# %reload_ext tensorboard
%load_ext tensorboard
# Clear any logs from previous runs.
!rm -rf ./logs/
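TensorBoard picks up whatever we write to a log directory during training. A minimal sketch of the usual wiring (the actual log path and fit() call appear later in this experiment):
# Hypothetical wiring of a TensorBoard callback (the real one is set up later).
log_dir_example = os.path.join('logs', 'fit', datetime.datetime.now().strftime('%Y%m%d-%H%M%S'))
tensorboard_callback_example = tf.keras.callbacks.TensorBoard(log_dir=log_dir_example, histogram_freq=1)
# Later: model.fit(..., callbacks=[tensorboard_callback_example])
# And to view the dashboards: %tensorboard --logdir logs/fit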
We will download the Rock-Paper-Scissors dataset from the TensorFlow Datasets collection using the tensorflow_datasets module imported above.
tensorflow_datasets defines a collection of datasets that are ready to use with TensorFlow.
Each dataset is defined as a tfds.core.DatasetBuilder, which encapsulates the logic to download the dataset and construct an input pipeline, as well as contains the dataset documentation (version, splits, number of examples, etc.).
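For reference, tfds.load (used below) is a thin convenience wrapper around this builder API; a roughly equivalent sketch looks like this:
# Roughly what tfds.load does under the hood (sketch, not used further in this notebook).
builder = tfds.builder('rock_paper_scissors', data_dir='tmp')
builder.download_and_prepare()
builder_datasets = builder.as_dataset(as_supervised=True)
builder_train_split = builder_datasets['train']
builder_info = builder.info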
# See available datasets
tfds.list_builders()
DATASET_NAME = 'rock_paper_scissors'
(dataset_train_raw, dataset_test_raw), dataset_info = tfds.load(
    name=DATASET_NAME,
    data_dir='tmp',
    with_info=True,
    as_supervised=True,
    split=[tfds.Split.TRAIN, tfds.Split.TEST],
)
print('Raw train dataset:', dataset_train_raw)
print('Raw train dataset size:', len(list(dataset_train_raw)), '\n')
print('Raw test dataset:', dataset_test_raw)
print('Raw test dataset size:', len(list(dataset_test_raw)), '\n')
dataset_info
NUM_TRAIN_EXAMPLES = dataset_info.splits['train'].num_examples
NUM_TEST_EXAMPLES = dataset_info.splits['test'].num_examples
NUM_CLASSES = dataset_info.features['label'].num_classes
print('Number of TRAIN examples:', NUM_TRAIN_EXAMPLES)
print('Number of TEST examples:', NUM_TEST_EXAMPLES)
print('Number of label classes:', NUM_CLASSES)
INPUT_IMG_SIZE_ORIGINAL = dataset_info.features['image'].shape[0]
INPUT_IMG_SHAPE_ORIGINAL = dataset_info.features['image'].shape
# For Mobilenet v2 possible input sizes are [96, 128, 160, 192, 224].
INPUT_IMG_SIZE_REDUCED = 128
INPUT_IMG_SHAPE_REDUCED = (
    INPUT_IMG_SIZE_REDUCED,
    INPUT_IMG_SIZE_REDUCED,
    INPUT_IMG_SHAPE_ORIGINAL[2]
)
# Here we may switch between the bigger (original) or smaller (reduced) image size that we will train our model on.
INPUT_IMG_SIZE = INPUT_IMG_SIZE_REDUCED
INPUT_IMG_SHAPE = INPUT_IMG_SHAPE_REDUCED
print('Input image size (original):', INPUT_IMG_SIZE_ORIGINAL)
print('Input image shape (original):', INPUT_IMG_SHAPE_ORIGINAL)
print('\n')
print('Input image size (reduced):', INPUT_IMG_SIZE_REDUCED)
print('Input image shape (reduced):', INPUT_IMG_SHAPE_REDUCED)
print('\n')
print('Input image size:', INPUT_IMG_SIZE)
print('Input image shape:', INPUT_IMG_SHAPE)
# Function to convert a label ID to the label string.
get_label_name = dataset_info.features['label'].int2str
print(get_label_name(0))
print(get_label_name(1))
print(get_label_name(2))
def preview_dataset(dataset):
    plt.figure(figsize=(12, 12))
    plot_index = 0
    for features in dataset.take(12):
        (image, label) = features
        plot_index += 1
        plt.subplot(3, 4, plot_index)
        # plt.axis('Off')
        label = get_label_name(label.numpy())
        plt.title('Label: %s' % label)
        plt.imshow(image.numpy())
# Explore raw training dataset images.
preview_dataset(dataset_train_raw)
# Explore what values are used to represent the image.
(first_image, first_label) = list(dataset_train_raw.take(1))[0]
print('Label:', first_label.numpy(), '\n')
print('Image shape:', first_image.numpy().shape, '\n')
print(first_image.numpy())
def format_example(image, label):
    # Convert image color values to floats.
    image = tf.cast(image, tf.float32)
    # Scale image color values to the [0, 1] range.
    image = image / 255.
    # Resize the image to the desired input size.
    image = tf.image.resize(image, [INPUT_IMG_SIZE, INPUT_IMG_SIZE])
    return image, label
dataset_train = dataset_train_raw.map(format_example)
dataset_test = dataset_test_raw.map(format_example)
# Explore what values are used to represent the image.
(first_image, first_label) = list(dataset_train.take(1))[0]
print('Label:', first_label.numpy(), '\n')
print('Image shape:', first_image.numpy().shape, '\n')
print(first_image.numpy())
# Explore preprocessed training dataset images.
preview_dataset(dataset_train)
One of the ways to fight model overfitting and to help the model generalize to a broader set of examples is to augment the training data.
As you saw in the previous section, all training examples have a white background and vertically positioned right hands. But what if the hand in the image is positioned horizontally, or the background is not that bright? What if the model sees a left hand instead of a right one? To make our model a bit more universal we're going to flip and rotate images and also adjust background colors.
You may read more in the Simple and efficient data augmentations using the TensorFlow tf.data and Dataset API article.
def augment_flip(image: tf.Tensor) -> tf.Tensor:
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_flip_up_down(image)
    return image
def augment_color(image: tf.Tensor) -> tf.Tensor:
    image = tf.image.random_hue(image, max_delta=0.08)
    image = tf.image.random_saturation(image, lower=0.7, upper=1.3)
    image = tf.image.random_brightness(image, 0.05)
    image = tf.image.random_contrast(image, lower=0.8, upper=1)
    image = tf.clip_by_value(image, clip_value_min=0, clip_value_max=1)
    return image
def augment_rotation(image: tf.Tensor) -> tf.Tensor:
    # Rotate the image by 0, 90, 180 or 270 degrees.
    return tf.image.rot90(
        image,
        tf.random.uniform(shape=[], minval=0, maxval=4, dtype=tf.int32)
    )
def augment_inversion(image: tf.Tensor) -> tf.Tensor:
    # Invert image colors (1 - pixel value) for a random half of the images.
    random = tf.random.uniform(shape=[], minval=0, maxval=1)
    if random > 0.5:
        image = tf.math.multiply(image, -1)
        image = tf.math.add(image, 1)
    return image
def augment_zoom(image: tf.Tensor, min_zoom=0.8, max_zoom=1.0) -> tf.Tensor:
    image_width, image_height, image_colors = image.shape
    crop_size = (image_width, image_height)

    # Generate crop settings, ranging from a 0% to 20% crop.
    scales = list(np.arange(min_zoom, max_zoom, 0.01))
    boxes = np.zeros((len(scales), 4))

    for i, scale in enumerate(scales):
        x1 = y1 = 0.5 - (0.5 * scale)
        x2 = y2 = 0.5 + (0.5 * scale)
        boxes[i] = [x1, y1, x2, y2]

    def random_crop(img):
        # Create different crops for an image.
        crops = tf.image.crop_and_resize(
            [img],
            boxes=boxes,
            box_indices=np.zeros(len(scales)),
            crop_size=crop_size
        )
        # Return a random crop.
        return crops[tf.random.uniform(shape=[], minval=0, maxval=len(scales), dtype=tf.int32)]

    choice = tf.random.uniform(shape=[], minval=0., maxval=1., dtype=tf.float32)

    # Only apply cropping 50% of the time.
    return tf.cond(choice < 0.5, lambda: image, lambda: random_crop(image))
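Before combining all of the augmentations below, it can be useful to eyeball a single one in isolation. A small sketch that reuses first_image from the cells above (the figure layout here is illustrative):
# Quick visual sanity check of one augmentation on a single formatted image.
plt.figure(figsize=(6, 3))
plt.subplot(1, 2, 1)
plt.title('Original')
plt.imshow(first_image.numpy())
plt.subplot(1, 2, 2)
plt.title('Randomly flipped')
plt.imshow(augment_flip(first_image).numpy())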
def augment_data(image, label):
    image = augment_flip(image)
    image = augment_color(image)
    image = augment_rotation(image)
    image = augment_zoom(image)
    image = augment_inversion(image)
    return image, label
dataset_train_augmented = dataset_train.map(augment_data)
# Explore augmented training dataset.
preview_dataset(dataset_train_augmented)