try:
    # %tensorflow_version only exists in Colab.
    %tensorflow_version 2.x
except Exception:
    pass
import tensorflow as tf
from tensorflow import keras
print ('tensorflow version :', tf.__version__)
tf.config.experimental.list_physical_devices()
## Loading our custom utils files
import os
import urllib.request
# Hack to download our utils when running on Colab etc.
file_url = 'https://raw.githubusercontent.com/elephantscale/es-public/master/deep-learning/transfer_learning_utils.py'
file_location = "transfer_learning_utils.py"
if not os.path.exists(file_location):
    file_location = os.path.basename(file_location)
    if not os.path.exists(file_location):
        print("Downloading : ", file_url)
        urllib.request.urlretrieve(file_url, file_location)
# print('file_location:', file_location)
## Loading our custom utils files
import os
import urllib.request
# Hack to download image utils when running on Colab etc.
file_url = 'https://raw.githubusercontent.com/elephantscale/es-public/master/deep-learning/image_utils.py'
file_location = "image_utils.py"
if not os.path.exists(file_location):
    file_location = os.path.basename(file_location)
    if not os.path.exists(file_location):
        print("Downloading : ", file_url)
        urllib.request.urlretrieve(file_url, file_location)
# print('file_location:', file_location)
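Since the two download cells above are identical except for the file name, you could fold them into a small helper. A minimal sketch; the download_if_missing name is ours, not part of the course utils:

import os
import urllib.request

def download_if_missing(file_url, file_location):
    """Download file_url to file_location unless it is already present."""
    if not os.path.exists(file_location):
        print("Downloading : ", file_url)
        urllib.request.urlretrieve(file_url, file_location)
    return file_location

# download_if_missing('https://raw.githubusercontent.com/elephantscale/es-public/master/deep-learning/image_utils.py',
#                     'image_utils.py')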
The following cell sets TF properties for running on a GPU.
## This block is to tweak TF running on GPU
## You may comment this out if you are not using a GPU
## ---- start Memory setting ----
## Ask TF not to allocate all GPU memory at once; allocate as needed
## Without this, execution can fail with a "failed to initialize algorithm" error
from tensorflow.compat.v1.keras.backend import set_session
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True # dynamically grow the memory used on the GPU
config.log_device_placement = True # to log device placement (on which device the operation ran)
sess = tf.compat.v1.Session(config=config)
set_session(sess)
## ---- end Memory setting ----
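If you would rather stay on the native TF 2.x API instead of the compat.v1 session, the same allow-growth behavior is available directly. A minimal sketch; note it must run before any GPU has been initialized:

## TF 2.x equivalent of the allow_growth setting above (no v1 session needed)
## Memory growth must be set before the GPUs are first used
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)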
We will use the cat-dog-redux dataset.
## Common constants
IMG_WIDTH=160
IMG_HEIGHT=160
NUM_CLASSES=2
BATCH_SIZE=64
APP_NAME = 'retrain'
EPOCHS = 10
import os
data_location = 'https://elephantscale-public.s3.amazonaws.com/data/images/cat-dog-redux.zip'
data_location_local = keras.utils.get_file(fname=os.path.basename(data_location),
origin=data_location, extract=True)
print ('local download file: ', data_location_local)
data_dir = os.path.join(os.path.dirname(data_location_local), 'cat-dog-redux')
print ('local data dir: ', data_dir)
train_dir = os.path.join(data_dir, 'train')
validation_dir = os.path.join(data_dir, 'val')
print ('train dir:', train_dir)
print ('validation dir:', validation_dir)
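As a quick sanity check, the optional sketch below counts the images under each class subdirectory, assuming the standard one-subdirectory-per-class layout that flow_from_directory expects:

## Optional sanity check: count images per class subdirectory
for split_dir in (train_dir, validation_dir):
    for class_name in sorted(os.listdir(split_dir)):
        class_dir = os.path.join(split_dir, class_name)
        if os.path.isdir(class_dir):
            print('{}/{} : {:,} images'.format(os.path.basename(split_dir), class_name,
                                               len(os.listdir(class_dir))))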
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# train_image_generator = ImageDataGenerator(rescale=1./255) # Generator for our training data
train_image_generator = ImageDataGenerator(
    rescale=1./255,
    rotation_range=45,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)
validation_image_generator = ImageDataGenerator(rescale=1./255) # Generator for our validation data
train_data_gen = train_image_generator.flow_from_directory(batch_size=BATCH_SIZE,
                                                           directory=train_dir,
                                                           shuffle=True,
                                                           target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                           class_mode='categorical')
val_data_gen = validation_image_generator.flow_from_directory(batch_size=BATCH_SIZE,
                                                              directory=validation_dir,
                                                              shuffle=False,  # keep order so labels line up with predictions later
                                                              target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                              class_mode='categorical')
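To verify the generators are wired up correctly, you can pull a single batch and check its shape. An optional sketch; note this consumes one batch from the training generator:

## Optional sanity check: inspect one batch from the training generator
sample_images, sample_labels = next(train_data_gen)
print('images batch shape:', sample_images.shape)   # expect (BATCH_SIZE, IMG_HEIGHT, IMG_WIDTH, 3)
print('labels batch shape:', sample_labels.shape)   # expect (BATCH_SIZE, NUM_CLASSES)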
Let's try the InceptionV3 model. We will only download the 'base' model, without the final classification layers.
from transfer_learning_utils import print_model_summary_compact
pre_trained_model = tf.keras.applications.InceptionV3(input_shape=(IMG_WIDTH, IMG_HEIGHT, 3),
                                                      include_top=False,
                                                      weights='imagenet')
print_model_summary_compact(pre_trained_model)
print ()
! du -skh ~/.keras/models/* | grep inception
# pre_trained_model.summary() # large output
# last_layer = pre_trained_model.get_layer('mixed7')
last_layer = pre_trained_model.layers[-1]
print ("last_layer.name:", last_layer.name)
print ('last_layer.output_shape:', last_layer.output_shape)
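If you want to cut the base model off earlier (e.g. the 'mixed7' option commented out above), it helps to inspect candidate layer names first. A quick sketch:

## Optional: list the last few layers, useful when choosing an earlier cutoff
for layer in pre_trained_model.layers[-5:]:
    print(layer.name, layer.output_shape)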
## Freeze all layers
# for layer in pre_trained_model.layers:
#     layer.trainable = False
# or this works too
pre_trained_model.trainable = False
print_model_summary_compact (pre_trained_model)
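To confirm the freeze took effect, you can compare trainable and non-trainable parameter counts. A minimal sketch:

## Verify the freeze: the base model should report zero trainable parameters
import numpy as np
trainable_count = sum(int(np.prod(w.shape.as_list())) for w in pre_trained_model.trainable_weights)
non_trainable_count = sum(int(np.prod(w.shape.as_list())) for w in pre_trained_model.non_trainable_weights)
print('trainable params     : {:,}'.format(trainable_count))
print('non-trainable params : {:,}'.format(non_trainable_count))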
We are going to add a couple of layers on top of the base model.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Softmax, Flatten, GlobalAveragePooling2D, Dropout
from transfer_learning_utils import print_model_summary_compact
model = Sequential([
    Input(shape=(IMG_WIDTH, IMG_HEIGHT, 3)),
    pre_trained_model,
    Flatten(),
    # GlobalAveragePooling2D(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(NUM_CLASSES, activation='softmax')
])
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
print_model_summary_compact(model)
print()
# my_model.summary() # large output
tf.keras.utils.plot_model(model, to_file='model.png', show_shapes=True)
## This is fairly boilerplate code
import datetime
import os
import shutil
app_name = APP_NAME
# timestamp = datetime.datetime.now().strftime("%Y-%m-%d--%H-%M-%S")
tb_top_level_dir= '/tmp/tensorboard-logs'
tb_app_dir = os.path.join (tb_top_level_dir, app_name)
tb_logs_dir = os.path.join (tb_app_dir, datetime.datetime.now().strftime("%H-%M-%S"))
print ("Saving TB logs to : " , tb_logs_dir)
#clear out old logs
shutil.rmtree ( tb_app_dir, ignore_errors=True )
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=tb_logs_dir, write_graph=True,
                                                      write_images=True, histogram_freq=1)
## This will embed TensorBoard right here in Jupyter!
# ! killall tensorboard # kill previously running tensorboards
%load_ext tensorboard
%tensorboard --logdir $tb_logs_dir
%%time
steps_per_epoch = train_data_gen.n // train_data_gen.batch_size
validation_steps = val_data_gen.n // val_data_gen.batch_size
print ('steps_per_epoch:', steps_per_epoch)
print ('validation_steps:', validation_steps)
history = model.fit(
    train_data_gen,
    steps_per_epoch=steps_per_epoch,
    epochs=EPOCHS,
    validation_data=val_data_gen,
    validation_steps=validation_steps,
    callbacks=[tensorboard_callback]
)
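If you increase the epoch count, an early-stopping callback can halt training once validation accuracy stops improving. A hedged sketch using the standard Keras callback; the patience value is just an example:

## Optional: stop training early when validation accuracy plateaus
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=2,
                                              restore_best_weights=True)
## then pass callbacks=[tensorboard_callback, early_stop] to model.fit()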
As you can see, training takes a long time.
Let's save the resulting model, so we can reuse it without going through training again.
import os
model_file = APP_NAME + '-model.h5'
model.save(model_file)
model_size_in_bytes = os.path.getsize(model_file)
print ("model saved as '{}', size = {:,} bytes / {:,.1f} KB / {:,.1f} MB".format(model_file,
       model_size_in_bytes, model_size_in_bytes / 1e3,
       model_size_in_bytes / 1e6))
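As a sanity check on the saved file, you can reload it and confirm it round-trips; load_model rebuilds the full architecture plus weights from the HDF5 file. A short sketch:

## Sanity check: reload the saved model
reloaded_model = keras.models.load_model(model_file)
print_model_summary_compact(reloaded_model)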
%matplotlib inline
import matplotlib.pyplot as plt
plt.plot(history.history['accuracy'], label='train_accuracy')
plt.plot(history.history['val_accuracy'], label='val_accuracy')
plt.legend()
plt.show()
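The same history object also holds the loss curves, which are worth plotting to spot overfitting:

plt.plot(history.history['loss'], label='train_loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.legend()
plt.show()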
from math import ceil
metrics = model.evaluate(val_data_gen, batch_size=val_data_gen.batch_size, steps=ceil(val_data_gen.n / val_data_gen.batch_size))
metric_names = model.metrics_names
print ("model metrics : " , metric_names)
for name, value in zip(metric_names, metrics):
    print ("Metric : {} = {:,.3f}".format(name, value))
import numpy as np
from math import ceil
print ("predicting on {:,} test images".format(val_data_gen.n))
# we need a ceiling for steps
predictions = model.predict(val_data_gen, batch_size=val_data_gen.batch_size,
steps=ceil(val_data_gen.n / val_data_gen.batch_size) )
print( 'predictions.shape: ', predictions.shape)
if val_data_gen.class_mode == 'categorical':
    # converting softmax --> classes
    print ("converting softmax --> classes")
    predictions2 = [np.argmax(p) for p in predictions]
if val_data_gen.class_mode == 'binary':
    # converting sigmoid --> classes
    print ("converting sigmoid --> classes")
    predictions2 = [0 if n < 0.5 else 1 for n in predictions]
## Ensure all prediction counts match
assert(len(predictions) == len(predictions2) == len(val_data_gen.classes))
np.set_printoptions(formatter={'float': '{: 0.2f}'.format})
print ('predictions : ', predictions[:10])
print ('predictions2 : ', predictions2[:10])
from sklearn.metrics import confusion_matrix
import seaborn as sns
test_labels = val_data_gen.classes
cm = confusion_matrix(test_labels, predictions2, labels = range(0, NUM_CLASSES))
cm
import matplotlib.pyplot as plt
import seaborn as sns
print ("class index mapping : ", val_data_gen.class_indices)
plt.figure(figsize = (8,6))
# colormaps : cmap="YlGnBu" , cmap="Greens", cmap="Blues", cmap="Reds"
sns.heatmap(cm, annot=True, cmap="Reds", fmt='d')
plt.show()
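For per-class precision and recall to go with the confusion matrix, scikit-learn's classification_report works on the same labels and predictions. A short sketch:

from sklearn.metrics import classification_report

## Per-class precision / recall / F1 on the validation set
class_names = sorted(val_data_gen.class_indices, key=val_data_gen.class_indices.get)
print(classification_report(test_labels, predictions2, target_names=class_names))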
Before running the next exercise, run the following cell to terminate child processes and free up resources.
## Kill any child processes (like tensorboard)
import psutil
import os, signal
current_process = psutil.Process()
children = current_process.children(recursive=True)
for child in children:
    print('Killing child pid {}'.format(child.pid))
    os.kill(child.pid, signal.SIGKILL)
## This would kill the kernel itself
# os.kill(os.getpid(), signal.SIGKILL)