#!/usr/bin/env python
# coding: utf-8

# # Lab: Retraining a pre-trained model
#
# We will retrain a pre-trained model to classify cats and dogs!
#
# [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/elephantscale/cool-ML-demos/blob/main/transfer-learning/transfer3-training-a-pretrained-model.ipynb)
#
# ### Runtime
# ~ 30 minutes
#
# ### Note
# Here we are dealing with real-world images. Processing them requires a lot of compute power.
# If you have access to a GPU, switch the runtime to **GPU**!
#
# ### References
# - https://www.tensorflow.org/tutorials/images/transfer_learning
# - https://www.learnopencv.com/keras-tutorial-using-pre-trained-imagenet-models/

# In[ ]:

try:
    # %tensorflow_version only exists in Colab.
    get_ipython().run_line_magic('tensorflow_version', '2.x')
except Exception:
    pass

import tensorflow as tf
from tensorflow import keras

print('tensorflow version :', tf.__version__)
tf.config.experimental.list_physical_devices()

# In[ ]:

## Loading our custom utils files
## Hack to download transfer-learning utils when running on Colab, etc.
import os
import urllib.request

file_url = 'https://raw.githubusercontent.com/elephantscale/es-public/master/deep-learning/transfer_learning_utils.py'
file_location = 'transfer_learning_utils.py'

if not os.path.exists(file_location):
    print("Downloading : ", file_url)
    urllib.request.urlretrieve(file_url, file_location)
# print('file_location:', file_location)

# In[ ]:

## Loading our custom utils files
## Hack to download image utils when running on Colab, etc.
import os
import urllib.request

file_url = 'https://raw.githubusercontent.com/elephantscale/es-public/master/deep-learning/image_utils.py'
file_location = 'image_utils.py'

if not os.path.exists(file_location):
    print("Downloading : ", file_url)
    urllib.request.urlretrieve(file_url, file_location)
# print('file_location:', file_location)

# ## TF-GPU Config
# The following cell sets TF properties to run on GPU

# In[ ]:

## This block is to tweak TF running on GPU
## You may comment this out, if you are not using a GPU

## ---- start Memory setting ----
## Ask TF not to allocate all GPU memory at once; allocate as needed.
## Without this, execution may fail with a "failed to initialize algorithm" error.
from tensorflow.compat.v1.keras.backend import set_session

config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
config.log_device_placement = True      # log device placement (on which device the operation ran)
sess = tf.compat.v1.Session(config=config)
set_session(sess)
## ---- end Memory setting ----
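# Aside: on TensorFlow 2.x the same effect can be achieved without a
# compat.v1 session, using tf.config. A minimal sketch, left commented out
# since set_memory_growth must be called before the GPUs are initialized
# (and the loop is simply a no-op on CPU-only machines):

# In[ ]:

## TF2-native alternative to the v1 session block above (sketch)
# gpus = tf.config.experimental.list_physical_devices('GPU')
# for gpu in gpus:
#     tf.config.experimental.set_memory_growth(gpu, True)  # grow GPU memory as needed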
# ## Step 1 - Download Data
# We will use the cat-dog-redux dataset.

# In[ ]:

## Common constants
IMG_WIDTH = 160
IMG_HEIGHT = 160
NUM_CLASSES = 2
BATCH_SIZE = 64
APP_NAME = 'retrain'
EPOCHS = 10

# In[ ]:

import os

data_location = 'https://elephantscale-public.s3.amazonaws.com/data/images/cat-dog-redux.zip'
data_location_local = keras.utils.get_file(fname=os.path.basename(data_location),
                                           origin=data_location, extract=True)
print('local download file: ', data_location_local)

data_dir = os.path.join(os.path.dirname(data_location_local), 'cat-dog-redux')
print('local data dir: ', data_dir)

train_dir = os.path.join(data_dir, 'train')
validation_dir = os.path.join(data_dir, 'val')
print('train dir:', train_dir)
print('validation dir:', validation_dir)

# ## Step 2 - Setup Data Generators

# In[ ]:

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# train_image_generator = ImageDataGenerator(rescale=1./255)  # plain generator, no augmentation
train_image_generator = ImageDataGenerator(  # generator for our training data, with augmentation
    rescale=1./255,
    rotation_range=45,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

validation_image_generator = ImageDataGenerator(rescale=1./255)  # generator for our validation data

train_data_gen = train_image_generator.flow_from_directory(batch_size=BATCH_SIZE,
                                                           directory=train_dir,
                                                           shuffle=True,
                                                           target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                           class_mode='categorical')

val_data_gen = validation_image_generator.flow_from_directory(batch_size=BATCH_SIZE,
                                                              directory=validation_dir,
                                                              shuffle=False,  # keep order, so predictions align with val_data_gen.classes later
                                                              target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                              class_mode='categorical')
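# As a quick sanity check, we can pull one batch from the training generator
# and plot a few of the augmented images. This cell is optional and purely
# illustrative; each call to next() yields a new randomly augmented batch.

# In[ ]:

import matplotlib.pyplot as plt

sample_images, sample_labels = next(train_data_gen)
fig, axes = plt.subplots(1, 5, figsize=(15, 3))
for img, ax in zip(sample_images[:5], axes):
    ax.imshow(img)   # pixel values are already rescaled to [0, 1] by the generator
    ax.axis('off')
plt.show()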
# ## Step 3 - Download Model
# Let's try the InceptionV3 model.
# We will only download the 'base' model, without the final classifying layers.

# In[ ]:

from transfer_learning_utils import print_model_summary_compact

pre_trained_model = tf.keras.applications.InceptionV3(input_shape=(IMG_WIDTH, IMG_HEIGHT, 3),
                                                      include_top=False,
                                                      weights='imagenet')
print_model_summary_compact(pre_trained_model)
print()

get_ipython().system(' du -skh ~/.keras/models/* | grep inception')

# In[ ]:

# pre_trained_model.summary()  # large output

# ### Inspect the model and freeze the layers

# In[ ]:

# last_layer = pre_trained_model.get_layer('mixed7')
last_layer = pre_trained_model.layers[-1]
print("last_layer.name:", last_layer.name)
print("last_layer.output_shape:", last_layer.output_shape)

# In[ ]:

## Freeze all layers
# for layer in pre_trained_model.layers:
#     layer.trainable = False
# or this works too
pre_trained_model.trainable = False

print_model_summary_compact(pre_trained_model)

# ## Step 4 - Create our own model
#
# We are going to add a couple of layers on top of the base model:
# - One fully connected layer
# - And a softmax layer

# In[ ]:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Softmax, Flatten, GlobalAveragePooling2D, Dropout
from transfer_learning_utils import print_model_summary_compact

model = Sequential([
    Input(shape=(IMG_WIDTH, IMG_HEIGHT, 3)),
    pre_trained_model,
    Flatten(),
    # GlobalAveragePooling2D(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(NUM_CLASSES, activation='softmax')
])

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

print_model_summary_compact(model)
print()
# model.summary()  # large output

tf.keras.utils.plot_model(model, to_file='model.png', show_shapes=True)

# ## Step 5 - Setup Tensorboard

# In[ ]:

## This is fairly boilerplate code
import datetime
import os
import shutil

app_name = APP_NAME
# timestamp = datetime.datetime.now().strftime("%Y-%m-%d--%H-%M-%S")
tb_top_level_dir = '/tmp/tensorboard-logs'
tb_app_dir = os.path.join(tb_top_level_dir, app_name)
tb_logs_dir = os.path.join(tb_app_dir, datetime.datetime.now().strftime("%H-%M-%S"))
print("Saving TB logs to : ", tb_logs_dir)

# clear out old logs
shutil.rmtree(tb_app_dir, ignore_errors=True)

tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=tb_logs_dir, write_graph=True,
                                                      write_images=True, histogram_freq=1)

## This will embed Tensorboard right here in Jupyter!
# ! killall tensorboard   # kill previously running tensorboards
get_ipython().run_line_magic('load_ext', 'tensorboard')
get_ipython().run_line_magic('tensorboard', '--logdir $tb_logs_dir')

# ## Step 6 : Train

# In[ ]:

get_ipython().run_cell_magic('time', '', "\nsteps_per_epoch = train_data_gen.n // train_data_gen.batch_size\nvalidation_steps = val_data_gen.n // val_data_gen.batch_size\nprint('steps_per_epoch:', steps_per_epoch)\nprint('validation_steps:', validation_steps)\n\nhistory = model.fit(\n    train_data_gen,\n    steps_per_epoch=steps_per_epoch,\n    epochs=EPOCHS,\n    validation_data=val_data_gen,\n    validation_steps=validation_steps,\n    callbacks=[tensorboard_callback]\n)\n")

# ### Save the model for reuse later
# As you can see, training takes a long time.
# Let's save the resulting model, so we can use it later without going through training again.

# In[ ]:

import os

model_file = APP_NAME + '-model.h5'
model.save(model_file)
model_size_in_bytes = os.path.getsize(model_file)
print("model saved as '{}', size = {:,} bytes / {:,.1f} KB / {:,.1f} MB".format(
    model_file, model_size_in_bytes, model_size_in_bytes / 1e3, model_size_in_bytes / 1e6))
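# To confirm the saved file is reusable, we can load it back and run a quick
# check. A minimal sketch: load_model() restores both the architecture and the
# trained weights from the HDF5 file, so no retraining is needed.

# In[ ]:

reloaded_model = keras.models.load_model(model_file)

# evaluate one batch, just to verify the model round-trips correctly
loss, acc = reloaded_model.evaluate(val_data_gen, steps=1, verbose=0)
print("reloaded model - loss: {:.3f}, accuracy: {:.3f}".format(loss, acc))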
# ## Step 7 : See Training History

# In[ ]:

get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt

plt.plot(history.history['accuracy'], label='train_accuracy')
plt.plot(history.history['val_accuracy'], label='val_accuracy')
plt.legend()
plt.show()

# ## Step 8 : Evaluate the Model

# ### 8.1 - Metrics

# In[ ]:

from math import ceil

metrics = model.evaluate(val_data_gen,
                         batch_size=val_data_gen.batch_size,
                         steps=ceil(val_data_gen.n / val_data_gen.batch_size))

metric_names = model.metrics_names
print("model metrics : ", metric_names)
for idx, metric in enumerate(metric_names):
    print("Metric : {} = {:,.3f}".format(metric_names[idx], metrics[idx]))

# ### 8.2 - Predictions and Confusion Matrix

# In[ ]:

import numpy as np
from math import ceil

print("predicting on {:,} test images".format(val_data_gen.n))

# we need a ceiling for steps, so the last partial batch is included
predictions = model.predict(val_data_gen,
                            batch_size=val_data_gen.batch_size,
                            steps=ceil(val_data_gen.n / val_data_gen.batch_size))
print('predictions.shape: ', predictions.shape)

if val_data_gen.class_mode == 'categorical':
    # converting softmax --> classes
    print("converting softmax --> classes")
    predictions2 = [np.argmax(p) for p in predictions]

if val_data_gen.class_mode == 'binary':
    # converting sigmoid --> classes
    print("converting sigmoid --> classes")
    predictions2 = [0 if n < 0.5 else 1 for n in predictions]

## Ensure all predictions match
assert len(predictions) == len(predictions2) == len(val_data_gen.classes)

# In[ ]:

np.set_printoptions(formatter={'float': '{: 0.2f}'.format})
print('predictions : ', predictions[:10])
print('predictions2: ', predictions2[:10])

# In[ ]:

from sklearn.metrics import confusion_matrix

test_labels = val_data_gen.classes
cm = confusion_matrix(test_labels, predictions2, labels=range(0, NUM_CLASSES))
print(cm)

# In[ ]:

import matplotlib.pyplot as plt
import seaborn as sns

print("class index mapping : ", val_data_gen.class_indices)
plt.figure(figsize=(8, 6))
# colormaps : cmap="YlGnBu", cmap="Greens", cmap="Blues", cmap="Reds"
sns.heatmap(cm, annot=True, cmap="Reds", fmt='d')
plt.show()

# ## Cleanup
# Before running the next exercise, run the following cell to terminate child processes and free up resources.

# In[ ]:

## Kill any child processes (like tensorboard)
import psutil
import os, signal

current_process = psutil.Process()
children = current_process.children(recursive=True)
for child in children:
    print('Killing child pid {}'.format(child.pid))
    os.kill(child.pid, signal.SIGKILL)

## This will kill the actual kernel itself
# os.kill(os.getpid(), signal.SIGKILL)
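# Note: signal.SIGKILL is not available on Windows. A more portable sketch,
# using psutil's own kill() (which picks the right mechanism per platform),
# is shown commented out below.

# In[ ]:

## Portable alternative (sketch): psutil sends SIGKILL on POSIX and calls
## TerminateProcess on Windows.
# for child in psutil.Process().children(recursive=True):
#     child.kill()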