try:
    # %tensorflow_version only exists in Colab.
    %tensorflow_version 2.x
except Exception:
    pass
import tensorflow as tf
from tensorflow import keras
print ('tensorflow version :', tf.__version__)
tf.config.experimental.list_physical_devices()
## Loading our custom utils files
import os
import urllib.request
# Hack to download our utils when running on Colab etc.
file_url = 'https://raw.githubusercontent.com/elephantscale/es-public/master/deep-learning/transfer_learning_utils.py'
file_location = "transfer_learning_utils.py"
if not os.path.exists(file_location):
    file_location = os.path.basename(file_location)
    if not os.path.exists(file_location):
        print("Downloading : ", file_url)
        urllib.request.urlretrieve(file_url, file_location)
# print('file_location:', file_location)
## Loading our custom utils files
import os
import urllib.request
# Hack to download image utils when running on Colab etc.
file_url = 'https://raw.githubusercontent.com/elephantscale/es-public/master/deep-learning/image_utils.py'
file_location = "image_utils.py"
if not os.path.exists(file_location):
    file_location = os.path.basename(file_location)
    if not os.path.exists(file_location):
        print("Downloading : ", file_url)
        urllib.request.urlretrieve(file_url, file_location)
# print('file_location:', file_location)
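Since the two download cells above are identical except for the file name, you could fold them into a small helper. A minimal sketch; the download_if_missing name is ours, not part of the course utils:

import os
import urllib.request

def download_if_missing(file_url, file_location):
    """Download file_url to file_location unless it is already present."""
    if not os.path.exists(file_location):
        print("Downloading : ", file_url)
        urllib.request.urlretrieve(file_url, file_location)
    return file_location

# download_if_missing('https://raw.githubusercontent.com/elephantscale/es-public/master/deep-learning/image_utils.py',
#                     'image_utils.py')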
The following cell sets TF properties for running on a GPU.
## This block is to tweak TF running on GPU
## You may comment this out if you are not using a GPU
## ---- start Memory setting ----
## Ask TF not to allocate all GPU memory at once; allocate as needed
## Without this, execution can fail with a "failed to initialize algorithm" error
from tensorflow.compat.v1.keras.backend import set_session
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True # dynamically grow the memory used on the GPU
config.log_device_placement = True # to log device placement (on which device the operation ran)
sess = tf.compat.v1.Session(config=config)
set_session(sess)
## ---- end Memory setting ----
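If you would rather stay on the native TF 2.x API instead of the compat.v1 session, the same allow-growth behavior is available directly. A minimal sketch; note it must run before any GPU has been initialized:

## TF 2.x equivalent of the allow_growth setting above (no v1 session needed)
## Memory growth must be set before the GPUs are first used
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)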
We will use the cat-dog-redux dataset.
## Common constants
IMG_WIDTH=160
IMG_HEIGHT=160
NUM_CLASSES=2
BATCH_SIZE=64
APP_NAME = 'retrain'
EPOCHS = 10
import os
data_location = 'https://elephantscale-public.s3.amazonaws.com/data/images/cat-dog-redux.zip'
data_location_local = keras.utils.get_file(fname=os.path.basename(data_location),
origin=data_location, extract=True)
print ('local download file: ', data_location_local)
data_dir = os.path.join(os.path.dirname(data_location_local), 'cat-dog-redux')
print ('local data dir: ', data_dir)
train_dir = os.path.join(data_dir, 'train')
validation_dir = os.path.join(data_dir, 'val')
print ('train dir:', train_dir)
print ('validation dir:', validation_dir)
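As a quick sanity check, the optional sketch below counts the images under each class subdirectory, assuming the standard one-subdirectory-per-class layout that flow_from_directory expects:

## Optional sanity check: count images per class subdirectory
for split_dir in (train_dir, validation_dir):
    for class_name in sorted(os.listdir(split_dir)):
        class_dir = os.path.join(split_dir, class_name)
        if os.path.isdir(class_dir):
            print('{}/{} : {:,} images'.format(os.path.basename(split_dir), class_name,
                                               len(os.listdir(class_dir))))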
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# train_image_generator = ImageDataGenerator(rescale=1./255) # Generator for our training data
train_image_generator = ImageDataGenerator(
    rescale=1./255,
    rotation_range=45,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)
validation_image_generator = ImageDataGenerator(rescale=1./255) # Generator for our validation data
train_data_gen = train_image_generator.flow_from_directory(batch_size=BATCH_SIZE,
                                                           directory=train_dir,
                                                           shuffle=True,
                                                           target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                           class_mode='categorical')
val_data_gen = validation_image_generator.flow_from_directory(batch_size=BATCH_SIZE,
                                                              directory=validation_dir,
                                                              shuffle=False,  # keep order so labels line up with predictions later
                                                              target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                              class_mode='categorical')
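To verify the generators are wired up correctly, you can pull a single batch and check its shape. An optional sketch; note this consumes one batch from the training generator:

## Optional sanity check: inspect one batch from the training generator
sample_images, sample_labels = next(train_data_gen)
print('images batch shape:', sample_images.shape)   # expect (BATCH_SIZE, IMG_HEIGHT, IMG_WIDTH, 3)
print('labels batch shape:', sample_labels.shape)   # expect (BATCH_SIZE, NUM_CLASSES)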
Let's try the InceptionV3 model. We will only download the 'base' model, without the final classification layers.
from transfer_learning_utils import print_model_summary_compact
pre_trained_model = tf.keras.applications.InceptionV3(input_shape=(IMG_WIDTH, IMG_HEIGHT, 3),
                                                      include_top=False,
                                                      weights='imagenet')
print_model_summary_compact(pre_trained_model)
print ()
! du -skh ~/.keras/models/* | grep inception
# pre_trained_model.summary() # large output
# last_layer = pre_trained_model.get_layer('mixed7')
last_layer = pre_trained_model.layers[-1]
print ("last_layer.name:", last_layer.name)
print ('last_layer.output_shape:', last_layer.output_shape)
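If you want to cut the base model off earlier (e.g. the 'mixed7' option commented out above), it helps to inspect candidate layer names first. A quick sketch:

## Optional: list the last few layers, useful when choosing an earlier cutoff
for layer in pre_trained_model.layers[-5:]:
    print(layer.name, layer.output_shape)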
## Freeze all layers
# for layer in pre_trained_model.layers:
#     layer.trainable = False
# or this works too
pre_trained_model.trainable = False
print_model_summary_compact (pre_trained_model)
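To confirm the freeze took effect, you can compare trainable and non-trainable parameter counts. A minimal sketch:

## Verify the freeze: the base model should report zero trainable parameters
import numpy as np
trainable_count = sum(int(np.prod(w.shape.as_list())) for w in pre_trained_model.trainable_weights)
non_trainable_count = sum(int(np.prod(w.shape.as_list())) for w in pre_trained_model.non_trainable_weights)
print('trainable params     : {:,}'.format(trainable_count))
print('non-trainable params : {:,}'.format(non_trainable_count))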
We are going to add a couple of layers on top of the base model.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Softmax, Flatten, GlobalAveragePooling2D, Dropout
from transfer_learning_utils import print_model_summary_compact
model = Sequential([
    Input(shape=(IMG_WIDTH, IMG_HEIGHT, 3)),
    pre_trained_model,
    Flatten(),
    # GlobalAveragePooling2D(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(NUM_CLASSES, activation='softmax')
])
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
print_model_summary_compact(model)
print()
# my_model.summary() # large output
tf.keras.utils.plot_model(model, to_file='model.png', show_shapes=True)
## This is fairly boilerplate code
import datetime
import os
import shutil
app_name = APP_NAME
# timestamp = datetime.datetime.now().strftime("%Y-%m-%d--%H-%M-%S")
tb_top_level_dir= '/tmp/tensorboard-logs'
tb_app_dir = os.path.join (tb_top_level_dir, app_name)
tb_logs_dir = os.path.join (tb_app_dir, datetime.datetime.now().strftime("%H-%M-%S"))
print ("Saving TB logs to : " , tb_logs_dir)
#clear out old logs
shutil.rmtree ( tb_app_dir, ignore_errors=True )
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=tb_logs_dir, write_graph=True,
                                                      write_images=True, histogram_freq=1)
## This will embed TensorBoard right here in Jupyter!
# ! killall tensorboard # kill previously running tensorboards
%load_ext tensorboard
%tensorboard --logdir $tb_logs_dir
%%time
steps_per_epoch = train_data_gen.n // train_data_gen.batch_size
validation_steps = val_data_gen.n // val_data_gen.batch_size
print ('steps_per_epoch:', steps_per_epoch)
print ('validation_steps:', validation_steps)
history = model.fit(
    train_data_gen,
    steps_per_epoch=steps_per_epoch,
    epochs=EPOCHS,
    validation_data=val_data_gen,
    validation_steps=validation_steps,
    callbacks=[tensorboard_callback]
)
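If you increase the epoch count, an early-stopping callback can halt training once validation accuracy stops improving. A hedged sketch using the standard Keras callback; the patience value is just an example:

## Optional: stop training early when validation accuracy plateaus
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=2,
                                              restore_best_weights=True)
## then pass callbacks=[tensorboard_callback, early_stop] to model.fit()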
As you can see, training takes a long time.
Let's save the resulting model, so we can reuse it without going through training again.
import os
model_file = APP_NAME + '-model.h5'
model.save(model_file)
model_size_in_bytes = os.path.getsize(model_file)
print ("model saved as '{}', size = {:,} bytes / {:,.1f} KB / {:,.1f} MB".format(model_file,
       model_size_in_bytes, model_size_in_bytes / 1e3,
       model_size_in_bytes / 1e6))
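As a sanity check on the saved file, you can reload it and confirm it round-trips; load_model rebuilds the full architecture plus weights from the HDF5 file. A short sketch:

## Sanity check: reload the saved model
reloaded_model = keras.models.load_model(model_file)
print_model_summary_compact(reloaded_model)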
%matplotlib inline
import matplotlib.pyplot as plt
plt.plot(history.history['accuracy'], label='train_accuracy')
plt.plot(history.history['val_accuracy'], label='val_accuracy')
plt.legend()
plt.show()
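The same history object also holds the loss curves, which are worth plotting to spot overfitting:

plt.plot(history.history['loss'], label='train_loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.legend()
plt.show()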
from math import ceil
metrics = model.evaluate(val_data_gen, batch_size=val_data_gen.batch_size, steps=ceil(val_data_gen.n / val_data_gen.batch_size))
metric_names = model.metrics_names
print ("model metrics : " , metric_names)
for name, value in zip(metric_names, metrics):
    print ("Metric : {} = {:,.3f}".format(name, value))
import numpy as np
from math import ceil
print ("predicting on {:,} test images".format(val_data_gen.n))
# we need a ceiling for steps
predictions = model.predict(val_data_gen, batch_size=val_data_gen.batch_size,
steps=ceil(val_data_gen.n / val_data_gen.batch_size) )
print( 'predictions.shape: ', predictions.shape)
if val_data_gen.class_mode == 'categorical':
    # converting softmax --> classes
    print ("converting softmax --> classes")
    predictions2 = [np.argmax(p) for p in predictions]
if val_data_gen.class_mode == 'binary':
    # converting sigmoid --> classes
    print ("converting sigmoid --> classes")
    predictions2 = [0 if n < 0.5 else 1 for n in predictions]
## Ensure all prediction counts match
assert(len(predictions) == len(predictions2) == len(val_data_gen.classes))
np.set_printoptions(formatter={'float': '{: 0.2f}'.format})
print ('predictions : ', predictions[:10])
print ('predictions2 : ', predictions2[:10])
from sklearn.metrics import confusion_matrix
import seaborn as sns
test_labels = val_data_gen.classes
cm = confusion_matrix(test_labels, predictions2, labels = range(0, NUM_CLASSES))
cm
import matplotlib.pyplot as plt
import seaborn as sns
print ("class index mapping : ", val_data_gen.class_indices)
plt.figure(figsize = (8,6))
# colormaps : cmap="YlGnBu" , cmap="Greens", cmap="Blues", cmap="Reds"
sns.heatmap(cm, annot=True, cmap="Reds", fmt='d')
plt.show()
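For per-class precision and recall to go with the confusion matrix, scikit-learn's classification_report works on the same labels and predictions. A short sketch:

from sklearn.metrics import classification_report

## Per-class precision / recall / F1 on the validation set
class_names = sorted(val_data_gen.class_indices, key=val_data_gen.class_indices.get)
print(classification_report(test_labels, predictions2, target_names=class_names))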
Before running the next exercise, run the following cell to terminate child processes and free up resources.
## Kill any child processes (like tensorboard)
import psutil
import os, signal
current_process = psutil.Process()
children = current_process.children(recursive=True)
for child in children:
    print('Killing child pid {}'.format(child.pid))
    os.kill(child.pid, signal.SIGKILL)
## This would kill the kernel itself
# os.kill(os.getpid(), signal.SIGKILL)