With certain data types it is possible to transfer the weights learned on one task to another task. For example, a model trained to detect animals and vehicles in images (as in CIFAR10) could be reused to classify dogs and cats.
Transfer learning is heavily used in image recognition and Natural Language Processing (NLP) tasks.
This tutorial is based on https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html.
!pip install tqdm
!conda install -y Pillow
Requirement already satisfied: tqdm in /root/miniconda3/lib/python3.6/site-packages
Fetching package metadata ...........
Solving package specifications: .
# All requested packages already installed.
# packages in environment at /root/miniconda3:
# pillow 4.2.1 py36_0
import numpy as np
import matplotlib.pyplot as plt
from urllib.request import urlretrieve
from os.path import isfile, isdir, getsize
from os import mkdir, makedirs, remove
from tqdm import tqdm
import zipfile
import pickle
from keras.models import Sequential, Model
from keras import optimizers
from keras.layers import Dense, Activation, Conv2D, MaxPool2D, Flatten, BatchNormalization, Dropout
from keras.preprocessing.image import ImageDataGenerator
import glob
import shutil
%matplotlib inline
Using Theano backend.
Download and extract the doge and cate pictures.
catdog_dataset_folder_path = 'catdog'
class DLProgress(tqdm):
    # tqdm progress bar wired up as a reporthook for urlretrieve
    last_block = 0

    def hook(self, block_num=1, block_size=1, total_size=None):
        self.total = total_size
        self.update((block_num - self.last_block) * block_size)
        self.last_block = block_num
if not isfile('catdog.zip'):
with DLProgress(unit='B', unit_scale=True, miniters=1, desc='Doge n Cate Dataset') as pbar:
urlretrieve(
'https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_3367a.zip',
'catdog.zip',
pbar.hook)
if not isdir(catdog_dataset_folder_path):
mkdir(catdog_dataset_folder_path)
with zipfile.ZipFile('catdog.zip') as f:
f.extractall('./'+catdog_dataset_folder_path)
# Unfortunately some of the files are corrupt so we need to clean these out:
!apt-get install -y jhead > /dev/null 2>&1
!jhead -de catdog/PetImages/Cat/*.jpg > /dev/null 2>&1
!jhead -de catdog/PetImages/Dog/*.jpg > /dev/null 2>&1
# sorted() guarantees the 12500 Cat images come before the 12500 Dog images,
# matching the labels below (0 = cat, 1 = dog); glob alone makes no order guarantee
files = sorted(glob.glob(catdog_dataset_folder_path+'/PetImages/**/*.jpg'))
labels = np.array([0]*12500+[1]*12500)
size = np.zeros(len(files))
for i,f in enumerate(files):
size[i] = getsize(f)
# drop the zero-byte (corrupt) files, iterating backwards so earlier indices stay valid
idx = np.where(size==0)[0]
for i in idx[::-1]:
    del files[i]
    labels = np.delete(labels, i)
In Keras we are required to place the training images in a folder structure where each subfolder contains the images of one class. We will structure the validation folder in the same way:
data/
train/
dogs/
dog001.jpg
dog002.jpg
...
cats/
cat001.jpg
cat002.jpg
...
validation/
dogs/
dog001.jpg
dog002.jpg
...
cats/
cat001.jpg
cat002.jpg
...
From the dataset we randomly choose 20000 images and copy them to the training folder; the rest go to the test folder.
len_data = len(files)
train_examples = 20000
test_examples = len_data - train_examples
# randomly split the files: 20000 for training, the rest for testing
permutation = np.random.permutation(len_data)
train_set = [files[i] for i in permutation[:train_examples]]
test_set = [files[i] for i in permutation[-test_examples:]]
train_labels = labels[permutation[:train_examples]]
test_labels = labels[permutation[-test_examples:]]
train_folder = catdog_dataset_folder_path+'/train'
test_folder = catdog_dataset_folder_path+'/test'
if isdir(train_folder): #if directory already exists
shutil.rmtree(train_folder)
if isdir(test_folder): #if directory already exists
shutil.rmtree(test_folder)
makedirs(train_folder+'/cat/')
makedirs(train_folder+'/dog/')
makedirs(test_folder+'/cat/')
makedirs(test_folder+'/dog/')
for f,i in zip(train_set, train_labels):
if i==0:
shutil.copy2(f, train_folder+'/cat/')
else:
shutil.copy2(f, train_folder+'/dog/')
for f,i in zip(test_set, test_labels):
if i==0:
shutil.copy2(f, test_folder+'/cat/')
else:
shutil.copy2(f, test_folder+'/dog/')
View some sample images:
datagen = ImageDataGenerator(
rescale=1./255,
rotation_range=5,
zoom_range=0.2,
horizontal_flip=True)
img_height = img_width = 100
channels = 3
train_generator = datagen.flow_from_directory(
train_folder,
color_mode = "rgb",
target_size=(img_height, img_width),
batch_size=1,
class_mode=None)
i = 0
img_list = []
for batch in train_generator:
img_list.append(batch)
i += 1
if i > 5:
break
for img in img_list:
plt.imshow(np.squeeze(img))
plt.show()
Found 20000 images belonging to 2 classes.
Always, ALWAYS compare against the most basic possible ML/statistics algorithm: in this case, logistic regression.
batch_size = 1000
train_generator = datagen.flow_from_directory(
train_folder,
color_mode = "rgb",
target_size=(img_height, img_width),
batch_size=batch_size,
class_mode='binary')
Found 20000 images belonging to 2 classes.
x_train, y_train = next(train_generator)  # one batch of 1000 images
x_test, y_test = next(train_generator)    # note: also drawn from the training folder, so this is a held-out batch rather than the true test set
from sklearn.linear_model import LogisticRegression
logistic = LogisticRegression()
logistic.fit(x_train.reshape(batch_size,-1), y_train)
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1, penalty='l2', random_state=None, solver='liblinear', tol=0.0001, verbose=0, warm_start=False)
y_pred = logistic.predict(x_test.reshape(len(x_test), -1))
y_pred[:10]
array([ 1., 1., 1., 0., 0., 1., 0., 0., 0., 0.], dtype=float32)
Predicting the probabilities for the first 3 images:
logistic.predict_proba(x_test[:3].reshape(3,-1))
array([[ 0.45270886, 0.54729114], [ 0.44448103, 0.55551897], [ 0.00495017, 0.99504983]])
Accuracy of the predictions:
np.count_nonzero(y_pred == y_test)/len(y_test)
0.514
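An accuracy of 0.514 is barely better than random guessing (0.5), so raw-pixel logistic regression sets a very low bar for the models below.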
model = Sequential()
# TODO: Add a CNN:
# Note 1: The input_shape needs to be specified in this case (input_height, input_width, channels)
# Note 2: The order usually goes Conv2D, Activation, MaxPool,
# Note 3: Must be flattened before passing onto Dense layers
# Note 4: The loss is binary_crossentropy
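# A sketch of one possible CNN that reproduces the summary printed below;
# the 3x3 kernels, 'same' padding, pool sizes and relu activations are
# assumptions inferred from the output shapes and parameter counts:
model.add(Conv2D(8, (3, 3), padding='same',
                 input_shape=(img_height, img_width, channels)))
model.add(Activation('relu'))
model.add(MaxPool2D(pool_size=(3, 3)))
model.add(Conv2D(16, (3, 3), padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Conv2D(32, (3, 3), padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Conv2D(32, (3, 3), padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPool2D(pool_size=(2, 2)))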
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adadelta', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
conv2d_5 (Conv2D)            (None, 100, 100, 8)       224
_________________________________________________________________
activation_5 (Activation)    (None, 100, 100, 8)       0
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 33, 33, 8)         0
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 33, 33, 16)        1168
_________________________________________________________________
batch_normalization_4 (Batch (None, 33, 33, 16)        64
_________________________________________________________________
activation_6 (Activation)    (None, 33, 33, 16)        0
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 16, 16, 16)        0
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 16, 16, 32)        4640
_________________________________________________________________
batch_normalization_5 (Batch (None, 16, 16, 32)        128
_________________________________________________________________
activation_7 (Activation)    (None, 16, 16, 32)        0
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 8, 8, 32)          0
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 8, 8, 32)          9248
_________________________________________________________________
batch_normalization_6 (Batch (None, 8, 8, 32)          128
_________________________________________________________________
activation_8 (Activation)    (None, 8, 8, 32)          0
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 4, 4, 32)          0
_________________________________________________________________
flatten_2 (Flatten)          (None, 512)               0
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 513
=================================================================
Total params: 16,113.0
Trainable params: 15,953.0
Non-trainable params: 160.0
_________________________________________________________________
batch_size = 128
train_generator = datagen.flow_from_directory(
train_folder,
color_mode = "rgb",
target_size=(img_height, img_width),
batch_size=batch_size,
class_mode='binary')
model.fit_generator(train_generator, train_examples//batch_size, epochs=2)
Found 20000 images belonging to 2 classes.
Epoch 1/2
156/156 [==============================] - 387s - loss: 0.6443 - acc: 0.6396
Epoch 2/2
156/156 [==============================] - 336s - loss: 0.5731 - acc: 0.6981
<keras.callbacks.History at 0x7f82935d40b8>
batch_size = 1
test_generator = datagen.flow_from_directory(
test_folder,
color_mode = "rgb",
target_size=(img_height, img_width),
batch_size=batch_size,
class_mode='binary',
shuffle=False)
y_pred = model.predict_generator(test_generator, test_examples//batch_size, workers=4)
# model.predict_classes(test_x)
# np.count_nonzero(y_pred == test_y)/len(test_y)
Found 4998 images belonging to 2 classes.
correct = 0
for i, f in enumerate(test_generator.filenames):
if f.startswith('cat') and y_pred[i]<0.5:
correct +=1
if f.startswith('dog') and y_pred[i]>=0.5:
correct +=1
print('Correct predictions: '+str(correct/len(test_generator.filenames)))
Correct predictions: 0.5070028011204482
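Despite its ~0.70 training accuracy, after only two epochs the small CNN barely beats chance on the held-out images, which motivates the transfer learning approach below.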
batch_size = 6
test_generator = datagen.flow_from_directory(
test_folder,
color_mode = "rgb",
target_size=(img_height, img_width),
batch_size=batch_size,
class_mode='binary',
shuffle=True)
x_test, y_test = next(test_generator)
p = model.predict(x_test)   # P(dog) for each image, shape (batch_size, 1)
p = np.hstack([1-p, p])     # column 0: P(cat), column 1: P(dog), matching label_dict
label_dict = {0: 'cat', 1: 'dog'}
Found 4998 images belonging to 2 classes.
plt.figure(figsize=(12,12))
for i in range(batch_size):
plt.subplot(batch_size,2,2*i+1)
plt.imshow(x_test[i])
plt.title(label_dict[y_test[i]])
plt.subplot(batch_size,2,2*i+2)
plt.bar(range(2),p[i])
plt.xticks(range(2), ['cat', 'dog'])
plt.show()
from keras import applications
datagen = ImageDataGenerator(rescale=1.0/255)
# input_shape is optional when include_top=False, so the net accepts any input size
model = applications.VGG16(include_top=False, weights='imagenet')
model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
input_2 (InputLayer)         (None, None, None, 3)     0
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0
_________________________________________________________________
block3_conv1 (Conv2D)        (None, None, None, 256)   295168
_________________________________________________________________
block3_conv2 (Conv2D)        (None, None, None, 256)   590080
_________________________________________________________________
block3_conv3 (Conv2D)        (None, None, None, 256)   590080
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, None, None, 256)   0
_________________________________________________________________
block4_conv1 (Conv2D)        (None, None, None, 512)   1180160
_________________________________________________________________
block4_conv2 (Conv2D)        (None, None, None, 512)   2359808
_________________________________________________________________
block4_conv3 (Conv2D)        (None, None, None, 512)   2359808
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, None, None, 512)   0
_________________________________________________________________
block5_conv1 (Conv2D)        (None, None, None, 512)   2359808
_________________________________________________________________
block5_conv2 (Conv2D)        (None, None, None, 512)   2359808
_________________________________________________________________
block5_conv3 (Conv2D)        (None, None, None, 512)   2359808
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, None, None, 512)   0
=================================================================
Total params: 14,714,688.0
Trainable params: 14,714,688.0
Non-trainable params: 0.0
_________________________________________________________________
Do not uncomment and run the following two blocks unless absolutely necessary: they take almost an hour to run. It took me a while to understand why in the Keras blog they had saved the computed features. It isn't necessary for you to save them; however, if you do come back to this tutorial you probably don't want to run this section again. It is slow mainly because there are about 14.7 million parameters to push each example through. Having a GPU would help tremendously in this instance.
It is however important to notice that I am not training in this block; I am predicting using a truncated VGG16 net. See how I set the include_top=False parameter above. VGG16 was originally trained on the ImageNet dataset so that it would predict 1000 classes. Now that we are truncating it, dropping the top few layers (the layers closest to the prediction), it outputs a (3, 3, 512) feature map in our case.
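As a quick sanity check (a small addition, not in the original notebook), the (3, 3, 512) shape follows from VGG16's five 2x2 max-pooling stages applied to our 100x100 inputs:
# each pooling stage halves the spatial size (integer division):
# 100 -> 50 -> 25 -> 12 -> 6 -> 3
size = 100
for _ in range(5):
    size //= 2
print(size)  # prints 3, hence the (3, 3, 512) feature map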
batch_size = 128
generator = datagen.flow_from_directory(
train_folder,
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode=None,
shuffle=False)
# bottleneck_features_train = model.predict_generator(generator, train_examples//batch_size, verbose=1, workers=4)
# pickle.dump(bottleneck_features_train, open('bottleneck_features_train.npy', 'wb'))
# bottleneck_features_train.shape
Found 20000 images belonging to 2 classes.
batch_size = 128
valid_generator = datagen.flow_from_directory(
test_folder,
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode=None,
shuffle=False)
# bottleneck_features_valid = model.predict_generator(valid_generator, test_examples//batch_size, verbose=1, workers=4)
# with open('bottleneck_features_valid.npy', 'wb') as f:
# pickle.dump(bottleneck_features_valid, f)
# bottleneck_features_valid.shape
Found 4998 images belonging to 2 classes.
with open('bottleneck_features_train.npy','rb') as f:
bottleneck_features_train = pickle.load(f)
model = Sequential()
# TODO: Make a 1 hidden layer NN
# Note 1: Add Flatten() layer The input shape is the bottleneck features dimension
# Note 2: Choose a suitable dimension for the hidden layer (eg. half way between final node and dimension of input)
# Note 3: Last layer is 1 with activation sigmoid (remember we are trying to predict a probability)
# Note 4: See previous todo section to see what the loss is supposed to be (unless you know already of course)
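# A sketch of one network matching the summary printed below; the hidden
# size (256) is inferred from the parameter counts, while the relu
# activation and the dropout rate (0.5) are assumptions:
model.add(Flatten(input_shape=bottleneck_features_train.shape[1:]))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adadelta', loss='binary_crossentropy', metrics=['accuracy'])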
model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
flatten_1 (Flatten)          (None, 4608)              0
_________________________________________________________________
dense_1 (Dense)              (None, 256)               1179904
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 257
=================================================================
Total params: 1,180,161.0
Trainable params: 1,180,161.0
Non-trainable params: 0.0
_________________________________________________________________
batch_size = 128
generator = datagen.flow_from_directory(
train_folder,
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode=None,
shuffle=False)
# filenames are ordered (shuffle=False); trim the labels to the number of
# examples that actually went through predict_generator
labels = np.array([0 if f.startswith('cat') else 1 for f in generator.filenames])[:len(bottleneck_features_train)]
model.fit(bottleneck_features_train, labels, epochs=10, batch_size=batch_size)
Found 20000 images belonging to 2 classes.
Epoch 1/10
19872/19872 [==============================] - 2s - loss: 0.6060 - acc: 0.7756
Epoch 2/10
19872/19872 [==============================] - 2s - loss: 0.3599 - acc: 0.8418
Epoch 3/10
19872/19872 [==============================] - 2s - loss: 0.3253 - acc: 0.8575
Epoch 4/10
19872/19872 [==============================] - 2s - loss: 0.3053 - acc: 0.8682
Epoch 5/10
19872/19872 [==============================] - 7s - loss: 0.2904 - acc: 0.8770
Epoch 6/10
19872/19872 [==============================] - 9s - loss: 0.2796 - acc: 0.8807
Epoch 7/10
19872/19872 [==============================] - 10s - loss: 0.2670 - acc: 0.8846
Epoch 8/10
19872/19872 [==============================] - 10s - loss: 0.2530 - acc: 0.8934
Epoch 9/10
19872/19872 [==============================] - 11s - loss: 0.2472 - acc: 0.8955
Epoch 10/10
19872/19872 [==============================] - 12s - loss: 0.2371 - acc: 0.9004
<keras.callbacks.History at 0x7eff4ac14c88>
with open('bottleneck_features_valid.npy','rb') as f:
bottleneck_features_valid = pickle.load(f)
valid_labels = np.array([0 if f.startswith('cat') else 1 for f in valid_generator.filenames])[:len(bottleneck_features_valid)]
y_valid_pred = model.predict_classes(bottleneck_features_valid)
accuracy = np.count_nonzero(valid_labels == y_valid_pred.ravel())/len(valid_labels)
print('\nThe accuracy is: '+str(accuracy))
4000/4870 [=======================>......] - ETA: 0s
The accuracy is: 0.8989733059548255
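With frozen VGG16 features, even this tiny fully connected network reaches roughly 90% validation accuracy, a large jump from the ~51% of both the logistic regression baseline and the small CNN above.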
from PIL import Image
img = Image.open('doge.jpg')
img.thumbnail((img_height, img_width), Image.ANTIALIAS)
We can refine the model further by adjusting the last convolutional layer. Note that vgg16 is of type Model and not Sequential, hence we cannot simply add the top layers as suggested in the Keras blog.
Instead we build a combined Model and freeze every layer except the last convolutional layer and the fully connected (dense) layers on top. Take note of the number of trainable parameters in the summary below.
vgg16 = applications.VGG16(include_top=False, weights='imagenet', input_shape=(img_width, img_height, channels))
combinedModel = Model(inputs= vgg16.input, outputs= model(vgg16.output))
for layer in combinedModel.layers[:-3]:  # freeze everything except block5_conv3, block5_pool and the dense top
    layer.trainable = False
combinedModel.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
input_3 (InputLayer)         (None, 100, 100, 3)       0
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 100, 100, 64)      1792
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 100, 100, 64)      36928
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 50, 50, 64)        0
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 50, 50, 128)       73856
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 50, 50, 128)       147584
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 25, 25, 128)       0
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 25, 25, 256)       295168
_________________________________________________________________
block3_conv2 (Conv2D)        (None, 25, 25, 256)       590080
_________________________________________________________________
block3_conv3 (Conv2D)        (None, 25, 25, 256)       590080
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, 12, 12, 256)       0
_________________________________________________________________
block4_conv1 (Conv2D)        (None, 12, 12, 512)       1180160
_________________________________________________________________
block4_conv2 (Conv2D)        (None, 12, 12, 512)       2359808
_________________________________________________________________
block4_conv3 (Conv2D)        (None, 12, 12, 512)       2359808
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, 6, 6, 512)         0
_________________________________________________________________
block5_conv1 (Conv2D)        (None, 6, 6, 512)         2359808
_________________________________________________________________
block5_conv2 (Conv2D)        (None, 6, 6, 512)         2359808
_________________________________________________________________
block5_conv3 (Conv2D)        (None, 6, 6, 512)         2359808
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, 3, 3, 512)         0
_________________________________________________________________
sequential_1 (Sequential)    (None, 1)                 1180161
=================================================================
Total params: 15,894,849.0
Trainable params: 3,539,969.0
Non-trainable params: 12,354,880.0
_________________________________________________________________
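As a quick sanity check (a small addition, not in the original notebook), you can print each layer's trainable flag to confirm that only the final convolutional layer and the dense top will be updated:
for layer in combinedModel.layers:
    print(layer.name, layer.trainable)  # only block5_conv3, block5_pool and sequential_1 should be True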
You can try to use the adagrad optimizer if you wish, but you'll soon see that all the progress that was made in model will be undone. It will in fact overwrite the weights in model, and you would have to rerun the training from the bottleneck_features section. That is why we use RMSprop with a tiny learning rate below.
Why so small? So that the updates are gentle and do not destabilise the weights that were previously learnt.
model.save_weights('fc_model.h5')
combinedModel.compile(loss='binary_crossentropy',
optimizer = optimizers.RMSprop(lr=1e-4, decay=0.9), # optimizers.SGD(lr=1e-4, momentum=0.9)
metrics=['accuracy'])
# prepare data augmentation configuration
train_datagen = ImageDataGenerator(
rescale=1. / 255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = train_datagen.flow_from_directory(
train_folder,
target_size=(img_height, img_width),
batch_size=batch_size,
class_mode='binary')
validation_generator = test_datagen.flow_from_directory(
test_folder,
target_size=(img_height, img_width),
batch_size=batch_size,
class_mode='binary')
Found 20000 images belonging to 2 classes.
Found 4998 images belonging to 2 classes.
# fine-tune the model
combinedModel.fit_generator(
train_generator,
steps_per_epoch=train_examples//batch_size,
epochs=5,
validation_data=validation_generator,
validation_steps=test_examples//batch_size) # len(valid_generator.filenames)
Epoch 1/5
156/156 [==============================] - 3369s - loss: 0.3049 - acc: 0.8695 - val_loss: 0.2076 - val_acc: 0.9149
Epoch 2/5
155/156 [============================>.] - ETA: 16s - loss: 0.2834 - acc: 0.8760
from PIL import Image
img = Image.open('doge.jpg')
img = np.asarray(img.resize((img_height, img_width), Image.ANTIALIAS))/255
plt.imshow(img)
plt.show()
p = combinedModel.predict(np.array([img]))
print('The probability that this is a doge is: ' +str(p[0][0]))
img = Image.open('grumpy_cat.jpeg')
img = np.asarray(img.resize((img_height, img_width), Image.ANTIALIAS))/255
plt.imshow(img)
plt.show()
p = combinedModel.predict(np.array([img]))
print('The probability that this is a doge is: ' +str(p[0][0]))