Prepare dataset

We will perform reverse visual search on the LFW (Labeled Faces in the Wild) dataset. Download it to take a direct look.

Download the dataset and extract all images

In [ ]:
import os
import requests

# download the LFW archive once
if not os.path.exists('lfw.tgz'):
  with open('lfw.tgz', 'wb') as f:
    f.write(requests.get('http://vis-www.cs.umass.edu/lfw/lfw.tgz').content)

import tarfile
# extract into ./lfw, one subdirectory per person
with tarfile.open('lfw.tgz') as tar:
  tar.extractall()
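
The archive extracts to an lfw/ directory with one subdirectory per person, so we can take that direct look at the layout:

In [ ]:
# each person has a folder of 250x250 JPEGs named after them
people = sorted(os.listdir('lfw'))
print(len(people), 'people; first entries:', people[:3])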
In [ ]:
extensions = ['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG']


def prepare_data():
  labels = []
  files = []
  for root, _, filenames in os.walk('lfw'):
    for filename in filenames:
      ext = os.path.splitext(filename)[1]
      if ext in extensions:
        # the directory name is the person's name, e.g. lfw/Aaron_Peirsol/...
        label = root.split('/')[1]
        filepath = os.path.join(root, filename)
        labels.append(label)
        files.append(filepath)
  return labels, files

Y, X = prepare_data()

# lfw data
TRAIN_SAMPLES = 13233
IMG_WIDTH, IMG_HEIGHT = 224, 224
NUM_CLASSES = 5749
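
As a quick sanity check, the constants above should agree with what prepare_data() found:

In [ ]:
assert len(X) == TRAIN_SAMPLES     # 13233 images in total
assert len(set(Y)) == NUM_CLASSES  # 5749 distinct people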

Download preprocessed data

In [ ]:
def download_preprocessed_files():
  file_names = ['facenet_keras.h5', 'features-deepface.pickle',
                'features-facenet.pickle', 'features-inception.pickle',
                'features-resnet.pickle', 'features-vgg16.pickle',
                'features-vggface.pickle', 'features-xception.pickle']
  base_url = 'https://dl.acytoo.com/rvs/'
  for f in file_names:
    if not os.path.exists(f):
      with open(f, 'wb') as out:
        out.write(requests.get(base_url + f).content)

download_preprocessed_files()

First approach

Use pretrained neural networks with the final classification layer removed, and treat the pooled activations as image features.

Part of the code is adapted from https://github.com/PracticalDL/Practical-Deep-Learning-Book/tree/master/code/chapter-4

In [ ]:
!pip install mtcnn
!pip install git+https://github.com/rcmalli/keras-vggface.git
!pip install keras_applications
!pip install faiss-gpu
# patch keras_vggface for newer TensorFlow: keras.engine.topology moved
# to tensorflow.keras.utils (adjust the path if your Python version differs)
filename = "/usr/local/lib/python3.7/dist-packages/keras_vggface/models.py"
text = open(filename).read()
open(filename, "w+").write(text.replace('keras.engine.topology', 'tensorflow.keras.utils'))
import mtcnn
from PIL import Image
import numpy as np
from numpy.linalg import norm
import pickle
from tqdm import notebook
import os
import random
import time
import math
import tensorflow
import tarfile
from sklearn.neighbors import NearestNeighbors
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.mobilenet import MobileNet
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Flatten, Dense, Dropout, GlobalAveragePooling2D
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from keras.models import load_model
from keras_vggface.vggface import VGGFace
import faiss
In [ ]:
number_of_neighbors = 5


def model_picker(name):
  if name == 'vgg16':
    model = VGG16(weights='imagenet',
                  include_top=False,
                  input_shape=(IMG_WIDTH, IMG_HEIGHT, 3),
                  pooling='max')
  elif name == 'mobilenet':
    model = MobileNet(weights='imagenet',
                      include_top=False,
                      input_shape=(IMG_WIDTH, IMG_HEIGHT, 3),
                      pooling='max',
                      depth_multiplier=1,
                      alpha=1)
  elif name == 'inception':
    model = InceptionV3(weights='imagenet',
                        include_top=False,
                        input_shape=(IMG_WIDTH, IMG_HEIGHT, 3),
                        pooling='max')
  elif name == 'resnet':
    model = ResNet50(weights='imagenet',
                     include_top=False,
                     input_shape=(IMG_WIDTH, IMG_HEIGHT, 3),
                     pooling='max')
  elif name == 'xception':
    model = Xception(weights='imagenet',
                     include_top=False,
                     input_shape=(IMG_WIDTH, IMG_HEIGHT, 3),
                     pooling='max')
  elif name == 'facenet':
    model = load_model('facenet_keras.h5', compile=False)
  elif name == 'vggface':
    model = VGGFace(model='resnet50', include_top=False,
                    input_shape=(IMG_WIDTH, IMG_HEIGHT, 3), pooling='avg')
  else:
    model = None
    print("Specified model not available")
  return model
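
Each of these backbones maps a 224x224x3 input to one fixed-length vector: include_top=False removes the classifier head and the global pooling collapses the spatial dimensions. A minimal sketch using the vgg16 branch:

In [ ]:
m = model_picker('vgg16')
print(m.output_shape)  # (None, 512): one 512-d feature vector per image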
In [ ]:
model_architecture = 'facenet'
model = model_picker(model_architecture)
Extract features function
In [ ]:
def extract_features_from_one(img_path, model):
  """
  Extract a normalized feature vector from one image.
  """
  input_shape = (IMG_WIDTH, IMG_HEIGHT, 3)
  img = image.load_img(img_path,
                       target_size=(input_shape[0], input_shape[1]))
  img_array = image.img_to_array(img)
  expanded_img_array = np.expand_dims(img_array, axis=0)
  # note: preprocess_input here is the ResNet50 one imported above;
  # other backbones expect their own preprocessing
  preprocessed_img = preprocess_input(expanded_img_array)
  features = model.predict(preprocessed_img)
  flattened_features = features.flatten()
  # L2-normalize so Euclidean distance behaves like cosine distance
  normalized_features = flattened_features / norm(flattened_features)
  return normalized_features
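
A quick usage sketch. Note that preprocess_input and the 224x224 resize suit the ImageNet backbones; the FaceNet model loaded above expects 160x160 face crops, which the Improvement section handles separately:

In [ ]:
f = extract_features_from_one(X[0], model_picker('vgg16'))
print(f.shape, norm(f))  # unit-length 512-d vector for vgg16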
Save features function
In [ ]:
def save_features(features_list, filename):
  pickle.dump(features_list, open(filename, 'wb'))
Extract features
In [ ]:
def extract_features(image_paths, model):
  features_all = []
  for image_path in image_paths:
    features_all.append(extract_features_from_one(image_path, model))
  return features_all

features_all = extract_features(X, model)
save_features(features_all, model_architecture + '_features.pickle')

Feature extraction takes quite a long time, so load the preprocessed features instead:

In [ ]:
def read_all_files():
  with open('features-' + model_architecture + '.pickle', 'rb') as f:
    standard_feature_list = pickle.load(f)
  return standard_feature_list


standard_feature_list = read_all_files()
In [ ]:
# NearestNeighbors is unsupervised, so it takes only the feature matrix
neighbors = NearestNeighbors(n_neighbors=number_of_neighbors,
                             algorithm='brute',
                             metric='euclidean').fit(standard_feature_list)
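
faiss was installed and imported earlier but is not used in this cell; as a sketch, the same exact Euclidean search can be served by a flat faiss index, which scales much better on large feature sets:

In [ ]:
feature_matrix = np.asarray(standard_feature_list, dtype='float32')
faiss_index = faiss.IndexFlatL2(feature_matrix.shape[1])  # exact L2 search
faiss_index.add(feature_matrix)
# returns squared L2 distances and row indices into X
D, I = faiss_index.search(feature_matrix[:1], number_of_neighbors)
print(I[0])  # the query comes back as its own nearest neighbor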
In [ ]:
# Helper function to get the classname
def classname(path):
    return path.split('/')[-2]


# Helper function to get the classname and filename
def classname_filename(path):
    return path.split('/')[-2] + '/' + path.split('/')[-1]


# Helper function to plot the nearest images given a query image
def plot_images(filenames, distances):
    images = []
    for filename in filenames:
        images.append(mpimg.imread(filename))
    plt.figure(figsize=(20, 10))
    columns = 5
    for i, image in enumerate(images):
        # integer division: subplot() requires an int row count
        ax = plt.subplot(len(images) // columns + 1, columns, i + 1)
        if i == 0:
            ax.set_title("Query Image\n" + classname_filename(filenames[i]))
        else:
            ax.set_title("Similar Image\n" + classname_filename(filenames[i]) +
                         "\nDistance: " +
                         str(float("{0:.2f}".format(distances[i]))))
        plt.imshow(image)
In [ ]:
test_idx = random.randrange(len(X))
test_file, test_label = X[test_idx], Y[test_idx]

test_feature = extract_features_from_one(test_file, model)
distances, indices = neighbors.kneighbors([test_feature])
similar_image_paths = [test_file] + \
                      [X[indices[0][j]] for j in range(1, number_of_neighbors)]
plot_images(similar_image_paths, distances[0])

Improvement

Extract the facial region
In [ ]:
def extract_face(filename, required_size=(160, 160)):
  # load image from file
  image = Image.open(filename)
  # convert to RGB, if needed
  image = image.convert('RGB')
  # convert to array
  pixels = np.asarray(image)
  # create the detector, using default weights
  # (creating it once outside the function would be faster for large batches)
  detector = mtcnn.MTCNN()
  # detect faces in the image
  results = detector.detect_faces(pixels)
  # extract the bounding box from the first face
  x1, y1, width, height = results[0]['box']
  # bug fix: MTCNN can return negative coordinates
  x1, y1 = abs(x1), abs(y1)
  x2, y2 = x1 + width, y1 + height
  # extract the face
  face = pixels[y1:y2, x1:x2]
  # resize pixels to the model size
  image = Image.fromarray(face)
  image = image.resize(required_size)
  face_array = np.asarray(image)
  return face_array
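
A quick visual check of the cropper on a single image:

In [ ]:
plt.imshow(extract_face(X[0]))
plt.axis('off')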
Get embeddings
In [ ]:
# get the face embedding for one face
def get_embedding(model, face_pixels):
  # scale pixel values
  face_pixels = face_pixels.astype('float32')
  # standardize pixel values across channels (global)
  mean, std = face_pixels.mean(), face_pixels.std()
  face_pixels = (face_pixels - mean) / std
  # transform face into one sample
  samples = np.expand_dims(face_pixels, axis=0)
  # make prediction to get embedding
  yhat = model.predict(samples)
  return yhat[0]
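
Chaining the two helpers turns an image path into one face embedding (128-dimensional for the Keras FaceNet export used here):

In [ ]:
emb = get_embedding(model, extract_face(X[0]))
print(emb.shape)  # e.g. (128,)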
FaceNet model
In [ ]:
newTrainX = []
for i in notebook.tqdm(range(len(X))):
  face_pixels = extract_face(X[i])
  embedding = get_embedding(model, face_pixels)
  newTrainX.append(embedding)
Similarity search: nearest neighbors
In [ ]:
# reuse the precomputed FaceNet features loaded earlier instead of
# re-embedding all 13233 images
newTrainX = standard_feature_list
neighbors = NearestNeighbors(n_neighbors=number_of_neighbors,
                             algorithm='kd_tree',
                             metric='euclidean').fit(newTrainX)
In [ ]:
test_idx = random.randrange(len(X))
test_file, test_label = X[test_idx], Y[test_idx]

detected_faces = extract_face(test_file)
embedding = get_embedding(model, detected_faces)
distances, indices = neighbors.kneighbors([embedding])
similar_image_paths = [test_file] + \
                      [X[indices[0][j]] for j in range(1, number_of_neighbors)]
plot_images(similar_image_paths, distances[0])