import os
import tarfile
import requests

# Download and extract the LFW (Labeled Faces in the Wild) dataset
if not os.path.exists('lfw.tgz'):
    open('lfw.tgz', 'wb').write(
        requests.get('http://vis-www.cs.umass.edu/lfw/lfw.tgz').content)
with tarfile.open('lfw.tgz') as tar:
    tar.extractall()

extensions = ['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG']

def prepare_data():
    """Walk the extracted dataset and collect label/filepath pairs."""
    dirs = []
    files = []
    for root, _, filenames in os.walk('lfw'):
        for filename in filenames:
            ext = os.path.splitext(filename)[1]
            if ext in extensions:
                label = root.split('/')[1]  # person name: 'lfw/<name>/<img>.jpg'
                filepath = os.path.join(root, filename)
                dirs.append(label)
                files.append(filepath)
    return dirs, files

Y, X = prepare_data()

# LFW dataset constants: 13,233 images of 5,749 people
TRAIN_SAMPLES = 13233
IMG_WIDTH, IMG_HEIGHT = 224, 224
NUM_CLASSES = 5749

def download_preprocessed_files():
    """Fetch the pretrained FaceNet model and precomputed feature files."""
    file_names = ['facenet_keras.h5', 'features-deepface.pickle',
                  'features-facenet.pickle', 'features-inception.pickle',
                  'features-resnet.pickle', 'features-vgg16.pickle',
                  'features-vggface.pickle', 'features-xception.pickle']
    base_url = 'https://dl.acytoo.com/rvs/'
    for f in file_names:
        if not os.path.exists(f):
            open(f, 'wb').write(requests.get(base_url + f).content)

download_preprocessed_files()

!pip install mtcnn
!pip install git+https://github.com/rcmalli/keras-vggface.git
!pip install keras_applications
!pip install faiss-gpu

# keras-vggface still imports from the removed keras.engine.topology module;
# patch the installed file so it works with recent TensorFlow/Keras versions.
filename = "/usr/local/lib/python3.7/dist-packages/keras_vggface/models.py"
text = open(filename).read()
open(filename, "w+").write(
    text.replace('keras.engine.topology', 'tensorflow.keras.utils'))

import pickle
import random

import mtcnn
import numpy as np
from numpy.linalg import norm
from PIL import Image
from tqdm import notebook
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.mobilenet import MobileNet
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.models import load_model
from keras_vggface.vggface import VGGFace
import faiss

number_of_neighbors = 5

def model_picker(name):
    """Return a pretrained feature-extractor model by name."""
    if name == 'vgg16':
        model = VGG16(weights='imagenet', include_top=False,
                      input_shape=(IMG_WIDTH, IMG_HEIGHT, 3), pooling='max')
    elif name == 'mobilenet':
        model = MobileNet(weights='imagenet', include_top=False,
                          input_shape=(IMG_WIDTH, IMG_HEIGHT, 3),
                          pooling='max', depth_multiplier=1, alpha=1)
    elif name == 'inception':
        model = InceptionV3(weights='imagenet', include_top=False,
                            input_shape=(IMG_WIDTH, IMG_HEIGHT, 3), pooling='max')
    elif name == 'resnet':
        model = ResNet50(weights='imagenet', include_top=False,
                         input_shape=(IMG_WIDTH, IMG_HEIGHT, 3), pooling='max')
    elif name == 'xception':
        model = Xception(weights='imagenet', include_top=False,
                         input_shape=(IMG_WIDTH, IMG_HEIGHT, 3), pooling='max')
    elif name == 'facenet':
        model = load_model('facenet_keras.h5', compile=False)
    elif name == 'vggface':
        model = VGGFace(model='resnet50', include_top=False,
                        input_shape=(IMG_WIDTH, IMG_HEIGHT, 3), pooling='avg')
    else:
        model = None
        print("Specified model not available")
    return model
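# Note: each Keras application ships its own preprocess_input with a
# different normalization (ResNet50 does caffe-style BGR mean subtraction,
# Inception/Xception scale pixels to [-1, 1]). The extraction code below
# applies the ResNet50 preprocess_input regardless of architecture; if you
# switch model_architecture, a per-model lookup such as this hypothetical
# helper (a sketch, not part of the original pipeline) keeps preprocessing
# consistent with the chosen backbone:
from tensorflow.keras.applications import (vgg16, mobilenet, inception_v3,
                                           xception, resnet50)

PREPROCESSORS = {
    'vgg16': vgg16.preprocess_input,
    'mobilenet': mobilenet.preprocess_input,
    'inception': inception_v3.preprocess_input,
    'xception': xception.preprocess_input,
    'resnet': resnet50.preprocess_input,
}

def preprocess_for(name, img_array):
    # Fall back to ResNet-style preprocessing for unlisted names (e.g. facenet).
    return PREPROCESSORS.get(name, resnet50.preprocess_input)(img_array)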
model_architecture = 'facenet'
model = model_picker(model_architecture)

def extract_features_from_one(img_path, model):
    """Extract a normalized feature vector from a single image."""
    # Use the model's own expected input size (224x224 for the ImageNet
    # backbones, 160x160 for FaceNet).
    target_size = model.input_shape[1:3]
    img = image.load_img(img_path, target_size=target_size)
    img_array = image.img_to_array(img)
    expanded_img_array = np.expand_dims(img_array, axis=0)
    preprocessed_img = preprocess_input(expanded_img_array)
    features = model.predict(preprocessed_img)
    flattened_features = features.flatten()
    normalized_features = flattened_features / norm(flattened_features)
    return normalized_features

def save_features(features_list, filename):
    pickle.dump(features_list, open(filename, 'wb'))

def extract_features(image_paths, model):
    features_all = []
    for image_path in image_paths:
        # Bug fix: the original extracted features from X[0] on every iteration.
        features_all.append(extract_features_from_one(image_path, model))
    return features_all

# This full pass over all 13,233 images is slow; the precomputed
# 'features-<model>.pickle' files downloaded earlier are used below.
features_all = extract_features(X, model)
save_features(features_all, model_architecture + '_features.pickle')

def read_all_files():
    """Load the precomputed features downloaded earlier."""
    with open('features-' + model_architecture + '.pickle', 'rb') as f:
        standard_feature_list = pickle.load(f)
    return standard_feature_list

standard_feature_list = read_all_files()

neighbors = NearestNeighbors(n_neighbors=number_of_neighbors,
                             algorithm='brute',
                             metric='euclidean').fit(standard_feature_list)

# Helper function to get the classname
def classname(path):
    return path.split('/')[-2]

# Helper function to get the classname and filename
def classname_filename(path):
    return path.split('/')[-2] + '/' + path.split('/')[-1]

# Helper function to plot the nearest images given a query image
def plot_images(filenames, distances):
    images = [mpimg.imread(filename) for filename in filenames]
    plt.figure(figsize=(20, 10))
    columns = 5
    for i, img in enumerate(images):
        # Integer division: plt.subplot rejects float row counts on Python 3.
        ax = plt.subplot(len(images) // columns + 1, columns, i + 1)
        if i == 0:
            ax.set_title("Query Image\n" + classname_filename(filenames[i]))
        else:
            ax.set_title("Similar Image\n" + classname_filename(filenames[i]) +
                         "\nDistance: " + str(float("{0:.2f}".format(distances[i]))))
        plt.imshow(img)

test_idx = random.randint(0, len(X) - 1)  # randint is inclusive on both ends
test_file, test_label = X[test_idx], Y[test_idx]
test_feature = extract_features_from_one(test_file, model)
distances, indices = neighbors.kneighbors([test_feature])
similar_image_paths = [test_file] + \
    [X[indices[0][j]] for j in range(1, number_of_neighbors)]
plot_images(similar_image_paths, distances[0])
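# faiss was installed and imported above but never exercised. As a sketch of
# the same brute-force search with faiss (assuming the feature list fits in
# memory as a float32 matrix), an exact flat index returns the same neighbors,
# typically much faster at LFW scale; note that IndexFlatL2 reports squared
# Euclidean distances rather than plain L2.
feature_matrix = np.asarray(standard_feature_list, dtype='float32')
faiss_index = faiss.IndexFlatL2(feature_matrix.shape[1])  # exact L2 search
faiss_index.add(feature_matrix)
faiss_distances, faiss_indices = faiss_index.search(
    np.asarray([test_feature], dtype='float32'), number_of_neighbors)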
def extract_face(filename, required_size=(160, 160)):
    """Detect the first face in an image with MTCNN and return it resized."""
    # load the image from file and convert to RGB, if needed
    img = Image.open(filename).convert('RGB')
    pixels = np.asarray(img)
    # create the detector, using default weights
    detector = mtcnn.MTCNN()
    # detect faces in the image
    results = detector.detect_faces(pixels)
    # extract the bounding box from the first face
    x1, y1, width, height = results[0]['box']
    # bug fix: MTCNN can return slightly negative coordinates
    x1, y1 = abs(x1), abs(y1)
    x2, y2 = x1 + width, y1 + height
    # extract the face
    face = pixels[y1:y2, x1:x2]
    # resize pixels to the size the FaceNet model expects
    img = Image.fromarray(face).resize(required_size)
    return np.asarray(img)

# get the face embedding for one face
def get_embedding(model, face_pixels):
    # scale pixel values
    face_pixels = face_pixels.astype('float32')
    # standardize pixel values across channels (global)
    mean, std = face_pixels.mean(), face_pixels.std()
    face_pixels = (face_pixels - mean) / std
    # transform the face into one sample
    samples = np.expand_dims(face_pixels, axis=0)
    # make a prediction to get the embedding
    yhat = model.predict(samples)
    return yhat[0]

newTrainX = []
for i in notebook.tqdm(range(len(X))):
    detected_face = extract_face(X[i])
    embedding = get_embedding(model, detected_face)
    newTrainX.append(embedding)

# To skip the slow detection/embedding pass above, reuse the precomputed
# FaceNet features downloaded earlier instead (the original assigned this
# unconditionally, discarding the embeddings just computed):
# newTrainX = standard_feature_list

neighbors = NearestNeighbors(n_neighbors=number_of_neighbors,
                             algorithm='kd_tree',
                             metric='euclidean').fit(newTrainX)

test_idx = random.randint(0, len(X) - 1)
test_file, test_label = X[test_idx], Y[test_idx]
detected_face = extract_face(test_file)
embedding = get_embedding(model, detected_face)
distances, indices = neighbors.kneighbors([embedding])
similar_image_paths = [test_file] + \
    [X[indices[0][j]] for j in range(1, number_of_neighbors)]
plot_images(similar_image_paths, distances[0])
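# A quick sanity check of retrieval quality (a sketch, not part of the
# original notebook): sample random queries and measure how often the
# returned neighbors share the query's identity label. `neighbors` and
# `newTrainX` are reused from above; since each query is itself in the
# index, the first hit is usually the query and is skipped.
def neighbor_label_accuracy(num_queries=100):
    hits, total = 0, 0
    for _ in range(num_queries):
        idx = random.randint(0, len(X) - 1)
        _, nbr_indices = neighbors.kneighbors([newTrainX[idx]])
        for j in nbr_indices[0][1:]:
            hits += int(Y[j] == Y[idx])
            total += 1
    return hits / total

print('neighbor label accuracy:', neighbor_label_accuracy())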