import os
import tarfile
import requests

# Download and extract the LFW (Labeled Faces in the Wild) dataset
if not os.path.exists('lfw.tgz'):
    open('lfw.tgz', 'wb').write(
        requests.get('http://vis-www.cs.umass.edu/lfw/lfw.tgz').content)
with tarfile.open('lfw.tgz') as tar:
    tar.extractall()

extensions = ['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG']

def prepare_data():
    """Walk the extracted dataset and collect label/filepath pairs."""
    dirs = []
    files = []
    for root, _, filenames in os.walk('lfw'):
        for filename in filenames:
            ext = os.path.splitext(filename)[1]
            if ext in extensions:
                label = root.split('/')[1]  # person name: 'lfw/<name>/<img>.jpg'
                filepath = os.path.join(root, filename)
                dirs.append(label)
                files.append(filepath)
    return dirs, files

Y, X = prepare_data()

# LFW dataset constants: 13,233 images of 5,749 people
TRAIN_SAMPLES = 13233
IMG_WIDTH, IMG_HEIGHT = 224, 224
NUM_CLASSES = 5749

def download_preprocessed_files():
    """Fetch the pretrained FaceNet model and precomputed feature files."""
    file_names = ['facenet_keras.h5', 'features-deepface.pickle',
                  'features-facenet.pickle', 'features-inception.pickle',
                  'features-resnet.pickle', 'features-vgg16.pickle',
                  'features-vggface.pickle', 'features-xception.pickle']
    base_url = 'https://dl.acytoo.com/rvs/'
    for f in file_names:
        if not os.path.exists(f):
            open(f, 'wb').write(requests.get(base_url + f).content)

download_preprocessed_files()

!pip install mtcnn
!pip install git+https://github.com/rcmalli/keras-vggface.git
!pip install keras_applications
!pip install faiss-gpu

# keras-vggface still imports from the removed keras.engine.topology module;
# patch the installed file so it works with recent TensorFlow/Keras versions.
filename = "/usr/local/lib/python3.7/dist-packages/keras_vggface/models.py"
text = open(filename).read()
open(filename, "w+").write(
    text.replace('keras.engine.topology', 'tensorflow.keras.utils'))

import pickle
import random

import mtcnn
import numpy as np
from numpy.linalg import norm
from PIL import Image
from tqdm import notebook
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.mobilenet import MobileNet
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.models import load_model
from keras_vggface.vggface import VGGFace
import faiss

number_of_neighbors = 5

def model_picker(name):
    """Return a pretrained feature-extractor model by name."""
    if name == 'vgg16':
        model = VGG16(weights='imagenet', include_top=False,
                      input_shape=(IMG_WIDTH, IMG_HEIGHT, 3), pooling='max')
    elif name == 'mobilenet':
        model = MobileNet(weights='imagenet', include_top=False,
                          input_shape=(IMG_WIDTH, IMG_HEIGHT, 3),
                          pooling='max', depth_multiplier=1, alpha=1)
    elif name == 'inception':
        model = InceptionV3(weights='imagenet', include_top=False,
                            input_shape=(IMG_WIDTH, IMG_HEIGHT, 3), pooling='max')
    elif name == 'resnet':
        model = ResNet50(weights='imagenet', include_top=False,
                         input_shape=(IMG_WIDTH, IMG_HEIGHT, 3), pooling='max')
    elif name == 'xception':
        model = Xception(weights='imagenet', include_top=False,
                         input_shape=(IMG_WIDTH, IMG_HEIGHT, 3), pooling='max')
    elif name == 'facenet':
        model = load_model('facenet_keras.h5', compile=False)
    elif name == 'vggface':
        model = VGGFace(model='resnet50', include_top=False,
                        input_shape=(IMG_WIDTH, IMG_HEIGHT, 3), pooling='avg')
    else:
        model = None
        print("Specified model not available")
    return model
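# Note: each Keras application ships its own preprocess_input with a
# different normalization (ResNet50 does caffe-style BGR mean subtraction,
# Inception/Xception scale pixels to [-1, 1]). The extraction code below
# applies the ResNet50 preprocess_input regardless of architecture; if you
# switch model_architecture, a per-model lookup such as this hypothetical
# helper (a sketch, not part of the original pipeline) keeps preprocessing
# consistent with the chosen backbone:
from tensorflow.keras.applications import (vgg16, mobilenet, inception_v3,
                                           xception, resnet50)

PREPROCESSORS = {
    'vgg16': vgg16.preprocess_input,
    'mobilenet': mobilenet.preprocess_input,
    'inception': inception_v3.preprocess_input,
    'xception': xception.preprocess_input,
    'resnet': resnet50.preprocess_input,
}

def preprocess_for(name, img_array):
    # Fall back to ResNet-style preprocessing for unlisted names (e.g. facenet).
    return PREPROCESSORS.get(name, resnet50.preprocess_input)(img_array)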
model_architecture = 'facenet'
model = model_picker(model_architecture)

def extract_features_from_one(img_path, model):
    """Extract a normalized feature vector from a single image."""
    # Use the model's own expected input size (224x224 for the ImageNet
    # backbones, 160x160 for FaceNet).
    target_size = model.input_shape[1:3]
    img = image.load_img(img_path, target_size=target_size)
    img_array = image.img_to_array(img)
    expanded_img_array = np.expand_dims(img_array, axis=0)
    preprocessed_img = preprocess_input(expanded_img_array)
    features = model.predict(preprocessed_img)
    flattened_features = features.flatten()
    normalized_features = flattened_features / norm(flattened_features)
    return normalized_features

def save_features(features_list, filename):
    pickle.dump(features_list, open(filename, 'wb'))

def extract_features(image_paths, model):
    features_all = []
    for image_path in image_paths:
        # Bug fix: the original extracted features from X[0] on every iteration.
        features_all.append(extract_features_from_one(image_path, model))
    return features_all

# This full pass over all 13,233 images is slow; the precomputed
# 'features-<model>.pickle' files downloaded earlier are used below.
features_all = extract_features(X, model)
save_features(features_all, model_architecture + '_features.pickle')

def read_all_files():
    """Load the precomputed features downloaded earlier."""
    with open('features-' + model_architecture + '.pickle', 'rb') as f:
        standard_feature_list = pickle.load(f)
    return standard_feature_list

standard_feature_list = read_all_files()

neighbors = NearestNeighbors(n_neighbors=number_of_neighbors,
                             algorithm='brute',
                             metric='euclidean').fit(standard_feature_list)

# Helper function to get the classname
def classname(path):
    return path.split('/')[-2]

# Helper function to get the classname and filename
def classname_filename(path):
    return path.split('/')[-2] + '/' + path.split('/')[-1]

# Helper function to plot the nearest images given a query image
def plot_images(filenames, distances):
    images = [mpimg.imread(filename) for filename in filenames]
    plt.figure(figsize=(20, 10))
    columns = 5
    for i, img in enumerate(images):
        # Integer division: plt.subplot rejects float row counts on Python 3.
        ax = plt.subplot(len(images) // columns + 1, columns, i + 1)
        if i == 0:
            ax.set_title("Query Image\n" + classname_filename(filenames[i]))
        else:
            ax.set_title("Similar Image\n" + classname_filename(filenames[i]) +
                         "\nDistance: " + str(float("{0:.2f}".format(distances[i]))))
        plt.imshow(img)

test_idx = random.randint(0, len(X) - 1)  # randint is inclusive on both ends
test_file, test_label = X[test_idx], Y[test_idx]
test_feature = extract_features_from_one(test_file, model)
distances, indices = neighbors.kneighbors([test_feature])
similar_image_paths = [test_file] + \
    [X[indices[0][j]] for j in range(1, number_of_neighbors)]
plot_images(similar_image_paths, distances[0])
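# faiss was installed and imported above but never exercised. As a sketch of
# the same brute-force search with faiss (assuming the feature list fits in
# memory as a float32 matrix), an exact flat index returns the same neighbors,
# typically much faster at LFW scale; note that IndexFlatL2 reports squared
# Euclidean distances rather than plain L2.
feature_matrix = np.asarray(standard_feature_list, dtype='float32')
faiss_index = faiss.IndexFlatL2(feature_matrix.shape[1])  # exact L2 search
faiss_index.add(feature_matrix)
faiss_distances, faiss_indices = faiss_index.search(
    np.asarray([test_feature], dtype='float32'), number_of_neighbors)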
def extract_face(filename, required_size=(160, 160)):
    """Detect the first face in an image with MTCNN and return it resized."""
    # load the image from file and convert to RGB, if needed
    img = Image.open(filename).convert('RGB')
    pixels = np.asarray(img)
    # create the detector, using default weights
    detector = mtcnn.MTCNN()
    # detect faces in the image
    results = detector.detect_faces(pixels)
    # extract the bounding box from the first face
    x1, y1, width, height = results[0]['box']
    # bug fix: MTCNN can return slightly negative coordinates
    x1, y1 = abs(x1), abs(y1)
    x2, y2 = x1 + width, y1 + height
    # extract the face
    face = pixels[y1:y2, x1:x2]
    # resize pixels to the size the FaceNet model expects
    img = Image.fromarray(face).resize(required_size)
    return np.asarray(img)

# get the face embedding for one face
def get_embedding(model, face_pixels):
    # scale pixel values
    face_pixels = face_pixels.astype('float32')
    # standardize pixel values across channels (global)
    mean, std = face_pixels.mean(), face_pixels.std()
    face_pixels = (face_pixels - mean) / std
    # transform the face into one sample
    samples = np.expand_dims(face_pixels, axis=0)
    # make a prediction to get the embedding
    yhat = model.predict(samples)
    return yhat[0]

newTrainX = []
for i in notebook.tqdm(range(len(X))):
    detected_face = extract_face(X[i])
    embedding = get_embedding(model, detected_face)
    newTrainX.append(embedding)

# To skip the slow detection/embedding pass above, reuse the precomputed
# FaceNet features downloaded earlier instead (the original assigned this
# unconditionally, discarding the embeddings just computed):
# newTrainX = standard_feature_list

neighbors = NearestNeighbors(n_neighbors=number_of_neighbors,
                             algorithm='kd_tree',
                             metric='euclidean').fit(newTrainX)

test_idx = random.randint(0, len(X) - 1)
test_file, test_label = X[test_idx], Y[test_idx]
detected_face = extract_face(test_file)
embedding = get_embedding(model, detected_face)
distances, indices = neighbors.kneighbors([embedding])
similar_image_paths = [test_file] + \
    [X[indices[0][j]] for j in range(1, number_of_neighbors)]
plot_images(similar_image_paths, distances[0])
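# A quick sanity check of retrieval quality (a sketch, not part of the
# original notebook): sample random queries and measure how often the
# returned neighbors share the query's identity label. `neighbors` and
# `newTrainX` are reused from above; since each query is itself in the
# index, the first hit is usually the query and is skipped.
def neighbor_label_accuracy(num_queries=100):
    hits, total = 0, 0
    for _ in range(num_queries):
        idx = random.randint(0, len(X) - 1)
        _, nbr_indices = neighbors.kneighbors([newTrainX[idx]])
        for j in nbr_indices[0][1:]:
            hits += int(Y[j] == Y[idx])
            total += 1
    return hits / total

print('neighbor label accuracy:', neighbor_label_accuracy())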