#!/usr/bin/env python
# coding: utf-8

# # Face-AntiSpoofing
# 
# Face Anti-Spoofing project. Lanit-Tercom summer school 2022
# [github.com/hairymax/Face-AntiSpoofing](https://github.com/hairymax/Face-AntiSpoofing)
# 
# **Spoofing attack**: an attempt to deceive an identification system by presenting it with a fake image of a face.
# 
# **Facial anti-spoofing** is the task of preventing false facial verification by means of a photo, video, mask or another substitute for an authorized person’s face.
# 
# - **Print attack**: the attacker shows a picture of another person printed on a sheet of paper.
# - **Replay attack**: the attacker shows the screen of another device that plays a pre-recorded photo or video of another person.
# 
# ## Dataset
# Training was performed on the *CelebA Spoof* dataset ([GitHub](https://github.com/ZhangYuanhan-AI/CelebA-Spoof) | [Kaggle](https://www.kaggle.com/datasets/attentionlayer241/celeba-spoof-for-face-antispoofing)).
# 
# ## Model
# In this project we train and test a CNN model with the architecture presented in the [Silent-Face-Anti-Spoofing GitHub repository](https://github.com/minivision-ai/Silent-Face-Anti-Spoofing/) to detect spoof attacks. The architecture consists of a main branch that classifies the attack type and an auxiliary supervision branch over the Fourier spectrum. The Fourier transform is used only at the training stage.
# 
# ## Tasks
# Training was performed for two classification tasks:
# 1. **Live Face** / **Spoof Attack** (binary classification)
# 2. **Live Face** / **Print Attack** / **Replay Attack**

# ## Data Preparation

# In[1]:

from IPython.display import Markdown, display
from matplotlib import pyplot as plt
import pandas as pd
import cv2

from src.utility import plot_value_counts, spoof_labels_to_classes
import data_preparation as dp

CELEBA_DIR = 'CelebA_Spoof/'
# Spoof type labels to keep: 0 = Live, 1-3 = Print attacks, 7-9 = Replay attacks
spoof_filter = [0, 1, 2, 3, 7, 8, 9]

# In[2]:

train_label, test_label = dp.read_orig_labels(CELEBA_DIR, spoof_filter)

# column 40 of the annotations holds the spoof type label
display(Markdown('#### Train set Spoof labels'))
plot_value_counts(train_label[40], sort_index=True)
display(Markdown('#### Test set Spoof labels'))
plot_value_counts(test_label[40], sort_index=True)

# ### Examples of images in cropped dataset

# In[3]:

fig = plt.figure(figsize=(12, 9))
cols, rows = 4, 3
for i in range(cols*rows):
    fig.add_subplot(rows, cols, i+1)
    img = dp.read_image(CELEBA_DIR + train_label.index[i])
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.tight_layout()
plt.show()

# In[4]:

del train_label, test_label

# ## Class balance
# 
# Using the training dataset as an example

# In[5]:

from src.config import TrainConfig

cnf = TrainConfig()
train_labels = pd.read_csv(cnf.labels_path)

display(Markdown('#### Binary classification: Live / Spoof'))
spoofs = spoof_labels_to_classes(train_labels, ['Live', 'Spoof']).iloc[:, 1]
plot_value_counts(spoofs, sort_index=True)

display(Markdown('#### Live / Print attack / Replay attack'))
spoofs = spoof_labels_to_classes(train_labels, ['Live', 'Print', 'Replay']).iloc[:, 1]
plot_value_counts(spoofs, sort_index=True)

# ## Testing data loader
# 
# Examples of images from data loaders

# In[6]:

from IPython.display import Markdown, display
import pandas as pd

from src.config import TrainConfig
from src.utility import plot_iter_images
from src.dataset_loader import get_train_valid_loader

# ### Binary classes: Live **0** and Spoof **1**

# In[7]:

cnf = TrainConfig(spoof_categories='binary', batch_size=8, crop_dir='data_1.5_128')
train_loader, valid_loader = get_train_valid_loader(cnf)
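
# Added sanity check (not in the original notebook): peek at one batch before
# plotting, assuming the loaders yield (images, labels) batches of torch tensors.
sample_imgs, sample_labels = next(iter(train_loader))
print(sample_imgs.shape)    # expected: (batch_size, 3, input_size, input_size)
print(sample_labels[:8])    # class label per image: 0 = Live, 1 = Spoof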

display(Markdown('### Training loader example'))
plot_iter_images(next(iter(train_loader)), cnf.input_size, cnf.batch_size)
display(Markdown('### Validation loader example'))
plot_iter_images(next(iter(valid_loader)), cnf.input_size, 4)

# ### Live **0** / Print **1** / Replay **2**

# In[8]:

cnf = TrainConfig(spoof_categories=[[0], [1, 2, 3], [7, 8, 9]], batch_size=8, crop_dir='data_1.5_128')
train_loader, valid_loader = get_train_valid_loader(cnf)

display(Markdown('### Training loader example'))
plot_iter_images(next(iter(train_loader)), cnf.input_size, cnf.batch_size)
display(Markdown('### Validation loader example'))
plot_iter_images(next(iter(valid_loader)), cnf.input_size, 4)

# ## Testing Models
# 
# Results of the trained models on the test data set

# In[1]:

from tqdm.notebook import tqdm_notebook as tqdm
from IPython.display import Markdown, display
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (auc, roc_auc_score, accuracy_score, confusion_matrix,
                             precision_score, recall_score, f1_score,
                             classification_report)

from src.config import TestConfig
from src.antispoof_pretrained import AntiSpoofPretrained
from src.dataset_loader import get_test_loader
from src.utility import (roc_curve_plots, confusion_matricies_plots,
                         multiclass_roc_curve_plots, plot_iter_images)
import time

batches, batch_size = 10, 500

# ### `Live/Spoof` and `Live/Print/Replay` tasks comparison
# Comparison of metrics between a model trained for binary classification (**Live / Spoof**) and a model for **Live / Print / Replay** detection, with the latter's predictions converted to Live / Spoof

# In[2]:

def test_binary_classification(cnf_bin, cnf_lpr, batches=batches, batch_size=batch_size):
    models_loaders = {
        'Live / Spoof': (AntiSpoofPretrained(cnf_bin), get_test_loader(cnf_bin)),
        'Live / Print / Replay': (AntiSpoofPretrained(cnf_lpr), get_test_loader(cnf_lpr))}
    models_proba = {}
    model_results = {}
    confusion_matricies = {}

    for name, model_and_loader in models_loaders.items():
        model, loader = model_and_loader
        data_iter = iter(loader)
        target_all = np.array([])
        proba_all = np.array([])
        pred_all = np.array([])
        pred_time = 0
        for i in tqdm(range(batches)):
            imgs, target = next(data_iter)
            start = time.time()
            proba = model.predict(imgs)
            pred_time += time.time() - start
            target_all = np.append(target_all, target.numpy())
            pred_all = np.append(pred_all, np.argmax(proba, axis=1))
            # spoof probability: sum of all non-Live class probabilities
            proba_all = np.append(proba_all, proba[:, 1:].sum(axis=1))
        # collapse predictions and targets to binary Live (0) / Spoof (1)
        pred_bin = (~(pred_all == 0)).astype(int)
        target_bin = (~(target_all == 0)).astype(int)
        models_proba[name] = proba_all
        res = {'Accuracy' : accuracy_score(target_bin, pred_bin),
               'AUC-ROC'  : roc_auc_score(target_bin, proba_all),
               'Precision': precision_score(target_bin, pred_bin),
               'Recall'   : recall_score(target_bin, pred_bin),
               'F1 score' : f1_score(target_bin, pred_bin),
               'Time, s'  : pred_time}
        model_results[name] = res
        confusion_matricies[name] = confusion_matrix(target_bin, pred_bin, normalize='true')

    plot_iter_images(next(data_iter), 128, 4)
    display(pd.DataFrame(model_results).T)
    roc_curve_plots(target_bin, models_proba)
    confusion_matricies_plots(confusion_matricies, class_labels=['Live', 'Spoof'])

# #### Face cropping without the background

# In[3]:

cnf_bin = TestConfig('saved_models/AntiSpoofing_bin_128.pth', crop_dir='data128',
                     batch_size=batch_size, spoof_categories='binary')
cnf_lpr = TestConfig('saved_models/AntiSpoofing_print-replay_128.pth', crop_dir='data128',
                     batch_size=batch_size, spoof_categories=[[0], [1, 2, 3], [7, 8, 9]])
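
# Added toy illustration (not in the original notebook) of how the 3-class
# predictions are collapsed to Live/Spoof inside test_binary_classification():
# class 0 stays Live, Print and Replay are merged into Spoof, and the spoof
# score is the sum of the Print and Replay probabilities.
toy_proba = np.array([[0.7, 0.2, 0.1],   # argmax = 0 -> Live,  spoof score 0.3
                      [0.1, 0.6, 0.3]])  # argmax = 1 -> Spoof, spoof score 0.9
print((np.argmax(toy_proba, axis=1) != 0).astype(int))  # [0 1]
print(toy_proba[:, 1:].sum(axis=1))                     # [0.3 0.9]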

test_binary_classification(cnf_bin, cnf_lpr)

# #### Face cropping with the surrounding background

# In[4]:

cnf_bin = TestConfig('saved_models/AntiSpoofing_bin_1.5_128.pth', crop_dir='data_1.5_128',
                     batch_size=batch_size, spoof_categories='binary')
cnf_lpr = TestConfig('saved_models/AntiSpoofing_print-replay_1.5_128.pth', crop_dir='data_1.5_128',
                     batch_size=batch_size, spoof_categories=[[0], [1, 2, 3], [7, 8, 9]])

test_binary_classification(cnf_bin, cnf_lpr)

# ### `Live/Print/Replay` metrics per class

# In[5]:

def test_print_replay_model(cnf, batches=10, batch_size=500):
    model = AntiSpoofPretrained(cnf)
    loader = get_test_loader(cnf)
    data_iter = iter(loader)
    target_all = np.array([])
    proba_all = np.empty([0, 3])
    pred_all = np.array([])
    for i in tqdm(range(batches)):
        imgs, target = next(data_iter)
        proba = model.predict(imgs)
        target_all = np.append(target_all, target.numpy())
        pred_all = np.append(pred_all, np.argmax(proba, axis=1))
        proba_all = np.append(proba_all, np.array(proba), axis=0)

    class_labels = ['Live', 'Print', 'Replay']
    plot_iter_images(next(data_iter), 128, 4)
    display(pd.DataFrame(classification_report(target_all, pred_all, output_dict=True,
                                               target_names=class_labels)))
    print('AUC ROC one vs rest =', roc_auc_score(target_all, proba_all, multi_class='ovr'))
    print('AUC ROC one vs one =', roc_auc_score(target_all, proba_all, multi_class='ovo'))
    multiclass_roc_curve_plots(target_all, proba_all, class_labels=class_labels)

    sns.heatmap(pd.DataFrame(confusion_matrix(target_all, pred_all, normalize='true')),
                annot=True, cmap='Blues', vmin=0, vmax=1, annot_kws={"fontsize": 12})
    plt.xticks(np.arange(0.5, 3), class_labels, fontsize=12)
    plt.yticks(np.arange(0.5, 3), class_labels, fontsize=12)
    plt.title('Live / Print / Replay confusion matrix', fontsize=14)
    plt.xlabel('Predictions', fontsize=14)
    plt.ylabel('True', fontsize=14)
    plt.show()

# #### Face cropping without the background

# In[6]:

test_print_replay_model(TestConfig('saved_models/AntiSpoofing_print-replay_128.pth',
                                   crop_dir='data128', batch_size=batch_size,
                                   spoof_categories=[[0], [1, 2, 3], [7, 8, 9]]))

# #### Face cropping with the surrounding background

# In[7]:

test_print_replay_model(TestConfig('saved_models/AntiSpoofing_print-replay_1.5_128.pth',
                                   crop_dir='data_1.5_128', batch_size=batch_size,
                                   spoof_categories=[[0], [1, 2, 3], [7, 8, 9]]))

# ## Predictions with prepared models

# In[1]:

import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from src.utility import spoof_labels_to_classes
from src.FaceAntiSpoofing import AntiSpoof

TEST_DIR = "CelebA_Spoof_crop/data_1.5_128/test/"

def plot_true_and_pred(labels, anti_spoof, cols=6):
    imgs = []
    lbl_true = list(labels.iloc[:, 1])
    for i in range(len(labels)):
        img = cv2.imread(TEST_DIR + labels.iloc[i, 0])
        imgs.append(img)
    lbl_pred = anti_spoof(imgs)
    lbl_pred = [np.argmax(l) for l in lbl_pred]

    fig = plt.figure(figsize=(12, 9))
    rows = len(imgs) // cols
    for i in range(len(imgs)):
        h, w = imgs[i].shape[:2]
        size = max(h, w)
        ax = fig.add_subplot(rows, cols, i+1)
        plt.imshow(cv2.cvtColor(imgs[i], cv2.COLOR_BGR2RGB), extent=(0, w, 0, h))
        plt.xlim((w-size)/2, (w+size)/2)
        plt.ylim((h-size)/2, (h+size)/2)
        # true label in the bottom-left corner, prediction in the bottom-right
        plt.text(0.04, 0.05, lbl_true[i], fontsize=20, transform=ax.transAxes,
                 color='white', backgroundcolor='black')
        c = 'green' if lbl_true[i] == lbl_pred[i] else 'red'
        plt.text(0.85, 0.05, lbl_pred[i], fontsize=20, transform=ax.transAxes,
                 color='white', backgroundcolor=c)
        plt.yticks([]), plt.xticks([])
    plt.tight_layout()
    plt.show()

test_labels = pd.read_csv(TEST_DIR + 'test_target.csv')
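
# Quick check (an added sketch, not part of the original notebook): rough accuracy
# of the binary ONNX model on a small random sample of the test set, using the same
# AntiSpoof call convention as plot_true_and_pred() above.
sample = spoof_labels_to_classes(test_labels.sample(100), [0, 1])
sample_imgs = [cv2.imread(TEST_DIR + p) for p in sample.iloc[:, 0]]
anti_spoof_bin = AntiSpoof('saved_models/AntiSpoofing_bin_1.5_128.onnx')
sample_preds = [np.argmax(p) for p in anti_spoof_bin(sample_imgs)]
print('Sample accuracy: {:.3f}'.format(
    np.mean(np.array(sample_preds) == sample.iloc[:, 1].to_numpy())))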

# ### `Live/Spoof`

# In[2]:

labels_bin = spoof_labels_to_classes(test_labels.sample(30), [0, 1])
anti_spoof = AntiSpoof('saved_models/AntiSpoofing_bin_1.5_128.onnx')
plot_true_and_pred(labels_bin, anti_spoof)

# ### `Live/Print/Replay`

# In[3]:

labels_print_replay = spoof_labels_to_classes(test_labels.sample(30), [0, 1, 2])
anti_spoof = AntiSpoof('saved_models/AntiSpoofing_print-replay_1.5_128.onnx')
plot_true_and_pred(labels_print_replay, anti_spoof)

# ## Examples on real data

# In[1]:

import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

from src.face_detector import YOLOv5
from src.FaceAntiSpoofing import AntiSpoof

def increased_crop(img, bbox: tuple, bbox_inc: float = 1.5):
    # Crop the face with a margin around its bounding box, padding with black
    # where the enlarged crop goes beyond the image borders
    real_h, real_w = img.shape[:2]
    x, y, w, h = bbox
    w, h = w - x, h - y
    l = max(w, h)
    xc, yc = x + w/2, y + h/2
    x, y = int(xc - l*bbox_inc/2), int(yc - l*bbox_inc/2)
    x1 = 0 if x < 0 else x
    y1 = 0 if y < 0 else y
    x2 = real_w if x + l*bbox_inc > real_w else x + int(l*bbox_inc)
    y2 = real_h if y + l*bbox_inc > real_h else y + int(l*bbox_inc)
    img = img[y1:y2, x1:x2, :]
    img = cv2.copyMakeBorder(img,
                             y1-y, int(l*bbox_inc-y2+y),
                             x1-x, int(l*bbox_inc)-x2+x,
                             cv2.BORDER_CONSTANT, value=[0, 0, 0])
    return img

TEST_IMG_PATH = 'test_imgs'

# In[2]:

face_detector = YOLOv5('saved_models/yolov5s-face.onnx')
anti_spoof = AntiSpoof('saved_models/AntiSpoofing_print-replay_1.5_128.onnx')

for img_name in os.listdir(TEST_IMG_PATH):
    img = cv2.imread(os.path.join(TEST_IMG_PATH, img_name))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    bbox = face_detector([img])[0]
    if bbox.shape[0] > 0:
        x1, y1, x2, y2 = bbox.flatten()[:4].astype(int)
    else:
        raise ValueError("Face not found in image '{}'!".format(img_name))

    # plt.imshow(increased_crop(img, (x1, y1, x2, y2), bbox_inc=1.5))
    # plt.show()
    pred = anti_spoof([increased_crop(img, (x1, y1, x2, y2), bbox_inc=1.5)])[0]
    label = np.argmax(pred)
    real_face_score = pred[0][0]

    if label == 0:
        print("Image '{}' is Real Face.".format(img_name))
        res_text = "REAL. score: {:.2f}".format(real_face_score)
        color = (0, 255, 0)
    else:
        print("Image '{}' is Fake Face.".format(img_name))
        res_text = "FAKE. score: {:.2f}".format(real_face_score)
        color = (255, 0, 0)

    cv2.rectangle(img, (x1, y1), (x2, y2), color, 20)
    cv2.putText(img, res_text, (x1, y1-50), cv2.FONT_HERSHEY_COMPLEX,
                (x2-x1)/300, color, 10)
    #img = img[y1:y2,x1:x2,:]

    plt.figure(figsize=(10, 10))
    plt.imshow(img)
    plt.yticks([]), plt.xticks([])
    plt.show()

# In[ ]:
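
# A small convenience wrapper (an added sketch, not part of the original notebook):
# it bundles the detection, cropping and anti-spoofing steps from the loop above
# for a single RGB image. The function name make_prediction is illustrative.
def make_prediction(img, face_detector, anti_spoof):
    # take the first detected face; return None if no face is found
    bbox = face_detector([img])[0]
    if bbox.shape[0] == 0:
        return None
    x1, y1, x2, y2 = bbox.flatten()[:4].astype(int)
    # classify the enlarged face crop; class 0 corresponds to a real face
    pred = anti_spoof([increased_crop(img, (x1, y1, x2, y2), bbox_inc=1.5)])[0]
    label = np.argmax(pred)
    return label, pred[0][label], (x1, y1, x2, y2)

# Usage example: result = make_prediction(img, face_detector, anti_spoof)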