%%capture
%mkdir yearbook
%cd yearbook

# Download and unpack the yearbook PDFs, then install the libraries the
# pipeline needs: PyMuPDF for PDF rendering, dlib and DeepFace for face
# analysis.
!pip install --upgrade --no-cache-dir gdown
!gdown --id "1NHT8NN8ClBEnUC5VqkP3wr2KhyiIQzyU"
!unzip PHfiles.zip
%mkdir images
!pip install PyMuPDF
!pip install dlib
!pip install DeepFace

import os
import shutil

import cv2
import fitz  # PyMuPDF
import pandas as pd
from deepface import DeepFace

# --- Step 1: render every page of every PDF to a PNG image. ---
pdfs = [f for f in os.listdir('./') if f.endswith('.pdf')]
for pdf in pdfs:
    # Create ./images/<pdf name> and copy the PDF into it.
    name = pdf.split(".")[0]
    os.chdir('./images')
    os.mkdir(name)
    os.chdir("..")
    newdir = os.path.join('./images', name)
    print("Extracting pages from " + pdf + " into " + newdir)
    shutil.copy(pdf, newdir)

    # Render each page at the default resolution as page-<n>.png.
    os.chdir(newdir)
    doc = fitz.open(pdf)
    for page in doc:
        pix = page.get_pixmap()
        pix.save("page-%i.png" % page.number)
    os.chdir("..")  # out of the PDF's folder
    os.chdir("..")  # out of ./images, back to the notebook root

# --- Step 2: detect and crop faces on every rendered page. ---
os.chdir('images')
# Load the Haar cascade once rather than once per page.
face_cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + "haarcascade_frontalface_default.xml")

for folder in os.listdir('.'):
    os.chdir(folder)
    pngs = [f for f in os.listdir('.') if f.endswith('.png')]
    faces_dir = folder + ' faces'
    if not os.path.exists(faces_dir):
        print("New 'faces' directory created in " + folder + " folder")
        os.makedirs(faces_dir)
    for png in pngs:
        image = cv2.imread(png)
        greyscale_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        detected_faces = face_cascade.detectMultiScale(
            image=greyscale_image, scaleFactor=1.9, minNeighbors=4)
        count = 0
        for (x, y, w, h) in detected_faces:
            try:
                # Crop with some padding around the detection; clamp at zero
                # so a face near the page edge does not wrap around via
                # negative indexing. The crop is saved unannotated so the
                # emotion model below sees a clean face.
                xpadding, ypadding = 20, 40
                crop_face = image[max(0, y - ypadding): y + h + ypadding,
                                  max(0, x - xpadding): x + w + xpadding]
                count += 1
                cv2.imwrite(os.path.join(faces_dir, str(count) + '_' + png),
                            crop_face)
            except Exception:
                print("An error happened")
                continue
        # The full page image is no longer needed once its faces are saved.
        os.remove(png)
    os.chdir("..")
%cd ..

# --- Step 3: classify the emotion on every cropped face and tally the share
# of smiling ('happy') faces per yearbook year. ---
years = ['1911', '1921', '1931', '1941', '1951', '1961']
smile_counts = []
nonsmile_counts = []
error_counts = []

for year in years:
    faces_path = './images/' + year + '/' + year + ' faces'
    number_smiles = 0
    number_nonsmiles = 0
    num_errors = 0
    count = 0
    for f in os.listdir(faces_path):
        if not f.endswith('.png'):
            continue
        count += 1
        try:
            # DeepFace returns a list with one result dict per detected face.
            demography = DeepFace.analyze(faces_path + '/' + f,
                                          actions=['emotion'])
            print(demography)
            if demography[0]['dominant_emotion'] == 'happy':
                number_smiles += 1
            else:
                number_nonsmiles += 1
        except Exception:
            # Faces DeepFace cannot process are tracked separately so each
            # year's error rate is visible in the output.
            num_errors += 1
            print("An error happened")
    # Proportions are taken over every face attempted, errors included.
    smile_counts.append(number_smiles / count)
    nonsmile_counts.append(number_nonsmiles / count)
    error_counts.append(num_errors / count)

# --- Step 4: write the per-year proportions to CSV and download it. ---
results = {'Years': years,
           'Smiles': smile_counts,
           'Non-Smiles': nonsmile_counts,
           'Error Weight': error_counts}
data = pd.DataFrame(results)
data.to_csv('YearbookOutput.csv', index=False)
print(data)

from google.colab import files
files.download('YearbookOutput.csv')
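
# Optional follow-up: a minimal sketch for eyeballing the result, assuming
# YearbookOutput.csv was written by the step above and that matplotlib is
# available (it comes preinstalled on Colab).
import matplotlib.pyplot as plt
import pandas as pd

plot_data = pd.read_csv('YearbookOutput.csv')
plt.plot(plot_data['Years'], plot_data['Smiles'], marker='o', label='Smiles')
plt.plot(plot_data['Years'], plot_data['Non-Smiles'], marker='s',
         label='Non-Smiles')
plt.xlabel('Yearbook year')
plt.ylabel('Proportion of analysed faces')
plt.title('Smiling vs. non-smiling yearbook portraits by decade')
plt.legend()
plt.show()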