A simple demonstration of facial detection using images from the State Library of NSW's Tribune collection.
If you haven't used one of these notebooks before, they're basically web pages in which you can write, edit, and run live code. They're meant to encourage experimentation, so don't feel nervous. Just try running a few cells and see what happens!
Some tips:
import cv2
import pandas as pd
import os
from urllib.parse import urlparse
import requests
from IPython.display import display, HTML
import copy
# Metadata describing the Tribune images, read straight from the GLAM Workbench repository
df = pd.read_csv(
    'https://raw.githubusercontent.com/GLAM-Workbench/ozglam-data-records-of-resistance/master/data/images.csv'
)
# Frontal-face Haar cascade, loaded from OpenCV's data directory (cv2.data.haarcascades)
face_cl = cv2.CascadeClassifier(
    '{}haarcascade_frontalface_default.xml'.format(cv2.data.haarcascades)
)
def select_images(sample):
    '''
    Pick a random sample of images from the Tribune data.

    `sample` is the number of rows to draw from `df`.
    Returns a list of (image id, image url) tuples.
    '''
    url_template = 'https://s3-ap-southeast-2.amazonaws.com/wraggetribune/images/{0}.jpg'
    chosen = df.sample(sample)
    return [(img_id, url_template.format(img_id)) for img_id in chosen['images']]
def download_image(img_url):
    '''
    Download the specified image and save it in the current working directory.

    The local file is named after the last segment of the url's path.

    Parameters:
        img_url: url of the image to download.

    Returns:
        The full path of the saved file.

    Raises:
        requests.HTTPError: if the server responds with an error status.
        requests.RequestException: for other network problems, including timeouts.
    '''
    filename = os.path.join(os.getcwd(), os.path.basename(urlparse(img_url).path))
    # The context manager makes sure the streamed connection is released,
    # and the timeout stops a stalled server from hanging the notebook.
    with requests.get(img_url, stream=True, timeout=60) as response:
        # Fail here rather than saving an error page under a .jpg name
        response.raise_for_status()
        with open(filename, 'wb') as fd:
            for chunk in response.iter_content(chunk_size=8192):
                fd.write(chunk)
    return filename
def detect_faces(img_file):
    '''
    Use OpenCV to find faces in an image file.

    Each detected face is cropped and saved in the current working directory
    as '<image name>-<n>.jpg' (numbered from 1). The file at `img_file` is then
    overwritten with a copy of the image in which each face is outlined in green.

    Parameters:
        img_file: path of the image file to process.

    Returns:
        The detections returned by `detectMultiScale` -- one (x, y, w, h)
        rectangle per face, empty if no faces were found.

    Raises:
        ValueError: if the file can't be read as an image.
        cv2.error: if OpenCV fails while processing the image.
    '''
    print('Processing {}'.format(img_file))
    image = cv2.imread(img_file)
    # imread() signals failure by returning None instead of raising
    if image is None:
        raise ValueError('Could not read an image from {}'.format(img_file))
    # Create a copy to annotate
    results = image.copy()
    # Create a greyscale copy for face detection.
    # imread() loads pixels in BGR order, so convert from BGR (not RGB).
    grey = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Find faces!
    # Try adjusting scaleFactor and minNeighbors if results aren't what you expect.
    faces = face_cl.detectMultiScale(grey, scaleFactor=1.3, minNeighbors=4, minSize=(50, 50))
    img_name = os.path.splitext(os.path.basename(img_file))[0]
    for f, (x, y, w, h) in enumerate(faces, 1):
        # Save a cropped version of the detected face (taken from the unannotated image)
        face = image[y: y + h, x: x + w]
        cv2.imwrite('{}-{}.jpg'.format(img_name, f), face)
        # Draw a green box on the complete image
        cv2.rectangle(results, (x, y), (x + w, y + h), (0, 255, 0), 2)
    # Save the annotated image over the original file
    cv2.imwrite(img_file, results)
    return faces
def process_images(images):
    '''
    Find faces in a list of images and display the results.

    Each image is downloaded, run through face detection, and then displayed
    with a link to the State Library of NSW record and a thumbnail of each
    face that was found.

    Parameters:
        images: an iterable of (image id, image url) tuples.
    '''
    for img_id, img_url in images:
        filename = download_image(img_url)
        faces = detect_faces(filename)
        img_name = os.path.basename(filename)
        # The cropped faces are saved under the downloaded file's name (minus
        # extension), so build the thumbnail links from that rather than img_id.
        face_prefix = os.path.splitext(img_name)[0]
        parts = ['<img src="{}"><br><a target="_blank" href="http://digital.sl.nsw.gov.au/delivery/DeliveryManagerServlet?dps_pid={}&embedded=true&toolbar=false">More details at SLNSW</a><br>'.format(img_name, img_id)]
        print('I found {} faces...'.format(len(faces)))
        for i in range(1, len(faces) + 1):
            parts.append('<a target="_blank" href="{0}-{1}.jpg"><img style="width: 100px; height: 100px; float: left; margin: 10px; object-fit: scale-down;" src="{0}-{1}.jpg"></a>'.format(face_prefix, i))
        display(HTML(''.join(parts)))
def get_random_image(sample=1):
    '''
    Run face detection on randomly chosen images.

    `sample` is the number of images to select (one by default).
    '''
    process_images(select_images(sample))
def get_image_by_id(img_id):
    '''
    Run face detection on a single image, selected by its id.
    '''
    img_url = 'https://s3-ap-southeast-2.amazonaws.com/wraggetribune/images/{0}.jpg'.format(img_id)
    process_images([(img_id, img_url)])
# Process one random image -- increase `sample` to process more than one
get_random_image(sample=1)
# Process a specific image, identified by its id
get_image_by_id('FL4578163')