%matplotlib inline import matplotlib.pyplot as plt import sys, os, re, time import urllib import numpy as np from IPython import parallel HAAR_CASCADE_PATH = "haarcascade_frontalface_default.xml" # if you have opencv installed via homebrew, this would be in # /usr/local/share/OpenCV/haarcascades/ import cv storage = cv.CreateMemStorage() cascade = cv.Load(HAAR_CASCADE_PATH) def extract_faces(image, faces): """Returns any faces in an image in a list of numpy arrays""" import numpy as np A = np.frombuffer(image.tostring(), dtype=np.uint8).reshape((image.height, image.width, image.nChannels)) A = A[:,:,::-1] face_arrays = [] for face in faces: Aface = A[face[1]:face[1]+face[3],face[0]:face[0]+face[2]] face_arrays.append(Aface) return face_arrays def detect_faces(filename): """Loads an image into OpenCV, and detects faces returns None if no image is found, (filename, [list of numpy arrays]) if there are faces """ image = cv.LoadImage(filename) faces = [] detected = cv.HaarDetectObjects(image, cascade, storage, 1.2, 2, cv.CV_HAAR_DO_CANNY_PRUNING, (100,100)) if detected: for (x,y,w,h),n in detected: faces.append((x,y,w,h)) if faces: return filename, extract_faces(image, faces) pictures_dir = 'images' import glob pictures = [] for directory, subdirs, files in os.walk(pictures_dir): for fname in files: if fname.endswith('.jpg'): pictures.append(os.path.join(directory, fname)) for p in pictures: found = detect_faces(p) if found: break filename, faces = found for face in faces: plt.figure() plt.imshow(face) rc = parallel.Client() all_engines = rc[:] view = rc.load_balanced_view() %%px %cd notebooks/parallel %%px HAAR_CASCADE_PATH = "haarcascade_frontalface_default.xml" import cv storage = cv.CreateMemStorage() cascade = cv.Load(HAAR_CASCADE_PATH) all_engines.push(dict( extract_faces=extract_faces, )) tic = time.time() amr = view.map_async(detect_faces, pictures[:1000], ordered=False) nfound = 0 for r in amr: if not r: continue filename, faces = r nfound += len(faces) print "%i faces found in %s" % (len(faces), filename) for face in faces: plt.imshow(face) plt.show() toc = time.time() print "found %i faces in %i images in %f s" % (nfound, len(amr), toc-tic)