%matplotlib inline import numpy as np import os import matplotlib.pyplot as plt import math from PIL import Image !pwd data_dir = "/Users/Quan/GitHub/sklearn-practice/CroppedYale" os.chdir(data_dir) !ls # converting from svg to png # from glob import glob # %cd $data_dir # files=glob('yaleB*/*.pgm') # print 'number of files is',len(files) # count=0 # for f in files: # new_f=f[:-3]+'png' # !convert $f $new_f # count += 1 # if count % 100==0: # print count,f,new_f def image_grid(D,H,W,cols=10,scale=1): """ display a grid of images H,W: Height and width of the images cols: number of columns = number of images in each row scale: 1 to fill screen """ n = np.shape(D)[0] rows = int(math.ceil((n+0.0)/cols)) fig = plt.figure(1,figsize=[scale*20.0/H*W,scale*20.0/cols*rows],dpi=300) for i in range(n): plt.subplot(rows,cols,i+1) fig=plt.imshow(np.reshape(D[i,:],[H,W]), cmap = plt.get_cmap("gray")) plt.axis('off') def create_filenames(data_dir, view_list): # loads the pictures into a list # data_dir: the CroppedYale folder # view_list: the views you wish to grab dir_list = os.listdir(data_dir) file_list = [] for dir in dir_list: for view in view_list: filename = "%s/%s_%s.png" % (dir, dir, view) file_list.append(filename) return(file_list) view_list = ['P00A+000E+00', 'P00A+005E+10' , 'P00A+005E-10' , 'P00A+010E+00'] file_list = create_filenames(data_dir, view_list) len(file_list) # open image im = Image.open(file_list[0]).convert("L") # get original dimensions H,W = np.shape(im) print 'shape=',(H,W) im_number = len(file_list) # fill array with rows as image # and columns as pixels arr = np.zeros([im_number,H*W]) for i in range(im_number): im = Image.open(file_list[i]).convert("L") arr[i,:] = np.reshape(np.asarray(im),[1,H*W]) image_grid(arr,H,W) # let's find the mean_image mean_image = np.mean(arr, axis=0) plt.imshow(np.reshape(mean_image,[H,W]), cmap = plt.get_cmap("gray")) plt.figure() plt.hist(mean_image,bins=100); # centering the data (subtract mean face) arr_norm = np.zeros([im_number, H*W]) arr_norm = arr - mean_image # plot the first 10 normalized faces image_grid(arr_norm[:10,:],H,W) from sklearn.decomposition.pca import PCA pca = PCA() pca.fit(arr_norm) # Let's make a scree plot pve = pca.explained_variance_ratio_ pve.shape plt.plot(range(len(pve)), pve) plt.title("Scree Plot") plt.ylabel("Proportion of Variance Explained") plt.xlabel("Principal Component Number") # eigenfaces eigenfaces = pca.components_ image_grid(eigenfaces[:9,:], H, W, cols=3) img_idx = file_list.index('yaleB01/yaleB01_P00A+010E+00.png') loadings = pca.components_ n_components = loadings.shape[0] scores = np.dot(arr_norm[:,:], loadings[:,:].T) img_proj = [] for n in range(n_components): proj = np.dot(scores[img_idx, n], loadings[n,:]) img_proj.append(proj) len(img_proj) faces = mean_image face_list = [] face_list.append(mean_image) for i in range(len(img_proj)): faces = np.add(faces, img_proj[i]) face_list.append(faces) len(face_list) face_arr = np.asarray(face_list) face_arr.shape image_grid(face_arr[:25], H, W, cols=5) image_grid(face_arr[range(0, 121, 5)], H, W, cols=5) # getting the index of the subject sub_idx = [i for i, s in enumerate(file_list) if "yaleB05" in s] print sub_idx face_idx = file_list.index("yaleB05/yaleB05_P00A+010E+00.png") print face_idx # plot target face image_grid(arr[19:20], H, W) # remove subject from array arr_new = np.zeros([len(file_list), H*W]) for i in range(len(file_list)): im = Image.open(file_list[i]).convert("L") arr_new[i,:] = np.reshape(np.asarray(im),[1, H*W]) target_face = arr_new[19,] arr_new = np.delete(arr_new, sub_idx, axis = 0) arr_new.shape target_face.shape mean_face = np.mean(arr_new, axis = 0) centered_face = target_face - mean_face plt.imshow(np.reshape(centered_face,[H,W]), cmap = plt.get_cmap("gray")) plt.figure() plt.hist(centered_face,bins=100); arr_norm = arr_new - mean_face pca.fit(arr_new) loadings = pca.components_ n_components = loadings.shape[0] scores = np.dot(centered_face, loadings.T) reconstruct = np.dot(scores, loadings) reconstruct.shape plt.imshow(np.reshape(reconstruct, [H,W]), cmap = plt.get_cmap("gray")) plt.figure()