%matplotlib inline
import numpy as np
import os
import matplotlib.pyplot as plt
import math
from PIL import Image

# Show the notebook's current working directory (IPython shell escape,
# not valid in plain Python).
!pwd
# Location of the CroppedYale data set; this absolute path is machine-specific.
data_dir = "/Users/Quan/GitHub/sklearn-practice/CroppedYale"
# Work from inside the data folder: the file names built by create_filenames
# are relative ("<dir>/<dir>_<view>.png") and are opened as-is further down.
os.chdir(data_dir)
# List the contents of the data folder (IPython shell escape).
!ls

# converting from pgm to png
# from glob import glob

# %cd $data_dir

# files=glob('yaleB*/*.pgm')
# print 'number of files is',len(files)
# count=0
# for f in files:
#     new_f=f[:-3]+'png'
#     !convert $f $new_f
#     count += 1
#     if count % 100==0:
#         print count,f,new_f

def image_grid(D,H,W,cols=10,scale=1):
    """ Draw every row of D as a grayscale image on a grid of subplots.
        D: 2-D array holding one flattened image (H*W values) per row
        H,W: Height and width of the images
        cols: number of columns = number of images in each row
        scale: 1 to fill screen
    """
    n_images = np.shape(D)[0]
    # enough rows to hold all images; the last row may be only partly used
    n_rows = int(math.ceil(float(n_images) / cols))
    fig_width = scale * 20.0 / H * W
    fig_height = scale * 20.0 / cols * n_rows
    # always draws into figure number 1
    plt.figure(1, figsize=[fig_width, fig_height], dpi=300)
    for idx in range(n_images):
        # subplot positions are 1-based
        plt.subplot(n_rows, cols, idx + 1)
        pixels = np.reshape(D[idx, :], [H, W])
        plt.imshow(pixels, cmap=plt.get_cmap("gray"))
        plt.axis('off')

def create_filenames(data_dir, view_list):
    """Build the relative image paths of the requested views for every subject.

    data_dir: the CroppedYale folder; each sub-directory is treated as one
        subject.
    view_list: the views you wish to grab for each subject.

    Returns a list of "<subject>/<subject>_<view>.png" strings (relative to
    data_dir), grouped by subject in sorted order and, within a subject, in
    the order of view_list.  The files are not checked for existence.
    """
    # os.listdir returns entries in arbitrary order and also lists plain
    # files (e.g. ".DS_Store"); keep only directories and sort them so that
    # the row order of anything built from this list is reproducible.
    subjects = sorted(
        name for name in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, name))
    )
    return [
        "%s/%s_%s.png" % (subject, subject, view)
        for subject in subjects
        for view in view_list
    ]


# view identifiers, as they appear in the image file names
view_list = ['P00A+000E+00', 'P00A+005E+10' , 'P00A+005E-10' , 'P00A+010E+00']

file_list = create_filenames(data_dir, view_list)
len(file_list)

# open the first image (converted to mode "L") to learn the image size
im = Image.open(file_list[0]).convert("L")
# get original dimensions
H, W = np.shape(im)
# a single pre-formatted string prints the same text on Python 2 and 3
print('shape= %s' % ((H, W),))

im_number = len(file_list)
# fill array with rows as image
# and columns as pixels
arr = np.zeros([im_number, H*W])

for i, filename in enumerate(file_list):
    im = Image.open(filename).convert("L")
    # flatten the H x W pixel grid into a single row of the data matrix
    arr[i, :] = np.reshape(np.asarray(im), [1, H*W])

# show every loaded image
image_grid(arr, H, W)

# let's find the mean_image (per-pixel average over all images)
mean_image = np.mean(arr, axis=0)

# show the mean face, then the distribution of its pixel values
plt.imshow(np.reshape(mean_image,[H,W]), cmap = plt.get_cmap("gray"))
plt.figure()
plt.hist(mean_image,bins=100);

# centering the data (subtract mean face); broadcasting subtracts
# mean_image from every row, so no pre-allocated array is needed
arr_norm = arr - mean_image

# plot the first 10 normalized faces
image_grid(arr_norm[:10,:],H,W)

# import from the public package path; the private
# sklearn.decomposition.pca module is not importable in newer releases
from sklearn.decomposition import PCA

# no n_components given: keep every component
pca = PCA()
pca.fit(arr_norm)

# Let's make a scree plot
pve = pca.explained_variance_ratio_
pve.shape
# x axis is the 0-based component index
plt.plot(range(len(pve)), pve)
plt.title("Scree Plot")
plt.ylabel("Proportion of Variance Explained")
plt.xlabel("Principal Component Number")

# eigenfaces: the first 9 principal axes, each shown as an H x W image
eigenfaces = pca.components_
image_grid(eigenfaces[:9, :], H, W, cols=3)

# rebuild one face step by step, adding one component at a time
img_idx = file_list.index('yaleB01/yaleB01_P00A+010E+00.png')
loadings = pca.components_
n_components = loadings.shape[0]
# scores: coordinates of every centered image on every component
scores = np.dot(arr_norm, loadings.T)

# contribution of each component to the chosen image
img_proj = [scores[img_idx, n] * loadings[n, :] for n in range(n_components)]
len(img_proj)

# running sum: mean face, then mean + 1 component, mean + 2 components, ...
faces = mean_image
face_list = [mean_image]
for proj in img_proj:
    faces = faces + proj
    face_list.append(faces)

len(face_list)

face_arr = np.asarray(face_list)
face_arr.shape

# the first 25 partial reconstructions
image_grid(face_arr[:25], H, W, cols=5)

# every 5th partial reconstruction, using 0 up to 120 components
image_grid(face_arr[list(range(0, 121, 5))], H, W, cols=5)

# getting the index of the subject (every row that belongs to yaleB05)
sub_idx = [i for i, s in enumerate(file_list) if "yaleB05" in s]
print(sub_idx)

# row of the one image of that subject we will try to reconstruct
face_idx = file_list.index("yaleB05/yaleB05_P00A+010E+00.png")
print(face_idx)

# plot target face (selected by its looked-up row, not a hard-coded number)
image_grid(arr[face_idx:face_idx + 1], H, W)

# arr already holds every image, so there is no need to read the files
# again; keep the target face before its subject is dropped
target_face = arr[face_idx]

# remove subject from array (np.delete returns a new array, arr is untouched)
arr_new = np.delete(arr, sub_idx, axis=0)
arr_new.shape

target_face.shape

# mean face of the remaining subjects only
mean_face = np.mean(arr_new, axis=0)

centered_face = target_face - mean_face

plt.imshow(np.reshape(centered_face,[H,W]), cmap = plt.get_cmap("gray"))
plt.figure()
plt.hist(centered_face,bins=100);

arr_norm = arr_new - mean_face
# NOTE: the fit is done on arr_new, not arr_norm; PCA centers its input
# itself, so the resulting components should be the same either way
pca.fit(arr_new)

loadings = pca.components_
n_components = loadings.shape[0]
# project the held-out face onto the components, then map it back
scores = np.dot(centered_face, loadings.T)
# reconstruct is in centered space: mean_face is not added back here
reconstruct = np.dot(scores, loadings)

reconstruct.shape

plt.imshow(np.reshape(reconstruct, [H,W]), cmap = plt.get_cmap("gray"))
plt.figure()