Notebook

In [ ]:

def read_images(csv_file):
    """Load the CSV at ``csv_file`` and attach the decoded images to it.

    Every entry of the ``image_path`` column is read with
    ``skimage.io.imread``; the results are stored, in row order, in an
    ``image`` column of the returned frame.

    Args:
        csv_file: Passed straight to ``pandas.read_csv``. The table must
            contain an ``image_path`` column.

    Returns:
        A DataFrame with the CSV contents plus the ``image`` column.
    """
    import pandas as pd
    from skimage import io

    table = pd.read_csv(csv_file)

    # One imread call per row, in the same order as the CSV.
    loaded = [io.imread(path) for path in table['image_path']]

    return table.assign(image=loaded)

In [ ]:

def create_hog(images, canny=False, *, orientations=8, pixels_per_cell=(40, 40)):
    """Compute a HOG descriptor for every image in ``images``.

    Args:
        images: Iterable of images. When ``canny`` is true each image must
            provide ``astype`` (e.g. a NumPy array).
        canny: When true, each image is cast with ``astype(int)`` before it
            is handed to ``hog``. The original author's note says Canny
            images have to be passed as integer type.
        orientations: Forwarded to ``skimage.feature.hog``. Defaults to the
            previously hard-coded value of 8.
        pixels_per_cell: Forwarded to ``skimage.feature.hog``. Defaults to
            the previously hard-coded value of ``(40, 40)``.

    Returns:
        A list with one ``hog(...)`` result per input image, in input order.
    """
    from skimage.feature import hog

    # The two code paths differ only in the integer cast, so a single
    # comprehension covers both.
    return [
        hog(
            image.astype(int) if canny else image,
            orientations=orientations,
            pixels_per_cell=pixels_per_cell,
            visualize=False,
        )
        for image in images
    ]

In [1]:

def classify_images(df, random_state=0):
    """Train a random forest on HOG features and predict the held-out rows.

    The frame is split 80/20 with ``train_test_split``; a
    ``RandomForestClassifier`` (10 trees, max depth 7) is fitted on the
    stacked ``hog_features`` of the training part and used to predict the
    test part.

    Args:
        df: DataFrame with ``hog_features``, ``image``, ``image_path`` and
            ``label`` columns.
        random_state: Seed used for both the split and the forest, so a
            given value reproduces the same result.

    Returns:
        A tuple ``(predictions, y_test, test)``: the predicted labels for
        the test rows, their true labels, and the test rows themselves
        (``hog_features``, ``image`` and ``image_path`` columns).
    """
    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split

    kept_columns = ['hog_features', 'image', 'image_path']
    samples = df[kept_columns]
    labels = df['label']

    train, test, y_train, y_test = train_test_split(
        samples,
        labels,
        test_size=0.2,
        random_state=random_state,
    )

    # Each cell holds one descriptor; stack them into a 2-D feature matrix.
    train_matrix = np.stack(train['hog_features'].values)
    test_matrix = np.stack(test['hog_features'].values)

    forest = RandomForestClassifier(
        n_estimators=10,
        max_depth=7,
        random_state=random_state,
    )
    forest.fit(train_matrix, y_train.values)

    return forest.predict(test_matrix), y_test, test

In [ ]:

def print_results(predictions, test):
    """Show the test images in a single row, each titled with its prediction.

    Images are re-read from disk via the ``image_path`` column rather than
    taken from any in-memory column.

    Args:
        predictions: Indexable by position; ``predictions[k]`` is used as
            the title of the k-th displayed image.
        test: Data accepted by ``pandas.DataFrame`` that provides an
            ``image_path`` column; ``len(test)`` sets the number of
            subplot columns.
    """
    import matplotlib.pyplot as plt
    import pandas as pd
    from skimage import io

    column_count = len(test)
    rows = pd.DataFrame(test)

    # enumerate supplies the positional index that pairs rows with predictions.
    for position, (_, row) in enumerate(rows.iterrows()):
        plt.subplot(1, column_count, position + 1)
        plt.imshow(io.imread(row['image_path']))
        plt.title(str(predictions[position]))

    plt.show()