def read_images(csv_file):
    """Load the images listed in a CSV file into a DataFrame.

    The CSV is read with ``pandas.read_csv`` and must contain an
    ``image_path`` column. Every path in that column is loaded with
    ``skimage.io.imread`` and the loaded images are attached as a new
    ``image`` column.

    Args:
        csv_file: Path (or anything ``pandas.read_csv`` accepts) of the CSV.

    Returns:
        A DataFrame with the CSV's columns plus an ``image`` column.
    """
    import pandas as pd
    from skimage import io

    frame = pd.read_csv(csv_file)
    loaded = [io.imread(path) for path in frame['image_path']]
    return frame.assign(image=loaded)
def create_hog(images, canny=False, *, orientations=8, pixels_per_cell=(40, 40)):
    """Compute a HOG descriptor for every image in ``images``.

    Args:
        images: Iterable of image arrays.
        canny: When true, each image is cast with ``astype(int)`` before
            being passed to ``hog``; the original author notes this is
            required for Canny edge images.
        orientations: Passed through to ``skimage.feature.hog``
            (default 8, the previously hard-coded value).
        pixels_per_cell: Passed through to ``skimage.feature.hog``
            (default ``(40, 40)``, the previously hard-coded value).

    Returns:
        A list with one ``hog`` result per input image, in input order.
    """
    from skimage.feature import hog

    hog_features = []
    for image in images:
        # Only the Canny case needs the integer cast; everything else about
        # the hog call is shared so the two paths cannot drift apart.
        source = image.astype(int) if canny else image
        hog_features.append(
            hog(
                source,
                orientations=orientations,
                pixels_per_cell=pixels_per_cell,
                visualize=False,
            )
        )
    return hog_features
def classify_images(df, random_state=0):
    """Train a random forest on HOG features and predict a held-out split.

    The DataFrame is split 80/20 with ``train_test_split``; the
    ``hog_features`` column of each part is stacked into a NumPy array and
    used as the model input, with the ``label`` column as the target.

    Args:
        df: DataFrame with ``hog_features``, ``image``, ``image_path`` and
            ``label`` columns.
        random_state: Seed used for both the split and the classifier, so
            different values give different but reproducible results.

    Returns:
        A tuple ``(predictions, y_test, test)``: the predicted labels for
        the test rows, their true labels, and the test rows themselves
        (``hog_features``, ``image`` and ``image_path`` columns).
    """
    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split

    kept_columns = ['hog_features', 'image', 'image_path']
    train, test, y_train, y_test = train_test_split(
        df[kept_columns],
        df['label'],
        test_size=0.2,
        random_state=random_state,
    )

    # Each cell of 'hog_features' holds one descriptor; stack them into a
    # single array with one row per sample.
    train_matrix = np.stack(train['hog_features'].values)
    test_matrix = np.stack(test['hog_features'].values)

    model = RandomForestClassifier(
        n_estimators=10,
        max_depth=7,
        random_state=random_state,
    )
    model.fit(train_matrix, y_train.values)
    return model.predict(test_matrix), y_test, test
def print_results(predictions, test):
    """Show every test image in one row, titled with its predicted label.

    Args:
        predictions: Indexable sequence of predicted labels, positionally
            aligned with the rows of ``test``.
        test: Data convertible to a DataFrame with an ``image_path``
            column; each image is re-read from that path for display.
    """
    import matplotlib.pyplot as plt
    import pandas as pd
    from skimage import io

    column_count = len(test)
    rows = pd.DataFrame(test)
    for position, (_, row) in enumerate(rows.iterrows()):
        # Subplot indices are 1-based; predictions are matched by position,
        # not by the DataFrame index.
        plt.subplot(1, column_count, position + 1)
        plt.imshow(io.imread(row['image_path']))
        plt.title(str(predictions[position]))
    plt.show()