In this notebook we use visual features obtained from a pre-trained deep neural network to determine the visual dissimilarity between images. We then use a genetic algorithm to find an ordering of a list of images that maximises the mean distance between adjacent images. The distance (or dissimilarity) between two images is defined as the cosine distance between their vector representations, each obtained by passing the image through the pre-trained neural network.
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pylab as plt
import evol
import glob
from scipy.spatial import distance
from plotnine import *
import plotnine.options
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Default size for plotnine figures, given as (width, height).
plotnine.options.figure_size = (16,9)
The images are loaded from disk in this notebook, but were originally sourced from Flickr.
Original images are linked here for source and credit:
# Collect every file in the image folder. glob returns matches in arbitrary
# order, so sort the list to keep image indices stable between runs.
image_files = glob.glob('../flickr-multi/*')
image_files.sort()
To project images into a feature space, we use an instance of the VGG16 neural network pre-trained on ImageNet, with the top layers cut off and average pooling applied. This turns an arbitrary image into a 512-dimensional feature vector.
We load the images twice, once as is for display and once with preprocessing for the network.
# VGG16 feature extractor with ImageNet weights. include_top=False drops the
# classifier head and pooling='avg' averages the final convolutional output,
# so each image is reduced to a single flat feature vector.
pretrained_net = tf.keras.applications.VGG16(
include_top=False,
weights='imagenet',
pooling='avg',
input_shape=(224,224,3)
)
# Wrap the feature extractor in a small functional model: the input is declared
# as uint8 image batches, cast to float32 and passed through VGG16's own input
# preprocessing before reaching the pre-trained network.
# The single letter var is apparently Keras' preferred style
input_layer = tf.keras.layers.Input([None, None, 3], dtype = tf.uint8)
x = tf.cast(input_layer, tf.float32)
x = tf.keras.applications.vgg16.preprocess_input(x)
x = pretrained_net(x)
model = tf.keras.Model(inputs=[input_layer], outputs=[x])
# Unresized copies of the images, loaded with default settings; used for display.
original_images = list(map(tf.keras.preprocessing.image.load_img, image_files))
# Network input: every file is resized to 224x224 while loading, converted to
# an array, and the arrays are stacked into a single batch.
input_images = np.array([
    tf.keras.preprocessing.image.img_to_array(resized)
    for resized in (
        tf.keras.preprocessing.image.load_img(path, target_size=(224,224))
        for path in image_files
    )
])
# Run the whole batch through the model in one call: one feature vector per image.
image_vectors = model(input_images)
# Displayed as the cell output: (number of images, feature vector length).
image_vectors.shape
TensorShape([10, 512])
For visual inspection of our distance metric, we create a pairwise distance matrix in a DataFrame
and visualise using a tile plot with some annotations.
def show_images(images, rows=2):
    """
    Helper function to display a list of images across multiple rows.

    Images are laid out row by row on a grid without spacing or axes, and each
    one is labelled with its index in `images`. Grid cells left over after the
    last image stay blank.

    Parameters:
        images: sequence of images accepted by matplotlib's imshow().
        rows: number of grid rows; the column count is derived from it.

    Returns None. Nothing is drawn when `images` is empty.
    """
    if len(images) == 0:
        # An empty list would yield zero columns, which is not a valid grid
        # for plt.subplots().
        return
    # The double negation is a silly and hard to mentally parse trick to round up integer division,
    # but now you know...
    cols = -(-len(images) // rows)
    fig, axes = plt.subplots(
        rows, cols, figsize=(16,9), squeeze=False,
        gridspec_kw=dict(wspace=0, hspace=0))
    # squeeze=False always gives a 2D array of axes; .flat walks it row by row,
    # i.e. in the same order as indexing with [i // cols][i % cols].
    for i, axis in enumerate(axes.flat):
        axis.axis('off')
        if i < len(images):
            axis.imshow(images[i])
            axis.text(0, 0, str(i), fontsize=22)
# Long-format table of pairwise cosine distances between the images' feature
# vectors: one row per (from_image, to_image) pair.
distance_frame = (
    pd.DataFrame(                                  # Construct a DataFrame
        distance.squareform(                       # from the square form
            # of the pairwise cosine distance matrix between images' vector
            # representations. The metric is passed by name so that pdist uses
            # SciPy's built-in implementation instead of calling a Python
            # function once per pair.
            distance.pdist(image_vectors, metric='cosine')
        )
    )
    .reset_index()                                 # Use source image as column
    .rename(columns={'index':'from_image'})
    .assign(from_image=lambda df: df['from_image'].astype('category'))  # Turn into categorical
    .melt(id_vars=['from_image'], var_name='to_image', value_name='distance')  # Un-pivot for plotting
)
# Add a formatted representation for geom_text()
distance_frame['text_distance'] = distance_frame['distance'].map('{:.3f}'.format)
distance_frame.head()
| | from_image | to_image | distance | text_distance |
---|---|---|---|---|
0 | 0 | 0 | 0.000000 | 0.000 |
1 | 1 | 0 | 0.522615 | 0.523 |
2 | 2 | 0 | 0.352597 | 0.353 |
3 | 3 | 0 | 0.782678 | 0.783 |
4 | 4 | 0 | 0.785532 | 0.786 |
# Plot and show images for order
# The cell's value is a tuple: show_images() draws the image grid while the
# tuple is being built, and the ggplot object is the heat map of distances.
show_images(original_images), (
ggplot(distance_frame, aes(x='from_image', y='to_image'))
+ geom_tile(aes(fill='distance'))
+ geom_text(aes(label='text_distance'))
+ scale_fill_distiller(palette='Oranges', guide=False)
# Blue outlines with labels mark three consecutive 3x3 blocks along the diagonal.
# NOTE(review): presumably these cover images 0-2, 3-5 and 6-8 in sorted file
# order - confirm that the labels still match the files on disk.
+ annotate(geom='rect', xmin=0.5, ymin=0.5, xmax=3.5, ymax=3.5, fill=None, color='blue', size=2)
+ annotate(geom='text', x=2, y=2.5, label='boats', color='blue', size=26)
+ annotate(geom='rect', xmin=3.5, ymin=3.5, xmax=6.5, ymax=6.5, fill=None, color='blue', size=2)
+ annotate(geom='text', x=5, y=5.5, label='cars', color='blue', size=26)
+ annotate(geom='rect', xmin=6.5, ymin=6.5, xmax=9.5, ymax=9.5, fill=None, color='blue', size=2)
+ annotate(geom='text', x=8, y=8.5, label='dogs', color='blue', size=26)
+ labs(x=None, y=None, title='Pairwise Cosine Distance')
)