#!/usr/bin/env python
# coding: utf-8

# # Exercise 12.2
# ## Activation maximization
# In this task, we use activation maximization to visualize the patterns to which the feature maps of a CNN trained on MNIST are sensitive. This will give us a deeper understanding of the working principle of CNNs.

# In[3]:


import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt

KTF = keras.backend
layers = keras.layers


# ### Download and preprocess data

# In[2]:


(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

x_train = x_train.astype(np.float32)[..., np.newaxis] / 255.
x_test = x_test.astype(np.float32)[..., np.newaxis] / 255.

y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)


# ### Set up a convolutional neural network with at least 4 convolutional layers.

# In[ ]:


model = keras.models.Sequential()
# add at least four Conv2D layers and a classification head here
# (one possibility is sketched in the solution further below)
model.summary()


# #### Compile and train the model

# In[ ]:


model.compile(
    loss='categorical_crossentropy',
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    metrics=['accuracy'])

results = model.fit(x_train, y_train,
                    batch_size=100,
                    epochs=3,
                    verbose=1,
                    validation_split=0.1)


# ### Implementation of activation maximization
# Select a layer you want to visualize and perform activation maximization.

# In[ ]:


gradient_updates = 50
step_size = 1.


def normalize(x):
    '''Normalize gradients via the l2 norm'''
    return x / (KTF.sqrt(KTF.mean(KTF.square(x))) + KTF.epsilon())


# In the following, implement activation maximization to visualize the patterns to which a specific feature map is sensitive:
# - Start from uniformly distributed noise 'images' (note that the shape has to be `(1, 28, 28, 1)`, as we use a batch size of `1`).
# - Choose one specific feature map using `filter_index`.
# - Create a scalar loss as discussed in Chapter 12 (maximize the average feature map activation).
# - Thereafter, add the calculated gradients to your start image (gradient ascent step) and repeat the procedure for `gradient_updates = 50` iterations.
# You can calculate the gradients using the following expressions:
# `with tf.GradientTape() as gtape:
#      grads = gtape.gradient(YOUR_OBJECTIVE, THE_VARIABLE_YOU_WANT_TO_OPTIMIZE)
#      grads = normalize(grads)`
#
# - Finally, implement the gradient ascent step (you may use `assign_sub` or `assign_add` to adapt the parameters) and perform the 50 updates.
#
# Remember to construct a Keras variable for the input: we want to find an input that 'maximizes' the output, so we build an input that holds adaptive parameters which we can train using TensorFlow / Keras.
# The following code snippet may help you to implement the maximization:

# In[ ]:


visualized_feature = []
layer_dict = dict([(layer.name, layer) for layer in model.layers])

layer_name = "conv2d_3"
layer_output = layer_dict[layer_name].output
sub_model = keras.models.Model(model.inputs, layer_output)

for filter_index in range(layer_output.shape[-1]):  # iterate over the filters of the layer
    print('Processing filter %d' % (filter_index + 1))

    input_img = KTF.variable([0])  # instead of '[0]', use noise as the (start) input image with correct shape

    for i in range(gradient_updates):
        with tf.GradientTape() as gtape:
            # define a scalar loss using Keras.
            # remember: you would like to maximize the activations in the respective feature map!
            loss = 0  # <--: define your loss HERE
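# #### One possible solution (sketch)
# A minimal sketch of the complete maximization loop (one way to fill in the blanks above, not the definitive solution). It assumes the empty `Sequential` model was given at least four `Conv2D` layers, so that Keras' default naming produces a layer called `conv2d_3` (e.g. via the hypothetical architecture in the comments below), and that the scaffold cell above has already built `sub_model`. The noise initialization around the grey level 0.5 is an arbitrary choice.

# In[ ]:


# hypothetical architecture for the empty model cell above:
# model.add(layers.Conv2D(8, (3, 3), activation='relu', input_shape=(28, 28, 1)))
# model.add(layers.Conv2D(16, (3, 3), activation='relu'))
# model.add(layers.MaxPooling2D((2, 2)))
# model.add(layers.Conv2D(32, (3, 3), activation='relu'))
# model.add(layers.Conv2D(32, (3, 3), activation='relu'))
# model.add(layers.Flatten())
# model.add(layers.Dense(10, activation='softmax'))

visualized_feature = []
for filter_index in range(layer_output.shape[-1]):
    print('Processing filter %d' % (filter_index + 1))

    # start from uniformly distributed noise with batch shape (1, 28, 28, 1)
    input_img = tf.Variable(
        np.random.uniform(0.4, 0.6, size=(1, 28, 28, 1)).astype(np.float32))

    for i in range(gradient_updates):
        with tf.GradientTape() as gtape:
            activation = sub_model(input_img)
            # scalar objective: mean activation of the selected feature map
            loss = KTF.mean(activation[..., filter_index])
        grads = gtape.gradient(loss, input_img)
        grads = normalize(grads)
        input_img.assign_add(step_size * grads)  # gradient ascent step

    visualized_feature.append(input_img.numpy())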
# #### Plot images to visualize to which patterns the respective feature maps are sensitive.

# In[ ]:


def deprocess_image(x):
    '''Reprocess a visualization to the format of "MNIST images"'''
    x -= x.mean()
    x /= (x.std() + KTF.epsilon())
    # x *= 0.1
    x += 0.5
    x *= 255
    x = np.clip(x, 0, 255).astype('uint8')
    return x


# In[ ]:


plt.figure(figsize=(10, 10))

for i, feature_ in enumerate(visualized_feature):
    feature_image = deprocess_image(feature_)
    ax = plt.subplot(8, 8, 1 + i)
    plt.imshow(feature_image.squeeze())
    ax.axis('off')
    plt.title("feature %s" % i)

plt.tight_layout()
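# As a quick sanity check (not part of the original exercise), one can compare the mean activation produced by an optimized image with that of fresh noise for the same filter; after the 50 ascent steps, the objective should be substantially larger for the optimized image.

# In[ ]:


# hypothetical sanity check: the optimized image should activate
# its feature map much more strongly than fresh noise does
filter_index = 0
noise = np.random.uniform(0.4, 0.6, size=(1, 28, 28, 1)).astype(np.float32)
act_noise = KTF.mean(sub_model(noise)[..., filter_index])
act_opt = KTF.mean(sub_model(visualized_feature[filter_index])[..., filter_index])
print('mean activation: noise %.3f vs. optimized %.3f' % (float(act_noise), float(act_opt)))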