#!/usr/bin/env python
# coding: utf-8

# # Convolution Nets for MNIST

# Deep Learning models can take quite a bit of time to run, particularly if a GPU isn't used.
#
# In the interest of time, you could sample a subset of observations (e.g. $1000$) that are a particular number of your choice (e.g. $6$) and $1000$ observations that aren't that particular number (i.e. $\neq 6$).
#
# We will build a model on a subset of this kind and see how it performs on the test dataset.

# In[ ]:

# Import the required libraries
import numpy as np
np.random.seed(1338)

from keras.datasets import mnist


# In[ ]:

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten


# In[ ]:

from keras.layers.convolutional import Conv2D
from keras.layers.pooling import MaxPooling2D


# In[ ]:

from keras.utils import np_utils
from keras.optimizers import SGD


# ## Loading Data

# In[ ]:

# Load the training and testing data
(X_train, y_train), (X_test, y_test) = mnist.load_data()


# In[ ]:

# Keep a copy of the original (unscaled) test images for plotting later
X_test_orig = X_test


# ## Data Preparation

# In[ ]:

from keras import backend as K


# #### Very Important:
# When dealing with images & convolutions, it is paramount to handle `image_data_format` properly

# In[ ]:

img_rows, img_cols = 28, 28

if K.image_data_format() == 'channels_first':
    shape_ord = (1, img_rows, img_cols)
else:  # channels_last
    shape_ord = (img_rows, img_cols, 1)


# #### Preprocess and Normalise Data

# In[ ]:

X_train = X_train.reshape((X_train.shape[0],) + shape_ord)
X_test = X_test.reshape((X_test.shape[0],) + shape_ord)

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

# Scale pixel intensities from [0, 255] to [0, 1]
X_train /= 255
X_test /= 255


# In[ ]:

np.random.seed(1338)  # for reproducibility!

# Test data
Y = y_test.copy()

# Converting the output to binary classification (Six=1, Not Six=0)
Y_test = (Y == 6).astype(int)

# Selecting the 5918 examples where the output is 6
X_six = X_train[y_train == 6].copy()
Y_six = y_train[y_train == 6].copy()

# Selecting the examples where the output is not 6
X_not_six = X_train[y_train != 6].copy()
Y_not_six = y_train[y_train != 6].copy()

# Selecting 6000 random examples from the data that
# only contains the data where the output is not 6
# (note: indices must be drawn from the *not-six* pool, i.e. X_not_six)
random_rows = np.random.randint(0, X_not_six.shape[0], 6000)
X_not_six = X_not_six[random_rows]
Y_not_six = Y_not_six[random_rows]


# In[ ]:

# Appending the data with output as 6 and data with output as != 6
X_train = np.append(X_six, X_not_six)

# Reshaping the appended data to the appropriate form
X_train = X_train.reshape((X_six.shape[0] + X_not_six.shape[0],) + shape_ord)

# Appending the labels and converting the labels to
# binary classification (Six=1, Not Six=0)
Y_labels = np.append(Y_six, Y_not_six)
Y_train = (Y_labels == 6).astype(int)


# In[ ]:

print(X_train.shape, Y_labels.shape, X_test.shape, Y_test.shape)


# In[ ]:

# Converting the classes to their binary categorical (one-hot) form
nb_classes = 2
Y_train = np_utils.to_categorical(Y_train, nb_classes)
Y_test = np_utils.to_categorical(Y_test, nb_classes)
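# `np_utils.to_categorical` one-hot encodes the integer labels. As a quick
# illustration (a minimal sketch, not part of the pipeline above), with
# `nb_classes = 2` the binary labels map as follows:
#
# ```python
# from keras.utils import np_utils
#
# np_utils.to_categorical([0, 1, 1, 0], 2)
# # array([[ 1.,  0.],
# #        [ 0.,  1.],
# #        [ 0.,  1.],
# #        [ 1.,  0.]])
# ```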
# # A simple CNN

# In[ ]:

# -- Initializing the values for the convolution neural network

nb_epoch = 2  # kept very low! Please increase if you have a GPU

batch_size = 64
# number of convolutional filters to use
nb_filters = 32
# size of pooling area for max pooling
nb_pool = 2
# convolution kernel size
nb_conv = 3

# Note: this SGD instance is defined for reference; the models below are
# compiled with the string alias 'sgd' (i.e. default optimizer parameters)
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)


# #### Step 1: Model Definition

# In[ ]:

model = Sequential()

# note: the very first layer **must** always specify the input_shape
model.add(Conv2D(nb_filters, (nb_conv, nb_conv),
                 padding='valid', input_shape=shape_ord))
model.add(Activation('relu'))
model.add(Flatten())
model.add(Dense(nb_classes))
model.add(Activation('softmax'))


# #### Step 2: Compile

# In[ ]:

model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])


# #### Step 3: Fit

# In[ ]:

hist = model.fit(X_train, Y_train, batch_size=batch_size,
                 epochs=nb_epoch, verbose=1,
                 validation_data=(X_test, Y_test))


# In[ ]:

import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')

plt.figure()
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.plot(hist.history['loss'])
plt.plot(hist.history['val_loss'])
plt.legend(['Training', 'Validation'])

plt.figure()
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.plot(hist.history['acc'])
plt.plot(hist.history['val_acc'])
plt.legend(['Training', 'Validation'], loc='lower right')


# ### Step 4: Evaluate

# In[ ]:

print('Available Metrics in Model: {}'.format(model.metrics_names))


# In[ ]:

# Evaluating the model on the test data
loss, accuracy = model.evaluate(X_test, Y_test, verbose=0)
print('Test Loss:', loss)
print('Test Accuracy:', accuracy)


# ### Let's plot our model Predictions!

# In[ ]:

# matplotlib was already imported above; repeated here so this cell
# also runs standalone
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')


# In[ ]:

n_preds = 15  # renamed from `slice`, which shadows the Python builtin

predicted = model.predict(X_test[:n_preds]).argmax(-1)

plt.figure(figsize=(16, 8))
for i in range(n_preds):
    plt.subplot(1, n_preds, i + 1)
    plt.imshow(X_test_orig[i], interpolation='nearest')
    plt.text(0, 0, predicted[i], color='black',
             bbox=dict(facecolor='white', alpha=1))
    plt.axis('off')


# # Adding more Dense Layers

# In[ ]:

model = Sequential()

model.add(Conv2D(nb_filters, (nb_conv, nb_conv),
                 padding='valid', input_shape=shape_ord))
model.add(Activation('relu'))
model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))


# In[ ]:

model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

model.fit(X_train, Y_train, batch_size=batch_size,
          epochs=nb_epoch, verbose=1,
          validation_data=(X_test, Y_test))


# In[ ]:

# Evaluating the model on the test data
score, accuracy = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score)
print('Test accuracy:', accuracy)


# # Adding Dropout

# In[ ]:

model = Sequential()

model.add(Conv2D(nb_filters, (nb_conv, nb_conv),
                 padding='valid', input_shape=shape_ord))
model.add(Activation('relu'))
model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))


# In[ ]:

model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

model.fit(X_train, Y_train, batch_size=batch_size,
          epochs=nb_epoch, verbose=1,
          validation_data=(X_test, Y_test))


# In[ ]:

# Evaluating the model on the test data
score, accuracy = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score)
print('Test accuracy:', accuracy)
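# Before stacking more convolution layers, it helps to track how the feature
# map shapes evolve. A worked calculation (added here for clarity) for our
# settings, i.e. $28 \times 28$ inputs, $3 \times 3$ kernels,
# `padding='valid'` and $2 \times 2$ max pooling:
#
# ```python
# # 'valid' convolution: output side = input side - kernel side + 1
# 28 - 3 + 1   # first Conv2D      -> 26x26 feature maps
# 26 - 3 + 1   # second Conv2D     -> 24x24 feature maps
# 24 // 2      # 2x2 MaxPooling2D  -> 12x12 feature maps
# ```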
# # Adding more Convolution Layers

# In[ ]:

model = Sequential()

model.add(Conv2D(nb_filters, (nb_conv, nb_conv),
                 padding='valid', input_shape=shape_ord))
model.add(Activation('relu'))
model.add(Conv2D(nb_filters, (nb_conv, nb_conv)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))


# In[ ]:

model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

model.fit(X_train, Y_train, batch_size=batch_size,
          epochs=nb_epoch, verbose=1,
          validation_data=(X_test, Y_test))


# In[ ]:

# Evaluating the model on the test data
score, accuracy = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score)
print('Test accuracy:', accuracy)


# # Exercise

# The above code has been written as a function.
#
# Change some of the **hyperparameters** and see what happens.
# (A parameterised variant is sketched right after the timing cell below.)

# In[ ]:

# Function for constructing the convolution neural network
# Feel free to add parameters, if you want
def build_model():
    """Build, train and evaluate the two-convolution CNN defined above."""
    model = Sequential()
    model.add(Conv2D(nb_filters, (nb_conv, nb_conv),
                     padding='valid', input_shape=shape_ord))
    model.add(Activation('relu'))
    model.add(Conv2D(nb_filters, (nb_conv, nb_conv)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(128))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy'])

    model.fit(X_train, Y_train, batch_size=batch_size,
              epochs=nb_epoch, verbose=1,
              validation_data=(X_test, Y_test))

    # Evaluating the model on the test data
    score, accuracy = model.evaluate(X_test, Y_test, verbose=0)
    print('Test score:', score)
    print('Test accuracy:', accuracy)


# In[ ]:

# Timing how long it takes to build the model and test it.
get_ipython().run_line_magic('timeit', '-n1 -r1 build_model()')
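# One way to tackle the exercise: a minimal sketch (not part of the original
# notebook) of a parameterised `build_model`, where `n_filters`, `kernel`,
# `dense_units` and `dropout` are hypothetical parameter names chosen here
# for illustration:
#
# ```python
# def build_model(n_filters=32, kernel=3, dense_units=128, dropout=0.5):
#     model = Sequential()
#     model.add(Conv2D(n_filters, (kernel, kernel),
#                      padding='valid', input_shape=shape_ord))
#     model.add(Activation('relu'))
#     model.add(Conv2D(n_filters, (kernel, kernel)))
#     model.add(Activation('relu'))
#     model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
#     model.add(Dropout(0.25))
#     model.add(Flatten())
#     model.add(Dense(dense_units))
#     model.add(Activation('relu'))
#     model.add(Dropout(dropout))
#     model.add(Dense(nb_classes))
#     model.add(Activation('softmax'))
#     model.compile(loss='categorical_crossentropy',
#                   optimizer='sgd', metrics=['accuracy'])
#     model.fit(X_train, Y_train, batch_size=batch_size,
#               epochs=nb_epoch, verbose=1,
#               validation_data=(X_test, Y_test))
#     return model
#
# model = build_model(n_filters=64, dropout=0.25)
# ```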
# # Batch Normalisation

# Normalize the activations of the previous layer at each batch, i.e. apply a transformation that maintains the mean activation close to 0 and the activation standard deviation close to 1.

# ## How to BatchNorm in Keras
#
# ```python
# from keras.layers.normalization import BatchNormalization
#
# BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True,
#                    beta_initializer='zeros', gamma_initializer='ones',
#                    moving_mean_initializer='zeros', moving_variance_initializer='ones',
#                    beta_regularizer=None, gamma_regularizer=None,
#                    beta_constraint=None, gamma_constraint=None)
# ```
#
# #### Arguments
#
# - `axis`: Integer, the axis that should be normalized (typically the features axis). For instance, after a `Conv2D` layer with `data_format="channels_first"`, set `axis=1` in `BatchNormalization`.
# - `momentum`: Momentum for the moving mean and the moving variance.
# - `epsilon`: Small float added to variance to avoid dividing by zero.
# - `center`: If True, add offset of `beta` to normalized tensor. If False, `beta` is ignored.
# - `scale`: If True, multiply by `gamma`. If False, `gamma` is not used. When the next layer is linear (also e.g. `nn.relu`), this can be disabled since the scaling will be done by the next layer.
# - `beta_initializer`: Initializer for the beta weight.
# - `gamma_initializer`: Initializer for the gamma weight.
# - `moving_mean_initializer`: Initializer for the moving mean.
# - `moving_variance_initializer`: Initializer for the moving variance.
# - `beta_regularizer`: Optional regularizer for the beta weight.
# - `gamma_regularizer`: Optional regularizer for the gamma weight.
# - `beta_constraint`: Optional constraint for the beta weight.
# - `gamma_constraint`: Optional constraint for the gamma weight.
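# As a quick illustration (a minimal sketch, not part of the original
# notebook), BatchNorm is typically inserted between a layer's linear output
# and its activation. Rewriting the first simple CNN from above this way
# would look like:
#
# ```python
# from keras.layers.normalization import BatchNormalization
#
# model = Sequential()
# model.add(Conv2D(nb_filters, (nb_conv, nb_conv),
#                  padding='valid', input_shape=shape_ord))
# # default axis=-1 normalizes the features axis for channels_last data;
# # with channels_first, set axis=1 as noted in the arguments above
# model.add(BatchNormalization())
# model.add(Activation('relu'))
# model.add(Flatten())
# model.add(Dense(nb_classes))
# model.add(Activation('softmax'))
# ```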