import numpy as np
import os
import sys
import cntk
from cntk.layers import Convolution2D, MaxPooling, Dense, Dropout
from common.params import *
from common.utils import *
print("OS: ", sys.platform)
print("Python: ", sys.version)
print("Numpy: ", np.__version__)
print("CNTK: ", cntk.__version__)
print("GPU: ", get_gpu_name())
OS:  linux
Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul 2 2016, 17:53:06) [GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
Numpy:  1.13.3
CNTK:  2.2
GPU:  ['Tesla K80']
def create_symbol():
    # Weights initialised from a (Glorot) uniform distribution
    # Default activation (unless stated otherwise) is relu
    # N.B. `features` is the global input variable created below, before this function is called
    with cntk.layers.default_options(init=cntk.glorot_uniform(), activation=cntk.relu):
        x = Convolution2D(filter_shape=(3, 3), num_filters=50, pad=True)(features)
        x = Convolution2D(filter_shape=(3, 3), num_filters=50, pad=True)(x)
        x = MaxPooling((2, 2), strides=(2, 2), pad=False)(x)
        x = Dropout(0.25)(x)
        x = Convolution2D(filter_shape=(3, 3), num_filters=100, pad=True)(x)
        x = Convolution2D(filter_shape=(3, 3), num_filters=100, pad=True)(x)
        x = MaxPooling((2, 2), strides=(2, 2), pad=False)(x)
        x = Dropout(0.25)(x)
        x = Dense(512)(x)
        x = Dropout(0.5)(x)
        # Raw logits, no activation: softmax is folded into the loss
        x = Dense(N_CLASSES, activation=None)(x)
    return x
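A quick sanity check (not part of the original notebook, uses a throwaway `probe` variable) that `default_options` behaves as the comments claim: a layer created inside the block picks up the relu default, so its outputs are never negative, while the final `Dense(N_CLASSES, activation=None)` opts out to emit raw logits.
# Hypothetical check: a Dense layer built under default_options(activation=cntk.relu)
# should clamp negatives to zero, since relu is applied unless overridden
probe = cntk.input_variable(2, np.float32)
with cntk.layers.default_options(init=cntk.glorot_uniform(), activation=cntk.relu):
    relu_layer = Dense(3)(probe)
out = relu_layer.eval({probe: np.array([[-5.0, -5.0]], dtype=np.float32)})
assert (out >= 0).all()  # relu output is non-negative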
def init_model(m):
    # Cross-entropy loss over dense (one-hot) labels
    loss = cntk.cross_entropy_with_softmax(m, labels)
    # Momentum SGD
    # https://github.com/Microsoft/CNTK/blob/master/Manual/Manual_How_to_use_learners.ipynb
    # unit_gain=False: momentum_direction = momentum*old_momentum_direction + gradient
    # unit_gain=True:  momentum_direction = momentum*old_momentum_direction + (1-momentum)*gradient
    learner = cntk.momentum_sgd(m.parameters,
                                lr=cntk.learning_rate_schedule(LR, cntk.UnitType.minibatch),
                                momentum=cntk.momentum_schedule(MOMENTUM),
                                unit_gain=False)
    return loss, learner
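To make the `unit_gain` comments concrete, here is one momentum step computed both ways in plain numpy (an illustration only, not CNTK API):
momentum, old_direction, gradient = 0.9, 1.0, 2.0
# unit_gain=False (used above): the raw gradient is added
plain = momentum * old_direction + gradient                  # 0.9*1.0 + 2.0 = 2.9
# unit_gain=True: the gradient is scaled by (1 - momentum)
unit = momentum * old_direction + (1 - momentum) * gradient  # 0.9*1.0 + 0.1*2.0 = 1.1
print(plain, unit)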
%%time
# Data into format for library
x_train, x_test, y_train, y_test = cifar_for_library(channel_first=True, one_hot=True)
# CNTK format
y_train = y_train.astype(np.float32)
y_test = y_test.astype(np.float32)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)
print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)
Preparing train set...
Preparing test set...
(50000, 3, 32, 32) (10000, 3, 32, 32) (50000, 10) (10000, 10)
float32 float32 float32 float32
CPU times: user 846 ms, sys: 543 ms, total: 1.39 s
Wall time: 1.39 s
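`cifar_for_library` lives in `common/utils.py`; for experimenting without the CIFAR download, a purely synthetic stand-in that matches the shapes and dtypes printed above can be handy for smoke tests (the name `fake_cifar` and the random data are hypothetical, not the real helper):
def fake_cifar(n_train=50000, n_test=10000, channel_first=True, one_hot=True):
    # Random data with the same layout as cifar_for_library's output
    rng = np.random.RandomState(0)
    shape = (3, 32, 32) if channel_first else (32, 32, 3)
    x_train = rng.uniform(-1, 1, (n_train,) + shape).astype(np.float32)
    x_test = rng.uniform(-1, 1, (n_test,) + shape).astype(np.float32)
    y_train = rng.randint(0, N_CLASSES, n_train)
    y_test = rng.randint(0, N_CLASSES, n_test)
    if one_hot:
        y_train = np.eye(N_CLASSES, dtype=np.float32)[y_train]
        y_test = np.eye(N_CLASSES, dtype=np.float32)[y_test]
    return x_train, x_test, y_train, y_test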
%%time
# Placeholders
features = cntk.input_variable((3, 32, 32), np.float32)
labels = cntk.input_variable(N_CLASSES, np.float32)
# Load symbol
sym = create_symbol()
CPU times: user 17.1 ms, sys: 32.7 ms, total: 49.8 ms
Wall time: 78.5 ms
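Optionally, the freshly built graph can be inspected before training; `log_number_of_parameters` ships with `cntk.logging` in CNTK 2.x:
from cntk.logging import log_number_of_parameters
print([p.shape for p in sym.parameters])  # shape of every learnable parameter
log_number_of_parameters(sym)             # total count, as logged by CNTK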
%%time
loss, learner = init_model(sym)
CPU times: user 78.9 ms, sys: 221 ms, total: 300 ms
Wall time: 306 ms
%%time
# Train for EPOCHS epochs (~171s wall time on the Tesla K80 above)
loss.train((x_train, y_train),
           minibatch_size=BATCHSIZE,
           max_epochs=EPOCHS,
           parameter_learners=[learner])
CPU times: user 2min 8s, sys: 24.9 s, total: 2min 32s
Wall time: 2min 51s
{'epoch_summaries': [{'loss': 1.8166315625, 'metric': 0.0, 'samples': 50000},
  {'loss': 1.3537084375, 'metric': 0.0, 'samples': 50000},
  {'loss': 1.12093609375, 'metric': 0.0, 'samples': 50000},
  {'loss': 0.97167546875, 'metric': 0.0, 'samples': 50000},
  {'loss': 0.86488921875, 'metric': 0.0, 'samples': 50000},
  {'loss': 0.769997734375, 'metric': 0.0, 'samples': 50000},
  {'loss': 0.707360078125, 'metric': 0.0, 'samples': 50000},
  {'loss': 0.64719390625, 'metric': 0.0, 'samples': 50000},
  {'loss': 0.592496171875, 'metric': 0.0, 'samples': 50000},
  {'loss': 0.5582487109375, 'metric': 0.0, 'samples': 50000}],
 'updates': [{'loss': 1.8166693029269365, 'metric': 0.0, 'samples': 49984},
  {'loss': 1.353612999909971, 'metric': 0.0, 'samples': 49984},
  {'loss': 1.120971445237476, 'metric': 0.0, 'samples': 49984},
  {'loss': 0.9715477702864916, 'metric': 0.0, 'samples': 49984},
  {'loss': 0.8647872006542093, 'metric': 0.0, 'samples': 49984},
  {'loss': 0.7700418821522887, 'metric': 0.0, 'samples': 49984},
  {'loss': 0.7073556506832186, 'metric': 0.0, 'samples': 49984},
  {'loss': 0.6472827945567582, 'metric': 0.0, 'samples': 49984},
  {'loss': 0.5925403941761364, 'metric': 0.0, 'samples': 49984},
  {'loss': 0.5583446859244958, 'metric': 0.0, 'samples': 49984}]}
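For reference, the same training expressed as an explicit loop with `cntk.Trainer`, which also tracks a classification-error metric per minibatch (a sketch: it assumes `yield_mb` accepts a `shuffle` flag, as the version in `common/utils.py` does):
metric = cntk.classification_error(sym, labels)
trainer = cntk.Trainer(sym, (loss, metric), [learner])
for epoch in range(EPOCHS):
    for data, label in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):
        trainer.train_minibatch({features: data, labels: label})
    print(epoch, trainer.previous_minibatch_loss_average)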
%%time
# Predict and then score accuracy
# (No softmax needed: softmax is monotonic, so argmax over raw logits is identical)
n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE
y_guess = np.zeros(n_samples, dtype=np.int64)  # np.int is deprecated in newer numpy
y_truth = np.argmax(y_test[:n_samples], axis=-1)
c = 0
for data, label in yield_mb(x_test, y_test, BATCHSIZE):
    predicted_label_probs = sym.eval({features: data})
    y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = np.argmax(predicted_label_probs, axis=-1)
    c += 1
CPU times: user 964 ms, sys: 220 ms, total: 1.18 s
Wall time: 1.43 s
print("Accuracy: ", sum(y_guess == y_truth)/len(y_guess))
Accuracy: 0.773237179487
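The skipped-softmax shortcut rests on monotonicity: softmax never changes which logit is largest, as this small check illustrates (example values only):
logits = np.array([[2.0, -1.0, 0.5]], dtype=np.float32)
probs = np.exp(logits) / np.exp(logits).sum(axis=-1, keepdims=True)
# argmax is unchanged because exp and the positive normaliser preserve ordering
assert np.argmax(logits, axis=-1) == np.argmax(probs, axis=-1)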