#!/usr/bin/env python
# coding: utf-8

# ## Kaggle: Digit Recognizer (MNIST), score: 0.99528

# In[193]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')
import tensorflow as tf
import random as rn
import os

# Pin every RNG (Python hash seed, random, NumPy, TensorFlow) and run
# single-threaded so results are reproducible (TF1-style session setup).
os.environ['PYTHONHASHSEED'] = '0'
seed = 123
rn.seed(seed)
np.random.seed(seed)

session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                              inter_op_parallelism_threads=1)

from keras import backend as K

tf.set_random_seed(seed)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

# In[194]:

train.shape

# In[195]:

test.shape

# In[196]:

label = train.label
label.shape
test_index = test.index

# In[197]:

train = train.drop(['label'], axis=1)
train.shape

# In[198]:

# Reshape the flat 784-pixel rows to 28x28x1 images and scale to [0, 1].
train = train.values.reshape(-1, 28, 28, 1) / 255.0
test = test.values.reshape(-1, 28, 28, 1) / 255.0

# In[199]:

# Preview the first 10 training digits.
plt.figure(figsize=(20, 2))
n = 10
for i in range(n):
    plt.subplot(1, n, i + 1)
    plt.imshow(train[i, :, :, 0], cmap='gray')
    plt.axis('off')

# In[200]:

from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import keras

initializer = keras.initializers.glorot_uniform(seed=seed)

# In[201]:

# Two conv blocks (32 then 64 filters) followed by a dense head; batch
# normalization and dropout after each block for regularization.
model = Sequential()

model.add(Conv2D(32, kernel_size=(5, 5), padding='same', activation='relu',
                 input_shape=(28, 28, 1), kernel_initializer=initializer))
model.add(Conv2D(32, kernel_size=(5, 5), padding='same', activation='relu',
                 kernel_initializer=initializer))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(BatchNormalization())
model.add(Dropout(0.25, seed=seed))

model.add(Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu',
                 kernel_initializer=initializer))
model.add(Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu',
                 kernel_initializer=initializer))
model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
model.add(BatchNormalization())
model.add(Dropout(0.25, seed=seed))

model.add(Flatten())
model.add(Dense(128, activation='relu', kernel_initializer=initializer))
model.add(BatchNormalization())
model.add(Dropout(0.5, seed=seed))
model.add(Dense(64, activation='relu', kernel_initializer=initializer))
model.add(BatchNormalization())
model.add(Dropout(0.5, seed=seed))
model.add(Dense(10, activation='softmax', kernel_initializer=initializer))

# Labels are integers (not one-hot), so use sparse categorical cross-entropy.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy',
              metrics=['acc'])

# In[202]:

epochs = 100

# Shrink the learning rate when val_loss plateaus, stop after 10 stagnant
# epochs, and keep only the best weights (by val_loss) on disk.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3,
                              min_lr=0.000001, verbose=1)
early_stopping = EarlyStopping(patience=10, verbose=1)
checkpointer = ModelCheckpoint(filepath='cnn_final_checkpoint.hdf5',
                               verbose=1, save_best_only=True)

hist = model.fit(train, label, batch_size=32, epochs=epochs, verbose=1,
                 validation_split=0.2,
                 callbacks=[reduce_lr, early_stopping, checkpointer])

# In[ ]:

# Training log from the run above (truncated):
# Epoch 00024: ReduceLROnPlateau reducing learning rate to 1e-06.
# Epoch 00024: val_loss did not improve from 0.02237
# Epoch 25/100
# 33600/33600 [==============================] - 12s 343us/step - loss: 0.0200 - acc: 0.9943 - val_loss: 0.0225 - val_acc: 0.9944
# Epoch 00025: val_loss did not improve from 0.02237
# Epoch 00025: early stopping
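# In[ ]:

# Optional, and not part of the run logged above: augmenting the training
# digits (small rotations, shifts, zooms) is a common way to squeeze out a
# little more validation accuracy on this dataset. A minimal sketch using
# Keras's ImageDataGenerator; the 80/20 split and the augmentation ranges
# below are illustrative assumptions, not values from the original notebook.
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

X_tr, X_val, y_tr, y_val = train_test_split(train, label.values,
                                            test_size=0.2, random_state=seed)

datagen = ImageDataGenerator(rotation_range=10,       # small random rotations
                             width_shift_range=0.1,   # horizontal shifts
                             height_shift_range=0.1,  # vertical shifts
                             zoom_range=0.1)          # mild zoom in/out

# An explicit validation set is needed here because validation_split does not
# work with generators. Uncomment to train on augmented batches instead:
# hist = model.fit_generator(datagen.flow(X_tr, y_tr, batch_size=32),
#                            steps_per_epoch=len(X_tr) // 32,
#                            epochs=epochs,
#                            validation_data=(X_val, y_val),
#                            callbacks=[reduce_lr, early_stopping, checkpointer])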
# In[203]:

# 0.02674
# Accuracy and loss curves for the training and validation splits.
plt.plot(hist.history['acc'], 'b-', label='acc')
plt.plot(hist.history['val_acc'], 'g-', label='val_acc')
plt.legend()
plt.show()

plt.plot(hist.history['loss'], 'r-', label='loss')
plt.plot(hist.history['val_loss'], 'm-', label='val_loss')
plt.legend()
plt.show()

# In[204]:

# Reload the best checkpoint (EarlyStopping leaves the model at its final,
# not best, weights) and predict class labels for the test set.
model.load_weights('cnn_final_checkpoint.hdf5')
pred = model.predict(test)
final = pred.argmax(axis=1)

# In[205]:

# Kaggle expects 1-based ImageIds.
submission = pd.DataFrame({'ImageId': test_index + 1, 'Label': final})
submission.to_csv('cnn_submission.csv', index=False)
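# In[ ]:

# A quick sanity check on the submission file before uploading (a sketch,
# not part of the original notebook): the Digit Recognizer test set has
# 28000 rows, ImageIds should run 1..28000, and the predicted labels should
# spread roughly evenly over the ten digits.
check = pd.read_csv('cnn_submission.csv')
print(check.shape)                                     # expected: (28000, 2)
print(check['ImageId'].min(), check['ImageId'].max())  # expected: 1 28000
print(check['Label'].value_counts().sort_index())      # counts per digit 0-9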