#!/usr/bin/env python
# coding: utf-8

# In[1]:


get_ipython().run_line_magic('reload_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')
get_ipython().run_line_magic('matplotlib', 'inline')
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"


# In[2]:


import numpy as np
import tensorflow as tf
print("Tensorflow version " + tf.__version__)


# # Using *ktrain* to Facilitate a Normal TensorFlow Workflow
# 
# This example notebook illustrates how *ktrain* can be used in a **minimally invasive** way within a normal TensorFlow workflow. We store our datasets as `tf.data.Dataset` objects and build our own `tf.keras` model, following the example of TensorFlow's [Keras MNIST TPU.ipynb](https://colab.research.google.com/github/tensorflow/tpu/blob/master/tools/colab/keras_mnist_tpu.ipynb#scrollTo=cCpkS9C_H7Tl). We then use **ktrain** as a lightweight wrapper around our model and data to estimate a learning rate, train the model, inspect the model, and make predictions.

# ## Detect Hardware: CPU vs. GPU vs. TPU

# In[3]:


# Detect hardware
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
except ValueError:
    tpu = None
gpus = tf.config.experimental.list_logical_devices("GPU")

# Select the appropriate distribution strategy
if tpu:
    tf.tpu.experimental.initialize_tpu_system(tpu)
    # Going back and forth between TPU and host is expensive.
    # Better to run 128 batches on the TPU before reporting back.
    strategy = tf.distribute.experimental.TPUStrategy(tpu, steps_per_run=128)
    print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
elif len(gpus) > 1:
    strategy = tf.distribute.MirroredStrategy([gpu.name for gpu in gpus])
    print('Running on multiple GPUs ', [gpu.name for gpu in gpus])
elif len(gpus) == 1:
    strategy = tf.distribute.get_strategy()  # default strategy that works on CPU and single GPU
    print('Running on single GPU ', gpus[0].name)
else:
    strategy = tf.distribute.get_strategy()  # default strategy that works on CPU and single GPU
    print('Running on CPU')
print("Number of accelerators: ", strategy.num_replicas_in_sync)


# ## Prepare Training and Validation Data as `tf.data.Dataset` Objects
# 
# Download the dataset files from [LeCun's website](http://yann.lecun.com/exdb/mnist/).

# In[4]:


BATCH_SIZE = 64 * strategy.num_replicas_in_sync  # Global batch size.

training_images_file   = 'data/mnist_lecun/train-images-idx3-ubyte'
training_labels_file   = 'data/mnist_lecun/train-labels-idx1-ubyte'
validation_images_file = 'data/mnist_lecun/t10k-images-idx3-ubyte'
validation_labels_file = 'data/mnist_lecun/t10k-labels-idx1-ubyte'


# Note that, if training on a TPU, these should be set as follows:
# 
# ```python
# training_images_file   = 'gs://mnist-public/train-images-idx3-ubyte'
# training_labels_file   = 'gs://mnist-public/train-labels-idx1-ubyte'
# validation_images_file = 'gs://mnist-public/t10k-images-idx3-ubyte'
# validation_labels_file = 'gs://mnist-public/t10k-labels-idx1-ubyte'
# ```
# 
# You may need to authenticate:
# ```python
# IS_COLAB_BACKEND = 'COLAB_GPU' in os.environ  # this is always set on Colab; the value is 0 or 1 depending on GPU presence
# if IS_COLAB_BACKEND:
#     from google.colab import auth
#     # Authenticates the Colab machine and also the TPU using your
#     # credentials so that they can access your private GCS buckets.
#     auth.authenticate_user()
# ```
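# If you are running locally (not on a TPU), the four files referenced above can be downloaded and
# uncompressed with something like the sketch below. This is a minimal sketch, assuming the files are
# still hosted on LeCun's site under the standard gzipped names and that you want them in
# `data/mnist_lecun/`:
# 
# ```python
# import gzip, os, shutil, urllib.request
# 
# base_url = 'http://yann.lecun.com/exdb/mnist/'  # assumed still reachable
# names = ['train-images-idx3-ubyte', 'train-labels-idx1-ubyte',
#          't10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte']
# os.makedirs('data/mnist_lecun', exist_ok=True)
# for name in names:
#     gz_path = os.path.join('data/mnist_lecun', name + '.gz')
#     urllib.request.urlretrieve(base_url + name + '.gz', gz_path)  # download the gzipped IDX file
#     with gzip.open(gz_path, 'rb') as f_in, \
#          open(os.path.join('data/mnist_lecun', name), 'wb') as f_out:
#         shutil.copyfileobj(f_in, f_out)                           # write the uncompressed file
# ```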
# In[5]:


def read_label(tf_bytestring):
    label = tf.io.decode_raw(tf_bytestring, tf.uint8)
    label = tf.reshape(label, [])
    label = tf.one_hot(label, 10)
    return label

def read_image(tf_bytestring):
    image = tf.io.decode_raw(tf_bytestring, tf.uint8)
    image = tf.cast(image, tf.float32) / 255.0
    image = tf.reshape(image, [28*28])
    return image

def load_dataset(image_file, label_file):
    imagedataset = tf.data.FixedLengthRecordDataset(image_file, 28*28, header_bytes=16)
    imagedataset = imagedataset.map(read_image, num_parallel_calls=16)
    labelsdataset = tf.data.FixedLengthRecordDataset(label_file, 1, header_bytes=8)
    labelsdataset = labelsdataset.map(read_label, num_parallel_calls=16)
    dataset = tf.data.Dataset.zip((imagedataset, labelsdataset))
    return dataset

def get_training_dataset(image_file, label_file, batch_size):
    dataset = load_dataset(image_file, label_file)
    dataset = dataset.cache()  # this small dataset can be entirely cached in RAM
    dataset = dataset.shuffle(5000, reshuffle_each_iteration=True)
    dataset = dataset.repeat()  # mandatory for Keras for now
    dataset = dataset.batch(batch_size, drop_remainder=True)  # drop_remainder is important on TPU: batch size must be fixed
    dataset = dataset.prefetch(-1)  # fetch next batches while training on the current one (-1: autotune prefetch buffer size)
    return dataset

def get_validation_dataset(image_file, label_file):
    dataset = load_dataset(image_file, label_file)
    dataset = dataset.cache()  # this small dataset can be entirely cached in RAM
    dataset = dataset.batch(10000, drop_remainder=True)  # 10000 items in eval dataset, all in one batch
    dataset = dataset.repeat()  # mandatory for Keras for now
    return dataset

def load_label_dataset(label_file):
    labelsdataset = tf.data.FixedLengthRecordDataset(label_file, 1, header_bytes=8)
    labelsdataset = labelsdataset.map(read_label, num_parallel_calls=16)
    return labelsdataset

# instantiate the datasets
training_dataset = get_training_dataset(training_images_file, training_labels_file, BATCH_SIZE)
validation_dataset = get_validation_dataset(validation_images_file, validation_labels_file)

# extract ground-truth labels as NumPy arrays (used by ktrain below)
training_labels = np.vstack(list(load_label_dataset(training_labels_file).as_numpy_iterator()))
validation_labels = np.vstack(list(load_label_dataset(validation_labels_file).as_numpy_iterator()))
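# As an optional sanity check, you can pull a single batch from the training set and confirm that the
# tensor shapes match what the model below expects (a quick sketch; shapes assume the `BATCH_SIZE`
# defined above):
# 
# ```python
# x_batch, y_batch = next(iter(training_dataset))
# print(x_batch.shape)            # (BATCH_SIZE, 784): flattened 28x28 images
# print(y_batch.shape)            # (BATCH_SIZE, 10): one-hot labels
# print(validation_labels.shape)  # (10000, 10)
# ```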
# ## Build a Model

# In[6]:


# This model trains to 99.4% accuracy in 10 epochs (with a batch size of 64)
def make_model():
    model = tf.keras.Sequential(
      [
        tf.keras.layers.Reshape(input_shape=(28*28,), target_shape=(28, 28, 1), name="image"),

        tf.keras.layers.Conv2D(filters=12, kernel_size=3, padding='same', use_bias=False),  # no bias necessary before batch norm
        tf.keras.layers.BatchNormalization(scale=False, center=True),  # no batch norm scaling necessary before "relu"
        tf.keras.layers.Activation('relu'),  # activation after batch norm

        tf.keras.layers.Conv2D(filters=24, kernel_size=6, padding='same', use_bias=False, strides=2),
        tf.keras.layers.BatchNormalization(scale=False, center=True),
        tf.keras.layers.Activation('relu'),

        tf.keras.layers.Conv2D(filters=32, kernel_size=6, padding='same', use_bias=False, strides=2),
        tf.keras.layers.BatchNormalization(scale=False, center=True),
        tf.keras.layers.Activation('relu'),

        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(200, use_bias=False),
        tf.keras.layers.BatchNormalization(scale=False, center=True),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.Dropout(0.4),  # dropout on the dense layer only
        tf.keras.layers.Dense(10, activation='softmax')
      ])

    model.compile(optimizer='adam',  # learning rate will be set by LearningRateScheduler
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

with strategy.scope():
    model = make_model()

# set up learning rate decay [FROM ORIGINAL EXAMPLE BUT NOT USED]
# NOT NEEDED: we will use ktrain to find an LR and decay the learning rate during training
LEARNING_RATE = 0.01
LEARNING_RATE_EXP_DECAY = 0.6 if strategy.num_replicas_in_sync == 1 else 0.7
lr_decay = tf.keras.callbacks.LearningRateScheduler(
    lambda epoch: LEARNING_RATE * LEARNING_RATE_EXP_DECAY**epoch, verbose=True)


# ## Use *ktrain* With Our Model and Data

# ### Wrap the `tf.data.Dataset` Objects in a `ktrain.TFDataset` and Create a `Learner`

# In[7]:


import ktrain

trn = ktrain.TFDataset(training_dataset, n=training_labels.shape[0], y=training_labels)
val = ktrain.TFDataset(validation_dataset, n=validation_labels.shape[0], y=validation_labels)
learner = ktrain.get_learner(model, train_data=trn, val_data=val)


# ### Find a Learning Rate

# In[8]:


learner.lr_find(show_plot=True)


# ## Train the Model Using a Cosine Annealing LR Schedule

# In[9]:


learner.fit(5e-3, 1, cycle_len=10, checkpoint_folder='/tmp/mymodel')


# In[10]:


# cosine-annealed LR schedule
learner.plot('lr')


# In[11]:


# training vs. validation loss
learner.plot('loss')


# ### Inspect the Model
# 
# #### Evaluate as Normal

# In[12]:


learner.model.evaluate(validation_dataset, steps=1)


# #### Validation Metrics

# In[14]:


learner.validate(class_names=list(map(str, range(10))))


# #### View Top Losses

# In[11]:


learner.view_top_losses(n=1)


# ### Making Predictions

# In[26]:


preds = learner.predict(val)
preds = np.argmax(preds, axis=1)
actual = learner.ground_truth(val)
actual = np.argmax(actual, axis=1)


# In[27]:


import pandas as pd
df = pd.DataFrame(list(zip(preds, actual)), columns=['Predicted', 'Actual'])
df.head()


# ## Save and Reload the Model

# In[13]:


learner.save_model('/tmp/my_tf_model')


# In[14]:


learner.load_model('/tmp/my_tf_model')


# In[15]:


learner.model.evaluate(validation_dataset, steps=1)


# In[ ]:
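# Because *ktrain* is only a lightweight wrapper, the reloaded `learner.model` is still an ordinary
# `tf.keras` model and can be used directly, outside of *ktrain*. For example (a minimal sketch;
# recall that the validation dataset was configured above to yield a single 10,000-example batch):
# 
# ```python
# x_val, y_val = next(iter(validation_dataset))
# probs = learner.model.predict(x_val)
# print('predicted:', probs.argmax(axis=1)[:10])
# print('actual:   ', y_val.numpy().argmax(axis=1)[:10])
# ```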