#!/usr/bin/env python
# coding: utf-8

# In[1]:

get_ipython().run_line_magic('reload_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')
get_ipython().run_line_magic('matplotlib', 'inline')

import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import ktrain
from ktrain import vision as vis


# In[2]:

# build a regular expression that extracts the class label from a file name
import re
p = re.compile(r'([^/]+)_\d+.jpg$')
r = p.search('/hello/world/beagle_007.jpg')
r.group(1)


# In[3]:

# use vision.images_from_fname to load the dataset and extract class names from the file names
# Don't forget to do the following:
# 1. Download the data from: https://s3.amazonaws.com/fast-ai-imageclas/oxford-iiit-pet.tgz
# 2. Set DATADIR to the extracted images folder
DATADIR = 'data/oxford-iiit-pet/images'
data_aug = vis.get_data_aug(horizontal_flip=True, shear_range=0.2)  # additions to the default data augmentation
(train_data, val_data, preproc) = vis.images_from_fname(
                                      DATADIR,
                                      pattern=r'([^/]+)_\d+.jpg$',
                                      data_aug=data_aug,
                                      target_size=(299, 299),
                                      color_mode='rgb',
                                      val_pct=0.2)


# In[4]:

# build a pre-trained ResNet50 model
# Note that we use the pt_fc and pt_ps arguments to include an extra
# Dense layer (with dropout) before the final dropout and final Dense layer
model = vis.image_classifier('pretrained_resnet50', train_data, val_data,
                             pt_fc=[512], pt_ps=[0.25])


# In[5]:

# wrap the model and data in a Learner object
learner = ktrain.get_learner(model=model, train_data=train_data, val_data=val_data,
                             workers=8, use_multiprocessing=False, batch_size=64)


# In[6]:

learner.print_layers()


# In[7]:

# find a good learning rate
learner.lr_find(max_epochs=5)


# In[8]:

learner.lr_plot()


# For demonstration purposes, we use `autofit` to train, which employs a triangular
# learning rate policy, with `epochs=20` and `reduce_on_plateau=2`. You may choose to
# try something different.

# In[12]:

# train for up to 20 epochs and reduce the LR after no improvement for 2 epochs
learner.autofit(5e-4, 20, reduce_on_plateau=2)


# In[43]:

# get a Predictor instance that wraps the model and the Preprocessor object
predictor = ktrain.get_predictor(learner.model, preproc)


# In[46]:

# get some random image file names
get_ipython().getoutput('ls {DATADIR} | sort -R | head -10')


# In[47]:

# visualize a Ragdoll
vis.show_image(DATADIR+'/Ragdoll_186.jpg')


# In[48]:

# correctly predict the Ragdoll
predictor.predict_filename(DATADIR+'/Ragdoll_186.jpg')


# In[49]:

# visualize a Staffordshire Bull Terrier
vis.show_image(DATADIR+'/staffordshire_bull_terrier_8.jpg')


# In[50]:

# correctly predict the Staffordshire Bull Terrier
predictor.predict_filename(DATADIR+'/staffordshire_bull_terrier_8.jpg')


# In[51]:

# save the predictor object for later use
predictor.save('/tmp/pet_detector')


# In[52]:

# reload the predictor object
predictor = ktrain.load_predictor('/tmp/pet_detector')


# In[54]:

# predict a Newfoundland
predictor.predict_filename(DATADIR+'/newfoundland_146.jpg')


# In[55]:

# let's look at the dog we correctly predicted - it's cute
vis.show_image(DATADIR+'/newfoundland_146.jpg')
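

# In[ ]:

# Optional sketch (not part of the original walkthrough): inspect validation performance
# after training. This assumes the Learner methods validate() and view_top_losses()
# are available, as in recent ktrain releases; adjust or skip if your version differs.
learner.validate(class_names=preproc.get_classes())  # print a confusion matrix / classification report
learner.view_top_losses(n=3, preproc=preproc)         # display the most badly misclassified validation images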
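

# In[ ]:

# Optional sketch (not part of the original walkthrough): classify many images at once
# with the reloaded predictor. predict_folder() is assumed to exist on ktrain's
# ImagePredictor (it does in recent releases) and to return a prediction per image
# found in the directory; fall back to looping over predict_filename() if it does not.
preds = predictor.predict_folder(DATADIR)
preds[:5]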