#!/usr/bin/env python
# coding: utf-8

# In[1]:


get_ipython().run_line_magic('reload_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')
get_ipython().run_line_magic('matplotlib', 'inline')
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"


# ## Image Classification Example
#
# We will begin our image classification example by importing some required modules.

# In[2]:


import ktrain
from ktrain import vision as vis


# Next, we will load and preprocess the image data for training and validation. *ktrain* can load images and associated labels from a variety of sources:
#
# - `images_from_folder`: labels are represented as subfolders containing images [[example notebook](https://github.com/amaiya/ktrain/blob/master/examples/vision/dogs_vs_cats-ResNet50.ipynb)]
# - `images_from_csv`: labels are mapped to images in a CSV file [[example notebook](https://github.com/amaiya/ktrain/blob/master/examples/vision/planet-ResNet50.ipynb)]
# - `images_from_fname`: labels are included as part of the filename and must be extracted using a regular expression [[example notebook](https://github.com/amaiya/ktrain/blob/master/examples/vision/pets-ResNet50.ipynb)]
# - `images_from_array`: images and labels are stored in arrays [[example notebook](https://github.com/amaiya/ktrain/blob/master/examples/vision/mnist-images_from_array_example.ipynb)]
#
# Here, we use the ```images_from_folder``` function to load the data as a generator (i.e., a DirectoryIterator object). This function assumes the following directory structure:
# ```
# ├── datadir
# │   ├── train
# │   │   ├── class0       # folder containing images of class 0
# │   │   ├── class1       # folder containing images of class 1
# │   │   ├── class2       # folder containing images of class 2
# │   │   └── classN       # folder containing images of class N
# │   └── test
# │       ├── class0       # folder containing images of class 0
# │       ├── class1       # folder containing images of class 1
# │       ├── class2       # folder containing images of class 2
# │       └── classN       # folder containing images of class N
# ```
#
# The *train_test_names* argument can be used if the train and test subfolders are named differently (e.g., the *test* folder is called *valid*). Here, we load a dataset of cat and dog images, which can be obtained from [here](https://www.kaggle.com/c/dogs-vs-cats/data). The DATADIR variable should be set to the path of the extracted folder. The **data_aug** parameter can be used to employ [data augmentation](https://arxiv.org/abs/1712.04621). We set this parameter using the ```get_data_aug``` function, which returns a default data augmentation with ```horizontal_flip=True``` as the only change to the defaults. See the [Keras documentation](https://keras.io/preprocessing/image/#imagedatagenerator-class) for the full set of augmentation parameters. Finally, we pass the requested target size (224, 224) and color mode (rgb, i.e., a 3-channel image). Images will be resized or converted appropriately based on the values supplied. A target size of 224 by 224 is typically used with networks pretrained on ImageNet, which is what we use next. The ```images_from_folder``` function returns generators for both the training and validation data, in addition to an instance of ```ktrain.vision.ImagePreprocessor```, which can be used to preprocess raw data when making predictions for new examples. This will be demonstrated later.
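# Beyond ```horizontal_flip```, richer augmentation schemes can be requested. The snippet below is a hypothetical sketch that assumes ```get_data_aug``` forwards these keyword arguments to the underlying Keras ImageDataGenerator; the parameter values shown are illustrative, not tuned:
#
# ```python
# data_aug = vis.get_data_aug(horizontal_flip=True,
#                             rotation_range=20,      # rotate up to 20 degrees
#                             zoom_range=0.1,         # zoom in/out by up to 10%
#                             width_shift_range=0.1,  # shift horizontally by up to 10%
#                             height_shift_range=0.1) # shift vertically by up to 10%
# ```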
# In[3]:


DATADIR = 'data/dogscats'
(train_data, val_data, preproc) = vis.images_from_folder(
                                      datadir=DATADIR,
                                      # use a default data augmentation with horizontal_flip=True
                                      data_aug=vis.get_data_aug(horizontal_flip=True),
                                      train_test_names=['train', 'valid'],
                                      target_size=(224,224), color_mode='rgb')


# Let's examine some sample cat and dog images from the training set:

# In[4]:


print('sample cat images:')
vis.show_random_images(DATADIR+'/train/cats/')


# In[5]:


print('sample dog images:')
vis.show_random_images(DATADIR+'/train/dogs/')


# Next, we use the ```image_classifier``` function to load a **ResNet50** model pre-trained on [ImageNet](http://www.image-net.org/). For more information on using pretrained networks, see this [blog post](https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html). By default, all layers except the randomly initialized custom Dense layers on top are frozen (i.e., not trainable). We then wrap the model and data in a Learner object. We specify 8 CPU workers to load batches during training, disable multiprocessing, and use a batch size of 64. You can adjust these values based on your system specifications to see what yields the best performance.

# In[6]:


# let's print the available precanned image classification models in ktrain
vis.print_image_classifiers()


# In[5]:


model = vis.image_classifier('pretrained_resnet50', train_data, val_data)
learner = ktrain.get_learner(model=model, train_data=train_data, val_data=val_data,
                             workers=8, use_multiprocessing=False, batch_size=64)


# Next, we freeze the first 15 layers, as the ImageNet pre-trained weights of these early layers are typically applicable as is. All other layers are unfrozen and trainable. You can use the ```learner.freeze``` and ```learner.unfreeze``` methods to selectively freeze and unfreeze layers, if necessary. ```learner.freeze(freeze_range=15)``` and ```learner.unfreeze(exclude_range=15)``` are equivalent. The number of layers you freeze will depend on how similar your dataset is to ImageNet and other particulars of the dataset. For instance, classifying satellite images or subcellular protein patterns may require fewer frozen layers than classifying pictures of dogs and cats. You can also begin training for a few epochs with many frozen layers and gradually unfreeze layers for later epochs.

# In[6]:


learner.freeze(freeze_range=15)


# You can use the ```print_layers``` method to examine the layers of the created network.

# In[7]:


learner.print_layers()


# As shown before, we use the Learning Rate Finder in *ktrain* to find a good initial learning rate.

# In[8]:


learner.lr_find()


# In[9]:


learner.lr_plot()


# Finally, we will use the ```autofit``` method to train our model using a [triangular learning rate policy](https://arxiv.org/pdf/1506.01186.pdf). Since we have not specified the number of epochs, the maximum learning rate will be periodically reduced when the validation loss fails to decrease, and training will eventually stop automatically.
#
# Our final validation accuracy is **99.55%**, first occurring at the 8th epoch during this run.

# In[10]:


learner.autofit(1e-4)


# In[11]:


loss, acc = learner.model.evaluate_generator(learner.val_data, steps=len(learner.val_data))


# In[12]:


print('final loss:%s, final accuracy:%s' % (loss, acc))


# As can be seen, the final validation accuracy of our model is **99.55%**.
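# Note that ```autofit``` can also be given an explicit training duration. The calls below are a hypothetical sketch; they assume ```autofit``` accepts an epochs argument and an ```early_stopping``` keyword, so consult the ktrain documentation for the exact signature:
#
# ```python
# learner.autofit(1e-4, 10)                # train for exactly 10 epochs
# learner.autofit(1e-4, early_stopping=3)  # stop after 3 epochs with no improvement in validation loss
# ```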
# ### Using Our Model to Make Predictions

# Finally, let's use our model to make predictions for some images.
#
# Here is a sample image of both a cat and a dog from the validation set.

# In[13]:


get_ipython().getoutput('ls {DATADIR}/valid/cats |head -n3')


# In[14]:


get_ipython().getoutput('ls {DATADIR}/valid/dogs |head -n3')


# In[15]:


vis.show_image(DATADIR+'/valid/cats/cat.10016.jpg')


# In[16]:


vis.show_image(DATADIR+'/valid/dogs/dog.10001.jpg')


# Now, let's create a predictor object to make predictions for the above images.

# In[17]:


predictor = ktrain.get_predictor(learner.model, preproc)


# Let's see if we predict the selected cat and dog images correctly.

# In[18]:


predictor.predict_filename(DATADIR+'/valid/cats/cat.10016.jpg')


# In[19]:


predictor.predict_filename(DATADIR+'/valid/dogs/dog.10001.jpg')


# Our predictor is working well. We can save our predictor to disk for later use in an application.

# In[20]:


predictor.save('/tmp/cat_vs_dog_detector')


# Let's load our predictor from disk to show that it still works as expected.

# In[21]:


predictor = ktrain.load_predictor('/tmp/cat_vs_dog_detector')


# In[22]:


predictor.predict_filename(DATADIR+'/valid/cats/cat.10016.jpg')


# In[23]:


predictor.predict_filename(DATADIR+'/valid/dogs/dog.10001.jpg')


# Finally, let's make predictions for all the cat pictures in our validation set:

# In[24]:


predictor.predict_folder(DATADIR+'/valid/cats/')[:10]


# By default, `predict*` methods in *ktrain* return the predicted class labels. To view the predicted probabilities for each class, supply `return_proba=True` as an extra argument:
#
# ```python
# predictor.predict_filename(filename, return_proba=True)
# predictor.predict_folder(foldername, return_proba=True)
# ```

# ## Multi-Label Image Classification

# In the previous example, the classes were mutually exclusive. That is, images contained either a dog or a cat, but not both. Some problems are multi-label classification problems in that each image can belong to multiple classes (or categories). One such instance is the [Kaggle Planet Competition](https://www.kaggle.com/c/planet-understanding-the-amazon-from-space). In this competition, we are given a collection of satellite images of the Amazon rainforest. The objective is to identify locations of deforestation and human encroachment on forests by classifying images into up to 17 different categories. Categories include *agriculture*, *habitation*, *selective_logging*, and *slash_burn*. A given satellite image can belong to more than one category. The dataset can be downloaded from the [competition page](https://www.kaggle.com/c/planet-understanding-the-amazon-from-space/data). The satellite images are located in a zipped folder called **train-jpg.zip**. The labels for each image are in the form of a CSV file (i.e., **train_v2.csv**) containing file names and their labels. Let us first examine the CSV file for this dataset. Be sure to set the DATADIR variable to the path of the extracted dataset.

# In[3]:


DATADIR = 'data/planet'
get_ipython().getoutput('head {DATADIR}/train_v2.csv')


# We make three observations.
# * The *image_name* field is the file name of the satellite image.
# * The file names are missing the .jpg file extension.
# * The labels are simply a space-delimited list of tags, rather than a one-hot-encoded vector.
#
# Let us first convert this CSV into a new CSV that includes one-hot-encoded representations of the tags and appends the file extension to the file names. Since this dataset format is somewhat common (especially for multi-label image classification problems), *ktrain* contains a convenience function to automatically perform the conversion.
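# For intuition, the conversion is roughly equivalent to the following pandas sketch (a hypothetical illustration of the transformation, not ktrain's actual implementation):
#
# ```python
# import pandas as pd
#
# df = pd.read_csv('data/planet/train_v2.csv')
# df['image_name'] = df['image_name'] + '.jpg'        # append the missing file extension
# tags = df['tags'].str.split()                       # space-delimited tag lists
# label_names = sorted({t for row in tags for t in row})
# for label in label_names:                           # one binary column per label
#     df[label] = tags.apply(lambda row: int(label in row))
# df.drop(columns=['tags']).to_csv('data/planet/train_v2-CONVERTED.csv', index=False)
# ```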
# In[4]:


ORIGINAL_DATA = DATADIR+'/train_v2.csv'
CONVERTED_DATA = DATADIR+'/train_v2-CONVERTED.csv'
labels = vis.preprocess_csv(ORIGINAL_DATA,
                            CONVERTED_DATA,
                            x_col='image_name', y_col='tags', suffix='.jpg')


# In[5]:


get_ipython().getoutput('head {DATADIR}/train_v2-CONVERTED.csv')


# We can use the ```images_from_csv``` function to load the data as generators. Remember to specify ```preprocess_for='resnet50'```, as we will be using a ResNet50 architecture again.

# In[6]:


train_data, val_data, preproc = vis.images_from_csv(
                                  CONVERTED_DATA,
                                  'image_name',
                                  directory=DATADIR+'/train-jpg',
                                  val_filepath=None,
                                  label_columns=labels,
                                  data_aug=vis.get_data_aug(horizontal_flip=True, vertical_flip=True))


# As before, we load a pre-trained ResNet50 model (the default) and wrap this model and the data in a Learner object. Here, we will freeze only the first two layers, as the satellite images are comparatively more dissimilar to ImageNet. Thus, the weights in earlier layers will need more updating.

# In[8]:


model = vis.image_classifier('pretrained_resnet50', train_data, val_data=val_data)
learner = ktrain.get_learner(model, train_data=train_data, val_data=val_data,
                             batch_size=64, workers=8, use_multiprocessing=False)


# In[9]:


learner.freeze(2)


# The learning-rate-finder indicates that a learning rate of 1e-4 would be a good choice.

# In[35]:


learner.lr_find()


# In[36]:


learner.lr_plot()


# For this dataset, instead of using ```autofit```, we will use the ```fit_onecycle``` method, which utilizes the [1cycle learning rate policy](https://arxiv.org/pdf/1803.09820.pdf). The final model achieves an F2-score of **0.928**, as shown below.

# In[10]:


learner.fit_onecycle(1e-4, 20)


# If there is not yet evidence of overfitting, it can sometimes be beneficial to train further after early stopping. Since the validation loss appears to still be decreasing, we will train a little more using a lower learning rate. We train for only one additional epoch here for illustration purposes. Prior to further training, the current model is saved using the ```learner.save_model``` method in case we end up overfitting. If we do overfit, the original model can be restored using the ```learner.load_model``` method.

# In[15]:


learner.save_model('/tmp/planet_model')


# In[16]:


learner.fit_onecycle(1e-4/10, 1)


# ### Evaluation
#
# The evaluation metric for the [Kaggle Planet competition](https://www.kaggle.com/c/planet-understanding-the-amazon-from-space#evaluation) was the F2-score.
#
# As shown below, this model achieves an F2-score of **0.928**.

# In[17]:


from sklearn.metrics import fbeta_score
import numpy as np
import warnings

def f2(preds, targs, start=0.17, end=0.24, step=0.01):
    # binarize predictions at several candidate thresholds and return the best F2-score
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        return max([fbeta_score(targs, (preds > th), beta=2, average='samples')
                    for th in np.arange(start, end, step)])


# In[18]:


y_pred = learner.model.predict_generator(val_data, steps=len(val_data))
y_true = val_data.labels


# In[19]:


f2(y_pred, y_true)
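# For reference, the F-beta score combines precision ($P$) and recall ($R$); with $\beta = 2$, recall is weighted more heavily than precision:
#
# $$F_\beta = (1 + \beta^2) \cdot \frac{P \cdot R}{\beta^2 \cdot P + R}$$
#
# Since the model outputs a probability for each of the 17 labels, the ```f2``` function above must first binarize these probabilities, which is why it searches a small range of thresholds and reports the best resulting score.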
# ### Making Predictions
#
# Let's make some predictions using our model and examine the results. As before, we first create a Predictor instance.

# In[22]:


predictor = ktrain.get_predictor(learner.model, preproc)


# Let's examine the folder of images and select a couple to analyze.

# In[23]:


get_ipython().getoutput('ls {DATADIR}/train-jpg/ |head')


# Image train_10008.jpg is categorized into the following classes:
# * artisinal_mine (i.e., small-scale mining operations, which are sometimes illegal in lands designated for conservation)
# * clear
# * primary (rainforest)
# * water

# In[40]:


vis.show_image(DATADIR+'/train-jpg/train_10008.jpg')


# In[43]:


get_ipython().getoutput('head -n 1 {CONVERTED_DATA}')


# In[44]:


get_ipython().getoutput('grep train_10008.jpg {CONVERTED_DATA}')


# Our predictions are consistent with these labels, as shown below:

# In[45]:


predictor.predict_filename(DATADIR+'/train-jpg/train_10008.jpg')


# Here is another example showing *water*, *clear*, and *primary*.

# In[46]:


vis.show_image(DATADIR+'/train-jpg/train_10010.jpg')


# In[47]:


get_ipython().getoutput('head -n 1 {CONVERTED_DATA}')


# In[48]:


get_ipython().getoutput('grep train_10010.jpg {CONVERTED_DATA}')


# In[49]:


predictor.predict_filename(DATADIR+'/train-jpg/train_10010.jpg')
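# In the multi-label setting, it can also be informative to inspect the per-class probabilities rather than only the thresholded labels. The snippet below is a hypothetical sketch; it uses ```return_proba=True``` (shown earlier) and assumes the predictor exposes its class names via a ```get_classes``` method and returns the probabilities as an array:
#
# ```python
# probs = predictor.predict_filename(DATADIR+'/train-jpg/train_10010.jpg',
#                                    return_proba=True)
# # pair each class name with its predicted probability
# # (the exact return shape of predict_filename is an assumption here)
# for name, p in zip(predictor.get_classes(), np.ravel(probs)):
#     print('%s: %.3f' % (name, p))
# ```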