Authors' note: The cells below set up the data to mirror the format found on Kaggle, which is where this notebook was originally written. If you are running the notebook on your own machine or server, you will probably want a different directory structure (note that we cannot provide support for running the lesson material this way).
%%capture
!mkdir ../input/computer-vision-for-the-humanities-ph/ads_data/ads_data/ -p
!wget https://zenodo.org/record/5838410/files/ads_upsampled.csv?download=1 -O ../input/computer-vision-for-the-humanities-ph/ads_data/ads_data/ads_upsampled.csv
!mkdir ../input/computer-vision-for-the-humanities-ph/ads_data/ads_data/images/ -p
!wget -O images.zip https://zenodo.org/record/5838410/files/images.zip?download=1
!unzip images.zip -d ../input/computer-vision-for-the-humanities-ph/ads_data/ads_data/images/
!mkdir ../input/computer-vision-for-the-humanities-ph/photos_multi/photos_multi/ -p
!wget https://zenodo.org/record/4487141/files/multi_label.csv?download=1 -O ../input/computer-vision-for-the-humanities-ph/photos_multi/photos_multi/multi_label.csv
!wget -O photo_images.zip https://zenodo.org/record/4487141/files/images.zip?download=1
!mkdir ../input/computer-vision-for-the-humanities-ph/photos_multi/photos_multi/photo_images -p
!unzip photo_images -d ../input/computer-vision-for-the-humanities-ph/photos_multi/photos_multi/photo_images
!pip install fastai --upgrade
This notebook contains the code you'll need to run in both Part 1 and Part 2 of this lesson.
from fastai.vision.all import *
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('seaborn')
# Build fastai DataLoaders for the advert data (single-label classification)
# directly from the CSV of file names and labels.
ad_data = ImageDataLoaders.from_csv(
path="../input/computer-vision-for-the-humanities-ph/ads_data/ads_data/", # root path to csv file and image directory
csv_fname="ads_upsampled.csv", # the name of our csv file
folder="images/", # the folder where our images are stored
fn_col="file", # the file column in our csv
label_col="label", # the label column in our csv
item_tfms=Resize(224, ResizeMethod.Squish), # resize images by squishing so they are 224x224 pixels
seed=42, # set a fixed seed to make results more reproducible
)
# Display a sample batch of images with their labels as a sanity check
ad_data.show_batch()
# Create a transfer-learning classifier from a pretrained ResNet-18
learn = vision_learner(
ad_data, # the data the model will be trained on
resnet18, # the type of model we want to use
metrics=accuracy, # the metrics to track
)
# Fine-tune: fastai first trains the new head, then trains the whole network
# for 5 epochs
learn.fine_tune(5)
# Baseline for comparison: same architecture but trained from random weights
# (pretrained=False), to show the benefit of transfer learning
learn_random_start = vision_learner(ad_data, resnet18, metrics=accuracy, pretrained=False)
learn_random_start.fine_tune(5)
# Re-evaluate the pretrained model on the validation set
learn.validate()
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('seaborn')
df = pd.read_csv('../input/computer-vision-for-the-humanities-ph/photos_multi/photos_multi/multi_label.csv', na_filter=False)
df
df['label'].value_counts()
# Create a variable `labels` holding the raw label strings as a Python list
labels = list(df['label'])
# Peek at the first six entries
labels[:6]
# Each entry can contain several labels joined by "|" — split each one apart
split_labels = [entry.split("|") for entry in labels]
split_labels[:4]
# Flatten the list of lists into a single flat list of individual labels
labels = [tag for tags in split_labels for tag in tags]
labels[:4]
# Count how many times each individual label occurs
from collections import Counter
label_freqs = Counter(labels)
label_freqs
# Total number of label occurrences (used below to normalise the counts)
sum(label_freqs.values())
import matplotlib.pyplot as plt

# Compute the total once, instead of re-summing all counts for every single
# bar inside the mapping function (the original lambda did O(n) work per label)
total = sum(label_freqs.values())
plt.bar(
label_freqs.keys(), # pass in our labels
[count / total for count in label_freqs.values()], # normalised frequencies
)
# add a title to the plot
plt.title("Label frequencies")
# add a y axis label
plt.ylabel("Percentage of total labels")
plt.show() # show the plot
# Re-import the fastai vision API (needed if Part 2 is run in a fresh kernel)
from fastai.vision.all import *
# Inspect the dataframe's column names before building the DataLoaders
df.columns
# Build multi-label DataLoaders from the dataframe; label_delim tells fastai
# each row can carry several "|"-separated labels
photo_data = ImageDataLoaders.from_df(
df, # the dataframe where our labels and image file paths are stored
folder="../input/computer-vision-for-the-humanities-ph/photos_multi/photos_multi/photo_images", # the path to the directory holding the images
bs=32, # the batch size (number of images + labels)
label_delim="|", # the delimiter between each label in our label column
item_tfms=Resize(224), # resize each image to 224x224
valid_pct=0.3, # use 30% of the data as validation data
seed=42 # set a seed to make results more reproducible
)
photo_data
# Show a sample batch with its (possibly multiple) labels
photo_data.show_batch(figsize=(15,15))
# The vocabulary of distinct labels fastai discovered
photo_data.vocab
# Pull one batch apart to inspect what the model will actually receive
x, y = photo_data.one_batch()
type(x), type(y)
len(x), len(y)
# First image tensor of the batch
x[0]
x[0].shape
# First target of the batch (one row per image, one entry per vocab label)
y[0]
y[0].shape
# Full batch shapes for inputs and targets
x.shape, y.shape
# Training-time augmentations: random rotation (up to 90 degrees, applied with
# probability 0.75), zoom, and flip
tfms = setup_aug_tfms([Rotate(max_deg=90, p=0.75), Zoom(), Flip()])
# Rebuild the DataLoaders, now applying the augmentations per batch
photo_data = ImageDataLoaders.from_df(
df, # dataframe containing paths to images and labels
folder="../input/computer-vision-for-the-humanities-ph/photos_multi/photos_multi/photo_images", # folder where images are stored
bs=32, # batch size
label_delim="|", # the delimiter for multiple labels
item_tfms=Resize(224), # resize images to a standard size
batch_tfms=tfms, # pass in our transforms
valid_pct=0.3, # 30% of data used for validation
seed=42, # set a seed to make results more reproducible
)
# unique=True repeats the same image across the grid so the effect of the
# random augmentations is visible
photo_data.show_batch(unique=True, figsize=(10,10))
# Multi-label learner: DenseNet-121 with multi-label metrics
# (F1ScoreMulti and accuracy_multi score each label independently)
learn = vision_learner(photo_data, densenet121, metrics=[F1ScoreMulti(), accuracy_multi])
# (removed leftover `?learn` — interactive help inspection, not part of the analysis)
# Sweep learning rates to pick lr_max for training
learn.lr_find()
# Train with one-cycle scheduling for 5 epochs
learn.fit_one_cycle(5, lr_max=2e-2)
# Plot training/validation loss curves to check for over/under-fitting
learn.recorder.plot_loss()
# Checkpoint before unfreezing, so we can roll back if full fine-tuning diverges
learn.save('stage_1')
# Unfreeze the pretrained body and continue with a range of discriminative
# learning rates, keeping the checkpoint with the best validation F1 score
learn.unfreeze()
learn.lr_find()
learn.fit_one_cycle(4, lr_max=slice(6e-6, 4e-4), cbs=[SaveModelCallback(monitor='f1_score')])
# Get predictions and targets for the validation set
y_pred, y_true = learn.get_preds()
len(y_pred), len(y_true)
# One prediction row: a score per vocabulary label
y_pred[0]
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score
# Binarise predictions at a 0.5 threshold and macro-average F1 across labels
f1_score(y_true, y_pred>0.50, average='macro')
from sklearn.metrics import classification_report
# Per-label precision/recall/F1. NOTE(review): zero_division=1 silences the
# undefined-metric warning by reporting 1.0 for labels with no predicted
# samples — confirm that optimistic reading is intended here.
print(classification_report(y_true, y_pred>0.50, target_names=photo_data.vocab, zero_division=1))