Authors' note: The cells below set up the data to mirror the format found on Kaggle, which is where this notebook was originally written. If you are running the notebook on your own machine or server, you will probably want a different directory structure (note that we cannot provide support for running the lesson material this way).
%%capture
!mkdir ../input/computer-vision-for-the-humanities-ph/ads_data/ads_data/ -p
!wget https://zenodo.org/record/5838410/files/ads_upsampled.csv?download=1 -O ../input/computer-vision-for-the-humanities-ph/ads_data/ads_data/ads_upsampled.csv
!mkdir ../input/computer-vision-for-the-humanities-ph/ads_data/ads_data/images/ -p
!wget -O images.zip https://zenodo.org/record/5838410/files/images.zip?download=1
!unzip images.zip -d ../input/computer-vision-for-the-humanities-ph/ads_data/ads_data/images/
!mkdir ../input/computer-vision-for-the-humanities-ph/photos_multi/photos_multi/ -p
!wget https://zenodo.org/record/4487141/files/multi_label.csv?download=1 -O ../input/computer-vision-for-the-humanities-ph/photos_multi/photos_multi/multi_label.csv
!wget -O photo_images.zip https://zenodo.org/record/4487141/files/images.zip?download=1
!mkdir ../input/computer-vision-for-the-humanities-ph/photos_multi/photos_multi/photo_images -p
!unzip photo_images -d ../input/computer-vision-for-the-humanities-ph/photos_multi/photos_multi/photo_images
!pip install fastai --upgrade
This notebook contains the code you'll need to run in both Part 1 and Part 2 of this lesson.
from fastai.vision.all import *
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('seaborn')
# Build fastai DataLoaders for the advert data (single-label classification)
# directly from the CSV of file names and labels.
ad_data = ImageDataLoaders.from_csv(
path="../input/computer-vision-for-the-humanities-ph/ads_data/ads_data/", # root path to csv file and image directory
csv_fname="ads_upsampled.csv", # the name of our csv file
folder="images/", # the folder where our images are stored
fn_col="file", # the file column in our csv
label_col="label", # the label column in our csv
item_tfms=Resize(224, ResizeMethod.Squish), # resize images by squishing so they are 224x224 pixels
seed=42, # set a fixed seed to make results more reproducible
)
# Display a sample batch of images with their labels as a sanity check
ad_data.show_batch()
# Create a transfer-learning classifier from a pretrained ResNet-18
learn = vision_learner(
ad_data, # the data the model will be trained on
resnet18, # the type of model we want to use
metrics=accuracy, # the metrics to track
)
# Fine-tune: fastai first trains the new head, then trains the whole network
# for 5 epochs
learn.fine_tune(5)
# Baseline for comparison: same architecture but trained from random weights
# (pretrained=False), to show the benefit of transfer learning
learn_random_start = vision_learner(ad_data, resnet18, metrics=accuracy, pretrained=False)
learn_random_start.fine_tune(5)
# Re-evaluate the pretrained model on the validation set
learn.validate()
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('seaborn')
df = pd.read_csv('../input/computer-vision-for-the-humanities-ph/photos_multi/photos_multi/multi_label.csv', na_filter=False)
df
df['label'].value_counts()
# Create a variable `labels` holding the raw label strings as a Python list
labels = list(df['label'])
# Peek at the first six entries
labels[:6]
# Each entry can contain several labels joined by "|" — split each one apart
split_labels = [entry.split("|") for entry in labels]
split_labels[:4]
# Flatten the list of lists into a single flat list of individual labels
labels = [tag for tags in split_labels for tag in tags]
labels[:4]
# Count how many times each individual label occurs
from collections import Counter
label_freqs = Counter(labels)
label_freqs
# Total number of label occurrences (used below to normalise the counts)
sum(label_freqs.values())
import matplotlib.pyplot as plt

# Compute the total once, instead of re-summing all counts for every single
# bar inside the mapping function (the original lambda did O(n) work per label)
total = sum(label_freqs.values())
plt.bar(
label_freqs.keys(), # pass in our labels
[count / total for count in label_freqs.values()], # normalised frequencies
)
# add a title to the plot
plt.title("Label frequencies")
# add a y axis label
plt.ylabel("Percentage of total labels")
plt.show() # show the plot
# Re-import the fastai vision API (needed if Part 2 is run in a fresh kernel)
from fastai.vision.all import *
# Inspect the dataframe's column names before building the DataLoaders
df.columns
# Build multi-label DataLoaders from the dataframe; label_delim tells fastai
# each row can carry several "|"-separated labels
photo_data = ImageDataLoaders.from_df(
df, # the dataframe where our labels and image file paths are stored
folder="../input/computer-vision-for-the-humanities-ph/photos_multi/photos_multi/photo_images", # the path to the directory holding the images
bs=32, # the batch size (number of images + labels)
label_delim="|", # the delimiter between each label in our label column
item_tfms=Resize(224), # resize each image to 224x224
valid_pct=0.3, # use 30% of the data as validation data
seed=42 # set a seed to make results more reproducible
)
photo_data
# Show a sample batch with its (possibly multiple) labels
photo_data.show_batch(figsize=(15,15))
# The vocabulary of distinct labels fastai discovered
photo_data.vocab
# Pull one batch apart to inspect what the model will actually receive
x, y = photo_data.one_batch()
type(x), type(y)
len(x), len(y)
# First image tensor of the batch
x[0]
x[0].shape
# First target of the batch (one row per image, one entry per vocab label)
y[0]
y[0].shape
# Full batch shapes for inputs and targets
x.shape, y.shape
# Training-time augmentations: random rotation (up to 90 degrees, applied with
# probability 0.75), zoom, and flip
tfms = setup_aug_tfms([Rotate(max_deg=90, p=0.75), Zoom(), Flip()])
# Rebuild the DataLoaders, now applying the augmentations per batch
photo_data = ImageDataLoaders.from_df(
df, # dataframe containing paths to images and labels
folder="../input/computer-vision-for-the-humanities-ph/photos_multi/photos_multi/photo_images", # folder where images are stored
bs=32, # batch size
label_delim="|", # the delimiter for multiple labels
item_tfms=Resize(224), # resize images to a standard size
batch_tfms=tfms, # pass in our transforms
valid_pct=0.3, # 30% of data used for validation
seed=42, # set a seed to make results more reproducible
)
# unique=True repeats the same image across the grid so the effect of the
# random augmentations is visible
photo_data.show_batch(unique=True, figsize=(10,10))
# Multi-label learner: DenseNet-121 with multi-label metrics
# (F1ScoreMulti and accuracy_multi score each label independently)
learn = vision_learner(photo_data, densenet121, metrics=[F1ScoreMulti(), accuracy_multi])
# (removed leftover `?learn` — interactive help inspection, not part of the analysis)
# Sweep learning rates to pick lr_max for training
learn.lr_find()
# Train with one-cycle scheduling for 5 epochs
learn.fit_one_cycle(5, lr_max=2e-2)
# Plot training/validation loss curves to check for over/under-fitting
learn.recorder.plot_loss()
# Checkpoint before unfreezing, so we can roll back if full fine-tuning diverges
learn.save('stage_1')
# Unfreeze the pretrained body and continue with a range of discriminative
# learning rates, keeping the checkpoint with the best validation F1 score
learn.unfreeze()
learn.lr_find()
learn.fit_one_cycle(4, lr_max=slice(6e-6, 4e-4), cbs=[SaveModelCallback(monitor='f1_score')])
# Get predictions and targets for the validation set
y_pred, y_true = learn.get_preds()
len(y_pred), len(y_true)
# One prediction row: a score per vocabulary label
y_pred[0]
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score
# Binarise predictions at a 0.5 threshold and macro-average F1 across labels
f1_score(y_true, y_pred>0.50, average='macro')
from sklearn.metrics import classification_report
# Per-label precision/recall/F1. NOTE(review): zero_division=1 silences the
# undefined-metric warning by reporting 1.0 for labels with no predicted
# samples — confirm that optimistic reading is intended here.
print(classification_report(y_true, y_pred>0.50, target_names=photo_data.vocab, zero_division=1))