%reload_ext autoreload
%autoreload 2
%matplotlib inline
from fastai.conv_learner import *
# Root directory for the planet data; the paths below are all relative to it.
PATH = 'data/planet/'
# Data preparation steps if you are using Crestle:
# Create the models directory under PATH and a tmp directory under /cache
# (exist_ok=True makes both calls safe to repeat).
os.makedirs('data/planet/models', exist_ok=True)
os.makedirs('/cache/planet/tmp', exist_ok=True)
# Symlink the shared Kaggle dataset (train images, test images, label CSV) into PATH.
!ln -s /datasets/kaggle/planet-understanding-the-amazon-from-space/train-jpg {PATH}
!ln -s /datasets/kaggle/planet-understanding-the-amazon-from-space/test-jpg {PATH}
!ln -s /datasets/kaggle/planet-understanding-the-amazon-from-space/train_v2.csv {PATH}
# Symlink the /cache tmp directory into PATH so it appears as {PATH}tmp.
!ln -s /cache/planet/tmp {PATH}
# List PATH to check that the directories and links are in place.
ls {PATH}
models/ test-jpg/ tmp/ train-jpg/ train_v2.csv*
from fastai.plots import *
def get_1st(path):
    """Return the first entry that `glob` reports for ``{path}/*.*``.

    Only names containing a dot match the pattern. Raises `IndexError`
    when nothing matches.
    """
    matches = glob(f'{path}/*.*')
    return matches[0]
# Plot one validation image from each dogs-vs-cats class folder as the
# single-label example.
dc_path = "data/dogscats/valid/"
list_paths = list(map(get_1st, (f"{dc_path}cats", f"{dc_path}dogs")))
plots_from_files(
    list_paths,
    titles=["cat", "dog"],
    maintitle="Single-label classification",
)
In single-label classification each sample belongs to one class. In the previous example, each image is either a dog or a cat.
# Plot the first two planet training images, titled with their label strings,
# as the multi-label example.
titles = ["haze primary", "agriculture clear primary water"]
list_paths = [
    f"{PATH}train-jpg/train_0.jpg",
    f"{PATH}train-jpg/train_1.jpg",
]
plots_from_files(
    list_paths,
    titles=titles,
    maintitle="Multi-label classification",
)
In multi-label classification each sample can belong to one or more classes. In the previous example, the first image belongs to two classes: haze and primary. The second image belongs to four classes: agriculture, clear, primary and water.
# f2 is the metric function from the project-local `planet` module; it is
# reported during training and used for the final evaluation.
from planet import f2

metrics = [f2]
# Architecture used for the transforms and for the learner.
f_model = resnet34

label_csv = f'{PATH}train_v2.csv'
# Count the lines of the label CSV minus one (presumably the header row —
# confirm against the file). The file is streamed inside a `with` block so
# the handle is closed and the lines are never held in memory at once.
with open(label_csv) as label_file:
    n = sum(1 for _ in label_file) - 1
# Validation indices drawn from the n rows.
val_idxs = get_cv_idxs(n)
We use a different set of data augmentations for this dataset - we also allow vertical flips, since we don't expect vertical orientation of satellite images to change our classifications.
def get_data(sz):
    """Return the planet `ImageClassifierData` for images of size `sz`.

    Transforms come from `tfms_from_model` for the notebook-level `f_model`,
    using the `transforms_top_down` augmentation set with `max_zoom=1.05`.
    Labels are read from `label_csv`, training images from 'train-jpg',
    test images from 'test-jpg', and the validation rows are `val_idxs`.
    """
    top_down_tfms = tfms_from_model(
        f_model, sz, aug_tfms=transforms_top_down, max_zoom=1.05
    )
    csv_options = dict(
        tfms=top_down_tfms,
        suffix='.jpg',
        val_idxs=val_idxs,
        test_name='test-jpg',
    )
    return ImageClassifierData.from_csv(PATH, 'train-jpg', label_csv, **csv_options)
# Build the data object at size 256 and pull one batch from the validation
# loader to inspect the label format.
data = get_data(256)
x,y = next(iter(data.val_dl))
# Display the label tensor; the recorded output is a 64x17 FloatTensor of 0/1 entries.
y
1 0 0 ... 0 1 1 0 0 0 ... 0 0 0 0 0 0 ... 0 0 0 ... ⋱ ... 0 0 0 ... 0 0 0 0 0 0 ... 0 0 0 1 0 0 ... 0 0 0 [torch.FloatTensor of size 64x17]
# Pair each class name with the first sample's label value to show which
# classes are set for that image.
list(zip(data.classes, y[0]))
[('agriculture', 1.0), ('artisinal_mine', 0.0), ('bare_ground', 0.0), ('blooming', 0.0), ('blow_down', 0.0), ('clear', 1.0), ('cloudy', 0.0), ('conventional_mine', 0.0), ('cultivation', 0.0), ('habitation', 0.0), ('haze', 0.0), ('partly_cloudy', 0.0), ('primary', 1.0), ('road', 0.0), ('selective_logging', 0.0), ('slash_burn', 1.0), ('water', 1.0)]
# Show the first image of the batch after passing it through the validation
# dataset's `denorm`. NOTE(review): the 1.4 factor presumably brightens the
# image for display — confirm.
plt.imshow(data.val_ds.denorm(to_np(x))[0]*1.4);
# Start training at a small image size; larger sizes follow later.
sz=64
data = get_data(sz)
# Resize with target int(64*1.3) = 83 and the directory name 'tmp'.
# NOTE(review): presumably this caches smaller copies of the images under
# {PATH}tmp so later epochs load faster — confirm against fastai.
data = data.resize(int(sz*1.3), 'tmp')
A Jupyter Widget
# Create the learner from `f_model` (resnet34) on the 64px data, reporting
# the metrics in `metrics` (f2).
learn = ConvLearner.pretrained(f_model, data, metrics=metrics)
# Run the learning-rate finder and plot its schedule; the plot is used to
# choose `lr`. NOTE(review): presumably loss against learning rate — confirm.
lrf=learn.lr_find()
learn.sched.plot()
A Jupyter Widget
[ 0. 0.22404 0.31176 0.82044]
# Learning rate used for every later `fit` call, directly or via `lrs`.
lr = 0.2
# 3 cycles with cycle_len=1 and cycle_mult=2; the recorded log shows 7 epochs
# (0-6), consistent with cycles of 1, 2 and 4 epochs.
learn.fit(lr, 3, cycle_len=1, cycle_mult=2)
A Jupyter Widget
[ 0. 0.14882 0.13552 0.87878] [ 1. 0.14237 0.13048 0.88251] [ 2. 0.13675 0.12779 0.88796] [ 3. 0.13528 0.12834 0.88419] [ 4. 0.13428 0.12581 0.88879] [ 5. 0.13237 0.12361 0.89141] [ 6. 0.13179 0.12472 0.8896 ]
# Three learning rates: lr/9, lr/3 and lr.
# NOTE(review): presumably one per layer group, smallest for the earliest
# layers — confirm against fastai.
lrs = np.array([lr/9,lr/3,lr])
# Unfreeze before fitting with the array of rates.
# NOTE(review): presumably makes all layers trainable — confirm.
learn.unfreeze()
learn.fit(lrs, 3, cycle_len=1, cycle_mult=2)
A Jupyter Widget
[ 0. 0.12534 0.10926 0.90892] [ 1. 0.12035 0.10086 0.91635] [ 2. 0.11001 0.09792 0.91894] [ 3. 0.1144 0.09972 0.91748] [ 4. 0.11055 0.09617 0.92016] [ 5. 0.10348 0.0935 0.92267] [ 6. 0.10502 0.09345 0.92281]
# Save the learner under a name equal to the current image size ('64' here).
learn.save(f'{sz}')
# Plot the loss recorded by the learner's scheduler.
learn.sched.plot_loss()
# Switch the same learner to 128px data, keeping the weights trained at 64px.
sz=128
learn.set_data(get_data(sz))
# Freeze again and train with the single rate `lr` before unfreezing.
# NOTE(review): presumably only the final layers train while frozen — confirm.
learn.freeze()
learn.fit(lr, 3, cycle_len=1, cycle_mult=2)
A Jupyter Widget
[ 0. 0.09729 0.09375 0.91885] [ 1. 0.10118 0.09243 0.92075] [ 2. 0.09805 0.09143 0.92235] [ 3. 0.09834 0.09134 0.92263] [ 4. 0.096 0.09046 0.9231 ] [ 5. 0.09584 0.09035 0.92403] [ 6. 0.09262 0.09059 0.92358]
# Unfreeze and train with the array of rates `lrs`, then save under the
# current image size ('128' here).
learn.unfreeze()
learn.fit(lrs, 3, cycle_len=1, cycle_mult=2)
learn.save(f'{sz}')
A Jupyter Widget
[ 0. 0.09623 0.08693 0.92696] [ 1. 0.09371 0.08621 0.92887] [ 2. 0.08919 0.08296 0.93113] [ 3. 0.09221 0.08579 0.92709] [ 4. 0.08994 0.08575 0.92862] [ 5. 0.08729 0.08248 0.93108] [ 6. 0.08218 0.08315 0.92971]
# Switch the same learner to 256px data, keeping the weights trained at 128px.
sz=256
learn.set_data(get_data(sz))
# Freeze again and train with the single rate `lr` before unfreezing.
# NOTE(review): presumably only the final layers train while frozen — confirm.
learn.freeze()
learn.fit(lr, 3, cycle_len=1, cycle_mult=2)
A Jupyter Widget
[ 0. 0.09161 0.08651 0.92712] [ 1. 0.08933 0.08665 0.92677] [ 2. 0.09125 0.08584 0.92719] [ 3. 0.08732 0.08532 0.92812] [ 4. 0.08736 0.08479 0.92854] [ 5. 0.08807 0.08471 0.92835] [ 6. 0.08942 0.08448 0.9289 ]
# Unfreeze and train with the array of rates `lrs`, then save under the
# current image size ('256' here).
learn.unfreeze()
learn.fit(lrs, 3, cycle_len=1, cycle_mult=2)
learn.save(f'{sz}')
A Jupyter Widget
[ 0. 0.08932 0.08218 0.9324 ] [ 1. 0.08654 0.08195 0.93313] [ 2. 0.08468 0.08024 0.93391] [ 3. 0.08596 0.08141 0.93287] [ 4. 0.08211 0.08152 0.93401] [ 5. 0.07971 0.08001 0.93377] [ 6. 0.07928 0.0792 0.93554]
# TTA() returns a stack of predictions plus the targets.
# NOTE(review): presumably test-time augmentation over the validation set,
# one prediction set per augmented pass — confirm against fastai.
multi_preds, y = learn.TTA()
# Average the stacked predictions over axis 0.
preds = np.mean(multi_preds, 0)
# Score the averaged predictions with f2; the recorded result is about 0.936.
f2(preds,y)
0.93626519738612801