#hide
from utils import *
def get_data(url, presize, resize):
    path = untar_data(url)
    return DataBlock(
        blocks=(ImageBlock, CategoryBlock), get_items=get_image_files,
        splitter=GrandparentSplitter(valid_name='val'),
        get_y=parent_label, item_tfms=Resize(presize),
        batch_tfms=[*aug_transforms(min_scale=0.5, size=resize),
                    Normalize.from_stats(*imagenet_stats)],
    ).dataloaders(path, bs=128)
dls = get_data(URLs.IMAGENETTE_160, 160, 128)
def get_learner(**kwargs):
    return cnn_learner(dls, resnet34, pretrained=False,
                       metrics=accuracy, **kwargs).to_fp16()
learn = get_learner()
learn.fit_one_cycle(3, 0.003)
epoch | train_loss | valid_loss | accuracy | time |
---|---|---|---|---|
0 | 2.571932 | 2.685040 | 0.322548 | 00:11 |
1 | 1.904674 | 1.852589 | 0.437452 | 00:11 |
2 | 1.586909 | 1.374908 | 0.594904 | 00:11 |
learn = get_learner(opt_func=SGD)
learn.lr_find()
(0.017378008365631102, 3.019951861915615e-07)
learn.fit_one_cycle(3, 0.03, moms=(0,0,0))
epoch | train_loss | valid_loss | accuracy | time |
---|---|---|---|---|
0 | 2.969412 | 2.214596 | 0.242038 | 00:09 |
1 | 2.442730 | 1.845950 | 0.362548 | 00:09 |
2 | 2.157159 | 1.741143 | 0.408917 | 00:09 |
def sgd_cb(p, lr, **kwargs): p.data.add_(p.grad.data, alpha=-lr)
opt_func = partial(Optimizer, cbs=[sgd_cb])
learn = get_learner(opt_func=opt_func)
learn.fit(3, 0.03)
epoch | train_loss | valid_loss | accuracy | time |
---|---|---|---|---|
0 | 2.730918 | 2.009971 | 0.332739 | 00:09 |
1 | 2.204893 | 1.747202 | 0.441529 | 00:09 |
2 | 1.875621 | 1.684515 | 0.445350 | 00:09 |
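To make the callback mechanism concrete, here is a minimal sketch of a callback-driven optimizer. This is *not* fastai's actual `Optimizer` (which also handles parameter groups, hyperparameter scheduling, and more); `SketchOptimizer` is a hypothetical name, and the sketch only shows the core idea: each stepper callback is called on every parameter with the current hyperparameters plus that parameter's state, and any dict a callback returns is merged back into the state.

import torch

class SketchOptimizer:
    "A minimal sketch, not fastai's real Optimizer"
    def __init__(self, params, cbs, **hypers):
        self.params = list(params)
        self.cbs    = cbs                  # stepper callbacks, e.g. [sgd_cb]
        self.hypers = hypers               # e.g. lr=0.03
        self.state  = {p: {} for p in self.params}  # per-parameter state

    def step(self):
        for p in self.params:
            if p.grad is None: continue
            for cb in self.cbs:
                # Pass hyperparameters plus this parameter's state; if the
                # callback returns a dict, merge it back into the state.
                res = cb(p, **{**self.hypers, **self.state[p]})
                if res is not None: self.state[p].update(res)

    def zero_grad(self):
        for p in self.params:
            if p.grad is not None: p.grad.detach_().zero_()

# Usage: one SGD step on a toy parameter
w = torch.randn(3, requires_grad=True)
opt = SketchOptimizer([w], cbs=[sgd_cb], lr=0.1)
loss = (w**2).sum()
loss.backward()
opt.step()        # applies sgd_cb: w <- w - lr * w.grad
opt.zero_grad()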
x = np.linspace(-4, 4, 100)
y = 1 - (x/3) ** 2
x1 = x + np.random.randn(100) * 0.1
y1 = y + np.random.randn(100) * 0.1
plt.scatter(x1,y1)
idx = x1.argsort()
beta,avg,res = 0.7,0,[]
for t,i in enumerate(idx):   # t counts steps; i indexes the sorted data
    avg = beta * avg + (1-beta) * y1[i]
    res.append(avg/(1-beta**(t+1)))  # bias correction for the zero initialization
plt.plot(x1[idx],np.array(res), color='red');
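The loop above computes an exponentially weighted moving average with bias correction. Unrolling the recurrence shows where the correction factor comes from:

$$\mathrm{avg}_t = \beta\,\mathrm{avg}_{t-1} + (1-\beta)\,y_t = (1-\beta)\sum_{k=1}^{t}\beta^{\,t-k}\,y_k$$

The weights $(1-\beta)\beta^{t-k}$ sum to $1-\beta^t$ rather than 1, so the early averages are biased toward the zero initialization; dividing by $1-\beta^t$ (the `1-beta**(t+1)` term in the code, with `t` counting from 0) removes that bias.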
x = np.linspace(-4, 4, 100)
y = 1 - (x/3) ** 2
x1 = x + np.random.randn(100) * 0.1
y1 = y + np.random.randn(100) * 0.1
_,axs = plt.subplots(2,2, figsize=(12,8))
betas = [0.5,0.7,0.9,0.99]
idx = x1.argsort()
for beta,ax in zip(betas, axs.flatten()):
    ax.scatter(x1,y1)
    avg,res = 0,[]
    for i in idx:
        avg = beta * avg + (1-beta) * y1[i]
        res.append(avg)  # no bias correction here, to isolate the effect of beta
    ax.plot(x1[idx],np.array(res), color='red')
    ax.set_title(f'beta={beta}')
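With a high beta the average responds slowly and lags behind the data, and without bias correction it also starts badly: for $\beta=0.99$ the first value is $\mathrm{avg}_1=(1-\beta)\,y_1=0.01\,y_1$, which is why the red curve in the last panel starts near zero.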
def average_grad(p, mom, grad_avg=None, **kwargs):
    if grad_avg is None: grad_avg = torch.zeros_like(p.grad.data)
    return {'grad_avg': grad_avg*mom + p.grad.data}
def momentum_step(p, lr, grad_avg, **kwargs): p.data.add_(grad_avg, alpha=-lr)
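As a quick standalone check of how these two callbacks interact (using a hand-set gradient rather than a real model), a constant gradient of 1.0 makes `grad_avg` grow geometrically toward $1/(1-\mathrm{mom}) = 10$:

# Standalone check: with a constant gradient of 1.0, grad_avg follows
# 1, 1.9, 2.71, ... converging toward 1/(1-mom) = 10.
p = torch.zeros(1, requires_grad=True)
p.grad = torch.ones(1)   # pretend backward() always produces gradient 1.0
state = {}
for t in range(3):
    state.update(average_grad(p, mom=0.9, **state))
    momentum_step(p, lr=0.1, **state)
print(state['grad_avg'], p.data)  # tensor([2.7100]) tensor([-0.5610])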
opt_func = partial(Optimizer, cbs=[average_grad,momentum_step], mom=0.9)
learn = get_learner(opt_func=opt_func)
learn.fit_one_cycle(3, 0.03)
epoch | train_loss | valid_loss | accuracy | time |
---|---|---|---|---|
0 | 2.856000 | 2.493429 | 0.246115 | 00:10 |
1 | 2.504205 | 2.463813 | 0.348280 | 00:10 |
2 | 2.187387 | 1.755670 | 0.418853 | 00:10 |
learn.recorder.plot_sched()
def average_sqr_grad(p, sqr_mom, sqr_avg=None, **kwargs):
    if sqr_avg is None: sqr_avg = torch.zeros_like(p.grad.data)
    return {'sqr_avg': sqr_avg*sqr_mom + p.grad.data**2}
def rms_prop_step(p, lr, sqr_avg, eps, grad_avg=None, **kwargs):
    denom = sqr_avg.sqrt().add_(eps)
    p.data.addcdiv_(p.grad, denom, value=-lr)
opt_func = partial(Optimizer, cbs=[average_sqr_grad,rms_prop_step],
                   sqr_mom=0.99, eps=1e-7)
learn = get_learner(opt_func=opt_func)
learn.fit_one_cycle(3, 0.003)
epoch | train_loss | valid_loss | accuracy | time |
---|---|---|---|---|
0 | 2.766912 | 1.845900 | 0.402548 | 00:11 |
1 | 2.194586 | 1.510269 | 0.504459 | 00:11 |
2 | 1.869099 | 1.447939 | 0.544968 | 00:11 |
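In equation form, the two callbacks implement, for each parameter $w$ with gradient $g$:

$$s_t = \alpha\, s_{t-1} + g_t^2 \qquad\qquad w_t = w_{t-1} - \frac{\mathrm{lr}}{\sqrt{s_t}+\epsilon}\, g_t$$

where $s$ is `sqr_avg` and $\alpha$ is `sqr_mom`. Dividing by the square root of the moving average of squared gradients gives each parameter its own effective step size: parameters with consistently large gradients take smaller steps, and those with small gradients take larger ones.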