#hide
! [ -e /content ] && pip install -Uqq fastbook
import fastbook
fastbook.setup_book()

#hide
from fastai.vision.all import *
from fastbook import *

matplotlib.rc('image', cmap='Greys')

[[chapter_mnist_basics]]

# Download and extract the MNIST_SAMPLE dataset (just 3s and 7s)
path = untar_data(URLs.MNIST_SAMPLE)

#hide
Path.BASE_PATH = path

path.ls()

(path/'train').ls()

threes = (path/'train'/'3').ls().sorted()
sevens = (path/'train'/'7').ls().sorted()
threes

im3_path = threes[1]
im3 = Image.open(im3_path)
im3

array(im3)[4:10,4:10]

tensor(im3)[4:10,4:10]

#hide_output
im3_t = tensor(im3)
df = pd.DataFrame(im3_t[4:15,4:22])
df.style.set_properties(**{'font-size':'6pt'}).background_gradient('Greys')

seven_tensors = [tensor(Image.open(o)) for o in sevens]
three_tensors = [tensor(Image.open(o)) for o in threes]
len(three_tensors),len(seven_tensors)

show_image(three_tensors[1]);

# Stack the images into rank-3 tensors and scale pixel values to 0-1
stacked_sevens = torch.stack(seven_tensors).float()/255
stacked_threes = torch.stack(three_tensors).float()/255
stacked_threes.shape

len(stacked_threes.shape)

stacked_threes.ndim

# The "ideal" 3 and 7: the pixelwise mean over all training images
mean3 = stacked_threes.mean(0)
show_image(mean3);

mean7 = stacked_sevens.mean(0)
show_image(mean7);

a_3 = stacked_threes[1]
show_image(a_3);

# L1 norm (mean absolute difference) vs. L2 norm (root mean squared error)
dist_3_abs = (a_3 - mean3).abs().mean()
dist_3_sqr = ((a_3 - mean3)**2).mean().sqrt()
dist_3_abs,dist_3_sqr

dist_7_abs = (a_3 - mean7).abs().mean()
dist_7_sqr = ((a_3 - mean7)**2).mean().sqrt()
dist_7_abs,dist_7_sqr

F.l1_loss(a_3.float(),mean7), F.mse_loss(a_3,mean7).sqrt()

data = [[1,2,3],[4,5,6]]
arr = array(data)
tns = tensor(data)

arr  # numpy

tns  # pytorch

tns[1]

tns[:,1]

tns[1,1:3]

tns+1

tns.type()

tns*1.5

valid_3_tens = torch.stack([tensor(Image.open(o))
                            for o in (path/'valid'/'3').ls()])
valid_3_tens = valid_3_tens.float()/255
valid_7_tens = torch.stack([tensor(Image.open(o))
                            for o in (path/'valid'/'7').ls()])
valid_7_tens = valid_7_tens.float()/255
valid_3_tens.shape,valid_7_tens.shape

# Mean over the last two axes, so this works on a single image or a whole batch
def mnist_distance(a,b): return (a-b).abs().mean((-1,-2))
mnist_distance(a_3, mean3)

valid_3_dist = mnist_distance(valid_3_tens, mean3)
valid_3_dist, valid_3_dist.shape

tensor([1,2,3]) + tensor(1)

(valid_3_tens-mean3).shape

def is_3(x): return mnist_distance(x,mean3) < mnist_distance(x,mean7)
is_3(a_3), is_3(a_3).float()

is_3(valid_3_tens)

accuracy_3s =      is_3(valid_3_tens).float() .mean()
accuracy_7s = (1 - is_3(valid_7_tens).float()).mean()
accuracy_3s,accuracy_7s,(accuracy_3s+accuracy_7s)/2

#id gradient_descent
#caption The gradient descent process
#alt Graph showing the steps for Gradient Descent
gv('''
init->predict->loss->gradient->step->stop
step->predict[label=repeat]
''')

def f(x): return x**2

plot_function(f, 'x', 'x**2')

plot_function(f, 'x', 'x**2')
plt.scatter(-1.5, f(-1.5), color='red');

# Tag a tensor so PyTorch tracks gradients through computations on it
xt = tensor(3.).requires_grad_()

yt = f(xt)
yt

yt.backward()
xt.grad

xt = tensor([3.,4.,10.]).requires_grad_()
xt

def f(x): return (x**2).sum()

yt = f(xt)
yt

yt.backward()
xt.grad

# A synthetic example: noisy speed measurements over 20 seconds
time = torch.arange(0,20).float(); time

speed = torch.randn(20)*3 + 0.75*(time-9.5)**2 + 1
plt.scatter(time,speed);

def f(t, params):
    a,b,c = params
    return a*(t**2) + (b*t) + c

def mse(preds, targets): return ((preds-targets)**2).mean()

# Step 1: initialize the parameters randomly
params = torch.randn(3).requires_grad_()

#hide
orig_params = params.clone()

# Step 2: calculate the predictions
preds = f(time, params)

def show_preds(preds, ax=None):
    if ax is None: ax=plt.subplots()[1]
    ax.scatter(time, speed)
    ax.scatter(time, to_np(preds), color='red')
    ax.set_ylim(-300,100)

show_preds(preds)

# Step 3: calculate the loss
loss = mse(preds, speed)
loss

# Step 4: calculate the gradients
loss.backward()
params.grad

params.grad * 1e-5

params

# Step 5: step the weights, then reset the gradients
lr = 1e-5
params.data -= lr * params.grad.data
params.grad = None

preds = f(time,params)
mse(preds, speed)
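# A quick standalone check (not from the book) that the update rule just used,
# p.data -= lr * p.grad.data, really moves the loss downhill: for loss = (w-2)**2
# the gradient is 2*(w-2), so a small step from w=5 must shrink the loss.
import torch

w = torch.tensor(5.0, requires_grad=True)
loss = (w - 2)**2
loss.backward()
print(w.grad)               # tensor(6.) -- matches the analytic 2*(5-2)
with torch.no_grad():
    w -= 0.1 * w.grad       # one gradient-descent step
print(((w - 2)**2).item())  # 5.76, down from the original 9.0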
show_preds(preds)

# One full gradient-descent iteration: predict, loss, backward, step, zero grads
def apply_step(params, prn=True):
    preds = f(time, params)
    loss = mse(preds, speed)
    loss.backward()
    params.data -= lr * params.grad.data
    params.grad = None
    if prn: print(loss.item())
    return preds

for i in range(10): apply_step(params)

#hide
params = orig_params.detach().requires_grad_()

_,axs = plt.subplots(1,4,figsize=(12,3))
for ax in axs: show_preds(apply_step(params, False), ax)
plt.tight_layout()

#hide_input
#id gradient_descent
#caption The gradient descent process
#alt Graph showing the steps for Gradient Descent
gv('''
init->predict->loss->gradient->step->stop
step->predict[label=repeat]
''')

# Flatten each 28x28 image into a vector of 784 pixels; label 3s as 1, 7s as 0
train_x = torch.cat([stacked_threes, stacked_sevens]).view(-1, 28*28)
train_y = tensor([1]*len(threes) + [0]*len(sevens)).unsqueeze(1)
train_x.shape,train_y.shape

dset = list(zip(train_x,train_y))
x,y = dset[0]
x.shape,y

valid_x = torch.cat([valid_3_tens, valid_7_tens]).view(-1, 28*28)
valid_y = tensor([1]*len(valid_3_tens) + [0]*len(valid_7_tens)).unsqueeze(1)
valid_dset = list(zip(valid_x,valid_y))

def init_params(size, std=1.0): return (torch.randn(size)*std).requires_grad_()

weights = init_params((28*28,1))

bias = init_params(1)

(train_x[0]*weights.T).sum() + bias

# Matrix multiplication handles the whole batch at once
def linear1(xb): return xb@weights + bias
preds = linear1(train_x)
preds

corrects = (preds>0.0).float() == train_y
corrects

corrects.float().mean().item()

# A tiny change to one weight barely moves the accuracy: accuracy makes a poor
# loss function because its gradient is zero almost everywhere
with torch.no_grad(): weights[0] *= 1.0001
preds = linear1(train_x)
((preds>0.0).float() == train_y).float().mean().item()

trgts = tensor([1,0,1])
prds  = tensor([0.9, 0.4, 0.2])

# Measures how far each prediction is from 1 when it should be 1, and from 0
# when it should be 0
def mnist_loss(predictions, targets):
    return torch.where(targets==1, 1-predictions, predictions).mean()

torch.where(trgts==1, 1-prds, prds)

mnist_loss(prds,trgts)

mnist_loss(tensor([0.9, 0.4, 0.8]),trgts)

def sigmoid(x): return 1/(1+torch.exp(-x))

plot_function(torch.sigmoid, title='Sigmoid', min=-4, max=4)

# Apply sigmoid first so mnist_loss always sees values between 0 and 1
def mnist_loss(predictions, targets):
    predictions = predictions.sigmoid()
    return torch.where(targets==1, 1-predictions, predictions).mean()

coll = range(15)
dl = DataLoader(coll, batch_size=5, shuffle=True)
list(dl)

ds = L(enumerate(string.ascii_lowercase))
ds

dl = DataLoader(ds, batch_size=6, shuffle=True)
list(dl)

weights = init_params((28*28,1))
bias = init_params(1)

dl = DataLoader(dset, batch_size=256)
xb,yb = first(dl)
xb.shape,yb.shape

valid_dl = DataLoader(valid_dset, batch_size=256)

# A mini-batch of size 4 for testing
batch = train_x[:4]
batch.shape

preds = linear1(batch)
preds

loss = mnist_loss(preds, train_y[:4])
loss

loss.backward()
weights.grad.shape,weights.grad.mean(),bias.grad

def calc_grad(xb, yb, model):
    preds = model(xb)
    loss = mnist_loss(preds, yb)
    loss.backward()

calc_grad(batch, train_y[:4], linear1)
weights.grad.mean(),bias.grad

# Calling it a second time *adds* to the existing gradients...
calc_grad(batch, train_y[:4], linear1)
weights.grad.mean(),bias.grad

# ...so the gradients must be zeroed before the next batch
weights.grad.zero_()
bias.grad.zero_();

def train_epoch(model, lr, params):
    for xb,yb in dl:
        calc_grad(xb, yb, model)
        for p in params:
            p.data -= p.grad*lr
            p.grad.zero_()

(preds>0.0).float() == train_y[:4]

def batch_accuracy(xb, yb):
    preds = xb.sigmoid()
    correct = (preds>0.5) == yb
    return correct.float().mean()

batch_accuracy(linear1(batch), train_y[:4])

def validate_epoch(model):
    accs = [batch_accuracy(model(xb), yb) for xb,yb in valid_dl]
    return round(torch.stack(accs).mean().item(), 4)

validate_epoch(linear1)

lr = 1.
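# A standalone demonstration (not from the book) of why train_epoch calls
# p.grad.zero_() after every update: backward() *accumulates* into .grad rather
# than overwriting it, as the repeated calc_grad calls above showed.
import torch

x = torch.tensor(3.0, requires_grad=True)
(x**2).backward()
print(x.grad)        # tensor(6.)
(x**2).backward()
print(x.grad)        # tensor(12.) -- the second gradient was added, not assigned
x.grad.zero_()
print(x.grad)        # tensor(0.) -- ready for a fresh backward pass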
params = weights,bias
train_epoch(linear1, lr, params)
validate_epoch(linear1)

for i in range(20):
    train_epoch(linear1, lr, params)
    print(validate_epoch(linear1), end=' ')

# PyTorch's built-in linear layer does the same job as linear1, with the
# weights and bias wrapped up as parameters
linear_model = nn.Linear(28*28,1)

w,b = linear_model.parameters()
w.shape,b.shape

class BasicOptim:
    def __init__(self,params,lr): self.params,self.lr = list(params),lr

    def step(self, *args, **kwargs):
        for p in self.params: p.data -= p.grad.data * self.lr

    def zero_grad(self, *args, **kwargs):
        for p in self.params: p.grad = None

opt = BasicOptim(linear_model.parameters(), lr)

def train_epoch(model):
    for xb,yb in dl:
        calc_grad(xb, yb, model)
        opt.step()
        opt.zero_grad()

validate_epoch(linear_model)

def train_model(model, epochs):
    for i in range(epochs):
        train_epoch(model)
        print(validate_epoch(model), end=' ')

train_model(linear_model, 20)

# fastai's SGD class is a drop-in replacement for BasicOptim
linear_model = nn.Linear(28*28,1)
opt = SGD(linear_model.parameters(), lr)
train_model(linear_model, 20)

# Learner bundles the data, model, optimizer, loss function, and metrics
dls = DataLoaders(dl, valid_dl)
learn = Learner(dls, nn.Linear(28*28,1), opt_func=SGD,
                loss_func=mnist_loss, metrics=batch_accuracy)

learn.fit(10, lr=lr)

# A simple two-layer neural network: linear, ReLU (max with 0), linear
def simple_net(xb):
    res = xb@w1 + b1
    res = res.max(tensor(0.0))
    res = res@w2 + b2
    return res

w1 = init_params((28*28,30))
b1 = init_params(30)
w2 = init_params((30,1))
b2 = init_params(1)

plot_function(F.relu)

# The same network built from PyTorch modules
simple_net = nn.Sequential(
    nn.Linear(28*28,30),
    nn.ReLU(),
    nn.Linear(30,1)
)

learn = Learner(dls, simple_net, opt_func=SGD,
                loss_func=mnist_loss, metrics=batch_accuracy)

#hide_output
learn.fit(40, 0.1)

plt.plot(L(learn.recorder.values).itemgot(2));

learn.recorder.values[-1][2]

# An 18-layer ResNet trained from scratch on the same data
dls = ImageDataLoaders.from_folder(path)
learn = vision_learner(dls, resnet18, pretrained=False,
                       loss_func=F.cross_entropy, metrics=accuracy)
learn.fit_one_cycle(1, 0.1)
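# A minimal sketch (plain PyTorch, not from the book) of what the nn.Sequential
# version of simple_net builds under the hood: the same two linear layers with
# a ReLU between them, written as an explicit nn.Module subclass.
import torch
from torch import nn

class SimpleNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.lin1 = nn.Linear(28*28, 30)  # first layer: 784 inputs -> 30 activations
        self.lin2 = nn.Linear(30, 1)      # second layer: 30 activations -> 1 output

    def forward(self, xb):
        res = self.lin1(xb)
        res = torch.relu(res)             # the nonlinearity between the two layers
        return self.lin2(res)

model = SimpleNet()
print(model(torch.randn(4, 28*28)).shape)  # torch.Size([4, 1])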