#!/usr/bin/env python
# coding: utf-8

# # CS 231n example

# In[1]:


import torch


# In[18]:


# f = (x + y) * z; after backward(), z.grad = x + y and y.grad = z
x = torch.tensor(-2., requires_grad=True)
y = torch.tensor(5., requires_grad=True)
z = torch.tensor(-4., requires_grad=True)
q = x + y
f = q * z
f.backward()
print(z.grad)
print(y.grad)


# ## Another example

# In[19]:


# Sigmoid neuron: f = sigmoid(w0*x0 + w1*x1 + w2).
# w2 is created without requires_grad, so no gradient is accumulated for it.
w0 = torch.tensor(2., requires_grad=True)
x0 = torch.tensor(-1., requires_grad=True)
w1 = torch.tensor(-3., requires_grad=True)
x1 = torch.tensor(-2., requires_grad=True)
w2 = torch.tensor(-3.)
f = 1 / (1 + torch.exp(-(w0 * x0 + w1 * x1 + w2)))
f.backward()
print(x0.grad)
print(w0.grad)


# # Linear regression

# ## Create data and targets

# In[20]:


import torch
import torch.optim as optim


def linear_model(x, W, b):
    return torch.matmul(x, W) + b


# Ground-truth parameters used to generate the targets
Wt = torch.tensor([[1., 2., 3.], [4., 5., 6.]])
bt = torch.tensor([1., 2., 3.])
data = torch.randn(10000, 2)
targets = torch.matmul(data, Wt) + bt


# ## Without optimizer

# In[21]:


W = torch.randn(2, 3, requires_grad=True)
# For earlier PyTorch versions, torch.tensor does not support requires_grad,
# and you need to use torch.autograd.Variable instead.
b = torch.randn(3, requires_grad=True)

learning_rate = 0.001
for sample, target in zip(data, targets):
    output = linear_model(sample, W, b)
    loss = torch.sum((output - target) ** 2)
    loss.backward()
    with torch.no_grad():
        W -= learning_rate * W.grad
        b -= learning_rate * b.grad
    # clear out the gradients of W and b before the next sample
    # (alternatively: W.grad.data.zero_(); b.grad.data.zero_())
    W.grad = None
    b.grad = None

print(W, b)


# ## With optimizer

# In[22]:


W = torch.randn(2, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)

optimizer = optim.SGD([W, b], lr=0.001)
for sample, target in zip(data, targets):
    # clear out the gradients of all tensors
    # registered with this optimizer (i.e. W, b)
    optimizer.zero_grad()
    output = linear_model(sample, W, b)
    loss = torch.sum((output - target) ** 2)
    loss.backward()
    optimizer.step()

print(W, b)


# # For earlier PyTorch versions, use torch.autograd.Variable instead

# ## Without optimizer

# In[23]:


from torch.autograd import Variable
# Older PyTorch versions need Variable; newer versions can use tensors directly.

W = Variable(torch.randn(2, 3), requires_grad=True)
b = Variable(torch.randn(3), requires_grad=True)

learning_rate = 0.001
for sample, target in zip(data, targets):
    output = linear_model(sample, W, b)
    loss = torch.sum((output - target) ** 2)
    loss.backward()
    with torch.no_grad():
        W -= learning_rate * W.grad
        b -= learning_rate * b.grad
    # clear out the gradients of W and b before the next sample
    # (alternatively: W.grad.data.zero_(); b.grad.data.zero_())
    W.grad = None
    b.grad = None

print(W, b)


# ## With optimizer

# In[24]:


W = Variable(torch.randn(2, 3), requires_grad=True)
b = Variable(torch.randn(3), requires_grad=True)

optimizer = optim.SGD([W, b], lr=0.001)
for sample, target in zip(data, targets):
    # clear out the gradients of all tensors
    # registered with this optimizer (i.e. W, b)
    optimizer.zero_grad()
    output = linear_model(sample, W, b)
    loss = torch.sum((output - target) ** 2)
    loss.backward()
    optimizer.step()

print(W, b)
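
# ## Sanity check: compare learned parameters with the ground truth

# A minimal sketch, assuming the cells above have been run in order so that
# W and b hold the parameters learned in the last cell and Wt, bt are the
# ground-truth parameters used to generate the targets. With noiseless linear
# targets and one pass of SGD over 10000 samples, the differences should be
# small, but the exact values depend on the random initialization.

# In[ ]:


with torch.no_grad():
    print('max |W - Wt|:', torch.max(torch.abs(W - Wt)).item())
    print('max |b - bt|:', torch.max(torch.abs(b - bt)).item())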