import torch
x = torch.tensor(-2., requires_grad=True)
y = torch.tensor(5., requires_grad=True)
z = torch.tensor(-4., requires_grad=True)
q = x + y
f = q * z
f.backward()
print(z.grad)
print(y.grad)
tensor(3.)
tensor(-4.)
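# Sanity check (a sketch, reusing the tensors above): for f = (x + y) * z the chain
# rule gives df/dz = x + y = 3 and df/dy = df/dx = z = -4, matching the printed
# gradients. x.grad is also populated by backward():
print(x.grad)   # tensor(-4.)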
w0 = torch.tensor(2., requires_grad=True)
x0 = torch.tensor(-1., requires_grad=True)
w1 = torch.tensor(-3., requires_grad=True)
x1 = torch.tensor(-2., requires_grad=True)
w2 = torch.tensor(-3.)   # bias term; created without requires_grad, so no gradient is tracked for it
f = 1 / (1 + torch.exp(-(w0*x0 + w1*x1 + w2)))   # sigmoid of w0*x0 + w1*x1 + w2
f.backward()
print(x0.grad)
print(w0.grad)
tensor(0.3932)
tensor(-0.1966)
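# Sanity check (a sketch): with s = w0*x0 + w1*x1 + w2 = 1 and f = sigmoid(s), the
# local gradient is df/ds = sigmoid(s) * (1 - sigmoid(s)) ≈ 0.1966, so by the chain
# rule df/dx0 = df/ds * w0 ≈ 0.3932 and df/dw0 = df/ds * x0 ≈ -0.1966.
# w2 has requires_grad=False, so w2.grad stays None.
s = w0.detach() * x0.detach() + w1.detach() * x1.detach() + w2
dfds = torch.sigmoid(s) * (1 - torch.sigmoid(s))
print(dfds * w0.detach(), dfds * x0.detach())   # tensor(0.3932) tensor(-0.1966)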
import torch
import torch.optim as optim
def linear_model(x, W, b):
    return torch.matmul(x, W) + b
Wt = torch.tensor([[1., 2., 3.], [4., 5., 6.]])   # ground-truth weights
bt = torch.tensor([1., 2., 3.])                   # ground-truth bias
data = torch.randn(10000, 2)
targets = torch.matmul(data, Wt) + bt
W = torch.randn(2, 3, requires_grad=True)  # in older PyTorch versions (before 0.4), tensors do not
                                           # support requires_grad; wrap them in torch.autograd.Variable instead
b = torch.randn(3, requires_grad=True)
learning_rate = 0.001
for sample, target in zip(data, targets):
    output = linear_model(sample, W, b)
    loss = torch.sum((output - target) ** 2)
    loss.backward()
    # take a gradient step; no_grad() keeps the update out of the autograd graph
    with torch.no_grad():
        W -= learning_rate * W.grad
        b -= learning_rate * b.grad
    # clear out the gradients of W and b before the next sample
    # (equivalently: W.grad.data.zero_(); b.grad.data.zero_())
    W.grad = None
    b.grad = None
print(W, b)
tensor([[1.0000, 2.0000, 3.0000],
        [4.0000, 5.0000, 6.0000]], requires_grad=True) tensor([1.0000, 2.0000, 3.0000], requires_grad=True)
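# Alternative sketch (assumption: reusing data/targets from above): the per-sample loop
# can be replaced by full-batch gradient descent, since linear_model already broadcasts
# over the batch dimension; the step size and iteration count here are illustrative.
W = torch.randn(2, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)
for step in range(2000):
    loss = torch.mean((linear_model(data, W, b) - targets) ** 2)
    loss.backward()
    with torch.no_grad():
        W -= 0.1 * W.grad
        b -= 0.1 * b.grad
    W.grad = None
    b.grad = None
print(W, b)   # should again approach Wt and bt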
W = torch.randn(2, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)
optimizer = optim.SGD([W, b], lr=0.001)
for sample, target in zip(data, targets):
    # clear out the gradients of all parameters held
    # by this optimizer (i.e. W and b)
    optimizer.zero_grad()
    output = linear_model(sample, W, b)
    loss = torch.sum((output - target) ** 2)
    loss.backward()
    optimizer.step()
print(W, b)
tensor([[1.0000, 2.0000, 3.0000],
        [4.0000, 5.0000, 6.0000]], requires_grad=True) tensor([1.0000, 2.0000, 3.0000], requires_grad=True)
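# Equivalent sketch using the higher-level torch.nn API (assumption: reusing the same
# data/targets; note that nn.Linear stores its weight as (out_features, in_features),
# i.e. the transpose of Wt).
import torch.nn as nn
model = nn.Linear(2, 3)
criterion = nn.MSELoss(reduction='sum')   # matches the summed squared error used above
opt = optim.SGD(model.parameters(), lr=0.001)
for sample, target in zip(data, targets):
    opt.zero_grad()
    loss = criterion(model(sample), target)
    loss.backward()
    opt.step()
print(model.weight.t(), model.bias)   # should approach Wt and bt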
from torch.autograd import Variable  # older PyTorch versions need Variable; newer versions can use a Tensor directly
W = Variable(torch.randn(2, 3), requires_grad=True)
b = Variable(torch.randn(3), requires_grad=True)
learning_rate = 0.001
for sample, target in zip(data, targets):
    output = linear_model(sample, W, b)
    loss = torch.sum((output - target) ** 2)
    loss.backward()
    # take a gradient step; no_grad() keeps the update out of the autograd graph
    with torch.no_grad():
        W -= learning_rate * W.grad
        b -= learning_rate * b.grad
    # clear out the gradients of W and b before the next sample
    # (equivalently: W.grad.data.zero_(); b.grad.data.zero_())
    W.grad = None
    b.grad = None
print(W, b)
tensor([[1.0000, 2.0000, 3.0000],
        [4.0000, 5.0000, 6.0000]], requires_grad=True) tensor([1.0000, 2.0000, 3.0000], requires_grad=True)
W = Variable(torch.randn(2, 3), requires_grad=True)
b = Variable(torch.randn(3), requires_grad=True)
optimizer = optim.SGD([W, b], lr=0.001)
for sample, target in zip(data, targets):
    # clear out the gradients of all Variables held
    # by this optimizer (i.e. W and b)
    optimizer.zero_grad()
    output = linear_model(sample, W, b)
    loss = torch.sum((output - target) ** 2)
    loss.backward()
    optimizer.step()
print(W, b)
tensor([[1.0000, 2.0000, 3.0000],
        [4.0000, 5.0000, 6.0000]], requires_grad=True) tensor([1.0000, 2.0000, 3.0000], requires_grad=True)