#!/usr/bin/env python
# coding: utf-8

# # CS 231n example

# In[1]:


import torch


# In[18]:


# f = (x + y) * z; after backward(), z.grad = x + y and y.grad = z
x = torch.tensor(-2., requires_grad=True)
y = torch.tensor(5., requires_grad=True)
z = torch.tensor(-4., requires_grad=True)
q = x + y
f = q * z
f.backward()
print(z.grad)
print(y.grad)


# ## Another example

# In[19]:


# Sigmoid neuron: f = sigmoid(w0*x0 + w1*x1 + w2).
# w2 is created without requires_grad, so no gradient is accumulated for it.
w0 = torch.tensor(2., requires_grad=True)
x0 = torch.tensor(-1., requires_grad=True)
w1 = torch.tensor(-3., requires_grad=True)
x1 = torch.tensor(-2., requires_grad=True)
w2 = torch.tensor(-3.)
f = 1 / (1 + torch.exp(-(w0 * x0 + w1 * x1 + w2)))
f.backward()
print(x0.grad)
print(w0.grad)


# # Linear regression

# ## Create data and targets

# In[20]:


import torch
import torch.optim as optim


def linear_model(x, W, b):
    return torch.matmul(x, W) + b


# Ground-truth parameters used to generate the targets
Wt = torch.tensor([[1., 2., 3.], [4., 5., 6.]])
bt = torch.tensor([1., 2., 3.])
data = torch.randn(10000, 2)
targets = torch.matmul(data, Wt) + bt


# ## Without optimizer

# In[21]:


W = torch.randn(2, 3, requires_grad=True)
# For earlier PyTorch versions, torch.tensor does not support requires_grad,
# and you need to use torch.autograd.Variable instead.
b = torch.randn(3, requires_grad=True)

learning_rate = 0.001
for sample, target in zip(data, targets):
    output = linear_model(sample, W, b)
    loss = torch.sum((output - target) ** 2)
    loss.backward()
    with torch.no_grad():
        W -= learning_rate * W.grad
        b -= learning_rate * b.grad
    # clear out the gradients of W and b before the next sample
    # (alternatively: W.grad.data.zero_(); b.grad.data.zero_())
    W.grad = None
    b.grad = None

print(W, b)


# ## With optimizer

# In[22]:


W = torch.randn(2, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)

optimizer = optim.SGD([W, b], lr=0.001)
for sample, target in zip(data, targets):
    # clear out the gradients of all tensors
    # registered with this optimizer (i.e. W, b)
    optimizer.zero_grad()
    output = linear_model(sample, W, b)
    loss = torch.sum((output - target) ** 2)
    loss.backward()
    optimizer.step()

print(W, b)


# # For earlier PyTorch versions, use torch.autograd.Variable instead

# ## Without optimizer

# In[23]:


from torch.autograd import Variable
# Older PyTorch versions need Variable; newer versions can use tensors directly.

W = Variable(torch.randn(2, 3), requires_grad=True)
b = Variable(torch.randn(3), requires_grad=True)

learning_rate = 0.001
for sample, target in zip(data, targets):
    output = linear_model(sample, W, b)
    loss = torch.sum((output - target) ** 2)
    loss.backward()
    with torch.no_grad():
        W -= learning_rate * W.grad
        b -= learning_rate * b.grad
    # clear out the gradients of W and b before the next sample
    # (alternatively: W.grad.data.zero_(); b.grad.data.zero_())
    W.grad = None
    b.grad = None

print(W, b)


# ## With optimizer

# In[24]:


W = Variable(torch.randn(2, 3), requires_grad=True)
b = Variable(torch.randn(3), requires_grad=True)

optimizer = optim.SGD([W, b], lr=0.001)
for sample, target in zip(data, targets):
    # clear out the gradients of all tensors
    # registered with this optimizer (i.e. W, b)
    optimizer.zero_grad()
    output = linear_model(sample, W, b)
    loss = torch.sum((output - target) ** 2)
    loss.backward()
    optimizer.step()

print(W, b)
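
# ## Sanity check: compare learned parameters with the ground truth

# A minimal sketch, assuming the cells above have been run in order so that
# W and b hold the parameters learned in the last cell and Wt, bt are the
# ground-truth parameters used to generate the targets. With noiseless linear
# targets and one pass of SGD over 10000 samples, the differences should be
# small, but the exact values depend on the random initialization.

# In[ ]:


with torch.no_grad():
    print('max |W - Wt|:', torch.max(torch.abs(W - Wt)).item())
    print('max |b - bt|:', torch.max(torch.abs(b - bt)).item())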