#!/usr/bin/env python
# coding: utf-8

# ![](https://discuss.pytorch.org/uploads/default/original/2X/3/35226d9fbc661ced1c5d17e374638389178c3176.png)

# | Basics | [Autograd](2-Autograd.ipynb) >

# ## References and other resources
# 
# - [PyTorch Tutorials](https://pytorch.org/tutorials/)
# - [Torchvision](https://pytorch.org/docs/stable/torchvision/index.html)

# ## Alternatives
# 
# - [Tensorflow](https://www.tensorflow.org/)
# - [Keras](https://keras.io/)
# - [Theano](http://deeplearning.net/software/theano/)
# - [Caffe](http://caffe.berkeleyvision.org/)
# - [Caffe2](https://caffe2.ai/)
# - [MXNet](https://mxnet.apache.org/)
# - [many more...](https://www.google.com/search?q=deep+learning+frameworks&oq=deep+learning+frame&aqs=chrome.0.0j69i57j69i61l2j0l2.2284j0j1&sourceid=chrome&ie=UTF-8)

# ## So why PyTorch?
# 
# - Simple Python
# - Easy to use + debug
# - Supported/developed by Facebook
# - Nice and extensible interface (modules, etc.)
# - A lot of research code is published as PyTorch projects

# ____

# ## Google Colab only!

# In[ ]:


# execute only if you're using Google Colab
get_ipython().system('wget -q https://raw.githubusercontent.com/ahug/amld-pytorch-workshop/master/binder/requirements.txt -O requirements.txt')
get_ipython().system('pip install -qr requirements.txt')


# ___

# In[ ]:


import torch


# In[ ]:


print("PyTorch Version:", torch.__version__)


# In[ ]:


import numpy as np  # PyTorch is very similar to the numpy framework (if that helps!)


# ## Tensor Creation

# ## First of all, what is a tensor?
# 
# A **matrix** is a grid of numbers, say of shape (3 x 5). In simple terms, a **tensor** can be seen as a generalization of a matrix to higher dimensions. It can have an arbitrary shape, e.g. (3 x 6 x 2 x 10).
# 
# To start with, you can think of tensors as multidimensional arrays.

# In[ ]:


X = torch.tensor([1, 2, 3, 4, 5])
X


# In[ ]:


X.shape


# In[ ]:


X = torch.tensor([[1, 2, 3], [4, 5, 6]])
X


# In[ ]:


X.shape


# In[ ]:


# numpy
np.eye(3)


# In[ ]:


# torch
torch.eye(3)


# In[ ]:


# numpy
5 * np.eye(3)


# In[ ]:


# torch
5 * torch.eye(3)


# In[ ]:


# numpy
np.ones(5)


# In[ ]:


# torch
torch.ones(5)


# In[ ]:


# numpy
np.zeros(5)


# In[ ]:


# torch
torch.zeros(5)


# In[ ]:


# numpy
np.empty((3, 5))


# In[ ]:


# torch
torch.empty((3, 5))


# In[ ]:


# numpy
X = np.random.random((5, 3))
X


# In[ ]:


# torch
Y = torch.rand((5, 3))
Y


# In[ ]:


# numpy
X.shape


# In[ ]:


# torch
Y.shape


# ___

# ## But wait: Why do we even need tensors if we can do exactly the same with numpy arrays?

# Under mathematical operations, `torch.tensor` behaves just like a numpy array. However, `torch.tensor` additionally keeps track of gradients (see the next notebook) and provides GPU support.
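# As a small preview of what "keeps track of gradients" means (a minimal sketch only; `requires_grad`, `backward()` and `.grad` are standard PyTorch and are covered properly in the [Autograd](2-Autograd.ipynb) notebook, while GPU support is demonstrated at the end of this one):

# In[ ]:


v = torch.tensor([2.0, 3.0], requires_grad=True)  # ask PyTorch to record operations on v
s = (v ** 2).sum()                                # s = v_1^2 + v_2^2
s.backward()                                      # computes ds/dv and stores it in v.grad
v.grad                                            # tensor([4., 6.]), i.e. 2 * v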
# ____

# ## Linear Algebra Operations

# In[ ]:


# use (5, 3) "tall" matrices so that X.T @ X below is an invertible 3x3 matrix
X = np.random.rand(5, 3)
Y = torch.rand(5, 3)


# In[ ]:


# numpy (matrix multiplication)
X.T @ X


# In[ ]:


Y.shape


# In[ ]:


# torch (matrix multiplication)
Y.t() @ Y


# In[ ]:


Y.t().matmul(Y)


# In[ ]:


# CAUTION: The operator '*' does element-wise multiplication, just like in numpy!
# Y.t() * Y  # error, dimensions do not match for element-wise multiplication


# In[ ]:


np.linalg.inv(X.T @ X)


# In[ ]:


torch.inverse(Y.t() @ Y)


# In[ ]:


np.arange(2, 10, 2)


# In[ ]:


torch.arange(2, 10, 2)


# In[ ]:


np.linspace(0, 1, 10)


# In[ ]:


torch.linspace(0, 1, 10)


# ## Your turn

# **_Create the tensor:_**
# 
# $ \begin{bmatrix}
# 5 & 7 & 9 & 11 & 13 & 15 & 17 & 19
# \end{bmatrix} $

# In[ ]:


# YOUR TURN


# ## More on PyTorch Tensors

# Each operation is also available as a function.

# In[ ]:


X = torch.rand(3, 2)


# In[ ]:


torch.exp(X)


# In[ ]:


X.exp()


# In[ ]:


X.sqrt()


# In[ ]:


(X.exp() + 2).sqrt() - 2 * X.log().sigmoid()  # be creative :-)


# Many more functions are available: sin, cos, tanh, log, etc.

# In[ ]:


A = torch.eye(3)
A


# In[ ]:


A.add(5)


# In[ ]:


A


# Functions that mutate the passed object in-place end with an underscore, e.g. *add_*, *div_*, etc.

# In[ ]:


A.add_(5)


# In[ ]:


A


# In[ ]:


A.div_(3)


# In[ ]:


A


# In[ ]:


A.uniform_()  # fills the tensor with random uniform numbers in [0, 1]


# In[ ]:


A


# ## Indexing

# Again, it works just like in numpy.

# In[ ]:


A = torch.randint(100, (3, 3))
A


# In[ ]:


A[0, 0]


# In[ ]:


A[2, 1]


# In[ ]:


A[1]


# In[ ]:


A[:, 1]


# In[ ]:


A[1:2, :], A[1:2, :].shape


# In[ ]:


A[1:, 1:]


# In[ ]:


A[:2, :2]


# _____

# ## Reshaping & Expanding

# In[ ]:


X = torch.tensor([1, 2, 3, 4])
X


# In[ ]:


X = X.repeat(3, 1)  # repeat X 3 times along dimension 0 and once along dimension 1
X, X.shape


# In[ ]:


# equivalent of 'reshape' in numpy (view does not allocate new memory!)
Y = X.view(2, 6)
Y
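# Because a view shares the underlying memory with the original tensor rather than copying it, writing through the view also changes the original. A quick sanity check (a minimal sketch using only standard indexing and `.view()`; `M` and `V` are throwaway names):

# In[ ]:


M = torch.zeros(2, 3)
V = M.view(6)   # V is a flat view of M, not a copy
V[0] = 7        # modifying the view ...
M               # ... also changes M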
# In[ ]:


Y = X.view(-1)  # -1 tells PyTorch to infer the number of elements along that dimension
Y, Y.shape


# In[ ]:


Y = X.view(-1, 2)
Y, Y.shape


# In[ ]:


Y = X.view(-1, 4)
Y, Y.shape


# In[ ]:


Y = torch.ones(5)
Y, Y.shape


# In[ ]:


Y = Y.view(-1, 1)
Y, Y.shape


# In[ ]:


Y.expand(5, 5)  # similar to repeat but does not actually allocate new memory


# In[ ]:


X = torch.eye(4)
Y = X[3:, :]
Y, Y.shape


# In[ ]:


Y = Y.squeeze()  # removes all dimensions of size '1'
Y, Y.shape


# In[ ]:


Y = Y.unsqueeze(1)
Y, Y.shape


# ## Your turn!

# **_Create the tensor:_**
# 
# $ \begin{bmatrix}
# 7 & 5 & 5 & 5 & 5 \\
# 5 & 7 & 5 & 5 & 5 \\
# 5 & 5 & 7 & 5 & 5 \\
# 5 & 5 & 5 & 7 & 5 \\
# 5 & 5 & 5 & 5 & 7
# \end{bmatrix} $
# 
# Hint: You can use a matrix sum and scalar multiplication.

# In[ ]:


# YOUR TURN


# **_Create the tensor:_**
# 
# $ \begin{bmatrix}
# 4 & 6 & 8 & 10 & 12 \\
# 14 & 16 & 18 & 20 & 22 \\
# 24 & 26 & 28 & 30 & 32
# \end{bmatrix}$

# In[ ]:


# YOUR TURN


# **_Create the tensor:_**
# 
# $ \begin{bmatrix}
# 2 & 2 & 2 & 2 & 2 \\
# 4 & 4 & 4 & 4 & 4 \\
# 6 & 6 & 6 & 6 & 6 \\
# 8 & 8 & 8 & 8 & 8
# \end{bmatrix} $

# In[ ]:


# YOUR TURN


# _____

# ## Reductions

# In[ ]:


X = torch.randint(10, (3, 4)).float()
X


# In[ ]:


X.sum()


# In[ ]:


X.sum().item()  # .item() converts a 0-dimensional tensor to a plain Python number


# In[ ]:


X.sum(0)  # column-wise sum


# In[ ]:


X.sum(dim=1)  # row-wise sum


# In[ ]:


X.mean()


# In[ ]:


X.mean(dim=1)


# In[ ]:


X.norm(dim=0)


# ## Your turn!

# Compute the norms of the row vectors in matrix **X** without using _torch.norm()_.
# 
# Remember: $$||\vec{v}||_2 = \sqrt{v_1^2 + v_2^2 + \dots + v_n^2}$$
# 
# Hint: _X\*\*2_ computes the element-wise square.

# In[ ]:


X = torch.eye(4) + torch.arange(4).repeat(4, 1).float()

# YOUR TURN

# SOLUTION: tensor([3.8730, 4.1231, 4.3589, 4.5826])


# ## Masking

# In[ ]:


X = torch.randint(100, (5, 3))
X


# In[ ]:


mask = (X > 25) & (X < 75)
mask


# In[ ]:


X[mask]  # returns all elements matching the criteria as a 1D tensor


# In[ ]:


mask.sum()  # number of elements that fulfill the condition


# In[ ]:


(X == 25) | (X > 60)


# ## Your turn!

# Get the number of non-zeros in **X**.

# In[ ]:


X = torch.tensor([[1, 0, 2], [0, 6, 0]])

# YOUR TURN


# Compute the sum of all entries in **X** that are larger than the mean of all values in **X**.

# In[ ]:


# YOUR TURN


# ______

# ## Some useful properties of tensors

# In[ ]:


x = torch.Tensor([[0, 1, 2], [3, 4, 5]])

print("x.shape: \n%s\n" % (x.shape,))
print("x.size(): \n%s\n" % (x.size(),))
print("x.size(1): \n%s\n" % x.size(1))
print("x.dim(): \n%s\n" % x.dim())
print("x.dtype: \n%s\n" % x.dtype)
print("x.device: \n%s\n" % x.device)


# The `nonzero` function returns the indices of the non-zero elements.

# In[ ]:


x = torch.Tensor([[0, 1, 2], [3, 4, 5]])
print("x.nonzero(): \n%s\n" % x.nonzero())


# In[ ]:


# press tab to autocomplete
# x.


# ___

# ## Converting between PyTorch and numpy

# In[ ]:


X = np.random.random((5, 3))
X


# In[ ]:


# numpy ---> torch
Y = torch.from_numpy(X)  # Y is actually a DoubleTensor (i.e. 64-bit representation)
Y


# In[ ]:


Y = torch.rand((2, 4))
Y


# In[ ]:


# torch ---> numpy
X = Y.numpy()
X


# ____

# ## Using GPUs

# Using a **GPU** in PyTorch is as simple as calling **`.cuda()`** on your tensor.
# 
# But first, you may want to check:
# - that CUDA can actually be used: `torch.cuda.is_available()`
# - how many GPUs are available: `torch.cuda.device_count()`

# In[ ]:


torch.cuda.is_available()


# In[ ]:


torch.cuda.device_count()


# In[ ]:


x = torch.Tensor([[1, 2, 3], [4, 5, 6]])
print(x)


# ### tensor.cuda
# 
# _Note: If you don't have CUDA on the machine, the following examples won't work._

# In[ ]:


x.cuda(0)
print(x.device)  # .cuda() returns a copy; x itself is still on the CPU

x = x.cuda(0)
print(x.device)

x = x.cuda(1)  # requires a second GPU
print(x.device)


# In[ ]:


x = torch.Tensor([[1, 2, 3], [4, 5, 6]])

# This will raise an error since you cannot operate on tensors that are not on the same device
x + x.cuda()


# #### Write an if statement that moves x to the GPU if CUDA is available

# In[ ]:


# YOUR TURN


# These kinds of if statements used to be all over the place in people's PyTorch code. More recently, a more flexible way was introduced:

# ### torch.device

# A **`torch.device`** is an object representing the device on which a torch.Tensor is or will be allocated.
# 
# You can easily move a tensor from one device to another by using the **`tensor.to()`** function.

# In[ ]:


cpu = torch.device('cpu')
cuda_0 = torch.device('cuda:0')

x = x.to(cpu)
print(x.device)

x = x.to(cuda_0)
print(x.device)


# This is more flexible since you only have to check once in your code whether CUDA is available.

# In[ ]:


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

x = x.to(device)  # we no longer need to care about whether CUDA is available or not
print(x.device)


# #### Timing the GPU

# How much faster is the GPU? See for yourself ...

# In[ ]:


A = torch.rand(100, 1000, 1000)
B = A.cuda(1)  # assumes a second GPU; use A.cuda() or A.cuda(0) otherwise
A.size()


# In[ ]:


get_ipython().run_line_magic('timeit', '-n 3 torch.bmm(A, A)')


# In[ ]:


get_ipython().run_line_magic('timeit', '-n 30 torch.bmm(B, B)')


# ___

# ## Don't forget to download the notebook, otherwise your changes will be lost!

# ![Download the notebook](figures/notebook-download.png)

# | Basics | [Autograd](2-Autograd.ipynb) >