```python
# hide
!pip install "fastai>=2.3" --upgrade
import fastai
from fastai.vision.all import *
print(fastai.__version__)
```

Download the MNIST_SAMPLE dataset (just the 3s and 7s) and inspect its contents:

```python
mnist_path = untar_data(URLs.MNIST_SAMPLE)
mnist_path.ls()
```

Open one image of a 3:

```python
sample_3 = Image.open((mnist_path/'train/3').ls().sorted()[1])
sample_3
```

An image is just a grid of numbers. Convert it to a tensor and render a slice of it as a colour-mapped DataFrame:

```python
sample_3_t = tensor(sample_3)
df = pd.DataFrame(sample_3_t[4:15, 4:22])
df.style.set_properties(**{'font-size': '6pt'}).background_gradient('Greys')
```

Load every training 3 as a tensor:

```python
threes = (mnist_path/'train/3').ls().sorted()
len(threes), threes[0]
```

```python
all_threes = [tensor(Image.open(fp)) for fp in threes]
len(all_threes), all_threes[0].shape
```

Stack them into a single rank-3 tensor and scale the pixel values to the range 0-1:

```python
stacked_threes = torch.stack(all_threes).float() / 255
stacked_threes.shape
```

Some key tensor vocabulary:

```python
# shape = the length of each axis
print('shape:', stacked_threes.shape)
# rank = the total number of axes
print('rank:', stacked_threes.ndim)
# dtype = the datatype of its contents
print('dtype:', stacked_threes.dtype)
```

```python
# view = change the shape of a tensor without changing its contents
stacked_threes_rank_2 = stacked_threes.view(-1, 28*28)
print('orig. shape:', stacked_threes.shape)
print('as a rank-2 tensor:', stacked_threes_rank_2.shape)
```

```python
# @ = operator for matrix multiplication
# (the leading batch axes broadcast, so this is a batched matmul)
print('result of matrix multiplication:', (stacked_threes @ torch.randn((1, 28, 28))).shape)
```

`torch.where(a, b, c)` behaves like `[b[i] if a[i] else c[i] for i in range(len(a))]` (see p.167), which makes it easy to write a loss that measures how far each prediction is from its target:

```python
targs = tensor([1, 0, 1])
preds = tensor([0.9, 0.4, 0.2])

def mnist_loss(preds, targs):
    return torch.where(targs == 1, 1 - preds, preds).mean()

print('output of where:', mnist_loss(preds, targs))
```

A helper to plot a function (recent PyTorch versions require an explicit `steps` argument to `torch.linspace`):

```python
def plot_function(f, tx=None, ty=None, title=None, min=-2, max=2, figsize=(6, 4)):
    x = torch.linspace(min, max, 100)
    fig, ax = plt.subplots(figsize=figsize)
    ax.plot(x, f(x))
    if tx is not None: ax.set_xlabel(tx)
    if ty is not None: ax.set_ylabel(ty)
    if title is not None: ax.set_title(title)
```

Create a parameter, tell PyTorch to track its gradient, and evaluate a simple quadratic "loss" at it:

```python
xt = tensor(-1.5).requires_grad_(); xt
```

```python
def f(x): return x**2

loss = f(xt)
plot_function(f, 'x', 'x**2')
plt.scatter(xt.detach().numpy(), loss.detach().numpy(), color='red')
print('Loss:', loss.item())
```

Moving the parameter from -1.5 to -1.0 reduces the loss:

```python
xt = tensor(-1.).requires_grad_(); xt
loss = f(xt)
plot_function(f, 'x', 'x**2')
plt.scatter(xt.detach().numpy(), loss.detach().numpy(), color='red')
print('Loss:', loss.item())
```

Rather than guessing which direction to move, use backpropagation: `loss.backward()` fills in `xt.grad`, the derivative of the loss with respect to `xt`:

```python
xt = tensor(-1.).requires_grad_(); print(xt)
loss = f(xt)
print(loss)
loss.backward()
print(xt.grad)
```

Then step the parameter in the opposite direction of the gradient, scaled by a learning rate:

```python
lr = 0.01
with torch.no_grad():
    xt -= xt.grad * lr
print('New value for xt:', xt)
print('New loss:', f(xt))
```

A fastai `DataLoader` turns any collection into an iterator of shuffled mini-batches:

```python
import string
ds = L(enumerate(string.ascii_lowercase))
ds
```

```python
dl = DataLoader(ds, bs=6, shuffle=True)
list(dl)
```

`nn.Linear` bundles the weight matrix and the bias into a single module:

```python
lin1 = nn.Linear(28*28, 1)
# the trainable parameters
weights, bias = lin1.parameters()
print(weights.shape, bias.shape)
```

ReLU is a nonlinearity that clamps negative values to zero. `nn.ReLU()` and `max(x, 0)` compute the same thing, and ReLU itself has no trainable parameters:

```python
plot_function(F.relu)
```

```python
non_lin1 = nn.ReLU()
print(list(non_lin1.parameters()))
print('using nn.ReLU:', non_lin1(tensor(-1)), non_lin1(tensor(4)))
print('using max():', tensor(-1).max(tensor(0.0)), tensor(4).max(tensor(0.0)))
```

`nn.Sequential` chains modules together; its parameters are just the parameters of the layers it contains:

```python
lin2 = nn.Linear(1, 10)
seq_model = nn.Sequential(lin1, non_lin1, lin2)
seq_params = list(seq_model.parameters())
print(len(seq_params))
for p in seq_params:
    print(p.shape)
```
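To see all these pieces work together, here is a minimal sketch, not from the original notebook, of one epoch of hand-rolled SGD on the 3-vs-7 task. It assumes the 7s are loaded exactly the way the 3s were loaded above, and it adds a `sigmoid` so the model's outputs land in (0, 1), which `mnist_loss` expects. The network `simple_net` and the batch size are illustrative choices, not the original's:

```python
# Assumption: load the 7s exactly as the 3s were loaded above.
sevens = (mnist_path/'train/7').ls().sorted()
stacked_sevens = torch.stack([tensor(Image.open(fp)) for fp in sevens]).float() / 255

# Flatten each image to a 784-vector; label 3s as 1 and 7s as 0.
train_x = torch.cat([stacked_threes, stacked_sevens]).view(-1, 28*28)
train_y = tensor([1] * len(stacked_threes) + [0] * len(stacked_sevens)).unsqueeze(1)
train_dl = DataLoader(list(zip(train_x, train_y)), bs=256, shuffle=True)

# An illustrative two-layer network, built from the same modules as above.
simple_net = nn.Sequential(nn.Linear(28*28, 30), nn.ReLU(), nn.Linear(30, 1))

lr = 0.1
for xb, yb in train_dl:
    preds = simple_net(xb).sigmoid()   # squash outputs into (0, 1) so mnist_loss applies
    loss = mnist_loss(preds, yb)
    loss.backward()                    # populate .grad for every parameter
    with torch.no_grad():
        for p in simple_net.parameters():
            p -= p.grad * lr           # gradient-descent step, as with xt above
            p.grad.zero_()             # reset gradients so they don't accumulate
```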
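As a quick sanity check, again an assumption rather than part of the original notebook, the same preprocessing can be applied to the `valid/` split to measure accuracy (the helper `load_stack` is hypothetical):

```python
# Hypothetical follow-up: accuracy on the validation 3s and 7s.
def load_stack(path):
    return torch.stack([tensor(Image.open(fp)) for fp in path.ls().sorted()]).float() / 255

valid_x = torch.cat([load_stack(mnist_path/'valid/3'),
                     load_stack(mnist_path/'valid/7')]).view(-1, 28*28)
valid_y = tensor([1] * len((mnist_path/'valid/3').ls())
                 + [0] * len((mnist_path/'valid/7').ls())).unsqueeze(1)

with torch.no_grad():
    acc = ((simple_net(valid_x).sigmoid() > 0.5) == valid_y).float().mean()
print('validation accuracy:', acc.item())
```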