#!/usr/bin/env python
# coding: utf-8

# ## for SO https://stackoverflow.com/q/75017358/8508004
# 
# I've taken the original main loop and turned it into the place where the data for each segment is collected.
# The segments are then animated using the technique at the top [here](https://nbviewer.org/github/fomightez/animated_matplotlib-binder/blob/master/index.ipynb), which only works in classic notebook mode. Now that the code has been pulled apart so that each step yields its own segment, it could probably use the `FuncAnimation()` method with the associated widget controller, illustrated at the bottom there, to make something that would also work in JupyterLab. Alternatively, for JupyterLab it may be easier to adapt the approach [here](https://stackoverflow.com/a/52672859/8508004).

# In[8]:


# Notebook-export form of the `%matplotlib notebook` magic. `get_ipython` is
# not imported anywhere in this file, so this line presumably only works when
# the code runs inside an IPython/Jupyter session.
get_ipython().run_line_magic('matplotlib', 'notebook')
import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt

# torch.manual_seed(1)    # reproducible

# Hyper Parameters
TIME_STEP = 10   # number of sample points in each training segment
INPUT_SIZE = 1   # number of input features per time step
LR = 0.02        # learning rate passed to the optimizer

# data: one full period sampled at 100 points
# (float32 so the arrays convert to torch FloatTensor)
steps = np.linspace(0, 2 * np.pi, 100, dtype=np.float32)
x_np, y_np = np.sin(steps), np.cos(steps)

class RNN(nn.Module):
    """Single-layer RNN followed by a linear read-out applied at every time step.

    Args:
        input_size: Number of features per time step. When ``None`` (the
            default) the module-level ``INPUT_SIZE`` constant is used, so
            ``RNN()`` builds the same network as before.
        hidden_size: Number of hidden units in the recurrent layer; also the
            input width of the read-out layer.
    """

    def __init__(self, input_size=None, hidden_size=32):
        super().__init__()
        if input_size is None:
            # Resolved at call time so the default follows the file's constant.
            input_size = INPUT_SIZE

        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,    # rnn hidden units
            num_layers=1,               # number of rnn layers
            batch_first=True,           # input & output have batch size as the 1st dimension, e.g. (batch, time_step, input_size)
        )
        self.out = nn.Linear(hidden_size, 1)

    def forward(self, x, h_state):
        """Run the RNN over ``x`` and project every time step to one value.

        Args:
            x: Input of shape (batch, time_step, input_size).
            h_state: Hidden state of shape (n_layers, batch, hidden_size),
                or ``None`` to let ``nn.RNN`` use its default initial state.

        Returns:
            A tuple ``(outs, h_state)`` where ``outs`` has shape
            (batch, time_step, 1) and ``h_state`` is the hidden state
            returned by the recurrent layer.
        """
        # r_out (batch, time_step, hidden_size)
        r_out, h_state = self.rnn(x, h_state)

        # nn.Linear acts on the last dimension and keeps all leading ones, so
        # one call replaces the per-time-step loop + torch.stack.
        return self.out(r_out), h_state

rnn = RNN()
print(rnn)

optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)   # optimize all rnn parameters
loss_func = nn.MSELoss()

h_state = None      # initial hidden state; the first call passes None to the RNN

## Collect the data for each segment

steps_ls = []   # time values of each segment
r = []          # target values (cos) of each segment
b = []          # model predictions for each segment
num_iterations = 100
for step in range(num_iterations):
    start, end = step * np.pi, (step + 1) * np.pi   # time range
    # use sin to predict cos
    steps = np.linspace(start, end, TIME_STEP, dtype=np.float32, endpoint=False)  # float32 for converting torch FloatTensor
    x_np = np.sin(steps)
    y_np = np.cos(steps)

    x = torch.from_numpy(x_np[np.newaxis, :, np.newaxis])    # shape (batch, time_step, input_size)
    y = torch.from_numpy(y_np[np.newaxis, :, np.newaxis])

    prediction, h_state = rnn(x, h_state)   # rnn output
    # !! next step is important !!
    # Detach the hidden state so the next iteration does not backpropagate
    # through this iteration's graph. `.detach()` replaces the legacy `.data`.
    h_state = h_state.detach()

    loss = loss_func(prediction, y)         # calculate loss
    optimizer.zero_grad()                   # clear gradients for this training step
    loss.backward()                         # backpropagation, compute gradients
    optimizer.step()                        # apply gradients

    # Keep plain (non-autograd) copies of this segment for the animation below.
    steps_ls.append(list(steps))
    r.append(y_np.flatten())
    b.append(prediction.detach().numpy().flatten())

## Plot the segments over time as animation    
import time
def makeplot(ax, indx):
    """Draw segment ``indx`` on ``ax`` and redraw the figure that owns ``ax``.

    The target curve ``r[indx]`` is drawn as a red line and the prediction
    ``b[indx]`` as a blue line, both against ``steps_ls[indx]``. Lines from
    earlier calls are left on the axes, so repeated calls accumulate segments.

    Args:
        ax: Matplotlib axes to draw on.
        indx: Index into the module-level ``steps_ls``, ``r`` and ``b`` lists.
    """
    ax.plot(steps_ls[indx], r[indx], 'r-')
    ax.plot(steps_ls[indx], b[indx], 'b-')
    # Redraw via the axes' own figure rather than a global `fig`, so the
    # function refreshes whichever figure `ax` actually belongs to.
    ax.figure.canvas.draw()


fig, ax = plt.subplots(figsize=(12, 5))

# Add one collected segment per frame, pausing briefly between frames so the
# curve appears to grow over time.
for indx in range(len(steps_ls)):
    makeplot(ax, indx)
    time.sleep(0.2)


# In[ ]: