#!/usr/bin/env python
# coding: utf-8

# Open In Colab

# ## Table of contents:
# * Initial Setup
# * Data Loading and Preprocessing
# * Data Variables
# * Model Instantiation
# * Optimizer and Scheduler
# * Train and Evaluation Loop
# * Save the model and plot the losses (**1 Point**)
# * Test Loop
# * Human Pose Visualization (**2 Points**)
# * Report and Parameter Fine-Tuning Analysis (**4 Points**)
# * Calculating MPJPE for a Specific Frame (**2 Points**)
# * Iterative Mechanism (**3 Points**)
# * YOUR custom model (**3 Points**)
# * Performance BONUS (**Up to 2 Points**)

# ## Group composition:
# - Luca Mazzucco : 1996710,
# - Gian Alvin Guico : 2033024,
# - Antonio Rocca : 1813055,
# - Francois Hascoat : 2116739.

# ## Initial Setup
# Run the following two cells to sync with Google Drive only if you run from Google Colab.
#
# *Note: we recommend using Google Colab for this specific homework, since the training phase will require a GPU*

# In[1]:

from os import makedirs
from os.path import exists

# In[2]:

get_ipython().system(' git clone https://github.com/LM1997610/AdavancedML.git')
print()
get_ipython().run_line_magic('cd', '/content/AdavancedML/Assignment_3/Practice')

# In[3]:

get_ipython().system(' gdown 1fNjPKEBHJObyhZkgpnP4gYbIXp_D0eYA')

# In[4]:

if exists("data.zip"):
    get_ipython().system(' unzip -q data.zip')
    get_ipython().system(' rm data.zip')

# Welcome to this guide on training, testing, and fine-tuning a deep learning model. Deep learning is at the forefront of artificial intelligence, with applications spanning image recognition, natural language processing, and more.
#
# Throughout this assignment, you'll:
#
# 1. **Prepare Data:** Preprocess and load the data.
#
# 2. **Use Neural Networks:** Instantiate a neural network architecture.
#
# 3. **Train Models:** Utilize optimization, loss functions, and backpropagation.
#
# 4. **Evaluate Performance:** Assess model performance and guard against overfitting and underfitting.
#
# 5. **Fine-Tune Models:** Explore hyperparameter tuning.

# In[5]:

import torch
import torch.autograd
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader

from utils import h36motion3d as datasets

import time
import numpy as np
import matplotlib.pyplot as plt

from utils.loss_funcs import *
from utils.data_utils import define_actions
from utils.h36_3d_viz import visualize

# In[6]:

# Use GPU if available, otherwise stick with cpu
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
gpu_name = torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'
print('Using device:', device, '- Type:', gpu_name)

# ## Data Loading and Preprocessing
# For this homework, you will use [Human3.6M](https://vision.imar.ro/human3.6m/pami-h36m.pdf), a large-scale dataset of 3.6 million accurate 3D human poses acquired by recording the performance of five female and six male subjects under four different viewpoints. The dataset includes:
# - Synchronized images.
# - Human motion capture.
# - Time-of-flight (depth) data.
# - Accurate 3D body scans of all the subject actors involved.
#
# The dataset aims to provide diverse motions and poses encountered in typical human activities, with additional data to train realistic human sensing systems.
#
# ![picture](https://drive.google.com/uc?export=view&id=1nyD1_F3r1ctexKFGDmhy2Q9SX_2Z_bpS)
#
# For this assignment, we will leverage the rich **motion data** (see the figure above) provided by H3.6M to perform a task known as *motion prediction*. Motion prediction involves using historical motion data to forecast future movements. This task is fundamental in human-robot interaction, animation, and sports analytics applications.

# ### Data Variables
#
# Each created sequence has the shape (35, 22, 3), where:
# - Number of observed frames plus number of frames to predict: $N_{obs}+N_{pred} = 10 + 25 = 35$;
# - Number of body joints to consider: $J=22$;
# - Spatial coordinates: $(x,y,z) = 3$.
#
# The original data provides high-resolution progressive scan videos at 50 Hz. However, the dataset has been downsampled to 25 Hz for research purposes. This means that 25 frames of motion data are provided per second.
#
# *Note: the figure above shows 18 joints; however, the dataset contains 32. For this specific case we will consider 22 joints, ignoring some of the finer ones (e.g. foot tip, hand tip, etc.)*
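# To make this split concrete, here is a toy sketch (not part of the pipeline): a dummy (35, 22, 3) array stands in for one real sequence and is divided into the observed part and the part to predict.

# In[ ]:

sequence = np.zeros((35, 22, 3))     # N_obs + N_pred frames, J joints, (x, y, z)
observed = sequence[:10]             # N_obs = 10 frames fed to the model
future = sequence[10:]               # N_pred = 25 frames the model must predict
print(observed.shape, future.shape)  # (10, 22, 3) (25, 22, 3)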
# In[7]:

# Arguments to setup the datasets
datas = 'h36m'  # dataset name
path = './data/h3.6m/h3.6m/dataset'
input_n = 10  # number of frames to train on (default=10)
output_n = 25  # number of frames to predict on
input_dim = 3  # dimensions of the input coordinates (default=3)
skip_rate = 1  # skip rate of frames
joints_to_consider = 22

#FLAGS FOR THE TRAINING
mode = 'train'  # choose either train or test mode
batch_size_test = 8
model_path = './checkpoints/'  # path to the model checkpoint file
actions_to_consider_test = 'all'  # actions to test on.

# the model name to save/load
model_name = datas+'_3d_'+str(output_n)+'frames_ckpt'

#FLAGS FOR THE VISUALIZATION
actions_to_consider_viz = 'all'  # actions to visualize
visualize_from = 'test'
n_viz = 2

# Load Dataset
#
# *Note: it will take you ~ 5 minutes*

# In[8]:

import warnings
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)

# In[9]:

# Load Data
print('Loading Train Dataset...')
dataset = datasets.Datasets(path, input_n, output_n, skip_rate, split=0)
print('Loading Validation Dataset...')
vald_dataset = datasets.Datasets(path, input_n, output_n, skip_rate, split=1)

#! Note: ignore the warning "VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences"

# Next, we create the torch DataLoaders that build the batches for each epoch.

# In[10]:

batch_size = 256

print('>>> Training dataset length: {:d}'.format(dataset.__len__()))
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

print('>>> Validation dataset length: {:d}'.format(vald_dataset.__len__()))
vald_loader = DataLoader(vald_dataset, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

# ## Model Instantiation
#
# Each sequence comprises an **observed** part, which is fed to the Encoder, and a **future** part that the Decoder attempts to predict.
#
# In the standard setup, the first 10 pose frames ($N_{obs}=10$) are used for observation and the following 25 ($N_{pred} = 25$) are the prediction target.
#
# We create an instance of a custom Spatio-Temporal transformer with the chosen configuration.
#
# (*Note: explore the model in ./models/sttr/sttformer.py*)
#
# Then we allocate it to the GPU for forward and backward accelerated computation.

# In[11]:

from models.sttr.sttformer import Model

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device: %s' % device)

n_heads = 1

model = Model(num_joints=joints_to_consider,
              num_frames=input_n, num_frames_out=output_n,
              num_heads=n_heads, num_channels=3,
              kernel_size=[3, 3], use_pes=True).to(device)

print('total number of parameters of the network is: ' + str(sum(p.numel() for p in model.parameters() if p.requires_grad)))
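# As a quick sanity check, here is an illustrative forward pass on random data, showing the tensor layout used throughout this notebook: a [batch, channels, N_obs, J] input and an output reshaped to [batch, N_pred, J, 3].

# In[ ]:

dummy = torch.randn(2, 3, input_n, joints_to_consider).to(device)  # [B, C, N_obs, J]
out = model(dummy).view(-1, output_n, joints_to_consider, 3)       # [B, N_pred, J, (x, y, z)]
print(out.shape)  # expected: torch.Size([2, 25, 22, 3])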
# ## Optimizer and Scheduler
# As we embark on training deep learning models for motion prediction using the H3.6M dataset, it's essential to recognize several key parameters and components that significantly impact the training phase:
#
# - **Learning Rate:** Determines the convergence speed during optimization.
#
# - **Batch Size:** Influences model generalization and training efficiency.
#
# - **Number of Epochs:** The number of training iterations affects model learning.
#
# - **Loss Function:** The choice of loss function directly affects learning and final performance.
#
# - **Optimizer:** The optimization algorithm used (e.g., Adam, SGD) impacts gradient descent during training.
#
# - **Milestones and Gamma:** These parameters control the learning rate schedule, allowing for adaptive adjustments during training.
#
# - **Weight Decay:** An L2 penalty that discourages large weights, helping to regularize the model.
#
# - **Scheduler:** Scheduler strategies (e.g., StepLR, ReduceLROnPlateau) manage learning rate adaptation during training.

# In[12]:

# Arguments to setup the optimizer
lr = 1e-01  # learning rate
use_scheduler = True  # use MultiStepLR scheduler
milestones = [10, 30]  # the epochs after which the learning rate is adjusted by gamma
gamma = 0.1  # multiplicative factor applied to the learning rate at each milestone epoch
weight_decay = 1e-05  # weight decay (L2 penalty)

optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

if use_scheduler:
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=gamma)

# In[13]:

clip_grad = None  # select max norm to clip gradients

# Arguments for training
n_epochs = 41
log_step = 200

# ## Train and Evaluation Loop
# The **loss** used during training and the **metric** used during evaluation both compare the predicted joint positions to the ground truth joint positions over all frames. This quantity, typically referred to as the **Average Mean Per Joint Position Error (A-MPJPE)**, can be seen as an $L_2$ distance: it quantifies the dissimilarity between the predicted and ground truth joint positions by measuring the Euclidean distance between corresponding joints, averaged over joints and frames.
#
# \begin{align*}
# \text{A-MPJPE} &= \frac{1}{N_{pred}} \sum_{t=1}^{N_{pred}} \left(\frac{1}{J} \sum_{j=1}^{J} \left\| P_{\text{predicted}_{t,j}} - P_{\text{gt}_{t,j}} \right\|_2\right)
# \end{align*}
#
# $$where:$$
#
# \begin{align*}
# P_{\text{predicted}} &: \text{Set of predicted joint positions estimated by the model.} \\
# P_{\text{gt}} &: \text{Corresponding set of ground truth joint positions.} \\
# \end{align*}

# *Note: if you restart the training for any reason, remember to instantiate the model and the optimizer again; otherwise training will continue from the previous run's weights.*
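# The metric above can be written compactly in PyTorch. A minimal sketch, assuming [batch, frames, joints, 3] tensors (the provided utils.loss_funcs.mpjpe_error may differ in its exact reduction details):

# In[ ]:

def a_mpjpe_sketch(pred, gt):
    # Euclidean distance per joint, then mean over joints, frames and batch
    return torch.norm(pred - gt, p=2, dim=-1).mean()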
# ### Save the model and plot the losses (1 Point)
#
# **Objective:** In this exercise, you will practice implementing a code snippet to save your deep learning model's checkpoints and visualize the training and validation loss on the same plot every 5 epochs during model training.
#
# Your task is to implement the following:
#
# - Set up a mechanism to save the model's checkpoints (weights and architecture) during training. These checkpoints should be saved periodically, say, **every 5 epochs**.
#
# - Create a plot displaying the training and validation losses on the same graph. The x-axis should represent the number of epochs, and the y-axis should represent the loss values. The training and validation losses should be plotted as separate lines on the same graph.
#
# - Ensure that the code saves the model's checkpoints in a specified directory, including the model's architecture and weights, and that the loss plot is displayed.
#
# Analyze the loss plot to gain insights into how your model is learning over time and whether there are any signs of overfitting or underfitting.
#
# *Note: see the PyTorch documentation on how to save your model's checkpoints.*

# In[14]:

from IPython.display import clear_output

# In[15]:

def do_my_plot_and_save(my_model, train_loss, val_loss, path_to_save_model, model_name, this_epoch):

    # create the output directories if they do not exist yet
    if not exists(path_to_save_model): makedirs(path_to_save_model)
    if not exists(path_to_save_model + "plots_dir/"): makedirs(path_to_save_model + "plots_dir/")

    # save the model weights for the current epoch
    torch.save(my_model.state_dict(), path_to_save_model + model_name + "_epoch_" + str(this_epoch+1) + ".pt")

    # plot train and validation loss on the same graph
    fig = plt.figure(figsize=(5, 2))
    fig.tight_layout(pad=2)

    x_length = list(range(1, len(train_loss)+1))
    plt.plot(x_length, train_loss, 'r', label='Train loss')
    plt.plot(x_length, val_loss, 'g', label='Val loss')

    plt.title('\n Loss History \n', fontsize=14)
    plt.xlabel('n_of_epochs \n'); plt.ylabel('loss')

    # thin out the x-ticks as the number of epochs grows
    t = 1 if this_epoch < 11 else 2 if this_epoch < 21 else 3
    plt.xticks(list(range(1, len(train_loss)+1, t)))

    plt.grid(linewidth=0.4); plt.legend()
    plt.savefig(path_to_save_model + "plots_dir/" + "loss_epoch_" + str(this_epoch+1) + ".png", bbox_inches='tight')
    plt.show()
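# For reference, a checkpoint saved by the function above can be restored later with load_state_dict. A minimal sketch (the epoch number in the file name is illustrative):

# In[ ]:

ckpt_file = model_path + model_name + "_epoch_5.pt"  # illustrative epoch
if exists(ckpt_file):
    model.load_state_dict(torch.load(ckpt_file, map_location=device))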
# In[16]:

def train(data_loader, vald_loader, path_to_save_model=None):

    train_loss = []
    val_loss = []
    val_loss_best = 1000

    # indices of the flattened (x, y, z) coordinates of the 22 considered joints
    dim_used = np.array([6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53, 54, 55, 56, 57, 58, 59, 63, 64, 65, 66, 67, 68, 75, 76, 77, 78, 79, 80, 81, 82, 83, 87, 88, 89, 90, 91, 92])

    for epoch in range(n_epochs-1):
        running_loss = 0
        n = 0
        model.train()
        for cnt, batch in enumerate(data_loader):
            batch = batch.float().to(device)
            batch_dim = batch.shape[0]
            n += batch_dim

            sequences_train = batch[:, 0:input_n, dim_used].view(-1, input_n, len(dim_used)//3, 3).permute(0, 3, 1, 2)
            sequences_gt = batch[:, input_n:input_n+output_n, dim_used].view(-1, output_n, len(dim_used)//3, 3)

            optimizer.zero_grad()
            sequences_predict = model(sequences_train).view(-1, output_n, joints_to_consider, 3)
            loss = mpjpe_error(sequences_predict, sequences_gt)

            #if cnt % log_step == 0:
            #    print('[Epoch: %d, Iteration: %5d] training loss: %.3f' %(epoch + 1, cnt + 1, loss.item()))

            loss.backward()
            if clip_grad is not None:
                torch.nn.utils.clip_grad_norm_(model.parameters(), clip_grad)

            optimizer.step()
            running_loss += loss*batch_dim

        train_loss.append(running_loss.detach().cpu()/n)

        model.eval()
        with torch.no_grad():
            running_loss = 0
            n = 0
            for cnt, batch in enumerate(vald_loader):
                batch = batch.float().to(device)
                batch_dim = batch.shape[0]
                n += batch_dim

                sequences_train = batch[:, 0:input_n, dim_used].view(-1, input_n, len(dim_used)//3, 3).permute(0, 3, 1, 2)
                sequences_gt = batch[:, input_n:input_n+output_n, dim_used].view(-1, output_n, len(dim_used)//3, 3)

                sequences_predict = model(sequences_train).view(-1, output_n, joints_to_consider, 3)
                loss = mpjpe_error(sequences_predict, sequences_gt)

                if cnt % log_step == 0:
                    print('[Epoch: %d, Iteration: %5d] validation loss: %.3f' %(epoch + 1, cnt + 1, loss.item()))
                running_loss += loss*batch_dim

            val_loss.append(running_loss.detach().cpu()/n)
            if running_loss/n < val_loss_best:
                val_loss_best = running_loss/n

        if use_scheduler:
            scheduler.step()

        # save and plot the model every 5 epochs
        # Insert your code below. Use the argument path_to_save_model to save the model to the path specified.
        if save_and_plot and epoch in list(range(4, n_epochs, 5)):
            #clear_output(wait=True)
            do_my_plot_and_save(model, train_loss, val_loss, path_to_save_model, model_name, epoch)

# In[17]:

# Save the model and plot the loss
# Change to True if you want to save the model and plot the loss
save_and_plot = True

# launch training
train(data_loader, vald_loader, path_to_save_model=model_path)

# plots and model checkpoints are saved in the "checkpoints" directory

# ## Test Loop
# After training and validation, the test loop is the final phase that evaluates the model's performance on an entirely independent dataset known as the test dataset. This dataset is distinct from the training and validation data, ensuring an unbiased assessment. The test loop provides a reliable estimate of how well the model will perform in real-world scenarios, confirming that any improvements observed during training and validation are not due to overfitting or chance. It's a crucial step before deploying the model in practical applications.
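# The test function below works on the full 32-joint skeleton, whose (x, y, z) coordinates are flattened into 96 columns. A small illustration of how a joint index maps to its three flattened columns (joint 16 occupies columns 48-50):

# In[ ]:

j = np.array([16, 20, 23, 24, 28, 31])                # joints to ignore
cols = np.concatenate((j * 3, j * 3 + 1, j * 3 + 2))  # their flattened columns
print(np.sort(cols)[:3])                              # [48 49 50]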
# In[18]:

def test(ckpt_path=None):

    model.load_state_dict(torch.load(ckpt_path))
    print('\n ...model loaded \n')

    model.eval()
    accum_loss = 0
    n_batches = 0  # number of batches for all the sequences
    actions = define_actions(actions_to_consider_test)

    dim_used = np.array([6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53, 54, 55, 56, 57, 58, 59, 63, 64, 65, 66, 67, 68, 75, 76, 77, 78, 79, 80, 81, 82, 83, 87, 88, 89, 90, 91, 92])

    # joints at same loc
    joint_to_ignore = np.array([16, 20, 23, 24, 28, 31])
    index_to_ignore = np.concatenate((joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2))
    joint_equal = np.array([13, 19, 22, 13, 27, 30])
    index_to_equal = np.concatenate((joint_equal * 3, joint_equal * 3 + 1, joint_equal * 3 + 2))

    total_time = 0
    counter = 0

    for action in actions:
        running_loss = 0
        n = 0
        dataset_test = datasets.Datasets(path, input_n, output_n, skip_rate, split=2, actions=[action])
        #print('>>> test action for sequences: {:d}'.format(dataset_test.__len__()))

        test_loader = DataLoader(dataset_test, batch_size=batch_size_test, shuffle=False, num_workers=0, pin_memory=True)
        for cnt, batch in enumerate(test_loader):
            with torch.no_grad():
                batch = batch.to(device)
                batch_dim = batch.shape[0]
                n += batch_dim

                all_joints_seq = batch.clone()[:, input_n:input_n+output_n, :]
                sequences_train = batch[:, 0:input_n, dim_used].view(-1, input_n, len(dim_used)//3, 3).permute(0, 3, 1, 2)
                sequences_gt = batch[:, input_n:input_n+output_n, :]

                running_time = time.time()
                sequences_predict = model(sequences_train).view(-1, output_n, joints_to_consider, 3)
                total_time += time.time() - running_time
                counter += 1

                sequences_predict = sequences_predict.contiguous().view(-1, output_n, len(dim_used))

                all_joints_seq[:, :, dim_used] = sequences_predict
                # ignored joints copy the coordinates of their coincident counterparts
                all_joints_seq[:, :, index_to_ignore] = all_joints_seq[:, :, index_to_equal]

                loss = mpjpe_error(all_joints_seq.view(-1, output_n, 32, 3), sequences_gt.view(-1, output_n, 32, 3))
                running_loss += loss*batch_dim
                accum_loss += loss*batch_dim

        print(str(action), ': ', str(np.round((running_loss/n).item(), 1)))
        n_batches += n

    print('\nAverage: ' + str(np.round((accum_loss/n_batches).item(), 1)))
    print('Prediction time: ', total_time/counter)

# *Note: your results should be better than 95 millimeters on average*

# In[19]:

# Change the epoch according to the validation curve:
ckpt_path = f'./checkpoints/h36m_3d_25frames_ckpt_epoch_{35}.pt'
test(ckpt_path)

# ## Human Pose Visualization (**2 Points**)
# The qualitative results are as important as the quantitative ones. In this section, you will visualize and compare the predicted poses with the ground truth ones. For simplicity, you will visualize only the first predicted pose and the ground truth.
#
# Ideally, the same plot should show the predicted pose in red and the ground truth one in green.
#
# *Note: you will find which nodes are connected in the file ./models/skeleton_connection.py*

# In[20]:

# Insert your code below
visualize(input_n, output_n, visualize_from, path, model, device, n_viz, skip_rate, actions_to_consider_viz, directory="images_dir")

# The visualization was performed with the 'visualize' function from the provided 'h36_3d_viz.py' file.
# Although the module was imported, it could not be used out of the box: there were some issues with defining the three axes in Matplotlib.
# Once this was addressed, the only remaining adjustment was changing the colors according to the requirements.
# Results are saved as GIF images in the 'images_dir' directory.
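# For reference, a minimal sketch of the red/green overlay described above, assuming `pred` and `gt` are [J, 3] numpy arrays and `connections` is a list of joint-index pairs (all names here are illustrative; the actual pairs live in ./models/skeleton_connection.py):

# In[ ]:

def plot_pose_pair(pred, gt, connections):
    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')
    for a, b in connections:
        ax.plot(*zip(pred[a], pred[b]), c='r')  # predicted bone in red
        ax.plot(*zip(gt[a], gt[b]), c='g')      # ground truth bone in green
    plt.show()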
# ## Report and Parameter Fine-Tuning Analysis (**4 Points**)
# **Objective:** In this exercise, you will analyze the results obtained from a deep learning model you previously trained and perform parameter fine-tuning to optimize its performance. The key considerations are the learning rate, milestones, and weight decay. **You will also use tables and plots to visualize and interpret the outcomes.**
#
# **Instructions:**
#
# 1. **Analysis:** Analyze the generated report and answer the following questions:
#    - Is there evidence of overfitting or underfitting in the initial training results?
#    - Are there fluctuations in training and validation loss or accuracy? If so, what might be causing them?
#    - What can you infer from the initial learning rate, milestones, and weight decay settings?
#
# 2. **Parameter Fine-Tuning:** Based on your analysis, perform parameter fine-tuning to optimize model performance. Adjust the following parameters:
#    - **Learning Rate:** Experiment with different learning rates (higher and lower values) to find an optimal rate.
#    - **Milestones:** Modify the milestone values for adjusting the learning rate schedule.
#    - **Weight Decay:** Explore different weight decay values.
#
# 3. **Re-Training:** Train the model with the adjusted hyperparameters. Record the training progress and generate a new report, including performance metrics and line plots as before.
#
# 4. **Final Analysis:** Analyze the results of the fine-tuned model and compare them with the initial training. Answer the following questions:
#    - Has parameter fine-tuning improved model performance?
#    - Did it mitigate overfitting or underfitting issues?
#    - What can you conclude about the optimal hyperparameters for this task?
#
# The report and final analysis are submitted separately as a pdf file named `AML_HW3_report.pdf`. \
# The search was carried out with the help of [Weights & Biases](https://wandb.ai/site).

# In[21]:

model = Model(num_joints=joints_to_consider,
              num_frames=input_n, num_frames_out=output_n,
              num_heads=n_heads, num_channels=3,
              kernel_size=[3, 3], use_pes=True).to(device)

print('total number of parameters of the network is: ' + str(sum(p.numel() for p in model.parameters() if p.requires_grad)))

# In[22]:

# Arguments to setup the optimizer
lr = 3e-01  # learning rate
milestones = [10, 30]  # the epochs after which the learning rate is adjusted by gamma
weight_decay = 1e-08  # weight decay (L2 penalty)

optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

use_scheduler = True  # use MultiStepLR scheduler
if use_scheduler:
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=gamma)

clip_grad = None  # select max norm to clip gradients

# Arguments for training
n_epochs = 41
log_step = 200

# In[23]:

# Save the model and plot the loss
# Change to True if you want to save the model and plot the loss
save_and_plot = True

# launch training
# train(data_loader, vald_loader, path_to_save_model='./checkpoints_fine_tuned/')

# In[ ]:

# Change the epoch according to the validation curve:
# ckpt_path = f'./checkpoints_fine_tuned/h36m_3d_25frames_ckpt_epoch_{35}.pt'
# test(ckpt_path)
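# For context, an illustrative sketch of the kind of Weights & Biases sweep configuration used for the search mentioned above (the values shown are examples, not the exact grid from the report):

# In[ ]:

sweep_config = {
    'method': 'grid',
    'metric': {'name': 'val_loss', 'goal': 'minimize'},
    'parameters': {
        'lr': {'values': [1e-2, 1e-1, 3e-1]},
        'weight_decay': {'values': [1e-8, 1e-5]},
        'milestones': {'values': [[10, 30], [15, 35]]},
    },
}
# this dictionary would be passed to wandb.sweep(...) together with a
# training function wrapped for the wandb agent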
# ## Calculating MPJPE for a Specific Frame (**2 Points**)
#
# In this exercise, you will calculate the Mean Per Joint Position Error (MPJPE) for a specific frame. This skill is valuable for assessing the accuracy of your model's predictions at a particular moment.
#
# \begin{align*}
# \text{MPJPE}_t = \frac{1}{J} \sum_{j=1}^{J} \left\| P_{\text{predicted}_{t,j}} - P_{\text{gt}_{t,j}} \right\|_2
# \end{align*}
#
# For a fixed frame $t$, you will calculate the MPJPE between the predicted pose and the ground truth. Steps:
#
# - Write a function that takes as input the predicted poses and the ground truth ones and returns the MPJPE for a set of frames $t$ (e.g. the output could be a dictionary with the frame number as key and the MPJPE as value).
# - Rewrite the test function to use the function you just wrote.
# - Run the newly created test function for $t=[5, 10, 15, 25]$ and report the results in a table and plot.

# In[24]:

def mpjpe_per_frame(sequences_predict, sequences_gt, frames_to_consider):
    # Insert your code below
    mpjpe_diz = {}

    for frame_index in frames_to_consider:
        # frame_index is 1-based, tensor indexing is 0-based
        predicted_frame = sequences_predict[:, frame_index-1, :, :]  # [batch, joints, 3]
        gt_frame = sequences_gt[:, frame_index-1, :, :]

        # Euclidean distance per joint, then mean over joints and batch
        joint_distances = torch.norm(predicted_frame - gt_frame, p=2, dim=-1)
        mpjpe_diz[frame_index] = torch.mean(joint_distances).cpu()

    return mpjpe_diz
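# A quick self-check of the function on a random tensor (illustrative only; with identical inputs every frame's error is exactly zero):

# In[ ]:

p = torch.randn(4, 25, 32, 3)
print(mpjpe_per_frame(p, p, [5, 10, 15, 25]))  # every value is tensor(0.)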
# In[25]:

def test_per_frame(ckpt_path=None):

    model.load_state_dict(torch.load(ckpt_path))
    print('\n ...model loaded \n')

    model.eval()
    accum_loss = 0
    n_batches = 0  # number of batches for all the sequences
    actions = define_actions(actions_to_consider_test)

    dim_used = np.array([6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53, 54, 55, 56, 57, 58, 59, 63, 64, 65, 66, 67, 68, 75, 76, 77, 78, 79, 80, 81, 82, 83, 87, 88, 89, 90, 91, 92])

    # joints at same loc
    joint_to_ignore = np.array([16, 20, 23, 24, 28, 31])
    index_to_ignore = np.concatenate((joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2))
    joint_equal = np.array([13, 19, 22, 13, 27, 30])
    index_to_equal = np.concatenate((joint_equal * 3, joint_equal * 3 + 1, joint_equal * 3 + 2))

    total_time = 0
    counter = 0
    frames_to_consider = [5, 10, 15, 25]

    for action in actions:
        running_loss = 0
        n = 0
        dataset_test = datasets.Datasets(path, input_n, output_n, skip_rate, split=2, actions=[action])
        #print('>>> test action for sequences: {:d}'.format(dataset_test.__len__()))

        test_loader = DataLoader(dataset_test, batch_size=batch_size_test, shuffle=False, num_workers=0, pin_memory=True)
        for cnt, batch in enumerate(test_loader):
            with torch.no_grad():
                batch = batch.to(device)
                batch_dim = batch.shape[0]
                n += batch_dim

                all_joints_seq = batch.clone()[:, input_n:input_n+output_n, :]
                sequences_train = batch[:, 0:input_n, dim_used].view(-1, input_n, len(dim_used)//3, 3).permute(0, 3, 1, 2)
                sequences_gt = batch[:, input_n:input_n+output_n, :]

                running_time = time.time()
                sequences_predict = model(sequences_train).view(-1, output_n, joints_to_consider, 3)
                total_time += time.time() - running_time
                counter += 1

                sequences_predict = sequences_predict.contiguous().view(-1, output_n, len(dim_used))

                all_joints_seq[:, :, dim_used] = sequences_predict
                all_joints_seq[:, :, index_to_ignore] = all_joints_seq[:, :, index_to_equal]

                # Insert your code below.
                # mpjpe_per_frame returns the loss for each considered frame
                # (a dictionary with the frames as keys and the per-frame loss as values).
                # Keep a tab of the running loss and the number of sequences.
                dict_loss = mpjpe_per_frame(all_joints_seq.view(-1, output_n, 32, 3),
                                            sequences_gt.view(-1, output_n, 32, 3),
                                            frames_to_consider)

                # average the loss over the considered frames, weighted by the batch size
                loss = sum(dict_loss.values()) / len(dict_loss)
                running_loss += loss * batch_dim
                accum_loss += loss * batch_dim

        print(str(action), ': ', str(np.round((running_loss/n).item(), 1)))
        n_batches += n

    print('\nAverage: ' + str(np.round((accum_loss/n_batches).item(), 1)))
    print('Prediction time: ', np.round(total_time/counter, 5))

# In[26]:

# Insert your code below where you want to load the model and test it.
# You need to specify the path to the model checkpoint file and call the test function.
ckpt_path = f'./checkpoints/h36m_3d_25frames_ckpt_epoch_{35}.pt'
test_per_frame(ckpt_path)

# ## Iterative Mechanism (**3 Points**)
# In this exercise, you will explore the concept of an iterative mechanism and its adaptability when the model's output length changes. You will start with a model designed to produce 25 output frames but adapt it to generate only 10. The exercise will involve modifying and re-training the model for the new output length. During test time, the model will generate 10 frames and then use them as input to generate the successive 10 frames, and so on, until the desired number of frames is reached. In this case, you are asked to generate 25 frames.
#
# The steps are as follows (see the sketch right after this list):
# - Change the model's output length from 25 to 10.
# - Re-train the model.
# - Rewrite the test function to generate 25 frames using the iterative mechanism.
# - Generate a new report and compare the results to the baseline model.
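# A minimal sketch of the iterative rollout implemented later in the test loop: predict 10 frames, feed the last 10 predictions back in, repeat, then trim to the desired length (`pred_fn` stands in for the trained 10-frame model; all names are illustrative):

# In[ ]:

def rollout(pred_fn, observed, target_len=25, step=10):
    frames = observed                                 # [B, 3, 10, J] encoder input
    chunks = []
    total = 0
    while total < target_len:
        nxt = pred_fn(frames)                         # [B, step, J, 3] predicted chunk
        chunks.append(nxt)
        total += step
        frames = nxt[:, -step:].permute(0, 3, 1, 2)   # re-feed the last `step` frames
    return torch.cat(chunks, dim=1)[:, :target_len]   # trim the overshoot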
# In[27]:

# Arguments to setup the datasets
datas = 'h36m'  # dataset name
path = './data/h3.6m/h3.6m/dataset'
input_n = 10  # number of frames to train on (default=10)

# Insert your code below
output_n = 10  # number of frames to predict on

input_dim = 3  # dimensions of the input coordinates (default=3)
skip_rate = 1  # skip rate of frames
joints_to_consider = 22

#FLAGS FOR THE TRAINING
mode = 'train'  # choose either train or test mode
batch_size_test = 8
model_path_iterative = './checkpoints_iterative/'  # path to the model checkpoint file
actions_to_consider_test = 'all'  # actions to test on.
model_name = datas+'_3d_'+str(output_n)+'frames_ckpt'  # the model name to save/load

#FLAGS FOR THE VISUALIZATION
actions_to_consider_viz = 'all'  # actions to visualize
visualize_from = 'test'
n_viz = 2

# In[28]:

# Load Data
print('Loading Train Dataset...')
dataset = datasets.Datasets(path, input_n, output_n, skip_rate, split=0)
print('Loading Validation Dataset...')
vald_dataset = datasets.Datasets(path, input_n, output_n, skip_rate, split=1)

#! Note: ignore the warning "VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences"

# In[29]:

batch_size = 256

print('>>> Training dataset length: {:d}'.format(dataset.__len__()))
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

print('>>> Validation dataset length: {:d}'.format(vald_dataset.__len__()))
vald_loader = DataLoader(vald_dataset, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

# In[30]:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device: %s' % device)

n_heads = 1

model = Model(num_joints=joints_to_consider,
              num_frames=input_n, num_frames_out=output_n,
              num_heads=n_heads, num_channels=3,
              kernel_size=[3, 3], use_pes=True).to(device)

print('total number of parameters of the network is: ' + str(sum(p.numel() for p in model.parameters() if p.requires_grad)))

# In[31]:

# Arguments to setup the optimizer
lr = 1e-01  # learning rate
use_scheduler = True  # use MultiStepLR scheduler
milestones = [10, 30]  # the epochs after which the learning rate is adjusted by gamma
gamma = 0.1  # multiplicative factor applied to the learning rate at each milestone epoch
weight_decay = 1e-05  # weight decay (L2 penalty)

optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

if use_scheduler:
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=gamma)

clip_grad = None  # select max norm to clip gradients

# Arguments for training
n_epochs = 41
log_step = 200

# ### Train and Validation Loop

# In[32]:

# Save the model and plot the loss.
# Change to True if you want to save the model and plot the loss
save_and_plot = True

# launch training with the new output_n
train(data_loader, vald_loader, path_to_save_model=model_path_iterative)

# ### Test Loop

# In[33]:

def test(ckpt_path=None):

    model.load_state_dict(torch.load(ckpt_path))
    print('...model loaded \n')

    model.eval()
    accum_loss = 0
    n_batches = 0  # number of batches for all the sequences
    actions = define_actions(actions_to_consider_test)

    dim_used = np.array([6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53, 54, 55, 56, 57, 58, 59, 63, 64, 65, 66, 67, 68, 75, 76, 77, 78, 79, 80, 81, 82, 83, 87, 88, 89, 90, 91, 92])

    # joints at same loc
    joint_to_ignore = np.array([16, 20, 23, 24, 28, 31])
    index_to_ignore = np.concatenate((joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2))
    joint_equal = np.array([13, 19, 22, 13, 27, 30])
    index_to_equal = np.concatenate((joint_equal * 3, joint_equal * 3 + 1, joint_equal * 3 + 2))

    total_time = 0
    counter = 0

    for action in actions:
        running_loss = 0
        n = 0
        dataset_test = datasets.Datasets(path, input_n, 35, skip_rate, split=2, actions=[action])
        test_loader = DataLoader(dataset_test, batch_size=batch_size_test, shuffle=False, num_workers=0, pin_memory=True)

        for cnt, batch in enumerate(test_loader):
            with torch.no_grad():
                batch = batch.to(device)
                batch_dim = batch.shape[0]
                n += batch_dim

                # the ground truth covers 25 frames (output_n + 15)
                all_joints_seq = batch.clone()[:, input_n:input_n+output_n+15, :]

                running_time = time.time()
                sequences_train = batch[:, 0:input_n, dim_used].view(-1, input_n, len(dim_used)//3, 3).permute(0, 3, 1, 2)
                sequences_gt = batch[:, input_n:input_n+output_n+15, :]

                # first chunk of 10 predicted frames
                sequences_predict = model(sequences_train).view(-1, output_n, joints_to_consider, 3)

                # Insert your code below. You will need to iteratively predict the next frames
                # and feed them back to the model until you reach the desired number of frames.
                # second chunk: feed the last 10 predicted frames back to the model
                input_frames = sequences_predict[:, -10:, :, :].permute(0, 3, 1, 2)
                new_frames = model(input_frames).view(-1, output_n, joints_to_consider, 3)
                sequences_predict = torch.cat((sequences_predict, new_frames), dim=1)

                # third chunk: repeat once more, reaching 30 frames in total
                input_frames = sequences_predict[:, -10:, :, :].permute(0, 3, 1, 2)
                new_frames = model(input_frames).view(-1, output_n, joints_to_consider, 3)
                sequences_predict = torch.cat((sequences_predict, new_frames), dim=1)

                # trim the 5 extra frames to keep exactly 25
                sequences_predict = sequences_predict[:, :-5, :, :]

                sequences_predict = sequences_predict.contiguous().view(-1, 25, len(dim_used))

                all_joints_seq[:, :, dim_used] = sequences_predict
                all_joints_seq[:, :, index_to_ignore] = all_joints_seq[:, :, index_to_equal]

                loss = mpjpe_error(all_joints_seq.view(-1, 25, 32, 3), sequences_gt.view(-1, 25, 32, 3))

                total_time += time.time() - running_time
                counter += 1

                running_loss += loss*batch_dim
                accum_loss += loss*batch_dim

        print(str(action), ': ', str(np.round((running_loss/n).item(), 1)))
        n_batches += n

    print('\nAverage: ' + str(np.round((accum_loss/n_batches).item(), 1)))
    print('Prediction time: ', total_time/counter)

# In[34]:

# Insert your code below where you want to load the model and test it.
# You need to specify the path to the model checkpoint file and call the test function.
ckpt_path = model_path_iterative + model_name + f"_epoch_{35}.pt"
test(ckpt_path=ckpt_path)

# ## YOUR custom model (**3 Points**)
# In this exercise, you will implement a Transformer-like network (based on the Theory notebook) for this specific task. You can use the Transformer's Encoder and implement your own Decoder to predict future poses (e.g. RNN, MLP, CNN, TCN, ...).
# We won't provide any code for this exercise, but you can use the code provided in the Theory notebook as a starting point.
# The goal of this exercise is not to beat the previous model but to understand how to implement a Transformer network for this specific task. For this reason, the evaluation will be based on the code you write and the explanation you provide in the report rather than on the results.
#
# ### Performance BONUS (**Up to 2 Points**)
# - **1 Bonus Point** if the model achieves an A-MPJPE between 80 and 90 millimeters.
# - **2 Bonus Points** if the model achieves an A-MPJPE between 70 and 80 millimeters.

# In[35]:

# Input Shape: [batch_size, input_time, joints, 3]
#
# Encoder:
# Input shape: [batch_size, input_time, joints, 3]
# Output shape: [batch_size, input_time/output_time, joints, FREE]
#
# Decoder:
# Input shape: [batch_size, input_time/output_time, joints, FREE]
# Output shape: [batch_size, output_time, joints, 3]
#
# Hint: Transformers often take an input of shape [batch_size, time, joints*channels];
# use the reshape or view function to match the dimensionality.

# In[36]:

from transformer import Transformer, subsequent_mask, transformer_inputs, train

# A Transformer network based on the *Theory_Notebook*, where no changes have been made to either the Encoder or the Decoder.
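# The reshape hint above, made concrete on a dummy tensor (shapes are illustrative):

# In[ ]:

x = torch.randn(2, 10, 22, 3)                        # [batch, time, joints, channels]
flat = x.view(x.shape[0], x.shape[1], -1)            # [batch, time, joints*channels] = [2, 10, 66]
restored = flat.view(x.shape[0], x.shape[1], 22, 3)  # back to [2, 10, 22, 3]
print(flat.shape, restored.shape)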
# In[38]:

input_n = 10  # number of frames to train on (default=10)
output_n = 25  # number of frames to predict on
skip_rate = 1  # skip rate of frames
batch_size = 256

path = './data/h3.6m/h3.6m/dataset'
c_model_path = './checkpoints_transformer/'  # path to the model checkpoint file

print('Loading Train Dataset...')
dataset = datasets.Datasets(path, input_n, output_n, skip_rate, split=0)
print('Loading Validation Dataset...\n')
vald_dataset = datasets.Datasets(path, input_n, output_n, skip_rate, split=1)

print('>>> Training dataset length: {:d}'.format(dataset.__len__()))
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

print('>>> Validation dataset length: {:d}'.format(vald_dataset.__len__()))
vald_loader = DataLoader(vald_dataset, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

# In[58]:

torch.manual_seed(0)

num_heads = 4
d_model = 512
dim_feedforward = 1024
dropout = 0.4
coder_blocks = 3

tf = Transformer(enc_inp_size=33, dec_inp_size=34, dec_out_size=66,
                 N=coder_blocks, d_model=d_model,
                 dim_feedforward=dim_feedforward,
                 num_heads=num_heads, dropout=dropout).to(device)

# In[59]:

# Arguments to setup the optimizer
lr = 3e-05  # learning rate
use_scheduler = True  # use MultiStepLR scheduler
milestones = [10, 30]  # the epochs after which the learning rate is adjusted by gamma
gamma = 0.1  # multiplicative factor applied to the learning rate at each milestone epoch
weight_decay = 1e-02  # weight decay (L2 penalty)

optimizer = optim.Adam(tf.parameters(), lr=lr, weight_decay=weight_decay)

if use_scheduler:
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=gamma)

n_epochs_tf = 15
train(data_loader, vald_loader, n_epochs_tf, tf, scheduler, optimizer, device, c_model_path)

# In[ ]:

get_ipython().system('zip -r -q /content/images_dir.zip /content/AdavancedML/Assignment_3/Practice/images_dir')