#!/usr/bin/env python
# coding: utf-8

# ## Learning Rate Schedulers with Skorch
#
# This notebook demonstrates three learning rate schedulers in skorch: StepLR, ReduceLROnPlateau, and CosineAnnealingLR. This notebook was contributed by [Parag Ekbote](https://github.com/ParagEkbote).
#
# First, you will need to install the following libraries: skorch, numpy, matplotlib, and torch (scikit-learn is pulled in as a skorch dependency). An optional install cell follows.
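# The cell below is an optional install step added for convenience (it was not part of the original notebook); uncomment it when running in a fresh environment such as Colab. Versions are deliberately not pinned; any recent release should work.

# In[ ]:


# !pip install skorch numpy matplotlib torch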
# ## Imports

# In[1]:


import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from skorch import NeuralNetClassifier
from skorch.callbacks import Callback, LRScheduler


# ## Data Preparation
#
# The dataset is split into train and test sets. The features are standardized and cast to float32, and the labels are reshaped to column vectors so they are compatible with `BCEWithLogitsLoss`.

# In[2]:


def prepare_data():
    # Load the dataset
    data = load_breast_cancer()
    X, y = data.data, data.target

    # Split into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Standardize the features and cast to float32 for PyTorch
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train).astype(np.float32)
    X_test_scaled = scaler.transform(X_test).astype(np.float32)

    # Reshape the labels to column vectors for BCEWithLogitsLoss
    y_train = y_train.astype(np.float32).reshape(-1, 1)
    y_test = y_test.astype(np.float32).reshape(-1, 1)

    return X_train_scaled, X_test_scaled, y_train, y_test


# ## Neural Net Parameters
#
# BreastCancerNet is a small feed-forward network for binary classification. It consists of an input layer, one hidden layer with a ReLU activation, and a single output unit that emits a logit. The input and hidden dimensions are parameterized to allow flexibility.

# In[3]:


class BreastCancerNet(nn.Module):
    def __init__(self, input_dim=30, hidden_dim=64):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x


# ## Learning Rate Scheduler Parameters
#
# 1) StepLR:
#
# - Reduces the learning rate by a fixed factor (gamma=0.3) every 100 epochs (step_size=100).
# - Useful for steady, predictable learning rate decay.
#
# 2) ReduceLROnPlateau:
#
# - Reduces the learning rate dynamically when the monitored metric (here, the training loss) plateaus.
# - Multiplies the learning rate by a factor (factor=0.7) after 5 epochs without improvement (patience=5).
# - Ideal for tasks where loss stagnation indicates the need for a smaller learning rate.
#
# 3) CosineAnnealingLR:
#
# - Anneals the learning rate along a cosine curve over the full run (T_max=1000). Since T_max equals the number of epochs here, the rate decays once from its initial value towards zero; periodic restarts would require CosineAnnealingWarmRestarts instead.
#
# We will now train the neural network with the different LR schedulers; a short preview of each schedule on a dummy optimizer comes first.
#
# Note: We are training with synthetic data. The breast cancer dataset prepared above via `prepare_data` can also be used.
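# The sketch below is an illustration added alongside the original notebook (it is not part of the training pipeline): it shows how each scheduler moves the learning rate on a throwaway SGD optimizer, using shorter horizons so the effect is visible in a few lines of output.

# In[ ]:


def preview_lr(scheduler_cls, n_epochs, **params):
    # Schedulers mutate the optimizer's lr in place, so build a fresh
    # throwaway optimizer for each preview. (PyTorch may warn that
    # optimizer.step() was never called; that is harmless here.)
    opt = optim.SGD([torch.zeros(1, requires_grad=True)], lr=0.05)
    sched = scheduler_cls(opt, **params)
    lrs = []
    for _ in range(n_epochs):
        lrs.append(round(opt.param_groups[0]['lr'], 5))
        if isinstance(sched, torch.optim.lr_scheduler.ReduceLROnPlateau):
            # ReduceLROnPlateau steps on a metric; a constant value simulates
            # a plateau, so the lr drops once patience is exhausted.
            sched.step(1.0)
        else:
            sched.step()
    return lrs


print("StepLR:           ", preview_lr(torch.optim.lr_scheduler.StepLR, 6, step_size=2, gamma=0.3))
print("ReduceLROnPlateau:", preview_lr(torch.optim.lr_scheduler.ReduceLROnPlateau, 8, mode="min", factor=0.7, patience=2))
print("CosineAnnealingLR:", preview_lr(torch.optim.lr_scheduler.CosineAnnealingLR, 6, T_max=5))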
# In[5]:


class LRCaptureCallback(Callback):
    def on_epoch_end(self, net, **kwargs):
        # Record the optimizer's current learning rate for this epoch. This
        # callback is listed after the LRScheduler callback, so the recorded
        # value reflects the scheduler step that was just taken.
        lr = net.optimizer_.param_groups[0]['lr']
        net.history.record('lr', lr)


# Training function with learning rate tracking
def train_schedulers(X_train, X_test, y_train, y_test, lr=0.05, epochs=1000, hidden_dim=128):
    # Cast to the dtypes skorch expects: float32 features, column-vector labels
    X_train = X_train.astype(np.float32)
    X_test = X_test.astype(np.float32)
    y_train = y_train.astype(np.float32).reshape(-1, 1)
    y_test = y_test.astype(np.float32).reshape(-1, 1)

    # Define learning rate schedulers
    schedulers = [
        {"name": "StepLR",
         "scheduler_class": torch.optim.lr_scheduler.StepLR,
         "params": {"step_size": 100, "gamma": 0.3}},
        {"name": "ReduceLROnPlateau",
         "scheduler_class": torch.optim.lr_scheduler.ReduceLROnPlateau,
         # skorch's LRScheduler monitors the training loss by default, which
         # we want to minimize, hence mode="min".
         "params": {"mode": "min", "factor": 0.7, "patience": 5}},
        {"name": "CosineAnnealingLR",
         "scheduler_class": torch.optim.lr_scheduler.CosineAnnealingLR,
         "params": {"T_max": 1000}},
    ]

    results = {}
    for scheduler_info in schedulers:
        print(f"\nTraining with {scheduler_info['name']} scheduler...")

        # Set up the neural network with the specified scheduler
        net = NeuralNetClassifier(
            module=BreastCancerNet,
            max_epochs=epochs,
            lr=lr,
            optimizer=optim.SGD,
            criterion=nn.BCEWithLogitsLoss,
            callbacks=[
                LRScheduler(
                    policy=scheduler_info["scheduler_class"],
                    **scheduler_info["params"]
                ),
                LRCaptureCallback(),
            ],
            iterator_train__shuffle=True,
            train_split=None,
            module__input_dim=X_train.shape[1],
            module__hidden_dim=hidden_dim,
            verbose=0
        )

        # Train the model
        net.fit(X_train, y_train)

        # Evaluate the model on the test set
        score = net.score(X_test, y_test)
        print(f"{scheduler_info['name']} Test Score: {score:.4f}")

        # Extract the recorded learning rates from the history
        lrs = [event['lr'] for event in net.history if 'lr' in event]
        print(f"{scheduler_info['name']} Recorded Learning Rates: {lrs[:5]}...")

        # Save results
        results[scheduler_info["name"]] = {
            "model": net,
            "learning_rates": lrs,
            "score": score,
        }

    print("\nFinal Results Summary:")
    for scheduler_name, result in results.items():
        print(f"\nScheduler: {scheduler_name}")
        print(f"Test Score: {result['score']:.4f}")
        print(f"First 5 Learning Rates: {result['learning_rates'][:5]}")

    return results


# Generate synthetic data
X_train = np.random.rand(100, 30)
X_test = np.random.rand(40, 30)
y_train = np.random.randint(0, 2, size=(100,))
y_test = np.random.randint(0, 2, size=(40,))

# Train with schedulers and evaluate
results = train_schedulers(X_train, X_test, y_train, y_test, lr=0.05, epochs=1000, hidden_dim=128)
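# Before plotting, a quick numeric check (added here for illustration, not part of the original notebook) confirms the recorded schedule behaves as configured. With step_size=100 and gamma=0.3 over 1000 epochs, StepLR should end near 0.05 * 0.3**10, assuming the scheduler steps once per epoch and LRCaptureCallback records after the step.

# In[ ]:


step_lrs = results["StepLR"]["learning_rates"]
print("first recorded LR:", step_lrs[0])
print("last recorded LR: ", step_lrs[-1])
# Expected if StepLR stepped once per epoch: ten decays of gamma=0.3
print("expected final LR:", 0.05 * 0.3 ** 10)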
# ## Visualization of Results
#
# We observe the following results:
#
# 1) StepLR reduces the learning rate in fixed steps, ReduceLROnPlateau adaptively lowers it when progress stagnates, and CosineAnnealingLR follows a smooth cosine decay.
#
# 2) The right scheduler depends on the task: StepLR suits predefined decay schedules, ReduceLROnPlateau suits dynamic adjustment to training progress, and cosine annealing (with warm restarts, via CosineAnnealingWarmRestarts) suits periodic resets to escape local minima.

# In[6]:


def plot_results(results):
    plt.figure(figsize=(12, 6))
    for scheduler_name, result in results.items():
        # Extract the learning rates recorded in the net's history
        learning_rates = result["model"].history[:, 'lr']
        plt.plot(learning_rates, label=scheduler_name)
    plt.title("Learning Rate Schedules")
    plt.xlabel("Epochs")
    plt.ylabel("Learning Rate")
    plt.legend()
    plt.grid(True)
    plt.show()


plot_results(results)
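# The schedules span several orders of magnitude, so a log-scaled y-axis (an optional variant added here, not part of the original notebook) makes the StepLR drops and the cosine tail easier to compare.

# In[ ]:


plt.figure(figsize=(12, 6))
for scheduler_name, result in results.items():
    # Note: the final CosineAnnealingLR value is exactly 0 and is masked by
    # matplotlib on a log axis.
    plt.semilogy(result["model"].history[:, 'lr'], label=scheduler_name)
plt.title("Learning Rate Schedules (log scale)")
plt.xlabel("Epochs")
plt.ylabel("Learning Rate")
plt.legend()
plt.grid(True)
plt.show()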