In the previous class, we discussed machine learning. Today, we will focus on deep learning.
Learning is largely divided into two parts: Supervised Learning and Unsupervised Learning. We will focus on unsupervised learning in the assignment.
Supervised Learning is a methodology where a model is trained using input data $x$ and the corresponding labels $y$ to predict the correct output for a given input. In supervised learning, the dataset includes labeled data, and the model learns the relationship between the inputs and outputs based on these labels.
When paired data $(x, y)$ (input and corresponding output) is given, the objective is to find the optimal parameters $\theta$ of a model $f_\text{NN}$ that best approximates the mapping from $x$ to $y$. Depending on the nature of the output $y$, supervised learning can be broadly categorized into two types:
Objective: Predict the class to which the data belongs.
Output: Binary classification or multi-class classification.
Examples:
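Loss Function: Commonly, Cross-Entropy Loss is used, defined as:
$$\mathbf{L}(Y, P) = -\frac{1}{N} \sum_{i=1}^{N} \sum_{c=1}^{N_c} y_{i,c} \log p_{i,c}$$
where $y_{i,c}$ and $p_{i,c}$ denote the $c$-th entries of $y_i$ and $p_i$, and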
$N$: Number of data samples.
$N_c$: Number of classes
$Y = \bmat{y_1 & y_2& \cdots& y_N}^T$ where $y_i \in \mathbb{R}^{N_c}$ is the one-hot encoded true label vector for the $i$-th data sample.
$P = \bmat{p_1 & p_2& \cdots& p_N}^T$ where $p_i \in \mathbb{R}^{N_c}$ is the predicted probability vector for each class for $i$-th data sample.
Here is an example of classification.
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader, TensorDataset
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.manifold import TSNE
from sklearn.model_selection import train_test_split
# Run on the first CUDA GPU when one is available; otherwise fall back to the
# CPU so the notebook still executes on machines without a GPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
The Stellar Classification Dataset (SDSS17) contains spectroscopic data of stars, galaxies, and quasars collected from the Sloan Digital Sky Survey (SDSS). It is commonly used for classifying astronomical objects. Below are the details for processing the dataset and building the training dataset.
Load the archive.zip
File
Split data into train data and test data.
Normalization
Apply min-max normalization to each of the features 'u', 'g', 'r', 'i', 'z', 'alpha', 'delta', 'redshift':
$$\hat{x} = \frac{x - x_\text{min}}{x_\text{max} - x_\text{min}}$$
- This ensures that no particular feature dominates the learning process, improving model stability and preventing overfitting.
- Use the minimum and maximum values from the training set to normalize both the training and testing sets. This ensures consistency and prevents information leakage from the test set into the training process.
Convert the `numpy` arrays to `torch` tensors:
- Convert the features to `torch.FloatTensor` for compatibility with PyTorch.
- Convert the labels to `torch.LongTensor`, as they represent categorical data for classification.
## 1. load file
df = pd.read_csv('https://jonghank.github.io/ase3001/files/star_classification.csv')
selected = ['u', 'g', 'r', 'i', 'z', 'alpha', 'delta', 'redshift']
data = df[selected].to_numpy()
# string class to integer
# np.unique returns the sorted class names (tmp); with return_inverse=True it
# also returns, for every sample, the index of its class in that sorted array.
# This yields the same name -> index mapping as relabelling class by class,
# but produces a proper integer array instead of a mixed object array.
tmp, _cls = np.unique(df['class'].to_numpy(), return_inverse=True)
#----------------------------#
## 2. train test split
# stratify keeps the class proportions equal in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    data, _cls, test_size=0.2, random_state=42, stratify=_cls
)
#----------------------------#
## 3. normalization
# Min-max statistics come from the training split only and are reused for the
# test split, so no information from the test data leaks into training.
_min = np.min(X_train, axis=0)
_max = np.max(X_train, axis=0)
# A feature that is constant over the training split has zero range; divide by
# 1 in that case so it maps to 0 instead of producing NaN/inf.
_range = np.where(_max > _min, _max - _min, 1.0)
X_train = (X_train - _min) / _range
X_test = (X_test - _min) / _range
#----------------------------#
## 4. numpy to torch
# Features become float tensors; labels become long (int64) tensors.
train_dataset = TensorDataset(torch.FloatTensor(X_train), torch.LongTensor(y_train.astype(np.int64)))
test_dataset = TensorDataset(torch.FloatTensor(X_test), torch.LongTensor(y_test.astype(np.int64)))
train_dataloader = DataLoader(train_dataset, batch_size=512, shuffle=True)
# Evaluation does not depend on sample order, so the test loader is not shuffled.
test_dataloader = DataLoader(test_dataset, batch_size=512, shuffle=False)
Model
'u', 'g', 'r', 'i', 'z', 'alpha', 'delta', 'redshift'
Loss function
Use `torch.nn.CrossEntropyLoss` to train the model.
Optimizer
Use `torch.optim.Adam`
# ...to optimize the model parameters. Adam is widely used for its adaptability and efficiency in deep learning tasks.
class custom_model(nn.Module):
    """Fully connected network with two hidden layers and ReLU activations.

    Args:
        dim_in: Number of input features.
        dim_out: Number of output units.
        dim_h1: Width of the first hidden layer (default 100).
        dim_h2: Width of the second hidden layer (default 200).
    """

    def __init__(self, dim_in, dim_out, dim_h1=100, dim_h2=200):
        super().__init__()
        # Layers are created in input -> hidden -> output order.
        self.ilayer = nn.Linear(dim_in, dim_h1)
        self.hlayer = nn.Linear(dim_h1, dim_h2)
        self.olayer = nn.Linear(dim_h2, dim_out)
        self.act_func = nn.ReLU()

    def forward(self, x):
        """Return the output-layer values for ``x``.

        No activation is applied after the output layer, so the result is the
        raw linear output.
        """
        hidden = self.act_func(self.ilayer(x))
        hidden = self.act_func(self.hlayer(hidden))
        return self.olayer(hidden)
# Hyper-parameters, model, loss and optimizer for the classifier.
lr = 1e-3
# One input per selected feature column; three output classes.
_input_model = dict(dim_in=len(selected), dim_out=3)
model = custom_model(**_input_model)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), weight_decay=1e-8, lr=lr)
# Per-epoch training history, filled in by the training loop.
losses, accs = [], []
Call `optimizer.zero_grad()` to reset the gradients calculated in the previous step.
Call `loss.backward()` to compute the gradients of the model parameters with respect to the loss.
Call `optimizer.step()`
# ...to update the model parameters based on the computed gradients.
def get_acc(model, dataloader, n_cls, device):
    """Evaluate ``model`` on ``dataloader`` and build a confusion matrix.

    Args:
        model: ``nn.Module`` whose output holds one score per class along
            dimension 1.
        dataloader: Iterable of ``(inputs, targets)`` batches, where targets
            are integer class indices.
        n_cls: Number of classes (side length of the confusion matrix).
        device: Device the inputs are moved to before the forward pass.

    Returns:
        ``(acc, cm)``: ``acc`` is the fraction of correctly classified
        samples (NaN when the dataloader yields no samples). ``cm`` is an
        ``(n_cls, n_cls)`` array indexed ``[true, predicted]`` in which every
        non-empty row is normalised to sum to 1.
    """
    # Remember the current mode so it can be restored afterwards instead of
    # unconditionally switching the model to training mode.
    was_training = model.training
    model.eval()
    cm = np.zeros((n_cls, n_cls))
    with torch.no_grad():
        for _in, tgt in dataloader:
            _out = model(_in.to(device))
            predicted = _out.argmax(dim=1).cpu().numpy()
            actual = tgt.cpu().numpy()
            # Unbuffered add: repeated (true, predicted) pairs in one batch
            # each count once, without a Python loop over samples.
            np.add.at(cm, (actual, predicted), 1)
    total = cm.sum()
    acc = np.trace(cm) / total if total else float('nan')
    # Row-normalise; rows of classes that never occur are left as zeros.
    row_sums = cm.sum(axis=1)
    seen = row_sums != 0
    cm[seen] = cm[seen] / row_sums[seen, np.newaxis]
    model.train(was_training)
    return acc, cm
# Train the classifier; each pass of the outer loop is one full sweep over
# train_dataloader.
epoch = 500
model.train()
progress = tqdm(range(epoch))
for i in progress:
    batch_loss = 0
    for _in, tgt in train_dataloader:
        ## tensor: CPU -> GPU
        _in, tgt = _in.to(device), tgt.to(device)
        #---------------------------------------#
        # Standard update: clear old gradients, forward, backward, step.
        optimizer.zero_grad()
        out = model(_in)
        loss = criterion(out, tgt)
        loss.backward()
        optimizer.step()
        batch_loss += loss.item()
    # Record the mean batch loss of this sweep.
    mean_loss = batch_loss / len(train_dataloader)
    losses.append(mean_loss)
    # Accuracy is measured on the training data after the sweep.
    acc = get_acc(model, train_dataloader, 3, device)[0]
    accs.append(acc)
    progress.set_description(f"Training Loss: {losses[-1]:e} Training Accuracy:{acc}")
Training Loss: 9.632788e-02 Training Accuracy:0.9701625: 100%|██████████| 500/500 [42:38<00:00, 5.12s/it]
# Side-by-side log-scale plots of the recorded training loss and accuracy.
fig, axes = plt.subplots(1, 2, figsize=(1.6*4*2, 1*4), dpi=100)
curves = [(losses, 'Training Loss'), (accs, 'Training Accuracy')]
for ax, (history, title) in zip(axes, curves):
    ax.semilogy(history)
    ax.set_title(title)
    ax.grid()
    ax.set_xlabel('Iterations')
Text(0.5, 0, 'Iterations')
# Evaluate on the held-out test split so the figure matches its title.
acc, cm = get_acc(model, test_dataloader, 3, device)
#------------------------------------------------------#
# Class indices were assigned from the sorted unique class names, so the tick
# labels must be in that same sorted order. Series.unique() returns the names
# in order of appearance, which can mislabel the rows and columns.
labels = np.unique(df['class'].to_numpy())
size = 5
plt.figure(dpi=100, figsize=(1.6*size, size))
# Rows are true classes and columns are predicted classes; each row of cm is
# already normalised, so cells are shown as percentages.
sns.heatmap(cm, annot=True, fmt='.0%', cmap='Blues', xticklabels=labels, yticklabels=labels)
plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.axis('equal')
plt.title('Confusion Matrix: Test Data')
plt.show()
Objective: Predict continuous values.
Output: Real-valued numbers.
Examples:
Loss function:
Commonly used loss functions for regression tasks include:
Mean Squared Error(MSE)
$$\mathbf{L}(y, \hat{y}) = \frac{1}{N} \sum_{i=1}^N (y_i - \hat{y}_i)^2$$
Mean Absolute Error (MAE):
$$\mathbf{L}(y, \hat{y}) = \frac{1}{N} \sum_{i=1}^N |y_i - \hat{y}_i|$$
<br>
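Let's train a deep model to approximate the given spiral defined by the parametric equations:
$$x(\theta) = \frac{\theta}{12} \cos\theta, \quad y(\theta) = \frac{\theta}{12} \sin\theta, \quad \theta \in [0, 4\pi]$$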
# Sample the spiral at 100 angles in [0, 4*pi] and draw it.
theta = np.linspace(0, 4 * np.pi, 100)
x, y = theta / 12 * np.cos(theta), theta / 12 * np.sin(theta)

plt.figure(dpi=100, figsize=(6, 6))
plt.plot(x, y, label='2D Spiral')
plt.xlabel('X')
plt.ylabel('Y')
plt.title('2D Spiral Plot')
plt.grid(True)
# Equal axis scaling keeps the spiral from looking stretched.
plt.axis('equal')
plt.show()
To start, we generate the spiral dataset using the parametric equations provided above. This dataset will serve as the ground truth for training the model. $\theta$ will be sampled over a specified range, and the corresponding $(x, y)$ coordinates will be computed.
# Training set for the regressor: 50 angles in [0, 4*pi] as inputs and the
# matching spiral coordinates (x, y) as targets.
theta = np.linspace(0, 4 * np.pi, 50)
x = theta / 12 * np.cos(theta)
y = theta / 12 * np.sin(theta)
# Inputs as a float32 column, one angle per row.
_in = torch.FloatTensor(theta).unsqueeze(1)
# Targets as float32 rows of (x, y).
out = torch.FloatTensor(np.column_stack((x, y)))
train_dataset = TensorDataset(_in, out)
train_dataloader = DataLoader(train_dataset, batch_size=512)
We define a neural network model to map $\theta$ to $(x, y)$. We use `nn.MSELoss()`, which minimizes the mean squared error between the predicted and actual coordinates. The optimizer is identical to the one used previously.
class custom_model(nn.Module):
    """Fully connected network with two hidden layers and ReLU activations.

    Args:
        dim_in: Number of input features.
        dim_out: Number of output units.
        dim_h1: Width of the first hidden layer (default 1000).
        dim_h2: Width of the second hidden layer (default 500).
    """

    def __init__(self, dim_in, dim_out, dim_h1=1000, dim_h2=500):
        super().__init__()
        # Layers are created in input -> hidden -> output order.
        self.ilayer = nn.Linear(dim_in, dim_h1)
        self.hlayer = nn.Linear(dim_h1, dim_h2)
        self.olayer = nn.Linear(dim_h2, dim_out)
        self.act_func = nn.ReLU()

    def forward(self, x):
        """Return the output-layer values for ``x``.

        No activation is applied after the output layer, so the result is the
        raw linear output.
        """
        hidden = self.act_func(self.ilayer(x))
        hidden = self.act_func(self.hlayer(hidden))
        return self.olayer(hidden)
# Model, loss and optimizer for the spiral regressor: one input (theta) and
# two outputs (x, y).
lr = 1e-3
_input_model = dict(dim_in=1, dim_out=2)
model = custom_model(**_input_model)
model = model.to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), weight_decay=1e-8, lr=lr)
# Mean training loss of every epoch.
losses = []

epoch = 30000
model.train()
progress = tqdm(range(epoch))
for i in progress:
    batch_loss = 0
    for _in, tgt in train_dataloader:
        ## tensor: CPU -> GPU
        _in, tgt = _in.to(device), tgt.to(device)
        #---------------------------------------#
        # Standard update: clear old gradients, forward, backward, step.
        optimizer.zero_grad()
        out = model(_in)
        loss = criterion(out, tgt)
        loss.backward()
        optimizer.step()
        batch_loss += loss.item()
    mean_loss = batch_loss / len(train_dataloader)
    losses.append(mean_loss)
    progress.set_description(f"Training Loss: {losses[-1]:e}")
Training Loss: 3.028496e-05: 100%|██████████| 30000/30000 [02:42<00:00, 184.18it/s]
# Log-scale plot of the recorded per-epoch training loss.
plt.figure(dpi=100, figsize=(1.6*4, 1*4))
plt.semilogy(losses)
plt.xlabel('Iterations')
plt.title('Training Loss')
plt.grid()
plt.show()
Using the trained model, we predict the values of $(x, y)$ for both the trained points and a new set of test points.
# Reference spiral at 50 angles in [0, 4*pi], drawn as a line with hollow
# markers.
theta = np.linspace(0, 4 * np.pi, 50)
x, y = theta / 12 * np.cos(theta), theta / 12 * np.sin(theta)
plt.figure(dpi=100, figsize=(6, 6))
plt.plot(x, y)
plt.scatter(x, y, s=50, facecolors='none', edgecolors='blue', label='Trained Points')

# Query the model on a denser grid of 200 angles over the same range.
theta = np.linspace(0, 4 * np.pi, 200)
theta = torch.FloatTensor(theta).unsqueeze(1).to(device)
model.eval()
with torch.no_grad():
    out = model(theta).cpu().numpy()
# Column 0 holds the predicted x values, column 1 the predicted y values.
plt.plot(out[:, 0], out[:, 1], '*', label='Predicted Points')
plt.xlabel('X')
plt.ylabel('Y')
plt.title('2D Spiral Plot')
plt.grid(True)
plt.axis('equal')
plt.legend()
plt.show()