#!/usr/bin/env python
# coding: utf-8

# Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.
# - Author: Sebastian Raschka
# - GitHub Repository: https://github.com/rasbt/deeplearning-models

# In[1]:


get_ipython().run_line_magic('load_ext', 'watermark')
get_ipython().run_line_magic('watermark', "-a 'Sebastian Raschka' -v -p torch")


# - Runs on CPU or GPU (if available)

# # Model Zoo -- Logistic Regression

# Implementation of *classic* logistic regression for binary class labels.

# ## Imports

# In[2]:


get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt
import numpy as np
from io import BytesIO

import torch
import torch.nn.functional as F


# ## Preparing a toy dataset

# In[3]:


##########################
### DATASET
##########################

ds = np.lib.DataSource()

# Read the raw text inside a context manager so the handle is always closed.
with ds.open('http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data') as fp:
    raw_text = fp.read()

# Only the first two columns and the first 100 rows are used; the first 50
# rows are labelled 0 and the next 50 are labelled 1.
x = np.genfromtxt(BytesIO(raw_text.encode()),
                  delimiter=',', usecols=range(2), max_rows=100)
y = np.zeros(100)
y[50:] = 1

# Shuffle with a fixed seed, then split 25 test / 75 train.
np.random.seed(1)
idx = np.arange(y.shape[0])
np.random.shuffle(idx)
X_test, y_test = x[idx[:25]], y[idx[:25]]
X_train, y_train = x[idx[25:]], y[idx[25:]]

# Standardize both splits with the training-set statistics.
mu, std = np.mean(X_train, axis=0), np.std(X_train, axis=0)
X_train, X_test = (X_train - mu) / std, (X_test - mu) / std

fig, ax = plt.subplots(1, 2, figsize=(7, 2.5))
ax[0].scatter(X_train[y_train == 1, 0], X_train[y_train == 1, 1])
ax[0].scatter(X_train[y_train == 0, 0], X_train[y_train == 0, 1])
ax[1].scatter(X_test[y_test == 1, 0], X_test[y_test == 1, 1])
ax[1].scatter(X_test[y_test == 0, 0], X_test[y_test == 0, 1])
plt.show()


# ## Low-level implementation with manual gradients

# In[4]:


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


def custom_where(cond, x_1, x_2):
    """Blend x_1 and x_2 arithmetically: cond * x_1 + (1 - cond) * x_2.

    `cond` must be numeric (e.g. a 0/1 float mask); callers cast boolean
    masks with `.float()` before passing them in.
    """
    return (cond * x_1) + ((1-cond) * x_2)


class LogisticRegression1():
    """Logistic regression with hand-written gradient updates."""

    def __init__(self, num_features):
        """Create zero-initialized weights (num_features x 1) and bias (1)."""
        self.num_features = num_features
        self.weights = torch.zeros(num_features, 1,
                                   dtype=torch.float32, device=device)
        self.bias = torch.zeros(1, dtype=torch.float32, device=device)

    def forward(self, x):
        """Return sigmoid(x @ weights + bias)."""
        linear = torch.add(torch.mm(x, self.weights), self.bias)
        probas = self._sigmoid(linear)
        return probas

    def backward(self, probas, y):
        """Return the per-example errors `y - probas` (probas flattened)."""
        errors = y - probas.view(-1)
        return errors

    def predict_labels(self, x):
        """Return 1 where the predicted probability is >= 0.5, else 0."""
        probas = self.forward(x)
        # Cast the boolean mask to float: custom_where computes `1 - cond`,
        # which is not defined for boolean tensors.
        labels = custom_where((probas >= .5).float(), 1, 0)
        return labels

    def evaluate(self, x, y):
        """Return the fraction of examples in x whose label matches y."""
        labels = self.predict_labels(x).float()
        # Cast the match count to float before dividing so the result is a
        # fraction regardless of how integer tensor division is defined.
        accuracy = torch.sum(labels.view(-1) == y).float() / y.size()[0]
        return accuracy

    def _sigmoid(self, z):
        """Element-wise logistic function 1 / (1 + exp(-z))."""
        return 1. / (1. + torch.exp(-z))

    def _logit_cost(self, y, proba):
        """Return the summed (not averaged) binary cross-entropy."""
        tmp1 = torch.mm(-y.view(1, -1), torch.log(proba))
        tmp2 = torch.mm((1 - y).view(1, -1), torch.log(1 - proba))
        return tmp1 - tmp2

    def train(self, x, y, num_epochs, learning_rate=0.01):
        """Run `num_epochs` full-batch update steps, printing progress."""
        for e in range(num_epochs):

            #### Compute outputs ####
            probas = self.forward(x)

            #### Compute gradients ####
            errors = self.backward(probas, y)
            # x^T (y - p): added to the weights below, hence "negative" grad.
            neg_grad = torch.mm(x.transpose(0, 1), errors.view(-1, 1))

            #### Update weights ####
            self.weights += learning_rate * neg_grad
            self.bias += learning_rate * torch.sum(errors)

            #### Logging ####
            print('Epoch: %03d' % (e+1), end="")
            print(' | Train ACC: %.3f' % self.evaluate(x, y), end="")
            print(' | Cost: %.3f' % self._logit_cost(y, self.forward(x)))


# In[5]:


X_train_tensor = torch.tensor(X_train, dtype=torch.float32, device=device)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32, device=device)

logr = LogisticRegression1(num_features=2)
logr.train(X_train_tensor, y_train_tensor, num_epochs=10, learning_rate=0.1)

print('\nModel parameters:')
print(' Weights: %s' % logr.weights)
print(' Bias: %s' % logr.bias)


# #### Evaluating the Model

# In[6]:


X_test_tensor = torch.tensor(X_test, dtype=torch.float32, device=device)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32, device=device)

test_acc = logr.evaluate(X_test_tensor, y_test_tensor)
print('Test set accuracy: %.2f%%' % (test_acc*100))


# In[7]:


##########################
### 2D Decision Boundary
##########################

# Move the parameters to the CPU and use plain floats for plotting.
w = logr.weights.detach().cpu().view(-1)
b = logr.bias.detach().cpu()

# Boundary: w[0] * x + w[1] * y + b = 0, solved for y at both ends.
x_min = -2
y_min = ((-(w[0] * x_min) - b[0]) / w[1]).item()

x_max = 2
y_max = ((-(w[0] * x_max) - b[0]) / w[1]).item()


fig, ax = plt.subplots(1, 2, sharex=True, figsize=(7, 3))

ax[0].plot([x_min, x_max], [y_min, y_max])
ax[1].plot([x_min, x_max], [y_min, y_max])

ax[0].scatter(X_train[y_train==0, 0], X_train[y_train==0, 1], label='class 0', marker='o')
ax[0].scatter(X_train[y_train==1, 0], X_train[y_train==1, 1], label='class 1', marker='s')

ax[1].scatter(X_test[y_test==0, 0], X_test[y_test==0, 1], label='class 0', marker='o')
ax[1].scatter(X_test[y_test==1, 0], X_test[y_test==1, 1], label='class 1', marker='s')

ax[1].legend(loc='upper left')
plt.show()


# ## Low-level implementation using autograd

# In[8]:


def custom_where(cond, x_1, x_2):
    """Blend x_1 and x_2 arithmetically: cond * x_1 + (1 - cond) * x_2.

    `cond` must be numeric (e.g. a 0/1 float mask); callers cast boolean
    masks with `.float()` before passing them in.
    """
    return (cond * x_1) + ((1-cond) * x_2)


class LogisticRegression2():
    """Logistic regression whose gradients are computed by autograd."""

    def __init__(self, num_features):
        """Create zero-initialized weights and bias that track gradients."""
        self.num_features = num_features

        self.weights = torch.zeros(num_features, 1,
                                   dtype=torch.float32,
                                   device=device,
                                   requires_grad=True)  # req. for autograd!
        self.bias = torch.zeros(1,
                                dtype=torch.float32,
                                device=device,
                                requires_grad=True)  # req. for autograd!

    def forward(self, x):
        """Return sigmoid(x @ weights + bias)."""
        linear = torch.add(torch.mm(x, self.weights), self.bias)
        probas = self._sigmoid(linear)
        return probas

    def predict_labels(self, x):
        """Return 1 where the predicted probability is >= 0.5, else 0."""
        probas = self.forward(x)
        labels = custom_where((probas >= .5).float(), 1, 0)
        return labels

    def evaluate(self, x, y):
        """Return the fraction of examples in x whose label matches y."""
        labels = self.predict_labels(x)
        accuracy = (torch.sum(labels.view(-1) == y.view(-1))).float() / y.size()[0]
        return accuracy

    def _sigmoid(self, z):
        """Element-wise logistic function 1 / (1 + exp(-z))."""
        return 1. / (1. + torch.exp(-z))

    def _logit_cost(self, y, proba):
        """Return the summed (not averaged) binary cross-entropy."""
        tmp1 = torch.mm(-y.view(1, -1), torch.log(proba))
        tmp2 = torch.mm((1 - y).view(1, -1), torch.log(1 - proba))
        return tmp1 - tmp2

    def train(self, x, y, num_epochs, learning_rate=0.01):
        """Run `num_epochs` full-batch update steps, printing progress."""
        for e in range(num_epochs):

            #### Compute outputs ####
            proba = self.forward(x)
            cost = self._logit_cost(y, proba)

            #### Compute gradients ####
            cost.backward()

            #### Update weights ####
            # Update in place without recording the step in the autograd graph.
            with torch.no_grad():
                self.weights -= learning_rate * self.weights.grad
                self.bias -= learning_rate * self.bias.grad

            #### Reset gradients to zero for next iteration ####
            self.weights.grad.zero_()
            self.bias.grad.zero_()

            #### Logging ####
            print('Epoch: %03d' % (e+1), end="")
            print(' | Train ACC: %.3f' % self.evaluate(x, y), end="")
            print(' | Cost: %.3f' % self._logit_cost(y, self.forward(x)))


# In[9]:


X_train_tensor = torch.tensor(X_train, dtype=torch.float32, device=device)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32, device=device)

logr = LogisticRegression2(num_features=2)
logr.train(X_train_tensor, y_train_tensor, num_epochs=10, learning_rate=0.1)

print('\nModel parameters:')
print(' Weights: %s' % logr.weights)
print(' Bias: %s' % logr.bias)


# #### Evaluating the Model

# In[10]:


X_test_tensor = torch.tensor(X_test, dtype=torch.float32, device=device)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32, device=device)

test_acc = logr.evaluate(X_test_tensor, y_test_tensor)
print('Test set accuracy: %.2f%%' % (test_acc*100))


# In[11]:


##########################
### 2D Decision Boundary
##########################

# These parameters require grad, so detach them (and move them to the CPU)
# before handing values to matplotlib.
w = logr.weights.detach().cpu().view(-1)
b = logr.bias.detach().cpu()

# Boundary: w[0] * x + w[1] * y + b = 0, solved for y at both ends.
x_min = -2
y_min = ((-(w[0] * x_min) - b[0]) / w[1]).item()

x_max = 2
y_max = ((-(w[0] * x_max) - b[0]) / w[1]).item()


fig, ax = plt.subplots(1, 2, sharex=True, figsize=(7, 3))

ax[0].plot([x_min, x_max], [y_min, y_max])
ax[1].plot([x_min, x_max], [y_min, y_max])

ax[0].scatter(X_train[y_train==0, 0], X_train[y_train==0, 1], label='class 0', marker='o')
ax[0].scatter(X_train[y_train==1, 0], X_train[y_train==1, 1], label='class 1', marker='s')

ax[1].scatter(X_test[y_test==0, 0], X_test[y_test==0, 1], label='class 0', marker='o')
ax[1].scatter(X_test[y_test==1, 0], X_test[y_test==1, 1], label='class 1', marker='s')

ax[1].legend(loc='upper left')
plt.show()


# ## High-level implementation using the nn.Module API

# In[12]:


class LogisticRegression3(torch.nn.Module):
    """Logistic regression built from a single `torch.nn.Linear` layer."""

    def __init__(self, num_features):
        """Create the linear layer and zero its weight and bias."""
        super(LogisticRegression3, self).__init__()
        self.linear = torch.nn.Linear(num_features, 1)
        # initialize weights to zeros here,
        # since we used zero weights in the
        # manual approach

        self.linear.weight.detach().zero_()
        self.linear.bias.detach().zero_()
        # Note: the trailing underscore
        # means "in-place operation" in the context
        # of PyTorch

    def forward(self, x):
        """Return sigmoid(linear(x))."""
        logits = self.linear(x)
        probas = torch.sigmoid(logits)
        return probas


model = LogisticRegression3(num_features=2).to(device)


# In[13]:


##### Define cost function and set up optimizer #####
cost_fn = torch.nn.BCELoss(reduction='sum')
# reduction='sum' to match results in
# manual approach, where we did not normalize
# the cost by the batch size
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)


# In[14]:


def comp_accuracy(label_var, pred_probas):
    """Return the fraction of predictions (probability > 0.5) matching labels."""
    pred_labels = custom_where((pred_probas > 0.5).float(), 1, 0).view(-1)
    acc = torch.sum(pred_labels == label_var.view(-1)).float() / label_var.size(0)
    return acc


num_epochs = 10

X_train_tensor = torch.tensor(X_train, dtype=torch.float32, device=device)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32, device=device).view(-1, 1)


for epoch in range(num_epochs):

    #### Compute outputs ####
    out = model(X_train_tensor)

    #### Compute gradients ####
    cost = cost_fn(out, y_train_tensor)
    optimizer.zero_grad()
    cost.backward()

    #### Update weights ####
    optimizer.step()

    #### Logging ####
    pred_probas = model(X_train_tensor)
    acc = comp_accuracy(y_train_tensor, pred_probas)
    print('Epoch: %03d' % (epoch + 1), end="")
    print(' | Train ACC: %.3f' % acc, end="")
    print(' | Cost: %.3f' % cost_fn(pred_probas, y_train_tensor))


print('\nModel parameters:')
print(' Weights: %s' % model.linear.weight)
print(' Bias: %s' % model.linear.bias)


# #### Evaluating the Model

# In[15]:


X_test_tensor = torch.tensor(X_test, dtype=torch.float32, device=device)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32, device=device)

pred_probas = model(X_test_tensor)
test_acc = comp_accuracy(y_test_tensor, pred_probas)

print('Test set accuracy: %.2f%%' % (test_acc*100))


# In[16]:


##########################
### 2D Decision Boundary
##########################

# Plot the boundary of the nn.Module model trained in this section.
# nn.Linear stores its weight as (1, num_features), so flatten it to index
# the two coefficients directly.
w = model.linear.weight.detach().cpu().view(-1)
b = model.linear.bias.detach().cpu()

# Boundary: w[0] * x + w[1] * y + b = 0, solved for y at both ends.
x_min = -2
y_min = ((-(w[0] * x_min) - b[0]) / w[1]).item()

x_max = 2
y_max = ((-(w[0] * x_max) - b[0]) / w[1]).item()


fig, ax = plt.subplots(1, 2, sharex=True, figsize=(7, 3))

ax[0].plot([x_min, x_max], [y_min, y_max])
ax[1].plot([x_min, x_max], [y_min, y_max])

ax[0].scatter(X_train[y_train==0, 0], X_train[y_train==0, 1], label='class 0', marker='o')
ax[0].scatter(X_train[y_train==1, 0], X_train[y_train==1, 1], label='class 1', marker='s')

ax[1].scatter(X_test[y_test==0, 0], X_test[y_test==0, 1], label='class 0', marker='o')
ax[1].scatter(X_test[y_test==1, 0], X_test[y_test==1, 1], label='class 1', marker='s')

ax[1].legend(loc='upper left')
plt.show()


# In[17]:


get_ipython().run_line_magic('watermark', '-iv')