#!/usr/bin/env python
# coding: utf-8

# Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.
# - Author: Sebastian Raschka
# - GitHub Repository: https://github.com/rasbt/deeplearning-models

# # Model Zoo -- Character RNN for Generating Text

# A simple character-level RNN that generates new snippets of text after training on the text of a novel.

# In[1]:

get_ipython().run_line_magic('load_ext', 'watermark')
get_ipython().run_line_magic('watermark', "-a 'Sebastian Raschka' -v -p torch")

import random
import re
import string
import time

import unidecode
import torch
import torch.nn.functional as F

torch.backends.cudnn.deterministic = True

# ## General Settings

# In[2]:

RANDOM_SEED = 123
torch.manual_seed(RANDOM_SEED)

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

TEXT_PORTION_SIZE = 200

NUM_ITER = 20000
LEARNING_RATE = 0.005
EMBEDDING_DIM = 100
HIDDEN_DIM = 100
NUM_HIDDEN = 1

# ## Dataset

# Download *[A Tale of Two Cities](http://www.gutenberg.org/files/98/98-0.txt)* by Charles Dickens from the Gutenberg Project:

# In[3]:

get_ipython().system('wget http://www.gutenberg.org/files/98/98-0.txt')

# Convert all characters into the ASCII character set provided by `string.printable`:

# In[4]:

string.printable

# In[5]:

with open('./98-0.txt', 'r') as f:
    textfile = f.read()

# convert special characters to their closest ASCII equivalents
textfile = unidecode.unidecode(textfile)

# collapse runs of spaces into single spaces
textfile = re.sub(' +', ' ', textfile)

TEXT_LENGTH = len(textfile)
print(f'Number of characters in text: {TEXT_LENGTH}')

# Divide the text into smaller portions: each draw returns `TEXT_PORTION_SIZE + 1` characters, so that the inputs and the one-character-shifted targets each span `TEXT_PORTION_SIZE` characters.

# In[6]:

random.seed(RANDOM_SEED)

def random_portion(textfile):
    start_index = random.randint(0, TEXT_LENGTH - TEXT_PORTION_SIZE)
    end_index = start_index + TEXT_PORTION_SIZE + 1
    return textfile[start_index:end_index]

print(random_portion(textfile))

# Define a function to convert characters into tensors of integers (type long):

# In[7]:

def char_to_tensor(text):
    lst = [string.printable.index(c) for c in text]
    tensor = torch.tensor(lst).long()
    return tensor

print(char_to_tensor('abcDEF'))

# Putting it together: a function that draws a random input/target pair for training, where the targets are the inputs shifted by one character:

# In[ ]:

def draw_random_sample(textfile):
    text_long = char_to_tensor(random_portion(textfile))
    inputs = text_long[:-1]
    targets = text_long[1:]
    return inputs, targets

# In[9]:

draw_random_sample(textfile)

# ## Model

# In[ ]:

class RNN(torch.nn.Module):
    def __init__(self, input_size, embed_size, hidden_size, output_size, num_layers):
        super(RNN, self).__init__()

        self.num_layers = num_layers
        self.hidden_size = hidden_size
        # the embedding must produce vectors of size embed_size,
        # since that is the input size the GRU expects
        self.embed = torch.nn.Embedding(input_size, embed_size)
        self.gru = torch.nn.GRU(input_size=embed_size,
                                hidden_size=hidden_size,
                                num_layers=num_layers)
        self.fc = torch.nn.Linear(hidden_size, output_size)

    def forward(self, features, hidden):
        embedded = self.embed(features.view(1, -1))
        output, hidden = self.gru(embedded.view(1, 1, -1), hidden)
        output = self.fc(output.view(1, -1))
        return output, hidden

    def init_zero_state(self):
        init_hidden = torch.zeros(self.num_layers, 1, self.hidden_size).to(DEVICE)
        return init_hidden

# In[ ]:

torch.manual_seed(RANDOM_SEED)

model = RNN(len(string.printable), EMBEDDING_DIM, HIDDEN_DIM,
            len(string.printable), NUM_HIDDEN)
model = model.to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
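# A quick illustrative sanity check (the `probe_*` variables below are just
# throwaway names): push a single character through the untrained model and
# confirm that it returns one logit per character in `string.printable`:

# In[ ]:

with torch.no_grad():
    probe_hidden = model.init_zero_state()
    probe_input = char_to_tensor('A').to(DEVICE)
    probe_output, probe_hidden = model(probe_input, probe_hidden)

    # one logit per character in string.printable
    print(probe_output.shape)  # torch.Size([1, 100])
    # (num_layers, batch_size=1, hidden_size)
    print(probe_hidden.shape)  # torch.Size([1, 1, 100])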
# ## Training

# In[ ]:

def evaluate(model, prime_str='A', predict_len=100, temperature=0.8):
    ## based on https://github.com/spro/practical-pytorch/
    ## blob/master/char-rnn-generation/char-rnn-generation.ipynb

    hidden = model.init_zero_state()
    prime_input = char_to_tensor(prime_str)
    predicted = prime_str

    # Use the priming string to "build up" the hidden state
    for p in range(len(prime_str) - 1):
        _, hidden = model(prime_input[p].to(DEVICE), hidden.to(DEVICE))
    inp = prime_input[-1]

    for p in range(predict_len):
        output, hidden = model(inp.to(DEVICE), hidden.to(DEVICE))

        # Sample the next character from the network output,
        # treated as a multinomial distribution
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]

        # Add the predicted character to the string and use it as the next input
        predicted_char = string.printable[top_i]
        predicted += predicted_char
        inp = char_to_tensor(predicted_char)

    return predicted

# In[13]:

start_time = time.time()
for iteration in range(NUM_ITER):

    ### FORWARD AND BACK PROP
    hidden = model.init_zero_state()
    optimizer.zero_grad()

    loss = 0.
    inputs, targets = draw_random_sample(textfile)
    inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)

    # process the text portion one character at a time, accumulating the loss
    for c in range(TEXT_PORTION_SIZE):
        outputs, hidden = model(inputs[c], hidden)
        loss += F.cross_entropy(outputs, targets[c].view(1))

    loss /= TEXT_PORTION_SIZE
    loss.backward()

    ### UPDATE MODEL PARAMETERS
    optimizer.step()

    ### LOGGING
    with torch.set_grad_enabled(False):
        if iteration % 1000 == 0:
            print(f'Time elapsed: {(time.time() - start_time)/60:.2f} min')
            print(f'Iteration {iteration} | Loss {loss.item():.2f}\n\n')
            print(evaluate(model, 'Th', 200), '\n')
            print(50*'=')
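# The `temperature` argument of `evaluate` controls the trade-off between
# conservative and diverse samples: dividing the logits by a temperature
# below 1 (before exponentiating) sharpens the sampling distribution, while
# values above 1 flatten it. A small sketch of this effect on the trained
# model (the temperature values below are only illustrative):

# In[ ]:

for temp in (0.3, 0.8, 1.5):
    print(f'--- temperature {temp} ---')
    print(evaluate(model, prime_str='Th', predict_len=200, temperature=temp), '\n')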