There is a branch of deep learning dedicated to processing time series: Recurrent Neural Networks (RNNs). Long Short-Term Memory networks (LSTMs) are one of the most widely used types of RNN, and Gated Recurrent Units (GRUs) are the other popular variant.
This is an illustration from http://colah.github.io/posts/2015-08-Understanding-LSTMs/ (a highly recommended read).
Pros:
Cons:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
from keras.models import Sequential
from keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization, LSTM, Embedding, TimeDistributed
from keras.models import load_model, model_from_json
Using TensorFlow backend.
def chr2val(ch):
    ch = ch.lower()
    if ch.isalpha():
        return 1 + (ord(ch) - ord('a'))
    else:
        return 0
def val2chr(v):
    if v == 0:
        return ' '
    else:
        return chr(ord('a') + v - 1)
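For example (a quick check, not part of the original notebook), the two helpers round-trip lowercase letters and collapse everything else to a space:
print(chr2val('c'))  # 3
print(val2chr(3))    # c
print(chr2val('!'))  # 0
print(val2chr(0))    # ' '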
df = pd.read_csv('trump.csv')
df = df[df.is_retweet=='false']
df.text = df.text.str.replace(r'http[\w:/\.]+','') # remove urls
df.text = df.text.str.lower()
df = df[[len(t)<180 for t in df.text.values]]
df = df[[len(t)>50 for t in df.text.values]]
df.head()
| | source | text | created_at | favorite_count | is_retweet | id_str |
|---|---|---|---|---|---|---|
| 0 | Twitter for iPhone | i think senator blumenthal should take a nice ... | 08-07-2017 20:48:54 | 61446 | false | 8.946617e+17 |
| 1 | Twitter for iPhone | how much longer will the failing nytimes with ... | 08-07-2017 20:39:46 | 42235 | false | 8.946594e+17 |
| 2 | Twitter for iPhone | the fake news media will not talk about the im... | 08-07-2017 20:15:18 | 45050 | false | 8.946532e+17 |
| 4 | Twitter for iPhone | on #purpleheartday💜i thank all the brave men a... | 08-07-2017 18:03:42 | 48472 | false | 8.946201e+17 |
| 5 | Twitter for iPhone | ...conquests how brave he was and it was all a... | 08-07-2017 12:01:20 | 59253 | false | 8.945289e+17 |
df.shape
(23938, 6)
Remove emojis, flags, etc. from the tweets. Also notice how I have used [::-1] so that the tweets end up in chronological order (the CSV lists the newest tweets first).
# remove emojis and flags
emoji_pattern = re.compile("["
                           u"\U0001F600-\U0001F64F"  # emoticons
                           u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                           u"\U0001F680-\U0001F6FF"  # transport & map symbols
                           u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           "]+", flags=re.UNICODE)
trump_tweets = [emoji_pattern.sub(r' ', text) for text in df.text.values[::-1]]
trump_tweets[:5]
['be sure to tune in and watch donald trump on late night with david letterman as he presents the top ten list tonight!', 'donald trump will be appearing on the view tomorrow morning to discuss celebrity apprentice and his new book think like a champion!', 'donald trump reads top ten financial tips on late show with david letterman: - very funny!', 'new blog post: celebrity apprentice finale and lessons learned along the way: ', 'my persona will never be that of a wallflower - i’d rather build walls than cling to them --donald j. trump']
Create a dictionary to convert letters to numbers and vice versa.
all_tweets = ''.join(trump_tweets)
char2int = dict(zip(set(all_tweets), range(len(set(all_tweets)))))
char2int['<END>'] = len(char2int)
char2int['<GO>'] = len(char2int)
char2int['<PAD>'] = len(char2int)
int2char = dict(zip(char2int.values(), char2int.keys()))
text_num = [[char2int['<GO>']]+[char2int[c] for c in tweet]+ [char2int['<END>']] for tweet in trump_tweets]
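As a quick sanity check (not in the original notebook), an encoded tweet can be decoded back by looking each id up in int2char; the special tokens show up literally:
print(''.join(int2char[i] for i in text_num[0]))
# <GO>be sure to tune in and watch donald trump on late night with david letterman as he presents the top ten list tonight!<END>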
plt.hist([len(t) for t in trump_tweets],50)
plt.show()
# Concatenate all the tweets into one long sequence of character ids
int_text = []
for t in text_num:
    int_text += t
len_vocab = len(char2int)
sentence_len = 40
# n_chars = len(text_num)//sentence_len*sentence_len
num_chunks = len(text_num)-sentence_len
def get_batches(int_text, batch_size, seq_length):
    """
    Return batches of input and target
    :param int_text: Text with the characters replaced by their ids
    :param batch_size: The size of each batch
    :param seq_length: The length of each sequence
    :return: x and y as NumPy arrays
    """
    slice_size = batch_size * seq_length
    n_batches = len(int_text) // slice_size
    # y is simply x shifted ahead by one character
    x = int_text[: n_batches * slice_size]
    y = int_text[1: n_batches * slice_size + 1]
    # Arrange the rows so that row i of batch n is continued by row i of batch n+1
    x = np.split(np.reshape(x, (batch_size, -1)), n_batches, 1)
    y = np.split(np.reshape(y, (batch_size, -1)), n_batches, 1)
    x = np.vstack(x)
    y = np.vstack(y)
    y = y.reshape(y.shape + (1,))  # extra trailing axis needed for sparse_categorical_crossentropy
    return x, y
batch_size = 128
x, y = get_batches(int_text, batch_size, sentence_len)
Notice how get_batches arranges the data: y is simply x shifted ahead by one character, and the rows are ordered so that row i of one batch is continued by row i of the next batch, which is exactly the layout a stateful LSTM expects.
get_batches(np.arange(20), 2,4)
(array([[ 0,  1,  2,  3],
        [ 8,  9, 10, 11],
        [ 4,  5,  6,  7],
        [12, 13, 14, 15]]),
 array([[ 1,  2,  3,  4],
        [ 9, 10, 11, 12],
        [ 5,  6,  7,  8],
        [13, 14, 15, 16]]))
model = Sequential()
# 1. An embedding layer mapping each character id to a 64-dimensional vector
# 2. An LSTM layer with return_sequences=True and stateful=True
# 3. A second LSTM layer, also with return_sequences=True and stateful=True
# 4. A TimeDistributed(Dense(...)) layer with one softmax output per character in the
#    vocabulary (layer sizes match the summary that follows)
model.add(Embedding(len_vocab, 64, batch_size=batch_size))
model.add(LSTM(64, return_sequences=True, stateful=True))
model.add(LSTM(64, return_sequences=True, stateful=True))
model.add(TimeDistributed(Dense(len_vocab, activation='softmax')))
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')
model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
embedding_1 (Embedding)      (128, None, 64)           8512
_________________________________________________________________
lstm_1 (LSTM)                (128, None, 64)           33024
_________________________________________________________________
lstm_2 (LSTM)                (128, None, 64)           33024
_________________________________________________________________
time_distributed_1 (TimeDist (128, None, 133)          8645
=================================================================
Total params: 83,205.0
Trainable params: 83,205
Non-trainable params: 0.0
_________________________________________________________________
Pay special attention to how the probabilities are taken. p has shape (1, sequence_len, len(char2int)), where len(char2int) is the number of available characters; the leading 1 is there because we feed in a single sequence at a time. We only care about the prediction for the last position in the sequence, since all earlier letters have already been appended. Hence we sample the next letter from the distribution p[0][-1].
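Condensed, the sampling step used in the generation loop further below looks like this (same variable names as in the loop, shown for a single input sequence):
p = model2.predict(np.array(letter)[None, :])  # shape (1, len(letter), len(char2int))
next_char_dist = p[0][-1]                      # distribution over the next character only
letter.append(np.random.choice(len(char2int), p=next_char_dist))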
Why did we keep appending to the sequence and predicting, instead of simply feeding in the last letter? If we only fed the last letter, we would lose the information that comes from the earlier letters via the hidden state and the cell memory. Keep in mind that each LSTM unit has three inputs: the current input x, the hidden state, and the cell memory.
It is also important to notice that the cell memory is not connected to the Dense layer; only the hidden state is.
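To make those three inputs concrete, here is a minimal NumPy sketch of a single LSTM step (the weight names W, U and b are made up for illustration; Keras stores them fused into larger matrices):
import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def lstm_step(x, h_prev, c_prev, W, U, b):
    i = sigmoid(W['i'] @ x + U['i'] @ h_prev + b['i'])  # input gate
    f = sigmoid(W['f'] @ x + U['f'] @ h_prev + b['f'])  # forget gate
    o = sigmoid(W['o'] @ x + U['o'] @ h_prev + b['o'])  # output gate
    g = np.tanh(W['g'] @ x + U['g'] @ h_prev + b['g'])  # candidate update
    c = f * c_prev + i * g   # new cell memory, which stays inside the LSTM
    h = o * np.tanh(c)       # new hidden state, which is what the next layer sees
    return h, c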
What happens when stateful=True is that the hidden and cell states computed for the i-th example of the n-th batch are passed on as the initial states for the i-th example of the (n+1)-th batch. This is one way of picking up patterns that extend beyond the specified sentence length, which in this case is 40 characters.
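You can verify that get_batches lays the data out in exactly this way: row i of one batch is continued by row i of the next batch, so the carried-over state always refers to the same stretch of text. Using the toy example from above:
xb, _ = get_batches(np.arange(20), 2, 4)
# batch 1 is xb[0:2], batch 2 is xb[2:4]
print(xb[0], xb[2])  # [0 1 2 3] is continued by [4 5 6 7]
print(xb[1], xb[3])  # [ 8  9 10 11] is continued by [12 13 14 15]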
A couple of things to note:
- When I fit the model I set shuffle=False, because shuffling the batches would break the row-wise continuity that stateful training relies on.
- The trained model expects batch_size inputs at a time, whereas I only want to predict one character at a time. So I create a second model, model2, with the same layer sizes and copy the trained weights into it with set_weights.
model2 = Sequential()
# Same layer sizes as `model` above so that set_weights() works, but with no fixed batch
# size or stateful=True, since we feed one growing sequence at a time; no compile needed
# for prediction only.
model2.add(Embedding(len_vocab, 64))
model2.add(LSTM(64, return_sequences=True))
model2.add(LSTM(64, return_sequences=True))
model2.add(TimeDistributed(Dense(len_vocab, activation='softmax')))
n_epochs = 50
for i in range(n_epochs+1):
    if i % 5 == 0:
        # Every 5 epochs, generate a sample to see how the model is doing
        sentence = []
        letter = [char2int['<GO>']]  # start with the <GO> token
        model2.set_weights(model.get_weights())
        for _ in range(100):
            sentence.append(int2char[letter[-1]])
            p = model2.predict(np.array(letter)[None, :])
            letter.append(np.random.choice(len(char2int), 1, p=p[0][-1])[0])
        print(''.join(sentence))
        print('='*100)
    if i != n_epochs:
        model.fit(x, y, batch_size=batch_size, epochs=1, shuffle=False)
        model.reset_states()
<GO>teradisid @realdonaldtrump i!.<END><GO>shaoning!<END><GO>dong drindter town be pabitate.<END><GO>americas you honely thi
====================================================================================================
Epoch 1/1
66560/66560 [==============================] - 90s - loss: 1.8014
Epoch 1/1
66560/66560 [==============================] - 107s - loss: 1.7881
Epoch 1/1
66560/66560 [==============================] - 109s - loss: 1.7763
Epoch 1/1
66560/66560 [==============================] - 106s - loss: 1.7657
Epoch 1/1
66560/66560 [==============================] - 105s - loss: 1.7560 - ETA: 3s
<GO>ohamerible anergigh manigion (316 9 thinks? fambew losonised made up plcind. the beaded?thing
====================================================================================================
Epoch 1/1
66560/66560 [==============================] - 107s - loss: 1.7471 - ETA: 1s - loss
Epoch 1/1
66560/66560 [==============================] - 105s - loss: 1.7389
Epoch 1/1
66560/66560 [==============================] - 102s - loss: 1.7314
Epoch 1/1
66560/66560 [==============================] - 98s - loss: 1.7241
Epoch 1/1
66560/66560 [==============================] - 99s - loss: 1.7172
<GO>fund having abifationally diand have as prokervia @lanneals<END><GO>say<END><GO>musts country on my two amerhiera
====================================================================================================
Epoch 1/1
66560/66560 [==============================] - 98s - loss: 1.7106
Epoch 1/1
66560/66560 [==============================] - 113s - loss: 1.7043
Epoch 1/1
66560/66560 [==============================] - 113s - loss: 1.6985
Epoch 1/1
66560/66560 [==============================] - 112s - loss: 1.6930
Epoch 1/1
66560/66560 [==============================] - 116s - loss: 1.6879
<GO>je @realdonaldtrump segew interview- see. years <END><GO>@foxer: @an79: #patc @unclifferi how the (contra
====================================================================================================
Epoch 1/1
66560/66560 [==============================] - 114s - loss: 1.6831
Epoch 1/1
66560/66560 [==============================] - 115s - loss: 1.6784
Epoch 1/1
66560/66560 [==============================] - 106s - loss: 1.6739
Epoch 1/1
66560/66560 [==============================] - 106s - loss: 1.6695
Epoch 1/1
66560/66560 [==============================] - 106s - loss: 1.6652
<GO>woues donald trump yourspire roling more cothanks?<END><GO>“@courst_cmingc in than same than courterful s
====================================================================================================
Epoch 1/1
66560/66560 [==============================] - 105s - loss: 1.6609 - ETA: 0s - loss: 1.
Epoch 1/1
66560/66560 [==============================] - 106s - loss: 1.6567
Epoch 1/1
66560/66560 [==============================] - 106s - loss: 1.6528
Epoch 1/1
66560/66560 [==============================] - 116s - loss: 1.6490
Epoch 1/1
66560/66560 [==============================] - 120s - loss: 1.6455
<GO>10 mittry was speed focus to u wornd & the provings losbacces has carolina<END><GO>for the great the c
====================================================================================================
Epoch 1/1
66560/66560 [==============================] - 116s - loss: 1.6421
Epoch 1/1
 6528/66560 [=>............................] - ETA: 95s - loss: 1.6425
KeyboardInterrupt (raised inside model.fit; training was stopped manually at this point)
with open('model_struct.json', 'w') as f:
    f.write(model.to_json())
model.save_weights('model_weights.h5')
model.save('model.h5')
# if not 'model' in vars():
#     # model = load_model('model.h5') # This doesn't seem to work for some odd reason
#     with open('model_struct.json','r') as f:
#         model = model_from_json(f.read())
#     model.load_weights('model_weights.h5')
model = Sequential()
model.add(Embedding(len_vocab, 64)) # , batch_size=batch_size
model.add(LSTM(64, return_sequences=True)) # , stateful=True
model.add(TimeDistributed(Dense(len_vocab, activation='softmax')))
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')
model.summary()
model.load_weights('model_weights.h5')
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
embedding_4 (Embedding)      (None, None, 64)          8512
_________________________________________________________________
lstm_7 (LSTM)                (None, None, 64)          33024
_________________________________________________________________
time_distributed_4 (TimeDist (None, None, 133)         8645
=================================================================
Total params: 50,181.0
Trainable params: 50,181
Non-trainable params: 0.0
_________________________________________________________________
for j in range(10):
    sentence = []
    letter = [char2int['<GO>']]  # start with the <GO> token
    for i in range(150):
        sentence.append(int2char[letter[-1]])
        if sentence[-1] == '<END>':
            break
        p = model.predict(np.array(letter)[None, :])
        letter.append(np.random.choice(len(char2int), 1, p=p[0][-1])[0])
    print(''.join(sentence))
    print('='*100)
<GO>❤ q✨1iqáiqit iw6♥❤/wi☁6 iq✨i☁q✨/d‼i~wqá6w☁(i☁´t6i/ q✨1i ´♥iá/❤\iw♥✨☝iq✨áw♥áq´✨i"´wiá☁qiá´i16~(i/✨☝i✨´ái☺❤/♥✨i☺❤/✨i/☝☉di16w/❤i"´☝ig✨61w6ái/✨ ==================================================================================================== <GO>it/\6i ´♥i☁6iá☁6iq✨á6w6☝=i/✨☝i~q❤❤it/\/✨☝♡{i~q❤w6i/✨☝i/i☺6w´✨i☁/☉6i♥✨"áiáqt6☝☞iw´´\6w(ii❌áw♥t☺´❤6á (i‼☁/☺☺6✨iqi/9´w iá´iw6/á6i❤´´\i☝´{igw ==================================================================================================== <GO>~☁ i☝´✨=ái96"´w6iá☁6i♥☺iá´i16ááq✨1i/✨t♥á(igá6☉6✨☝"´~á❤☝1☁áiqiq✨i/6i~☁ i☁/❤❤6/ái/t☺/q1✨=ái☺´❤❤(i/i1´☉6w✨61/ i☁♥ái´♥wi☺´❤qá´✨i~3)♥✨☝❤i~☁ i ==================================================================================================== <GO>g/á❤♥9(i96´♥w✨6wi´ttiá´i»☺6ww (i/i6áá6w(i/i☝/☉6☝i9w/q✨á6wi ´♥i♥✨qá (i❌1w6✨✨á6w´tqáq´✨i~6/☺6´✨6☝i/q☝i ´♥iá☁/✨iá☁6=i✨/áq´✨(i/ iá´iá☁6i ==================================================================================================== <GO>gá☁6á6wt♥☺6iq✨i6☝q✨6(iá☁6 i/✨☝i❤´´\(i❤/✨it/☝6i/✨ ´✨6i«-s-i6%☺/✨☝ i"´wi´♥wi❌´♥á/1/✨i☁/✨☝i"´wi/i❤´✨1(i´♥✨áw (igá´tt6☝/16(i6/ #i/✨i6%6♥áq☉6i☺❤/6 ====================================================================================================
KeyboardInterrupt (raised inside model.predict; generation was stopped manually at this point)