This project demonstrates Bitcoin (BTC-USD) closing price prediction using a stacked bidirectional LSTM (Bi-LSTM).
Data is collected using the yfinance API.
%%capture
!pip install yfinance --upgrade --no-cache-dir
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, LSTM, Bidirectional
from keras.layers import Activation, Dropout
from keras.callbacks import EarlyStopping  # used for the early-stopping callback below
# Note: keras.layers no longer exposes CuDNNLSTM in TF 2.x; it lives under
# tf.compat.v1.keras.layers. (In TF 2.x, LSTM dispatches to the cuDNN kernel
# automatically when run on a GPU with compatible arguments.)
from tensorflow.compat.v1.keras.layers import CuDNNLSTM
%matplotlib inline
sns.set(style='whitegrid', palette='muted', font_scale=1.2)
plt.rcParams['figure.figsize'] = 12, 8
btc_ticker = yf.Ticker('BTC-USD')
df_btc_data = btc_ticker.history(period="max")
# to download for a specific period of time
# yf.download("BTC-USD", start="2014-01-01", end="2019-04-30")
df_btc_data
| Date | Open | High | Low | Close | Volume | Dividends | Stock Splits |
|---|---|---|---|---|---|---|---|
| 2014-09-17 | 465.864014 | 468.174011 | 452.421997 | 457.334015 | 21056800 | 0 | 0 |
| 2014-09-18 | 456.859985 | 456.859985 | 413.104004 | 424.440002 | 34483200 | 0 | 0 |
| 2014-09-19 | 424.102997 | 427.834991 | 384.532013 | 394.795990 | 37919700 | 0 | 0 |
| 2014-09-20 | 394.673004 | 423.295990 | 389.882996 | 408.903992 | 36863600 | 0 | 0 |
| 2014-09-21 | 408.084991 | 412.425995 | 393.181000 | 398.821014 | 26580100 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 2021-11-09 | 67549.734375 | 68530.335938 | 66382.062500 | 66971.828125 | 42357991721 | 0 | 0 |
| 2021-11-10 | 66953.335938 | 68789.625000 | 63208.113281 | 64995.230469 | 48730828378 | 0 | 0 |
| 2021-11-11 | 64978.890625 | 65579.015625 | 64180.488281 | 64949.960938 | 35880633236 | 0 | 0 |
| 2021-11-12 | 64863.980469 | 65460.816406 | 62333.914062 | 64155.941406 | 36084893887 | 0 | 0 |
| 2021-11-13 | 64378.949219 | 64901.621094 | 63358.320312 | 64727.914062 | 30252488704 | 0 | 0 |

2611 rows × 7 columns
pd.date_range(df_btc_data.index.min(), df_btc_data.index.max()).difference(df_btc_data.index)
DatetimeIndex(['2020-04-17', '2020-10-09', '2020-10-12', '2020-10-13'], dtype='datetime64[ns]', freq=None)
Since only 4 of the ~2,615 calendar days in the range are missing (~0.15%), no action is taken to fill missing values.
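For reference, if filling were desired, a minimal sketch (not applied here, and assuming forward-fill is acceptable for daily prices) would reindex to a complete daily calendar:

# Hypothetical gap-filling (not used in this notebook): reindex to a
# complete daily calendar and forward-fill the few missing rows.
full_index = pd.date_range(df_btc_data.index.min(), df_btc_data.index.max(), freq='D')
df_btc_filled = df_btc_data.reindex(full_index).ffill()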
Close

df_btc_data.Close.plot();
df_btc_data.loc['2018-01':'2021-11', "Close"].plot();
df_btc_close = df_btc_data[['Close']]
df_btc_close
| Date | Close |
|---|---|
| 2014-09-17 | 457.334015 |
| 2014-09-18 | 424.440002 |
| 2014-09-19 | 394.795990 |
| 2014-09-20 | 408.903992 |
| 2014-09-21 | 398.821014 |
| ... | ... |
| 2021-11-09 | 66971.828125 |
| 2021-11-10 | 64995.230469 |
| 2021-11-11 | 64949.960938 |
| 2021-11-12 | 64155.941406 |
| 2021-11-13 | 64727.914062 |

2611 rows × 1 columns
scaler = MinMaxScaler()
close_price = df_btc_close.Close.values.reshape(-1, 1)
scaled_close = scaler.fit_transform(close_price)
scaled_close.shape
(2611, 1)
scaled_close
array([[0.00414359],
       [0.00365546],
       [0.00321557],
       ...,
       [0.96116758],
       [0.94938491],
       [0.95787257]])
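As a quick sanity check, MinMaxScaler (with its default feature_range=(0, 1)) maps each value x to (x - min) / (max - min), so the scaled array can be reproduced by hand from the fitted scaler's data_min_ and data_max_ attributes:

# Sanity check: recompute the min-max scaling manually.
data_min, data_max = scaler.data_min_[0], scaler.data_max_[0]
manual = (close_price - data_min) / (data_max - data_min)
assert np.allclose(manual, scaled_close)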
Check for missing values
np.isnan(scaled_close).sum()
0
scaled_close.reshape(-1, 1).shape
(2611, 1)
Target shape of the data: [batch_size, sequence_length, n_features]
sequence_length = 100 # history of 99 input time steps
def to_sequences(data, seq_len):
    # Slide a window of length seq_len over the series,
    # producing overlapping subsequences.
    d = []
    for index in range(len(data) - seq_len):
        d.append(data[index: index + seq_len])
    return np.array(d)

def preprocess_data(data_raw, seq_len, train_split):
    data = to_sequences(data_raw, seq_len)
    num_train = int(train_split * data.shape[0])
    # The first seq_len - 1 steps of each window are the input,
    # the last step is the target:
    # input->X1: [1 2 3...99]  -> y1: 100
    # input->X2: [2 3 4...100] -> y2: 101
    # input->X3: [3 4 5...101] -> y3: 102
    X_train = data[:num_train, :-1, :]
    y_train = data[:num_train, -1, :]
    X_test = data[num_train:, :-1, :]
    y_test = data[num_train:, -1, :]
    return X_train, y_train, X_test, y_test
X_train, y_train, X_test, y_test = preprocess_data(scaled_close, sequence_length, 0.9)
X_train.shape, y_train.shape, X_test.shape, y_test.shape
((2259, 99, 1), (2259, 1), (252, 99, 1), (252, 1))
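To make the windowing concrete, here is what to_sequences produces on a toy array (illustrative only):

# Toy example: 6 points with seq_len=4 give 2 overlapping windows;
# the last element of each window becomes the target.
toy = np.arange(6).reshape(-1, 1)   # [[0], [1], ..., [5]]
windows = to_sequences(toy, 4)      # shape (2, 4, 1)
print(windows[:, :-1, 0])           # inputs:  [[0 1 2], [1 2 3]]
print(windows[:, -1, 0])            # targets: [3 4]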
# https://machinelearningmastery.com/stacked-long-short-term-memory-networks/
# https://machinelearningmastery.com/return-sequences-and-return-states-for-lstms-in-keras/
model = Sequential()
# CuDNNLSTM runs the fused cuDNN kernel and is commonly reported to be
# roughly 15x faster than the generic LSTM implementation on GPU
model.add(Bidirectional(
    CuDNNLSTM(X_train.shape[1], return_sequences=True),
    input_shape=(X_train.shape[1], X_train.shape[-1])
))
# return_sequences=True so that the second LSTM layer
# receives a full three-dimensional sequence as input
model.add(Dropout(0.2))
model.add(Bidirectional(CuDNNLSTM(X_train.shape[1] * 2,
                                  return_sequences=True)))
model.add(Dropout(0.2))
# the final LSTM layer returns only its last hidden state
model.add(Bidirectional(CuDNNLSTM(X_train.shape[1])))
model.add(Dense(units=1))
model.add(Activation('linear'))
model.summary()
Model: "sequential"
_________________________________________________________________
 Layer (type)                    Output Shape             Param #
=================================================================
 bidirectional (Bidirectional)   (None, 99, 198)          80784
 dropout (Dropout)               (None, 99, 198)          0
 bidirectional_1 (Bidirectional) (None, 99, 396)          630432
 dropout_1 (Dropout)             (None, 99, 396)          0
 bidirectional_2 (Bidirectional) (None, 198)              393624
 dense (Dense)                   (None, 1)                199
 activation (Activation)         (None, 1)                0
=================================================================
Total params: 1,105,039
Trainable params: 1,105,039
Non-trainable params: 0
_________________________________________________________________
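The parameter counts can be verified by hand. A CuDNNLSTM layer with u units and input dimension i has 4u(i + u) weights plus 8u biases (the cuDNN kernel keeps two bias vectors per gate), and the Bidirectional wrapper doubles the total:

# Hand-check of the summary's parameter counts.
def cudnn_lstm_params(units, input_dim):
    return 4 * units * (input_dim + units) + 8 * units

print(2 * cudnn_lstm_params(99, 1))     # 80784  (first Bi-LSTM)
print(2 * cudnn_lstm_params(198, 198))  # 630432 (second Bi-LSTM)
print(2 * cudnn_lstm_params(99, 396))   # 393624 (third Bi-LSTM)
print(198 + 1)                          # 199    (Dense)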
# Setting up an early stop
earlystop = EarlyStopping(monitor='val_loss', min_delta=0.0001,
                          patience=80, verbose=1, mode='min')
callbacks_list = [earlystop]
BATCH_SIZE = 64
model.compile(loss='mean_squared_error', optimizer='adam')
# Pass the callbacks to fit(); note that with patience=80 and only
# 50 epochs, early stopping never actually triggers here.
history = model.fit(X_train, y_train, epochs=50, batch_size=BATCH_SIZE,
                    shuffle=False, validation_split=0.1,
                    callbacks=callbacks_list)
Epoch 1/50
32/32 [==============================] - 16s 211ms/step - loss: 0.0027 - val_loss: 0.0525
Epoch 2/50
32/32 [==============================] - 5s 154ms/step - loss: 0.0033 - val_loss: 0.0516
Epoch 3/50
32/32 [==============================] - 5s 153ms/step - loss: 0.0020 - val_loss: 0.0338
...
Epoch 48/50
32/32 [==============================] - 5s 153ms/step - loss: 1.8981e-04 - val_loss: 0.0010
Epoch 49/50
32/32 [==============================] - 5s 154ms/step - loss: 1.2551e-04 - val_loss: 6.1445e-04
Epoch 50/50
32/32 [==============================] - 5s 153ms/step - loss: 3.3240e-04 - val_loss: 8.9620e-04
model.evaluate(X_test, y_test)
8/8 [==============================] - 1s 66ms/step - loss: 0.0027
0.0027122455649077892
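The test loss is in scaled units. Since MinMaxScaler is a linear map (inverse_transform is x * (max - min) + min), the RMSE can be converted back to dollars by multiplying by the fitted price range, as a rough back-of-the-envelope:

# Convert the scaled test MSE to an RMSE in USD using the fitted scaler.
test_mse = model.evaluate(X_test, y_test, verbose=0)
rmse_usd = np.sqrt(test_mse) * (scaler.data_max_[0] - scaler.data_min_[0])
print(f"Test RMSE: ${rmse_usd:,.2f}")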
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
# val_loss comes from the 10% validation split, not the test set
plt.legend(['train', 'validation'], loc='upper left')
plt.show()
y_hat = model.predict(X_test)
y_test_inverse = scaler.inverse_transform(y_test)
y_hat_inverse = scaler.inverse_transform(y_hat)
plt.plot(y_test_inverse, label="Actual Price", color='green')
plt.plot(y_hat_inverse, label="Predicted Price", color='red')
plt.title('Bitcoin price prediction')
plt.xlabel('Time [days]')
plt.ylabel('Price')
plt.legend(loc='best')
plt.show();
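To put numbers on the plot above, MAE and MAPE can be computed on the inverse-transformed predictions (a minimal sketch using the arrays already defined):

# Error metrics in price units over the test window.
mae = np.mean(np.abs(y_test_inverse - y_hat_inverse))
mape = np.mean(np.abs((y_test_inverse - y_hat_inverse) / y_test_inverse)) * 100
print(f"MAE:  ${mae:,.2f}")
print(f"MAPE: {mape:.2f}%")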