## Further recommendation, test multiple frameworks, see what works better on average.
## Open source RL: https://docs.google.com/spreadsheets/d/1EeFPd-XIQ3mq_9snTlAZSsFY7Hbnmd7P5bbT8LPuMn0/edit#gid=0
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
This notebook is part of a series on machine learning asset managment. https://ssrn.com/abstract=3420952
I have had some enquiries about future releases, I will post future content on the FirmAI Linkedin page.
!pip install yfinance --upgrade --no-cache-dir
## Save future files to your drive
## In this notebook control for multiple testing
import numpy as np
from google.colab import drive
drive.mount('/content/drive',force_remount=True)
%cd "/content/drive/My Drive/FirmAI/FinML/Data/Agent Trading"
Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code Enter your authorization code: ·········· Mounted at /content/drive /content/drive/My Drive/FirmAI/FinML/Data/Agent Trading
from pandas_datareader import data as pdr
import fix_yahoo_finance as yf
yf.pdr_override()
df_full = pdr.get_data_yahoo("JPM", start="2018-01-01").reset_index()
df_full.to_csv('output/JPM.csv',index=False)
df_full.head()
[*********************100%***********************] 1 of 1 downloaded
Date | Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|---|
0 | 2018-01-02 | 107.629997 | 108.019997 | 106.809998 | 107.949997 | 103.156837 | 13578800 |
1 | 2018-01-03 | 107.860001 | 108.489998 | 107.480003 | 108.059998 | 103.261963 | 11901000 |
2 | 2018-01-04 | 108.360001 | 110.029999 | 108.199997 | 109.040001 | 104.741249 | 12953700 |
3 | 2018-01-05 | 109.260002 | 109.550003 | 107.779999 | 108.339996 | 104.068840 | 14155000 |
4 | 2018-01-08 | 108.150002 | 108.680000 | 107.699997 | 108.500000 | 104.222534 | 12466500 |
df_full = pd.read_csv('output/JPM.csv')
df= df_full.copy()
name = 'Turtle Trading Agent'
count = int(np.ceil(len(df) * 0.1))
signals = pd.DataFrame(index=df.index)
signals['signal'] = 0.0
signals['trend'] = df['Close']
signals['RollingMax'] = (signals.trend.shift(1).rolling(count).max())
signals['RollingMin'] = (signals.trend.shift(1).rolling(count).min())
signals.loc[signals['RollingMax'] < signals.trend, 'signal'] = -1
signals.loc[signals['RollingMin'] > signals.trend, 'signal'] = 1
signals
signal | trend | RollingMax | RollingMin | |
---|---|---|---|---|
0 | 0.0 | 107.949997 | NaN | NaN |
1 | 0.0 | 108.059998 | NaN | NaN |
2 | 0.0 | 109.040001 | NaN | NaN |
3 | 0.0 | 108.339996 | NaN | NaN |
4 | 0.0 | 108.500000 | NaN | NaN |
5 | 0.0 | 109.050003 | NaN | NaN |
6 | 0.0 | 110.250000 | NaN | NaN |
7 | 0.0 | 110.839996 | NaN | NaN |
8 | 0.0 | 112.669998 | NaN | NaN |
9 | 0.0 | 112.269997 | NaN | NaN |
10 | 0.0 | 112.989998 | NaN | NaN |
11 | 0.0 | 113.260002 | NaN | NaN |
12 | 0.0 | 113.010002 | NaN | NaN |
13 | 0.0 | 114.330002 | NaN | NaN |
14 | 0.0 | 114.209999 | NaN | NaN |
15 | 0.0 | 115.669998 | NaN | NaN |
16 | 0.0 | 115.699997 | NaN | NaN |
17 | 0.0 | 116.320000 | NaN | NaN |
18 | 0.0 | 116.199997 | NaN | NaN |
19 | 0.0 | 115.110001 | NaN | NaN |
20 | 0.0 | 115.669998 | NaN | NaN |
21 | 0.0 | 116.870003 | NaN | NaN |
22 | 0.0 | 114.279999 | NaN | NaN |
23 | 0.0 | 108.800003 | NaN | NaN |
24 | 0.0 | 112.110001 | NaN | NaN |
25 | 0.0 | 112.870003 | NaN | NaN |
26 | 0.0 | 107.879997 | NaN | NaN |
27 | 0.0 | 110.040001 | NaN | NaN |
28 | 0.0 | 111.739998 | NaN | NaN |
29 | 0.0 | 112.430000 | NaN | NaN |
... | ... | ... | ... | ... |
349 | 0.0 | 108.639999 | 116.120003 | 101.230003 |
350 | 0.0 | 109.709999 | 116.120003 | 104.639999 |
351 | 0.0 | 108.519997 | 116.120003 | 104.870003 |
352 | 0.0 | 108.220001 | 116.120003 | 104.870003 |
353 | 0.0 | 107.059998 | 116.120003 | 104.870003 |
354 | 0.0 | 105.959999 | 116.120003 | 104.870003 |
355 | 0.0 | 106.459999 | 116.120003 | 104.870003 |
356 | 0.0 | 109.739998 | 116.120003 | 104.870003 |
357 | 0.0 | 110.129997 | 116.120003 | 105.339996 |
358 | 0.0 | 110.370003 | 116.120003 | 105.959999 |
359 | 0.0 | 109.160004 | 116.120003 | 105.959999 |
360 | 0.0 | 110.339996 | 116.120003 | 105.959999 |
361 | 0.0 | 110.680000 | 116.120003 | 105.959999 |
362 | 0.0 | 109.269997 | 116.120003 | 105.959999 |
363 | 0.0 | 109.540001 | 116.120003 | 105.959999 |
364 | 0.0 | 109.820000 | 116.120003 | 105.959999 |
365 | 0.0 | 109.220001 | 116.120003 | 105.959999 |
366 | 0.0 | 110.709999 | 116.120003 | 105.959999 |
367 | 0.0 | 109.910004 | 116.120003 | 105.959999 |
368 | 0.0 | 110.190002 | 116.120003 | 105.959999 |
369 | 0.0 | 109.440002 | 116.120003 | 105.959999 |
370 | 0.0 | 108.660004 | 116.120003 | 105.959999 |
371 | 0.0 | 107.760002 | 116.120003 | 105.959999 |
372 | 0.0 | 108.480003 | 116.120003 | 105.959999 |
373 | 0.0 | 108.839996 | 116.120003 | 105.959999 |
374 | 0.0 | 111.800003 | 115.089996 | 105.959999 |
375 | -1.0 | 113.680000 | 113.209999 | 105.959999 |
376 | -1.0 | 113.800003 | 113.680000 | 105.959999 |
377 | 0.0 | 112.820000 | 113.800003 | 105.959999 |
378 | 0.0 | 113.489998 | 113.800003 | 105.959999 |
379 rows × 4 columns
def buy_stock(
real_movement,
signal,
initial_money = 10000,
max_buy = 1,
max_sell = 1,
):
"""
real_movement = actual movement in the real world
delay = how much interval you want to delay to change our decision from buy to sell, vice versa
initial_state = 1 is buy, 0 is sell
initial_money = 1000, ignore what kind of currency
max_buy = max quantity for share to buy
max_sell = max quantity for share to sell
"""
starting_money = initial_money
states_sell = []
states_buy = []
current_inventory = 0
def buy(i, initial_money, current_inventory):
shares = initial_money // real_movement[i]
if shares < 1:
print(
'day %d: total balances %f, not enough money to buy a unit price %f'
% (i, initial_money, real_movement[i])
)
else:
if shares > max_buy:
buy_units = max_buy
else:
buy_units = shares
initial_money -= buy_units * real_movement[i]
current_inventory += buy_units
print(
'day %d: buy %d units at price %f, total balance %f'
% (i, buy_units, buy_units * real_movement[i], initial_money)
)
states_buy.append(0)
return initial_money, current_inventory
for i in range(real_movement.shape[0] - int(0.025 * len(df))):
state = signal[i]
if state == 1:
initial_money, current_inventory = buy(
i, initial_money, current_inventory
)
states_buy.append(i)
elif state == -1:
if current_inventory == 0:
print('day %d: cannot sell anything, inventory 0' % (i))
else:
if current_inventory > max_sell:
sell_units = max_sell
else:
sell_units = current_inventory
current_inventory -= sell_units
total_sell = sell_units * real_movement[i]
initial_money += total_sell
try:
invest = (
(real_movement[i] - real_movement[states_buy[-1]])
/ real_movement[states_buy[-1]]
) * 100
except:
invest = 0
print(
'day %d, sell %d units at price %f, investment %f %%, total balance %f,'
% (i, sell_units, total_sell, invest, initial_money)
)
states_sell.append(i)
invest = ((initial_money - starting_money) / starting_money) * 100
total_gains = initial_money - starting_money
return states_buy, states_sell, total_gains, invest
states_buy, states_sell, total_gains, invest = buy_stock(df.Close, signals['signal'])
day 56: buy 1 units at price 107.010002, total balance 9892.989998 day 101: buy 1 units at price 105.930000, total balance 9787.059998 day 119: buy 1 units at price 105.750000, total balance 9681.309998 day 120: buy 1 units at price 104.790001, total balance 9576.519997 day 122: buy 1 units at price 103.239998, total balance 9473.279999 day 139, sell 1 units at price 113.349998, investment 9.792716 %, total balance 9586.629997, day 140, sell 1 units at price 114.150002, investment 10.567614 %, total balance 9700.779999, day 141, sell 1 units at price 115.180000, investment 11.565287 %, total balance 9815.959999, day 143, sell 1 units at price 116.029999, investment 12.388610 %, total balance 9931.989998, day 144, sell 1 units at price 116.730003, investment 13.066646 %, total balance 10048.720001, day 148: cannot sell anything, inventory 0 day 149: cannot sell anything, inventory 0 day 150: cannot sell anything, inventory 0 day 151: cannot sell anything, inventory 0 day 181: cannot sell anything, inventory 0 day 187: buy 1 units at price 112.839996, total balance 9935.880005 day 195: buy 1 units at price 111.470001, total balance 9824.410004 day 196: buy 1 units at price 108.129997, total balance 9716.280007 day 197: buy 1 units at price 106.949997, total balance 9609.330010 day 198: buy 1 units at price 106.339996, total balance 9502.990014 day 204: buy 1 units at price 105.250000, total balance 9397.740014 day 205: buy 1 units at price 103.290001, total balance 9294.450013 day 236: buy 1 units at price 101.360001, total balance 9193.090012 day 237: buy 1 units at price 100.370003, total balance 9092.720009 day 240: buy 1 units at price 100.290001, total balance 8992.430008 day 241: buy 1 units at price 99.010002, total balance 8893.420006 day 242: buy 1 units at price 98.540001, total balance 8794.880005 day 243: buy 1 units at price 97.290001, total balance 8697.590004 day 244: buy 1 units at price 96.449997, total balance 8601.140007 day 245: buy 1 units at price 94.169998, total balance 8506.970009 day 246: buy 1 units at price 92.139999, total balance 8414.830010 day 282, sell 1 units at price 105.550003, investment 14.553944 %, total balance 8520.380013, day 284, sell 1 units at price 105.690002, investment 14.705886 %, total balance 8626.070015, day 287, sell 1 units at price 106.099998, investment 15.150856 %, total balance 8732.170013, day 301, sell 1 units at price 106.550003, investment 15.639249 %, total balance 8838.720016, day 302, sell 1 units at price 107.190002, investment 16.333843 %, total balance 8945.910018, day 306: buy 1 units at price 99.760002, total balance 8846.150016 day 307: buy 1 units at price 98.930000, total balance 8747.220016 day 321, sell 1 units at price 111.209999, investment 12.412816 %, total balance 8858.430015, day 324, sell 1 units at price 114.300003, investment 15.536241 %, total balance 8972.730018, day 330, sell 1 units at price 114.470001, investment 15.708077 %, total balance 9087.200019, day 331, sell 1 units at price 116.120003, investment 17.375925 %, total balance 9203.320022,
close = df['Close']
fig = plt.figure(figsize = (15,5))
plt.plot(close, color='r', lw=2.)
plt.plot(close, '^', markersize=10, color='m', label = 'buying signal', markevery = states_buy)
plt.plot(close, 'v', markersize=10, color='k', label = 'selling signal', markevery = states_sell)
plt.title('total gains %f, total investment %f%%'%(total_gains, invest))
plt.legend()
plt.savefig('output/'+name+'.png')
plt.show()
df= df_full.copy()
name = 'Moving Average agent'
short_window = int(0.025 * len(df))
long_window = int(0.05 * len(df))
signals = pd.DataFrame(index=df.index)
signals['signal'] = 0.0
signals['short_ma'] = df['Close'].rolling(window=short_window, min_periods=1, center=False).mean()
signals['long_ma'] = df['Close'].rolling(window=long_window, min_periods=1, center=False).mean()
signals['signal'][short_window:] = np.where(signals['short_ma'][short_window:]
> signals['long_ma'][short_window:], 1.0, 0.0)
signals['positions'] = signals['signal'].diff()
signals
signal | short_ma | long_ma | positions | |
---|---|---|---|---|
0 | 0.0 | 107.949997 | 107.949997 | NaN |
1 | 0.0 | 108.004998 | 108.004998 | 0.0 |
2 | 0.0 | 108.349999 | 108.349999 | 0.0 |
3 | 0.0 | 108.347498 | 108.347498 | 0.0 |
4 | 0.0 | 108.377998 | 108.377998 | 0.0 |
5 | 0.0 | 108.489999 | 108.489999 | 0.0 |
6 | 0.0 | 108.741428 | 108.741428 | 0.0 |
7 | 0.0 | 109.003749 | 109.003749 | 0.0 |
8 | 0.0 | 109.411110 | 109.411110 | 0.0 |
9 | 1.0 | 109.891110 | 109.696999 | 1.0 |
10 | 1.0 | 110.438888 | 109.996362 | 0.0 |
11 | 1.0 | 110.907777 | 110.268332 | 0.0 |
12 | 1.0 | 111.426666 | 110.479230 | 0.0 |
13 | 1.0 | 112.074444 | 110.754285 | 0.0 |
14 | 1.0 | 112.647777 | 110.984666 | 0.0 |
15 | 1.0 | 113.249999 | 111.277499 | 0.0 |
16 | 1.0 | 113.789999 | 111.537646 | 0.0 |
17 | 1.0 | 114.195555 | 111.803332 | 0.0 |
18 | 1.0 | 114.632222 | 112.261666 | 0.0 |
19 | 1.0 | 114.867778 | 112.653333 | 0.0 |
20 | 1.0 | 115.135555 | 113.021666 | 0.0 |
21 | 1.0 | 115.564444 | 113.495555 | 0.0 |
22 | 1.0 | 115.558888 | 113.816666 | 0.0 |
23 | 1.0 | 114.957777 | 113.802777 | 0.0 |
24 | 1.0 | 114.562222 | 113.906111 | 0.0 |
25 | 1.0 | 114.247778 | 114.018889 | 0.0 |
26 | 0.0 | 113.310000 | 113.752778 | -1.0 |
27 | 0.0 | 112.625556 | 113.628889 | 0.0 |
28 | 0.0 | 112.251111 | 113.559444 | 0.0 |
29 | 0.0 | 111.891112 | 113.513333 | 0.0 |
... | ... | ... | ... | ... |
349 | 0.0 | 110.476666 | 112.387222 | 0.0 |
350 | 0.0 | 110.505555 | 112.035000 | 0.0 |
351 | 0.0 | 110.305555 | 111.666111 | 0.0 |
352 | 0.0 | 110.118888 | 111.266666 | 0.0 |
353 | 0.0 | 109.646666 | 110.763332 | 0.0 |
354 | 0.0 | 109.112222 | 110.256110 | 0.0 |
355 | 0.0 | 108.568888 | 109.881110 | 0.0 |
356 | 0.0 | 108.347777 | 109.721666 | 0.0 |
357 | 0.0 | 108.271110 | 109.588888 | 0.0 |
358 | 0.0 | 108.463332 | 109.469999 | 0.0 |
359 | 0.0 | 108.402222 | 109.453888 | 0.0 |
360 | 0.0 | 108.604444 | 109.454999 | 0.0 |
361 | 0.0 | 108.877777 | 109.498333 | 0.0 |
362 | 0.0 | 109.123333 | 109.384999 | 0.0 |
363 | 1.0 | 109.521111 | 109.316666 | 1.0 |
364 | 1.0 | 109.894444 | 109.231666 | 0.0 |
365 | 1.0 | 109.836667 | 109.092222 | 0.0 |
366 | 1.0 | 109.901111 | 109.086110 | 0.0 |
367 | 1.0 | 109.850000 | 109.156666 | 0.0 |
368 | 1.0 | 109.964444 | 109.183333 | 0.0 |
369 | 1.0 | 109.864445 | 109.234444 | 0.0 |
370 | 1.0 | 109.640001 | 109.258889 | 0.0 |
371 | 1.0 | 109.472224 | 109.297778 | 0.0 |
372 | 0.0 | 109.354446 | 109.437778 | -1.0 |
373 | 0.0 | 109.245557 | 109.570000 | 0.0 |
374 | 0.0 | 109.532224 | 109.684445 | 0.0 |
375 | 0.0 | 109.862224 | 109.881668 | 0.0 |
376 | 1.0 | 110.294446 | 110.072223 | 1.0 |
377 | 1.0 | 110.586668 | 110.275556 | 0.0 |
378 | 1.0 | 111.036668 | 110.450556 | 0.0 |
379 rows × 4 columns
def buy_stock(
real_movement,
signal,
initial_money = 10000,
max_buy = 1,
max_sell = 1,
):
"""
real_movement = actual movement in the real world
delay = how much interval you want to delay to change our decision from buy to sell, vice versa
initial_state = 1 is buy, 0 is sell
initial_money = 1000, ignore what kind of currency
max_buy = max quantity for share to buy
max_sell = max quantity for share to sell
"""
starting_money = initial_money
states_sell = []
states_buy = []
current_inventory = 0
def buy(i, initial_money, current_inventory):
shares = initial_money // real_movement[i]
if shares < 1:
print(
'day %d: total balances %f, not enough money to buy a unit price %f'
% (i, initial_money, real_movement[i])
)
else:
if shares > max_buy:
buy_units = max_buy
else:
buy_units = shares
initial_money -= buy_units * real_movement[i]
current_inventory += buy_units
print(
'day %d: buy %d units at price %f, total balance %f'
% (i, buy_units, buy_units * real_movement[i], initial_money)
)
states_buy.append(0)
return initial_money, current_inventory
for i in range(real_movement.shape[0] - int(0.025 * len(df))):
state = signal[i]
if state == 1:
initial_money, current_inventory = buy(
i, initial_money, current_inventory
)
states_buy.append(i)
elif state == -1:
if current_inventory == 0:
print('day %d: cannot sell anything, inventory 0' % (i))
else:
if current_inventory > max_sell:
sell_units = max_sell
else:
sell_units = current_inventory
current_inventory -= sell_units
total_sell = sell_units * real_movement[i]
initial_money += total_sell
try:
invest = (
(real_movement[i] - real_movement[states_buy[-1]])
/ real_movement[states_buy[-1]]
) * 100
except:
invest = 0
print(
'day %d, sell %d units at price %f, investment %f %%, total balance %f,'
% (i, sell_units, total_sell, invest, initial_money)
)
states_sell.append(i)
invest = ((initial_money - starting_money) / starting_money) * 100
total_gains = initial_money - starting_money
return states_buy, states_sell, total_gains, invest
states_buy, states_sell, total_gains, invest = buy_stock(df.Close, signals['positions'])
day 9: buy 1 units at price 112.269997, total balance 9887.730003 day 26, sell 1 units at price 107.879997, investment -3.910217 %, total balance 9995.610000, day 35: buy 1 units at price 114.980003, total balance 9880.629997 day 46, sell 1 units at price 118.040001, investment 2.661331 %, total balance 9998.669998, day 50: buy 1 units at price 115.239998, total balance 9883.430000 day 55, sell 1 units at price 109.949997, investment -4.590421 %, total balance 9993.379997, day 70: buy 1 units at price 110.300003, total balance 9883.079994 day 78, sell 1 units at price 109.989998, investment -0.281056 %, total balance 9993.069992, day 90: buy 1 units at price 113.860001, total balance 9879.209991 day 101, sell 1 units at price 105.930000, investment -6.964694 %, total balance 9985.139991, day 114: buy 1 units at price 107.900002, total balance 9877.239989 day 118, sell 1 units at price 107.510002, investment -0.361446 %, total balance 9984.749991, day 134: buy 1 units at price 110.580002, total balance 9874.169989 day 158, sell 1 units at price 114.769997, investment 3.789107 %, total balance 9988.939986, day 168: buy 1 units at price 114.580002, total balance 9874.359984 day 173, sell 1 units at price 113.709999, investment -0.759297 %, total balance 9988.069983, day 182: buy 1 units at price 117.849998, total balance 9870.219985 day 191, sell 1 units at price 115.269997, investment -2.189224 %, total balance 9985.489982, day 214: buy 1 units at price 109.599998, total balance 9875.889984 day 226, sell 1 units at price 106.650002, investment -2.691602 %, total balance 9982.539986, day 255: buy 1 units at price 100.570000, total balance 9881.969986 day 278, sell 1 units at price 100.879997, investment 0.308240 %, total balance 9982.849983, day 286: buy 1 units at price 105.000000, total balance 9877.849983 day 296, sell 1 units at price 103.010002, investment -1.895236 %, total balance 9980.859985, day 303: buy 1 units at price 106.800003, total balance 9874.059982 day 308, sell 1 units at price 99.919998, investment -6.441952 %, total balance 9973.979980, day 317: buy 1 units at price 105.650002, total balance 9868.329978 day 341, sell 1 units at price 109.449997, investment 3.596777 %, total balance 9977.779975, day 363: buy 1 units at price 109.540001, total balance 9868.239974
close = df['Close']
fig = plt.figure(figsize = (15,5))
plt.plot(close, color='r', lw=2.)
plt.plot(close, '^', markersize=10, color='m', label = 'buying signal', markevery = states_buy)
plt.plot(close, 'v', markersize=10, color='k', label = 'selling signal', markevery = states_sell)
plt.title('total gains %f, total investment %f%%'%(total_gains, invest))
plt.legend()
plt.savefig('output/'+name+'.png')
plt.show()
df= df_full.copy()
name = 'Signal Rolling agent'
def buy_stock(
real_movement,
delay = 5,
initial_state = 1,
initial_money = 10000,
max_buy = 1,
max_sell = 1,
):
"""
real_movement = actual movement in the real world
delay = how much interval you want to delay to change our decision from buy to sell, vice versa
initial_state = 1 is buy, 0 is sell
initial_money = 1000, ignore what kind of currency
max_buy = max quantity for share to buy
max_sell = max quantity for share to sell
"""
starting_money = initial_money
delay_change_decision = delay
current_decision = 0
state = initial_state
current_val = real_movement[0]
states_sell = []
states_buy = []
current_inventory = 0
def buy(i, initial_money, current_inventory):
shares = initial_money // real_movement[i]
if shares < 1:
print(
'day %d: total balances %f, not enough money to buy a unit price %f'
% (i, initial_money, real_movement[i])
)
else:
if shares > max_buy:
buy_units = max_buy
else:
buy_units = shares
initial_money -= buy_units * real_movement[i]
current_inventory += buy_units
print(
'day %d: buy %d units at price %f, total balance %f'
% (i, buy_units, buy_units * real_movement[i], initial_money)
)
states_buy.append(0)
return initial_money, current_inventory
if state == 1:
initial_money, current_inventory = buy(
0, initial_money, current_inventory
)
for i in range(1, real_movement.shape[0], 1):
if real_movement[i] < current_val and state == 0:
if current_decision < delay_change_decision:
current_decision += 1
else:
state = 1
initial_money, current_inventory = buy(
i, initial_money, current_inventory
)
current_decision = 0
states_buy.append(i)
if real_movement[i] > current_val and state == 1:
if current_decision < delay_change_decision:
current_decision += 1
else:
state = 0
if current_inventory == 0:
print('day %d: cannot sell anything, inventory 0' % (i))
else:
if current_inventory > max_sell:
sell_units = max_sell
else:
sell_units = current_inventory
current_inventory -= sell_units
total_sell = sell_units * real_movement[i]
initial_money += total_sell
try:
invest = (
(real_movement[i] - real_movement[states_buy[-1]])
/ real_movement[states_buy[-1]]
) * 100
except:
invest = 0
print(
'day %d, sell %d units at price %f, investment %f %%, total balance %f,'
% (i, sell_units, total_sell, invest, initial_money)
)
current_decision = 0
states_sell.append(i)
current_val = real_movement[i]
invest = ((initial_money - starting_money) / starting_money) * 100
total_gains = initial_money - starting_money
return states_buy, states_sell, total_gains, invest
states_buy, states_sell, total_gains, invest = buy_stock(df.Close, initial_state = 1,
delay = 4, initial_money = 10000)
day 0: buy 1 units at price 107.949997, total balance 9892.050003 day 6, sell 1 units at price 110.250000, investment 2.130619 %, total balance 10002.300003, day 19: buy 1 units at price 115.110001, total balance 9887.190002 day 27, sell 1 units at price 110.040001, investment -4.404483 %, total balance 9997.230003, day 40: buy 1 units at price 113.430000, total balance 9883.800003 day 50, sell 1 units at price 115.239998, investment 1.595696 %, total balance 9999.040001, day 59: buy 1 units at price 108.000000, total balance 9891.040001 day 66, sell 1 units at price 110.400002, investment 2.222224 %, total balance 10001.440003, day 75: buy 1 units at price 111.470001, total balance 9889.970002 day 88, sell 1 units at price 113.410004, investment 1.740381 %, total balance 10003.380006, day 98: buy 1 units at price 112.489998, total balance 9890.890008 day 108, sell 1 units at price 110.800003, investment -1.502351 %, total balance 10001.690011, day 114: buy 1 units at price 107.900002, total balance 9893.790009 day 127, sell 1 units at price 103.720001, investment -3.873958 %, total balance 9997.510010, day 137: buy 1 units at price 109.889999, total balance 9887.620011 day 143, sell 1 units at price 116.029999, investment 5.587406 %, total balance 10003.650010, day 156: buy 1 units at price 113.699997, total balance 9889.950013 day 172, sell 1 units at price 114.320000, investment 0.545297 %, total balance 10004.270013, day 183: buy 1 units at price 116.720001, total balance 9887.550012 day 193, sell 1 units at price 115.320000, investment -1.199453 %, total balance 10002.870012, day 198: buy 1 units at price 106.339996, total balance 9896.530016 day 209, sell 1 units at price 106.699997, investment 0.338538 %, total balance 10003.230013, day 220: buy 1 units at price 107.330002, total balance 9895.900011 day 229, sell 1 units at price 110.940002, investment 3.363458 %, total balance 10006.840013, day 236: buy 1 units at price 101.360001, total balance 9905.480012 day 250, sell 1 units at price 97.620003, investment -3.689816 %, total balance 10003.100015, day 258: buy 1 units at price 99.910004, total balance 9903.190011 day 263, sell 1 units at price 104.589996, investment 4.684208 %, total balance 10007.780007, day 275: buy 1 units at price 103.739998, total balance 9904.040009 day 287, sell 1 units at price 106.099998, investment 2.274918 %, total balance 10010.140007, day 293: buy 1 units at price 104.110001, total balance 9906.030006 day 301, sell 1 units at price 106.550003, investment 2.343677 %, total balance 10012.580009, day 307: buy 1 units at price 98.930000, total balance 9913.650009 day 313, sell 1 units at price 105.139999, investment 6.277165 %, total balance 10018.790008, day 328: buy 1 units at price 113.550003, total balance 9905.240005 day 335, sell 1 units at price 116.120003, investment 2.263320 %, total balance 10021.360008, day 340: buy 1 units at price 112.510002, total balance 9908.850006 day 350, sell 1 units at price 109.709999, investment -2.488670 %, total balance 10018.560005, day 359: buy 1 units at price 109.160004, total balance 9909.400001 day 366, sell 1 units at price 110.709999, investment 1.419929 %, total balance 10020.110000, day 377: buy 1 units at price 112.820000, total balance 9907.290000
close = df['Close']
fig = plt.figure(figsize = (15,5))
plt.plot(close, color='r', lw=2.)
plt.plot(close, '^', markersize=10, color='m', label = 'buying signal', markevery = states_buy)
plt.plot(close, 'v', markersize=10, color='k', label = 'selling signal', markevery = states_sell)
plt.title('total gains %f, total investment %f%%'%(total_gains, invest))
plt.legend()
plt.savefig('output/'+name+'.png')
plt.show()
df = df_full.copy()
name = 'Policy Gradient agent'
class Agent:
LEARNING_RATE = 1e-4
LAYER_SIZE = 256
GAMMA = 0.9
OUTPUT_SIZE = 3
def __init__(self, state_size, window_size, trend, skip):
self.state_size = state_size
self.window_size = window_size
self.half_window = window_size // 2
self.trend = trend
self.skip = skip
self.X = tf.placeholder(tf.float32, (None, self.state_size))
self.REWARDS = tf.placeholder(tf.float32, (None))
self.ACTIONS = tf.placeholder(tf.int32, (None))
feed_forward = tf.layers.dense(self.X, self.LAYER_SIZE, activation = tf.nn.relu)
self.logits = tf.layers.dense(feed_forward, self.OUTPUT_SIZE, activation = tf.nn.softmax)
input_y = tf.one_hot(self.ACTIONS, self.OUTPUT_SIZE)
loglike = tf.log((input_y * (input_y - self.logits) + (1 - input_y) * (input_y + self.logits)) + 1)
rewards = tf.tile(tf.reshape(self.REWARDS, (-1,1)), [1, self.OUTPUT_SIZE])
self.cost = -tf.reduce_mean(loglike * (rewards + 1))
self.optimizer = tf.train.AdamOptimizer(learning_rate = self.LEARNING_RATE).minimize(self.cost)
self.sess = tf.InteractiveSession()
self.sess.run(tf.global_variables_initializer())
def predict(self, inputs):
return self.sess.run(self.logits, feed_dict={self.X:inputs})
def get_state(self, t):
window_size = self.window_size + 1
d = t - window_size + 1
block = self.trend[d : t + 1] if d >= 0 else -d * [self.trend[0]] + self.trend[0 : t + 1]
res = []
for i in range(window_size - 1):
res.append(block[i + 1] - block[i])
return np.array([res])
def discount_rewards(self, r):
discounted_r = np.zeros_like(r)
running_add = 0
for t in reversed(range(0, r.size)):
running_add = running_add * self.GAMMA + r[t]
discounted_r[t] = running_add
return discounted_r
def get_predicted_action(self, sequence):
prediction = self.predict(np.array(sequence))[0]
return np.argmax(prediction)
def buy(self, initial_money):
starting_money = initial_money
states_sell = []
states_buy = []
inventory = []
state = self.get_state(0)
for t in range(0, len(self.trend) - 1, self.skip):
action = self.get_predicted_action(state)
next_state = self.get_state(t + 1)
if action == 1 and initial_money >= self.trend[t] and t < (len(self.trend) - self.half_window):
inventory.append(self.trend[t])
initial_money -= self.trend[t]
states_buy.append(t)
print('day %d: buy 1 unit at price %f, total balance %f'% (t, self.trend[t], initial_money))
elif action == 2 and len(inventory):
bought_price = inventory.pop(0)
initial_money += self.trend[t]
states_sell.append(t)
try:
invest = ((close[t] - bought_price) / bought_price) * 100
except:
invest = 0
print(
'day %d, sell 1 unit at price %f, investment %f %%, total balance %f,'
% (t, close[t], invest, initial_money)
)
state = next_state
invest = ((initial_money - starting_money) / starting_money) * 100
total_gains = initial_money - starting_money
return states_buy, states_sell, total_gains, invest
def train(self, iterations, checkpoint, initial_money):
for i in range(iterations):
ep_history = []
total_profit = 0
inventory = []
state = self.get_state(0)
starting_money = initial_money
for t in range(0, len(self.trend) - 1, self.skip):
action = self.get_predicted_action(state)
next_state = self.get_state(t + 1)
if action == 1 and starting_money >= self.trend[t] and t < (len(self.trend) - self.half_window):
inventory.append(self.trend[t])
starting_money -= close[t]
elif action == 2 and len(inventory):
bought_price = inventory.pop(0)
total_profit += self.trend[t] - bought_price
starting_money += self.trend[t]
ep_history.append([state,action,starting_money,next_state])
state = next_state
ep_history = np.array(ep_history)
ep_history[:,2] = self.discount_rewards(ep_history[:,2])
cost, _ = self.sess.run([self.cost, self.optimizer], feed_dict={self.X:np.vstack(ep_history[:,0]),
self.REWARDS:ep_history[:,2],
self.ACTIONS:ep_history[:,1]})
if (i+1) % checkpoint == 0:
print('epoch: %d, total rewards: %f.3, cost: %f, total money: %f'%(i + 1, total_profit, cost,
starting_money))
close = df.Close.values.tolist()
initial_money = 10000
window_size = 30
skip = 1
agent = Agent(state_size = window_size,
window_size = window_size,
trend = close,
skip = skip)
agent.train(iterations = 200, checkpoint = 10, initial_money = initial_money)
epoch: 10, total rewards: 1781.590144.3, cost: -3782.833740, total money: 7062.900203 epoch: 20, total rewards: 1808.720396.3, cost: -6238.727539, total money: 10819.470396 epoch: 30, total rewards: 644.675288.3, cost: -10399.220703, total money: 10644.675288 epoch: 40, total rewards: 1696.944943.3, cost: -9798.079102, total money: 11696.944943 epoch: 50, total rewards: 593.719845.3, cost: -13938.982422, total money: 10593.719845 epoch: 60, total rewards: 634.539913.3, cost: -14890.398438, total money: 9645.289913 epoch: 70, total rewards: 1586.160156.3, cost: -10411.115234, total money: 11586.160156 epoch: 80, total rewards: 368.749937.3, cost: -15986.910156, total money: 10368.749937 epoch: 90, total rewards: 733.844603.3, cost: -15352.789062, total money: 8857.304625 epoch: 100, total rewards: 645.715148.3, cost: -15976.339844, total money: 10645.715148 epoch: 110, total rewards: 994.814937.3, cost: -11198.958984, total money: 4471.054988 epoch: 120, total rewards: 1771.289852.3, cost: -6539.313477, total money: 5164.829891 epoch: 130, total rewards: 1643.744995.3, cost: -11630.438477, total money: 11643.744995 epoch: 140, total rewards: 1877.095029.3, cost: -7103.230957, total money: 9104.255063 epoch: 150, total rewards: 481.749932.3, cost: -18531.593750, total money: 10481.749932 epoch: 160, total rewards: 638.035152.3, cost: -16995.314453, total money: 10638.035152 epoch: 170, total rewards: 1188.049925.3, cost: -13348.065430, total money: 10263.189940 epoch: 180, total rewards: 633.885008.3, cost: -14666.952148, total money: 10633.885008 epoch: 190, total rewards: 1675.079952.3, cost: -9106.298828, total money: 5977.189998 epoch: 200, total rewards: 567.955136.3, cost: -17828.587891, total money: 10567.955136
states_buy, states_sell, total_gains, invest = agent.buy(initial_money = initial_money)
day 12: buy 1 unit at price 760.539978, total balance 9239.460022 day 19, sell 1 unit at price 758.039978, investment -0.328714 %, total balance 9997.500000, day 23: buy 1 unit at price 759.109985, total balance 9238.390015 day 26, sell 1 unit at price 789.289978, investment 3.975708 %, total balance 10027.679993, day 27: buy 1 unit at price 789.270020, total balance 9238.409973 day 28, sell 1 unit at price 796.099976, investment 0.865351 %, total balance 10034.509949, day 31: buy 1 unit at price 790.799988, total balance 9243.709961 day 33, sell 1 unit at price 796.419983, investment 0.710672 %, total balance 10040.129944, day 35: buy 1 unit at price 791.260010, total balance 9248.869934 day 36: buy 1 unit at price 789.909973, total balance 8458.959961 day 37, sell 1 unit at price 791.549988, investment 0.036648 %, total balance 9250.509949, day 38: buy 1 unit at price 785.049988, total balance 8465.459961 day 39: buy 1 unit at price 782.789978, total balance 7682.669983 day 40: buy 1 unit at price 771.820007, total balance 6910.849976 day 41, sell 1 unit at price 786.140015, investment -0.477264 %, total balance 7696.989991, day 42, sell 1 unit at price 786.900024, investment 0.235658 %, total balance 8483.890015, day 44: buy 1 unit at price 806.150024, total balance 7677.739991 day 46: buy 1 unit at price 804.789978, total balance 6872.950013 day 50: buy 1 unit at price 804.609985, total balance 6068.340028 day 51: buy 1 unit at price 806.070007, total balance 5262.270021 day 52, sell 1 unit at price 802.174988, investment 2.476400 %, total balance 6064.445009, day 53, sell 1 unit at price 805.020020, investment 4.301523 %, total balance 6869.465029, day 54, sell 1 unit at price 819.309998, investment 1.632447 %, total balance 7688.775027, day 56: buy 1 unit at price 835.669983, total balance 6853.105044 day 57: buy 1 unit at price 832.150024, total balance 6020.955020 day 59, sell 1 unit at price 802.320007, investment -0.306909 %, total balance 6823.275027, day 61: buy 1 unit at price 795.695007, total balance 6027.580020 day 63: buy 1 unit at price 801.489990, total balance 5226.090030 day 64, sell 1 unit at price 801.340027, investment -0.406403 %, total balance 6027.430057, day 65: buy 1 unit at price 806.969971, total balance 5220.460086 day 71: buy 1 unit at price 818.979980, total balance 4401.480106 day 72: buy 1 unit at price 824.159973, total balance 3577.320133 day 73, sell 1 unit at price 828.070007, investment 2.729291 %, total balance 4405.390140, day 74, sell 1 unit at price 831.659973, investment -0.479856 %, total balance 5237.050113, day 78, sell 1 unit at price 829.280029, investment -0.344889 %, total balance 6066.330142, day 79: buy 1 unit at price 823.210022, total balance 5243.120120 day 80: buy 1 unit at price 835.239990, total balance 4407.880130 day 81, sell 1 unit at price 830.630005, investment 4.390501 %, total balance 5238.510135, day 83: buy 1 unit at price 827.780029, total balance 4410.730106 day 86, sell 1 unit at price 838.679993, investment 4.640108 %, total balance 5249.410099, day 88, sell 1 unit at price 845.539978, investment 4.779609 %, total balance 6094.950077, day 89, sell 1 unit at price 845.619995, investment 3.252829 %, total balance 6940.570072, day 90: buy 1 unit at price 847.200012, total balance 6093.370060 day 92, sell 1 unit at price 852.119995, investment 3.392548 %, total balance 6945.490055, day 93, sell 1 unit at price 848.400024, investment 3.059973 %, total balance 7793.890079, day 94: buy 1 unit at price 830.460022, total balance 6963.430057 day 95, sell 1 unit at price 829.590027, investment -0.676448 %, total balance 7793.020084, day 98: buy 1 unit at price 819.510010, total balance 6973.510074 day 101: buy 1 unit at price 831.500000, total balance 6142.010074 day 102: buy 1 unit at price 829.559998, total balance 5312.450076 day 103, sell 1 unit at price 838.549988, investment 1.301065 %, total balance 6151.000064, day 105, sell 1 unit at price 831.409973, investment -1.863791 %, total balance 6982.410037, day 106: buy 1 unit at price 827.880005, total balance 6154.530032 day 108, sell 1 unit at price 824.729980, investment -0.689984 %, total balance 6979.260012, day 109: buy 1 unit at price 823.349976, total balance 6155.910036 day 110, sell 1 unit at price 824.320007, investment 0.586936 %, total balance 6980.230043, day 111, sell 1 unit at price 823.559998, investment -0.954901 %, total balance 7803.790041, day 113, sell 1 unit at price 836.820007, investment 0.875164 %, total balance 8640.610048, day 114: buy 1 unit at price 838.210022, total balance 7802.400026 day 115: buy 1 unit at price 841.650024, total balance 6960.750002 day 116, sell 1 unit at price 843.190002, investment 1.849301 %, total balance 7803.940004, day 117, sell 1 unit at price 862.760010, investment 4.786547 %, total balance 8666.700014, day 118, sell 1 unit at price 872.299988, investment 4.066996 %, total balance 9539.000002, day 119: buy 1 unit at price 871.729980, total balance 8667.270022 day 120, sell 1 unit at price 874.250000, investment 3.873341 %, total balance 9541.520022, day 121: buy 1 unit at price 905.960022, total balance 8635.560000 day 123, sell 1 unit at price 916.440002, investment 5.128884 %, total balance 9552.000002, day 124: buy 1 unit at price 927.039978, total balance 8624.960024 day 127: buy 1 unit at price 934.299988, total balance 7690.660036 day 128: buy 1 unit at price 932.169983, total balance 6758.490053 day 129, sell 1 unit at price 928.780029, investment 2.518876 %, total balance 7687.270082, day 130: buy 1 unit at price 930.599976, total balance 6756.670106 day 131: buy 1 unit at price 932.219971, total balance 5824.450135 day 132: buy 1 unit at price 937.080017, total balance 4887.370118 day 133: buy 1 unit at price 943.000000, total balance 3944.370118 day 134, sell 1 unit at price 919.619995, investment -0.800395 %, total balance 4863.990113, day 135: buy 1 unit at price 930.239990, total balance 3933.750123 day 136: buy 1 unit at price 934.010010, total balance 2999.740113 day 137, sell 1 unit at price 941.859985, investment 0.809162 %, total balance 3941.600098, day 138, sell 1 unit at price 948.820007, investment 1.786157 %, total balance 4890.420105, day 139: buy 1 unit at price 954.960022, total balance 3935.460083 day 140, sell 1 unit at price 969.539978, investment 4.184397 %, total balance 4905.000061, day 141: buy 1 unit at price 971.469971, total balance 3933.530090 day 142: buy 1 unit at price 975.880005, total balance 2957.650085 day 143, sell 1 unit at price 964.859985, investment 3.501321 %, total balance 3922.510070, day 145: buy 1 unit at price 975.599976, total balance 2946.910094 day 146, sell 1 unit at price 983.679993, investment 4.972892 %, total balance 3930.590087, day 147: buy 1 unit at price 976.570007, total balance 2954.020080 day 148, sell 1 unit at price 980.940002, investment 4.023330 %, total balance 3934.960082, day 149, sell 1 unit at price 983.409973, investment 5.715728 %, total balance 4918.370055, day 150: buy 1 unit at price 949.830017, total balance 3968.540038 day 151: buy 1 unit at price 942.900024, total balance 3025.640014 day 153, sell 1 unit at price 950.760010, investment 1.793343 %, total balance 3976.400024, day 154: buy 1 unit at price 942.309998, total balance 3034.090026 day 155: buy 1 unit at price 939.780029, total balance 2094.309997 day 156: buy 1 unit at price 957.369995, total balance 1136.940002 day 157, sell 1 unit at price 950.630005, investment -0.453424 %, total balance 2087.570007, day 158: buy 1 unit at price 959.450012, total balance 1128.119995 day 159: buy 1 unit at price 957.090027, total balance 171.029968 day 161, sell 1 unit at price 952.270020, investment -1.976381 %, total balance 1123.299988, day 164, sell 1 unit at price 917.789978, investment -5.952579 %, total balance 2041.089966, day 165, sell 1 unit at price 908.729980, investment -6.854243 %, total balance 2949.819946, day 166: buy 1 unit at price 898.700012, total balance 2051.119934 day 168, sell 1 unit at price 906.690002, investment -7.155658 %, total balance 2957.809936, day 170: buy 1 unit at price 928.799988, total balance 2029.009948 day 172, sell 1 unit at price 943.830017, investment -0.631692 %, total balance 2972.839965, day 173, sell 1 unit at price 947.159973, investment 0.451792 %, total balance 3919.999938, day 175: buy 1 unit at price 953.419983, total balance 2966.579955 day 176, sell 1 unit at price 965.400024, investment 2.450364 %, total balance 3931.979979, day 177, sell 1 unit at price 970.890015, investment 3.310348 %, total balance 4902.869994, day 179, sell 1 unit at price 972.919983, investment 1.624240 %, total balance 5875.789977, day 182, sell 1 unit at price 947.799988, investment -1.214240 %, total balance 6823.589965, day 184: buy 1 unit at price 941.530029, total balance 5882.059936 day 185: buy 1 unit at price 930.500000, total balance 4951.559936 day 186, sell 1 unit at price 930.830017, investment -2.743735 %, total balance 5882.389953, day 187: buy 1 unit at price 930.390015, total balance 4951.999938 day 188, sell 1 unit at price 923.650024, investment 2.776234 %, total balance 5875.649962, day 189: buy 1 unit at price 927.960022, total balance 4947.689940 day 191, sell 1 unit at price 926.789978, investment -0.216409 %, total balance 5874.479918, day 192, sell 1 unit at price 922.900024, investment -3.201103 %, total balance 6797.379942, day 194, sell 1 unit at price 914.390015, investment -2.882544 %, total balance 7711.769957, day 195, sell 1 unit at price 922.669983, investment -0.841485 %, total balance 8634.439940, day 196: buy 1 unit at price 922.219971, total balance 7712.219969 day 198: buy 1 unit at price 910.979980, total balance 6801.239989 day 199, sell 1 unit at price 910.669983, investment -2.119545 %, total balance 7711.909972, day 201: buy 1 unit at price 924.690002, total balance 6787.219970 day 202, sell 1 unit at price 927.000000, investment -0.103455 %, total balance 7714.219970, day 203, sell 1 unit at price 921.280029, investment -0.101922 %, total balance 8635.499999, day 204: buy 1 unit at price 915.890015, total balance 7719.609984 day 206: buy 1 unit at price 921.289978, total balance 6798.320006 day 207, sell 1 unit at price 929.570007, investment 2.040663 %, total balance 7727.890013, day 208, sell 1 unit at price 939.330017, investment 1.583235 %, total balance 8667.220030, day 210, sell 1 unit at price 928.450012, investment 1.371343 %, total balance 9595.670042, day 213: buy 1 unit at price 926.500000, total balance 8669.170042 day 215: buy 1 unit at price 932.070007, total balance 7737.100035 day 217, sell 1 unit at price 925.109985, investment 0.414637 %, total balance 8662.210020, day 218: buy 1 unit at price 920.289978, total balance 7741.920042 day 219: buy 1 unit at price 915.000000, total balance 6826.920042 day 221: buy 1 unit at price 931.580017, total balance 5895.340025 day 222, sell 1 unit at price 932.450012, investment 0.642203 %, total balance 6827.790037, day 223, sell 1 unit at price 928.530029, investment -0.379797 %, total balance 7756.320066, day 224: buy 1 unit at price 920.969971, total balance 6835.350095 day 225: buy 1 unit at price 924.859985, total balance 5910.490110 day 227, sell 1 unit at price 949.500000, investment 3.174002 %, total balance 6859.990110, day 228, sell 1 unit at price 959.109985, investment 4.820763 %, total balance 7819.100095, day 230: buy 1 unit at price 957.789978, total balance 6861.310117 day 233, sell 1 unit at price 978.890015, investment 5.078469 %, total balance 7840.200132, day 234, sell 1 unit at price 977.000000, investment 6.083806 %, total balance 8817.200132, day 235: buy 1 unit at price 972.599976, total balance 7844.600156 day 236: buy 1 unit at price 989.250000, total balance 6855.350156 day 237, sell 1 unit at price 987.830017, investment 6.808602 %, total balance 7843.180173, day 240, sell 1 unit at price 992.179993, investment 3.590559 %, total balance 8835.360166, day 243, sell 1 unit at price 988.200012, investment 1.603952 %, total balance 9823.560178, day 245, sell 1 unit at price 970.539978, investment -1.891334 %, total balance 10794.100156,
fig = plt.figure(figsize = (15,5))
plt.plot(close, color='r', lw=2.)
plt.plot(close, '^', markersize=10, color='m', label = 'buying signal', markevery = states_buy)
plt.plot(close, 'v', markersize=10, color='k', label = 'selling signal', markevery = states_sell)
plt.title('total gains %f, total investment %f%%'%(total_gains, invest))
plt.legend()
plt.savefig('output/'+name+'.png')
plt.show()
from collections import deque
import random
df= df_full.copy()
name = 'Q-learning agent'
class Agent:
def __init__(self, state_size, window_size, trend, skip, batch_size):
self.state_size = state_size
self.window_size = window_size
self.half_window = window_size // 2
self.trend = trend
self.skip = skip
self.action_size = 3
self.batch_size = batch_size
self.memory = deque(maxlen = 1000)
self.inventory = []
self.gamma = 0.95
self.epsilon = 0.5
self.epsilon_min = 0.01
self.epsilon_decay = 0.999
tf.reset_default_graph()
self.sess = tf.InteractiveSession()
self.X = tf.placeholder(tf.float32, [None, self.state_size])
self.Y = tf.placeholder(tf.float32, [None, self.action_size])
feed = tf.layers.dense(self.X, 256, activation = tf.nn.relu)
self.logits = tf.layers.dense(feed, self.action_size)
self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
self.optimizer = tf.train.GradientDescentOptimizer(1e-5).minimize(
self.cost
)
self.sess.run(tf.global_variables_initializer())
def act(self, state):
if random.random() <= self.epsilon:
return random.randrange(self.action_size)
return np.argmax(
self.sess.run(self.logits, feed_dict = {self.X: state})[0]
)
def get_state(self, t):
window_size = self.window_size + 1
d = t - window_size + 1
block = self.trend[d : t + 1] if d >= 0 else -d * [self.trend[0]] + self.trend[0 : t + 1]
res = []
for i in range(window_size - 1):
res.append(block[i + 1] - block[i])
return np.array([res])
def replay(self, batch_size):
mini_batch = []
l = len(self.memory)
for i in range(l - batch_size, l):
mini_batch.append(self.memory[i])
replay_size = len(mini_batch)
X = np.empty((replay_size, self.state_size))
Y = np.empty((replay_size, self.action_size))
states = np.array([a[0][0] for a in mini_batch])
new_states = np.array([a[3][0] for a in mini_batch])
Q = self.sess.run(self.logits, feed_dict = {self.X: states})
Q_new = self.sess.run(self.logits, feed_dict = {self.X: new_states})
for i in range(len(mini_batch)):
state, action, reward, next_state, done = mini_batch[i]
target = Q[i]
target[action] = reward
if not done:
target[action] += self.gamma * np.amax(Q_new[i])
X[i] = state
Y[i] = target
cost, _ = self.sess.run(
[self.cost, self.optimizer], feed_dict = {self.X: X, self.Y: Y}
)
if self.epsilon > self.epsilon_min:
self.epsilon *= self.epsilon_decay
return cost
def buy(self, initial_money):
starting_money = initial_money
states_sell = []
states_buy = []
inventory = []
state = self.get_state(0)
for t in range(0, len(self.trend) - 1, self.skip):
action = self.act(state)
next_state = self.get_state(t + 1)
if action == 1 and initial_money >= self.trend[t] and t < (len(self.trend) - self.half_window):
inventory.append(self.trend[t])
initial_money -= self.trend[t]
states_buy.append(t)
print('day %d: buy 1 unit at price %f, total balance %f'% (t, self.trend[t], initial_money))
elif action == 2 and len(inventory):
bought_price = inventory.pop(0)
initial_money += self.trend[t]
states_sell.append(t)
try:
invest = ((close[t] - bought_price) / bought_price) * 100
except:
invest = 0
print(
'day %d, sell 1 unit at price %f, investment %f %%, total balance %f,'
% (t, close[t], invest, initial_money)
)
state = next_state
invest = ((initial_money - starting_money) / starting_money) * 100
total_gains = initial_money - starting_money
return states_buy, states_sell, total_gains, invest
def train(self, iterations, checkpoint, initial_money):
for i in range(iterations):
total_profit = 0
inventory = []
state = self.get_state(0)
starting_money = initial_money
for t in range(0, len(self.trend) - 1, self.skip):
action = self.act(state)
next_state = self.get_state(t + 1)
if action == 1 and starting_money >= self.trend[t] and t < (len(self.trend) - self.half_window):
inventory.append(self.trend[t])
starting_money -= self.trend[t]
elif action == 2 and len(inventory) > 0:
bought_price = inventory.pop(0)
total_profit += self.trend[t] - bought_price
starting_money += self.trend[t]
invest = ((starting_money - initial_money) / initial_money)
self.memory.append((state, action, invest,
next_state, starting_money < initial_money))
state = next_state
batch_size = min(self.batch_size, len(self.memory))
cost = self.replay(batch_size)
if (i+1) % checkpoint == 0:
print('epoch: %d, total rewards: %f.3, cost: %f, total money: %f'%(i + 1, total_profit, cost,
starting_money))
close = df.Close.values.tolist()
initial_money = 10000
window_size = 30
skip = 1
batch_size = 32
agent = Agent(state_size = window_size,
window_size = window_size,
trend = close,
skip = skip,
batch_size = batch_size)
agent.train(iterations = 200, checkpoint = 10, initial_money = initial_money)
epoch: 10, total rewards: 274.710201.3, cost: 0.810730, total money: 10274.710201 epoch: 20, total rewards: 161.429929.3, cost: 0.406487, total money: 10161.429929 epoch: 30, total rewards: 89.659849.3, cost: 0.225568, total money: 10089.659849 epoch: 40, total rewards: 121.209836.3, cost: 0.152499, total money: 10121.209836 epoch: 50, total rewards: 94.869810.3, cost: 0.120762, total money: 10094.869810 epoch: 60, total rewards: 123.609922.3, cost: 0.097353, total money: 10123.609922 epoch: 70, total rewards: 130.149901.3, cost: 0.131718, total money: 10130.149901 epoch: 80, total rewards: 55.369871.3, cost: 0.072531, total money: 10055.369871 epoch: 90, total rewards: 177.780026.3, cost: 0.062346, total money: 10177.780026 epoch: 100, total rewards: 151.249997.3, cost: 0.056566, total money: 10151.249997 epoch: 110, total rewards: 101.629942.3, cost: 0.050717, total money: 10101.629942 epoch: 120, total rewards: 138.329892.3, cost: 0.075178, total money: 10138.329892 epoch: 130, total rewards: 187.559812.3, cost: 0.039170, total money: 10187.559812 epoch: 140, total rewards: 125.699889.3, cost: 0.035156, total money: 10125.699889 epoch: 150, total rewards: 138.249876.3, cost: 0.403965, total money: 10138.249876 epoch: 160, total rewards: 141.329832.3, cost: 0.029966, total money: 10141.329832 epoch: 170, total rewards: 179.989928.3, cost: 0.027219, total money: 10179.989928 epoch: 180, total rewards: 191.619871.3, cost: 0.025002, total money: 10191.619871 epoch: 190, total rewards: 191.929868.3, cost: 0.149151, total money: 10191.929868 epoch: 200, total rewards: 113.759886.3, cost: 0.021398, total money: 10113.759886
states_buy, states_sell, total_gains, invest = agent.buy(initial_money = initial_money)
day 4: buy 1 unit at price 790.510010, total balance 9209.489990 day 5, sell 1 unit at price 785.309998, investment -0.657805 %, total balance 9994.799988, day 14: buy 1 unit at price 768.270020, total balance 9226.529968 day 16, sell 1 unit at price 761.679993, investment -0.857775 %, total balance 9988.209961, day 22: buy 1 unit at price 762.520020, total balance 9225.689941 day 23, sell 1 unit at price 759.109985, investment -0.447206 %, total balance 9984.799926, day 28: buy 1 unit at price 796.099976, total balance 9188.699950 day 29, sell 1 unit at price 797.070007, investment 0.121848 %, total balance 9985.769957, day 32: buy 1 unit at price 794.200012, total balance 9191.569945 day 35: buy 1 unit at price 791.260010, total balance 8400.309935 day 36: buy 1 unit at price 789.909973, total balance 7610.399962 day 37, sell 1 unit at price 791.549988, investment -0.333672 %, total balance 8401.949950, day 38, sell 1 unit at price 785.049988, investment -0.784827 %, total balance 9186.999938, day 39, sell 1 unit at price 782.789978, investment -0.901368 %, total balance 9969.789916, day 42: buy 1 unit at price 786.900024, total balance 9182.889892 day 43: buy 1 unit at price 794.020020, total balance 8388.869872 day 44: buy 1 unit at price 806.150024, total balance 7582.719848 day 45: buy 1 unit at price 806.650024, total balance 6776.069824 day 46: buy 1 unit at price 804.789978, total balance 5971.279846 day 48, sell 1 unit at price 806.359985, investment 2.472990 %, total balance 6777.639831, day 49, sell 1 unit at price 807.880005, investment 1.745546 %, total balance 7585.519836, day 50, sell 1 unit at price 804.609985, investment -0.191036 %, total balance 8390.129821, day 51, sell 1 unit at price 806.070007, investment -0.071904 %, total balance 9196.199828, day 53: buy 1 unit at price 805.020020, total balance 8391.179808 day 56, sell 1 unit at price 835.669983, investment 3.837027 %, total balance 9226.849791, day 57, sell 1 unit at price 832.150024, investment 3.370103 %, total balance 10058.999815, day 63: buy 1 unit at price 801.489990, total balance 9257.509825 day 66: buy 1 unit at price 808.380005, total balance 8449.129820 day 67, sell 1 unit at price 809.559998, investment 1.006876 %, total balance 9258.689818, day 68, sell 1 unit at price 813.669983, investment 0.654392 %, total balance 10072.359801, day 71: buy 1 unit at price 818.979980, total balance 9253.379821 day 72: buy 1 unit at price 824.159973, total balance 8429.219848 day 73: buy 1 unit at price 828.070007, total balance 7601.149841 day 74: buy 1 unit at price 831.659973, total balance 6769.489868 day 75: buy 1 unit at price 830.760010, total balance 5938.729858 day 81, sell 1 unit at price 830.630005, investment 1.422504 %, total balance 6769.359863, day 82, sell 1 unit at price 829.080017, investment 0.596977 %, total balance 7598.439880, day 85, sell 1 unit at price 835.369995, investment 0.881567 %, total balance 8433.809875, day 87, sell 1 unit at price 843.250000, investment 1.393602 %, total balance 9277.059875, day 88, sell 1 unit at price 845.539978, investment 1.779090 %, total balance 10122.599853, day 92: buy 1 unit at price 852.119995, total balance 9270.479858 day 93, sell 1 unit at price 848.400024, investment -0.436555 %, total balance 10118.879882, day 99: buy 1 unit at price 820.919983, total balance 9297.959899 day 101: buy 1 unit at price 831.500000, total balance 8466.459899 day 104, sell 1 unit at price 834.570007, investment 1.662772 %, total balance 9301.029906, day 105, sell 1 unit at price 831.409973, investment -0.010827 %, total balance 10132.439879, day 111: buy 1 unit at price 823.559998, total balance 9308.879881 day 113, sell 1 unit at price 836.820007, investment 1.610084 %, total balance 10145.699888, day 116: buy 1 unit at price 843.190002, total balance 9302.509886 day 117: buy 1 unit at price 862.760010, total balance 8439.749876 day 118: buy 1 unit at price 872.299988, total balance 7567.449888 day 119, sell 1 unit at price 871.729980, investment 3.384762 %, total balance 8439.179868, day 120: buy 1 unit at price 874.250000, total balance 7564.929868 day 121: buy 1 unit at price 905.960022, total balance 6658.969846 day 122, sell 1 unit at price 912.570007, investment 5.773332 %, total balance 7571.539853, day 123, sell 1 unit at price 916.440002, investment 5.060187 %, total balance 8487.979855, day 124, sell 1 unit at price 927.039978, investment 6.038316 %, total balance 9415.019833, day 125: buy 1 unit at price 931.659973, total balance 8483.359860 day 126: buy 1 unit at price 927.130005, total balance 7556.229855 day 127: buy 1 unit at price 934.299988, total balance 6621.929867 day 128: buy 1 unit at price 932.169983, total balance 5689.759884 day 129, sell 1 unit at price 928.780029, investment 2.518876 %, total balance 6618.539913, day 130, sell 1 unit at price 930.599976, investment -0.113775 %, total balance 7549.139889, day 131: buy 1 unit at price 932.219971, total balance 6616.919918 day 133, sell 1 unit at price 943.000000, investment 1.711734 %, total balance 7559.919918, day 134, sell 1 unit at price 919.619995, investment -1.571229 %, total balance 8479.539913, day 136, sell 1 unit at price 934.010010, investment 0.197392 %, total balance 9413.549923, day 137, sell 1 unit at price 941.859985, investment 1.034092 %, total balance 10355.409908, day 139: buy 1 unit at price 954.960022, total balance 9400.449886 day 141, sell 1 unit at price 971.469971, investment 1.728863 %, total balance 10371.919857, day 144: buy 1 unit at price 966.950012, total balance 9404.969845 day 145, sell 1 unit at price 975.599976, investment 0.894562 %, total balance 10380.569821, day 149: buy 1 unit at price 983.409973, total balance 9397.159848 day 151: buy 1 unit at price 942.900024, total balance 8454.259824 day 154, sell 1 unit at price 942.309998, investment -4.179333 %, total balance 9396.569822, day 157: buy 1 unit at price 950.630005, total balance 8445.939817 day 159, sell 1 unit at price 957.090027, investment 1.504932 %, total balance 9403.029844, day 160: buy 1 unit at price 965.590027, total balance 8437.439817 day 161, sell 1 unit at price 952.270020, investment 0.172519 %, total balance 9389.709837, day 162, sell 1 unit at price 927.330017, investment -3.962345 %, total balance 10317.039854, day 173: buy 1 unit at price 947.159973, total balance 9369.879881 day 176, sell 1 unit at price 965.400024, investment 1.925762 %, total balance 10335.279905, day 180: buy 1 unit at price 980.340027, total balance 9354.939878 day 184, sell 1 unit at price 941.530029, investment -3.958830 %, total balance 10296.469907, day 185: buy 1 unit at price 930.500000, total balance 9365.969907 day 186: buy 1 unit at price 930.830017, total balance 8435.139890 day 187, sell 1 unit at price 930.390015, investment -0.011820 %, total balance 9365.529905, day 189, sell 1 unit at price 927.960022, investment -0.308326 %, total balance 10293.489927, day 197: buy 1 unit at price 926.960022, total balance 9366.529905 day 200, sell 1 unit at price 906.659973, investment -2.189959 %, total balance 10273.189878, day 203: buy 1 unit at price 921.280029, total balance 9351.909849 day 205: buy 1 unit at price 913.809998, total balance 8438.099851 day 210, sell 1 unit at price 928.450012, investment 0.778263 %, total balance 9366.549863, day 213, sell 1 unit at price 926.500000, investment 1.388692 %, total balance 10293.049863, day 229: buy 1 unit at price 953.270020, total balance 9339.779843 day 232, sell 1 unit at price 969.960022, investment 1.750816 %, total balance 10309.739865, day 234: buy 1 unit at price 977.000000, total balance 9332.739865 day 239, sell 1 unit at price 992.000000, investment 1.535312 %, total balance 10324.739865,
fig = plt.figure(figsize = (15,5))
plt.plot(close, color='r', lw=2.)
plt.plot(close, '^', markersize=10, color='m', label = 'buying signal', markevery = states_buy)
plt.plot(close, 'v', markersize=10, color='k', label = 'selling signal', markevery = states_sell)
plt.title('total gains %f, total investment %f%%'%(total_gains, invest))
plt.legend()
plt.savefig('output/'+name+'.png')
plt.show()
import pkg_resources
import types
df= df_full.copy()
name = 'Evolution Strategy agent'
def get_imports():
for name, val in globals().items():
if isinstance(val, types.ModuleType):
name = val.__name__.split('.')[0]
elif isinstance(val, type):
name = val.__module__.split('.')[0]
poorly_named_packages = {'PIL': 'Pillow', 'sklearn': 'scikit-learn'}
if name in poorly_named_packages.keys():
name = poorly_named_packages[name]
yield name
imports = list(set(get_imports()))
requirements = []
for m in pkg_resources.working_set:
if m.project_name in imports and m.project_name != 'pip':
requirements.append((m.project_name, m.version))
for r in requirements:
print('{}=={}'.format(*r))
seaborn==0.9.0 pandas==0.23.4 numpy==1.14.5 matplotlib==3.0.2
class Deep_Evolution_Strategy:
inputs = None
def __init__(
self, weights, reward_function, population_size, sigma, learning_rate
):
self.weights = weights
self.reward_function = reward_function
self.population_size = population_size
self.sigma = sigma
self.learning_rate = learning_rate
def _get_weight_from_population(self, weights, population):
weights_population = []
for index, i in enumerate(population):
jittered = self.sigma * i
weights_population.append(weights[index] + jittered)
return weights_population
def get_weights(self):
return self.weights
def train(self, epoch = 100, print_every = 1):
lasttime = time.time()
for i in range(epoch):
population = []
rewards = np.zeros(self.population_size)
for k in range(self.population_size):
x = []
for w in self.weights:
x.append(np.random.randn(*w.shape))
population.append(x)
for k in range(self.population_size):
weights_population = self._get_weight_from_population(
self.weights, population[k]
)
rewards[k] = self.reward_function(weights_population)
rewards = (rewards - np.mean(rewards)) / (np.std(rewards) + 1e-7)
for index, w in enumerate(self.weights):
A = np.array([p[index] for p in population])
self.weights[index] = (
w
+ self.learning_rate
/ (self.population_size * self.sigma)
* np.dot(A.T, rewards).T
)
if (i + 1) % print_every == 0:
print(
'iter %d. reward: %f'
% (i + 1, self.reward_function(self.weights))
)
print('time taken to train:', time.time() - lasttime, 'seconds')
class Model:
def __init__(self, input_size, layer_size, output_size):
self.weights = [
np.random.randn(input_size, layer_size),
np.random.randn(layer_size, output_size),
np.random.randn(1, layer_size),
]
def predict(self, inputs):
feed = np.dot(inputs, self.weights[0]) + self.weights[-1]
decision = np.dot(feed, self.weights[1])
return decision
def get_weights(self):
return self.weights
def set_weights(self, weights):
self.weights = weights
class Agent:
POPULATION_SIZE = 15
SIGMA = 0.1
LEARNING_RATE = 0.03
def __init__(self, model, window_size, trend, skip, initial_money):
self.model = model
self.window_size = window_size
self.half_window = window_size // 2
self.trend = trend
self.skip = skip
self.initial_money = initial_money
self.es = Deep_Evolution_Strategy(
self.model.get_weights(),
self.get_reward,
self.POPULATION_SIZE,
self.SIGMA,
self.LEARNING_RATE,
)
def act(self, sequence):
decision = self.model.predict(np.array(sequence))
return np.argmax(decision[0])
def get_state(self, t):
window_size = self.window_size + 1
d = t - window_size + 1
block = self.trend[d : t + 1] if d >= 0 else -d * [self.trend[0]] + self.trend[0 : t + 1]
res = []
for i in range(window_size - 1):
res.append(block[i + 1] - block[i])
return np.array([res])
def get_reward(self, weights):
initial_money = self.initial_money
starting_money = initial_money
self.model.weights = weights
state = self.get_state(0)
inventory = []
quantity = 0
for t in range(0, len(self.trend) - 1, self.skip):
action = self.act(state)
next_state = self.get_state(t + 1)
if action == 1 and starting_money >= self.trend[t]:
inventory.append(self.trend[t])
starting_money -= close[t]
elif action == 2 and len(inventory):
bought_price = inventory.pop(0)
starting_money += self.trend[t]
state = next_state
return ((starting_money - initial_money) / initial_money) * 100
def fit(self, iterations, checkpoint):
self.es.train(iterations, print_every = checkpoint)
def buy(self):
initial_money = self.initial_money
state = self.get_state(0)
starting_money = initial_money
states_sell = []
states_buy = []
inventory = []
for t in range(0, len(self.trend) - 1, self.skip):
action = self.act(state)
next_state = self.get_state(t + 1)
if action == 1 and initial_money >= self.trend[t]:
inventory.append(self.trend[t])
initial_money -= self.trend[t]
states_buy.append(t)
print('day %d: buy 1 unit at price %f, total balance %f'% (t, self.trend[t], initial_money))
elif action == 2 and len(inventory):
bought_price = inventory.pop(0)
initial_money += self.trend[t]
states_sell.append(t)
try:
invest = ((close[t] - bought_price) / bought_price) * 100
except:
invest = 0
print(
'day %d, sell 1 unit at price %f, investment %f %%, total balance %f,'
% (t, close[t], invest, initial_money)
)
state = next_state
invest = ((initial_money - starting_money) / starting_money) * 100
total_gains = initial_money - starting_money
return states_buy, states_sell, total_gains, invest
close = df.Close.values.tolist()
window_size = 30
skip = 1
initial_money = 10000
model = Model(input_size = window_size, layer_size = 500, output_size = 3)
agent = Agent(model = model,
window_size = window_size,
trend = close,
skip = skip,
initial_money = initial_money)
agent.fit(iterations = 500, checkpoint = 10)
iter 10. reward: 8.610248 iter 20. reward: 12.257399 iter 30. reward: 7.689600 iter 40. reward: 18.719300 iter 50. reward: 16.883897 iter 60. reward: 18.100399 iter 70. reward: 17.280399 iter 80. reward: 15.865947 iter 90. reward: 17.435298 iter 100. reward: 22.108749 iter 110. reward: 21.537897 iter 120. reward: 21.986898 iter 130. reward: 22.303096 iter 140. reward: 27.540547 iter 150. reward: 24.151497 iter 160. reward: 26.339196 iter 170. reward: 26.184596 iter 180. reward: 25.859546 iter 190. reward: 28.623797 iter 200. reward: 30.171547 iter 210. reward: 29.712899 iter 220. reward: 28.880399 iter 230. reward: 29.221448 iter 240. reward: 26.622551 iter 250. reward: 21.736548 iter 260. reward: 32.192049 iter 270. reward: 31.077749 iter 280. reward: 30.869947 iter 290. reward: 30.829648 iter 300. reward: 32.587899 iter 310. reward: 32.627998 iter 320. reward: 32.198498 iter 330. reward: 31.940298 iter 340. reward: 32.789998 iter 350. reward: 33.619697 iter 360. reward: 32.738196 iter 370. reward: 34.456997 iter 380. reward: 34.972598 iter 390. reward: 34.632198 iter 400. reward: 32.573597 iter 410. reward: 35.826097 iter 420. reward: 33.999698 iter 430. reward: 35.530297 iter 440. reward: 35.589196 iter 450. reward: 32.944796 iter 460. reward: 36.473798 iter 470. reward: 38.662997 iter 480. reward: 37.648599 iter 490. reward: 37.361099 iter 500. reward: 37.407198 time taken to train: 33.66626238822937 seconds
states_buy, states_sell, total_gains, invest = agent.buy()
day 1: buy 1 unit at price 762.130005, total balance 9237.869995 day 3: buy 1 unit at price 782.520020, total balance 8455.349975 day 4, sell 1 unit at price 790.510010, investment 3.723775 %, total balance 9245.859985, day 5, sell 1 unit at price 785.309998, investment 0.356538 %, total balance 10031.169983, day 6: buy 1 unit at price 762.559998, total balance 9268.609985 day 10: buy 1 unit at price 764.479980, total balance 8504.130005 day 11: buy 1 unit at price 771.229980, total balance 7732.900025 day 12: buy 1 unit at price 760.539978, total balance 6972.360047 day 17: buy 1 unit at price 768.239990, total balance 6204.120057 day 18: buy 1 unit at price 770.840027, total balance 5433.280030 day 19: buy 1 unit at price 758.039978, total balance 4675.240052 day 20: buy 1 unit at price 747.919983, total balance 3927.320069 day 21: buy 1 unit at price 750.500000, total balance 3176.820069 day 24, sell 1 unit at price 771.190002, investment 1.131715 %, total balance 3948.010071, day 25: buy 1 unit at price 776.419983, total balance 3171.590088 day 26, sell 1 unit at price 789.289978, investment 3.245343 %, total balance 3960.880066, day 27: buy 1 unit at price 789.270020, total balance 3171.610046 day 29, sell 1 unit at price 797.070007, investment 3.350496 %, total balance 3968.680053, day 30, sell 1 unit at price 797.849976, investment 4.905725 %, total balance 4766.530029, day 31, sell 1 unit at price 790.799988, investment 2.936582 %, total balance 5557.330017, day 32: buy 1 unit at price 794.200012, total balance 4763.130005 day 33: buy 1 unit at price 796.419983, total balance 3966.710022 day 34: buy 1 unit at price 794.559998, total balance 3172.150024 day 36: buy 1 unit at price 789.909973, total balance 2382.240051 day 37, sell 1 unit at price 791.549988, investment 2.686674 %, total balance 3173.790039, day 40: buy 1 unit at price 771.820007, total balance 2401.970032 day 41: buy 1 unit at price 786.140015, total balance 1615.830017 day 42: buy 1 unit at price 786.900024, total balance 828.929993 day 44: buy 1 unit at price 806.150024, total balance 22.779969 day 45, sell 1 unit at price 806.650024, investment 6.412597 %, total balance 829.429993, day 48, sell 1 unit at price 806.359985, investment 7.813670 %, total balance 1635.789978, day 49, sell 1 unit at price 807.880005, investment 7.645570 %, total balance 2443.669983, day 50, sell 1 unit at price 804.609985, investment 3.630767 %, total balance 3248.279968, day 51: buy 1 unit at price 806.070007, total balance 2442.209961 day 52: buy 1 unit at price 802.174988, total balance 1640.034973 day 53: buy 1 unit at price 805.020020, total balance 835.014953 day 56, sell 1 unit at price 835.669983, investment 5.878845 %, total balance 1670.684936, day 57, sell 1 unit at price 832.150024, investment 4.778395 %, total balance 2502.834960, day 59: buy 1 unit at price 802.320007, total balance 1700.514953 day 61: buy 1 unit at price 795.695007, total balance 904.819946 day 62: buy 1 unit at price 798.530029, total balance 106.289917 day 69, sell 1 unit at price 819.239990, investment 2.865323 %, total balance 925.529907, day 70, sell 1 unit at price 820.450012, investment 3.258409 %, total balance 1745.979919, day 71: buy 1 unit at price 818.979980, total balance 926.999939 day 72: buy 1 unit at price 824.159973, total balance 102.839966 day 74, sell 1 unit at price 831.659973, investment 5.285412 %, total balance 934.499939, day 76, sell 1 unit at price 831.330017, investment 7.710348 %, total balance 1765.829956, day 77, sell 1 unit at price 828.640015, investment 5.406162 %, total balance 2594.469971, day 78, sell 1 unit at price 829.280029, investment 5.385691 %, total balance 3423.750000, day 79: buy 1 unit at price 823.210022, total balance 2600.539978 day 80: buy 1 unit at price 835.239990, total balance 1765.299988 day 81: buy 1 unit at price 830.630005, total balance 934.669983 day 83: buy 1 unit at price 827.780029, total balance 106.889954 day 85, sell 1 unit at price 835.369995, investment 3.624632 %, total balance 942.259949, day 88: buy 1 unit at price 845.539978, total balance 96.719971 day 89, sell 1 unit at price 845.619995, investment 4.906520 %, total balance 942.339966, day 90, sell 1 unit at price 847.200012, investment 5.612868 %, total balance 1789.539978, day 92, sell 1 unit at price 852.119995, investment 5.850783 %, total balance 2641.659973, day 93, sell 1 unit at price 848.400024, investment 5.743346 %, total balance 3490.059997, day 95: buy 1 unit at price 829.590027, total balance 2660.469970 day 96: buy 1 unit at price 817.580017, total balance 1842.889953 day 98: buy 1 unit at price 819.510010, total balance 1023.379943 day 99: buy 1 unit at price 820.919983, total balance 202.459960 day 105, sell 1 unit at price 831.409973, investment 4.488525 %, total balance 1033.869933, day 107: buy 1 unit at price 824.669983, total balance 209.199950 day 113, sell 1 unit at price 836.820007, investment 4.795058 %, total balance 1046.019957, day 115: buy 1 unit at price 841.650024, total balance 204.369933 day 116, sell 1 unit at price 843.190002, investment 2.956119 %, total balance 1047.559935, day 118: buy 1 unit at price 872.299988, total balance 175.259947 day 119, sell 1 unit at price 871.729980, investment 5.771939 %, total balance 1046.989927, day 120: buy 1 unit at price 874.250000, total balance 172.739927 day 121, sell 1 unit at price 905.960022, investment 10.052113 %, total balance 1078.699949, day 122, sell 1 unit at price 912.570007, investment 9.258419 %, total balance 1991.269956, day 123, sell 1 unit at price 916.440002, investment 10.330712 %, total balance 2907.709958, day 124, sell 1 unit at price 927.039978, investment 11.991102 %, total balance 3834.749936, day 125, sell 1 unit at price 931.659973, investment 10.185207 %, total balance 4766.409909, day 126, sell 1 unit at price 927.130005, investment 11.757612 %, total balance 5693.539914, day 127, sell 1 unit at price 934.299988, investment 14.276275 %, total balance 6627.839902, day 128, sell 1 unit at price 932.169983, investment 13.747236 %, total balance 7560.009885, day 129: buy 1 unit at price 928.780029, total balance 6631.229856 day 130: buy 1 unit at price 930.599976, total balance 5700.629880 day 132: buy 1 unit at price 937.080017, total balance 4763.549863 day 133: buy 1 unit at price 943.000000, total balance 3820.549863 day 136: buy 1 unit at price 934.010010, total balance 2886.539853 day 137, sell 1 unit at price 941.859985, investment 14.732252 %, total balance 3828.399838, day 139, sell 1 unit at price 954.960022, investment 15.799052 %, total balance 4783.359860, day 140, sell 1 unit at price 969.539978, investment 15.195146 %, total balance 5752.899838, day 141, sell 1 unit at price 971.469971, investment 11.368793 %, total balance 6724.369809, day 142, sell 1 unit at price 975.880005, investment 11.624822 %, total balance 7700.249814, day 143, sell 1 unit at price 964.859985, investment 3.884661 %, total balance 8665.109799, day 144, sell 1 unit at price 966.950012, investment 3.906086 %, total balance 9632.059811, day 145, sell 1 unit at price 975.599976, investment 4.110637 %, total balance 10607.659787, day 146, sell 1 unit at price 983.679993, investment 4.313891 %, total balance 11591.339780, day 147: buy 1 unit at price 976.570007, total balance 10614.769773 day 148, sell 1 unit at price 980.940002, investment 5.024571 %, total balance 11595.709775, day 153, sell 1 unit at price 950.760010, investment -2.642923 %, total balance 12546.469785, day 154: buy 1 unit at price 942.309998, total balance 11604.159787 day 155: buy 1 unit at price 939.780029, total balance 10664.379758 day 156: buy 1 unit at price 957.369995, total balance 9707.009763 day 158, sell 1 unit at price 959.450012, investment 1.818936 %, total balance 10666.459775, day 160, sell 1 unit at price 965.590027, investment 2.746387 %, total balance 11632.049802, day 161, sell 1 unit at price 952.270020, investment -0.532707 %, total balance 12584.319822, day 163: buy 1 unit at price 940.489990, total balance 11643.829832 day 164: buy 1 unit at price 917.789978, total balance 10726.039854 day 166: buy 1 unit at price 898.700012, total balance 9827.339842 day 167: buy 1 unit at price 911.710022, total balance 8915.629820 day 168: buy 1 unit at price 906.690002, total balance 8008.939818 day 171: buy 1 unit at price 930.090027, total balance 7078.849791 day 172, sell 1 unit at price 943.830017, investment 0.355137 %, total balance 8022.679808, day 174, sell 1 unit at price 955.989990, investment 4.162174 %, total balance 8978.669798, day 175: buy 1 unit at price 953.419983, total balance 8025.249815 day 176, sell 1 unit at price 965.400024, investment 7.421833 %, total balance 8990.649839, day 178, sell 1 unit at price 968.150024, investment 6.190565 %, total balance 9958.799863, day 179, sell 1 unit at price 972.919983, investment 7.304589 %, total balance 10931.719846, day 180, sell 1 unit at price 980.340027, investment 5.402703 %, total balance 11912.059873, day 181, sell 1 unit at price 950.700012, investment -0.285286 %, total balance 12862.759885, day 183: buy 1 unit at price 934.090027, total balance 11928.669858 day 185: buy 1 unit at price 930.500000, total balance 10998.169858 day 186: buy 1 unit at price 930.830017, total balance 10067.339841 day 187: buy 1 unit at price 930.390015, total balance 9136.949826 day 188: buy 1 unit at price 923.650024, total balance 8213.299802 day 192: buy 1 unit at price 922.900024, total balance 7290.399778 day 194: buy 1 unit at price 914.390015, total balance 6376.009763 day 195: buy 1 unit at price 922.669983, total balance 5453.339780 day 197: buy 1 unit at price 926.960022, total balance 4526.379758 day 202: buy 1 unit at price 927.000000, total balance 3599.379758 day 203: buy 1 unit at price 921.280029, total balance 2678.099729 day 204: buy 1 unit at price 915.890015, total balance 1762.209714 day 205: buy 1 unit at price 913.809998, total balance 848.399716 day 210, sell 1 unit at price 928.450012, investment -0.603798 %, total balance 1776.849728, day 212, sell 1 unit at price 935.950012, investment 0.585708 %, total balance 2712.799740, day 214: buy 1 unit at price 929.080017, total balance 1783.719723 day 215: buy 1 unit at price 932.070007, total balance 851.649716 day 216, sell 1 unit at price 935.090027, investment 0.457657 %, total balance 1786.739743, day 217: buy 1 unit at price 925.109985, total balance 861.629758 day 222, sell 1 unit at price 932.450012, investment 0.221412 %, total balance 1794.079770, day 223: buy 1 unit at price 928.530029, total balance 865.549741 day 227, sell 1 unit at price 949.500000, investment 2.798676 %, total balance 1815.049741, day 228, sell 1 unit at price 959.109985, investment 3.923498 %, total balance 2774.159726, day 229: buy 1 unit at price 953.270020, total balance 1820.889706 day 230, sell 1 unit at price 957.789978, investment 4.746329 %, total balance 2778.679684, day 232, sell 1 unit at price 969.960022, investment 5.125347 %, total balance 3748.639706, day 233, sell 1 unit at price 978.890015, investment 5.602183 %, total balance 4727.529721, day 234, sell 1 unit at price 977.000000, investment 5.393743 %, total balance 5704.529721, day 235: buy 1 unit at price 972.599976, total balance 4731.929745 day 236, sell 1 unit at price 989.250000, investment 7.377775 %, total balance 5721.179745, day 237, sell 1 unit at price 987.830017, investment 7.854655 %, total balance 6709.009762, day 238, sell 1 unit at price 989.679993, investment 8.302601 %, total balance 7698.689755, day 239, sell 1 unit at price 992.000000, investment 6.772289 %, total balance 8690.689755, day 241, sell 1 unit at price 992.809998, investment 6.516677 %, total balance 9683.499753, day 242, sell 1 unit at price 984.450012, investment 6.414375 %, total balance 10667.949765, day 243, sell 1 unit at price 988.200012, investment 6.426285 %, total balance 11656.149777, day 244: buy 1 unit at price 968.450012, total balance 10687.699765 day 248, sell 1 unit at price 1019.270020, investment 6.923537 %, total balance 11706.969785, day 249, sell 1 unit at price 1017.109985, investment 4.576394 %, total balance 12724.079770, day 250, sell 1 unit at price 1016.640015, investment 4.975993 %, total balance 13740.719785,
fig = plt.figure(figsize = (15,5))
plt.plot(close, color='r', lw=2.)
plt.plot(close, '^', markersize=10, color='m', label = 'buying signal', markevery = states_buy)
plt.plot(close, 'v', markersize=10, color='k', label = 'selling signal', markevery = states_sell)
plt.title('total gains %f, total investment %f%%'%(total_gains, invest))
plt.legend()
plt.savefig('output/'+name+'.png')
plt.show()
from collections import deque
import random
df= df_full.copy()
name = 'Double Q-learning agent'
class Model:
def __init__(self, input_size, output_size, layer_size, learning_rate):
self.X = tf.placeholder(tf.float32, (None, input_size))
self.Y = tf.placeholder(tf.float32, (None, output_size))
feed_forward = tf.layers.dense(self.X, layer_size, activation = tf.nn.relu)
self.logits = tf.layers.dense(feed_forward, output_size)
self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)
class Agent:
LEARNING_RATE = 0.003
BATCH_SIZE = 32
LAYER_SIZE = 500
OUTPUT_SIZE = 3
EPSILON = 0.5
DECAY_RATE = 0.005
MIN_EPSILON = 0.1
GAMMA = 0.99
MEMORIES = deque()
COPY = 1000
T_COPY = 0
MEMORY_SIZE = 300
def __init__(self, state_size, window_size, trend, skip):
self.state_size = state_size
self.window_size = window_size
self.half_window = window_size // 2
self.trend = trend
self.skip = skip
tf.reset_default_graph()
self.model = Model(self.state_size, self.OUTPUT_SIZE, self.LAYER_SIZE, self.LEARNING_RATE)
self.model_negative = Model(self.state_size, self.OUTPUT_SIZE, self.LAYER_SIZE, self.LEARNING_RATE)
self.sess = tf.InteractiveSession()
self.sess.run(tf.global_variables_initializer())
self.trainable = tf.trainable_variables()
def _assign(self):
for i in range(len(self.trainable)//2):
assign_op = self.trainable[i+len(self.trainable)//2].assign(self.trainable[i])
self.sess.run(assign_op)
def _memorize(self, state, action, reward, new_state, done):
self.MEMORIES.append((state, action, reward, new_state, done))
if len(self.MEMORIES) > self.MEMORY_SIZE:
self.MEMORIES.popleft()
def _select_action(self, state):
if np.random.rand() < self.EPSILON:
action = np.random.randint(self.OUTPUT_SIZE)
else:
action = self.get_predicted_action([state])
return action
def _construct_memories(self, replay):
states = np.array([a[0] for a in replay])
new_states = np.array([a[3] for a in replay])
Q = self.predict(states)
Q_new = self.predict(new_states)
Q_new_negative = self.sess.run(self.model_negative.logits, feed_dict={self.model_negative.X:new_states})
replay_size = len(replay)
X = np.empty((replay_size, self.state_size))
Y = np.empty((replay_size, self.OUTPUT_SIZE))
for i in range(replay_size):
state_r, action_r, reward_r, new_state_r, done_r = replay[i]
target = Q[i]
target[action_r] = reward_r
if not done_r:
target[action_r] += self.GAMMA * Q_new_negative[i, np.argmax(Q_new[i])]
X[i] = state_r
Y[i] = target
return X, Y
def predict(self, inputs):
return self.sess.run(self.model.logits, feed_dict={self.model.X:inputs})
def get_predicted_action(self, sequence):
prediction = self.predict(np.array(sequence))[0]
return np.argmax(prediction)
def get_state(self, t):
window_size = self.window_size + 1
d = t - window_size + 1
block = self.trend[d : t + 1] if d >= 0 else -d * [self.trend[0]] + self.trend[0 : t + 1]
res = []
for i in range(window_size - 1):
res.append(block[i + 1] - block[i])
return np.array(res)
def buy(self, initial_money):
starting_money = initial_money
states_sell = []
states_buy = []
inventory = []
state = self.get_state(0)
for t in range(0, len(self.trend) - 1, self.skip):
action = self._select_action(state)
next_state = self.get_state(t + 1)
if action == 1 and initial_money >= self.trend[t]:
inventory.append(self.trend[t])
initial_money -= self.trend[t]
states_buy.append(t)
print('day %d: buy 1 unit at price %f, total balance %f'% (t, self.trend[t], initial_money))
elif action == 2 and len(inventory):
bought_price = inventory.pop(0)
initial_money += self.trend[t]
states_sell.append(t)
try:
invest = ((close[t] - bought_price) / bought_price) * 100
except:
invest = 0
print(
'day %d, sell 1 unit at price %f, investment %f %%, total balance %f,'
% (t, close[t], invest, initial_money)
)
state = next_state
invest = ((initial_money - starting_money) / starting_money) * 100
total_gains = initial_money - starting_money
return states_buy, states_sell, total_gains, invest
def train(self, iterations, checkpoint, initial_money):
for i in range(iterations):
total_profit = 0
inventory = []
state = self.get_state(0)
starting_money = initial_money
for t in range(0, len(self.trend) - 1, self.skip):
if (self.T_COPY + 1) % self.COPY == 0:
self._assign()
action = self._select_action(state)
next_state = self.get_state(t + 1)
if action == 1 and starting_money >= self.trend[t]:
inventory.append(self.trend[t])
starting_money -= self.trend[t]
elif action == 2 and len(inventory) > 0:
bought_price = inventory.pop(0)
total_profit += self.trend[t] - bought_price
starting_money += self.trend[t]
invest = ((starting_money - initial_money) / initial_money)
self._memorize(state, action, invest, next_state, starting_money < initial_money)
batch_size = min(len(self.MEMORIES), self.BATCH_SIZE)
replay = random.sample(self.MEMORIES, batch_size)
state = next_state
X, Y = self._construct_memories(replay)
cost, _ = self.sess.run([self.model.cost, self.model.optimizer],
feed_dict={self.model.X: X, self.model.Y:Y})
self.T_COPY += 1
self.EPSILON = self.MIN_EPSILON + (1.0 - self.MIN_EPSILON) * np.exp(-self.DECAY_RATE * i)
if (i+1) % checkpoint == 0:
print('epoch: %d, total rewards: %f.3, cost: %f, total money: %f'%(i + 1, total_profit, cost,
starting_money))
close = df.Close.values.tolist()
initial_money = 10000
window_size = 30
skip = 1
batch_size = 32
agent = Agent(state_size = window_size,
window_size = window_size,
trend = close,
skip = skip)
agent.train(iterations = 200, checkpoint = 10, initial_money = initial_money)
epoch: 10, total rewards: 1241.885127.3, cost: 1.110860, total money: 1744.875178 epoch: 20, total rewards: 89.105106.3, cost: 0.649060, total money: 8097.275088 epoch: 30, total rewards: 719.079470.3, cost: 0.823131, total money: 9699.809450 epoch: 40, total rewards: 684.040043.3, cost: 1.931746, total money: 134.750004 epoch: 50, total rewards: 1744.829771.3, cost: 0.895153, total money: 11744.829771 epoch: 60, total rewards: 149.195010.3, cost: 1.097174, total money: 5196.854982 epoch: 70, total rewards: 1389.289786.3, cost: 0.860031, total money: 9399.319754 epoch: 80, total rewards: 529.019898.3, cost: 0.305593, total money: 10529.019898 epoch: 90, total rewards: 1285.264893.3, cost: 1.882383, total money: 9251.514893 epoch: 100, total rewards: 409.474970.3, cost: 0.146280, total money: 551.414972 epoch: 110, total rewards: 1074.725155.3, cost: 0.661549, total money: 2231.475154 epoch: 120, total rewards: 1713.854676.3, cost: 1.219318, total money: 11713.854676 epoch: 130, total rewards: 871.945621.3, cost: 1.460638, total money: 8947.665652 epoch: 140, total rewards: 1564.314818.3, cost: 1.133385, total money: 2767.354796 epoch: 150, total rewards: 855.729796.3, cost: 1.886093, total money: 10855.729796 epoch: 160, total rewards: 302.970157.3, cost: 0.642825, total money: 6320.700137 epoch: 170, total rewards: 512.139521.3, cost: 3.411159, total money: 1801.649470 epoch: 180, total rewards: 769.354739.3, cost: 0.379282, total money: 10769.354739 epoch: 190, total rewards: 332.274720.3, cost: 1.111366, total money: 10332.274720 epoch: 200, total rewards: 395.419923.3, cost: 0.270106, total money: 5401.389893
states_buy, states_sell, total_gains, invest = agent.buy(initial_money = initial_money)
day 7: buy 1 unit at price 754.020020, total balance 9245.979980 day 9, sell 1 unit at price 758.489990, investment 0.592818 %, total balance 10004.469970, day 10: buy 1 unit at price 764.479980, total balance 9239.989990 day 11: buy 1 unit at price 771.229980, total balance 8468.760010 day 12, sell 1 unit at price 760.539978, investment -0.515383 %, total balance 9229.299988, day 13, sell 1 unit at price 769.200012, investment -0.263212 %, total balance 9998.500000, day 17: buy 1 unit at price 768.239990, total balance 9230.260010 day 19, sell 1 unit at price 758.039978, investment -1.327712 %, total balance 9988.299988, day 21: buy 1 unit at price 750.500000, total balance 9237.799988 day 22, sell 1 unit at price 762.520020, investment 1.601602 %, total balance 10000.320008, day 26: buy 1 unit at price 789.289978, total balance 9211.030030 day 27, sell 1 unit at price 789.270020, investment -0.002529 %, total balance 10000.300050, day 30: buy 1 unit at price 797.849976, total balance 9202.450074 day 33, sell 1 unit at price 796.419983, investment -0.179231 %, total balance 9998.870057, day 41: buy 1 unit at price 786.140015, total balance 9212.730042 day 42, sell 1 unit at price 786.900024, investment 0.096676 %, total balance 9999.630066, day 45: buy 1 unit at price 806.650024, total balance 9192.980042 day 46: buy 1 unit at price 804.789978, total balance 8388.190064 day 47, sell 1 unit at price 807.909973, investment 0.156195 %, total balance 9196.100037, day 49, sell 1 unit at price 807.880005, investment 0.383954 %, total balance 10003.980042, day 54: buy 1 unit at price 819.309998, total balance 9184.670044 day 55, sell 1 unit at price 823.869995, investment 0.556566 %, total balance 10008.540039, day 61: buy 1 unit at price 795.695007, total balance 9212.845032 day 62, sell 1 unit at price 798.530029, investment 0.356295 %, total balance 10011.375061, day 64: buy 1 unit at price 801.340027, total balance 9210.035034 day 65, sell 1 unit at price 806.969971, investment 0.702566 %, total balance 10017.005005, day 66: buy 1 unit at price 808.380005, total balance 9208.625000 day 67: buy 1 unit at price 809.559998, total balance 8399.065002 day 68: buy 1 unit at price 813.669983, total balance 7585.395019 day 69, sell 1 unit at price 819.239990, investment 1.343426 %, total balance 8404.635009, day 70, sell 1 unit at price 820.450012, investment 1.345177 %, total balance 9225.085021, day 71, sell 1 unit at price 818.979980, investment 0.652598 %, total balance 10044.065001, day 74: buy 1 unit at price 831.659973, total balance 9212.405028 day 76, sell 1 unit at price 831.330017, investment -0.039674 %, total balance 10043.735045, day 79: buy 1 unit at price 823.210022, total balance 9220.525023 day 80, sell 1 unit at price 835.239990, investment 1.461349 %, total balance 10055.765013, day 83: buy 1 unit at price 827.780029, total balance 9227.984984 day 84, sell 1 unit at price 831.909973, investment 0.498918 %, total balance 10059.894957, day 97: buy 1 unit at price 814.429993, total balance 9245.464964 day 98, sell 1 unit at price 819.510010, investment 0.623751 %, total balance 10064.974974, day 102: buy 1 unit at price 829.559998, total balance 9235.414976 day 103, sell 1 unit at price 838.549988, investment 1.083706 %, total balance 10073.964964, day 104: buy 1 unit at price 834.570007, total balance 9239.394957 day 105, sell 1 unit at price 831.409973, investment -0.378642 %, total balance 10070.804930, day 107: buy 1 unit at price 824.669983, total balance 9246.134947 day 108: buy 1 unit at price 824.729980, total balance 8421.404967 day 109, sell 1 unit at price 823.349976, investment -0.160065 %, total balance 9244.754943, day 110, sell 1 unit at price 824.320007, investment -0.049710 %, total balance 10069.074950, day 113: buy 1 unit at price 836.820007, total balance 9232.254943 day 114, sell 1 unit at price 838.210022, investment 0.166107 %, total balance 10070.464965, day 117: buy 1 unit at price 862.760010, total balance 9207.704955 day 118, sell 1 unit at price 872.299988, investment 1.105751 %, total balance 10080.004943, day 120: buy 1 unit at price 874.250000, total balance 9205.754943 day 123, sell 1 unit at price 916.440002, investment 4.825851 %, total balance 10122.194945, day 125: buy 1 unit at price 931.659973, total balance 9190.534972 day 126: buy 1 unit at price 927.130005, total balance 8263.404967 day 127, sell 1 unit at price 934.299988, investment 0.283367 %, total balance 9197.704955, day 128, sell 1 unit at price 932.169983, investment 0.543611 %, total balance 10129.874938, day 132: buy 1 unit at price 937.080017, total balance 9192.794921 day 133, sell 1 unit at price 943.000000, investment 0.631748 %, total balance 10135.794921, day 135: buy 1 unit at price 930.239990, total balance 9205.554931 day 138, sell 1 unit at price 948.820007, investment 1.997336 %, total balance 10154.374938, day 139: buy 1 unit at price 954.960022, total balance 9199.414916 day 140, sell 1 unit at price 969.539978, investment 1.526761 %, total balance 10168.954894, day 141: buy 1 unit at price 971.469971, total balance 9197.484923 day 143, sell 1 unit at price 964.859985, investment -0.680411 %, total balance 10162.344908, day 153: buy 1 unit at price 950.760010, total balance 9211.584898 day 154, sell 1 unit at price 942.309998, investment -0.888764 %, total balance 10153.894896, day 157: buy 1 unit at price 950.630005, total balance 9203.264891 day 158, sell 1 unit at price 959.450012, investment 0.927807 %, total balance 10162.714903, day 161: buy 1 unit at price 952.270020, total balance 9210.444883 day 162: buy 1 unit at price 927.330017, total balance 8283.114866 day 163, sell 1 unit at price 940.489990, investment -1.237047 %, total balance 9223.604856, day 164, sell 1 unit at price 917.789978, investment -1.028764 %, total balance 10141.394834, day 171: buy 1 unit at price 930.090027, total balance 9211.304807 day 172, sell 1 unit at price 943.830017, investment 1.477275 %, total balance 10155.134824, day 178: buy 1 unit at price 968.150024, total balance 9186.984800 day 179, sell 1 unit at price 972.919983, investment 0.492688 %, total balance 10159.904783, day 180: buy 1 unit at price 980.340027, total balance 9179.564756 day 181: buy 1 unit at price 950.700012, total balance 8228.864744 day 182: buy 1 unit at price 947.799988, total balance 7281.064756 day 183: buy 1 unit at price 934.090027, total balance 6346.974729 day 185: buy 1 unit at price 930.500000, total balance 5416.474729 day 186, sell 1 unit at price 930.830017, investment -5.050290 %, total balance 6347.304746, day 188: buy 1 unit at price 923.650024, total balance 5423.654722 day 189: buy 1 unit at price 927.960022, total balance 4495.694700 day 190, sell 1 unit at price 929.359985, investment -2.244665 %, total balance 5425.054685, day 193, sell 1 unit at price 907.239990, investment -4.279384 %, total balance 6332.294675, day 195, sell 1 unit at price 922.669983, investment -1.222585 %, total balance 7254.964658, day 196, sell 1 unit at price 922.219971, investment -0.889847 %, total balance 8177.184629, day 197, sell 1 unit at price 926.960022, investment 0.358361 %, total balance 9104.144651, day 198, sell 1 unit at price 910.979980, investment -1.829825 %, total balance 10015.124631, day 202: buy 1 unit at price 927.000000, total balance 9088.124631 day 205: buy 1 unit at price 913.809998, total balance 8174.314633 day 207, sell 1 unit at price 929.570007, investment 0.277239 %, total balance 9103.884640, day 208, sell 1 unit at price 939.330017, investment 2.792705 %, total balance 10043.214657, day 215: buy 1 unit at price 932.070007, total balance 9111.144650 day 217: buy 1 unit at price 925.109985, total balance 8186.034665 day 218: buy 1 unit at price 920.289978, total balance 7265.744687 day 219: buy 1 unit at price 915.000000, total balance 6350.744687 day 220: buy 1 unit at price 921.809998, total balance 5428.934689 day 221, sell 1 unit at price 931.580017, investment -0.052570 %, total balance 6360.514706, day 222: buy 1 unit at price 932.450012, total balance 5428.064694 day 223: buy 1 unit at price 928.530029, total balance 4499.534665 day 224, sell 1 unit at price 920.969971, investment -0.447516 %, total balance 5420.504636, day 225: buy 1 unit at price 924.859985, total balance 4495.644651 day 226: buy 1 unit at price 944.489990, total balance 3551.154661 day 228, sell 1 unit at price 959.109985, investment 4.218236 %, total balance 4510.264646, day 229, sell 1 unit at price 953.270020, investment 4.182516 %, total balance 5463.534666, day 230, sell 1 unit at price 957.789978, investment 3.903188 %, total balance 6421.324644, day 231, sell 1 unit at price 951.679993, investment 2.062307 %, total balance 7373.004637, day 232, sell 1 unit at price 969.960022, investment 4.461890 %, total balance 8342.964659, day 235: buy 1 unit at price 972.599976, total balance 7370.364683 day 236, sell 1 unit at price 989.250000, investment 6.962137 %, total balance 8359.614683, day 238: buy 1 unit at price 989.679993, total balance 7369.934690 day 241: buy 1 unit at price 992.809998, total balance 6377.124692 day 242, sell 1 unit at price 984.450012, investment 4.230857 %, total balance 7361.574704, day 243: buy 1 unit at price 988.200012, total balance 6373.374692 day 244: buy 1 unit at price 968.450012, total balance 5404.924680 day 245, sell 1 unit at price 970.539978, investment -0.211803 %, total balance 6375.464658, day 246: buy 1 unit at price 973.330017, total balance 5402.134641 day 247, sell 1 unit at price 972.559998, investment -1.729852 %, total balance 6374.694639, day 248: buy 1 unit at price 1019.270020, total balance 5355.424619 day 249, sell 1 unit at price 1017.109985, investment 2.447597 %, total balance 6372.534604, day 250: buy 1 unit at price 1016.640015, total balance 5355.894589
fig = plt.figure(figsize = (15,5))
plt.plot(close, color='r', lw=2.)
plt.plot(close, '^', markersize=10, color='m', label = 'buying signal', markevery = states_buy)
plt.plot(close, 'v', markersize=10, color='k', label = 'selling signal', markevery = states_sell)
plt.title('total gains %f, total investment %f%%'%(total_gains, invest))
plt.legend()
plt.savefig('output/'+name+'.png')
plt.show()
from collections import deque
import random
df= df_full.copy()
name = 'Recurrent Q-learning agent'
class Agent:
LEARNING_RATE = 0.003
BATCH_SIZE = 32
LAYER_SIZE = 256
OUTPUT_SIZE = 3
EPSILON = 0.5
DECAY_RATE = 0.005
MIN_EPSILON = 0.1
GAMMA = 0.99
MEMORIES = deque()
MEMORY_SIZE = 300
def __init__(self, state_size, window_size, trend, skip):
self.state_size = state_size
self.window_size = window_size
self.half_window = window_size // 2
self.trend = trend
self.skip = skip
tf.reset_default_graph()
self.INITIAL_FEATURES = np.zeros((4, self.state_size))
self.X = tf.placeholder(tf.float32, (None, None, self.state_size))
self.Y = tf.placeholder(tf.float32, (None, self.OUTPUT_SIZE))
cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE, state_is_tuple = False)
self.hidden_layer = tf.placeholder(tf.float32, (None, 2 * self.LAYER_SIZE))
self.rnn,self.last_state = tf.nn.dynamic_rnn(inputs=self.X,cell=cell,
dtype=tf.float32,
initial_state=self.hidden_layer)
self.logits = tf.layers.dense(self.rnn[:,-1], self.OUTPUT_SIZE)
self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
self.optimizer = tf.train.AdamOptimizer(learning_rate = self.LEARNING_RATE).minimize(self.cost)
self.sess = tf.InteractiveSession()
self.sess.run(tf.global_variables_initializer())
def _memorize(self, state, action, reward, new_state, dead, rnn_state):
self.MEMORIES.append((state, action, reward, new_state, dead, rnn_state))
if len(self.MEMORIES) > self.MEMORY_SIZE:
self.MEMORIES.popleft()
def _construct_memories(self, replay):
states = np.array([a[0] for a in replay])
new_states = np.array([a[3] for a in replay])
init_values = np.array([a[-1] for a in replay])
Q = self.sess.run(self.logits, feed_dict={self.X:states, self.hidden_layer:init_values})
Q_new = self.sess.run(self.logits, feed_dict={self.X:new_states, self.hidden_layer:init_values})
replay_size = len(replay)
X = np.empty((replay_size, 4, self.state_size))
Y = np.empty((replay_size, self.OUTPUT_SIZE))
INIT_VAL = np.empty((replay_size, 2 * self.LAYER_SIZE))
for i in range(replay_size):
state_r, action_r, reward_r, new_state_r, dead_r, rnn_memory = replay[i]
target = Q[i]
target[action_r] = reward_r
if not dead_r:
target[action_r] += self.GAMMA * np.amax(Q_new[i])
X[i] = state_r
Y[i] = target
INIT_VAL[i] = rnn_memory
return X, Y, INIT_VAL
def get_state(self, t):
window_size = self.window_size + 1
d = t - window_size + 1
block = self.trend[d : t + 1] if d >= 0 else -d * [self.trend[0]] + self.trend[0 : t + 1]
res = []
for i in range(window_size - 1):
res.append(block[i + 1] - block[i])
return np.array(res)
def buy(self, initial_money):
starting_money = initial_money
states_sell = []
states_buy = []
inventory = []
state = self.get_state(0)
init_value = np.zeros((1, 2 * self.LAYER_SIZE))
for k in range(self.INITIAL_FEATURES.shape[0]):
self.INITIAL_FEATURES[k,:] = state
for t in range(0, len(self.trend) - 1, self.skip):
action, last_state = self.sess.run([self.logits,self.last_state],
feed_dict={self.X:[self.INITIAL_FEATURES],
self.hidden_layer:init_value})
action, init_value = np.argmax(action[0]), last_state
next_state = self.get_state(t + 1)
if action == 1 and initial_money >= self.trend[t]:
inventory.append(self.trend[t])
initial_money -= self.trend[t]
states_buy.append(t)
print('day %d: buy 1 unit at price %f, total balance %f'% (t, self.trend[t], initial_money))
elif action == 2 and len(inventory):
bought_price = inventory.pop(0)
initial_money += self.trend[t]
states_sell.append(t)
try:
invest = ((close[t] - bought_price) / bought_price) * 100
except:
invest = 0
print(
'day %d, sell 1 unit at price %f, investment %f %%, total balance %f,'
% (t, close[t], invest, initial_money)
)
new_state = np.append([self.get_state(t + 1)], self.INITIAL_FEATURES[:3, :], axis = 0)
self.INITIAL_FEATURES = new_state
invest = ((initial_money - starting_money) / starting_money) * 100
total_gains = initial_money - starting_money
return states_buy, states_sell, total_gains, invest
def train(self, iterations, checkpoint, initial_money):
for i in range(iterations):
total_profit = 0
inventory = []
state = self.get_state(0)
starting_money = initial_money
init_value = np.zeros((1, 2 * self.LAYER_SIZE))
for k in range(self.INITIAL_FEATURES.shape[0]):
self.INITIAL_FEATURES[k,:] = state
for t in range(0, len(self.trend) - 1, self.skip):
if np.random.rand() < self.EPSILON:
action = np.random.randint(self.OUTPUT_SIZE)
else:
action, last_state = self.sess.run([self.logits,
self.last_state],
feed_dict={self.X:[self.INITIAL_FEATURES],
self.hidden_layer:init_value})
action, init_value = np.argmax(action[0]), last_state
next_state = self.get_state(t + 1)
if action == 1 and starting_money >= self.trend[t]:
inventory.append(self.trend[t])
starting_money -= self.trend[t]
elif action == 2 and len(inventory) > 0:
bought_price = inventory.pop(0)
total_profit += self.trend[t] - bought_price
starting_money += self.trend[t]
invest = ((starting_money - initial_money) / initial_money)
new_state = np.append([self.get_state(t + 1)], self.INITIAL_FEATURES[:3, :], axis = 0)
self._memorize(self.INITIAL_FEATURES, action, invest, new_state,
starting_money < initial_money, init_value[0])
self.INITIAL_FEATURES = new_state
batch_size = min(len(self.MEMORIES), self.BATCH_SIZE)
replay = random.sample(self.MEMORIES, batch_size)
X, Y, INIT_VAL = self._construct_memories(replay)
cost, _ = self.sess.run([self.cost, self.optimizer],
feed_dict={self.X: X, self.Y:Y,
self.hidden_layer: INIT_VAL})
self.EPSILON = self.MIN_EPSILON + (1.0 - self.MIN_EPSILON) * np.exp(-self.DECAY_RATE * i)
if (i+1) % checkpoint == 0:
print('epoch: %d, total rewards: %f.3, cost: %f, total money: %f'%(i + 1, total_profit, cost,
starting_money))
close = df.Close.values.tolist()
initial_money = 10000
window_size = 30
skip = 1
batch_size = 32
agent = Agent(state_size = window_size,
window_size = window_size,
trend = close,
skip = skip)
agent.train(iterations = 200, checkpoint = 10, initial_money = initial_money)
WARNING:tensorflow:<tensorflow.python.ops.rnn_cell_impl.LSTMCell object at 0x7fef003b2d30>: Using a concatenated state is slower and will soon be deprecated. Use state_is_tuple=True. epoch: 10, total rewards: 449.400388.3, cost: 0.117951, total money: 7420.680355 epoch: 20, total rewards: 513.109983.3, cost: 0.187314, total money: 7552.130003 epoch: 30, total rewards: 1755.114813.3, cost: 0.337607, total money: 6759.834784 epoch: 40, total rewards: 545.719909.3, cost: 0.555657, total money: 9529.079894 epoch: 50, total rewards: 593.435182.3, cost: 0.399239, total money: 6611.165162 epoch: 60, total rewards: 285.174678.3, cost: 0.071772, total money: 6314.564631 epoch: 70, total rewards: 169.200014.3, cost: 0.796504, total money: 4264.030030 epoch: 80, total rewards: 520.019840.3, cost: 0.567794, total money: 6501.959842 epoch: 90, total rewards: 498.320189.3, cost: 0.245750, total money: 9481.210204 epoch: 100, total rewards: 1572.605044.3, cost: 1.142984, total money: 11572.605044 epoch: 110, total rewards: 297.584960.3, cost: 0.973414, total money: 10297.584960 epoch: 120, total rewards: 912.394901.3, cost: 2.032860, total money: 6987.034854 epoch: 130, total rewards: 22.109988.3, cost: 0.097879, total money: 10022.109988 epoch: 140, total rewards: 471.779909.3, cost: 0.532008, total money: 10471.779909 epoch: 150, total rewards: 215.255126.3, cost: 0.236825, total money: 10215.255126 epoch: 160, total rewards: 147.780093.3, cost: 0.432537, total money: 9174.450076 epoch: 170, total rewards: 203.309817.3, cost: 0.413111, total money: 10203.309817 epoch: 180, total rewards: 76.350403.3, cost: 0.132205, total money: 8084.520385 epoch: 190, total rewards: 173.749880.3, cost: 1.325852, total money: 10173.749880 epoch: 200, total rewards: 4.325196.3, cost: 0.500293, total money: 8987.685181
states_buy, states_sell, total_gains, invest = agent.buy(initial_money = initial_money)
day 13: buy 1 unit at price 769.200012, total balance 9230.799988 day 14: buy 1 unit at price 768.270020, total balance 8462.529968 day 15, sell 1 unit at price 760.989990, investment -1.067346 %, total balance 9223.519958, day 17: buy 1 unit at price 768.239990, total balance 8455.279968 day 18, sell 1 unit at price 770.840027, investment 0.334519 %, total balance 9226.119995, day 19, sell 1 unit at price 758.039978, investment -1.327712 %, total balance 9984.159973, day 29: buy 1 unit at price 797.070007, total balance 9187.089966 day 30: buy 1 unit at price 797.849976, total balance 8389.239990 day 33, sell 1 unit at price 796.419983, investment -0.081552 %, total balance 9185.659973, day 34: buy 1 unit at price 794.559998, total balance 8391.099975 day 36, sell 1 unit at price 789.909973, investment -0.995175 %, total balance 9181.009948, day 37, sell 1 unit at price 791.549988, investment -0.378827 %, total balance 9972.559936, day 39: buy 1 unit at price 782.789978, total balance 9189.769958 day 40, sell 1 unit at price 771.820007, investment -1.401394 %, total balance 9961.589965, day 46: buy 1 unit at price 804.789978, total balance 9156.799987 day 47: buy 1 unit at price 807.909973, total balance 8348.890014 day 49, sell 1 unit at price 807.880005, investment 0.383954 %, total balance 9156.770019, day 50, sell 1 unit at price 804.609985, investment -0.408460 %, total balance 9961.380004, day 51: buy 1 unit at price 806.070007, total balance 9155.309997 day 54, sell 1 unit at price 819.309998, investment 1.642536 %, total balance 9974.619995, day 110: buy 1 unit at price 824.320007, total balance 9150.299988 day 111, sell 1 unit at price 823.559998, investment -0.092198 %, total balance 9973.859986, day 128: buy 1 unit at price 932.169983, total balance 9041.690003 day 129: buy 1 unit at price 928.780029, total balance 8112.909974 day 130, sell 1 unit at price 930.599976, investment -0.168425 %, total balance 9043.509950, day 131, sell 1 unit at price 932.219971, investment 0.370372 %, total balance 9975.729921, day 173: buy 1 unit at price 947.159973, total balance 9028.569948 day 175, sell 1 unit at price 953.419983, investment 0.660924 %, total balance 9981.989931, day 182: buy 1 unit at price 947.799988, total balance 9034.189943 day 183, sell 1 unit at price 934.090027, investment -1.446504 %, total balance 9968.279970, day 197: buy 1 unit at price 926.960022, total balance 9041.319948 day 198, sell 1 unit at price 910.979980, investment -1.723919 %, total balance 9952.299928, day 204: buy 1 unit at price 915.890015, total balance 9036.409913 day 205, sell 1 unit at price 913.809998, investment -0.227103 %, total balance 9950.219911, day 207: buy 1 unit at price 929.570007, total balance 9020.649904 day 209, sell 1 unit at price 937.340027, investment 0.835872 %, total balance 9957.989931,
fig = plt.figure(figsize = (15,5))
plt.plot(close, color='r', lw=2.)
plt.plot(close, '^', markersize=10, color='m', label = 'buying signal', markevery = states_buy)
plt.plot(close, 'v', markersize=10, color='k', label = 'selling signal', markevery = states_sell)
plt.title('total gains %f, total investment %f%%'%(total_gains, invest))
plt.legend()
plt.savefig('output/'+name+'.png')
plt.show()
from collections import deque
import random
df= df_full.copy()
name = 'Double Recurrent Q-learning agent'
class Model:
def __init__(self, input_size, output_size, layer_size, learning_rate, name):
with tf.variable_scope(name):
self.X = tf.placeholder(tf.float32, (None, None, input_size))
self.Y = tf.placeholder(tf.float32, (None, output_size))
cell = tf.nn.rnn_cell.LSTMCell(layer_size, state_is_tuple = False)
self.hidden_layer = tf.placeholder(tf.float32, (None, 2 * layer_size))
self.rnn,self.last_state = tf.nn.dynamic_rnn(inputs=self.X,cell=cell,
dtype=tf.float32,
initial_state=self.hidden_layer)
self.logits = tf.layers.dense(self.rnn[:,-1], output_size)
self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)
class Agent:
LEARNING_RATE = 0.003
BATCH_SIZE = 32
LAYER_SIZE = 256
OUTPUT_SIZE = 3
EPSILON = 0.5
DECAY_RATE = 0.005
MIN_EPSILON = 0.1
GAMMA = 0.99
MEMORIES = deque()
COPY = 1000
T_COPY = 0
MEMORY_SIZE = 300
def __init__(self, state_size, window_size, trend, skip):
self.state_size = state_size
self.window_size = window_size
self.half_window = window_size // 2
self.trend = trend
self.skip = skip
tf.reset_default_graph()
self.INITIAL_FEATURES = np.zeros((4, self.state_size))
self.model = Model(self.state_size, self.OUTPUT_SIZE, self.LAYER_SIZE, self.LEARNING_RATE,
'real_model')
self.model_negative = Model(self.state_size, self.OUTPUT_SIZE, self.LAYER_SIZE, self.LEARNING_RATE,
'negative_model')
self.sess = tf.InteractiveSession()
self.sess.run(tf.global_variables_initializer())
self.trainable = tf.trainable_variables()
def _assign(self, from_name, to_name):
from_w = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=from_name)
to_w = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=to_name)
for i in range(len(from_w)):
assign_op = to_w[i].assign(from_w[i])
self.sess.run(assign_op)
def _memorize(self, state, action, reward, new_state, dead, rnn_state):
self.MEMORIES.append((state, action, reward, new_state, dead, rnn_state))
if len(self.MEMORIES) > self.MEMORY_SIZE:
self.MEMORIES.popleft()
def _select_action(self, state):
if np.random.rand() < self.EPSILON:
action = np.random.randint(self.OUTPUT_SIZE)
else:
action = self.get_predicted_action([state])
return action
def _construct_memories(self, replay):
states = np.array([a[0] for a in replay])
new_states = np.array([a[3] for a in replay])
init_values = np.array([a[-1] for a in replay])
Q = self.sess.run(self.model.logits, feed_dict={self.model.X:states,
self.model.hidden_layer:init_values})
Q_new = self.sess.run(self.model.logits, feed_dict={self.model.X:new_states,
self.model.hidden_layer:init_values})
Q_new_negative = self.sess.run(self.model_negative.logits,
feed_dict={self.model_negative.X:new_states,
self.model_negative.hidden_layer:init_values})
replay_size = len(replay)
X = np.empty((replay_size, 4, self.state_size))
Y = np.empty((replay_size, self.OUTPUT_SIZE))
INIT_VAL = np.empty((replay_size, 2 * self.LAYER_SIZE))
for i in range(replay_size):
state_r, action_r, reward_r, new_state_r, dead_r, rnn_memory = replay[i]
target = Q[i]
target[action_r] = reward_r
if not dead_r:
target[action_r] += self.GAMMA * Q_new_negative[i, np.argmax(Q_new[i])]
X[i] = state_r
Y[i] = target
INIT_VAL[i] = rnn_memory
return X, Y, INIT_VAL
def get_state(self, t):
window_size = self.window_size + 1
d = t - window_size + 1
block = self.trend[d : t + 1] if d >= 0 else -d * [self.trend[0]] + self.trend[0 : t + 1]
res = []
for i in range(window_size - 1):
res.append(block[i + 1] - block[i])
return np.array(res)
def buy(self, initial_money):
starting_money = initial_money
states_sell = []
states_buy = []
inventory = []
state = self.get_state(0)
init_value = np.zeros((1, 2 * self.LAYER_SIZE))
for k in range(self.INITIAL_FEATURES.shape[0]):
self.INITIAL_FEATURES[k,:] = state
for t in range(0, len(self.trend) - 1, self.skip):
action, last_state = self.sess.run([self.model.logits,self.model.last_state],
feed_dict={self.model.X:[self.INITIAL_FEATURES],
self.model.hidden_layer:init_value})
action, init_value = np.argmax(action[0]), last_state
next_state = self.get_state(t + 1)
if action == 1 and initial_money >= self.trend[t]:
inventory.append(self.trend[t])
initial_money -= self.trend[t]
states_buy.append(t)
print('day %d: buy 1 unit at price %f, total balance %f'% (t, self.trend[t], initial_money))
elif action == 2 and len(inventory):
bought_price = inventory.pop(0)
initial_money += self.trend[t]
states_sell.append(t)
try:
invest = ((close[t] - bought_price) / bought_price) * 100
except:
invest = 0
print(
'day %d, sell 1 unit at price %f, investment %f %%, total balance %f,'
% (t, close[t], invest, initial_money)
)
new_state = np.append([self.get_state(t + 1)], self.INITIAL_FEATURES[:3, :], axis = 0)
self.INITIAL_FEATURES = new_state
invest = ((initial_money - starting_money) / starting_money) * 100
total_gains = initial_money - starting_money
return states_buy, states_sell, total_gains, invest
def train(self, iterations, checkpoint, initial_money):
for i in range(iterations):
total_profit = 0
inventory = []
state = self.get_state(0)
starting_money = initial_money
init_value = np.zeros((1, 2 * self.LAYER_SIZE))
for k in range(self.INITIAL_FEATURES.shape[0]):
self.INITIAL_FEATURES[k,:] = state
for t in range(0, len(self.trend) - 1, self.skip):
if (self.T_COPY + 1) % self.COPY == 0:
self._assign('real_model', 'negative_model')
if np.random.rand() < self.EPSILON:
action = np.random.randint(self.OUTPUT_SIZE)
else:
action, last_state = self.sess.run([self.model.logits,
self.model.last_state],
feed_dict={self.model.X:[self.INITIAL_FEATURES],
self.model.hidden_layer:init_value})
action, init_value = np.argmax(action[0]), last_state
next_state = self.get_state(t + 1)
if action == 1 and starting_money >= self.trend[t]:
inventory.append(self.trend[t])
starting_money -= self.trend[t]
elif action == 2 and len(inventory) > 0:
bought_price = inventory.pop(0)
total_profit += self.trend[t] - bought_price
starting_money += self.trend[t]
invest = ((starting_money - initial_money) / initial_money)
new_state = np.append([self.get_state(t + 1)], self.INITIAL_FEATURES[:3, :], axis = 0)
self._memorize(self.INITIAL_FEATURES, action, invest, new_state,
starting_money < initial_money, init_value[0])
self.INITIAL_FEATURES = new_state
batch_size = min(len(self.MEMORIES), self.BATCH_SIZE)
replay = random.sample(self.MEMORIES, batch_size)
X, Y, INIT_VAL = self._construct_memories(replay)
cost, _ = self.sess.run([self.model.cost, self.model.optimizer],
feed_dict={self.model.X: X, self.model.Y:Y,
self.model.hidden_layer: INIT_VAL})
self.T_COPY += 1
self.EPSILON = self.MIN_EPSILON + (1.0 - self.MIN_EPSILON) * np.exp(-self.DECAY_RATE * i)
if (i+1) % checkpoint == 0:
print('epoch: %d, total rewards: %f.3, cost: %f, total money: %f'%(i + 1, total_profit, cost,
starting_money))
close = df.Close.values.tolist()
initial_money = 10000
window_size = 30
skip = 1
batch_size = 32
agent = Agent(state_size = window_size,
window_size = window_size,
trend = close,
skip = skip)
agent.train(iterations = 200, checkpoint = 10, initial_money = initial_money)
WARNING:tensorflow:<tensorflow.python.ops.rnn_cell_impl.LSTMCell object at 0x7fb85fd10940>: Using a concatenated state is slower and will soon be deprecated. Use state_is_tuple=True. WARNING:tensorflow:<tensorflow.python.ops.rnn_cell_impl.LSTMCell object at 0x7fb85f9de7b8>: Using a concatenated state is slower and will soon be deprecated. Use state_is_tuple=True. epoch: 10, total rewards: 1305.274912.3, cost: 0.402263, total money: 777.284860 epoch: 20, total rewards: 582.070375.3, cost: 0.782595, total money: 804.650331 epoch: 30, total rewards: 420.380369.3, cost: 1.481925, total money: 80.210326 epoch: 40, total rewards: 1502.554748.3, cost: 0.343374, total money: 2823.564757 epoch: 50, total rewards: 589.170222.3, cost: 0.370314, total money: 6597.640193 epoch: 60, total rewards: 1069.864985.3, cost: 0.733583, total money: 10052.755000 epoch: 70, total rewards: 900.360168.3, cost: 0.154633, total money: 8866.610168 epoch: 80, total rewards: 625.559509.3, cost: 0.573019, total money: 9652.999511 epoch: 90, total rewards: 966.905028.3, cost: 0.080430, total money: 6971.785033 epoch: 100, total rewards: 784.169802.3, cost: 0.568819, total money: 10784.169802 epoch: 110, total rewards: 658.149963.3, cost: 0.052230, total money: 9641.509948 epoch: 120, total rewards: 615.210201.3, cost: 0.802322, total money: 9595.940181 epoch: 130, total rewards: 623.289978.3, cost: 0.278659, total money: 10623.289978 epoch: 140, total rewards: 595.960078.3, cost: 0.094435, total money: 10595.960078 epoch: 150, total rewards: 594.979550.3, cost: 0.360762, total money: 1819.289547 epoch: 160, total rewards: 794.614687.3, cost: 1.058314, total money: 3118.034730 epoch: 170, total rewards: 1225.854981.3, cost: 0.226553, total money: 5322.584961 epoch: 180, total rewards: 1099.610169.3, cost: 0.275357, total money: 6189.200135 epoch: 190, total rewards: 857.554813.3, cost: 0.417154, total money: 7946.004825 epoch: 200, total rewards: 1049.100096.3, cost: 0.839669, total money: 3317.970090
states_buy, states_sell, total_gains, invest = agent.buy(initial_money = initial_money)
day 0: buy 1 unit at price 768.700012, total balance 9231.299988 day 1, sell 1 unit at price 762.130005, investment -0.854691 %, total balance 9993.429993, day 3: buy 1 unit at price 782.520020, total balance 9210.909973 day 4, sell 1 unit at price 790.510010, investment 1.021059 %, total balance 10001.419983, day 5: buy 1 unit at price 785.309998, total balance 9216.109985 day 6, sell 1 unit at price 762.559998, investment -2.896945 %, total balance 9978.669983, day 7: buy 1 unit at price 754.020020, total balance 9224.649963 day 8, sell 1 unit at price 736.080017, investment -2.379248 %, total balance 9960.729980, day 13: buy 1 unit at price 769.200012, total balance 9191.529968 day 16, sell 1 unit at price 761.679993, investment -0.977642 %, total balance 9953.209961, day 19: buy 1 unit at price 758.039978, total balance 9195.169983 day 20, sell 1 unit at price 747.919983, investment -1.335021 %, total balance 9943.089966, day 24: buy 1 unit at price 771.190002, total balance 9171.899964 day 28: buy 1 unit at price 796.099976, total balance 8375.799988 day 29: buy 1 unit at price 797.070007, total balance 7578.729981 day 31: buy 1 unit at price 790.799988, total balance 6787.929993 day 32, sell 1 unit at price 794.200012, investment 2.983702 %, total balance 7582.130005, day 33, sell 1 unit at price 796.419983, investment 0.040197 %, total balance 8378.549988, day 35, sell 1 unit at price 791.260010, investment -0.728919 %, total balance 9169.809998, day 36, sell 1 unit at price 789.909973, investment -0.112546 %, total balance 9959.719971, day 38: buy 1 unit at price 785.049988, total balance 9174.669983 day 41: buy 1 unit at price 786.140015, total balance 8388.529968 day 42, sell 1 unit at price 786.900024, investment 0.235658 %, total balance 9175.429992, day 43, sell 1 unit at price 794.020020, investment 1.002367 %, total balance 9969.450012, day 44: buy 1 unit at price 806.150024, total balance 9163.299988 day 46, sell 1 unit at price 804.789978, investment -0.168709 %, total balance 9968.089966, day 47: buy 1 unit at price 807.909973, total balance 9160.179993 day 49, sell 1 unit at price 807.880005, investment -0.003709 %, total balance 9968.059998, day 51: buy 1 unit at price 806.070007, total balance 9161.989991 day 52: buy 1 unit at price 802.174988, total balance 8359.815003 day 54: buy 1 unit at price 819.309998, total balance 7540.505005 day 55, sell 1 unit at price 823.869995, investment 2.208243 %, total balance 8364.375000, day 56: buy 1 unit at price 835.669983, total balance 7528.705017 day 57: buy 1 unit at price 832.150024, total balance 6696.554993 day 58, sell 1 unit at price 823.309998, investment 2.634713 %, total balance 7519.864991, day 59, sell 1 unit at price 802.320007, investment -2.073695 %, total balance 8322.184998, day 61, sell 1 unit at price 795.695007, investment -4.783584 %, total balance 9117.880005, day 62, sell 1 unit at price 798.530029, investment -4.040136 %, total balance 9916.410034, day 68: buy 1 unit at price 813.669983, total balance 9102.740051 day 69, sell 1 unit at price 819.239990, investment 0.684554 %, total balance 9921.980041, day 76: buy 1 unit at price 831.330017, total balance 9090.650024 day 77: buy 1 unit at price 828.640015, total balance 8262.010009 day 79, sell 1 unit at price 823.210022, investment -0.976747 %, total balance 9085.220031, day 81, sell 1 unit at price 830.630005, investment 0.240151 %, total balance 9915.850036, day 86: buy 1 unit at price 838.679993, total balance 9077.170043 day 88: buy 1 unit at price 845.539978, total balance 8231.630065 day 89, sell 1 unit at price 845.619995, investment 0.827491 %, total balance 9077.250060, day 91, sell 1 unit at price 848.780029, investment 0.383193 %, total balance 9926.030089, day 95: buy 1 unit at price 829.590027, total balance 9096.440062 day 96, sell 1 unit at price 817.580017, investment -1.447704 %, total balance 9914.020079, day 97: buy 1 unit at price 814.429993, total balance 9099.590086 day 101: buy 1 unit at price 831.500000, total balance 8268.090086 day 102: buy 1 unit at price 829.559998, total balance 7438.530088 day 104, sell 1 unit at price 834.570007, investment 2.472897 %, total balance 8273.100095, day 105, sell 1 unit at price 831.409973, investment -0.010827 %, total balance 9104.510068, day 106, sell 1 unit at price 827.880005, investment -0.202516 %, total balance 9932.390073, day 108: buy 1 unit at price 824.729980, total balance 9107.660093 day 109, sell 1 unit at price 823.349976, investment -0.167328 %, total balance 9931.010069, day 114: buy 1 unit at price 838.210022, total balance 9092.800047 day 117, sell 1 unit at price 862.760010, investment 2.928859 %, total balance 9955.560057, day 121: buy 1 unit at price 905.960022, total balance 9049.600035 day 122: buy 1 unit at price 912.570007, total balance 8137.030028 day 124: buy 1 unit at price 927.039978, total balance 7209.990050 day 125: buy 1 unit at price 931.659973, total balance 6278.330077 day 130, sell 1 unit at price 930.599976, investment 2.719762 %, total balance 7208.930053, day 131, sell 1 unit at price 932.219971, investment 2.153256 %, total balance 8141.150024, day 132, sell 1 unit at price 937.080017, investment 1.083021 %, total balance 9078.230041, day 133, sell 1 unit at price 943.000000, investment 1.217185 %, total balance 10021.230041, day 137: buy 1 unit at price 941.859985, total balance 9079.370056 day 138, sell 1 unit at price 948.820007, investment 0.738966 %, total balance 10028.190063, day 142: buy 1 unit at price 975.880005, total balance 9052.310058 day 143: buy 1 unit at price 964.859985, total balance 8087.450073 day 144: buy 1 unit at price 966.950012, total balance 7120.500061 day 145, sell 1 unit at price 975.599976, investment -0.028695 %, total balance 8096.100037, day 146, sell 1 unit at price 983.679993, investment 1.950543 %, total balance 9079.780030, day 147: buy 1 unit at price 976.570007, total balance 8103.210023 day 148: buy 1 unit at price 980.940002, total balance 7122.270021 day 150, sell 1 unit at price 949.830017, investment -1.770515 %, total balance 8072.100038, day 151: buy 1 unit at price 942.900024, total balance 7129.200014 day 152, sell 1 unit at price 953.400024, investment -2.372588 %, total balance 8082.600038, day 153: buy 1 unit at price 950.760010, total balance 7131.840028 day 154, sell 1 unit at price 942.309998, investment -3.938060 %, total balance 8074.150026, day 155, sell 1 unit at price 939.780029, investment -0.330894 %, total balance 9013.930055, day 156, sell 1 unit at price 957.369995, investment 0.695232 %, total balance 9971.300050, day 159: buy 1 unit at price 957.090027, total balance 9014.210023 day 160: buy 1 unit at price 965.590027, total balance 8048.619996 day 161, sell 1 unit at price 952.270020, investment -0.503611 %, total balance 9000.890016, day 162: buy 1 unit at price 927.330017, total balance 8073.559999 day 163: buy 1 unit at price 940.489990, total balance 7133.070009 day 165: buy 1 unit at price 908.729980, total balance 6224.340029 day 167: buy 1 unit at price 911.710022, total balance 5312.630007 day 169, sell 1 unit at price 918.590027, investment -4.867490 %, total balance 6231.220034, day 170, sell 1 unit at price 928.799988, investment 0.158516 %, total balance 7160.020022, day 173, sell 1 unit at price 947.159973, investment 0.709203 %, total balance 8107.179995, day 174: buy 1 unit at price 955.989990, total balance 7151.190005 day 175: buy 1 unit at price 953.419983, total balance 6197.770022 day 176: buy 1 unit at price 965.400024, total balance 5232.369998 day 177, sell 1 unit at price 970.890015, investment 6.840320 %, total balance 6203.260013, day 178: buy 1 unit at price 968.150024, total balance 5235.109989 day 179: buy 1 unit at price 972.919983, total balance 4262.190006 day 180: buy 1 unit at price 980.340027, total balance 3281.849979 day 181, sell 1 unit at price 950.700012, investment 4.276578 %, total balance 4232.549991, day 182, sell 1 unit at price 947.799988, investment -0.856704 %, total balance 5180.349979, day 184, sell 1 unit at price 941.530029, investment -1.247085 %, total balance 6121.880008, day 185, sell 1 unit at price 930.500000, investment -3.615084 %, total balance 7052.380008, day 186: buy 1 unit at price 930.830017, total balance 6121.549991 day 190: buy 1 unit at price 929.359985, total balance 5192.190006 day 191, sell 1 unit at price 926.789978, investment -4.272070 %, total balance 6118.979984, day 192, sell 1 unit at price 922.900024, investment -5.141220 %, total balance 7041.880008, day 193, sell 1 unit at price 907.239990, investment -7.456600 %, total balance 7949.119998, day 196: buy 1 unit at price 922.219971, total balance 7026.900027 day 198: buy 1 unit at price 910.979980, total balance 6115.920047 day 199, sell 1 unit at price 910.669983, investment -2.165813 %, total balance 7026.590030, day 200, sell 1 unit at price 906.659973, investment -2.442542 %, total balance 7933.250003, day 201, sell 1 unit at price 924.690002, investment 0.267835 %, total balance 8857.940005, day 204, sell 1 unit at price 915.890015, investment 0.538984 %, total balance 9773.830020, day 205: buy 1 unit at price 913.809998, total balance 8860.020022 day 206, sell 1 unit at price 921.289978, investment 0.818549 %, total balance 9781.310000, day 209: buy 1 unit at price 937.340027, total balance 8843.969973 day 210: buy 1 unit at price 928.450012, total balance 7915.519961 day 211, sell 1 unit at price 927.809998, investment -1.016710 %, total balance 8843.329959, day 212: buy 1 unit at price 935.950012, total balance 7907.379947 day 214, sell 1 unit at price 929.080017, investment 0.067856 %, total balance 8836.459964, day 216, sell 1 unit at price 935.090027, investment -0.091884 %, total balance 9771.549991, day 217: buy 1 unit at price 925.109985, total balance 8846.440006 day 219, sell 1 unit at price 915.000000, investment -1.092841 %, total balance 9761.440006, day 220: buy 1 unit at price 921.809998, total balance 8839.630008 day 221: buy 1 unit at price 931.580017, total balance 7908.049991 day 222: buy 1 unit at price 932.450012, total balance 6975.599979 day 226, sell 1 unit at price 944.489990, investment 2.460376 %, total balance 7920.089969, day 227: buy 1 unit at price 949.500000, total balance 6970.589969 day 229, sell 1 unit at price 953.270020, investment 2.328303 %, total balance 7923.859989, day 230: buy 1 unit at price 957.789978, total balance 6966.070011 day 231: buy 1 unit at price 951.679993, total balance 6014.390018 day 232: buy 1 unit at price 969.960022, total balance 5044.429996 day 233, sell 1 unit at price 978.890015, investment 4.980428 %, total balance 6023.320011, day 234: buy 1 unit at price 977.000000, total balance 5046.320011 day 237, sell 1 unit at price 987.830017, investment 4.036863 %, total balance 6034.150028, day 239, sell 1 unit at price 992.000000, investment 3.571767 %, total balance 7026.150028, day 240, sell 1 unit at price 992.179993, investment 4.255632 %, total balance 8018.330021, day 243, sell 1 unit at price 988.200012, investment 1.880489 %, total balance 9006.530033, day 244, sell 1 unit at price 968.450012, investment -0.875127 %, total balance 9974.980045,
fig = plt.figure(figsize = (15,5))
plt.plot(close, color='r', lw=2.)
plt.plot(close, '^', markersize=10, color='m', label = 'buying signal', markevery = states_buy)
plt.plot(close, 'v', markersize=10, color='k', label = 'selling signal', markevery = states_sell)
plt.title('total gains %f, total investment %f%%'%(total_gains, invest))
plt.legend()
plt.savefig('output/'+name+'.png')
plt.show()
from collections import deque
import random
df= df_full.copy()
name = 'Duel Q-learning agent'
class Agent:
def __init__(self, state_size, window_size, trend, skip, batch_size):
self.state_size = state_size
self.window_size = window_size
self.half_window = window_size // 2
self.trend = trend
self.skip = skip
self.action_size = 3
self.batch_size = batch_size
self.memory = deque(maxlen = 1000)
self.inventory = []
self.gamma = 0.95
self.epsilon = 0.5
self.epsilon_min = 0.01
self.epsilon_decay = 0.999
tf.reset_default_graph()
self.sess = tf.InteractiveSession()
self.X = tf.placeholder(tf.float32, [None, self.state_size])
self.Y = tf.placeholder(tf.float32, [None, self.action_size])
feed = tf.layers.dense(self.X, 512, activation = tf.nn.relu)
tensor_action, tensor_validation = tf.split(feed,2,1)
feed_action = tf.layers.dense(tensor_action, self.action_size)
feed_validation = tf.layers.dense(tensor_validation, 1)
self.logits = feed_validation + tf.subtract(feed_action,tf.reduce_mean(feed_action,axis=1,keep_dims=True))
self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
self.optimizer = tf.train.GradientDescentOptimizer(1e-5).minimize(
self.cost
)
self.sess.run(tf.global_variables_initializer())
def act(self, state):
if random.random() <= self.epsilon:
return random.randrange(self.action_size)
return np.argmax(
self.sess.run(self.logits, feed_dict = {self.X: state})[0]
)
def get_state(self, t):
window_size = self.window_size + 1
d = t - window_size + 1
block = self.trend[d : t + 1] if d >= 0 else -d * [self.trend[0]] + self.trend[0 : t + 1]
res = []
for i in range(window_size - 1):
res.append(block[i + 1] - block[i])
return np.array([res])
def replay(self, batch_size):
mini_batch = []
l = len(self.memory)
for i in range(l - batch_size, l):
mini_batch.append(self.memory[i])
replay_size = len(mini_batch)
X = np.empty((replay_size, self.state_size))
Y = np.empty((replay_size, self.action_size))
states = np.array([a[0][0] for a in mini_batch])
new_states = np.array([a[3][0] for a in mini_batch])
Q = self.sess.run(self.logits, feed_dict = {self.X: states})
Q_new = self.sess.run(self.logits, feed_dict = {self.X: new_states})
for i in range(len(mini_batch)):
state, action, reward, next_state, done = mini_batch[i]
target = Q[i]
target[action] = reward
if not done:
target[action] += self.gamma * np.amax(Q_new[i])
X[i] = state
Y[i] = target
cost, _ = self.sess.run(
[self.cost, self.optimizer], feed_dict = {self.X: X, self.Y: Y}
)
if self.epsilon > self.epsilon_min:
self.epsilon *= self.epsilon_decay
return cost
def buy(self, initial_money):
starting_money = initial_money
states_sell = []
states_buy = []
inventory = []
state = self.get_state(0)
for t in range(0, len(self.trend) - 1, self.skip):
action = self.act(state)
next_state = self.get_state(t + 1)
if action == 1 and initial_money >= self.trend[t] and t < (len(self.trend) - self.half_window):
inventory.append(self.trend[t])
initial_money -= self.trend[t]
states_buy.append(t)
print('day %d: buy 1 unit at price %f, total balance %f'% (t, self.trend[t], initial_money))
elif action == 2 and len(inventory):
bought_price = inventory.pop(0)
initial_money += self.trend[t]
states_sell.append(t)
try:
invest = ((close[t] - bought_price) / bought_price) * 100
except:
invest = 0
print(
'day %d, sell 1 unit at price %f, investment %f %%, total balance %f,'
% (t, close[t], invest, initial_money)
)
state = next_state
invest = ((initial_money - starting_money) / starting_money) * 100
total_gains = initial_money - starting_money
return states_buy, states_sell, total_gains, invest
def train(self, iterations, checkpoint, initial_money):
for i in range(iterations):
total_profit = 0
inventory = []
state = self.get_state(0)
starting_money = initial_money
for t in range(0, len(self.trend) - 1, self.skip):
action = self.act(state)
next_state = self.get_state(t + 1)
if action == 1 and starting_money >= self.trend[t] and t < (len(self.trend) - self.half_window):
inventory.append(self.trend[t])
starting_money -= self.trend[t]
elif action == 2 and len(inventory) > 0:
bought_price = inventory.pop(0)
total_profit += self.trend[t] - bought_price
starting_money += self.trend[t]
invest = ((starting_money - initial_money) / initial_money)
self.memory.append((state, action, invest,
next_state, starting_money < initial_money))
state = next_state
batch_size = min(self.batch_size, len(self.memory))
cost = self.replay(batch_size)
if (i+1) % checkpoint == 0:
print('epoch: %d, total rewards: %f.3, cost: %f, total money: %f'%(i + 1, total_profit, cost,
starting_money))
close = df.Close.values.tolist()
initial_money = 10000
window_size = 30
skip = 1
batch_size = 32
agent = Agent(state_size = window_size,
window_size = window_size,
trend = close,
skip = skip,
batch_size = batch_size)
agent.train(iterations = 200, checkpoint = 10, initial_money = initial_money)
WARNING:tensorflow:From <ipython-input-3-28bed545c0f8>:30: calling reduce_mean (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version. Instructions for updating: keep_dims is deprecated, use keepdims instead epoch: 10, total rewards: 231.100222.3, cost: 0.499693, total money: 10231.100222 epoch: 20, total rewards: 195.875063.3, cost: 0.324152, total money: 10195.875063 epoch: 30, total rewards: 219.615054.3, cost: 0.237771, total money: 10219.615054 epoch: 40, total rewards: 56.505131.3, cost: 0.183305, total money: 10056.505131 epoch: 50, total rewards: 190.745120.3, cost: 0.129967, total money: 10190.745120 epoch: 60, total rewards: 165.275088.3, cost: 0.134246, total money: 10165.275088 epoch: 70, total rewards: 201.795107.3, cost: 0.075016, total money: 10201.795107 epoch: 80, total rewards: 187.545045.3, cost: 0.062454, total money: 10187.545045 epoch: 90, total rewards: 206.835023.3, cost: 0.050687, total money: 10206.835023 epoch: 100, total rewards: 199.895082.3, cost: 0.041359, total money: 10199.895082 epoch: 110, total rewards: 184.405092.3, cost: 0.035289, total money: 10184.405092 epoch: 120, total rewards: 242.405092.3, cost: 0.047248, total money: 10242.405092 epoch: 130, total rewards: 148.405032.3, cost: 0.050786, total money: 10148.405032 epoch: 140, total rewards: 225.724978.3, cost: 0.021171, total money: 10225.724978 epoch: 150, total rewards: 168.344972.3, cost: 0.018388, total money: 10168.344972 epoch: 160, total rewards: 230.095034.3, cost: 0.199324, total money: 10230.095034 epoch: 170, total rewards: 206.275026.3, cost: 0.044696, total money: 10206.275026 epoch: 180, total rewards: 364.895023.3, cost: 0.016494, total money: 10364.895023 epoch: 190, total rewards: 220.664980.3, cost: 0.014381, total money: 10220.664980 epoch: 200, total rewards: 175.284975.3, cost: 0.010883, total money: 10175.284975
states_buy, states_sell, total_gains, invest = agent.buy(initial_money = initial_money)
day 7: buy 1 unit at price 754.020020, total balance 9245.979980 day 8: buy 1 unit at price 736.080017, total balance 8509.899963 day 9, sell 1 unit at price 758.489990, investment 0.592818 %, total balance 9268.389953, day 11: buy 1 unit at price 771.229980, total balance 8497.159973 day 12, sell 1 unit at price 760.539978, investment 3.323003 %, total balance 9257.699951, day 14, sell 1 unit at price 768.270020, investment -0.383797 %, total balance 10025.969971, day 22: buy 1 unit at price 762.520020, total balance 9263.449951 day 24, sell 1 unit at price 771.190002, investment 1.137017 %, total balance 10034.639953, day 28: buy 1 unit at price 796.099976, total balance 9238.539977 day 29: buy 1 unit at price 797.070007, total balance 8441.469970 day 31, sell 1 unit at price 790.799988, investment -0.665744 %, total balance 9232.269958, day 32: buy 1 unit at price 794.200012, total balance 8438.069946 day 33: buy 1 unit at price 796.419983, total balance 7641.649963 day 37, sell 1 unit at price 791.549988, investment -0.692539 %, total balance 8433.199951, day 39, sell 1 unit at price 782.789978, investment -1.436670 %, total balance 9215.989929, day 41, sell 1 unit at price 786.140015, investment -1.290772 %, total balance 10002.129944, day 48: buy 1 unit at price 806.359985, total balance 9195.769959 day 49: buy 1 unit at price 807.880005, total balance 8387.889954 day 50, sell 1 unit at price 804.609985, investment -0.217025 %, total balance 9192.499939, day 52: buy 1 unit at price 802.174988, total balance 8390.324951 day 53, sell 1 unit at price 805.020020, investment -0.354011 %, total balance 9195.344971, day 56, sell 1 unit at price 835.669983, investment 4.175522 %, total balance 10031.014954, day 67: buy 1 unit at price 809.559998, total balance 9221.454956 day 68: buy 1 unit at price 813.669983, total balance 8407.784973 day 69, sell 1 unit at price 819.239990, investment 1.195710 %, total balance 9227.024963, day 71: buy 1 unit at price 818.979980, total balance 8408.044983 day 72, sell 1 unit at price 824.159973, investment 1.289219 %, total balance 9232.204956, day 76: buy 1 unit at price 831.330017, total balance 8400.874939 day 77, sell 1 unit at price 828.640015, investment 1.179520 %, total balance 9229.514954, day 78: buy 1 unit at price 829.280029, total balance 8400.234925 day 79, sell 1 unit at price 823.210022, investment -0.976747 %, total balance 9223.444947, day 80: buy 1 unit at price 835.239990, total balance 8388.204957 day 81, sell 1 unit at price 830.630005, investment 0.162789 %, total balance 9218.834962, day 83, sell 1 unit at price 827.780029, investment -0.893152 %, total balance 10046.614991, day 88: buy 1 unit at price 845.539978, total balance 9201.075013 day 89, sell 1 unit at price 845.619995, investment 0.009463 %, total balance 10046.695008, day 91: buy 1 unit at price 848.780029, total balance 9197.914979 day 92: buy 1 unit at price 852.119995, total balance 8345.794984 day 93: buy 1 unit at price 848.400024, total balance 7497.394960 day 96, sell 1 unit at price 817.580017, investment -3.675865 %, total balance 8314.974977, day 97: buy 1 unit at price 814.429993, total balance 7500.544984 day 100, sell 1 unit at price 831.409973, investment -2.430411 %, total balance 8331.954957, day 101, sell 1 unit at price 831.500000, investment -1.991988 %, total balance 9163.454957, day 103, sell 1 unit at price 838.549988, investment 2.961580 %, total balance 10002.004945, day 104: buy 1 unit at price 834.570007, total balance 9167.434938 day 105: buy 1 unit at price 831.409973, total balance 8336.024965 day 106, sell 1 unit at price 827.880005, investment -0.801611 %, total balance 9163.904970, day 107: buy 1 unit at price 824.669983, total balance 8339.234987 day 108: buy 1 unit at price 824.729980, total balance 7514.505007 day 110: buy 1 unit at price 824.320007, total balance 6690.185000 day 111: buy 1 unit at price 823.559998, total balance 5866.625002 day 112, sell 1 unit at price 837.169983, investment 0.692800 %, total balance 6703.794985, day 113, sell 1 unit at price 836.820007, investment 1.473320 %, total balance 7540.614992, day 114, sell 1 unit at price 838.210022, investment 1.634479 %, total balance 8378.825014, day 115, sell 1 unit at price 841.650024, investment 2.102341 %, total balance 9220.475038, day 116, sell 1 unit at price 843.190002, investment 2.383555 %, total balance 10063.665040, day 122: buy 1 unit at price 912.570007, total balance 9151.095033 day 123, sell 1 unit at price 916.440002, investment 0.424077 %, total balance 10067.535035, day 125: buy 1 unit at price 931.659973, total balance 9135.875062 day 126, sell 1 unit at price 927.130005, investment -0.486225 %, total balance 10063.005067, day 144: buy 1 unit at price 966.950012, total balance 9096.055055 day 145, sell 1 unit at price 975.599976, investment 0.894562 %, total balance 10071.655031, day 146: buy 1 unit at price 983.679993, total balance 9087.975038 day 148: buy 1 unit at price 980.940002, total balance 8107.035036 day 149, sell 1 unit at price 983.409973, investment -0.027450 %, total balance 9090.445009, day 151, sell 1 unit at price 942.900024, investment -3.877911 %, total balance 10033.345033, day 155: buy 1 unit at price 939.780029, total balance 9093.565004 day 157, sell 1 unit at price 950.630005, investment 1.154523 %, total balance 10044.195009, day 160: buy 1 unit at price 965.590027, total balance 9078.604982 day 161, sell 1 unit at price 952.270020, investment -1.379468 %, total balance 10030.875002, day 164: buy 1 unit at price 917.789978, total balance 9113.085024 day 166, sell 1 unit at price 898.700012, investment -2.079993 %, total balance 10011.785036, day 168: buy 1 unit at price 906.690002, total balance 9105.095034 day 171: buy 1 unit at price 930.090027, total balance 8175.005007 day 172, sell 1 unit at price 943.830017, investment 4.096220 %, total balance 9118.835024, day 173, sell 1 unit at price 947.159973, investment 1.835300 %, total balance 10065.994997, day 176: buy 1 unit at price 965.400024, total balance 9100.594973 day 179, sell 1 unit at price 972.919983, investment 0.778947 %, total balance 10073.514956, day 180: buy 1 unit at price 980.340027, total balance 9093.174929 day 181, sell 1 unit at price 950.700012, investment -3.023442 %, total balance 10043.874941, day 194: buy 1 unit at price 914.390015, total balance 9129.484926 day 195, sell 1 unit at price 922.669983, investment 0.905518 %, total balance 10052.154909, day 197: buy 1 unit at price 926.960022, total balance 9125.194887 day 199: buy 1 unit at price 910.669983, total balance 8214.524904 day 201, sell 1 unit at price 924.690002, investment -0.244889 %, total balance 9139.214906, day 202, sell 1 unit at price 927.000000, investment 1.793187 %, total balance 10066.214906, day 208: buy 1 unit at price 939.330017, total balance 9126.884889 day 211, sell 1 unit at price 927.809998, investment -1.226408 %, total balance 10054.694887, day 214: buy 1 unit at price 929.080017, total balance 9125.614870 day 216, sell 1 unit at price 935.090027, investment 0.646878 %, total balance 10060.704897, day 219: buy 1 unit at price 915.000000, total balance 9145.704897 day 221: buy 1 unit at price 931.580017, total balance 8214.124880 day 222, sell 1 unit at price 932.450012, investment 1.907105 %, total balance 9146.574892, day 223, sell 1 unit at price 928.530029, investment -0.327399 %, total balance 10075.104921, day 224: buy 1 unit at price 920.969971, total balance 9154.134950 day 226: buy 1 unit at price 944.489990, total balance 8209.644960 day 227: buy 1 unit at price 949.500000, total balance 7260.144960 day 228, sell 1 unit at price 959.109985, investment 4.141287 %, total balance 8219.254945, day 229, sell 1 unit at price 953.270020, investment 0.929605 %, total balance 9172.524965, day 230: buy 1 unit at price 957.789978, total balance 8214.734987 day 231: buy 1 unit at price 951.679993, total balance 7263.054994 day 232, sell 1 unit at price 969.960022, investment 2.154821 %, total balance 8233.015016, day 233, sell 1 unit at price 978.890015, investment 2.202992 %, total balance 9211.905031, day 234, sell 1 unit at price 977.000000, investment 2.660559 %, total balance 10188.905031,
fig = plt.figure(figsize = (15,5))
plt.plot(close, color='r', lw=2.)
plt.plot(close, '^', markersize=10, color='m', label = 'buying signal', markevery = states_buy)
plt.plot(close, 'v', markersize=10, color='k', label = 'selling signal', markevery = states_sell)
plt.title('total gains %f, total investment %f%%'%(total_gains, invest))
plt.legend()
plt.savefig('output/'+name+'.png')
plt.show()
from collections import deque
import random
df= df_full.copy()
name = 'Double Duel Q-learning agent'
class Model:
def __init__(self, input_size, output_size, layer_size, learning_rate):
self.X = tf.placeholder(tf.float32, (None, input_size))
self.Y = tf.placeholder(tf.float32, (None, output_size))
feed = tf.layers.dense(self.X, layer_size, activation = tf.nn.relu)
tensor_action, tensor_validation = tf.split(feed,2,1)
feed_action = tf.layers.dense(tensor_action, output_size)
feed_validation = tf.layers.dense(tensor_validation, 1)
self.logits = feed_validation + tf.subtract(feed_action,tf.reduce_mean(feed_action,axis=1,keep_dims=True))
self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)
class Agent:
LEARNING_RATE = 0.003
BATCH_SIZE = 32
LAYER_SIZE = 500
OUTPUT_SIZE = 3
EPSILON = 0.5
DECAY_RATE = 0.005
MIN_EPSILON = 0.1
GAMMA = 0.99
MEMORIES = deque()
COPY = 1000
T_COPY = 0
MEMORY_SIZE = 300
def __init__(self, state_size, window_size, trend, skip):
self.state_size = state_size
self.window_size = window_size
self.half_window = window_size // 2
self.trend = trend
self.skip = skip
tf.reset_default_graph()
self.model = Model(self.state_size, self.OUTPUT_SIZE, self.LAYER_SIZE, self.LEARNING_RATE)
self.model_negative = Model(self.state_size, self.OUTPUT_SIZE, self.LAYER_SIZE, self.LEARNING_RATE)
self.sess = tf.InteractiveSession()
self.sess.run(tf.global_variables_initializer())
self.trainable = tf.trainable_variables()
def _assign(self):
for i in range(len(self.trainable)//2):
assign_op = self.trainable[i+len(self.trainable)//2].assign(self.trainable[i])
self.sess.run(assign_op)
def _memorize(self, state, action, reward, new_state, done):
self.MEMORIES.append((state, action, reward, new_state, done))
if len(self.MEMORIES) > self.MEMORY_SIZE:
self.MEMORIES.popleft()
def _select_action(self, state):
if np.random.rand() < self.EPSILON:
action = np.random.randint(self.OUTPUT_SIZE)
else:
action = self.get_predicted_action([state])
return action
def _construct_memories(self, replay):
states = np.array([a[0] for a in replay])
new_states = np.array([a[3] for a in replay])
Q = self.predict(states)
Q_new = self.predict(new_states)
Q_new_negative = self.sess.run(self.model_negative.logits, feed_dict={self.model_negative.X:new_states})
replay_size = len(replay)
X = np.empty((replay_size, self.state_size))
Y = np.empty((replay_size, self.OUTPUT_SIZE))
for i in range(replay_size):
state_r, action_r, reward_r, new_state_r, done_r = replay[i]
target = Q[i]
target[action_r] = reward_r
if not done_r:
target[action_r] += self.GAMMA * Q_new_negative[i, np.argmax(Q_new[i])]
X[i] = state_r
Y[i] = target
return X, Y
def predict(self, inputs):
return self.sess.run(self.model.logits, feed_dict={self.model.X:inputs})
def get_predicted_action(self, sequence):
prediction = self.predict(np.array(sequence))[0]
return np.argmax(prediction)
def get_state(self, t):
window_size = self.window_size + 1
d = t - window_size + 1
block = self.trend[d : t + 1] if d >= 0 else -d * [self.trend[0]] + self.trend[0 : t + 1]
res = []
for i in range(window_size - 1):
res.append(block[i + 1] - block[i])
return np.array(res)
def buy(self, initial_money):
starting_money = initial_money
states_sell = []
states_buy = []
inventory = []
state = self.get_state(0)
for t in range(0, len(self.trend) - 1, self.skip):
action = self._select_action(state)
next_state = self.get_state(t + 1)
if action == 1 and initial_money >= self.trend[t]:
inventory.append(self.trend[t])
initial_money -= self.trend[t]
states_buy.append(t)
print('day %d: buy 1 unit at price %f, total balance %f'% (t, self.trend[t], initial_money))
elif action == 2 and len(inventory):
bought_price = inventory.pop(0)
initial_money += self.trend[t]
states_sell.append(t)
try:
invest = ((close[t] - bought_price) / bought_price) * 100
except:
invest = 0
print(
'day %d, sell 1 unit at price %f, investment %f %%, total balance %f,'
% (t, close[t], invest, initial_money)
)
state = next_state
invest = ((initial_money - starting_money) / starting_money) * 100
total_gains = initial_money - starting_money
return states_buy, states_sell, total_gains, invest
def train(self, iterations, checkpoint, initial_money):
for i in range(iterations):
total_profit = 0
inventory = []
state = self.get_state(0)
starting_money = initial_money
for t in range(0, len(self.trend) - 1, self.skip):
if (self.T_COPY + 1) % self.COPY == 0:
self._assign()
action = self._select_action(state)
next_state = self.get_state(t + 1)
if action == 1 and starting_money >= self.trend[t]:
inventory.append(self.trend[t])
starting_money -= self.trend[t]
elif action == 2 and len(inventory) > 0:
bought_price = inventory.pop(0)
total_profit += self.trend[t] - bought_price
starting_money += self.trend[t]
invest = ((starting_money - initial_money) / initial_money)
self._memorize(state, action, invest, next_state, starting_money < initial_money)
batch_size = min(len(self.MEMORIES), self.BATCH_SIZE)
state = next_state
replay = random.sample(self.MEMORIES, batch_size)
X, Y = self._construct_memories(replay)
cost, _ = self.sess.run([self.model.cost, self.model.optimizer],
feed_dict={self.model.X: X, self.model.Y:Y})
self.T_COPY += 1
self.EPSILON = self.MIN_EPSILON + (1.0 - self.MIN_EPSILON) * np.exp(-self.DECAY_RATE * i)
if (i+1) % checkpoint == 0:
print('epoch: %d, total rewards: %f.3, cost: %f, total money: %f'%(i + 1, total_profit, cost,
starting_money))
close = df.Close.values.tolist()
initial_money = 10000
window_size = 30
skip = 1
batch_size = 32
agent = Agent(state_size = window_size,
window_size = window_size,
trend = close,
skip = skip)
agent.train(iterations = 200, checkpoint = 10, initial_money = initial_money)
WARNING:tensorflow:From <ipython-input-3-42f2d1e26a9d>:12: calling reduce_mean (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version. Instructions for updating: keep_dims is deprecated, use keepdims instead epoch: 10, total rewards: 1486.684997.3, cost: 0.694152, total money: 10514.124999 epoch: 20, total rewards: 313.279660.3, cost: 0.878157, total money: 8354.909665 epoch: 30, total rewards: 752.595089.3, cost: 0.320037, total money: 10752.595089 epoch: 40, total rewards: 1159.299987.3, cost: 0.318166, total money: 10186.739989 epoch: 50, total rewards: 993.220279.3, cost: 0.391151, total money: 4149.310245 epoch: 60, total rewards: 1616.499880.3, cost: 0.307440, total money: 9630.939883 epoch: 70, total rewards: 941.484560.3, cost: 0.332979, total money: 6969.054506 epoch: 80, total rewards: 904.899903.3, cost: 0.718111, total money: 1132.559876 epoch: 90, total rewards: 346.619873.3, cost: 0.482044, total money: 542.599852 epoch: 100, total rewards: 141.554626.3, cost: 0.238426, total money: 6115.974608 epoch: 110, total rewards: -159.529845.3, cost: 0.202412, total money: 8852.270143 epoch: 120, total rewards: -37.579779.3, cost: 0.433529, total money: 8945.780206 epoch: 130, total rewards: 1049.544800.3, cost: 0.408910, total money: 8099.664795 epoch: 140, total rewards: 59.114809.3, cost: 0.028664, total money: 7098.904848 epoch: 150, total rewards: 96.424866.3, cost: 0.070552, total money: 9079.784851 epoch: 160, total rewards: 74.179754.3, cost: 0.044092, total money: 10074.179754 epoch: 170, total rewards: 80.999883.3, cost: 0.018813, total money: 8047.249883 epoch: 180, total rewards: 62.700011.3, cost: 0.083292, total money: 10062.700011 epoch: 190, total rewards: 70.424991.3, cost: 0.013884, total money: 9053.315006 epoch: 200, total rewards: 10.620115.3, cost: 0.030838, total money: 10010.620115
states_buy, states_sell, total_gains, invest = agent.buy(initial_money = initial_money)
day 1: buy 1 unit at price 762.130005, total balance 9237.869995 day 2, sell 1 unit at price 762.020020, investment -0.014431 %, total balance 9999.890015, day 11: buy 1 unit at price 771.229980, total balance 9228.660035 day 12: buy 1 unit at price 760.539978, total balance 8468.120057 day 13, sell 1 unit at price 769.200012, investment -0.263212 %, total balance 9237.320069, day 15, sell 1 unit at price 760.989990, investment 0.059170 %, total balance 9998.310059, day 34: buy 1 unit at price 794.559998, total balance 9203.750061 day 35, sell 1 unit at price 791.260010, investment -0.415323 %, total balance 9995.010071, day 36: buy 1 unit at price 789.909973, total balance 9205.100098 day 37, sell 1 unit at price 791.549988, investment 0.207620 %, total balance 9996.650086, day 38: buy 1 unit at price 785.049988, total balance 9211.600098 day 40, sell 1 unit at price 771.820007, investment -1.685241 %, total balance 9983.420105, day 54: buy 1 unit at price 819.309998, total balance 9164.110107 day 55, sell 1 unit at price 823.869995, investment 0.556566 %, total balance 9987.980102, day 62: buy 1 unit at price 798.530029, total balance 9189.450073 day 64, sell 1 unit at price 801.340027, investment 0.351896 %, total balance 9990.790100, day 68: buy 1 unit at price 813.669983, total balance 9177.120117 day 69, sell 1 unit at price 819.239990, investment 0.684554 %, total balance 9996.360107, day 72: buy 1 unit at price 824.159973, total balance 9172.200134 day 73, sell 1 unit at price 828.070007, investment 0.474427 %, total balance 10000.270141, day 74: buy 1 unit at price 831.659973, total balance 9168.610168 day 75, sell 1 unit at price 830.760010, investment -0.108213 %, total balance 9999.370178, day 79: buy 1 unit at price 823.210022, total balance 9176.160156 day 80, sell 1 unit at price 835.239990, investment 1.461349 %, total balance 10011.400146, day 90: buy 1 unit at price 847.200012, total balance 9164.200134 day 91, sell 1 unit at price 848.780029, investment 0.186499 %, total balance 10012.980163, day 93: buy 1 unit at price 848.400024, total balance 9164.580139 day 94: buy 1 unit at price 830.460022, total balance 8334.120117 day 95, sell 1 unit at price 829.590027, investment -2.217114 %, total balance 9163.710144, day 96, sell 1 unit at price 817.580017, investment -1.550948 %, total balance 9981.290161, day 100: buy 1 unit at price 831.409973, total balance 9149.880188 day 101, sell 1 unit at price 831.500000, investment 0.010828 %, total balance 9981.380188, day 104: buy 1 unit at price 834.570007, total balance 9146.810181 day 106: buy 1 unit at price 827.880005, total balance 8318.930176 day 107, sell 1 unit at price 824.669983, investment -1.186242 %, total balance 9143.600159, day 108, sell 1 unit at price 824.729980, investment -0.380493 %, total balance 9968.330139, day 110: buy 1 unit at price 824.320007, total balance 9144.010132 day 111, sell 1 unit at price 823.559998, investment -0.092198 %, total balance 9967.570130, day 115: buy 1 unit at price 841.650024, total balance 9125.920106 day 116, sell 1 unit at price 843.190002, investment 0.182971 %, total balance 9969.110108, day 125: buy 1 unit at price 931.659973, total balance 9037.450135 day 126, sell 1 unit at price 927.130005, investment -0.486225 %, total balance 9964.580140, day 127: buy 1 unit at price 934.299988, total balance 9030.280152 day 128, sell 1 unit at price 932.169983, investment -0.227979 %, total balance 9962.450135, day 141: buy 1 unit at price 971.469971, total balance 8990.980164 day 142, sell 1 unit at price 975.880005, investment 0.453955 %, total balance 9966.860169, day 152: buy 1 unit at price 953.400024, total balance 9013.460145 day 153, sell 1 unit at price 950.760010, investment -0.276905 %, total balance 9964.220155, day 156: buy 1 unit at price 957.369995, total balance 9006.850160 day 157, sell 1 unit at price 950.630005, investment -0.704011 %, total balance 9957.480165, day 166: buy 1 unit at price 898.700012, total balance 9058.780153 day 167, sell 1 unit at price 911.710022, investment 1.447648 %, total balance 9970.490175, day 172: buy 1 unit at price 943.830017, total balance 9026.660158 day 173, sell 1 unit at price 947.159973, investment 0.352813 %, total balance 9973.820131, day 185: buy 1 unit at price 930.500000, total balance 9043.320131 day 186: buy 1 unit at price 930.830017, total balance 8112.490114 day 187, sell 1 unit at price 930.390015, investment -0.011820 %, total balance 9042.880129, day 188, sell 1 unit at price 923.650024, investment -0.771354 %, total balance 9966.530153, day 193: buy 1 unit at price 907.239990, total balance 9059.290163 day 194, sell 1 unit at price 914.390015, investment 0.788107 %, total balance 9973.680178, day 197: buy 1 unit at price 926.960022, total balance 9046.720156 day 199, sell 1 unit at price 910.669983, investment -1.757362 %, total balance 9957.390139, day 211: buy 1 unit at price 927.809998, total balance 9029.580141 day 212, sell 1 unit at price 935.950012, investment 0.877336 %, total balance 9965.530153, day 213: buy 1 unit at price 926.500000, total balance 9039.030153 day 214, sell 1 unit at price 929.080017, investment 0.278469 %, total balance 9968.110170,
fig = plt.figure(figsize = (15,5))
plt.plot(close, color='r', lw=2.)
plt.plot(close, '^', markersize=10, color='m', label = 'buying signal', markevery = states_buy)
plt.plot(close, 'v', markersize=10, color='k', label = 'selling signal', markevery = states_sell)
plt.title('total gains %f, total investment %f%%'%(total_gains, invest))
plt.legend()
plt.savefig('output/'+name+'.png')
plt.show()
from collections import deque
import random
df= df_full.copy()
name = 'Duel Recurrent Q-learning agent'
class Agent:
LEARNING_RATE = 0.003
BATCH_SIZE = 32
LAYER_SIZE = 256
OUTPUT_SIZE = 3
EPSILON = 0.5
DECAY_RATE = 0.005
MIN_EPSILON = 0.1
GAMMA = 0.99
MEMORIES = deque()
MEMORY_SIZE = 300
def __init__(self, state_size, window_size, trend, skip):
self.state_size = state_size
self.window_size = window_size
self.half_window = window_size // 2
self.trend = trend
self.skip = skip
tf.reset_default_graph()
self.INITIAL_FEATURES = np.zeros((4, self.state_size))
self.X = tf.placeholder(tf.float32, (None, None, self.state_size))
self.Y = tf.placeholder(tf.float32, (None, self.OUTPUT_SIZE))
cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE, state_is_tuple = False)
self.hidden_layer = tf.placeholder(tf.float32, (None, 2 * self.LAYER_SIZE))
self.rnn,self.last_state = tf.nn.dynamic_rnn(inputs=self.X,cell=cell,
dtype=tf.float32,
initial_state=self.hidden_layer)
tensor_action, tensor_validation = tf.split(self.rnn[:,-1],2,1)
feed_action = tf.layers.dense(tensor_action, self.OUTPUT_SIZE)
feed_validation = tf.layers.dense(tensor_validation, 1)
self.logits = feed_validation + tf.subtract(feed_action,tf.reduce_mean(feed_action,axis=1,keep_dims=True))
self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
self.optimizer = tf.train.AdamOptimizer(learning_rate = self.LEARNING_RATE).minimize(self.cost)
self.sess = tf.InteractiveSession()
self.sess.run(tf.global_variables_initializer())
def _memorize(self, state, action, reward, new_state, dead, rnn_state):
self.MEMORIES.append((state, action, reward, new_state, dead, rnn_state))
if len(self.MEMORIES) > self.MEMORY_SIZE:
self.MEMORIES.popleft()
def _construct_memories(self, replay):
states = np.array([a[0] for a in replay])
new_states = np.array([a[3] for a in replay])
init_values = np.array([a[-1] for a in replay])
Q = self.sess.run(self.logits, feed_dict={self.X:states, self.hidden_layer:init_values})
Q_new = self.sess.run(self.logits, feed_dict={self.X:new_states, self.hidden_layer:init_values})
replay_size = len(replay)
X = np.empty((replay_size, 4, self.state_size))
Y = np.empty((replay_size, self.OUTPUT_SIZE))
INIT_VAL = np.empty((replay_size, 2 * self.LAYER_SIZE))
for i in range(replay_size):
state_r, action_r, reward_r, new_state_r, dead_r, rnn_memory = replay[i]
target = Q[i]
target[action_r] = reward_r
if not dead_r:
target[action_r] += self.GAMMA * np.amax(Q_new[i])
X[i] = state_r
Y[i] = target
INIT_VAL[i] = rnn_memory
return X, Y, INIT_VAL
def get_state(self, t):
window_size = self.window_size + 1
d = t - window_size + 1
block = self.trend[d : t + 1] if d >= 0 else -d * [self.trend[0]] + self.trend[0 : t + 1]
res = []
for i in range(window_size - 1):
res.append(block[i + 1] - block[i])
return np.array(res)
def buy(self, initial_money):
starting_money = initial_money
states_sell = []
states_buy = []
inventory = []
state = self.get_state(0)
init_value = np.zeros((1, 2 * self.LAYER_SIZE))
for k in range(self.INITIAL_FEATURES.shape[0]):
self.INITIAL_FEATURES[k,:] = state
for t in range(0, len(self.trend) - 1, self.skip):
action, last_state = self.sess.run([self.logits,self.last_state],
feed_dict={self.X:[self.INITIAL_FEATURES],
self.hidden_layer:init_value})
action, init_value = np.argmax(action[0]), last_state
next_state = self.get_state(t + 1)
if action == 1 and initial_money >= self.trend[t]:
inventory.append(self.trend[t])
initial_money -= self.trend[t]
states_buy.append(t)
print('day %d: buy 1 unit at price %f, total balance %f'% (t, self.trend[t], initial_money))
elif action == 2 and len(inventory):
bought_price = inventory.pop(0)
initial_money += self.trend[t]
states_sell.append(t)
try:
invest = ((close[t] - bought_price) / bought_price) * 100
except:
invest = 0
print(
'day %d, sell 1 unit at price %f, investment %f %%, total balance %f,'
% (t, close[t], invest, initial_money)
)
new_state = np.append([self.get_state(t + 1)], self.INITIAL_FEATURES[:3, :], axis = 0)
self.INITIAL_FEATURES = new_state
invest = ((initial_money - starting_money) / starting_money) * 100
total_gains = initial_money - starting_money
return states_buy, states_sell, total_gains, invest
def train(self, iterations, checkpoint, initial_money):
for i in range(iterations):
total_profit = 0
inventory = []
state = self.get_state(0)
starting_money = initial_money
init_value = np.zeros((1, 2 * self.LAYER_SIZE))
for k in range(self.INITIAL_FEATURES.shape[0]):
self.INITIAL_FEATURES[k,:] = state
for t in range(0, len(self.trend) - 1, self.skip):
if np.random.rand() < self.EPSILON:
action = np.random.randint(self.OUTPUT_SIZE)
else:
action, last_state = self.sess.run([self.logits,
self.last_state],
feed_dict={self.X:[self.INITIAL_FEATURES],
self.hidden_layer:init_value})
action, init_value = np.argmax(action[0]), last_state
next_state = self.get_state(t + 1)
if action == 1 and starting_money >= self.trend[t]:
inventory.append(self.trend[t])
starting_money -= self.trend[t]
elif action == 2 and len(inventory) > 0:
bought_price = inventory.pop(0)
total_profit += self.trend[t] - bought_price
starting_money += self.trend[t]
invest = ((starting_money - initial_money) / initial_money)
new_state = np.append([self.get_state(t + 1)], self.INITIAL_FEATURES[:3, :], axis = 0)
self._memorize(self.INITIAL_FEATURES, action, invest, new_state,
starting_money < initial_money, init_value[0])
self.INITIAL_FEATURES = new_state
batch_size = min(len(self.MEMORIES), self.BATCH_SIZE)
replay = random.sample(self.MEMORIES, batch_size)
X, Y, INIT_VAL = self._construct_memories(replay)
cost, _ = self.sess.run([self.cost, self.optimizer],
feed_dict={self.X: X, self.Y:Y,
self.hidden_layer: INIT_VAL})
self.EPSILON = self.MIN_EPSILON + (1.0 - self.MIN_EPSILON) * np.exp(-self.DECAY_RATE * i)
if (i+1) % checkpoint == 0:
print('epoch: %d, total rewards: %f.3, cost: %f, total money: %f'%(i + 1, total_profit, cost,
starting_money))
close = df.Close.values.tolist()
initial_money = 10000
window_size = 30
skip = 1
batch_size = 32
agent = Agent(state_size = window_size,
window_size = window_size,
trend = close,
skip = skip)
agent.train(iterations = 200, checkpoint = 10, initial_money = initial_money)
WARNING:tensorflow:<tensorflow.python.ops.rnn_cell_impl.LSTMCell object at 0x7f2873435940>: Using a concatenated state is slower and will soon be deprecated. Use state_is_tuple=True. WARNING:tensorflow:From <ipython-input-3-976c717fc00c>:35: calling reduce_mean (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version. Instructions for updating: keep_dims is deprecated, use keepdims instead epoch: 10, total rewards: 1303.755127.3, cost: 0.204159, total money: 2622.175109 epoch: 20, total rewards: 1332.510133.3, cost: 2.512769, total money: 11332.510133 epoch: 30, total rewards: 167.034789.3, cost: 0.204751, total money: 10167.034789 epoch: 40, total rewards: 885.269897.3, cost: 0.095390, total money: 8848.889892 epoch: 50, total rewards: 312.624996.3, cost: 0.415782, total money: 10312.624996 epoch: 60, total rewards: 220.209960.3, cost: 0.119438, total money: 10220.209960 epoch: 70, total rewards: 407.794859.3, cost: 0.983801, total money: 8417.984861 epoch: 80, total rewards: 200.149718.3, cost: 0.235913, total money: 9226.819701 epoch: 90, total rewards: 87.564821.3, cost: 0.034903, total money: 8097.894838 epoch: 100, total rewards: 1056.600041.3, cost: 0.286240, total money: 11056.600041 epoch: 110, total rewards: 537.204957.3, cost: 0.140037, total money: 7610.014955 epoch: 120, total rewards: 263.944828.3, cost: 0.535866, total money: 9247.304813 epoch: 130, total rewards: 387.030092.3, cost: 0.352989, total money: 8396.590090 epoch: 140, total rewards: 207.069887.3, cost: 0.474047, total money: 10207.069887 epoch: 150, total rewards: -119.230104.3, cost: 0.301262, total money: 9880.769896 epoch: 160, total rewards: 21.299804.3, cost: 0.709494, total money: 10021.299804 epoch: 170, total rewards: 241.145077.3, cost: 0.486697, total money: 10241.145077 epoch: 180, total rewards: 5.329770.3, cost: 0.447255, total money: 7042.329770 epoch: 190, total rewards: 126.395198.3, cost: 0.240739, total money: 9107.125178 epoch: 200, total rewards: 91.499876.3, cost: 0.259028, total money: 8055.119871
states_buy, states_sell, total_gains, invest = agent.buy(initial_money = initial_money)
day 53: buy 1 unit at price 805.020020, total balance 9194.979980 day 54, sell 1 unit at price 819.309998, investment 1.775108 %, total balance 10014.289978, day 64: buy 1 unit at price 801.340027, total balance 9212.949951 day 68, sell 1 unit at price 813.669983, investment 1.538667 %, total balance 10026.619934, day 79: buy 1 unit at price 823.210022, total balance 9203.409912 day 82: buy 1 unit at price 829.080017, total balance 8374.329895 day 84, sell 1 unit at price 831.909973, investment 1.056832 %, total balance 9206.239868, day 86, sell 1 unit at price 838.679993, investment 1.157907 %, total balance 10044.919861, day 111: buy 1 unit at price 823.559998, total balance 9221.359863 day 116, sell 1 unit at price 843.190002, investment 2.383555 %, total balance 10064.549865, day 167: buy 1 unit at price 911.710022, total balance 9152.839843 day 169, sell 1 unit at price 918.590027, investment 0.754626 %, total balance 10071.429870, day 182: buy 1 unit at price 947.799988, total balance 9123.629882 day 183, sell 1 unit at price 934.090027, investment -1.446504 %, total balance 10057.719909, day 185: buy 1 unit at price 930.500000, total balance 9127.219909 day 187: buy 1 unit at price 930.390015, total balance 8196.829894 day 188, sell 1 unit at price 923.650024, investment -0.736161 %, total balance 9120.479918, day 190, sell 1 unit at price 929.359985, investment -0.110709 %, total balance 10049.839903, day 206: buy 1 unit at price 921.289978, total balance 9128.549925 day 207, sell 1 unit at price 929.570007, investment 0.898743 %, total balance 10058.119932,
fig = plt.figure(figsize = (15,5))
plt.plot(close, color='r', lw=2.)
plt.plot(close, '^', markersize=10, color='m', label = 'buying signal', markevery = states_buy)
plt.plot(close, 'v', markersize=10, color='k', label = 'selling signal', markevery = states_sell)
plt.title('total gains %f, total investment %f%%'%(total_gains, invest))
plt.legend()
plt.savefig('output/'+name+'.png')
plt.show()
from collections import deque
import random
df= df_full.copy()
name = 'Double Duel Recurrent Q-learning agent'
class Model:
def __init__(self, input_size, output_size, layer_size, learning_rate, name):
with tf.variable_scope(name):
self.X = tf.placeholder(tf.float32, (None, None, input_size))
self.Y = tf.placeholder(tf.float32, (None, output_size))
cell = tf.nn.rnn_cell.LSTMCell(layer_size, state_is_tuple = False)
self.hidden_layer = tf.placeholder(tf.float32, (None, 2 * layer_size))
self.rnn,self.last_state = tf.nn.dynamic_rnn(inputs=self.X,cell=cell,
dtype=tf.float32,
initial_state=self.hidden_layer)
tensor_action, tensor_validation = tf.split(self.rnn[:,-1],2,1)
feed_action = tf.layers.dense(tensor_action, output_size)
feed_validation = tf.layers.dense(tensor_validation, 1)
self.logits = feed_validation + tf.subtract(feed_action,tf.reduce_mean(feed_action,axis=1,keep_dims=True))
self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)
class Agent:
LEARNING_RATE = 0.003
BATCH_SIZE = 32
LAYER_SIZE = 256
OUTPUT_SIZE = 3
EPSILON = 0.5
DECAY_RATE = 0.005
MIN_EPSILON = 0.1
GAMMA = 0.99
MEMORIES = deque()
COPY = 1000
T_COPY = 0
MEMORY_SIZE = 300
def __init__(self, state_size, window_size, trend, skip):
self.state_size = state_size
self.window_size = window_size
self.half_window = window_size // 2
self.trend = trend
self.skip = skip
tf.reset_default_graph()
self.INITIAL_FEATURES = np.zeros((4, self.state_size))
self.model = Model(self.state_size, self.OUTPUT_SIZE, self.LAYER_SIZE, self.LEARNING_RATE,
'real_model')
self.model_negative = Model(self.state_size, self.OUTPUT_SIZE, self.LAYER_SIZE, self.LEARNING_RATE,
'negative_model')
self.sess = tf.InteractiveSession()
self.sess.run(tf.global_variables_initializer())
self.trainable = tf.trainable_variables()
def _assign(self, from_name, to_name):
from_w = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=from_name)
to_w = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=to_name)
for i in range(len(from_w)):
assign_op = to_w[i].assign(from_w[i])
self.sess.run(assign_op)
def _memorize(self, state, action, reward, new_state, dead, rnn_state):
self.MEMORIES.append((state, action, reward, new_state, dead, rnn_state))
if len(self.MEMORIES) > self.MEMORY_SIZE:
self.MEMORIES.popleft()
def _select_action(self, state):
if np.random.rand() < self.EPSILON:
action = np.random.randint(self.OUTPUT_SIZE)
else:
action = self.get_predicted_action([state])
return action
def _construct_memories(self, replay):
states = np.array([a[0] for a in replay])
new_states = np.array([a[3] for a in replay])
init_values = np.array([a[-1] for a in replay])
Q = self.sess.run(self.model.logits, feed_dict={self.model.X:states,
self.model.hidden_layer:init_values})
Q_new = self.sess.run(self.model.logits, feed_dict={self.model.X:new_states,
self.model.hidden_layer:init_values})
Q_new_negative = self.sess.run(self.model_negative.logits,
feed_dict={self.model_negative.X:new_states,
self.model_negative.hidden_layer:init_values})
replay_size = len(replay)
X = np.empty((replay_size, 4, self.state_size))
Y = np.empty((replay_size, self.OUTPUT_SIZE))
INIT_VAL = np.empty((replay_size, 2 * self.LAYER_SIZE))
for i in range(replay_size):
state_r, action_r, reward_r, new_state_r, dead_r, rnn_memory = replay[i]
target = Q[i]
target[action_r] = reward_r
if not dead_r:
target[action_r] += self.GAMMA * Q_new_negative[i, np.argmax(Q_new[i])]
X[i] = state_r
Y[i] = target
INIT_VAL[i] = rnn_memory
return X, Y, INIT_VAL
def get_state(self, t):
window_size = self.window_size + 1
d = t - window_size + 1
block = self.trend[d : t + 1] if d >= 0 else -d * [self.trend[0]] + self.trend[0 : t + 1]
res = []
for i in range(window_size - 1):
res.append(block[i + 1] - block[i])
return np.array(res)
def buy(self, initial_money):
starting_money = initial_money
states_sell = []
states_buy = []
inventory = []
state = self.get_state(0)
init_value = np.zeros((1, 2 * self.LAYER_SIZE))
for k in range(self.INITIAL_FEATURES.shape[0]):
self.INITIAL_FEATURES[k,:] = state
for t in range(0, len(self.trend) - 1, self.skip):
action, last_state = self.sess.run([self.model.logits,self.model.last_state],
feed_dict={self.model.X:[self.INITIAL_FEATURES],
self.model.hidden_layer:init_value})
action, init_value = np.argmax(action[0]), last_state
next_state = self.get_state(t + 1)
if action == 1 and initial_money >= self.trend[t]:
inventory.append(self.trend[t])
initial_money -= self.trend[t]
states_buy.append(t)
print('day %d: buy 1 unit at price %f, total balance %f'% (t, self.trend[t], initial_money))
elif action == 2 and len(inventory):
bought_price = inventory.pop(0)
initial_money += self.trend[t]
states_sell.append(t)
try:
invest = ((close[t] - bought_price) / bought_price) * 100
except:
invest = 0
print(
'day %d, sell 1 unit at price %f, investment %f %%, total balance %f,'
% (t, close[t], invest, initial_money)
)
new_state = np.append([self.get_state(t + 1)], self.INITIAL_FEATURES[:3, :], axis = 0)
self.INITIAL_FEATURES = new_state
invest = ((initial_money - starting_money) / starting_money) * 100
total_gains = initial_money - starting_money
return states_buy, states_sell, total_gains, invest
def train(self, iterations, checkpoint, initial_money):
for i in range(iterations):
total_profit = 0
inventory = []
state = self.get_state(0)
starting_money = initial_money
init_value = np.zeros((1, 2 * self.LAYER_SIZE))
for k in range(self.INITIAL_FEATURES.shape[0]):
self.INITIAL_FEATURES[k,:] = state
for t in range(0, len(self.trend) - 1, self.skip):
if (self.T_COPY + 1) % self.COPY == 0:
self._assign('real_model', 'negative_model')
if np.random.rand() < self.EPSILON:
action = np.random.randint(self.OUTPUT_SIZE)
else:
action, last_state = self.sess.run([self.model.logits,
self.model.last_state],
feed_dict={self.model.X:[self.INITIAL_FEATURES],
self.model.hidden_layer:init_value})
action, init_value = np.argmax(action[0]), last_state
next_state = self.get_state(t + 1)
if action == 1 and starting_money >= self.trend[t]:
inventory.append(self.trend[t])
starting_money -= self.trend[t]
elif action == 2 and len(inventory) > 0:
bought_price = inventory.pop(0)
total_profit += self.trend[t] - bought_price
starting_money += self.trend[t]
invest = ((starting_money - initial_money) / initial_money)
new_state = np.append([self.get_state(t + 1)], self.INITIAL_FEATURES[:3, :], axis = 0)
self._memorize(self.INITIAL_FEATURES, action, invest, new_state,
starting_money < initial_money, init_value[0])
self.INITIAL_FEATURES = new_state
batch_size = min(len(self.MEMORIES), self.BATCH_SIZE)
replay = random.sample(self.MEMORIES, batch_size)
X, Y, INIT_VAL = self._construct_memories(replay)
cost, _ = self.sess.run([self.model.cost, self.model.optimizer],
feed_dict={self.model.X: X, self.model.Y:Y,
self.model.hidden_layer: INIT_VAL})
self.T_COPY += 1
self.EPSILON = self.MIN_EPSILON + (1.0 - self.MIN_EPSILON) * np.exp(-self.DECAY_RATE * i)
if (i+1) % checkpoint == 0:
print('epoch: %d, total rewards: %f.3, cost: %f, total money: %f'%(i + 1, total_profit, cost,
starting_money))
close = df.Close.values.tolist()
initial_money = 10000
window_size = 30
skip = 1
batch_size = 32
agent = Agent(state_size = window_size,
window_size = window_size,
trend = close,
skip = skip)
agent.train(iterations = 200, checkpoint = 10, initial_money = initial_money)
WARNING:tensorflow:<tensorflow.python.ops.rnn_cell_impl.LSTMCell object at 0x7f39ffaed7b8>: Using a concatenated state is slower and will soon be deprecated. Use state_is_tuple=True. WARNING:tensorflow:From <ipython-input-3-401815182242>:17: calling reduce_mean (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version. Instructions for updating: keep_dims is deprecated, use keepdims instead WARNING:tensorflow:<tensorflow.python.ops.rnn_cell_impl.LSTMCell object at 0x7f39ffaede80>: Using a concatenated state is slower and will soon be deprecated. Use state_is_tuple=True. epoch: 10, total rewards: 328.014401.3, cost: 0.233912, total money: 2446.714413 epoch: 20, total rewards: 629.485052.3, cost: 0.592428, total money: 5723.605047 epoch: 30, total rewards: 1222.065245.3, cost: 0.182284, total money: 7288.965209 epoch: 40, total rewards: 719.309753.3, cost: 0.690094, total money: 3739.159728 epoch: 50, total rewards: 328.994876.3, cost: 0.918951, total money: 2756.724856 epoch: 60, total rewards: 1518.540281.3, cost: 0.226017, total money: 10545.210264 epoch: 70, total rewards: 440.315127.3, cost: 0.145386, total money: 7494.335086 epoch: 80, total rewards: 656.779966.3, cost: 0.113699, total money: 6666.949948 epoch: 90, total rewards: 846.820129.3, cost: 0.444679, total money: 6860.080139 epoch: 100, total rewards: 1044.679930.3, cost: 0.240218, total money: 9067.419920 epoch: 110, total rewards: 207.934935.3, cost: 0.236219, total money: 10207.934935 epoch: 120, total rewards: 6.745002.3, cost: 1.133358, total money: 10006.745002 epoch: 130, total rewards: 586.910091.3, cost: 0.162622, total money: 4665.650081 epoch: 140, total rewards: 1084.244877.3, cost: 0.630996, total money: 6178.484867 epoch: 150, total rewards: 991.774842.3, cost: 1.439193, total money: 420.904786 epoch: 160, total rewards: 714.735100.3, cost: 0.337296, total money: 5744.735038 epoch: 170, total rewards: 1158.574706.3, cost: 0.186633, total money: 10185.244689 epoch: 180, total rewards: 1120.314817.3, cost: 0.539594, total money: 7186.704770 epoch: 190, total rewards: 230.760193.3, cost: 0.110742, total money: 4290.020202 epoch: 200, total rewards: 218.420047.3, cost: 0.125164, total money: 10218.420047
states_buy, states_sell, total_gains, invest = agent.buy(initial_money = initial_money)
day 17: buy 1 unit at price 768.239990, total balance 9231.760010 day 18, sell 1 unit at price 770.840027, investment 0.338441 %, total balance 10002.600037, day 20: buy 1 unit at price 747.919983, total balance 9254.680054 day 21: buy 1 unit at price 750.500000, total balance 8504.180054 day 23, sell 1 unit at price 759.109985, investment 1.496150 %, total balance 9263.290039, day 24, sell 1 unit at price 771.190002, investment 2.756829 %, total balance 10034.480041, day 27: buy 1 unit at price 789.270020, total balance 9245.210021 day 28, sell 1 unit at price 796.099976, investment 0.865351 %, total balance 10041.309997, day 34: buy 1 unit at price 794.559998, total balance 9246.749999 day 35, sell 1 unit at price 791.260010, investment -0.415323 %, total balance 10038.010009, day 36: buy 1 unit at price 789.909973, total balance 9248.100036 day 38: buy 1 unit at price 785.049988, total balance 8463.050048 day 40: buy 1 unit at price 771.820007, total balance 7691.230041 day 41, sell 1 unit at price 786.140015, investment -0.477264 %, total balance 8477.370056, day 44: buy 1 unit at price 806.150024, total balance 7671.220032 day 45, sell 1 unit at price 806.650024, investment 2.751422 %, total balance 8477.870056, day 48, sell 1 unit at price 806.359985, investment 4.475134 %, total balance 9284.230041, day 49, sell 1 unit at price 807.880005, investment 0.214598 %, total balance 10092.110046, day 51: buy 1 unit at price 806.070007, total balance 9286.040039 day 52, sell 1 unit at price 802.174988, investment -0.483211 %, total balance 10088.215027, day 57: buy 1 unit at price 832.150024, total balance 9256.065003 day 58: buy 1 unit at price 823.309998, total balance 8432.755005 day 61: buy 1 unit at price 795.695007, total balance 7637.059998 day 63: buy 1 unit at price 801.489990, total balance 6835.570008 day 64, sell 1 unit at price 801.340027, investment -3.702457 %, total balance 7636.910035, day 66, sell 1 unit at price 808.380005, investment -1.813411 %, total balance 8445.290040, day 67, sell 1 unit at price 809.559998, investment 1.742501 %, total balance 9254.850038, day 68: buy 1 unit at price 813.669983, total balance 8441.180055 day 70, sell 1 unit at price 820.450012, investment 2.365597 %, total balance 9261.630067, day 71: buy 1 unit at price 818.979980, total balance 8442.650087 day 73: buy 1 unit at price 828.070007, total balance 7614.580080 day 76, sell 1 unit at price 831.330017, investment 2.170417 %, total balance 8445.910097, day 77, sell 1 unit at price 828.640015, investment 1.179520 %, total balance 9274.550112, day 78: buy 1 unit at price 829.280029, total balance 8445.270083 day 82: buy 1 unit at price 829.080017, total balance 7616.190066 day 83: buy 1 unit at price 827.780029, total balance 6788.410037 day 84: buy 1 unit at price 831.909973, total balance 5956.500064 day 87: buy 1 unit at price 843.250000, total balance 5113.250064 day 88: buy 1 unit at price 845.539978, total balance 4267.710086 day 90: buy 1 unit at price 847.200012, total balance 3420.510074 day 91, sell 1 unit at price 848.780029, investment 2.500999 %, total balance 4269.290103, day 98: buy 1 unit at price 819.510010, total balance 3449.780093 day 99, sell 1 unit at price 820.919983, investment -1.008109 %, total balance 4270.700076, day 100: buy 1 unit at price 831.409973, total balance 3439.290103 day 103, sell 1 unit at price 838.549988, investment 1.142226 %, total balance 4277.840091, day 104: buy 1 unit at price 834.570007, total balance 3443.270084 day 106: buy 1 unit at price 827.880005, total balance 2615.390079 day 107, sell 1 unit at price 824.669983, investment -0.375709 %, total balance 3440.060062, day 108, sell 1 unit at price 824.729980, investment -0.863073 %, total balance 4264.790042, day 109, sell 1 unit at price 823.349976, investment -2.359920 %, total balance 5088.140018, day 110: buy 1 unit at price 824.320007, total balance 4263.820011 day 111, sell 1 unit at price 823.559998, investment -2.599520 %, total balance 5087.380009, day 114: buy 1 unit at price 838.210022, total balance 4249.169987 day 115, sell 1 unit at price 841.650024, investment -0.655098 %, total balance 5090.820011, day 117, sell 1 unit at price 862.760010, investment 5.277544 %, total balance 5953.580021, day 118, sell 1 unit at price 872.299988, investment 4.918153 %, total balance 6825.880009, day 119: buy 1 unit at price 871.729980, total balance 5954.150029 day 121, sell 1 unit at price 905.960022, investment 8.554107 %, total balance 6860.110051, day 122, sell 1 unit at price 912.570007, investment 10.229744 %, total balance 7772.680058, day 123: buy 1 unit at price 916.440002, total balance 6856.240056 day 124, sell 1 unit at price 927.039978, investment 12.461177 %, total balance 7783.280034, day 125, sell 1 unit at price 931.659973, investment 11.148751 %, total balance 8714.940007, day 126, sell 1 unit at price 927.130005, investment 6.355182 %, total balance 9642.070012, day 129: buy 1 unit at price 928.780029, total balance 8713.289983 day 131: buy 1 unit at price 932.219971, total balance 7781.070012 day 134, sell 1 unit at price 919.619995, investment 0.346994 %, total balance 8700.690007, day 136: buy 1 unit at price 934.010010, total balance 7766.679997 day 138, sell 1 unit at price 948.820007, investment 2.157667 %, total balance 8715.500004, day 139, sell 1 unit at price 954.960022, investment 2.439344 %, total balance 9670.460026, day 140, sell 1 unit at price 969.539978, investment 3.804024 %, total balance 10640.000004, day 141: buy 1 unit at price 971.469971, total balance 9668.530033 day 142, sell 1 unit at price 975.880005, investment 0.453955 %, total balance 10644.410038, day 143: buy 1 unit at price 964.859985, total balance 9679.550053 day 144, sell 1 unit at price 966.950012, investment 0.216615 %, total balance 10646.500065, day 145: buy 1 unit at price 975.599976, total balance 9670.900089 day 146: buy 1 unit at price 983.679993, total balance 8687.220096 day 148, sell 1 unit at price 980.940002, investment 0.547358 %, total balance 9668.160098, day 150, sell 1 unit at price 949.830017, investment -3.441157 %, total balance 10617.990115, day 152: buy 1 unit at price 953.400024, total balance 9664.590091 day 154, sell 1 unit at price 942.309998, investment -1.163208 %, total balance 10606.900089, day 162: buy 1 unit at price 927.330017, total balance 9679.570072 day 163: buy 1 unit at price 940.489990, total balance 8739.080082 day 170: buy 1 unit at price 928.799988, total balance 7810.280094 day 173, sell 1 unit at price 947.159973, investment 2.138393 %, total balance 8757.440067, day 175: buy 1 unit at price 953.419983, total balance 7804.020084 day 176, sell 1 unit at price 965.400024, investment 2.648623 %, total balance 8769.420108, day 177, sell 1 unit at price 970.890015, investment 4.531657 %, total balance 9740.310123, day 178: buy 1 unit at price 968.150024, total balance 8772.160099 day 179, sell 1 unit at price 972.919983, investment 2.045269 %, total balance 9745.080082, day 180, sell 1 unit at price 980.340027, investment 1.259103 %, total balance 10725.420109, day 185: buy 1 unit at price 930.500000, total balance 9794.920109 day 186: buy 1 unit at price 930.830017, total balance 8864.090092 day 187: buy 1 unit at price 930.390015, total balance 7933.700077 day 188: buy 1 unit at price 923.650024, total balance 7010.050053 day 191, sell 1 unit at price 926.789978, investment -0.398713 %, total balance 7936.840031, day 192, sell 1 unit at price 922.900024, investment -0.851927 %, total balance 8859.740055, day 195, sell 1 unit at price 922.669983, investment -0.829763 %, total balance 9782.410038, day 198: buy 1 unit at price 910.979980, total balance 8871.430058 day 202: buy 1 unit at price 927.000000, total balance 7944.430058 day 203, sell 1 unit at price 921.280029, investment -0.256590 %, total balance 8865.710087, day 205, sell 1 unit at price 913.809998, investment 0.310656 %, total balance 9779.520085, day 206, sell 1 unit at price 921.289978, investment -0.615968 %, total balance 10700.810063, day 207: buy 1 unit at price 929.570007, total balance 9771.240056 day 209, sell 1 unit at price 937.340027, investment 0.835872 %, total balance 10708.580083, day 212: buy 1 unit at price 935.950012, total balance 9772.630071 day 213, sell 1 unit at price 926.500000, investment -1.009671 %, total balance 10699.130071, day 216: buy 1 unit at price 935.090027, total balance 9764.040044 day 217: buy 1 unit at price 925.109985, total balance 8838.930059 day 219: buy 1 unit at price 915.000000, total balance 7923.930059 day 221: buy 1 unit at price 931.580017, total balance 6992.350042 day 222: buy 1 unit at price 932.450012, total balance 6059.900030 day 223, sell 1 unit at price 928.530029, investment -0.701537 %, total balance 6988.430059, day 224, sell 1 unit at price 920.969971, investment -0.447516 %, total balance 7909.400030, day 225: buy 1 unit at price 924.859985, total balance 6984.540045 day 226: buy 1 unit at price 944.489990, total balance 6040.050055 day 227: buy 1 unit at price 949.500000, total balance 5090.550055 day 228, sell 1 unit at price 959.109985, investment 4.820763 %, total balance 6049.660040, day 229, sell 1 unit at price 953.270020, investment 2.328303 %, total balance 7002.930060, day 230: buy 1 unit at price 957.789978, total balance 6045.140082 day 235, sell 1 unit at price 972.599976, investment 4.305857 %, total balance 7017.740058, day 236, sell 1 unit at price 989.250000, investment 6.962137 %, total balance 8006.990058, day 238: buy 1 unit at price 989.679993, total balance 7017.310065 day 239, sell 1 unit at price 992.000000, investment 5.030229 %, total balance 8009.310065, day 240: buy 1 unit at price 992.179993, total balance 7017.130072 day 241, sell 1 unit at price 992.809998, investment 4.561348 %, total balance 8009.940070, day 242, sell 1 unit at price 984.450012, investment 2.783495 %, total balance 8994.390082, day 244, sell 1 unit at price 968.450012, investment -2.145136 %, total balance 9962.840094, day 245, sell 1 unit at price 970.539978, investment -2.181057 %, total balance 10933.380072, day 248: buy 1 unit at price 1019.270020, total balance 9914.110052 day 249, sell 1 unit at price 1017.109985, investment -0.211920 %, total balance 10931.220037, day 250: buy 1 unit at price 1016.640015, total balance 9914.580022
fig = plt.figure(figsize = (15,5))
plt.plot(close, color='r', lw=2.)
plt.plot(close, '^', markersize=10, color='m', label = 'buying signal', markevery = states_buy)
plt.plot(close, 'v', markersize=10, color='k', label = 'selling signal', markevery = states_sell)
plt.title('total gains %f, total investment %f%%'%(total_gains, invest))
plt.legend()
plt.savefig('output/'+name+'.png')
plt.show()
from collections import deque
import random
df= df_full.copy()
name = 'Actor-critic agent'
class Actor:
def __init__(self, name, input_size, output_size, size_layer):
with tf.variable_scope(name):
self.X = tf.placeholder(tf.float32, (None, input_size))
feed_actor = tf.layers.dense(self.X, size_layer, activation = tf.nn.relu)
self.logits = tf.layers.dense(feed_actor, output_size)
class Critic:
def __init__(self, name, input_size, output_size, size_layer, learning_rate):
with tf.variable_scope(name):
self.X = tf.placeholder(tf.float32, (None, input_size))
self.Y = tf.placeholder(tf.float32, (None, output_size))
self.REWARD = tf.placeholder(tf.float32, (None, 1))
feed_critic = tf.layers.dense(self.X, size_layer, activation = tf.nn.relu)
feed_critic = tf.layers.dense(feed_critic, output_size, activation = tf.nn.relu) + self.Y
feed_critic = tf.layers.dense(feed_critic, size_layer//2, activation = tf.nn.relu)
self.logits = tf.layers.dense(feed_critic, 1)
self.cost = tf.reduce_mean(tf.square(self.REWARD - self.logits))
self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)
class Agent:
LEARNING_RATE = 0.001
BATCH_SIZE = 32
LAYER_SIZE = 256
OUTPUT_SIZE = 3
EPSILON = 0.5
DECAY_RATE = 0.005
MIN_EPSILON = 0.1
GAMMA = 0.99
MEMORIES = deque()
MEMORY_SIZE = 300
COPY = 1000
T_COPY = 0
def __init__(self, state_size, window_size, trend, skip):
self.state_size = state_size
self.window_size = window_size
self.half_window = window_size // 2
self.trend = trend
self.skip = skip
tf.reset_default_graph()
self.actor = Actor('actor-original', self.state_size, self.OUTPUT_SIZE, self.LAYER_SIZE)
self.actor_target = Actor('actor-target', self.state_size, self.OUTPUT_SIZE, self.LAYER_SIZE)
self.critic = Critic('critic-original', self.state_size, self.OUTPUT_SIZE, self.LAYER_SIZE, self.LEARNING_RATE)
self.critic_target = Critic('critic-target', self.state_size, self.OUTPUT_SIZE,
self.LAYER_SIZE, self.LEARNING_RATE)
self.grad_critic = tf.gradients(self.critic.logits, self.critic.Y)
self.actor_critic_grad = tf.placeholder(tf.float32, [None, self.OUTPUT_SIZE])
weights_actor = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='actor')
self.grad_actor = tf.gradients(self.actor.logits, weights_actor, -self.actor_critic_grad)
grads = zip(self.grad_actor, weights_actor)
self.optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE).apply_gradients(grads)
self.sess = tf.InteractiveSession()
self.sess.run(tf.global_variables_initializer())
def _assign(self, from_name, to_name):
from_w = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=from_name)
to_w = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=to_name)
for i in range(len(from_w)):
assign_op = to_w[i].assign(from_w[i])
self.sess.run(assign_op)
def _memorize(self, state, action, reward, new_state, dead):
self.MEMORIES.append((state, action, reward, new_state, dead))
if len(self.MEMORIES) > self.MEMORY_SIZE:
self.MEMORIES.popleft()
def _select_action(self, state):
if np.random.rand() < self.EPSILON:
action = np.random.randint(self.OUTPUT_SIZE)
else:
prediction = self.sess.run(self.actor.logits, feed_dict={self.actor.X:[state]})[0]
action = np.argmax(prediction)
return action
def _construct_memories_and_train(self, replay):
states = np.array([a[0] for a in replay])
new_states = np.array([a[3] for a in replay])
Q = self.sess.run(self.actor.logits, feed_dict={self.actor.X: states})
Q_target = self.sess.run(self.actor_target.logits, feed_dict={self.actor_target.X: states})
grads = self.sess.run(self.grad_critic, feed_dict={self.critic.X:states, self.critic.Y:Q})[0]
self.sess.run(self.optimizer, feed_dict={self.actor.X:states, self.actor_critic_grad:grads})
rewards = np.array([a[2] for a in replay]).reshape((-1, 1))
rewards_target = self.sess.run(self.critic_target.logits,
feed_dict={self.critic_target.X:new_states,self.critic_target.Y:Q_target})
for i in range(len(replay)):
if not replay[0][-1]:
rewards[i] += self.GAMMA * rewards_target[i]
cost, _ = self.sess.run([self.critic.cost, self.critic.optimizer],
feed_dict={self.critic.X:states, self.critic.Y:Q, self.critic.REWARD:rewards})
return cost
def get_state(self, t):
window_size = self.window_size + 1
d = t - window_size + 1
block = self.trend[d : t + 1] if d >= 0 else -d * [self.trend[0]] + self.trend[0 : t + 1]
res = []
for i in range(window_size - 1):
res.append(block[i + 1] - block[i])
return np.array(res)
def buy(self, initial_money):
starting_money = initial_money
states_sell = []
states_buy = []
inventory = []
state = self.get_state(0)
for t in range(0, len(self.trend) - 1, self.skip):
action = self._select_action(state)
next_state = self.get_state(t + 1)
if action == 1 and initial_money >= self.trend[t]:
inventory.append(self.trend[t])
initial_money -= self.trend[t]
states_buy.append(t)
print('day %d: buy 1 unit at price %f, total balance %f'% (t, self.trend[t], initial_money))
elif action == 2 and len(inventory):
bought_price = inventory.pop(0)
initial_money += self.trend[t]
states_sell.append(t)
try:
invest = ((close[t] - bought_price) / bought_price) * 100
except:
invest = 0
print(
'day %d, sell 1 unit at price %f, investment %f %%, total balance %f,'
% (t, close[t], invest, initial_money)
)
state = next_state
invest = ((initial_money - starting_money) / starting_money) * 100
total_gains = initial_money - starting_money
return states_buy, states_sell, total_gains, invest
def train(self, iterations, checkpoint, initial_money):
for i in range(iterations):
total_profit = 0
inventory = []
state = self.get_state(0)
starting_money = initial_money
for t in range(0, len(self.trend) - 1, self.skip):
if (self.T_COPY + 1) % self.COPY == 0:
self._assign('actor-original', 'actor-target')
self._assign('critic-original', 'critic-target')
action = self._select_action(state)
next_state = self.get_state(t + 1)
if action == 1 and starting_money >= self.trend[t]:
inventory.append(self.trend[t])
starting_money -= self.trend[t]
elif action == 2 and len(inventory) > 0:
bought_price = inventory.pop(0)
total_profit += self.trend[t] - bought_price
starting_money += self.trend[t]
invest = ((starting_money - initial_money) / initial_money)
self._memorize(state, action, invest, next_state, starting_money < initial_money)
state = next_state
batch_size = min(len(self.MEMORIES), self.BATCH_SIZE)
replay = random.sample(self.MEMORIES, batch_size)
cost = self._construct_memories_and_train(replay)
self.T_COPY += 1
self.EPSILON = self.MIN_EPSILON + (1.0 - self.MIN_EPSILON) * np.exp(-self.DECAY_RATE * i)
if (i+1) % checkpoint == 0:
print('epoch: %d, total rewards: %f.3, cost: %f, total money: %f'%(i + 1, total_profit, cost,
starting_money))
close = df.Close.values.tolist()
initial_money = 10000
window_size = 30
skip = 1
batch_size = 32
agent = Agent(state_size = window_size,
window_size = window_size,
trend = close,
skip = skip)
agent.train(iterations = 200, checkpoint = 10, initial_money = initial_money)
epoch: 10, total rewards: 1539.185237.3, cost: 2.181347, total money: 1684.395196 epoch: 20, total rewards: 1308.335026.3, cost: 658.992737, total money: 11308.335026 epoch: 30, total rewards: 810.315002.3, cost: 19406.357422, total money: 5871.594971 epoch: 40, total rewards: 380.889899.3, cost: 436790400.000000, total money: 7327.869879 epoch: 50, total rewards: 676.170224.3, cost: 27570524160.000000, total money: 10676.170224 epoch: 60, total rewards: 796.770199.3, cost: 935274741760.000000, total money: 10796.770199 epoch: 70, total rewards: 47.440366.3, cost: 8344191369216.000000, total money: 7043.150388 epoch: 80, total rewards: 450.169980.3, cost: 88121093914624.000000, total money: 6472.479916 epoch: 90, total rewards: 443.664980.3, cost: 675454474256384.000000, total money: 9427.024965 epoch: 100, total rewards: 350.460142.3, cost: 1153362061950976.000000, total money: 10350.460142 epoch: 110, total rewards: 247.584961.3, cost: 6317238688677888.000000, total money: 9230.944946 epoch: 120, total rewards: 138.510132.3, cost: 3956869119726321664.000000, total money: 8102.600097 epoch: 130, total rewards: 410.025086.3, cost: 2205253088434978816.000000, total money: 10410.025086 epoch: 140, total rewards: 513.814999.3, cost: 5849743807884558336.000000, total money: 9497.174984 epoch: 150, total rewards: 876.734991.3, cost: 25442419893862400.000000, total money: 9860.094976 epoch: 160, total rewards: 216.929627.3, cost: 73146239398445056.000000, total money: 9244.369629 epoch: 170, total rewards: 26.000066.3, cost: 210379489706770432.000000, total money: 7992.250066 epoch: 180, total rewards: 230.090269.3, cost: 378469838063927296.000000, total money: 8194.180234 epoch: 190, total rewards: 31.099796.3, cost: 1333389845631860736.000000, total money: 6978.079776 epoch: 200, total rewards: 158.599487.3, cost: 459357028892629008384.000000, total money: 10158.599487
states_buy, states_sell, total_gains, invest = agent.buy(initial_money = initial_money)
day 3: buy 1 unit at price 782.520020, total balance 9217.479980 day 4: buy 1 unit at price 790.510010, total balance 8426.969970 day 5, sell 1 unit at price 785.309998, investment 0.356538 %, total balance 9212.279968, day 6, sell 1 unit at price 762.559998, investment -3.535694 %, total balance 9974.839966, day 16: buy 1 unit at price 761.679993, total balance 9213.159973 day 17: buy 1 unit at price 768.239990, total balance 8444.919983 day 18, sell 1 unit at price 770.840027, investment 1.202609 %, total balance 9215.760010, day 19, sell 1 unit at price 758.039978, investment -1.327712 %, total balance 9973.799988, day 20: buy 1 unit at price 747.919983, total balance 9225.880005 day 21: buy 1 unit at price 750.500000, total balance 8475.380005 day 22, sell 1 unit at price 762.520020, investment 1.952085 %, total balance 9237.900025, day 24, sell 1 unit at price 771.190002, investment 2.756829 %, total balance 10009.090027, day 25: buy 1 unit at price 776.419983, total balance 9232.670044 day 26, sell 1 unit at price 789.289978, investment 1.657607 %, total balance 10021.960022, day 27: buy 1 unit at price 789.270020, total balance 9232.690002 day 28: buy 1 unit at price 796.099976, total balance 8436.590026 day 31, sell 1 unit at price 790.799988, investment 0.193846 %, total balance 9227.390014, day 32, sell 1 unit at price 794.200012, investment -0.238659 %, total balance 10021.590026, day 33: buy 1 unit at price 796.419983, total balance 9225.170043 day 34, sell 1 unit at price 794.559998, investment -0.233543 %, total balance 10019.730041, day 36: buy 1 unit at price 789.909973, total balance 9229.820068 day 37, sell 1 unit at price 791.549988, investment 0.207620 %, total balance 10021.370056, day 49: buy 1 unit at price 807.880005, total balance 9213.490051 day 51: buy 1 unit at price 806.070007, total balance 8407.420044 day 52, sell 1 unit at price 802.174988, investment -0.706171 %, total balance 9209.595032, day 53, sell 1 unit at price 805.020020, investment -0.130260 %, total balance 10014.615052, day 59: buy 1 unit at price 802.320007, total balance 9212.295045 day 62, sell 1 unit at price 798.530029, investment -0.472377 %, total balance 10010.825074, day 63: buy 1 unit at price 801.489990, total balance 9209.335084 day 64, sell 1 unit at price 801.340027, investment -0.018711 %, total balance 10010.675111, day 67: buy 1 unit at price 809.559998, total balance 9201.115113 day 68, sell 1 unit at price 813.669983, investment 0.507681 %, total balance 10014.785096, day 73: buy 1 unit at price 828.070007, total balance 9186.715089 day 74, sell 1 unit at price 831.659973, investment 0.433534 %, total balance 10018.375062, day 81: buy 1 unit at price 830.630005, total balance 9187.745057 day 82, sell 1 unit at price 829.080017, investment -0.186604 %, total balance 10016.825074, day 87: buy 1 unit at price 843.250000, total balance 9173.575074 day 88, sell 1 unit at price 845.539978, investment 0.271566 %, total balance 10019.115052, day 92: buy 1 unit at price 852.119995, total balance 9166.995057 day 93, sell 1 unit at price 848.400024, investment -0.436555 %, total balance 10015.395081, day 97: buy 1 unit at price 814.429993, total balance 9200.965088 day 98: buy 1 unit at price 819.510010, total balance 8381.455078 day 99, sell 1 unit at price 820.919983, investment 0.796875 %, total balance 9202.375061, day 100: buy 1 unit at price 831.409973, total balance 8370.965088 day 101, sell 1 unit at price 831.500000, investment 1.463068 %, total balance 9202.465088, day 102, sell 1 unit at price 829.559998, investment -0.222511 %, total balance 10032.025086, day 107: buy 1 unit at price 824.669983, total balance 9207.355103 day 109, sell 1 unit at price 823.349976, investment -0.160065 %, total balance 10030.705079, day 110: buy 1 unit at price 824.320007, total balance 9206.385072 day 111, sell 1 unit at price 823.559998, investment -0.092198 %, total balance 10029.945070, day 115: buy 1 unit at price 841.650024, total balance 9188.295046 day 116, sell 1 unit at price 843.190002, investment 0.182971 %, total balance 10031.485048, day 119: buy 1 unit at price 871.729980, total balance 9159.755068 day 120, sell 1 unit at price 874.250000, investment 0.289083 %, total balance 10034.005068, day 129: buy 1 unit at price 928.780029, total balance 9105.225039 day 130, sell 1 unit at price 930.599976, investment 0.195950 %, total balance 10035.825015, day 137: buy 1 unit at price 941.859985, total balance 9093.965030 day 138, sell 1 unit at price 948.820007, investment 0.738966 %, total balance 10042.785037, day 140: buy 1 unit at price 969.539978, total balance 9073.245059 day 141, sell 1 unit at price 971.469971, investment 0.199063 %, total balance 10044.715030, day 144: buy 1 unit at price 966.950012, total balance 9077.765018 day 145, sell 1 unit at price 975.599976, investment 0.894562 %, total balance 10053.364994, day 147: buy 1 unit at price 976.570007, total balance 9076.794987 day 148, sell 1 unit at price 980.940002, investment 0.447484 %, total balance 10057.734989, day 156: buy 1 unit at price 957.369995, total balance 9100.364994 day 157, sell 1 unit at price 950.630005, investment -0.704011 %, total balance 10050.994999, day 160: buy 1 unit at price 965.590027, total balance 9085.404972 day 161, sell 1 unit at price 952.270020, investment -1.379468 %, total balance 10037.674992, day 165: buy 1 unit at price 908.729980, total balance 9128.945012 day 166: buy 1 unit at price 898.700012, total balance 8230.245000 day 167, sell 1 unit at price 911.710022, investment 0.327935 %, total balance 9141.955022, day 168, sell 1 unit at price 906.690002, investment 0.889061 %, total balance 10048.645024, day 169: buy 1 unit at price 918.590027, total balance 9130.054997 day 170, sell 1 unit at price 928.799988, investment 1.111482 %, total balance 10058.854985, day 171: buy 1 unit at price 930.090027, total balance 9128.764958 day 172: buy 1 unit at price 943.830017, total balance 8184.934941 day 173, sell 1 unit at price 947.159973, investment 1.835300 %, total balance 9132.094914, day 174: buy 1 unit at price 955.989990, total balance 8176.104924 day 176: buy 1 unit at price 965.400024, total balance 7210.704900 day 177, sell 1 unit at price 970.890015, investment 2.867041 %, total balance 8181.594915, day 178, sell 1 unit at price 968.150024, investment 1.271983 %, total balance 9149.744939, day 179, sell 1 unit at price 972.919983, investment 0.778947 %, total balance 10122.664922, day 182: buy 1 unit at price 947.799988, total balance 9174.864934 day 183, sell 1 unit at price 934.090027, investment -1.446504 %, total balance 10108.954961, day 184: buy 1 unit at price 941.530029, total balance 9167.424932 day 185: buy 1 unit at price 930.500000, total balance 8236.924932 day 186, sell 1 unit at price 930.830017, investment -1.136449 %, total balance 9167.754949, day 187, sell 1 unit at price 930.390015, investment -0.011820 %, total balance 10098.144964, day 189: buy 1 unit at price 927.960022, total balance 9170.184942 day 190, sell 1 unit at price 929.359985, investment 0.150865 %, total balance 10099.544927, day 192: buy 1 unit at price 922.900024, total balance 9176.644903 day 193, sell 1 unit at price 907.239990, investment -1.696829 %, total balance 10083.884893, day 197: buy 1 unit at price 926.960022, total balance 9156.924871 day 198, sell 1 unit at price 910.979980, investment -1.723919 %, total balance 10067.904851, day 199: buy 1 unit at price 910.669983, total balance 9157.234868 day 200, sell 1 unit at price 906.659973, investment -0.440336 %, total balance 10063.894841, day 202: buy 1 unit at price 927.000000, total balance 9136.894841 day 203, sell 1 unit at price 921.280029, investment -0.617041 %, total balance 10058.174870, day 204: buy 1 unit at price 915.890015, total balance 9142.284855 day 205: buy 1 unit at price 913.809998, total balance 8228.474857 day 206, sell 1 unit at price 921.289978, investment 0.589586 %, total balance 9149.764835, day 207, sell 1 unit at price 929.570007, investment 1.724648 %, total balance 10079.334842, day 209: buy 1 unit at price 937.340027, total balance 9141.994815 day 210, sell 1 unit at price 928.450012, investment -0.948430 %, total balance 10070.444827, day 211: buy 1 unit at price 927.809998, total balance 9142.634829 day 212: buy 1 unit at price 935.950012, total balance 8206.684817 day 213: buy 1 unit at price 926.500000, total balance 7280.184817 day 214, sell 1 unit at price 929.080017, investment 0.136884 %, total balance 8209.264834, day 215: buy 1 unit at price 932.070007, total balance 7277.194827 day 216: buy 1 unit at price 935.090027, total balance 6342.104800 day 217, sell 1 unit at price 925.109985, investment -1.158184 %, total balance 7267.214785, day 218, sell 1 unit at price 920.289978, investment -0.670267 %, total balance 8187.504763, day 219, sell 1 unit at price 915.000000, investment -1.831408 %, total balance 9102.504763, day 221, sell 1 unit at price 931.580017, investment -0.375366 %, total balance 10034.084780, day 223: buy 1 unit at price 928.530029, total balance 9105.554751 day 224: buy 1 unit at price 920.969971, total balance 8184.584780 day 225, sell 1 unit at price 924.859985, investment -0.395253 %, total balance 9109.444765, day 226, sell 1 unit at price 944.489990, investment 2.553831 %, total balance 10053.934755, day 227: buy 1 unit at price 949.500000, total balance 9104.434755 day 228, sell 1 unit at price 959.109985, investment 1.012110 %, total balance 10063.544740, day 231: buy 1 unit at price 951.679993, total balance 9111.864747 day 232: buy 1 unit at price 969.960022, total balance 8141.904725 day 233: buy 1 unit at price 978.890015, total balance 7163.014710 day 235, sell 1 unit at price 972.599976, investment 2.198216 %, total balance 8135.614686, day 236, sell 1 unit at price 989.250000, investment 1.988739 %, total balance 9124.864686, day 238, sell 1 unit at price 989.679993, investment 1.102267 %, total balance 10114.544679, day 243: buy 1 unit at price 988.200012, total balance 9126.344667 day 244: buy 1 unit at price 968.450012, total balance 8157.894655 day 245: buy 1 unit at price 970.539978, total balance 7187.354677 day 246, sell 1 unit at price 973.330017, investment -1.504756 %, total balance 8160.684694, day 247: buy 1 unit at price 972.559998, total balance 7188.124696 day 248, sell 1 unit at price 1019.270020, investment 5.247561 %, total balance 8207.394716, day 249, sell 1 unit at price 1017.109985, investment 4.798361 %, total balance 9224.504701, day 250: buy 1 unit at price 1016.640015, total balance 8207.864686
fig = plt.figure(figsize = (15,5))
plt.plot(close, color='r', lw=2.)
plt.plot(close, '^', markersize=10, color='m', label = 'buying signal', markevery = states_buy)
plt.plot(close, 'v', markersize=10, color='k', label = 'selling signal', markevery = states_sell)
plt.title('total gains %f, total investment %f%%'%(total_gains, invest))
plt.legend()
plt.savefig('output/'+name+'.png')
plt.show()
from collections import deque
import random
df= df_full.copy()
name = 'Actor-critic Duel agent'
class Actor:
def __init__(self, name, input_size, output_size, size_layer):
with tf.variable_scope(name):
self.X = tf.placeholder(tf.float32, (None, input_size))
feed_actor = tf.layers.dense(self.X, size_layer, activation = tf.nn.relu)
tensor_action, tensor_validation = tf.split(feed_actor,2,1)
feed_action = tf.layers.dense(tensor_action, output_size)
feed_validation = tf.layers.dense(tensor_validation, 1)
self.logits = feed_validation + tf.subtract(feed_action,
tf.reduce_mean(feed_action,axis=1,keep_dims=True))
class Critic:
def __init__(self, name, input_size, output_size, size_layer, learning_rate):
with tf.variable_scope(name):
self.X = tf.placeholder(tf.float32, (None, input_size))
self.Y = tf.placeholder(tf.float32, (None, output_size))
self.REWARD = tf.placeholder(tf.float32, (None, 1))
feed_critic = tf.layers.dense(self.X, size_layer, activation = tf.nn.relu)
tensor_action, tensor_validation = tf.split(feed_critic,2,1)
feed_action = tf.layers.dense(tensor_action, output_size)
feed_validation = tf.layers.dense(tensor_validation, 1)
feed_critic = feed_validation + tf.subtract(feed_action,tf.reduce_mean(feed_action,axis=1,keep_dims=True))
feed_critic = tf.nn.relu(feed_critic) + self.Y
feed_critic = tf.layers.dense(feed_critic, size_layer//2, activation = tf.nn.relu)
self.logits = tf.layers.dense(feed_critic, 1)
self.cost = tf.reduce_mean(tf.square(self.REWARD - self.logits))
self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)
class Agent:
LEARNING_RATE = 0.001
BATCH_SIZE = 32
LAYER_SIZE = 256
OUTPUT_SIZE = 3
EPSILON = 0.5
DECAY_RATE = 0.005
MIN_EPSILON = 0.1
GAMMA = 0.99
MEMORIES = deque()
MEMORY_SIZE = 300
COPY = 1000
T_COPY = 0
def __init__(self, state_size, window_size, trend, skip):
self.state_size = state_size
self.window_size = window_size
self.half_window = window_size // 2
self.trend = trend
self.skip = skip
tf.reset_default_graph()
self.actor = Actor('actor-original', self.state_size, self.OUTPUT_SIZE, self.LAYER_SIZE)
self.actor_target = Actor('actor-target', self.state_size, self.OUTPUT_SIZE, self.LAYER_SIZE)
self.critic = Critic('critic-original', self.state_size, self.OUTPUT_SIZE, self.LAYER_SIZE, self.LEARNING_RATE)
self.critic_target = Critic('critic-target', self.state_size, self.OUTPUT_SIZE,
self.LAYER_SIZE, self.LEARNING_RATE)
self.grad_critic = tf.gradients(self.critic.logits, self.critic.Y)
self.actor_critic_grad = tf.placeholder(tf.float32, [None, self.OUTPUT_SIZE])
weights_actor = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='actor')
self.grad_actor = tf.gradients(self.actor.logits, weights_actor, -self.actor_critic_grad)
grads = zip(self.grad_actor, weights_actor)
self.optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE).apply_gradients(grads)
self.sess = tf.InteractiveSession()
self.sess.run(tf.global_variables_initializer())
def _assign(self, from_name, to_name):
from_w = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=from_name)
to_w = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=to_name)
for i in range(len(from_w)):
assign_op = to_w[i].assign(from_w[i])
self.sess.run(assign_op)
def _memorize(self, state, action, reward, new_state, dead):
self.MEMORIES.append((state, action, reward, new_state, dead))
if len(self.MEMORIES) > self.MEMORY_SIZE:
self.MEMORIES.popleft()
def _select_action(self, state):
if np.random.rand() < self.EPSILON:
action = np.random.randint(self.OUTPUT_SIZE)
else:
prediction = self.sess.run(self.actor.logits, feed_dict={self.actor.X:[state]})[0]
action = np.argmax(prediction)
return action
def _construct_memories_and_train(self, replay):
states = np.array([a[0] for a in replay])
new_states = np.array([a[3] for a in replay])
Q = self.sess.run(self.actor.logits, feed_dict={self.actor.X: states})
Q_target = self.sess.run(self.actor_target.logits, feed_dict={self.actor_target.X: states})
grads = self.sess.run(self.grad_critic, feed_dict={self.critic.X:states, self.critic.Y:Q})[0]
self.sess.run(self.optimizer, feed_dict={self.actor.X:states, self.actor_critic_grad:grads})
rewards = np.array([a[2] for a in replay]).reshape((-1, 1))
rewards_target = self.sess.run(self.critic_target.logits,
feed_dict={self.critic_target.X:new_states,self.critic_target.Y:Q_target})
for i in range(len(replay)):
if not replay[0][-1]:
rewards[i] += self.GAMMA * rewards_target[i]
cost, _ = self.sess.run([self.critic.cost, self.critic.optimizer],
feed_dict={self.critic.X:states, self.critic.Y:Q, self.critic.REWARD:rewards})
return cost
def get_state(self, t):
window_size = self.window_size + 1
d = t - window_size + 1
block = self.trend[d : t + 1] if d >= 0 else -d * [self.trend[0]] + self.trend[0 : t + 1]
res = []
for i in range(window_size - 1):
res.append(block[i + 1] - block[i])
return np.array(res)
def buy(self, initial_money):
starting_money = initial_money
states_sell = []
states_buy = []
inventory = []
state = self.get_state(0)
for t in range(0, len(self.trend) - 1, self.skip):
action = self._select_action(state)
next_state = self.get_state(t + 1)
if action == 1 and initial_money >= self.trend[t]:
inventory.append(self.trend[t])
initial_money -= self.trend[t]
states_buy.append(t)
print('day %d: buy 1 unit at price %f, total balance %f'% (t, self.trend[t], initial_money))
elif action == 2 and len(inventory):
bought_price = inventory.pop(0)
initial_money += self.trend[t]
states_sell.append(t)
try:
invest = ((close[t] - bought_price) / bought_price) * 100
except:
invest = 0
print(
'day %d, sell 1 unit at price %f, investment %f %%, total balance %f,'
% (t, close[t], invest, initial_money)
)
state = next_state
invest = ((initial_money - starting_money) / starting_money) * 100
total_gains = initial_money - starting_money
return states_buy, states_sell, total_gains, invest
def train(self, iterations, checkpoint, initial_money):
for i in range(iterations):
total_profit = 0
inventory = []
state = self.get_state(0)
starting_money = initial_money
for t in range(0, len(self.trend) - 1, self.skip):
if (self.T_COPY + 1) % self.COPY == 0:
self._assign('actor-original', 'actor-target')
self._assign('critic-original', 'critic-target')
action = self._select_action(state)
next_state = self.get_state(t + 1)
if action == 1 and starting_money >= self.trend[t]:
inventory.append(self.trend[t])
starting_money -= self.trend[t]
elif action == 2 and len(inventory) > 0:
bought_price = inventory.pop(0)
total_profit += self.trend[t] - bought_price
starting_money += self.trend[t]
invest = ((starting_money - initial_money) / initial_money)
self._memorize(state, action, invest, next_state, starting_money < initial_money)
batch_size = min(len(self.MEMORIES), self.BATCH_SIZE)
state = next_state
replay = random.sample(self.MEMORIES, batch_size)
cost = self._construct_memories_and_train(replay)
self.T_COPY += 1
self.EPSILON = self.MIN_EPSILON + (1.0 - self.MIN_EPSILON) * np.exp(-self.DECAY_RATE * i)
if (i+1) % checkpoint == 0:
print('epoch: %d, total rewards: %f.3, cost: %f, total money: %f'%(i + 1, total_profit, cost,
starting_money))
close = df.Close.values.tolist()
initial_money = 10000
window_size = 30
skip = 1
batch_size = 32
agent = Agent(state_size = window_size,
window_size = window_size,
trend = close,
skip = skip)
agent.train(iterations = 200, checkpoint = 10, initial_money = initial_money)
WARNING:tensorflow:From <ipython-input-3-a50a3d0b4e36>:13: calling reduce_mean (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version. Instructions for updating: keep_dims is deprecated, use keepdims instead epoch: 10, total rewards: 707.200200.3, cost: 0.405626, total money: 9715.020207 epoch: 20, total rewards: 1598.640143.3, cost: 30.734631, total money: 10581.530158 epoch: 30, total rewards: 1271.279733.3, cost: 465.966644, total money: 10254.169748 epoch: 40, total rewards: 611.054993.3, cost: 38.079464, total money: 2818.014953 epoch: 50, total rewards: 1098.115172.3, cost: 71481.406250, total money: 1453.295102 epoch: 60, total rewards: 575.370237.3, cost: 45955692.000000, total money: 9558.260252 epoch: 70, total rewards: 1020.545110.3, cost: 244974075904.000000, total money: 10003.435125 epoch: 80, total rewards: 824.555359.3, cost: 62751015698432.000000, total money: 4025.125366 epoch: 90, total rewards: 182.215205.3, cost: 3949580517376.000000, total money: 10182.215205 epoch: 100, total rewards: 861.215276.3, cost: 7310792458240.000000, total money: 7918.025274 epoch: 110, total rewards: 68.690005.3, cost: 3184271573385216.000000, total money: 10068.690005 epoch: 120, total rewards: 205.980352.3, cost: 224217291292672.000000, total money: 10205.980352 epoch: 130, total rewards: 256.794983.3, cost: 363017178972160.000000, total money: 8275.784973 epoch: 140, total rewards: 1586.720156.3, cost: 530019768074240.000000, total money: 11586.720156 epoch: 150, total rewards: 824.849978.3, cost: 3151772092727296.000000, total money: 8881.750002 epoch: 160, total rewards: 222.490291.3, cost: 6080023886823424.000000, total money: 9205.850276 epoch: 170, total rewards: 37.630069.3, cost: 9586346603577344.000000, total money: 9020.990054 epoch: 180, total rewards: 510.125126.3, cost: 22490134536519680.000000, total money: 5604.765140 epoch: 190, total rewards: 639.559874.3, cost: 106721235701858304.000000, total money: 9669.019896 epoch: 200, total rewards: 945.395079.3, cost: 31826508674760704.000000, total money: 384.445006
states_buy, states_sell, total_gains, invest = agent.buy(initial_money = initial_money)
day 0: buy 1 unit at price 768.700012, total balance 9231.299988 day 2: buy 1 unit at price 762.020020, total balance 8469.279968 day 3, sell 1 unit at price 782.520020, investment 1.797842 %, total balance 9251.799988, day 4, sell 1 unit at price 790.510010, investment 3.738746 %, total balance 10042.309998, day 5: buy 1 unit at price 785.309998, total balance 9257.000000 day 8: buy 1 unit at price 736.080017, total balance 8520.919983 day 11, sell 1 unit at price 771.229980, investment -1.792925 %, total balance 9292.149963, day 12: buy 1 unit at price 760.539978, total balance 8531.609985 day 14: buy 1 unit at price 768.270020, total balance 7763.339965 day 15: buy 1 unit at price 760.989990, total balance 7002.349975 day 17, sell 1 unit at price 768.239990, investment 4.369087 %, total balance 7770.589965, day 20, sell 1 unit at price 747.919983, investment -1.659347 %, total balance 8518.509948, day 21, sell 1 unit at price 750.500000, investment -2.312991 %, total balance 9269.009948, day 22, sell 1 unit at price 762.520020, investment 0.201058 %, total balance 10031.529968, day 27: buy 1 unit at price 789.270020, total balance 9242.259948 day 33: buy 1 unit at price 796.419983, total balance 8445.839965 day 34, sell 1 unit at price 794.559998, investment 0.670237 %, total balance 9240.399963, day 35, sell 1 unit at price 791.260010, investment -0.647896 %, total balance 10031.659973, day 36: buy 1 unit at price 789.909973, total balance 9241.750000 day 37, sell 1 unit at price 791.549988, investment 0.207620 %, total balance 10033.299988, day 41: buy 1 unit at price 786.140015, total balance 9247.159973 day 43: buy 1 unit at price 794.020020, total balance 8453.139953 day 44: buy 1 unit at price 806.150024, total balance 7646.989929 day 45, sell 1 unit at price 806.650024, investment 2.608951 %, total balance 8453.639953, day 47: buy 1 unit at price 807.909973, total balance 7645.729980 day 48: buy 1 unit at price 806.359985, total balance 6839.369995 day 49, sell 1 unit at price 807.880005, investment 1.745546 %, total balance 7647.250000, day 55: buy 1 unit at price 823.869995, total balance 6823.380005 day 56, sell 1 unit at price 835.669983, investment 3.661844 %, total balance 7659.049988, day 57, sell 1 unit at price 832.150024, investment 3.000341 %, total balance 8491.200012, day 58, sell 1 unit at price 823.309998, investment 2.102040 %, total balance 9314.510010, day 59: buy 1 unit at price 802.320007, total balance 8512.190003 day 60, sell 1 unit at price 796.789978, investment -3.286928 %, total balance 9308.979981, day 62: buy 1 unit at price 798.530029, total balance 8510.449952 day 63, sell 1 unit at price 801.489990, investment -0.103452 %, total balance 9311.939942, day 69, sell 1 unit at price 819.239990, investment 2.593511 %, total balance 10131.179932, day 75: buy 1 unit at price 830.760010, total balance 9300.419922 day 76, sell 1 unit at price 831.330017, investment 0.068613 %, total balance 10131.749939, day 77: buy 1 unit at price 828.640015, total balance 9303.109924 day 78: buy 1 unit at price 829.280029, total balance 8473.829895 day 80, sell 1 unit at price 835.239990, investment 0.796483 %, total balance 9309.069885, day 83: buy 1 unit at price 827.780029, total balance 8481.289856 day 84, sell 1 unit at price 831.909973, investment 0.317136 %, total balance 9313.199829, day 86: buy 1 unit at price 838.679993, total balance 8474.519836 day 87: buy 1 unit at price 843.250000, total balance 7631.269836 day 88: buy 1 unit at price 845.539978, total balance 6785.729858 day 89, sell 1 unit at price 845.619995, investment 2.155158 %, total balance 7631.349853, day 91: buy 1 unit at price 848.780029, total balance 6782.569824 day 92: buy 1 unit at price 852.119995, total balance 5930.449829 day 94, sell 1 unit at price 830.460022, investment -0.980108 %, total balance 6760.909851, day 95: buy 1 unit at price 829.590027, total balance 5931.319824 day 96: buy 1 unit at price 817.580017, total balance 5113.739807 day 97: buy 1 unit at price 814.429993, total balance 4299.309814 day 98: buy 1 unit at price 819.510010, total balance 3479.799804 day 104, sell 1 unit at price 834.570007, investment -1.029350 %, total balance 4314.369811, day 105: buy 1 unit at price 831.409973, total balance 3482.959838 day 106: buy 1 unit at price 827.880005, total balance 2655.079833 day 107, sell 1 unit at price 824.669983, investment -2.468245 %, total balance 3479.749816, day 108: buy 1 unit at price 824.729980, total balance 2655.019836 day 110, sell 1 unit at price 824.320007, investment -2.881786 %, total balance 3479.339843, day 111, sell 1 unit at price 823.559998, investment -3.351640 %, total balance 4302.899841, day 112: buy 1 unit at price 837.169983, total balance 3465.729858 day 118, sell 1 unit at price 872.299988, investment 5.148321 %, total balance 4338.029846, day 121, sell 1 unit at price 905.960022, investment 10.809952 %, total balance 5243.989868, day 122, sell 1 unit at price 912.570007, investment 12.050147 %, total balance 6156.559875, day 123, sell 1 unit at price 916.440002, investment 11.827798 %, total balance 7072.999877, day 125: buy 1 unit at price 931.659973, total balance 6141.339904 day 128: buy 1 unit at price 932.169983, total balance 5209.169921 day 132, sell 1 unit at price 937.080017, investment 12.709740 %, total balance 6146.249938, day 133, sell 1 unit at price 943.000000, investment 13.905396 %, total balance 7089.249938, day 134: buy 1 unit at price 919.619995, total balance 6169.629943 day 136, sell 1 unit at price 934.010010, investment 13.250401 %, total balance 7103.639953, day 137, sell 1 unit at price 941.859985, investment 12.505226 %, total balance 8045.499938, day 139, sell 1 unit at price 954.960022, investment 2.500918 %, total balance 9000.459960, day 140: buy 1 unit at price 969.539978, total balance 8030.919982 day 143: buy 1 unit at price 964.859985, total balance 7066.059997 day 149, sell 1 unit at price 983.409973, investment 5.496850 %, total balance 8049.469970, day 150: buy 1 unit at price 949.830017, total balance 7099.639953 day 152, sell 1 unit at price 953.400024, investment 3.673260 %, total balance 8053.039977, day 153, sell 1 unit at price 950.760010, investment -1.936998 %, total balance 9003.799987, day 154, sell 1 unit at price 942.309998, investment -2.337125 %, total balance 9946.109985, day 156: buy 1 unit at price 957.369995, total balance 8988.739990 day 157, sell 1 unit at price 950.630005, investment 0.084224 %, total balance 9939.369995, day 159, sell 1 unit at price 957.090027, investment -0.029243 %, total balance 10896.460022, day 161: buy 1 unit at price 952.270020, total balance 9944.190002 day 163: buy 1 unit at price 940.489990, total balance 9003.700012 day 167, sell 1 unit at price 911.710022, investment -4.259296 %, total balance 9915.410034, day 168: buy 1 unit at price 906.690002, total balance 9008.720032 day 170, sell 1 unit at price 928.799988, investment -1.242969 %, total balance 9937.520020, day 171, sell 1 unit at price 930.090027, investment 2.580819 %, total balance 10867.610047, day 188: buy 1 unit at price 923.650024, total balance 9943.960023 day 191, sell 1 unit at price 926.789978, investment 0.339951 %, total balance 10870.750001, day 196: buy 1 unit at price 922.219971, total balance 9948.530030 day 197: buy 1 unit at price 926.960022, total balance 9021.570008 day 198, sell 1 unit at price 910.979980, investment -1.218797 %, total balance 9932.549988, day 199: buy 1 unit at price 910.669983, total balance 9021.880005 day 202, sell 1 unit at price 927.000000, investment 0.004313 %, total balance 9948.880005, day 203: buy 1 unit at price 921.280029, total balance 9027.599976 day 204, sell 1 unit at price 915.890015, investment 0.573208 %, total balance 9943.489991, day 205: buy 1 unit at price 913.809998, total balance 9029.679993 day 206, sell 1 unit at price 921.289978, investment 0.001080 %, total balance 9950.969971, day 207: buy 1 unit at price 929.570007, total balance 9021.399964 day 208, sell 1 unit at price 939.330017, investment 2.792705 %, total balance 9960.729981, day 209: buy 1 unit at price 937.340027, total balance 9023.389954 day 210: buy 1 unit at price 928.450012, total balance 8094.939942 day 212: buy 1 unit at price 935.950012, total balance 7158.989930 day 213, sell 1 unit at price 926.500000, investment -0.330261 %, total balance 8085.489930, day 219: buy 1 unit at price 915.000000, total balance 7170.489930 day 220, sell 1 unit at price 921.809998, investment -1.656819 %, total balance 8092.299928, day 221, sell 1 unit at price 931.580017, investment 0.337122 %, total balance 9023.879945, day 222, sell 1 unit at price 932.450012, investment -0.373952 %, total balance 9956.329957, day 223: buy 1 unit at price 928.530029, total balance 9027.799928 day 226: buy 1 unit at price 944.489990, total balance 8083.309938 day 227, sell 1 unit at price 949.500000, investment 3.770492 %, total balance 9032.809938, day 228: buy 1 unit at price 959.109985, total balance 8073.699953 day 229, sell 1 unit at price 953.270020, investment 2.664426 %, total balance 9026.969973, day 231: buy 1 unit at price 951.679993, total balance 8075.289980 day 232, sell 1 unit at price 969.960022, investment 2.696697 %, total balance 9045.250002, day 233, sell 1 unit at price 978.890015, investment 2.062332 %, total balance 10024.140017, day 234, sell 1 unit at price 977.000000, investment 2.660559 %, total balance 11001.140017, day 235: buy 1 unit at price 972.599976, total balance 10028.540041 day 238: buy 1 unit at price 989.679993, total balance 9038.860048 day 240, sell 1 unit at price 992.179993, investment 2.013162 %, total balance 10031.040041, day 241, sell 1 unit at price 992.809998, investment 0.316264 %, total balance 11023.850039, day 247: buy 1 unit at price 972.559998, total balance 10051.290041 day 248, sell 1 unit at price 1019.270020, investment 4.802791 %, total balance 11070.560061,
fig = plt.figure(figsize = (15,5))
plt.plot(close, color='r', lw=2.)
plt.plot(close, '^', markersize=10, color='m', label = 'buying signal', markevery = states_buy)
plt.plot(close, 'v', markersize=10, color='k', label = 'selling signal', markevery = states_sell)
plt.title('total gains %f, total investment %f%%'%(total_gains, invest))
plt.legend()
plt.savefig('output/'+name+'.png')
plt.show()
from collections import deque
import random
df= df_full.copy()
name = 'Actor-critic Recurrent agent'
class Actor:
def __init__(self, name, input_size, output_size, size_layer):
with tf.variable_scope(name):
self.X = tf.placeholder(tf.float32, (None, None, input_size))
self.hidden_layer = tf.placeholder(tf.float32, (None, 2 * size_layer))
cell = tf.nn.rnn_cell.LSTMCell(size_layer, state_is_tuple = False)
self.rnn,self.last_state = tf.nn.dynamic_rnn(inputs=self.X, cell=cell,
dtype=tf.float32,
initial_state=self.hidden_layer)
self.logits = tf.layers.dense(self.rnn[:,-1], output_size)
class Critic:
def __init__(self, name, input_size, output_size, size_layer, learning_rate):
with tf.variable_scope(name):
self.X = tf.placeholder(tf.float32, (None, None, input_size))
self.Y = tf.placeholder(tf.float32, (None, output_size))
self.hidden_layer = tf.placeholder(tf.float32, (None, 2 * size_layer))
self.REWARD = tf.placeholder(tf.float32, (None, 1))
feed_critic = tf.layers.dense(self.X, size_layer, activation = tf.nn.relu)
cell = tf.nn.rnn_cell.LSTMCell(size_layer, state_is_tuple = False)
self.rnn,self.last_state = tf.nn.dynamic_rnn(inputs=self.X, cell=cell,
dtype=tf.float32,
initial_state=self.hidden_layer)
feed_critic = tf.layers.dense(self.rnn[:,-1], output_size, activation = tf.nn.relu) + self.Y
feed_critic = tf.layers.dense(feed_critic, size_layer//2, activation = tf.nn.relu)
self.logits = tf.layers.dense(feed_critic, 1)
self.cost = tf.reduce_mean(tf.square(self.REWARD - self.logits))
self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)
class Agent:
LEARNING_RATE = 0.001
BATCH_SIZE = 32
LAYER_SIZE = 256
OUTPUT_SIZE = 3
EPSILON = 0.5
DECAY_RATE = 0.005
MIN_EPSILON = 0.1
GAMMA = 0.99
MEMORIES = deque()
MEMORY_SIZE = 300
COPY = 1000
T_COPY = 0
def __init__(self, state_size, window_size, trend, skip):
self.state_size = state_size
self.window_size = window_size
self.half_window = window_size // 2
self.trend = trend
self.INITIAL_FEATURES = np.zeros((4, self.state_size))
self.skip = skip
tf.reset_default_graph()
self.actor = Actor('actor-original', self.state_size, self.OUTPUT_SIZE, self.LAYER_SIZE)
self.actor_target = Actor('actor-target', self.state_size, self.OUTPUT_SIZE, self.LAYER_SIZE)
self.critic = Critic('critic-original', self.state_size, self.OUTPUT_SIZE, self.LAYER_SIZE, self.LEARNING_RATE)
self.critic_target = Critic('critic-target', self.state_size, self.OUTPUT_SIZE,
self.LAYER_SIZE, self.LEARNING_RATE)
self.grad_critic = tf.gradients(self.critic.logits, self.critic.Y)
self.actor_critic_grad = tf.placeholder(tf.float32, [None, self.OUTPUT_SIZE])
weights_actor = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='actor')
self.grad_actor = tf.gradients(self.actor.logits, weights_actor, -self.actor_critic_grad)
grads = zip(self.grad_actor, weights_actor)
self.optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE).apply_gradients(grads)
self.sess = tf.InteractiveSession()
self.sess.run(tf.global_variables_initializer())
def _assign(self, from_name, to_name):
from_w = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=from_name)
to_w = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=to_name)
for i in range(len(from_w)):
assign_op = to_w[i].assign(from_w[i])
self.sess.run(assign_op)
def _memorize(self, state, action, reward, new_state, dead, rnn_state):
self.MEMORIES.append((state, action, reward, new_state, dead, rnn_state))
if len(self.MEMORIES) > self.MEMORY_SIZE:
self.MEMORIES.popleft()
def _select_action(self, state):
if np.random.rand() < self.EPSILON:
action = np.random.randint(self.OUTPUT_SIZE)
else:
prediction = self.sess.run(self.actor.logits, feed_dict={self.actor.X:[state]})[0]
action = np.argmax(prediction)
return action
def _construct_memories_and_train(self, replay):
states = np.array([a[0] for a in replay])
new_states = np.array([a[3] for a in replay])
init_values = np.array([a[-1] for a in replay])
Q = self.sess.run(self.actor.logits, feed_dict={self.actor.X: states,
self.actor.hidden_layer: init_values})
Q_target = self.sess.run(self.actor_target.logits, feed_dict={self.actor_target.X: states,
self.actor_target.hidden_layer: init_values})
grads = self.sess.run(self.grad_critic, feed_dict={self.critic.X:states, self.critic.Y:Q,
self.critic.hidden_layer: init_values})[0]
self.sess.run(self.optimizer, feed_dict={self.actor.X:states, self.actor_critic_grad:grads,
self.actor.hidden_layer: init_values})
rewards = np.array([a[2] for a in replay]).reshape((-1, 1))
rewards_target = self.sess.run(self.critic_target.logits,
feed_dict={self.critic_target.X:new_states,self.critic_target.Y:Q_target,
self.critic_target.hidden_layer: init_values})
for i in range(len(replay)):
if not replay[0][-2]:
rewards[i] += self.GAMMA * rewards_target[i]
cost, _ = self.sess.run([self.critic.cost, self.critic.optimizer],
feed_dict={self.critic.X:states, self.critic.Y:Q, self.critic.REWARD:rewards,
self.critic.hidden_layer: init_values})
return cost
def get_state(self, t):
window_size = self.window_size + 1
d = t - window_size + 1
block = self.trend[d : t + 1] if d >= 0 else -d * [self.trend[0]] + self.trend[0 : t + 1]
res = []
for i in range(window_size - 1):
res.append(block[i + 1] - block[i])
return np.array(res)
def buy(self, initial_money):
starting_money = initial_money
states_sell = []
states_buy = []
inventory = []
state = self.get_state(0)
init_value = np.zeros((1, 2 * self.LAYER_SIZE))
for k in range(self.INITIAL_FEATURES.shape[0]):
self.INITIAL_FEATURES[k,:] = state
for t in range(0, len(self.trend) - 1, self.skip):
if np.random.rand() < self.EPSILON:
action = np.random.randint(self.OUTPUT_SIZE)
else:
action, last_state = self.sess.run([self.actor.logits,
self.actor.last_state],
feed_dict={self.actor.X:[self.INITIAL_FEATURES],
self.actor.hidden_layer:init_value})
action, init_value = np.argmax(action[0]), last_state
next_state = self.get_state(t + 1)
if action == 1 and initial_money >= self.trend[t]:
inventory.append(self.trend[t])
initial_money -= self.trend[t]
states_buy.append(t)
print('day %d: buy 1 unit at price %f, total balance %f'% (t, self.trend[t], initial_money))
elif action == 2 and len(inventory):
bought_price = inventory.pop(0)
initial_money += self.trend[t]
states_sell.append(t)
try:
invest = ((close[t] - bought_price) / bought_price) * 100
except:
invest = 0
print(
'day %d, sell 1 unit at price %f, investment %f %%, total balance %f,'
% (t, close[t], invest, initial_money)
)
new_state = np.append([self.get_state(t + 1)], self.INITIAL_FEATURES[:3, :], axis = 0)
self.INITIAL_FEATURES = new_state
invest = ((initial_money - starting_money) / starting_money) * 100
total_gains = initial_money - starting_money
return states_buy, states_sell, total_gains, invest
def train(self, iterations, checkpoint, initial_money):
for i in range(iterations):
total_profit = 0
inventory = []
state = self.get_state(0)
starting_money = initial_money
init_value = np.zeros((1, 2 * self.LAYER_SIZE))
for k in range(self.INITIAL_FEATURES.shape[0]):
self.INITIAL_FEATURES[k,:] = state
for t in range(0, len(self.trend) - 1, self.skip):
if (self.T_COPY + 1) % self.COPY == 0:
self._assign('actor-original', 'actor-target')
self._assign('critic-original', 'critic-target')
if np.random.rand() < self.EPSILON:
action = np.random.randint(self.OUTPUT_SIZE)
else:
action, last_state = self.sess.run([self.actor.logits,
self.actor.last_state],
feed_dict={self.actor.X:[self.INITIAL_FEATURES],
self.actor.hidden_layer:init_value})
action, init_value = np.argmax(action[0]), last_state
next_state = self.get_state(t + 1)
if action == 1 and starting_money >= self.trend[t]:
inventory.append(self.trend[t])
starting_money -= self.trend[t]
elif action == 2 and len(inventory) > 0:
bought_price = inventory.pop(0)
total_profit += self.trend[t] - bought_price
starting_money += self.trend[t]
invest = ((starting_money - initial_money) / initial_money)
new_state = np.append([self.get_state(t + 1)], self.INITIAL_FEATURES[:3, :], axis = 0)
self._memorize(self.INITIAL_FEATURES, action, invest, new_state,
starting_money < initial_money, init_value[0])
batch_size = min(len(self.MEMORIES), self.BATCH_SIZE)
self.INITIAL_FEATURES = new_state
replay = random.sample(self.MEMORIES, batch_size)
cost = self._construct_memories_and_train(replay)
self.T_COPY += 1
self.EPSILON = self.MIN_EPSILON + (1.0 - self.MIN_EPSILON) * np.exp(-self.DECAY_RATE * i)
if (i+1) % checkpoint == 0:
print('epoch: %d, total rewards: %f.3, cost: %f, total money: %f'%(i + 1, total_profit, cost,
starting_money))
close = df.Close.values.tolist()
initial_money = 10000
window_size = 30
skip = 1
batch_size = 32
agent = Agent(state_size = window_size,
window_size = window_size,
trend = close,
skip = skip)
agent.train(iterations = 200, checkpoint = 10, initial_money = initial_money)
WARNING:tensorflow:<tensorflow.python.ops.rnn_cell_impl.LSTMCell object at 0x7f46cd19b6d8>: Using a concatenated state is slower and will soon be deprecated. Use state_is_tuple=True. WARNING:tensorflow:<tensorflow.python.ops.rnn_cell_impl.LSTMCell object at 0x7f46cd102ef0>: Using a concatenated state is slower and will soon be deprecated. Use state_is_tuple=True. WARNING:tensorflow:<tensorflow.python.ops.rnn_cell_impl.LSTMCell object at 0x7f46ccc7ce10>: Using a concatenated state is slower and will soon be deprecated. Use state_is_tuple=True. WARNING:tensorflow:<tensorflow.python.ops.rnn_cell_impl.LSTMCell object at 0x7f46cc5685f8>: Using a concatenated state is slower and will soon be deprecated. Use state_is_tuple=True. epoch: 10, total rewards: 1158.549991.3, cost: 0.046632, total money: 4247.099979 epoch: 20, total rewards: 466.185119.3, cost: 0.035100, total money: 5537.135131 epoch: 30, total rewards: 477.615173.3, cost: 0.330107, total money: 975.775206 epoch: 40, total rewards: 1200.205012.3, cost: 0.215860, total money: 10180.934992 epoch: 50, total rewards: 283.615237.3, cost: 0.116108, total money: 3314.845217 epoch: 60, total rewards: 324.265078.3, cost: 0.435482, total money: 9334.585085 epoch: 70, total rewards: 587.429873.3, cost: 0.749076, total money: 4785.129884 epoch: 80, total rewards: 1248.729918.3, cost: 0.167420, total money: 663.739866 epoch: 90, total rewards: 520.270204.3, cost: 0.006982, total money: 9503.630189 epoch: 100, total rewards: 195.270142.3, cost: 0.153058, total money: 10195.270142 epoch: 110, total rewards: 74.399840.3, cost: 0.350105, total money: 10074.399840 epoch: 120, total rewards: 2842.805359.3, cost: 0.074852, total money: 7832.085327 epoch: 130, total rewards: 509.049985.3, cost: 0.053447, total money: 8518.609983 epoch: 140, total rewards: -2.900205.3, cost: 0.015182, total money: 8979.989810 epoch: 150, total rewards: 93.080022.3, cost: 0.008775, total money: 10093.080022 epoch: 160, total rewards: 89.794983.3, cost: 0.107893, total money: 10089.794983 epoch: 170, total rewards: 222.045106.3, cost: 0.189179, total money: 10222.045106 epoch: 180, total rewards: -57.619995.3, cost: 0.002425, total money: 8925.739990 epoch: 190, total rewards: 21.009889.3, cost: 0.005919, total money: 10021.009889 epoch: 200, total rewards: 201.354980.3, cost: 0.002352, total money: 10201.354980
states_buy, states_sell, total_gains, invest = agent.buy(initial_money = initial_money)
day 0: buy 1 unit at price 768.700012, total balance 9231.299988 day 1, sell 1 unit at price 762.130005, investment -0.854691 %, total balance 9993.429993, day 3: buy 1 unit at price 782.520020, total balance 9210.909973 day 4, sell 1 unit at price 790.510010, investment 1.021059 %, total balance 10001.419983, day 22: buy 1 unit at price 762.520020, total balance 9238.899963 day 23: buy 1 unit at price 759.109985, total balance 8479.789978 day 24, sell 1 unit at price 771.190002, investment 1.137017 %, total balance 9250.979980, day 26, sell 1 unit at price 789.289978, investment 3.975708 %, total balance 10040.269958, day 31: buy 1 unit at price 790.799988, total balance 9249.469970 day 32, sell 1 unit at price 794.200012, investment 0.429947 %, total balance 10043.669982, day 33: buy 1 unit at price 796.419983, total balance 9247.249999 day 34, sell 1 unit at price 794.559998, investment -0.233543 %, total balance 10041.809997, day 39: buy 1 unit at price 782.789978, total balance 9259.020019 day 40: buy 1 unit at price 771.820007, total balance 8487.200012 day 42, sell 1 unit at price 786.900024, investment 0.525051 %, total balance 9274.100036, day 45, sell 1 unit at price 806.650024, investment 4.512712 %, total balance 10080.750060, day 64: buy 1 unit at price 801.340027, total balance 9279.410033 day 65, sell 1 unit at price 806.969971, investment 0.702566 %, total balance 10086.380004, day 68: buy 1 unit at price 813.669983, total balance 9272.710021 day 70, sell 1 unit at price 820.450012, investment 0.833265 %, total balance 10093.160033, day 103: buy 1 unit at price 838.549988, total balance 9254.610045 day 104, sell 1 unit at price 834.570007, investment -0.474627 %, total balance 10089.180052, day 110: buy 1 unit at price 824.320007, total balance 9264.860045 day 111, sell 1 unit at price 823.559998, investment -0.092198 %, total balance 10088.420043, day 114: buy 1 unit at price 838.210022, total balance 9250.210021 day 115, sell 1 unit at price 841.650024, investment 0.410399 %, total balance 10091.860045, day 128: buy 1 unit at price 932.169983, total balance 9159.690062 day 129: buy 1 unit at price 928.780029, total balance 8230.910033 day 131, sell 1 unit at price 932.219971, investment 0.005363 %, total balance 9163.130004, day 132, sell 1 unit at price 937.080017, investment 0.893644 %, total balance 10100.210021, day 144: buy 1 unit at price 966.950012, total balance 9133.260009 day 145, sell 1 unit at price 975.599976, investment 0.894562 %, total balance 10108.859985, day 148: buy 1 unit at price 980.940002, total balance 9127.919983 day 149, sell 1 unit at price 983.409973, investment 0.251796 %, total balance 10111.329956, day 151: buy 1 unit at price 942.900024, total balance 9168.429932 day 153, sell 1 unit at price 950.760010, investment 0.833597 %, total balance 10119.189942, day 168: buy 1 unit at price 906.690002, total balance 9212.499940 day 169, sell 1 unit at price 918.590027, investment 1.312469 %, total balance 10131.089967, day 171: buy 1 unit at price 930.090027, total balance 9200.999940 day 172, sell 1 unit at price 943.830017, investment 1.477275 %, total balance 10144.829957, day 175: buy 1 unit at price 953.419983, total balance 9191.409974 day 176, sell 1 unit at price 965.400024, investment 1.256533 %, total balance 10156.809998, day 178: buy 1 unit at price 968.150024, total balance 9188.659974 day 179, sell 1 unit at price 972.919983, investment 0.492688 %, total balance 10161.579957, day 192: buy 1 unit at price 922.900024, total balance 9238.679933 day 193, sell 1 unit at price 907.239990, investment -1.696829 %, total balance 10145.919923, day 194: buy 1 unit at price 914.390015, total balance 9231.529908 day 196: buy 1 unit at price 922.219971, total balance 8309.309937 day 197, sell 1 unit at price 926.960022, investment 1.374688 %, total balance 9236.269959, day 198, sell 1 unit at price 910.979980, investment -1.218797 %, total balance 10147.249939, day 207: buy 1 unit at price 929.570007, total balance 9217.679932 day 208: buy 1 unit at price 939.330017, total balance 8278.349915 day 209, sell 1 unit at price 937.340027, investment 0.835872 %, total balance 9215.689942, day 210, sell 1 unit at price 928.450012, investment -1.158273 %, total balance 10144.139954, day 211: buy 1 unit at price 927.809998, total balance 9216.329956 day 212, sell 1 unit at price 935.950012, investment 0.877336 %, total balance 10152.279968, day 214: buy 1 unit at price 929.080017, total balance 9223.199951 day 215, sell 1 unit at price 932.070007, investment 0.321823 %, total balance 10155.269958, day 226: buy 1 unit at price 944.489990, total balance 9210.779968 day 227, sell 1 unit at price 949.500000, investment 0.530446 %, total balance 10160.279968, day 233: buy 1 unit at price 978.890015, total balance 9181.389953 day 234, sell 1 unit at price 977.000000, investment -0.193077 %, total balance 10158.389953, day 243: buy 1 unit at price 988.200012, total balance 9170.189941 day 244, sell 1 unit at price 968.450012, investment -1.998583 %, total balance 10138.639953,
fig = plt.figure(figsize = (15,5))
plt.plot(close, color='r', lw=2.)
plt.plot(close, '^', markersize=10, color='m', label = 'buying signal', markevery = states_buy)
plt.plot(close, 'v', markersize=10, color='k', label = 'selling signal', markevery = states_sell)
plt.title('total gains %f, total investment %f%%'%(total_gains, invest))
plt.legend()
plt.savefig('output/'+name+'.png')
plt.show()
from collections import deque
import random
df= df_full.copy()
name = 'Actor-critic Duel Recurrent agent'
class Actor:
def __init__(self, name, input_size, output_size, size_layer):
with tf.variable_scope(name):
self.X = tf.placeholder(tf.float32, (None, None, input_size))
self.hidden_layer = tf.placeholder(tf.float32, (None, 2 * size_layer))
cell = tf.nn.rnn_cell.LSTMCell(size_layer, state_is_tuple = False)
self.rnn,self.last_state = tf.nn.dynamic_rnn(inputs=self.X, cell=cell,
dtype=tf.float32,
initial_state=self.hidden_layer)
tensor_action, tensor_validation = tf.split(self.rnn[:,-1],2,1)
feed_action = tf.layers.dense(tensor_action, output_size)
feed_validation = tf.layers.dense(tensor_validation, 1)
self.logits = feed_validation + tf.subtract(feed_action,
tf.reduce_mean(feed_action,axis=1,keep_dims=True))
class Critic:
def __init__(self, name, input_size, output_size, size_layer, learning_rate):
with tf.variable_scope(name):
self.X = tf.placeholder(tf.float32, (None, None, input_size))
self.Y = tf.placeholder(tf.float32, (None, output_size))
self.hidden_layer = tf.placeholder(tf.float32, (None, 2 * size_layer))
self.REWARD = tf.placeholder(tf.float32, (None, 1))
feed_critic = tf.layers.dense(self.X, size_layer, activation = tf.nn.relu)
cell = tf.nn.rnn_cell.LSTMCell(size_layer, state_is_tuple = False)
self.rnn,self.last_state = tf.nn.dynamic_rnn(inputs=self.X, cell=cell,
dtype=tf.float32,
initial_state=self.hidden_layer)
tensor_action, tensor_validation = tf.split(self.rnn[:,-1],2,1)
feed_action = tf.layers.dense(tensor_action, output_size)
feed_validation = tf.layers.dense(tensor_validation, 1)
feed_critic = feed_validation + tf.subtract(feed_action,tf.reduce_mean(feed_action,axis=1,keep_dims=True))
feed_critic = tf.nn.relu(feed_critic) + self.Y
feed_critic = tf.layers.dense(feed_critic, size_layer//2, activation = tf.nn.relu)
self.logits = tf.layers.dense(feed_critic, 1)
self.cost = tf.reduce_mean(tf.square(self.REWARD - self.logits))
self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)
class Agent:
LEARNING_RATE = 0.001
BATCH_SIZE = 32
LAYER_SIZE = 256
OUTPUT_SIZE = 3
EPSILON = 0.5
DECAY_RATE = 0.005
MIN_EPSILON = 0.1
GAMMA = 0.99
MEMORIES = deque()
MEMORY_SIZE = 300
COPY = 1000
T_COPY = 0
def __init__(self, state_size, window_size, trend, skip):
self.state_size = state_size
self.window_size = window_size
self.half_window = window_size // 2
self.trend = trend
self.INITIAL_FEATURES = np.zeros((4, self.state_size))
self.skip = skip
tf.reset_default_graph()
self.actor = Actor('actor-original', self.state_size, self.OUTPUT_SIZE, self.LAYER_SIZE)
self.actor_target = Actor('actor-target', self.state_size, self.OUTPUT_SIZE, self.LAYER_SIZE)
self.critic = Critic('critic-original', self.state_size, self.OUTPUT_SIZE, self.LAYER_SIZE, self.LEARNING_RATE)
self.critic_target = Critic('critic-target', self.state_size, self.OUTPUT_SIZE,
self.LAYER_SIZE, self.LEARNING_RATE)
self.grad_critic = tf.gradients(self.critic.logits, self.critic.Y)
self.actor_critic_grad = tf.placeholder(tf.float32, [None, self.OUTPUT_SIZE])
weights_actor = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='actor')
self.grad_actor = tf.gradients(self.actor.logits, weights_actor, -self.actor_critic_grad)
grads = zip(self.grad_actor, weights_actor)
self.optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE).apply_gradients(grads)
self.sess = tf.InteractiveSession()
self.sess.run(tf.global_variables_initializer())
def _assign(self, from_name, to_name):
from_w = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=from_name)
to_w = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=to_name)
for i in range(len(from_w)):
assign_op = to_w[i].assign(from_w[i])
self.sess.run(assign_op)
def _memorize(self, state, action, reward, new_state, dead, rnn_state):
self.MEMORIES.append((state, action, reward, new_state, dead, rnn_state))
if len(self.MEMORIES) > self.MEMORY_SIZE:
self.MEMORIES.popleft()
def _select_action(self, state):
if np.random.rand() < self.EPSILON:
action = np.random.randint(self.OUTPUT_SIZE)
else:
prediction = self.sess.run(self.actor.logits, feed_dict={self.actor.X:[state]})[0]
action = np.argmax(prediction)
return action
def _construct_memories_and_train(self, replay):
states = np.array([a[0] for a in replay])
new_states = np.array([a[3] for a in replay])
init_values = np.array([a[-1] for a in replay])
Q = self.sess.run(self.actor.logits, feed_dict={self.actor.X: states,
self.actor.hidden_layer: init_values})
Q_target = self.sess.run(self.actor_target.logits, feed_dict={self.actor_target.X: states,
self.actor_target.hidden_layer: init_values})
grads = self.sess.run(self.grad_critic, feed_dict={self.critic.X:states, self.critic.Y:Q,
self.critic.hidden_layer: init_values})[0]
self.sess.run(self.optimizer, feed_dict={self.actor.X:states, self.actor_critic_grad:grads,
self.actor.hidden_layer: init_values})
rewards = np.array([a[2] for a in replay]).reshape((-1, 1))
rewards_target = self.sess.run(self.critic_target.logits,
feed_dict={self.critic_target.X:new_states,self.critic_target.Y:Q_target,
self.critic_target.hidden_layer: init_values})
for i in range(len(replay)):
if not replay[0][-2]:
rewards[i] += self.GAMMA * rewards_target[i]
cost, _ = self.sess.run([self.critic.cost, self.critic.optimizer],
feed_dict={self.critic.X:states, self.critic.Y:Q, self.critic.REWARD:rewards,
self.critic.hidden_layer: init_values})
return cost
def get_state(self, t):
window_size = self.window_size + 1
d = t - window_size + 1
block = self.trend[d : t + 1] if d >= 0 else -d * [self.trend[0]] + self.trend[0 : t + 1]
res = []
for i in range(window_size - 1):
res.append(block[i + 1] - block[i])
return np.array(res)
def buy(self, initial_money):
starting_money = initial_money
states_sell = []
states_buy = []
inventory = []
state = self.get_state(0)
init_value = np.zeros((1, 2 * self.LAYER_SIZE))
for k in range(self.INITIAL_FEATURES.shape[0]):
self.INITIAL_FEATURES[k,:] = state
for t in range(0, len(self.trend) - 1, self.skip):
if np.random.rand() < self.EPSILON:
action = np.random.randint(self.OUTPUT_SIZE)
else:
action, last_state = self.sess.run([self.actor.logits,
self.actor.last_state],
feed_dict={self.actor.X:[self.INITIAL_FEATURES],
self.actor.hidden_layer:init_value})
action, init_value = np.argmax(action[0]), last_state
next_state = self.get_state(t + 1)
if action == 1 and initial_money >= self.trend[t]:
inventory.append(self.trend[t])
initial_money -= self.trend[t]
states_buy.append(t)
print('day %d: buy 1 unit at price %f, total balance %f'% (t, self.trend[t], initial_money))
elif action == 2 and len(inventory):
bought_price = inventory.pop(0)
initial_money += self.trend[t]
states_sell.append(t)
try:
invest = ((close[t] - bought_price) / bought_price) * 100
except:
invest = 0
print(
'day %d, sell 1 unit at price %f, investment %f %%, total balance %f,'
% (t, close[t], invest, initial_money)
)
new_state = np.append([self.get_state(t + 1)], self.INITIAL_FEATURES[:3, :], axis = 0)
self.INITIAL_FEATURES = new_state
invest = ((initial_money - starting_money) / starting_money) * 100
total_gains = initial_money - starting_money
return states_buy, states_sell, total_gains, invest
def train(self, iterations, checkpoint, initial_money):
for i in range(iterations):
total_profit = 0
inventory = []
state = self.get_state(0)
starting_money = initial_money
init_value = np.zeros((1, 2 * self.LAYER_SIZE))
for k in range(self.INITIAL_FEATURES.shape[0]):
self.INITIAL_FEATURES[k,:] = state
for t in range(0, len(self.trend) - 1, self.skip):
if (self.T_COPY + 1) % self.COPY == 0:
self._assign('actor-original', 'actor-target')
self._assign('critic-original', 'critic-target')
if np.random.rand() < self.EPSILON:
action = np.random.randint(self.OUTPUT_SIZE)
else:
action, last_state = self.sess.run([self.actor.logits,
self.actor.last_state],
feed_dict={self.actor.X:[self.INITIAL_FEATURES],
self.actor.hidden_layer:init_value})
action, init_value = np.argmax(action[0]), last_state
next_state = self.get_state(t + 1)
if action == 1 and starting_money >= self.trend[t]:
inventory.append(self.trend[t])
starting_money -= self.trend[t]
elif action == 2 and len(inventory) > 0:
bought_price = inventory.pop(0)
total_profit += self.trend[t] - bought_price
starting_money += self.trend[t]
invest = ((starting_money - initial_money) / initial_money)
new_state = np.append([self.get_state(t + 1)], self.INITIAL_FEATURES[:3, :], axis = 0)
self._memorize(self.INITIAL_FEATURES, action, invest, new_state,
starting_money < initial_money, init_value[0])
batch_size = min(len(self.MEMORIES), self.BATCH_SIZE)
self.INITIAL_FEATURES = new_state
replay = random.sample(self.MEMORIES, batch_size)
cost = self._construct_memories_and_train(replay)
self.T_COPY += 1
self.EPSILON = self.MIN_EPSILON + (1.0 - self.MIN_EPSILON) * np.exp(-self.DECAY_RATE * i)
if (i+1) % checkpoint == 0:
print('epoch: %d, total rewards: %f.3, cost: %f, total money: %f'%(i + 1, total_profit, cost,
starting_money))
close = df.Close.values.tolist()
initial_money = 10000
window_size = 30
skip = 1
batch_size = 32
agent = Agent(state_size = window_size,
window_size = window_size,
trend = close,
skip = skip)
agent.train(iterations = 200, checkpoint = 10, initial_money = initial_money)
WARNING:tensorflow:<tensorflow.python.ops.rnn_cell_impl.LSTMCell object at 0x7f8ac3f890b8>: Using a concatenated state is slower and will soon be deprecated. Use state_is_tuple=True. WARNING:tensorflow:From <ipython-input-3-b82c6dfdfdbf>:17: calling reduce_mean (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version. Instructions for updating: keep_dims is deprecated, use keepdims instead WARNING:tensorflow:<tensorflow.python.ops.rnn_cell_impl.LSTMCell object at 0x7f8a4343d2b0>: Using a concatenated state is slower and will soon be deprecated. Use state_is_tuple=True. WARNING:tensorflow:<tensorflow.python.ops.rnn_cell_impl.LSTMCell object at 0x7f8a42e484e0>: Using a concatenated state is slower and will soon be deprecated. Use state_is_tuple=True. WARNING:tensorflow:<tensorflow.python.ops.rnn_cell_impl.LSTMCell object at 0x7f8a42670c50>: Using a concatenated state is slower and will soon be deprecated. Use state_is_tuple=True. epoch: 10, total rewards: 1217.199710.3, cost: 0.428947, total money: 9258.459720 epoch: 20, total rewards: 154.669988.3, cost: 0.205311, total money: 8167.020025 epoch: 30, total rewards: 225.259892.3, cost: 0.080974, total money: 10225.259892 epoch: 40, total rewards: 1857.994754.3, cost: 0.147440, total money: 7906.464724 epoch: 50, total rewards: 864.365355.3, cost: 0.133079, total money: 3145.525327 epoch: 60, total rewards: 252.179754.3, cost: 0.349886, total money: 10252.179754 epoch: 70, total rewards: 2285.265256.3, cost: 0.122869, total money: 841.845272 epoch: 80, total rewards: 2273.160095.3, cost: 0.042144, total money: 1779.580078 epoch: 90, total rewards: 695.794921.3, cost: 0.652829, total money: 10695.794921 epoch: 100, total rewards: -63.870359.3, cost: 0.026901, total money: 9936.129641 epoch: 110, total rewards: 1660.049986.3, cost: 0.050525, total money: 236.529905 epoch: 120, total rewards: 2137.930355.3, cost: 0.019048, total money: 635.270319 epoch: 130, total rewards: 1263.700071.3, cost: 0.105621, total money: 836.610044 epoch: 140, total rewards: 2582.234985.3, cost: 0.026973, total money: 1985.844970 epoch: 150, total rewards: 1342.129822.3, cost: 0.045669, total money: 1933.479859 epoch: 160, total rewards: 171.394838.3, cost: 0.186082, total money: 9198.064821 epoch: 170, total rewards: 581.185307.3, cost: 0.243257, total money: 26.655338 epoch: 180, total rewards: 109.954956.3, cost: 0.001933, total money: 9092.844971 epoch: 190, total rewards: -85.549868.3, cost: 0.004746, total money: 9914.450132 epoch: 200, total rewards: 94.994872.3, cost: 0.006849, total money: 10094.994872
states_buy, states_sell, total_gains, invest = agent.buy(initial_money = initial_money)
day 0: buy 1 unit at price 768.700012, total balance 9231.299988 day 1, sell 1 unit at price 762.130005, investment -0.854691 %, total balance 9993.429993, day 3: buy 1 unit at price 782.520020, total balance 9210.909973 day 4, sell 1 unit at price 790.510010, investment 1.021059 %, total balance 10001.419983, day 22: buy 1 unit at price 762.520020, total balance 9238.899963 day 23: buy 1 unit at price 759.109985, total balance 8479.789978 day 24, sell 1 unit at price 771.190002, investment 1.137017 %, total balance 9250.979980, day 26, sell 1 unit at price 789.289978, investment 3.975708 %, total balance 10040.269958, day 31: buy 1 unit at price 790.799988, total balance 9249.469970 day 32, sell 1 unit at price 794.200012, investment 0.429947 %, total balance 10043.669982, day 33: buy 1 unit at price 796.419983, total balance 9247.249999 day 34, sell 1 unit at price 794.559998, investment -0.233543 %, total balance 10041.809997, day 39: buy 1 unit at price 782.789978, total balance 9259.020019 day 40: buy 1 unit at price 771.820007, total balance 8487.200012 day 42, sell 1 unit at price 786.900024, investment 0.525051 %, total balance 9274.100036, day 45, sell 1 unit at price 806.650024, investment 4.512712 %, total balance 10080.750060, day 64: buy 1 unit at price 801.340027, total balance 9279.410033 day 65, sell 1 unit at price 806.969971, investment 0.702566 %, total balance 10086.380004, day 68: buy 1 unit at price 813.669983, total balance 9272.710021 day 70, sell 1 unit at price 820.450012, investment 0.833265 %, total balance 10093.160033, day 103: buy 1 unit at price 838.549988, total balance 9254.610045 day 104, sell 1 unit at price 834.570007, investment -0.474627 %, total balance 10089.180052, day 110: buy 1 unit at price 824.320007, total balance 9264.860045 day 111, sell 1 unit at price 823.559998, investment -0.092198 %, total balance 10088.420043, day 114: buy 1 unit at price 838.210022, total balance 9250.210021 day 115, sell 1 unit at price 841.650024, investment 0.410399 %, total balance 10091.860045, day 128: buy 1 unit at price 932.169983, total balance 9159.690062 day 129: buy 1 unit at price 928.780029, total balance 8230.910033 day 131, sell 1 unit at price 932.219971, investment 0.005363 %, total balance 9163.130004, day 132, sell 1 unit at price 937.080017, investment 0.893644 %, total balance 10100.210021, day 144: buy 1 unit at price 966.950012, total balance 9133.260009 day 145, sell 1 unit at price 975.599976, investment 0.894562 %, total balance 10108.859985, day 148: buy 1 unit at price 980.940002, total balance 9127.919983 day 149, sell 1 unit at price 983.409973, investment 0.251796 %, total balance 10111.329956, day 151: buy 1 unit at price 942.900024, total balance 9168.429932 day 153, sell 1 unit at price 950.760010, investment 0.833597 %, total balance 10119.189942, day 168: buy 1 unit at price 906.690002, total balance 9212.499940 day 169, sell 1 unit at price 918.590027, investment 1.312469 %, total balance 10131.089967, day 171: buy 1 unit at price 930.090027, total balance 9200.999940 day 172, sell 1 unit at price 943.830017, investment 1.477275 %, total balance 10144.829957, day 175: buy 1 unit at price 953.419983, total balance 9191.409974 day 176, sell 1 unit at price 965.400024, investment 1.256533 %, total balance 10156.809998, day 178: buy 1 unit at price 968.150024, total balance 9188.659974 day 179, sell 1 unit at price 972.919983, investment 0.492688 %, total balance 10161.579957, day 192: buy 1 unit at price 922.900024, total balance 9238.679933 day 193, sell 1 unit at price 907.239990, investment -1.696829 %, total balance 10145.919923, day 194: buy 1 unit at price 914.390015, total balance 9231.529908 day 196: buy 1 unit at price 922.219971, total balance 8309.309937 day 197, sell 1 unit at price 926.960022, investment 1.374688 %, total balance 9236.269959, day 198, sell 1 unit at price 910.979980, investment -1.218797 %, total balance 10147.249939, day 207: buy 1 unit at price 929.570007, total balance 9217.679932 day 208: buy 1 unit at price 939.330017, total balance 8278.349915 day 209, sell 1 unit at price 937.340027, investment 0.835872 %, total balance 9215.689942, day 210, sell 1 unit at price 928.450012, investment -1.158273 %, total balance 10144.139954, day 211: buy 1 unit at price 927.809998, total balance 9216.329956 day 212, sell 1 unit at price 935.950012, investment 0.877336 %, total balance 10152.279968, day 214: buy 1 unit at price 929.080017, total balance 9223.199951 day 215, sell 1 unit at price 932.070007, investment 0.321823 %, total balance 10155.269958, day 226: buy 1 unit at price 944.489990, total balance 9210.779968 day 227, sell 1 unit at price 949.500000, investment 0.530446 %, total balance 10160.279968, day 233: buy 1 unit at price 978.890015, total balance 9181.389953 day 234, sell 1 unit at price 977.000000, investment -0.193077 %, total balance 10158.389953, day 243: buy 1 unit at price 988.200012, total balance 9170.189941 day 244, sell 1 unit at price 968.450012, investment -1.998583 %, total balance 10138.639953,
fig = plt.figure(figsize = (15,5))
plt.plot(close, color='r', lw=2.)
plt.plot(close, '^', markersize=10, color='m', label = 'buying signal', markevery = states_buy)
plt.plot(close, 'v', markersize=10, color='k', label = 'selling signal', markevery = states_sell)
plt.title('total gains %f, total investment %f%%'%(total_gains, invest))
plt.legend()
plt.savefig('output/'+name+'.png')
plt.show()
from collections import deque
import random
df= df_full.copy()
name = 'Curiosity Q-learning agent'
class Agent:
LEARNING_RATE = 0.003
BATCH_SIZE = 32
LAYER_SIZE = 500
OUTPUT_SIZE = 3
EPSILON = 0.5
DECAY_RATE = 0.005
MIN_EPSILON = 0.1
GAMMA = 0.99
MEMORIES = deque()
COPY = 1000
T_COPY = 0
MEMORY_SIZE = 300
def __init__(self, state_size, window_size, trend, skip):
self.state_size = state_size
self.window_size = window_size
self.half_window = window_size // 2
self.trend = trend
self.skip = skip
tf.reset_default_graph()
self.X = tf.placeholder(tf.float32, (None, self.state_size))
self.Y = tf.placeholder(tf.float32, (None, self.state_size))
self.ACTION = tf.placeholder(tf.float32, (None))
self.REWARD = tf.placeholder(tf.float32, (None))
self.batch_size = tf.shape(self.ACTION)[0]
with tf.variable_scope('curiosity_model'):
action = tf.reshape(self.ACTION, (-1,1))
state_action = tf.concat([self.X, action], axis=1)
save_state = tf.identity(self.Y)
feed = tf.layers.dense(state_action, 32, activation=tf.nn.relu)
self.curiosity_logits = tf.layers.dense(feed, self.state_size)
self.curiosity_cost = tf.reduce_sum(tf.square(save_state - self.curiosity_logits), axis=1)
self.curiosity_optimizer = tf.train.RMSPropOptimizer(self.LEARNING_RATE)\
.minimize(tf.reduce_mean(self.curiosity_cost))
total_reward = tf.add(self.curiosity_cost, self.REWARD)
with tf.variable_scope("q_model"):
with tf.variable_scope("eval_net"):
x_action = tf.layers.dense(self.X, 128, tf.nn.relu)
self.logits = tf.layers.dense(x_action, self.OUTPUT_SIZE)
with tf.variable_scope("target_net"):
y_action = tf.layers.dense(self.Y, 128, tf.nn.relu)
y_q = tf.layers.dense(y_action, self.OUTPUT_SIZE)
q_target = total_reward + self.GAMMA * tf.reduce_max(y_q, axis=1)
action = tf.cast(self.ACTION, tf.int32)
action_indices = tf.stack([tf.range(self.batch_size, dtype=tf.int32), action], axis=1)
q = tf.gather_nd(params=self.logits, indices=action_indices)
self.cost = tf.losses.mean_squared_error(labels=q_target, predictions=q)
self.optimizer = tf.train.RMSPropOptimizer(self.LEARNING_RATE).minimize(
self.cost, var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "q_model/eval_net"))
t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='q_model/target_net')
e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='q_model/eval_net')
self.target_replace_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]
self.sess = tf.InteractiveSession()
self.sess.run(tf.global_variables_initializer())
def _memorize(self, state, action, reward, new_state, done):
self.MEMORIES.append((state, action, reward, new_state, done))
if len(self.MEMORIES) > self.MEMORY_SIZE:
self.MEMORIES.popleft()
def get_state(self, t):
window_size = self.window_size + 1
d = t - window_size + 1
block = self.trend[d : t + 1] if d >= 0 else -d * [self.trend[0]] + self.trend[0 : t + 1]
res = []
for i in range(window_size - 1):
res.append(block[i + 1] - block[i])
return np.array(res)
def predict(self, inputs):
return self.sess.run(self.logits, feed_dict={self.X:inputs})
def get_predicted_action(self, sequence):
prediction = self.predict(np.array(sequence))[0]
return np.argmax(prediction)
def _select_action(self, state):
if np.random.rand() < self.EPSILON:
action = np.random.randint(self.OUTPUT_SIZE)
else:
action = self.get_predicted_action([state])
return action
def _construct_memories(self, replay):
states = np.array([a[0] for a in replay])
actions = np.array([a[1] for a in replay])
rewards = np.array([a[2] for a in replay])
new_states = np.array([a[3] for a in replay])
if (self.T_COPY + 1) % self.COPY == 0:
self.sess.run(self.target_replace_op)
cost, _ = self.sess.run([self.cost, self.optimizer], feed_dict = {
self.X: states, self.Y: new_states, self.ACTION: actions, self.REWARD: rewards
})
if (self.T_COPY + 1) % self.COPY == 0:
self.sess.run(self.curiosity_optimizer, feed_dict = {
self.X: states, self.Y: new_states, self.ACTION: actions, self.REWARD: rewards
})
return cost
def buy(self, initial_money):
starting_money = initial_money
states_sell = []
states_buy = []
inventory = []
state = self.get_state(0)
for t in range(0, len(self.trend) - 1, self.skip):
action = self._select_action(state)
next_state = self.get_state(t + 1)
if action == 1 and initial_money >= self.trend[t]:
inventory.append(self.trend[t])
initial_money -= self.trend[t]
states_buy.append(t)
print('day %d: buy 1 unit at price %f, total balance %f'% (t, self.trend[t], initial_money))
elif action == 2 and len(inventory):
bought_price = inventory.pop(0)
initial_money += self.trend[t]
states_sell.append(t)
try:
invest = ((close[t] - bought_price) / bought_price) * 100
except:
invest = 0
print(
'day %d, sell 1 unit at price %f, investment %f %%, total balance %f,'
% (t, close[t], invest, initial_money)
)
state = next_state
invest = ((initial_money - starting_money) / starting_money) * 100
total_gains = initial_money - starting_money
return states_buy, states_sell, total_gains, invest
def train(self, iterations, checkpoint, initial_money):
for i in range(iterations):
total_profit = 0
inventory = []
state = self.get_state(0)
starting_money = initial_money
for t in range(0, len(self.trend) - 1, self.skip):
action = self._select_action(state)
next_state = self.get_state(t + 1)
if action == 1 and starting_money >= self.trend[t]:
inventory.append(self.trend[t])
starting_money -= self.trend[t]
elif action == 2 and len(inventory) > 0:
bought_price = inventory.pop(0)
total_profit += self.trend[t] - bought_price
starting_money += self.trend[t]
invest = ((starting_money - initial_money) / initial_money)
self._memorize(state, action, invest, next_state, starting_money < initial_money)
batch_size = min(len(self.MEMORIES), self.BATCH_SIZE)
state = next_state
replay = random.sample(self.MEMORIES, batch_size)
cost = self._construct_memories(replay)
self.T_COPY += 1
self.EPSILON = self.MIN_EPSILON + (1.0 - self.MIN_EPSILON) * np.exp(-self.DECAY_RATE * i)
if (i+1) % checkpoint == 0:
print('epoch: %d, total rewards: %f.3, cost: %f, total money: %f'%(i + 1, total_profit, cost,
starting_money))
close = df.Close.values.tolist()
initial_money = 10000
window_size = 30
skip = 1
batch_size = 32
agent = Agent(state_size = window_size,
window_size = window_size,
trend = close,
skip = skip)
agent.train(iterations = 200, checkpoint = 10, initial_money = initial_money)
epoch: 10, total rewards: 2349.819823.3, cost: 69092.625000, total money: 12349.819823 epoch: 20, total rewards: 648.444882.3, cost: 4775652.000000, total money: 6742.654903 epoch: 30, total rewards: 1543.784977.3, cost: 26533.583984, total money: 7642.034916 epoch: 40, total rewards: 1360.930418.3, cost: 871420.750000, total money: 695.580380 epoch: 50, total rewards: 2233.069826.3, cost: 228718.296875, total money: 6354.209779 epoch: 60, total rewards: 1573.414983.3, cost: 407432.843750, total money: 8625.614995 epoch: 70, total rewards: -7.114931.3, cost: 32132.660156, total money: 5021.405088 epoch: 80, total rewards: 798.045042.3, cost: 435778.562500, total money: 9780.935057 epoch: 90, total rewards: 575.719967.3, cost: 72847.468750, total money: 9559.079952 epoch: 100, total rewards: 338.655157.3, cost: 379671.968750, total money: 820.245184 epoch: 110, total rewards: 277.220155.3, cost: 391019.375000, total money: 3452.330140 epoch: 120, total rewards: 370.379826.3, cost: 429969.843750, total money: 7361.909793 epoch: 130, total rewards: 441.860107.3, cost: 2082513.625000, total money: 2538.970093 epoch: 140, total rewards: 709.099850.3, cost: 558315.562500, total money: 130.919796 epoch: 150, total rewards: 159.675106.3, cost: 2904243.000000, total money: 481.725093 epoch: 160, total rewards: 581.489981.3, cost: 1408646.250000, total money: 5631.309988 epoch: 170, total rewards: 1768.579776.3, cost: 1693698.250000, total money: 15.189760 epoch: 180, total rewards: 952.280210.3, cost: 1472623.250000, total money: 8990.750181 epoch: 190, total rewards: 1418.655145.3, cost: 25627934.000000, total money: 3706.275139 epoch: 200, total rewards: 272.595214.3, cost: 922414.500000, total money: 9255.485229
states_buy, states_sell, total_gains, invest = agent.buy(initial_money = initial_money)
day 7: buy 1 unit at price 754.020020, total balance 9245.979980 day 8: buy 1 unit at price 736.080017, total balance 8509.899963 day 9: buy 1 unit at price 758.489990, total balance 7751.409973 day 10: buy 1 unit at price 764.479980, total balance 6986.929993 day 11: buy 1 unit at price 771.229980, total balance 6215.700013 day 15, sell 1 unit at price 760.989990, investment 0.924375 %, total balance 6976.690003, day 18: buy 1 unit at price 770.840027, total balance 6205.849976 day 19: buy 1 unit at price 758.039978, total balance 5447.809998 day 22: buy 1 unit at price 762.520020, total balance 4685.289978 day 23: buy 1 unit at price 759.109985, total balance 3926.179993 day 24, sell 1 unit at price 771.190002, investment 4.769860 %, total balance 4697.369995, day 27: buy 1 unit at price 789.270020, total balance 3908.099975 day 28, sell 1 unit at price 796.099976, investment 4.958534 %, total balance 4704.199951, day 29: buy 1 unit at price 797.070007, total balance 3907.129944 day 30, sell 1 unit at price 797.849976, investment 4.365058 %, total balance 4704.979920, day 31: buy 1 unit at price 790.799988, total balance 3914.179932 day 32, sell 1 unit at price 794.200012, investment 2.978363 %, total balance 4708.379944, day 33: buy 1 unit at price 796.419983, total balance 3911.959961 day 35: buy 1 unit at price 791.260010, total balance 3120.699951 day 36, sell 1 unit at price 789.909973, investment 2.473917 %, total balance 3910.609924, day 37: buy 1 unit at price 791.549988, total balance 3119.059936 day 40, sell 1 unit at price 771.820007, investment 1.817850 %, total balance 3890.879943, day 41, sell 1 unit at price 786.140015, investment 3.097623 %, total balance 4677.019958, day 42, sell 1 unit at price 786.900024, investment 3.660871 %, total balance 5463.919982, day 43, sell 1 unit at price 794.020020, investment 0.601822 %, total balance 6257.940002, day 44: buy 1 unit at price 806.150024, total balance 5451.789978 day 45: buy 1 unit at price 806.650024, total balance 4645.139954 day 47: buy 1 unit at price 807.909973, total balance 3837.229981 day 48, sell 1 unit at price 806.359985, investment 1.165516 %, total balance 4643.589966, day 50, sell 1 unit at price 804.609985, investment 1.746332 %, total balance 5448.199951, day 51: buy 1 unit at price 806.070007, total balance 4642.129944 day 52: buy 1 unit at price 802.174988, total balance 3839.954956 day 54: buy 1 unit at price 819.309998, total balance 3020.644958 day 55, sell 1 unit at price 823.869995, investment 3.446675 %, total balance 3844.514953, day 56: buy 1 unit at price 835.669983, total balance 3008.844970 day 57: buy 1 unit at price 832.150024, total balance 2176.694946 day 58: buy 1 unit at price 823.309998, total balance 1353.384948 day 60, sell 1 unit at price 796.789978, investment 0.698881 %, total balance 2150.174926, day 61: buy 1 unit at price 795.695007, total balance 1354.479919 day 62: buy 1 unit at price 798.530029, total balance 555.949890 day 63, sell 1 unit at price 801.489990, investment 1.255764 %, total balance 1357.439880, day 64, sell 1 unit at price 801.340027, investment -0.596663 %, total balance 2158.779907, day 65, sell 1 unit at price 806.969971, investment 0.039664 %, total balance 2965.749878, day 66, sell 1 unit at price 808.380005, investment 0.058179 %, total balance 3774.129883, day 67: buy 1 unit at price 809.559998, total balance 2964.569885 day 68, sell 1 unit at price 813.669983, investment 0.942843 %, total balance 3778.239868, day 69, sell 1 unit at price 819.239990, investment 2.127342 %, total balance 4597.479858, day 70, sell 1 unit at price 820.450012, investment 0.139143 %, total balance 5417.929870, day 73: buy 1 unit at price 828.070007, total balance 4589.859863 day 75: buy 1 unit at price 830.760010, total balance 3759.099853 day 76, sell 1 unit at price 831.330017, investment -0.519340 %, total balance 4590.429870, day 81, sell 1 unit at price 830.630005, investment -0.182662 %, total balance 5421.059875, day 82, sell 1 unit at price 829.080017, investment 0.700832 %, total balance 6250.139892, day 83, sell 1 unit at price 827.780029, investment 4.032327 %, total balance 7077.919921, day 84: buy 1 unit at price 831.909973, total balance 6246.009948 day 87: buy 1 unit at price 843.250000, total balance 5402.759948 day 88: buy 1 unit at price 845.539978, total balance 4557.219970 day 89, sell 1 unit at price 845.619995, investment 5.897081 %, total balance 5402.839965, day 92, sell 1 unit at price 852.119995, investment 5.257176 %, total balance 6254.959960, day 93: buy 1 unit at price 848.400024, total balance 5406.559936 day 94: buy 1 unit at price 830.460022, total balance 4576.099914 day 95, sell 1 unit at price 829.590027, investment 0.183562 %, total balance 5405.689941, day 99, sell 1 unit at price 820.919983, investment -1.184461 %, total balance 6226.609924, day 101, sell 1 unit at price 831.500000, investment -0.049281 %, total balance 7058.109924, day 103, sell 1 unit at price 838.549988, investment -0.557369 %, total balance 7896.659912, day 104: buy 1 unit at price 834.570007, total balance 7062.089905 day 107, sell 1 unit at price 824.669983, investment -2.468245 %, total balance 7886.759888, day 109, sell 1 unit at price 823.349976, investment -2.952622 %, total balance 8710.109864, day 110, sell 1 unit at price 824.320007, investment -0.739351 %, total balance 9534.429871, day 111: buy 1 unit at price 823.559998, total balance 8710.869873 day 112, sell 1 unit at price 837.169983, investment 0.311535 %, total balance 9548.039856, day 113, sell 1 unit at price 836.820007, investment 1.610084 %, total balance 10384.859863, day 122: buy 1 unit at price 912.570007, total balance 9472.289856 day 123, sell 1 unit at price 916.440002, investment 0.424077 %, total balance 10388.729858, day 128: buy 1 unit at price 932.169983, total balance 9456.559875 day 129, sell 1 unit at price 928.780029, investment -0.363663 %, total balance 10385.339904, day 130: buy 1 unit at price 930.599976, total balance 9454.739928 day 132: buy 1 unit at price 937.080017, total balance 8517.659911 day 133, sell 1 unit at price 943.000000, investment 1.332476 %, total balance 9460.659911, day 134, sell 1 unit at price 919.619995, investment -1.863237 %, total balance 10380.279906, day 140: buy 1 unit at price 969.539978, total balance 9410.739928 day 141, sell 1 unit at price 971.469971, investment 0.199063 %, total balance 10382.209899, day 145: buy 1 unit at price 975.599976, total balance 9406.609923 day 146: buy 1 unit at price 983.679993, total balance 8422.929930 day 149, sell 1 unit at price 983.409973, investment 0.800533 %, total balance 9406.339903, day 150: buy 1 unit at price 949.830017, total balance 8456.509886 day 151, sell 1 unit at price 942.900024, investment -4.145654 %, total balance 9399.409910, day 152, sell 1 unit at price 953.400024, investment 0.375857 %, total balance 10352.809934, day 159: buy 1 unit at price 957.090027, total balance 9395.719907 day 160: buy 1 unit at price 965.590027, total balance 8430.129880 day 161: buy 1 unit at price 952.270020, total balance 7477.859860 day 164: buy 1 unit at price 917.789978, total balance 6560.069882 day 167: buy 1 unit at price 911.710022, total balance 5648.359860 day 170: buy 1 unit at price 928.799988, total balance 4719.559872 day 171, sell 1 unit at price 930.090027, investment -2.821051 %, total balance 5649.649899, day 173: buy 1 unit at price 947.159973, total balance 4702.489926 day 174: buy 1 unit at price 955.989990, total balance 3746.499936 day 175: buy 1 unit at price 953.419983, total balance 2793.079953 day 176, sell 1 unit at price 965.400024, investment -0.019677 %, total balance 3758.479977, day 177: buy 1 unit at price 970.890015, total balance 2787.589962 day 178, sell 1 unit at price 968.150024, investment 1.667595 %, total balance 3755.739986, day 179: buy 1 unit at price 972.919983, total balance 2782.820003 day 180: buy 1 unit at price 980.340027, total balance 1802.479976 day 181: buy 1 unit at price 950.700012, total balance 851.779964 day 183, sell 1 unit at price 934.090027, investment 1.776011 %, total balance 1785.869991, day 184, sell 1 unit at price 941.530029, investment 3.270778 %, total balance 2727.400020, day 185, sell 1 unit at price 930.500000, investment 0.183033 %, total balance 3657.900020, day 186: buy 1 unit at price 930.830017, total balance 2727.070003 day 190: buy 1 unit at price 929.359985, total balance 1797.710018 day 191: buy 1 unit at price 926.789978, total balance 870.920040 day 192, sell 1 unit at price 922.900024, investment -2.561336 %, total balance 1793.820064, day 193, sell 1 unit at price 907.239990, investment -5.099426 %, total balance 2701.060054, day 194, sell 1 unit at price 914.390015, investment -4.093681 %, total balance 3615.450069, day 195: buy 1 unit at price 922.669983, total balance 2692.780086 day 196, sell 1 unit at price 922.219971, investment -5.012931 %, total balance 3615.000057, day 197: buy 1 unit at price 926.960022, total balance 2688.040035 day 198: buy 1 unit at price 910.979980, total balance 1777.060055 day 199: buy 1 unit at price 910.669983, total balance 866.390072 day 200, sell 1 unit at price 906.659973, investment -6.810427 %, total balance 1773.050045, day 201, sell 1 unit at price 924.690002, investment -5.676604 %, total balance 2697.740047, day 202: buy 1 unit at price 927.000000, total balance 1770.740047 day 204, sell 1 unit at price 915.890015, investment -3.661512 %, total balance 2686.630062, day 210, sell 1 unit at price 928.450012, investment -0.255686 %, total balance 3615.080074, day 214: buy 1 unit at price 929.080017, total balance 2686.000057 day 215: buy 1 unit at price 932.070007, total balance 1753.930050 day 216: buy 1 unit at price 935.090027, total balance 818.840023 day 220, sell 1 unit at price 921.809998, investment -0.812386 %, total balance 1740.650021, day 221, sell 1 unit at price 931.580017, investment 0.516842 %, total balance 2672.230038, day 222, sell 1 unit at price 932.450012, investment 1.059970 %, total balance 3604.680050, day 224, sell 1 unit at price 920.969971, investment -0.646204 %, total balance 4525.650021, day 226, sell 1 unit at price 944.489990, investment 3.678457 %, total balance 5470.140011, day 227, sell 1 unit at price 949.500000, investment 4.263896 %, total balance 6419.640011, day 228, sell 1 unit at price 959.109985, investment 3.463860 %, total balance 7378.749996, day 229, sell 1 unit at price 953.270020, investment 2.603651 %, total balance 8332.020016, day 230, sell 1 unit at price 957.789978, investment 2.759446 %, total balance 9289.809994, day 232, sell 1 unit at price 969.960022, investment 3.729052 %, total balance 10259.770016, day 235: buy 1 unit at price 972.599976, total balance 9287.170040 day 236, sell 1 unit at price 989.250000, investment 1.711909 %, total balance 10276.420040, day 237: buy 1 unit at price 987.830017, total balance 9288.590023 day 238, sell 1 unit at price 989.679993, investment 0.187277 %, total balance 10278.270016, day 241: buy 1 unit at price 992.809998, total balance 9285.460018 day 242, sell 1 unit at price 984.450012, investment -0.842053 %, total balance 10269.910030, day 245: buy 1 unit at price 970.539978, total balance 9299.370052 day 246: buy 1 unit at price 973.330017, total balance 8326.040035 day 247, sell 1 unit at price 972.559998, investment 0.208134 %, total balance 9298.600033, day 249: buy 1 unit at price 1017.109985, total balance 8281.490048 day 250, sell 1 unit at price 1016.640015, investment 4.449672 %, total balance 9298.130063,
fig = plt.figure(figsize = (15,5))
plt.plot(close, color='r', lw=2.)
plt.plot(close, '^', markersize=10, color='m', label = 'buying signal', markevery = states_buy)
plt.plot(close, 'v', markersize=10, color='k', label = 'selling signal', markevery = states_sell)
plt.title('total gains %f, total investment %f%%'%(total_gains, invest))
plt.legend()
plt.savefig('output/'+name+'.png')
plt.show()
from collections import deque
import random
df= df_full.copy()
name = 'Recurrent Curiosity Q-learning agent'
class Agent:
LEARNING_RATE = 0.003
BATCH_SIZE = 32
LAYER_SIZE = 128
OUTPUT_SIZE = 3
EPSILON = 0.5
DECAY_RATE = 0.005
MIN_EPSILON = 0.1
GAMMA = 0.99
MEMORIES = deque()
COPY = 1000
T_COPY = 0
MEMORY_SIZE = 300
def __init__(self, state_size, window_size, trend, skip):
self.state_size = state_size
self.window_size = window_size
self.half_window = window_size // 2
self.trend = trend
self.skip = skip
tf.reset_default_graph()
self.INITIAL_FEATURES = np.zeros((4, self.state_size))
self.X = tf.placeholder(tf.float32, (None, None, self.state_size))
self.Y = tf.placeholder(tf.float32, (None, None, self.state_size))
self.hidden_layer = tf.placeholder(tf.float32, (None, 2 * self.LAYER_SIZE))
self.ACTION = tf.placeholder(tf.float32, (None))
self.REWARD = tf.placeholder(tf.float32, (None))
self.batch_size = tf.shape(self.ACTION)[0]
self.seq_len = tf.shape(self.X)[1]
with tf.variable_scope('curiosity_model'):
action = tf.reshape(self.ACTION, (-1,1,1))
repeat_action = tf.tile(action, [1,self.seq_len,1])
state_action = tf.concat([self.X, repeat_action], axis=-1)
save_state = tf.identity(self.Y)
cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE, state_is_tuple = False)
self.rnn,last_state = tf.nn.dynamic_rnn(inputs=state_action,cell=cell,
dtype=tf.float32,
initial_state=self.hidden_layer)
self.curiosity_logits = tf.layers.dense(self.rnn[:,-1], self.state_size)
self.curiosity_cost = tf.reduce_sum(tf.square(save_state[:,-1] - self.curiosity_logits), axis=1)
self.curiosity_optimizer = tf.train.RMSPropOptimizer(self.LEARNING_RATE)\
.minimize(tf.reduce_mean(self.curiosity_cost))
total_reward = tf.add(self.curiosity_cost, self.REWARD)
with tf.variable_scope("q_model"):
with tf.variable_scope("eval_net"):
cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE, state_is_tuple = False)
rnn,self.last_state = tf.nn.dynamic_rnn(inputs=self.X,cell=cell,
dtype=tf.float32,
initial_state=self.hidden_layer)
self.logits = tf.layers.dense(rnn[:,-1], self.OUTPUT_SIZE)
with tf.variable_scope("target_net"):
cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE, state_is_tuple = False)
rnn,last_state = tf.nn.dynamic_rnn(inputs=self.Y,cell=cell,
dtype=tf.float32,
initial_state=self.hidden_layer)
y_q = tf.layers.dense(rnn[:,-1], self.OUTPUT_SIZE)
q_target = total_reward + self.GAMMA * tf.reduce_max(y_q, axis=1)
action = tf.cast(self.ACTION, tf.int32)
action_indices = tf.stack([tf.range(self.batch_size, dtype=tf.int32), action], axis=1)
q = tf.gather_nd(params=self.logits, indices=action_indices)
self.cost = tf.losses.mean_squared_error(labels=q_target, predictions=q)
self.optimizer = tf.train.RMSPropOptimizer(self.LEARNING_RATE).minimize(
self.cost, var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "q_model/eval_net"))
t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='q_model/target_net')
e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='q_model/eval_net')
self.target_replace_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]
self.sess = tf.InteractiveSession()
self.sess.run(tf.global_variables_initializer())
def _memorize(self, state, action, reward, new_state, done, rnn_state):
self.MEMORIES.append((state, action, reward, new_state, done, rnn_state))
if len(self.MEMORIES) > self.MEMORY_SIZE:
self.MEMORIES.popleft()
def get_state(self, t):
window_size = self.window_size + 1
d = t - window_size + 1
block = self.trend[d : t + 1] if d >= 0 else -d * [self.trend[0]] + self.trend[0 : t + 1]
res = []
for i in range(window_size - 1):
res.append(block[i + 1] - block[i])
return np.array(res)
def _construct_memories(self, replay):
states = np.array([a[0] for a in replay])
actions = np.array([a[1] for a in replay])
rewards = np.array([a[2] for a in replay])
new_states = np.array([a[3] for a in replay])
init_values = np.array([a[-1] for a in replay])
if (self.T_COPY + 1) % self.COPY == 0:
self.sess.run(self.target_replace_op)
cost, _ = self.sess.run([self.cost, self.optimizer], feed_dict = {
self.X: states, self.Y: new_states, self.ACTION: actions, self.REWARD: rewards,
self.hidden_layer: init_values
})
if (self.T_COPY + 1) % self.COPY == 0:
self.sess.run(self.curiosity_optimizer, feed_dict = {
self.X: states, self.Y: new_states, self.ACTION: actions, self.REWARD: rewards,
self.hidden_layer: init_values
})
return cost
def buy(self, initial_money):
starting_money = initial_money
states_sell = []
states_buy = []
inventory = []
state = self.get_state(0)
init_value = np.zeros((1, 2 * self.LAYER_SIZE))
for k in range(self.INITIAL_FEATURES.shape[0]):
self.INITIAL_FEATURES[k,:] = state
for t in range(0, len(self.trend) - 1, self.skip):
if np.random.rand() < self.EPSILON:
action = np.random.randint(self.OUTPUT_SIZE)
else:
action, last_state = self.sess.run([self.logits,
self.last_state],
feed_dict={self.X:[self.INITIAL_FEATURES],
self.hidden_layer:init_value})
action, init_value = np.argmax(action[0]), last_state
next_state = self.get_state(t + 1)
if action == 1 and initial_money >= self.trend[t]:
inventory.append(self.trend[t])
initial_money -= self.trend[t]
states_buy.append(t)
print('day %d: buy 1 unit at price %f, total balance %f'% (t, self.trend[t], initial_money))
elif action == 2 and len(inventory):
bought_price = inventory.pop(0)
initial_money += self.trend[t]
states_sell.append(t)
try:
invest = ((close[t] - bought_price) / bought_price) * 100
except:
invest = 0
print(
'day %d, sell 1 unit at price %f, investment %f %%, total balance %f,'
% (t, close[t], invest, initial_money)
)
new_state = np.append([self.get_state(t + 1)], self.INITIAL_FEATURES[:3, :], axis = 0)
self.INITIAL_FEATURES = new_state
invest = ((initial_money - starting_money) / starting_money) * 100
total_gains = initial_money - starting_money
return states_buy, states_sell, total_gains, invest
def train(self, iterations, checkpoint, initial_money):
for i in range(iterations):
total_profit = 0
inventory = []
state = self.get_state(0)
starting_money = initial_money
init_value = np.zeros((1, 2 * self.LAYER_SIZE))
for k in range(self.INITIAL_FEATURES.shape[0]):
self.INITIAL_FEATURES[k,:] = state
for t in range(0, len(self.trend) - 1, self.skip):
if np.random.rand() < self.EPSILON:
action = np.random.randint(self.OUTPUT_SIZE)
else:
action, last_state = self.sess.run([self.logits,
self.last_state],
feed_dict={self.X:[self.INITIAL_FEATURES],
self.hidden_layer:init_value})
action, init_value = np.argmax(action[0]), last_state
next_state = self.get_state(t + 1)
if action == 1 and starting_money >= self.trend[t]:
inventory.append(self.trend[t])
starting_money -= self.trend[t]
elif action == 2 and len(inventory) > 0:
bought_price = inventory.pop(0)
total_profit += self.trend[t] - bought_price
starting_money += self.trend[t]
invest = ((starting_money - initial_money) / initial_money)
new_state = np.append([self.get_state(t + 1)], self.INITIAL_FEATURES[:3, :], axis = 0)
self._memorize(self.INITIAL_FEATURES, action, invest, new_state,
starting_money < initial_money, init_value[0])
self.INITIAL_FEATURES = new_state
batch_size = min(len(self.MEMORIES), self.BATCH_SIZE)
replay = random.sample(self.MEMORIES, batch_size)
cost = self._construct_memories(replay)
self.T_COPY += 1
self.EPSILON = self.MIN_EPSILON + (1.0 - self.MIN_EPSILON) * np.exp(-self.DECAY_RATE * i)
if (i+1) % checkpoint == 0:
print('epoch: %d, total rewards: %f.3, cost: %f, total money: %f'%(i + 1, total_profit, cost,
starting_money))
close = df.Close.values.tolist()
initial_money = 10000
window_size = 30
skip = 1
batch_size = 32
agent = Agent(state_size = window_size,
window_size = window_size,
trend = close,
skip = skip)
agent.train(iterations = 200, checkpoint = 10, initial_money = initial_money)
WARNING:tensorflow:<tensorflow.python.ops.rnn_cell_impl.LSTMCell object at 0x7ff38845bba8>: Using a concatenated state is slower and will soon be deprecated. Use state_is_tuple=True. WARNING:tensorflow:<tensorflow.python.ops.rnn_cell_impl.LSTMCell object at 0x7ff2f112ed68>: Using a concatenated state is slower and will soon be deprecated. Use state_is_tuple=True. WARNING:tensorflow:<tensorflow.python.ops.rnn_cell_impl.LSTMCell object at 0x7ff2f112eac8>: Using a concatenated state is slower and will soon be deprecated. Use state_is_tuple=True. epoch: 10, total rewards: 685.860168.3, cost: 4139534.500000, total money: 977.580137 epoch: 20, total rewards: 1724.255003.3, cost: 5132677.500000, total money: 5851.904966 epoch: 30, total rewards: 493.970035.3, cost: 3979546.750000, total money: 8528.600039 epoch: 40, total rewards: 1580.255128.3, cost: 5099559.000000, total money: 4018.855103 epoch: 50, total rewards: 1467.990231.3, cost: 4410721.500000, total money: 8490.720211 epoch: 60, total rewards: 1285.420161.3, cost: 3993190.000000, total money: 2688.440118 epoch: 70, total rewards: 391.130068.3, cost: 3420379.000000, total money: 6491.710085 epoch: 80, total rewards: 1276.110108.3, cost: 3443612.750000, total money: 3698.110047 epoch: 90, total rewards: 672.475340.3, cost: 2882908.000000, total money: 208.605285 epoch: 100, total rewards: 706.604982.3, cost: 3108476.500000, total money: 1169.724916 epoch: 110, total rewards: 979.940367.3, cost: 2024909.750000, total money: 3200.720335 epoch: 120, total rewards: 853.199893.3, cost: 4572564.500000, total money: 6070.309879 epoch: 130, total rewards: 1339.975223.3, cost: 3904469.500000, total money: 7475.465274 epoch: 140, total rewards: 1136.924864.3, cost: 4352429.000000, total money: 4448.164854 epoch: 150, total rewards: 1499.745116.3, cost: 2398584.500000, total money: 3999.355042 epoch: 160, total rewards: 481.755190.3, cost: 3168836.250000, total money: 7573.215212 epoch: 170, total rewards: 1733.610290.3, cost: 1907320.875000, total money: 6940.950254 epoch: 180, total rewards: 390.074828.3, cost: 2862924.000000, total money: 5516.364805 epoch: 190, total rewards: 714.815121.3, cost: 2666878.750000, total money: 9726.615109 epoch: 200, total rewards: 1474.129822.3, cost: 3016419.000000, total money: 1901.589906
states_buy, states_sell, total_gains, invest = agent.buy(initial_money = initial_money)
day 0: buy 1 unit at price 768.700012, total balance 9231.299988 day 1, sell 1 unit at price 762.130005, investment -0.854691 %, total balance 9993.429993, day 4: buy 1 unit at price 790.510010, total balance 9202.919983 day 5: buy 1 unit at price 785.309998, total balance 8417.609985 day 8: buy 1 unit at price 736.080017, total balance 7681.529968 day 9: buy 1 unit at price 758.489990, total balance 6923.039978 day 11, sell 1 unit at price 771.229980, investment -2.438936 %, total balance 7694.269958, day 13: buy 1 unit at price 769.200012, total balance 6925.069946 day 17: buy 1 unit at price 768.239990, total balance 6156.829956 day 19, sell 1 unit at price 758.039978, investment -3.472517 %, total balance 6914.869934, day 25, sell 1 unit at price 776.419983, investment 5.480378 %, total balance 7691.289917, day 26: buy 1 unit at price 789.289978, total balance 6901.999939 day 28: buy 1 unit at price 796.099976, total balance 6105.899963 day 31: buy 1 unit at price 790.799988, total balance 5315.099975 day 40, sell 1 unit at price 771.820007, investment 1.757441 %, total balance 6086.919982, day 46, sell 1 unit at price 804.789978, investment 4.626881 %, total balance 6891.709960, day 47, sell 1 unit at price 807.909973, investment 5.163749 %, total balance 7699.619933, day 50: buy 1 unit at price 804.609985, total balance 6895.009948 day 57: buy 1 unit at price 832.150024, total balance 6062.859924 day 58, sell 1 unit at price 823.309998, investment 4.310205 %, total balance 6886.169922, day 61: buy 1 unit at price 795.695007, total balance 6090.474915 day 62: buy 1 unit at price 798.530029, total balance 5291.944886 day 70: buy 1 unit at price 820.450012, total balance 4471.494874 day 73: buy 1 unit at price 828.070007, total balance 3643.424867 day 76: buy 1 unit at price 831.330017, total balance 2812.094850 day 85: buy 1 unit at price 835.369995, total balance 1976.724855 day 89, sell 1 unit at price 845.619995, investment 6.220327 %, total balance 2822.344850, day 91: buy 1 unit at price 848.780029, total balance 1973.564821 day 98: buy 1 unit at price 819.510010, total balance 1154.054811 day 100: buy 1 unit at price 831.409973, total balance 322.644838 day 102, sell 1 unit at price 829.559998, investment 4.901367 %, total balance 1152.204836, day 111, sell 1 unit at price 823.559998, investment 2.355180 %, total balance 1975.764834, day 113: buy 1 unit at price 836.820007, total balance 1138.944827 day 114, sell 1 unit at price 838.210022, investment 0.728234 %, total balance 1977.154849, day 117: buy 1 unit at price 862.760010, total balance 1114.394839 day 118: buy 1 unit at price 872.299988, total balance 242.094851 day 132, sell 1 unit at price 937.080017, investment 17.768744 %, total balance 1179.174868, day 138: buy 1 unit at price 948.820007, total balance 230.354861 day 139, sell 1 unit at price 954.960022, investment 19.589745 %, total balance 1185.314883, day 140: buy 1 unit at price 969.539978, total balance 215.774905 day 154, sell 1 unit at price 942.309998, investment 14.852823 %, total balance 1158.084903, day 158, sell 1 unit at price 959.450012, investment 15.865809 %, total balance 2117.534915, day 160: buy 1 unit at price 965.590027, total balance 1151.944888 day 168: buy 1 unit at price 906.690002, total balance 245.254886 day 169, sell 1 unit at price 918.590027, investment 10.496434 %, total balance 1163.844913, day 176: buy 1 unit at price 965.400024, total balance 198.444889 day 189, sell 1 unit at price 927.960022, investment 11.083715 %, total balance 1126.404911, day 191: buy 1 unit at price 926.789978, total balance 199.614933 day 195, sell 1 unit at price 922.669983, investment 8.705430 %, total balance 1122.284916, day 200, sell 1 unit at price 906.659973, investment 10.634399 %, total balance 2028.944889, day 201: buy 1 unit at price 924.690002, total balance 1104.254887 day 202, sell 1 unit at price 927.000000, investment 11.497339 %, total balance 2031.254887, day 206: buy 1 unit at price 921.289978, total balance 1109.964909 day 211: buy 1 unit at price 927.809998, total balance 182.154911 day 220, sell 1 unit at price 921.809998, investment 10.156305 %, total balance 1103.964909, day 226, sell 1 unit at price 944.489990, investment 9.473084 %, total balance 2048.454899, day 228, sell 1 unit at price 959.109985, investment 9.951851 %, total balance 3007.564884, day 230: buy 1 unit at price 957.789978, total balance 2049.774906 day 231, sell 1 unit at price 951.679993, investment 0.301426 %, total balance 3001.454899, day 234: buy 1 unit at price 977.000000, total balance 2024.454899 day 237: buy 1 unit at price 987.830017, total balance 1036.624882 day 238, sell 1 unit at price 989.679993, investment 2.077275 %, total balance 2026.304875, day 240: buy 1 unit at price 992.179993, total balance 1034.124882 day 241: buy 1 unit at price 992.809998, total balance 41.314884 day 242, sell 1 unit at price 984.450012, investment 1.953208 %, total balance 1025.764896, day 248, sell 1 unit at price 1019.270020, investment 12.416594 %, total balance 2045.034916,
fig = plt.figure(figsize = (15,5))
plt.plot(close, color='r', lw=2.)
plt.plot(close, '^', markersize=10, color='m', label = 'buying signal', markevery = states_buy)
plt.plot(close, 'v', markersize=10, color='k', label = 'selling signal', markevery = states_sell)
plt.title('total gains %f, total investment %f%%'%(total_gains, invest))
plt.legend()
plt.savefig('output/'+name+'.png')
plt.show()
from collections import deque
import random
df= df_full.copy()
name = 'Duel Curiosity Q-learning agent'
class Agent:
LEARNING_RATE = 0.003
BATCH_SIZE = 32
LAYER_SIZE = 500
OUTPUT_SIZE = 3
EPSILON = 0.5
DECAY_RATE = 0.005
MIN_EPSILON = 0.1
GAMMA = 0.99
MEMORIES = deque()
COPY = 1000
T_COPY = 0
MEMORY_SIZE = 300
def __init__(self, state_size, window_size, trend, skip):
self.state_size = state_size
self.window_size = window_size
self.half_window = window_size // 2
self.trend = trend
self.skip = skip
tf.reset_default_graph()
self.X = tf.placeholder(tf.float32, (None, self.state_size))
self.Y = tf.placeholder(tf.float32, (None, self.state_size))
self.ACTION = tf.placeholder(tf.float32, (None))
self.REWARD = tf.placeholder(tf.float32, (None))
self.batch_size = tf.shape(self.ACTION)[0]
with tf.variable_scope('curiosity_model'):
action = tf.reshape(self.ACTION, (-1,1))
state_action = tf.concat([self.X, action], axis=1)
save_state = tf.identity(self.Y)
feed = tf.layers.dense(state_action, 32, activation=tf.nn.relu)
self.curiosity_logits = tf.layers.dense(feed, self.state_size)
self.curiosity_cost = tf.reduce_sum(tf.square(save_state - self.curiosity_logits), axis=1)
self.curiosity_optimizer = tf.train.RMSPropOptimizer(self.LEARNING_RATE)\
.minimize(tf.reduce_mean(self.curiosity_cost))
total_reward = tf.add(self.curiosity_cost, self.REWARD)
with tf.variable_scope("q_model"):
with tf.variable_scope("eval_net"):
x_action = tf.layers.dense(self.X, 128, tf.nn.relu)
tensor_action, tensor_validation = tf.split(x_action,2,1)
feed_action = tf.layers.dense(tensor_action, self.OUTPUT_SIZE)
feed_validation = tf.layers.dense(tensor_validation, 1)
self.logits = feed_validation + \
tf.subtract(feed_action,tf.reduce_mean(feed_action,axis=1,keep_dims=True))
with tf.variable_scope("target_net"):
y_action = tf.layers.dense(self.Y, 128, tf.nn.relu)
tensor_action, tensor_validation = tf.split(y_action,2,1)
feed_action = tf.layers.dense(tensor_action, self.OUTPUT_SIZE)
feed_validation = tf.layers.dense(tensor_validation, 1)
y_q = feed_validation + \
tf.subtract(feed_action,tf.reduce_mean(feed_action,axis=1,keep_dims=True))
q_target = total_reward + self.GAMMA * tf.reduce_max(y_q, axis=1)
action = tf.cast(self.ACTION, tf.int32)
action_indices = tf.stack([tf.range(self.batch_size, dtype=tf.int32), action], axis=1)
q = tf.gather_nd(params=self.logits, indices=action_indices)
self.cost = tf.losses.mean_squared_error(labels=q_target, predictions=q)
self.optimizer = tf.train.RMSPropOptimizer(self.LEARNING_RATE).minimize(
self.cost, var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "q_model/eval_net"))
t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='q_model/target_net')
e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='q_model/eval_net')
self.target_replace_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]
self.sess = tf.InteractiveSession()
self.sess.run(tf.global_variables_initializer())
def _memorize(self, state, action, reward, new_state, done):
self.MEMORIES.append((state, action, reward, new_state, done))
if len(self.MEMORIES) > self.MEMORY_SIZE:
self.MEMORIES.popleft()
def get_state(self, t):
window_size = self.window_size + 1
d = t - window_size + 1
block = self.trend[d : t + 1] if d >= 0 else -d * [self.trend[0]] + self.trend[0 : t + 1]
res = []
for i in range(window_size - 1):
res.append(block[i + 1] - block[i])
return np.array(res)
def predict(self, inputs):
return self.sess.run(self.logits, feed_dict={self.X:inputs})
def get_predicted_action(self, sequence):
prediction = self.predict(np.array(sequence))[0]
return np.argmax(prediction)
def _select_action(self, state):
if np.random.rand() < self.EPSILON:
action = np.random.randint(self.OUTPUT_SIZE)
else:
action = self.get_predicted_action([state])
return action
def _construct_memories(self, replay):
states = np.array([a[0] for a in replay])
actions = np.array([a[1] for a in replay])
rewards = np.array([a[2] for a in replay])
new_states = np.array([a[3] for a in replay])
if (self.T_COPY + 1) % self.COPY == 0:
self.sess.run(self.target_replace_op)
cost, _ = self.sess.run([self.cost, self.optimizer], feed_dict = {
self.X: states, self.Y: new_states, self.ACTION: actions, self.REWARD: rewards
})
if (self.T_COPY + 1) % self.COPY == 0:
self.sess.run(self.curiosity_optimizer, feed_dict = {
self.X: states, self.Y: new_states, self.ACTION: actions, self.REWARD: rewards
})
return cost
def buy(self, initial_money):
starting_money = initial_money
states_sell = []
states_buy = []
inventory = []
state = self.get_state(0)
for t in range(0, len(self.trend) - 1, self.skip):
action = self._select_action(state)
next_state = self.get_state(t + 1)
if action == 1 and initial_money >= self.trend[t]:
inventory.append(self.trend[t])
initial_money -= self.trend[t]
states_buy.append(t)
print('day %d: buy 1 unit at price %f, total balance %f'% (t, self.trend[t], initial_money))
elif action == 2 and len(inventory):
bought_price = inventory.pop(0)
initial_money += self.trend[t]
states_sell.append(t)
try:
invest = ((close[t] - bought_price) / bought_price) * 100
except:
invest = 0
print(
'day %d, sell 1 unit at price %f, investment %f %%, total balance %f,'
% (t, close[t], invest, initial_money)
)
state = next_state
invest = ((initial_money - starting_money) / starting_money) * 100
total_gains = initial_money - starting_money
return states_buy, states_sell, total_gains, invest
def train(self, iterations, checkpoint, initial_money):
for i in range(iterations):
total_profit = 0
inventory = []
state = self.get_state(0)
starting_money = initial_money
for t in range(0, len(self.trend) - 1, self.skip):
action = self._select_action(state)
next_state = self.get_state(t + 1)
if action == 1 and starting_money >= self.trend[t]:
inventory.append(self.trend[t])
starting_money -= self.trend[t]
elif action == 2 and len(inventory) > 0:
bought_price = inventory.pop(0)
total_profit += self.trend[t] - bought_price
starting_money += self.trend[t]
invest = ((starting_money - initial_money) / initial_money)
self._memorize(state, action, invest, next_state, starting_money < initial_money)
batch_size = min(len(self.MEMORIES), self.BATCH_SIZE)
state = next_state
replay = random.sample(self.MEMORIES, batch_size)
cost = self._construct_memories(replay)
self.T_COPY += 1
self.EPSILON = self.MIN_EPSILON + (1.0 - self.MIN_EPSILON) * np.exp(-self.DECAY_RATE * i)
if (i+1) % checkpoint == 0:
print('epoch: %d, total rewards: %f.3, cost: %f, total money: %f'%(i + 1, total_profit, cost,
starting_money))
close = df.Close.values.tolist()
initial_money = 10000
window_size = 30
skip = 1
batch_size = 32
agent = Agent(state_size = window_size,
window_size = window_size,
trend = close,
skip = skip)
agent.train(iterations = 200, checkpoint = 10, initial_money = initial_money)
WARNING:tensorflow:From <ipython-input-3-e49b5b607a66>:53: calling reduce_mean (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version. Instructions for updating: keep_dims is deprecated, use keepdims instead epoch: 10, total rewards: 698.460085.3, cost: 596251.000000, total money: 10698.460085 epoch: 20, total rewards: 1683.164917.3, cost: 5890915.500000, total money: 6720.204895 epoch: 30, total rewards: 1686.875004.3, cost: 75077.554688, total money: 6721.424992 epoch: 40, total rewards: 541.999876.3, cost: 2707843.750000, total money: 9525.359861 epoch: 50, total rewards: 1668.824950.3, cost: 32719.388672, total money: 7666.774900 epoch: 60, total rewards: 751.654909.3, cost: 1165742.750000, total money: 8743.134889 epoch: 70, total rewards: 1637.889772.3, cost: 325201.937500, total money: 6669.909730 epoch: 80, total rewards: 587.055053.3, cost: 1527037.250000, total money: 892.705077 epoch: 90, total rewards: 2170.969727.3, cost: 122936.546875, total money: 8204.549683 epoch: 100, total rewards: 1565.850155.3, cost: 844705.187500, total money: 19.270138 epoch: 110, total rewards: 1733.244930.3, cost: 557043.125000, total money: 6744.174861 epoch: 120, total rewards: 1282.489866.3, cost: 3785043.750000, total money: 8328.149839 epoch: 130, total rewards: 1260.559873.3, cost: 596946.312500, total money: 6319.639890 epoch: 140, total rewards: 1346.769778.3, cost: 26543662.000000, total money: 10330.129763 epoch: 150, total rewards: 2415.594848.3, cost: 851761.625000, total money: 9467.174865 epoch: 160, total rewards: 1033.800112.3, cost: 3596937.500000, total money: 9044.600099 epoch: 170, total rewards: 1597.439823.3, cost: 511038.375000, total money: 93.789859 epoch: 180, total rewards: 1736.860354.3, cost: 3795484.000000, total money: 1011.990359 epoch: 190, total rewards: 1682.540215.3, cost: 657330.250000, total money: 8675.460198 epoch: 200, total rewards: 875.094668.3, cost: 30907612.000000, total money: 10875.094668
states_buy, states_sell, total_gains, invest = agent.buy(initial_money = initial_money)
day 3: buy 1 unit at price 782.520020, total balance 9217.479980 day 4: buy 1 unit at price 790.510010, total balance 8426.969970 day 5: buy 1 unit at price 785.309998, total balance 7641.659972 day 6: buy 1 unit at price 762.559998, total balance 6879.099974 day 7: buy 1 unit at price 754.020020, total balance 6125.079954 day 9: buy 1 unit at price 758.489990, total balance 5366.589964 day 10, sell 1 unit at price 764.479980, investment -2.305377 %, total balance 6131.069944, day 11: buy 1 unit at price 771.229980, total balance 5359.839964 day 13: buy 1 unit at price 769.200012, total balance 4590.639952 day 15: buy 1 unit at price 760.989990, total balance 3829.649962 day 16: buy 1 unit at price 761.679993, total balance 3067.969969 day 17: buy 1 unit at price 768.239990, total balance 2299.729979 day 19: buy 1 unit at price 758.039978, total balance 1541.690001 day 21: buy 1 unit at price 750.500000, total balance 791.190001 day 22, sell 1 unit at price 762.520020, investment -3.540751 %, total balance 1553.710021, day 23, sell 1 unit at price 759.109985, investment -3.336264 %, total balance 2312.820006, day 24: buy 1 unit at price 771.190002, total balance 1541.630004 day 25, sell 1 unit at price 776.419983, investment 1.817560 %, total balance 2318.049987, day 27, sell 1 unit at price 789.270020, investment 4.674942 %, total balance 3107.320007, day 29, sell 1 unit at price 797.070007, investment 5.086424 %, total balance 3904.390014, day 30, sell 1 unit at price 797.849976, investment 3.451629 %, total balance 4702.239990, day 32: buy 1 unit at price 794.200012, total balance 3908.039978 day 33, sell 1 unit at price 796.419983, investment 3.538738 %, total balance 4704.459961, day 34, sell 1 unit at price 794.559998, investment 4.411360 %, total balance 5499.019959, day 38: buy 1 unit at price 785.049988, total balance 4713.969971 day 39, sell 1 unit at price 782.789978, investment 2.771503 %, total balance 5496.759949, day 40, sell 1 unit at price 771.820007, investment 0.466002 %, total balance 6268.579956, day 41: buy 1 unit at price 786.140015, total balance 5482.439941 day 45: buy 1 unit at price 806.650024, total balance 4675.789917 day 47, sell 1 unit at price 807.909973, investment 6.578808 %, total balance 5483.699890, day 50: buy 1 unit at price 804.609985, total balance 4679.089905 day 51, sell 1 unit at price 806.070007, investment 7.404398 %, total balance 5485.159912, day 52, sell 1 unit at price 802.174988, investment 4.017815 %, total balance 6287.334900, day 53: buy 1 unit at price 805.020020, total balance 5482.314880 day 54, sell 1 unit at price 819.309998, investment 3.161670 %, total balance 6301.624878, day 56: buy 1 unit at price 835.669983, total balance 5465.954895 day 58, sell 1 unit at price 823.309998, investment 4.873576 %, total balance 6289.264893, day 59, sell 1 unit at price 802.320007, investment 2.058157 %, total balance 7091.584900, day 60: buy 1 unit at price 796.789978, total balance 6294.794922 day 61, sell 1 unit at price 795.695007, investment -1.358088 %, total balance 7090.489929, day 62, sell 1 unit at price 798.530029, investment -0.755640 %, total balance 7889.019958, day 63, sell 1 unit at price 801.489990, investment -0.438502 %, total balance 8690.509948, day 66: buy 1 unit at price 808.380005, total balance 7882.129943 day 67, sell 1 unit at price 809.559998, investment -3.124437 %, total balance 8691.689941, day 68: buy 1 unit at price 813.669983, total balance 7878.019958 day 69, sell 1 unit at price 819.239990, investment 2.817557 %, total balance 8697.259948, day 70, sell 1 unit at price 820.450012, investment 1.493111 %, total balance 9517.709960, day 72, sell 1 unit at price 824.159973, investment 1.289219 %, total balance 10341.869933, day 73: buy 1 unit at price 828.070007, total balance 9513.799926 day 74, sell 1 unit at price 831.659973, investment 0.433534 %, total balance 10345.459899, day 75: buy 1 unit at price 830.760010, total balance 9514.699889 day 76: buy 1 unit at price 831.330017, total balance 8683.369872 day 78: buy 1 unit at price 829.280029, total balance 7854.089843 day 79: buy 1 unit at price 823.210022, total balance 7030.879821 day 80: buy 1 unit at price 835.239990, total balance 6195.639831 day 81, sell 1 unit at price 830.630005, investment -0.015649 %, total balance 7026.269836, day 82, sell 1 unit at price 829.080017, investment -0.270651 %, total balance 7855.349853, day 84, sell 1 unit at price 831.909973, investment 0.317136 %, total balance 8687.259826, day 85: buy 1 unit at price 835.369995, total balance 7851.889831 day 86: buy 1 unit at price 838.679993, total balance 7013.209838 day 88: buy 1 unit at price 845.539978, total balance 6167.669860 day 89: buy 1 unit at price 845.619995, total balance 5322.049865 day 90, sell 1 unit at price 847.200012, investment 2.914200 %, total balance 6169.249877, day 91: buy 1 unit at price 848.780029, total balance 5320.469848 day 92: buy 1 unit at price 852.119995, total balance 4468.349853 day 93, sell 1 unit at price 848.400024, investment 1.575599 %, total balance 5316.749877, day 94: buy 1 unit at price 830.460022, total balance 4486.289855 day 95: buy 1 unit at price 829.590027, total balance 3656.699828 day 96, sell 1 unit at price 817.580017, investment -2.129593 %, total balance 4474.279845, day 97: buy 1 unit at price 814.429993, total balance 3659.849852 day 98, sell 1 unit at price 819.510010, investment -2.285733 %, total balance 4479.359862, day 100: buy 1 unit at price 831.409973, total balance 3647.949889 day 101, sell 1 unit at price 831.500000, investment -1.660475 %, total balance 4479.449889, day 103: buy 1 unit at price 838.549988, total balance 3640.899901 day 104: buy 1 unit at price 834.570007, total balance 2806.329894 day 105, sell 1 unit at price 831.409973, investment -1.680426 %, total balance 3637.739867, day 106: buy 1 unit at price 827.880005, total balance 2809.859862 day 108: buy 1 unit at price 824.729980, total balance 1985.129882 day 109: buy 1 unit at price 823.349976, total balance 1161.779906 day 110: buy 1 unit at price 824.320007, total balance 337.459899 day 112, sell 1 unit at price 837.169983, investment -1.367851 %, total balance 1174.629882, day 113: buy 1 unit at price 836.820007, total balance 337.809875 day 115, sell 1 unit at price 841.650024, investment -1.228697 %, total balance 1179.459899, day 116, sell 1 unit at price 843.190002, investment 1.532883 %, total balance 2022.649901, day 117, sell 1 unit at price 862.760010, investment 3.998358 %, total balance 2885.409911, day 118: buy 1 unit at price 872.299988, total balance 2013.109923 day 119, sell 1 unit at price 871.729980, investment 7.035594 %, total balance 2884.839903, day 120, sell 1 unit at price 874.250000, investment 5.152696 %, total balance 3759.089903, day 123, sell 1 unit at price 916.440002, investment 9.288655 %, total balance 4675.529905, day 124: buy 1 unit at price 927.039978, total balance 3748.489927 day 126, sell 1 unit at price 927.130005, investment 11.090741 %, total balance 4675.619932, day 127, sell 1 unit at price 934.299988, investment 12.854518 %, total balance 5609.919920, day 128, sell 1 unit at price 932.169983, investment 13.027294 %, total balance 6542.089903, day 129: buy 1 unit at price 928.780029, total balance 5613.309874 day 133, sell 1 unit at price 943.000000, investment 14.532098 %, total balance 6556.309874, day 137, sell 1 unit at price 941.859985, investment 14.259023 %, total balance 7498.169859, day 140, sell 1 unit at price 969.539978, investment 15.860038 %, total balance 8467.709837, day 141: buy 1 unit at price 971.469971, total balance 7496.239866 day 142, sell 1 unit at price 975.880005, investment 11.874357 %, total balance 8472.119871, day 143, sell 1 unit at price 964.859985, investment 4.079652 %, total balance 9436.979856, day 144, sell 1 unit at price 966.950012, investment 4.109690 %, total balance 10403.929868, day 145, sell 1 unit at price 975.599976, investment 0.425129 %, total balance 11379.529844, day 147: buy 1 unit at price 976.570007, total balance 10402.959837 day 148: buy 1 unit at price 980.940002, total balance 9422.019835 day 149, sell 1 unit at price 983.409973, investment 0.700407 %, total balance 10405.429808, day 150: buy 1 unit at price 949.830017, total balance 9455.599791 day 151, sell 1 unit at price 942.900024, investment -3.877911 %, total balance 10398.499815, day 152, sell 1 unit at price 953.400024, investment 0.375857 %, total balance 11351.899839, day 153: buy 1 unit at price 950.760010, total balance 10401.139829 day 155, sell 1 unit at price 939.780029, investment -1.154864 %, total balance 11340.919858, day 156: buy 1 unit at price 957.369995, total balance 10383.549863 day 157: buy 1 unit at price 950.630005, total balance 9432.919858 day 158: buy 1 unit at price 959.450012, total balance 8473.469846 day 159, sell 1 unit at price 957.090027, investment -0.029243 %, total balance 9430.559873, day 160: buy 1 unit at price 965.590027, total balance 8464.969846 day 162, sell 1 unit at price 927.330017, investment -2.451005 %, total balance 9392.299863, day 163: buy 1 unit at price 940.489990, total balance 8451.809873 day 164: buy 1 unit at price 917.789978, total balance 7534.019895 day 165: buy 1 unit at price 908.729980, total balance 6625.289915 day 166, sell 1 unit at price 898.700012, investment -6.331752 %, total balance 7523.989927, day 167, sell 1 unit at price 911.710022, investment -5.580008 %, total balance 8435.699949, day 169: buy 1 unit at price 918.590027, total balance 7517.109922 day 170: buy 1 unit at price 928.799988, total balance 6588.309934 day 171: buy 1 unit at price 930.090027, total balance 5658.219907 day 172: buy 1 unit at price 943.830017, total balance 4714.389890 day 173: buy 1 unit at price 947.159973, total balance 3767.229917 day 175: buy 1 unit at price 953.419983, total balance 2813.809934 day 177: buy 1 unit at price 970.890015, total balance 1842.919919 day 178, sell 1 unit at price 968.150024, investment 2.941024 %, total balance 2811.069943, day 179: buy 1 unit at price 972.919983, total balance 1838.149960 day 180: buy 1 unit at price 980.340027, total balance 857.809933 day 187, sell 1 unit at price 930.390015, investment 1.372867 %, total balance 1788.199948, day 189, sell 1 unit at price 927.960022, investment 2.116145 %, total balance 2716.159970, day 190, sell 1 unit at price 929.359985, investment 1.172444 %, total balance 3645.519955, day 191, sell 1 unit at price 926.789978, investment -0.216409 %, total balance 4572.309933, day 192, sell 1 unit at price 922.900024, investment -0.773044 %, total balance 5495.209957, day 193: buy 1 unit at price 907.239990, total balance 4587.969967 day 196, sell 1 unit at price 922.219971, investment -2.289612 %, total balance 5510.189938, day 197, sell 1 unit at price 926.960022, investment -2.132686 %, total balance 6437.149960, day 198, sell 1 unit at price 910.979980, investment -4.451344 %, total balance 7348.129940, day 200, sell 1 unit at price 906.659973, investment -6.615584 %, total balance 8254.789913, day 201: buy 1 unit at price 924.690002, total balance 7330.099911 day 202: buy 1 unit at price 927.000000, total balance 6403.099911 day 204: buy 1 unit at price 915.890015, total balance 5487.209896 day 205: buy 1 unit at price 913.809998, total balance 4573.399898 day 206: buy 1 unit at price 921.289978, total balance 3652.109920 day 207: buy 1 unit at price 929.570007, total balance 2722.539913 day 208: buy 1 unit at price 939.330017, total balance 1783.209896 day 209: buy 1 unit at price 937.340027, total balance 845.869869 day 210, sell 1 unit at price 928.450012, investment -4.570774 %, total balance 1774.319881, day 211: buy 1 unit at price 927.809998, total balance 846.509883 day 215, sell 1 unit at price 932.070007, investment -4.923804 %, total balance 1778.579890, day 218: buy 1 unit at price 920.289978, total balance 858.289912 day 219, sell 1 unit at price 915.000000, investment 0.855343 %, total balance 1773.289912, day 220, sell 1 unit at price 921.809998, investment -0.311456 %, total balance 2695.099910, day 221, sell 1 unit at price 931.580017, investment 0.494069 %, total balance 3626.679927, day 223: buy 1 unit at price 928.530029, total balance 2698.149898 day 225, sell 1 unit at price 924.859985, investment 0.979372 %, total balance 3623.009883, day 228: buy 1 unit at price 959.109985, total balance 2663.899898 day 230, sell 1 unit at price 957.789978, investment 4.812814 %, total balance 3621.689876, day 231, sell 1 unit at price 951.679993, investment 3.298637 %, total balance 4573.369869, day 232, sell 1 unit at price 969.960022, investment 4.345021 %, total balance 5543.329891, day 233, sell 1 unit at price 978.890015, investment 4.211512 %, total balance 6522.219906, day 234, sell 1 unit at price 977.000000, investment 4.231119 %, total balance 7499.219906, day 235, sell 1 unit at price 972.599976, investment 4.827495 %, total balance 8471.819882, day 236, sell 1 unit at price 989.250000, investment 7.493293 %, total balance 9461.069882, day 239: buy 1 unit at price 992.000000, total balance 8469.069882 day 240, sell 1 unit at price 992.179993, investment 6.854917 %, total balance 9461.249875, day 242, sell 1 unit at price 984.450012, investment 2.642036 %, total balance 10445.699887, day 243: buy 1 unit at price 988.200012, total balance 9457.499875 day 245, sell 1 unit at price 970.539978, investment -2.163309 %, total balance 10428.039853, day 246, sell 1 unit at price 973.330017, investment -1.504756 %, total balance 11401.369870,
fig = plt.figure(figsize = (15,5))
plt.plot(close, color='r', lw=2.)
plt.plot(close, '^', markersize=10, color='m', label = 'buying signal', markevery = states_buy)
plt.plot(close, 'v', markersize=10, color='k', label = 'selling signal', markevery = states_sell)
plt.title('total gains %f, total investment %f%%'%(total_gains, invest))
plt.legend()
plt.savefig('output/'+name+'.png')
plt.show()
df= df_full.copy()
name = 'Neuro-evolution agent'
close = df.Close.values.tolist()
initial_money = 10000
window_size = 30
skip = 1
class neuralnetwork:
def __init__(self, id_, hidden_size = 128):
self.W1 = np.random.randn(window_size, hidden_size) / np.sqrt(window_size)
self.W2 = np.random.randn(hidden_size, 3) / np.sqrt(hidden_size)
self.fitness = 0
self.id = id_
def relu(X):
return np.maximum(X, 0)
def softmax(X):
e_x = np.exp(X - np.max(X, axis=-1, keepdims=True))
return e_x / np.sum(e_x, axis=-1, keepdims=True)
def feed_forward(X, nets):
a1 = np.dot(X, nets.W1)
z1 = relu(a1)
a2 = np.dot(z1, nets.W2)
return softmax(a2)
class NeuroEvolution:
def __init__(self, population_size, mutation_rate, model_generator,
state_size, window_size, trend, skip, initial_money):
self.population_size = population_size
self.mutation_rate = mutation_rate
self.model_generator = model_generator
self.state_size = state_size
self.window_size = window_size
self.half_window = window_size // 2
self.trend = trend
self.skip = skip
self.initial_money = initial_money
def _initialize_population(self):
self.population = []
for i in range(self.population_size):
self.population.append(self.model_generator(i))
def mutate(self, individual, scale=1.0):
mutation_mask = np.random.binomial(1, p=self.mutation_rate, size=individual.W1.shape)
individual.W1 += np.random.normal(loc=0, scale=scale, size=individual.W1.shape) * mutation_mask
mutation_mask = np.random.binomial(1, p=self.mutation_rate, size=individual.W2.shape)
individual.W2 += np.random.normal(loc=0, scale=scale, size=individual.W2.shape) * mutation_mask
return individual
def inherit_weights(self, parent, child):
child.W1 = parent.W1.copy()
child.W2 = parent.W2.copy()
return child
def crossover(self, parent1, parent2):
child1 = self.model_generator((parent1.id+1)*10)
child1 = self.inherit_weights(parent1, child1)
child2 = self.model_generator((parent2.id+1)*10)
child2 = self.inherit_weights(parent2, child2)
# first W
n_neurons = child1.W1.shape[1]
cutoff = np.random.randint(0, n_neurons)
child1.W1[:, cutoff:] = parent2.W1[:, cutoff:].copy()
child2.W1[:, cutoff:] = parent1.W1[:, cutoff:].copy()
# second W
n_neurons = child1.W2.shape[1]
cutoff = np.random.randint(0, n_neurons)
child1.W2[:, cutoff:] = parent2.W2[:, cutoff:].copy()
child2.W2[:, cutoff:] = parent1.W2[:, cutoff:].copy()
return child1, child2
def get_state(self, t):
window_size = self.window_size + 1
d = t - window_size + 1
block = self.trend[d : t + 1] if d >= 0 else -d * [self.trend[0]] + self.trend[0 : t + 1]
res = []
for i in range(window_size - 1):
res.append(block[i + 1] - block[i])
return np.array([res])
def act(self, p, state):
logits = feed_forward(state, p)
return np.argmax(logits, 1)[0]
def buy(self, individual):
initial_money = self.initial_money
starting_money = initial_money
state = self.get_state(0)
inventory = []
states_sell = []
states_buy = []
for t in range(0, len(self.trend) - 1, self.skip):
action = self.act(individual, state)
next_state = self.get_state(t + 1)
if action == 1 and starting_money >= self.trend[t]:
inventory.append(self.trend[t])
initial_money -= self.trend[t]
states_buy.append(t)
print('day %d: buy 1 unit at price %f, total balance %f'% (t, self.trend[t], initial_money))
elif action == 2 and len(inventory):
bought_price = inventory.pop(0)
initial_money += self.trend[t]
states_sell.append(t)
try:
invest = ((self.trend[t] - bought_price) / bought_price) * 100
except:
invest = 0
print(
'day %d, sell 1 unit at price %f, investment %f %%, total balance %f,'
% (t, self.trend[t], invest, initial_money)
)
state = next_state
invest = ((initial_money - starting_money) / starting_money) * 100
total_gains = initial_money - starting_money
return states_buy, states_sell, total_gains, invest
def calculate_fitness(self):
for i in range(self.population_size):
initial_money = self.initial_money
starting_money = initial_money
state = self.get_state(0)
inventory = []
for t in range(0, len(self.trend) - 1, self.skip):
action = self.act(self.population[i], state)
next_state = self.get_state(t + 1)
if action == 1 and starting_money >= self.trend[t]:
inventory.append(self.trend[t])
starting_money -= self.trend[t]
elif action == 2 and len(inventory):
bought_price = inventory.pop(0)
starting_money += self.trend[t]
state = next_state
invest = ((starting_money - initial_money) / initial_money) * 100
self.population[i].fitness = invest
def evolve(self, generations=20, checkpoint= 5):
self._initialize_population()
n_winners = int(self.population_size * 0.4)
n_parents = self.population_size - n_winners
for epoch in range(generations):
self.calculate_fitness()
fitnesses = [i.fitness for i in self.population]
sort_fitness = np.argsort(fitnesses)[::-1]
self.population = [self.population[i] for i in sort_fitness]
fittest_individual = self.population[0]
if (epoch+1) % checkpoint == 0:
print('epoch %d, fittest individual %d with accuracy %f'%(epoch+1, sort_fitness[0],
fittest_individual.fitness))
next_population = [self.population[i] for i in range(n_winners)]
total_fitness = np.sum([np.abs(i.fitness) for i in self.population])
parent_probabilities = [np.abs(i.fitness / total_fitness) for i in self.population]
parents = np.random.choice(self.population, size=n_parents, p=parent_probabilities, replace=False)
for i in np.arange(0, len(parents), 2):
child1, child2 = self.crossover(parents[i], parents[i+1])
next_population += [self.mutate(child1), self.mutate(child2)]
self.population = next_population
return fittest_individual
population_size = 100
generations = 100
mutation_rate = 0.1
neural_evolve = NeuroEvolution(population_size, mutation_rate, neuralnetwork,
window_size, window_size, close, skip, initial_money)
fittest_nets = neural_evolve.evolve(50)
epoch 5, fittest individual 0 with accuracy 10.849749 epoch 10, fittest individual 0 with accuracy 11.095000 epoch 15, fittest individual 0 with accuracy 11.095000 epoch 20, fittest individual 93 with accuracy 13.756802 epoch 25, fittest individual 95 with accuracy 23.728605 epoch 30, fittest individual 0 with accuracy 23.728605 epoch 35, fittest individual 0 with accuracy 23.728605 epoch 40, fittest individual 0 with accuracy 23.728605 epoch 45, fittest individual 0 with accuracy 23.728605 epoch 50, fittest individual 0 with accuracy 23.728605
states_buy, states_sell, total_gains, invest = neural_evolve.buy(fittest_nets)
day 1: buy 1 unit at price 762.130005, total balance 9237.869995 day 2: buy 1 unit at price 762.020020, total balance 8475.849975 day 3, sell 1 unit at price 782.520020, investment 2.675399 %, total balance 9258.369995, day 5, sell 1 unit at price 785.309998, investment 3.056347 %, total balance 10043.679993, day 6: buy 1 unit at price 762.559998, total balance 9281.119995 day 7: buy 1 unit at price 754.020020, total balance 8527.099975 day 8: buy 1 unit at price 736.080017, total balance 7791.019958 day 9: buy 1 unit at price 758.489990, total balance 7032.529968 day 10, sell 1 unit at price 764.479980, investment 0.251781 %, total balance 7797.009948, day 12: buy 1 unit at price 760.539978, total balance 7036.469970 day 14: buy 1 unit at price 768.270020, total balance 6268.199950 day 15: buy 1 unit at price 760.989990, total balance 5507.209960 day 16: buy 1 unit at price 761.679993, total balance 4745.529967 day 17, sell 1 unit at price 768.239990, investment 1.885888 %, total balance 5513.769957, day 18, sell 1 unit at price 770.840027, investment 4.722314 %, total balance 6284.609984, day 20: buy 1 unit at price 747.919983, total balance 5536.690001 day 21: buy 1 unit at price 750.500000, total balance 4786.190001 day 22: buy 1 unit at price 762.520020, total balance 4023.669981 day 24, sell 1 unit at price 771.190002, investment 1.674381 %, total balance 4794.859983, day 25, sell 1 unit at price 776.419983, investment 2.087991 %, total balance 5571.279966, day 26, sell 1 unit at price 789.289978, investment 2.736012 %, total balance 6360.569944, day 27: buy 1 unit at price 789.270020, total balance 5571.299924 day 28: buy 1 unit at price 796.099976, total balance 4775.199948 day 29: buy 1 unit at price 797.070007, total balance 3978.129941 day 31: buy 1 unit at price 790.799988, total balance 3187.329953 day 32, sell 1 unit at price 794.200012, investment 4.364055 %, total balance 3981.529965, day 33: buy 1 unit at price 796.419983, total balance 3185.109982 day 34, sell 1 unit at price 794.559998, investment 4.316774 %, total balance 3979.669980, day 35, sell 1 unit at price 791.260010, investment 5.794741 %, total balance 4770.929990, day 37: buy 1 unit at price 791.549988, total balance 3979.380002 day 38, sell 1 unit at price 785.049988, investment 4.603596 %, total balance 4764.429990, day 39: buy 1 unit at price 782.789978, total balance 3981.640012 day 40: buy 1 unit at price 771.820007, total balance 3209.820005 day 41: buy 1 unit at price 786.140015, total balance 2423.679990 day 42, sell 1 unit at price 786.900024, investment 3.197294 %, total balance 3210.580014, day 43: buy 1 unit at price 794.020020, total balance 2416.559994 day 44: buy 1 unit at price 806.150024, total balance 1610.409970 day 46, sell 1 unit at price 804.789978, investment 1.966369 %, total balance 2415.199948, day 47, sell 1 unit at price 807.909973, investment 1.483482 %, total balance 3223.109921, day 48: buy 1 unit at price 806.359985, total balance 2416.749936 day 49, sell 1 unit at price 807.880005, investment 1.356217 %, total balance 3224.629941, day 50, sell 1 unit at price 804.609985, investment 1.746332 %, total balance 4029.239926, day 51, sell 1 unit at price 806.070007, investment 1.211675 %, total balance 4835.309933, day 52: buy 1 unit at price 802.174988, total balance 4033.134945 day 53: buy 1 unit at price 805.020020, total balance 3228.114925 day 54, sell 1 unit at price 819.309998, investment 3.507044 %, total balance 4047.424923, day 55, sell 1 unit at price 823.869995, investment 5.247898 %, total balance 4871.294918, day 56, sell 1 unit at price 835.669983, investment 8.272651 %, total balance 5706.964901, day 57, sell 1 unit at price 832.150024, investment 5.852648 %, total balance 6539.114925, day 58: buy 1 unit at price 823.309998, total balance 5715.804927 day 59: buy 1 unit at price 802.320007, total balance 4913.484920 day 60: buy 1 unit at price 796.789978, total balance 4116.694942 day 61: buy 1 unit at price 795.695007, total balance 3320.999935 day 63: buy 1 unit at price 801.489990, total balance 2519.509945 day 65: buy 1 unit at price 806.969971, total balance 1712.539974 day 66, sell 1 unit at price 808.380005, investment 1.808517 %, total balance 2520.919979, day 67: buy 1 unit at price 809.559998, total balance 1711.359981 day 68, sell 1 unit at price 813.669983, investment 0.932824 %, total balance 2525.029964, day 69, sell 1 unit at price 819.239990, investment 1.597302 %, total balance 3344.269954, day 70, sell 1 unit at price 820.450012, investment 2.278184 %, total balance 4164.719966, day 71: buy 1 unit at price 818.979980, total balance 3345.739986 day 72: buy 1 unit at price 824.159973, total balance 2521.580013 day 73: buy 1 unit at price 828.070007, total balance 1693.510006 day 75, sell 1 unit at price 830.760010, investment 3.197435 %, total balance 2524.270016, day 76, sell 1 unit at price 831.330017, investment 0.974119 %, total balance 3355.600033, day 77, sell 1 unit at price 828.640015, investment 3.280488 %, total balance 4184.240048, day 78, sell 1 unit at price 829.280029, investment 4.077618 %, total balance 5013.520077, day 79, sell 1 unit at price 823.210022, investment 3.457985 %, total balance 5836.730099, day 80: buy 1 unit at price 835.239990, total balance 5001.490109 day 81, sell 1 unit at price 830.630005, investment 3.635730 %, total balance 5832.120114, day 83: buy 1 unit at price 827.780029, total balance 5004.340085 day 84: buy 1 unit at price 831.909973, total balance 4172.430112 day 85: buy 1 unit at price 835.369995, total balance 3337.060117 day 86, sell 1 unit at price 838.679993, investment 3.929517 %, total balance 4175.740110, day 87, sell 1 unit at price 843.250000, investment 4.161520 %, total balance 5018.990110, day 88, sell 1 unit at price 845.539978, investment 3.243058 %, total balance 5864.530088, day 89: buy 1 unit at price 845.619995, total balance 5018.910093 day 90, sell 1 unit at price 847.200012, investment 2.795578 %, total balance 5866.110105, day 91: buy 1 unit at price 848.780029, total balance 5017.330076 day 92: buy 1 unit at price 852.119995, total balance 4165.210081 day 93, sell 1 unit at price 848.400024, investment 2.455108 %, total balance 5013.610105, day 95: buy 1 unit at price 829.590027, total balance 4184.020078 day 97: buy 1 unit at price 814.429993, total balance 3369.590085 day 98, sell 1 unit at price 819.510010, investment -1.883289 %, total balance 4189.100095, day 100: buy 1 unit at price 831.409973, total balance 3357.690122 day 101, sell 1 unit at price 831.500000, investment 0.449391 %, total balance 4189.190122, day 102: buy 1 unit at price 829.559998, total balance 3359.630124 day 103: buy 1 unit at price 838.549988, total balance 2521.080136 day 104: buy 1 unit at price 834.570007, total balance 1686.510129 day 105, sell 1 unit at price 831.409973, investment -0.060103 %, total balance 2517.920102, day 106: buy 1 unit at price 827.880005, total balance 1690.040097 day 108: buy 1 unit at price 824.729980, total balance 865.310117 day 109: buy 1 unit at price 823.349976, total balance 41.960141 day 110: buy 1 unit at price 824.320007, total balance -782.359866 day 112, sell 1 unit at price 837.169983, investment 0.215472 %, total balance 54.810117, day 113, sell 1 unit at price 836.820007, investment -1.040655 %, total balance 891.630124, day 114, sell 1 unit at price 838.210022, investment -1.245318 %, total balance 1729.840146, day 115: buy 1 unit at price 841.650024, total balance 888.190122 day 117: buy 1 unit at price 862.760010, total balance 25.430112 day 118, sell 1 unit at price 872.299988, investment 2.368210 %, total balance 897.730100, day 119: buy 1 unit at price 871.729980, total balance 26.000120 day 120, sell 1 unit at price 874.250000, investment 5.383379 %, total balance 900.250120, day 121, sell 1 unit at price 905.960022, investment 11.238539 %, total balance 1806.210142, day 122, sell 1 unit at price 912.570007, investment 9.761734 %, total balance 2718.780149, day 123, sell 1 unit at price 916.440002, investment 10.473022 %, total balance 3635.220151, day 124, sell 1 unit at price 927.039978, investment 10.552739 %, total balance 4562.260129, day 125, sell 1 unit at price 931.659973, investment 11.633532 %, total balance 5493.920102, day 126, sell 1 unit at price 927.130005, investment 11.988452 %, total balance 6421.050107, day 127, sell 1 unit at price 934.299988, investment 13.285561 %, total balance 7355.350095, day 128, sell 1 unit at price 932.169983, investment 13.216738 %, total balance 8287.520078, day 129: buy 1 unit at price 928.780029, total balance 7358.740049 day 130, sell 1 unit at price 930.599976, investment 12.893047 %, total balance 8289.340025, day 132: buy 1 unit at price 937.080017, total balance 7352.260008 day 133, sell 1 unit at price 943.000000, investment 12.041819 %, total balance 8295.260008, day 134: buy 1 unit at price 919.619995, total balance 7375.640013 day 135: buy 1 unit at price 930.239990, total balance 6445.400023 day 136: buy 1 unit at price 934.010010, total balance 5511.390013 day 138: buy 1 unit at price 948.820007, total balance 4562.570006 day 140: buy 1 unit at price 969.539978, total balance 3593.030028 day 142, sell 1 unit at price 975.880005, investment 13.111409 %, total balance 4568.910033, day 143, sell 1 unit at price 964.859985, investment 10.683355 %, total balance 5533.770018, day 144, sell 1 unit at price 966.950012, investment 4.109690 %, total balance 6500.720030, day 146, sell 1 unit at price 983.679993, investment 4.972892 %, total balance 7484.400023, day 147: buy 1 unit at price 976.570007, total balance 6507.830016 day 148, sell 1 unit at price 980.940002, investment 6.667972 %, total balance 7488.770018, day 149: buy 1 unit at price 983.409973, total balance 6505.360045 day 150, sell 1 unit at price 949.830017, investment 2.105911 %, total balance 7455.190062, day 151: buy 1 unit at price 942.900024, total balance 6512.290038 day 152, sell 1 unit at price 953.400024, investment 2.075996 %, total balance 7465.690062, day 154: buy 1 unit at price 942.309998, total balance 6523.380064 day 155: buy 1 unit at price 939.780029, total balance 5583.600035 day 157: buy 1 unit at price 950.630005, total balance 4632.970030 day 158, sell 1 unit at price 959.450012, investment 1.120339 %, total balance 5592.420042, day 159: buy 1 unit at price 957.090027, total balance 4635.330015 day 160, sell 1 unit at price 965.590027, investment -0.407405 %, total balance 5600.920042, day 161, sell 1 unit at price 952.270020, investment -2.488300 %, total balance 6553.190062, day 163: buy 1 unit at price 940.489990, total balance 5612.700072 day 165: buy 1 unit at price 908.729980, total balance 4703.970092 day 166, sell 1 unit at price 898.700012, investment -8.613901 %, total balance 5602.670104, day 169, sell 1 unit at price 918.590027, investment -2.578216 %, total balance 6521.260131, day 170: buy 1 unit at price 928.799988, total balance 5592.460143 day 171, sell 1 unit at price 930.090027, investment -1.296810 %, total balance 6522.550170, day 172: buy 1 unit at price 943.830017, total balance 5578.720153 day 173, sell 1 unit at price 947.159973, investment 0.785284 %, total balance 6525.880126, day 174: buy 1 unit at price 955.989990, total balance 5569.890136 day 175: buy 1 unit at price 953.419983, total balance 4616.470153 day 177, sell 1 unit at price 970.890015, investment 2.131219 %, total balance 5587.360168, day 178, sell 1 unit at price 968.150024, investment 1.155586 %, total balance 6555.510192, day 180, sell 1 unit at price 980.340027, investment 4.237157 %, total balance 7535.850219, day 181, sell 1 unit at price 950.700012, investment 4.618537 %, total balance 8486.550231, day 182: buy 1 unit at price 947.799988, total balance 7538.750243 day 183, sell 1 unit at price 934.090027, investment 0.569556 %, total balance 8472.840270, day 185, sell 1 unit at price 930.500000, investment -1.412332 %, total balance 9403.340270, day 186: buy 1 unit at price 930.830017, total balance 8472.510253 day 187: buy 1 unit at price 930.390015, total balance 7542.120238 day 188, sell 1 unit at price 923.650024, investment -3.382877 %, total balance 8465.770262, day 189: buy 1 unit at price 927.960022, total balance 7537.810240 day 191: buy 1 unit at price 926.789978, total balance 6611.020262 day 192, sell 1 unit at price 922.900024, investment -3.201103 %, total balance 7533.920286, day 193: buy 1 unit at price 907.239990, total balance 6626.680296 day 195: buy 1 unit at price 922.669983, total balance 5704.010313 day 197, sell 1 unit at price 926.960022, investment -2.198773 %, total balance 6630.970335, day 199, sell 1 unit at price 910.669983, investment -2.165813 %, total balance 7541.640318, day 201, sell 1 unit at price 924.690002, investment -0.612648 %, total balance 8466.330320, day 202: buy 1 unit at price 927.000000, total balance 7539.330320 day 204, sell 1 unit at price 915.890015, investment -1.300703 %, total balance 8455.220335, day 205, sell 1 unit at price 913.809998, investment -1.400531 %, total balance 9369.030333, day 206: buy 1 unit at price 921.289978, total balance 8447.740355 day 207, sell 1 unit at price 929.570007, investment 2.461313 %, total balance 9377.310362, day 208: buy 1 unit at price 939.330017, total balance 8437.980345 day 209, sell 1 unit at price 937.340027, investment 1.589956 %, total balance 9375.320372, day 211, sell 1 unit at price 927.809998, investment 0.087378 %, total balance 10303.130370, day 212: buy 1 unit at price 935.950012, total balance 9367.180358 day 213: buy 1 unit at price 926.500000, total balance 8440.680358 day 214, sell 1 unit at price 929.080017, investment 0.845558 %, total balance 9369.760375, day 215, sell 1 unit at price 932.070007, investment -0.772892 %, total balance 10301.830382, day 217: buy 1 unit at price 925.109985, total balance 9376.720397 day 218: buy 1 unit at price 920.289978, total balance 8456.430419 day 219: buy 1 unit at price 915.000000, total balance 7541.430419 day 220, sell 1 unit at price 921.809998, investment -1.510766 %, total balance 8463.240417, day 223: buy 1 unit at price 928.530029, total balance 7534.710388 day 224: buy 1 unit at price 920.969971, total balance 6613.740417 day 225: buy 1 unit at price 924.859985, total balance 5688.880432 day 226, sell 1 unit at price 944.489990, investment 1.941715 %, total balance 6633.370422, day 227, sell 1 unit at price 949.500000, investment 2.636445 %, total balance 7582.870422, day 229, sell 1 unit at price 953.270020, investment 3.583658 %, total balance 8536.140442, day 230, sell 1 unit at price 957.789978, investment 4.676500 %, total balance 9493.930420, day 231: buy 1 unit at price 951.679993, total balance 8542.250427 day 232, sell 1 unit at price 969.960022, investment 4.461890 %, total balance 9512.210449, day 233, sell 1 unit at price 978.890015, investment 6.289026 %, total balance 10491.100464, day 235, sell 1 unit at price 972.599976, investment 5.161861 %, total balance 11463.700440, day 236, sell 1 unit at price 989.250000, investment 3.947756 %, total balance 12452.950440, day 238: buy 1 unit at price 989.679993, total balance 11463.270447 day 239, sell 1 unit at price 992.000000, investment 0.234420 %, total balance 12455.270447, day 243: buy 1 unit at price 988.200012, total balance 11467.070435 day 244: buy 1 unit at price 968.450012, total balance 10498.620423 day 245: buy 1 unit at price 970.539978, total balance 9528.080445 day 246, sell 1 unit at price 973.330017, investment -1.504756 %, total balance 10501.410462, day 248, sell 1 unit at price 1019.270020, investment 5.247561 %, total balance 11520.680482, day 249, sell 1 unit at price 1017.109985, investment 4.798361 %, total balance 12537.790467,
fig = plt.figure(figsize = (15,5))
plt.plot(close, color='r', lw=2.)
plt.plot(close, '^', markersize=10, color='m', label = 'buying signal', markevery = states_buy)
plt.plot(close, 'v', markersize=10, color='k', label = 'selling signal', markevery = states_sell)
plt.title('total gains %f, total investment %f%%'%(total_gains, invest))
plt.legend()
plt.savefig('output/'+name+'.png')
plt.show()
df= df_full.copy()
name = 'Neuro-evolution with Novelty search agent'
close = df.Close.values.tolist()
initial_money = 10000
window_size = 30
skip = 1
novelty_search_threshold = 6
novelty_log_maxlen = 1000
backlog_maxsize = 500
novelty_log_add_amount = 3
class neuralnetwork:
def __init__(self, id_, hidden_size = 128):
self.W1 = np.random.randn(window_size, hidden_size) / np.sqrt(window_size)
self.W2 = np.random.randn(hidden_size, 3) / np.sqrt(hidden_size)
self.fitness = 0
self.last_features = None
self.id = id_
def relu(X):
return np.maximum(X, 0)
def softmax(X):
e_x = np.exp(X - np.max(X, axis=-1, keepdims=True))
return e_x / np.sum(e_x, axis=-1, keepdims=True)
def feed_forward(X, nets):
a1 = np.dot(X, nets.W1)
z1 = relu(a1)
a2 = np.dot(z1, nets.W2)
return softmax(a2)
class NeuroEvolution:
def __init__(self, population_size, mutation_rate, model_generator,
state_size, window_size, trend, skip, initial_money):
self.population_size = population_size
self.mutation_rate = mutation_rate
self.model_generator = model_generator
self.state_size = state_size
self.window_size = window_size
self.half_window = window_size // 2
self.trend = trend
self.skip = skip
self.initial_money = initial_money
self.generation_backlog = []
self.novel_backlog = []
self.novel_pop = []
def _initialize_population(self):
self.population = []
for i in range(self.population_size):
self.population.append(self.model_generator(i))
def _memorize(self, q, i, limit):
q.append(i)
if len(q) > limit:
q.pop()
def mutate(self, individual, scale=1.0):
mutation_mask = np.random.binomial(1, p=self.mutation_rate, size=individual.W1.shape)
individual.W1 += np.random.normal(loc=0, scale=scale, size=individual.W1.shape) * mutation_mask
mutation_mask = np.random.binomial(1, p=self.mutation_rate, size=individual.W2.shape)
individual.W2 += np.random.normal(loc=0, scale=scale, size=individual.W2.shape) * mutation_mask
return individual
def inherit_weights(self, parent, child):
child.W1 = parent.W1.copy()
child.W2 = parent.W2.copy()
return child
def crossover(self, parent1, parent2):
child1 = self.model_generator((parent1.id+1)*10)
child1 = self.inherit_weights(parent1, child1)
child2 = self.model_generator((parent2.id+1)*10)
child2 = self.inherit_weights(parent2, child2)
# first W
n_neurons = child1.W1.shape[1]
cutoff = np.random.randint(0, n_neurons)
child1.W1[:, cutoff:] = parent2.W1[:, cutoff:].copy()
child2.W1[:, cutoff:] = parent1.W1[:, cutoff:].copy()
# second W
n_neurons = child1.W2.shape[1]
cutoff = np.random.randint(0, n_neurons)
child1.W2[:, cutoff:] = parent2.W2[:, cutoff:].copy()
child2.W2[:, cutoff:] = parent1.W2[:, cutoff:].copy()
return child1, child2
def get_state(self, t):
window_size = self.window_size + 1
d = t - window_size + 1
block = self.trend[d : t + 1] if d >= 0 else -d * [self.trend[0]] + self.trend[0 : t + 1]
res = []
for i in range(window_size - 1):
res.append(block[i + 1] - block[i])
return np.array([res])
def act(self, p, state):
logits = feed_forward(state, p)
return np.argmax(logits, 1)[0]
def buy(self, individual):
initial_money = self.initial_money
starting_money = initial_money
state = self.get_state(0)
inventory = []
states_sell = []
states_buy = []
for t in range(0, len(self.trend) - 1, self.skip):
action = self.act(individual, state)
next_state = self.get_state(t + 1)
if action == 1 and starting_money >= self.trend[t]:
inventory.append(self.trend[t])
initial_money -= self.trend[t]
states_buy.append(t)
print('day %d: buy 1 unit at price %f, total balance %f'% (t, self.trend[t], initial_money))
elif action == 2 and len(inventory):
bought_price = inventory.pop(0)
initial_money += self.trend[t]
states_sell.append(t)
try:
invest = ((self.trend[t] - bought_price) / bought_price) * 100
except:
invest = 0
print(
'day %d, sell 1 unit at price %f, investment %f %%, total balance %f,'
% (t, self.trend[t], invest, initial_money)
)
state = next_state
invest = ((initial_money - starting_money) / starting_money) * 100
total_gains = initial_money - starting_money
return states_buy, states_sell, total_gains, invest
def calculate_fitness(self):
for i in range(self.population_size):
initial_money = self.initial_money
starting_money = initial_money
state = self.get_state(0)
inventory = []
for t in range(0, len(self.trend) - 1, self.skip):
action = self.act(self.population[i], state)
next_state = self.get_state(t + 1)
if action == 1 and starting_money >= self.trend[t]:
inventory.append(self.trend[t])
starting_money -= self.trend[t]
elif action == 2 and len(inventory):
bought_price = inventory.pop(0)
starting_money += self.trend[t]
state = next_state
invest = ((starting_money - initial_money) / initial_money) * 100
self.population[i].fitness = invest
self.population[i].last_features = self.population[i].W2.flatten()
def evaluate(self, individual, backlog, pop, k = 4):
score = 0
if len(backlog):
x = np.array(backlog)
nn = NearestNeighbors(n_neighbors = k, metric = 'euclidean').fit(np.array(backlog))
d, _ = nn.kneighbors([individual])
score += np.mean(d)
if len(pop):
nn = NearestNeighbors(n_neighbors = k, metric = 'euclidean').fit(np.array(pop))
d, _ = nn.kneighbors([individual])
score += np.mean(d)
return score
def evolve(self, generations=20, checkpoint= 5):
self._initialize_population()
n_winners = int(self.population_size * 0.4)
n_parents = self.population_size - n_winners
for epoch in range(generations):
self.calculate_fitness()
scores = [self.evaluate(p.last_features, self.novel_backlog, self.novel_pop) for p in self.population]
sort_fitness = np.argsort(scores)[::-1]
self.population = [self.population[i] for i in sort_fitness]
fittest_individual = self.population[0]
if (epoch+1) % checkpoint == 0:
print('epoch %d, fittest individual %d with accuracy %f'%(epoch+1, sort_fitness[0],
fittest_individual.fitness))
next_population = [self.population[i] for i in range(n_winners)]
total_fitness = np.sum([np.abs(i.fitness) for i in self.population])
parent_probabilities = [np.abs(i.fitness / total_fitness) for i in self.population]
parents = np.random.choice(self.population, size=n_parents, p=parent_probabilities, replace=False)
for p in next_population:
if p.last_features is not None:
self._memorize(self.novel_pop, p.last_features, backlog_maxsize)
if np.random.randint(0,10) < novelty_search_threshold:
self._memorize(self.novel_backlog, p.last_features, novelty_log_maxlen)
for i in np.arange(0, len(parents), 2):
child1, child2 = self.crossover(parents[i], parents[i+1])
next_population += [self.mutate(child1), self.mutate(child2)]
self.population = next_population
if np.random.randint(0,10) < novelty_search_threshold:
pop_sorted = sorted(self.population, key=lambda p: p.fitness, reverse=True)
self.generation_backlog.append(pop_sorted[0])
print('novel add fittest, score: %f, backlog size: %d'%(pop_sorted[0].fitness,
len(self.generation_backlog)))
generation_backlog_temp = self.generation_backlog
if len(self.generation_backlog) > backlog_maxsize:
generation_backlog_temp = random.sample(generation_backlog, backlog_maxsize)
for p in generation_backlog_temp:
if p.last_features is not None:
self._memorize(self.novel_backlog, p.last_features, novelty_log_maxlen)
return fittest_individual
population_size = 100
generations = 100
mutation_rate = 0.1
neural_evolve = NeuroEvolution(population_size, mutation_rate, neuralnetwork,
window_size, window_size, close, skip, initial_money)
fittest_nets = neural_evolve.evolve(100)
novel add fittest, score: 5.960001, backlog size: 16 novel add fittest, score: 2.560349, backlog size: 17 epoch 5, fittest individual 86 with accuracy -99.353801 novel add fittest, score: 2.073401, backlog size: 18 epoch 10, fittest individual 53 with accuracy -99.622801 novel add fittest, score: 9.773855, backlog size: 19 novel add fittest, score: 1.068502, backlog size: 20 novel add fittest, score: 1.733602, backlog size: 21 epoch 15, fittest individual 49 with accuracy -94.018300 novel add fittest, score: 1.439049, backlog size: 22 novel add fittest, score: 0.000000, backlog size: 23 novel add fittest, score: 0.000000, backlog size: 24 novel add fittest, score: 3.052850, backlog size: 25 epoch 20, fittest individual 83 with accuracy -42.284500 novel add fittest, score: 3.284498, backlog size: 26 novel add fittest, score: 3.284498, backlog size: 27 novel add fittest, score: 0.000000, backlog size: 28 novel add fittest, score: 0.000000, backlog size: 29 epoch 25, fittest individual 43 with accuracy -99.809850 novel add fittest, score: 0.000000, backlog size: 30 novel add fittest, score: 0.000000, backlog size: 31 novel add fittest, score: 4.712602, backlog size: 32 novel add fittest, score: 4.712602, backlog size: 33 epoch 30, fittest individual 51 with accuracy -94.734501 novel add fittest, score: 4.712602, backlog size: 34 novel add fittest, score: 0.000000, backlog size: 35 novel add fittest, score: 0.000000, backlog size: 36 epoch 35, fittest individual 74 with accuracy -99.895853 novel add fittest, score: 0.000000, backlog size: 37 novel add fittest, score: 0.000000, backlog size: 38 novel add fittest, score: 0.000000, backlog size: 39 novel add fittest, score: 0.000000, backlog size: 40 epoch 40, fittest individual 50 with accuracy -99.900900 novel add fittest, score: 0.000000, backlog size: 41 novel add fittest, score: 0.000000, backlog size: 42 epoch 45, fittest individual 98 with accuracy -92.305952 novel add fittest, score: 0.000000, backlog size: 43 novel add fittest, score: 0.000000, backlog size: 44 novel add fittest, score: 0.000000, backlog size: 45 novel add fittest, score: 0.000000, backlog size: 46 epoch 50, fittest individual 55 with accuracy -99.841901 novel add fittest, score: 0.000000, backlog size: 47 novel add fittest, score: 0.000000, backlog size: 48 novel add fittest, score: 0.000000, backlog size: 49 epoch 55, fittest individual 0 with accuracy -99.351002 novel add fittest, score: 0.000000, backlog size: 50 novel add fittest, score: 0.000000, backlog size: 51 novel add fittest, score: 0.000000, backlog size: 52 epoch 60, fittest individual 56 with accuracy -91.532553 novel add fittest, score: 0.000000, backlog size: 53 novel add fittest, score: 0.000000, backlog size: 54 novel add fittest, score: 0.000000, backlog size: 55 epoch 65, fittest individual 0 with accuracy -99.389200 novel add fittest, score: 0.000000, backlog size: 56 novel add fittest, score: 0.000000, backlog size: 57 novel add fittest, score: 0.000000, backlog size: 58 epoch 70, fittest individual 68 with accuracy -90.999901 novel add fittest, score: 0.000000, backlog size: 59 novel add fittest, score: 0.000000, backlog size: 60 novel add fittest, score: 0.000000, backlog size: 61 novel add fittest, score: 0.000000, backlog size: 62 epoch 75, fittest individual 50 with accuracy -98.881400 novel add fittest, score: 0.000000, backlog size: 63 novel add fittest, score: 0.000000, backlog size: 64 novel add fittest, score: 0.000000, backlog size: 65 epoch 80, fittest individual 0 with accuracy -91.959200 novel add fittest, score: 0.000000, backlog size: 66 novel add fittest, score: 0.000000, backlog size: 67 novel add fittest, score: 0.000000, backlog size: 68 epoch 85, fittest individual 0 with accuracy -94.175699 novel add fittest, score: 0.000000, backlog size: 69 novel add fittest, score: 0.000000, backlog size: 70 novel add fittest, score: 0.000000, backlog size: 71 novel add fittest, score: 0.000000, backlog size: 72 novel add fittest, score: 0.000000, backlog size: 73 epoch 90, fittest individual 60 with accuracy -93.196199 novel add fittest, score: 0.000000, backlog size: 74 novel add fittest, score: 0.000000, backlog size: 75 novel add fittest, score: 0.000000, backlog size: 76 epoch 95, fittest individual 66 with accuracy -93.122201 novel add fittest, score: 0.000000, backlog size: 77 novel add fittest, score: 0.000000, backlog size: 78 novel add fittest, score: 0.000000, backlog size: 79 epoch 100, fittest individual 52 with accuracy -93.193801 novel add fittest, score: 0.000000, backlog size: 80
states_buy, states_sell, total_gains, invest = neural_evolve.buy(fittest_nets)
day 1: buy 1 unit at price 762.130005, total balance 9237.869995 day 3: buy 1 unit at price 782.520020, total balance 8455.349975 day 4: buy 1 unit at price 790.510010, total balance 7664.839965 day 5, sell 1 unit at price 785.309998, investment 3.041475 %, total balance 8450.149963, day 9: buy 1 unit at price 758.489990, total balance 7691.659973 day 10: buy 1 unit at price 764.479980, total balance 6927.179993 day 11: buy 1 unit at price 771.229980, total balance 6155.950013 day 15: buy 1 unit at price 760.989990, total balance 5394.960023 day 16: buy 1 unit at price 761.679993, total balance 4633.280030 day 17: buy 1 unit at price 768.239990, total balance 3865.040040 day 21: buy 1 unit at price 750.500000, total balance 3114.540040 day 26, sell 1 unit at price 789.289978, investment 0.865148 %, total balance 3903.830018, day 31: buy 1 unit at price 790.799988, total balance 3113.030030 day 37: buy 1 unit at price 791.549988, total balance 2321.480042 day 39: buy 1 unit at price 782.789978, total balance 1538.690064 day 40: buy 1 unit at price 771.820007, total balance 766.870057 day 43: buy 1 unit at price 794.020020, total balance -27.149963 day 44: buy 1 unit at price 806.150024, total balance -833.299987 day 45: buy 1 unit at price 806.650024, total balance -1639.950011 day 48: buy 1 unit at price 806.359985, total balance -2446.309996 day 49: buy 1 unit at price 807.880005, total balance -3254.190001 day 50: buy 1 unit at price 804.609985, total balance -4058.799986 day 52: buy 1 unit at price 802.174988, total balance -4860.974974 day 53: buy 1 unit at price 805.020020, total balance -5665.994994 day 54, sell 1 unit at price 819.309998, investment 3.643216 %, total balance -4846.684996, day 55: buy 1 unit at price 823.869995, total balance -5670.554991 day 60: buy 1 unit at price 796.789978, total balance -6467.344969 day 61: buy 1 unit at price 795.695007, total balance -7263.039976 day 63: buy 1 unit at price 801.489990, total balance -8064.529966 day 65: buy 1 unit at price 806.969971, total balance -8871.499937 day 66: buy 1 unit at price 808.380005, total balance -9679.879942 day 67: buy 1 unit at price 809.559998, total balance -10489.439940 day 68: buy 1 unit at price 813.669983, total balance -11303.109923 day 69: buy 1 unit at price 819.239990, total balance -12122.349913 day 73, sell 1 unit at price 828.070007, investment 9.173492 %, total balance -11294.279906, day 80: buy 1 unit at price 835.239990, total balance -12129.519896 day 84: buy 1 unit at price 831.909973, total balance -12961.429869 day 85, sell 1 unit at price 835.369995, investment 9.272972 %, total balance -12126.059874, day 86, sell 1 unit at price 838.679993, investment 8.745772 %, total balance -11287.379881, day 88: buy 1 unit at price 845.539978, total balance -12132.919859 day 89: buy 1 unit at price 845.619995, total balance -12978.539854 day 90: buy 1 unit at price 847.200012, total balance -13825.739866 day 92: buy 1 unit at price 852.119995, total balance -14677.859861 day 93: buy 1 unit at price 848.400024, total balance -15526.259885 day 94: buy 1 unit at price 830.460022, total balance -16356.719907 day 96: buy 1 unit at price 817.580017, total balance -17174.299924 day 97: buy 1 unit at price 814.429993, total balance -17988.729917 day 98: buy 1 unit at price 819.510010, total balance -18808.239927 day 99: buy 1 unit at price 820.919983, total balance -19629.159910 day 101, sell 1 unit at price 831.500000, investment 9.265563 %, total balance -18797.659910, day 102: buy 1 unit at price 829.559998, total balance -19627.219908 day 103, sell 1 unit at price 838.549988, investment 10.092164 %, total balance -18788.669920, day 104: buy 1 unit at price 834.570007, total balance -19623.239927 day 105: buy 1 unit at price 831.409973, total balance -20454.649900 day 107: buy 1 unit at price 824.669983, total balance -21279.319883 day 108: buy 1 unit at price 824.729980, total balance -22104.049863 day 109: buy 1 unit at price 823.349976, total balance -22927.399839 day 112: buy 1 unit at price 837.169983, total balance -23764.569822 day 116: buy 1 unit at price 843.190002, total balance -24607.759824 day 119: buy 1 unit at price 871.729980, total balance -25479.489804 day 125: buy 1 unit at price 931.659973, total balance -26411.149777 day 127: buy 1 unit at price 934.299988, total balance -27345.449765 day 129: buy 1 unit at price 928.780029, total balance -28274.229794 day 130: buy 1 unit at price 930.599976, total balance -29204.829770 day 133: buy 1 unit at price 943.000000, total balance -30147.829770 day 134: buy 1 unit at price 919.619995, total balance -31067.449765 day 137, sell 1 unit at price 941.859985, investment 22.599708 %, total balance -30125.589780, day 139: buy 1 unit at price 954.960022, total balance -31080.549802 day 140: buy 1 unit at price 969.539978, total balance -32050.089780 day 141, sell 1 unit at price 971.469971, investment 29.443034 %, total balance -31078.619809, day 142: buy 1 unit at price 975.880005, total balance -32054.499814 day 143: buy 1 unit at price 964.859985, total balance -33019.359799 day 145: buy 1 unit at price 975.599976, total balance -33994.959775 day 147: buy 1 unit at price 976.570007, total balance -34971.529782 day 148: buy 1 unit at price 980.940002, total balance -35952.469784 day 149: buy 1 unit at price 983.409973, total balance -36935.879757 day 151: buy 1 unit at price 942.900024, total balance -37878.779781 day 152: buy 1 unit at price 953.400024, total balance -38832.179805 day 153: buy 1 unit at price 950.760010, total balance -39782.939815 day 154: buy 1 unit at price 942.309998, total balance -40725.249813 day 156: buy 1 unit at price 957.369995, total balance -41682.619808 day 157, sell 1 unit at price 950.630005, investment 20.211181 %, total balance -40731.989803, day 158: buy 1 unit at price 959.450012, total balance -41691.439815 day 159: buy 1 unit at price 957.090027, total balance -42648.529842 day 164: buy 1 unit at price 917.789978, total balance -43566.319820 day 165: buy 1 unit at price 908.729980, total balance -44475.049800 day 167: buy 1 unit at price 911.710022, total balance -45386.759822 day 168, sell 1 unit at price 906.690002, investment 14.546146 %, total balance -44480.069820, day 169, sell 1 unit at price 918.590027, investment 17.348210 %, total balance -43561.479793, day 170: buy 1 unit at price 928.799988, total balance -44490.279781 day 173, sell 1 unit at price 947.159973, investment 22.717728 %, total balance -43543.119808, day 184: buy 1 unit at price 941.530029, total balance -44484.649837 day 186: buy 1 unit at price 930.830017, total balance -45415.479854 day 187: buy 1 unit at price 930.390015, total balance -46345.869869 day 190: buy 1 unit at price 929.359985, total balance -47275.229854 day 191: buy 1 unit at price 926.789978, total balance -48202.019832 day 194: buy 1 unit at price 914.390015, total balance -49116.409847 day 196: buy 1 unit at price 922.219971, total balance -50038.629818 day 198: buy 1 unit at price 910.979980, total balance -50949.609798 day 201: buy 1 unit at price 924.690002, total balance -51874.299800 day 202: buy 1 unit at price 927.000000, total balance -52801.299800 day 206, sell 1 unit at price 921.289978, investment 16.028558 %, total balance -51880.009822, day 207: buy 1 unit at price 929.570007, total balance -52809.579829 day 208: buy 1 unit at price 939.330017, total balance -53748.909846 day 213: buy 1 unit at price 926.500000, total balance -54675.409846 day 218: buy 1 unit at price 920.289978, total balance -55595.699824 day 219: buy 1 unit at price 915.000000, total balance -56510.699824 day 222: buy 1 unit at price 932.450012, total balance -57443.149836 day 224: buy 1 unit at price 920.969971, total balance -58364.119807 day 227: buy 1 unit at price 949.500000, total balance -59313.619807 day 228: buy 1 unit at price 959.109985, total balance -60272.729792 day 229: buy 1 unit at price 953.270020, total balance -61225.999812 day 232: buy 1 unit at price 969.960022, total balance -62195.959834 day 238: buy 1 unit at price 989.679993, total balance -63185.639827 day 240: buy 1 unit at price 992.179993, total balance -64177.819820 day 242: buy 1 unit at price 984.450012, total balance -65162.269832 day 244: buy 1 unit at price 968.450012, total balance -66130.719844 day 247: buy 1 unit at price 972.559998, total balance -67103.279842 day 248: buy 1 unit at price 1019.270020, total balance -68122.549862 day 249: buy 1 unit at price 1017.109985, total balance -69139.659847 day 250: buy 1 unit at price 1016.640015, total balance -70156.299862
fig = plt.figure(figsize = (15,5))
plt.plot(close, color='r', lw=2.)
plt.plot(close, '^', markersize=10, color='m', label = 'buying signal', markevery = states_buy)
plt.plot(close, 'v', markersize=10, color='k', label = 'selling signal', markevery = states_sell)
plt.title('total gains %f, total investment %f%%'%(total_gains, invest))
plt.legend()
plt.savefig('output/'+name+'.png')
plt.show()
df= df_full.copy()
name = 'ABCD strategy agent'
def abcd(trend, skip_loop = 4, ma = 7):
ma = pd.Series(trend).rolling(ma).mean().values
x = []
for a in range(ma.shape[0]):
for b in range(a, ma.shape[0], skip_loop):
for c in range(b, ma.shape[0], skip_loop):
for d in range(c, ma.shape[0], skip_loop):
if ma[b] > ma[a] and \
(ma[c] < ma[b] and ma[c] > ma[a]) \
and ma[d] > ma[b]:
x.append([a,b,c,d])
x_np = np.array(x)
ac = x_np[:,0].tolist() + x_np[:,2].tolist()
bd = x_np[:,1].tolist() + x_np[:,3].tolist()
ac_set = set(ac)
bd_set = set(bd)
signal = np.zeros(len(trend))
buy = list(ac_set - bd_set)
sell = list(list(bd_set - ac_set))
signal[buy] = 1.0
signal[sell] = -1.0
return signal
%%time
signal = abcd(df['Close'])
CPU times: user 1.08 s, sys: 8 ms, total: 1.09 s Wall time: 1.09 s
def buy_stock(
real_movement,
signal,
initial_money = 10000,
max_buy = 1,
max_sell = 1,
):
"""
real_movement = actual movement in the real world
delay = how much interval you want to delay to change our decision from buy to sell, vice versa
initial_state = 1 is buy, 0 is sell
initial_money = 10000, ignore what kind of currency
max_buy = max quantity for share to buy
max_sell = max quantity for share to sell
"""
starting_money = initial_money
states_sell = []
states_buy = []
states_money = []
current_inventory = 0
def buy(i, initial_money, current_inventory):
shares = initial_money // real_movement[i]
if shares < 1:
print(
'day %d: total balances %f, not enough money to buy a unit price %f'
% (i, initial_money, real_movement[i])
)
else:
if shares > max_buy:
buy_units = max_buy
else:
buy_units = shares
initial_money -= buy_units * real_movement[i]
current_inventory += buy_units
print(
'day %d: buy %d units at price %f, total balance %f'
% (i, buy_units, buy_units * real_movement[i], initial_money)
)
states_buy.append(0)
return initial_money, current_inventory
for i in range(real_movement.shape[0]):
state = signal[i]
if state == 1:
initial_money, current_inventory = buy(
i, initial_money, current_inventory
)
states_buy.append(i)
elif state == -1:
if current_inventory == 0:
print('day %d: cannot sell anything, inventory 0' % (i))
else:
if current_inventory > max_sell:
sell_units = max_sell
else:
sell_units = current_inventory
current_inventory -= sell_units
total_sell = sell_units * real_movement[i]
initial_money += total_sell
try:
invest = (
(real_movement[i] - real_movement[states_buy[-1]])
/ real_movement[states_buy[-1]]
) * 100
except:
invest = 0
print(
'day %d, sell %d units at price %f, investment %f %%, total balance %f,'
% (i, sell_units, total_sell, invest, initial_money)
)
states_sell.append(i)
states_money.append(initial_money)
invest = ((initial_money - starting_money) / starting_money) * 100
total_gains = initial_money - starting_money
return states_buy, states_sell, total_gains, invest, states_money
states_buy, states_sell, total_gains, invest, states_money = buy_stock(df.Close, signal)
day 6: buy 1 units at price 762.559998, total balance 9237.440002 day 7: buy 1 units at price 754.020020, total balance 8483.419982 day 8: buy 1 units at price 736.080017, total balance 7747.339965 day 9: buy 1 units at price 758.489990, total balance 6988.849975 day 10: buy 1 units at price 764.479980, total balance 6224.369995 day 11: buy 1 units at price 771.229980, total balance 5453.140015 day 12: buy 1 units at price 760.539978, total balance 4692.600037 day 13: buy 1 units at price 769.200012, total balance 3923.400025 day 14: buy 1 units at price 768.270020, total balance 3155.130005 day 15: buy 1 units at price 760.989990, total balance 2394.140015 day 19: buy 1 units at price 758.039978, total balance 1636.100037 day 21: buy 1 units at price 750.500000, total balance 885.600037 day 22: buy 1 units at price 762.520020, total balance 123.080017 day 23: total balances 123.080017, not enough money to buy a unit price 759.109985 day 24: total balances 123.080017, not enough money to buy a unit price 771.190002 day 25: total balances 123.080017, not enough money to buy a unit price 776.419983 day 26: total balances 123.080017, not enough money to buy a unit price 789.289978 day 27: total balances 123.080017, not enough money to buy a unit price 789.270020 day 43: total balances 123.080017, not enough money to buy a unit price 794.020020 day 148, sell 1 units at price 980.940002, investment 23.540966 %, total balance 1104.020019, day 149, sell 1 units at price 983.409973, investment 23.852038 %, total balance 2087.429992, day 239, sell 1 units at price 992.000000, investment 24.933878 %, total balance 3079.429992, day 240, sell 1 units at price 992.179993, investment 24.956546 %, total balance 4071.609985, day 241, sell 1 units at price 992.809998, investment 25.035890 %, total balance 5064.419983, day 242, sell 1 units at price 984.450012, investment 23.983021 %, total balance 6048.869995, day 243, sell 1 units at price 988.200012, investment 24.455302 %, total balance 7037.070007, day 244, sell 1 units at price 968.450012, investment 21.967959 %, total balance 8005.520019, day 245, sell 1 units at price 970.539978, investment 22.231172 %, total balance 8976.059997, day 248, sell 1 units at price 1019.270020, investment 28.368302 %, total balance 9995.330017, day 249, sell 1 units at price 1017.109985, investment 28.096264 %, total balance 11012.440002, day 250, sell 1 units at price 1016.640015, investment 28.037076 %, total balance 12029.080017, day 251, sell 1 units at price 1025.500000, investment 29.152915 %, total balance 13054.580017,
close = df['Close']
fig = plt.figure(figsize = (15,5))
plt.plot(close, color='r', lw=2.)
plt.plot(close, '^', markersize=10, color='m', label = 'buying signal', markevery = states_buy)
plt.plot(close, 'v', markersize=10, color='k', label = 'selling signal', markevery = states_sell)
plt.title('total gains %f, total investment %f%%'%(total_gains, invest))
plt.legend()
plt.savefig('output/'+name+'.png')
plt.show()
fig = plt.figure(figsize = (15,5))
plt.plot(states_money, color='r', lw=2.)
plt.plot(states_money, '^', markersize=10, color='m', label = 'buying signal', markevery = states_buy)
plt.plot(states_money, 'v', markersize=10, color='k', label = 'selling signal', markevery = states_sell)
plt.legend()
plt.show()
plt.figure(figsize = (10, 5))
bins = np.linspace(-10, 10, 100)
solution = np.random.randn(100)
w = np.random.randn(100)
plt.hist(solution, bins, alpha = 0.5, label = 'solution', color = 'r')
plt.hist(w, bins, alpha = 0.5, label = 'random', color = 'y')
plt.legend()
plt.show()
def f(w):
return -np.sum(np.square(solution - w))
npop = 50
sigma = 0.1
alpha = 0.001
for i in range(5000):
if (i + 1) % 1000 == 0:
print(
'iter %d. w: %s, solution: %s, reward: %f'
% (i + 1, str(w[-1]), str(solution[-1]), f(w))
)
N = np.random.randn(npop, 100)
R = np.zeros(npop)
for j in range(npop):
w_try = w + sigma * N[j]
R[j] = f(w_try)
A = (R - np.mean(R)) / np.std(R)
w = w + alpha / (npop * sigma) * np.dot(N.T, A)
iter 1000. w: 0.0952791586701015, solution: 0.5720518054873052, reward: -20.148099 iter 2000. w: 0.5750455468679501, solution: 0.5720518054873052, reward: -0.008058 iter 3000. w: 0.5751585748688035, solution: 0.5720518054873052, reward: -0.008793 iter 4000. w: 0.5665604300033952, solution: 0.5720518054873052, reward: -0.007711 iter 5000. w: 0.5619489293298067, solution: 0.5720518054873052, reward: -0.005604
'''
I want to compare my first two individuals with my real w
'''
plt.figure(figsize=(10,5))
sigma = 0.1
N = np.random.randn(npop, 100)
individuals = []
for j in range(2):
individuals.append(w + sigma * N[j])
plt.hist(w, bins, alpha=0.5, label='w',color='r')
plt.hist(individuals[0], bins, alpha=0.5, label='individual 1')
plt.hist(individuals[1], bins, alpha=0.5, label='individual 2')
plt.legend()
plt.show()
df= df_full.copy()
name = 'Deep Evolution Strategy'
Date | Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|---|
0 | 2017-10-16 | 992.099976 | 993.906982 | 984.000000 | 992.000000 | 992.000000 | 910500 |
1 | 2017-10-17 | 990.289978 | 996.440002 | 988.590027 | 992.179993 | 992.179993 | 1290200 |
2 | 2017-10-18 | 991.770020 | 996.719971 | 986.974976 | 992.809998 | 992.809998 | 1057600 |
3 | 2017-10-19 | 986.000000 | 988.880005 | 978.390015 | 984.450012 | 984.450012 | 1313600 |
4 | 2017-10-20 | 989.440002 | 991.000000 | 984.580017 | 988.200012 | 988.200012 | 1183200 |
def get_state(data, t, n):
d = t - n + 1
block = data[d : t + 1] if d >= 0 else -d * [data[0]] + data[: t + 1]
res = []
for i in range(n - 1):
res.append(block[i + 1] - block[i])
return np.array([res])
close = df.Close.values.tolist()
get_state(close, 0, 10)
array([[0., 0., 0., 0., 0., 0., 0., 0., 0.]])
get_state(close, 1, 10)
array([[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.179993]])
get_state(close, 2, 10)
array([[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.179993, 0.630005]])
class Deep_Evolution_Strategy:
def __init__(
self, weights, reward_function, population_size, sigma, learning_rate
):
self.weights = weights
self.reward_function = reward_function
self.population_size = population_size
self.sigma = sigma
self.learning_rate = learning_rate
def _get_weight_from_population(self, weights, population):
weights_population = []
for index, i in enumerate(population):
jittered = self.sigma * i
weights_population.append(weights[index] + jittered)
return weights_population
def get_weights(self):
return self.weights
def train(self, epoch = 100, print_every = 1):
lasttime = time.time()
for i in range(epoch):
population = []
rewards = np.zeros(self.population_size)
for k in range(self.population_size):
x = []
for w in self.weights:
x.append(np.random.randn(*w.shape))
population.append(x)
for k in range(self.population_size):
weights_population = self._get_weight_from_population(
self.weights, population[k]
)
rewards[k] = self.reward_function(weights_population)
rewards = (rewards - np.mean(rewards)) / np.std(rewards)
for index, w in enumerate(self.weights):
A = np.array([p[index] for p in population])
self.weights[index] = (
w
+ self.learning_rate
/ (self.population_size * self.sigma)
* np.dot(A.T, rewards).T
)
if (i + 1) % print_every == 0:
print(
'iter %d. reward: %f'
% (i + 1, self.reward_function(self.weights))
)
print('time taken to train:', time.time() - lasttime, 'seconds')
class Model:
def __init__(self, input_size, layer_size, output_size):
self.weights = [
np.random.randn(input_size, layer_size),
np.random.randn(layer_size, output_size),
np.random.randn(layer_size, 1),
np.random.randn(1, layer_size),
]
def predict(self, inputs):
feed = np.dot(inputs, self.weights[0]) + self.weights[-1]
decision = np.dot(feed, self.weights[1])
buy = np.dot(feed, self.weights[2])
return decision, buy
def get_weights(self):
return self.weights
def set_weights(self, weights):
self.weights = weights
window_size = 30
model = Model(window_size, 500, 3)
initial_money = 10000
starting_money = initial_money
len_close = len(close) - 1
weight = model
skip = 1
state = get_state(close, 0, window_size + 1)
inventory = []
quantity = 0
max_buy = 5
max_sell = 5
def act(model, sequence):
decision, buy = model.predict(np.array(sequence))
return np.argmax(decision[0]), int(buy[0])
for t in range(0, len_close, skip):
action, buy = act(weight, state)
next_state = get_state(close, t + 1, window_size + 1)
if action == 1 and initial_money >= close[t]:
if buy < 0:
buy = 1
if buy > max_buy:
buy_units = max_buy
else:
buy_units = buy
total_buy = buy_units * close[t]
initial_money -= total_buy
inventory.append(total_buy)
quantity += buy_units
elif action == 2 and len(inventory) > 0:
if quantity > max_sell:
sell_units = max_sell
else:
sell_units = quantity
quantity -= sell_units
total_sell = sell_units * close[t]
initial_money += total_sell
state = next_state
((initial_money - starting_money) / starting_money) * 100
-89.2658852200001
import time
class Agent:
POPULATION_SIZE = 15
SIGMA = 0.1
LEARNING_RATE = 0.03
def __init__(
self, model, money, max_buy, max_sell, close, window_size, skip
):
self.window_size = window_size
self.skip = skip
self.close = close
self.model = model
self.initial_money = money
self.max_buy = max_buy
self.max_sell = max_sell
self.es = Deep_Evolution_Strategy(
self.model.get_weights(),
self.get_reward,
self.POPULATION_SIZE,
self.SIGMA,
self.LEARNING_RATE,
)
def act(self, sequence):
decision, buy = self.model.predict(np.array(sequence))
return np.argmax(decision[0]), int(buy[0])
def get_reward(self, weights):
initial_money = self.initial_money
starting_money = initial_money
len_close = len(self.close) - 1
self.model.weights = weights
state = get_state(self.close, 0, self.window_size + 1)
inventory = []
quantity = 0
for t in range(0, len_close, self.skip):
action, buy = self.act(state)
next_state = get_state(self.close, t + 1, self.window_size + 1)
if action == 1 and initial_money >= self.close[t]:
if buy < 0:
buy = 1
if buy > self.max_buy:
buy_units = self.max_buy
else:
buy_units = buy
total_buy = buy_units * self.close[t]
initial_money -= total_buy
inventory.append(total_buy)
quantity += buy_units
elif action == 2 and len(inventory) > 0:
if quantity > self.max_sell:
sell_units = self.max_sell
else:
sell_units = quantity
quantity -= sell_units
total_sell = sell_units * self.close[t]
initial_money += total_sell
state = next_state
return ((initial_money - starting_money) / starting_money) * 100
def fit(self, iterations, checkpoint):
self.es.train(iterations, print_every = checkpoint)
def buy(self):
initial_money = self.initial_money
len_close = len(self.close) - 1
state = get_state(self.close, 0, self.window_size + 1)
starting_money = initial_money
states_sell = []
states_buy = []
inventory = []
quantity = 0
for t in range(0, len_close, self.skip):
action, buy = self.act(state)
next_state = get_state(self.close, t + 1, self.window_size + 1)
if action == 1 and initial_money >= self.close[t]:
if buy < 0:
buy = 1
if buy > self.max_buy:
buy_units = self.max_buy
else:
buy_units = buy
total_buy = buy_units * self.close[t]
initial_money -= total_buy
inventory.append(total_buy)
quantity += buy_units
states_buy.append(t)
print(
'day %d: buy %d units at price %f, total balance %f'
% (t, buy_units, total_buy, initial_money)
)
elif action == 2 and len(inventory) > 0:
bought_price = inventory.pop(0)
if quantity > self.max_sell:
sell_units = self.max_sell
else:
sell_units = quantity
if sell_units < 1:
continue
quantity -= sell_units
total_sell = sell_units * self.close[t]
initial_money += total_sell
states_sell.append(t)
try:
invest = ((total_sell - bought_price) / bought_price) * 100
except:
invest = 0
print(
'day %d, sell %d units at price %f, investment %f %%, total balance %f,'
% (t, sell_units, total_sell, invest, initial_money)
)
state = next_state
invest = ((initial_money - starting_money) / starting_money) * 100
print(
'\ntotal gained %f, total investment %f %%'
% (initial_money - starting_money, invest)
)
plt.figure(figsize = (20, 10))
plt.plot(close, label = 'true close', c = 'g')
plt.plot(
close, 'X', label = 'predict buy', markevery = states_buy, c = 'b'
)
plt.plot(
close, 'o', label = 'predict sell', markevery = states_sell, c = 'r'
)
plt.legend()
plt.savefig('output/'+name+'.png')
plt.show()
model = Model(input_size = window_size, layer_size = 500, output_size = 3)
agent = Agent(
model = model,
money = 10000,
max_buy = 5,
max_sell = 5,
close = close,
window_size = window_size,
skip = 1,
)
agent.fit(iterations = 500, checkpoint = 10)
iter 10. reward: 36.181611 iter 20. reward: 50.767101 iter 30. reward: 65.467698 iter 40. reward: 71.316103 iter 50. reward: 82.881994 iter 60. reward: 84.293704 iter 70. reward: 78.501997 iter 80. reward: 94.488579 iter 90. reward: 86.526799 iter 100. reward: 85.882890 iter 110. reward: 86.063284 iter 120. reward: 90.334301 iter 130. reward: 85.850098 iter 140. reward: 91.399606 iter 150. reward: 87.862805 iter 160. reward: 97.226486 iter 170. reward: 86.767297 iter 180. reward: 97.016782 iter 190. reward: 97.843791 iter 200. reward: 89.146606 iter 210. reward: 96.508885 iter 220. reward: 97.765979 iter 230. reward: 98.256375 iter 240. reward: 99.942482 iter 250. reward: 94.536183 iter 260. reward: 96.916185 iter 270. reward: 93.193185 iter 280. reward: 100.844085 iter 290. reward: 100.994682 iter 300. reward: 101.523774 iter 310. reward: 102.090896 iter 320. reward: 102.176091 iter 330. reward: 92.306981 iter 340. reward: 105.409190 iter 350. reward: 103.159886 iter 360. reward: 99.091287 iter 370. reward: 108.475085 iter 380. reward: 102.349682 iter 390. reward: 110.289382 iter 400. reward: 103.371389 iter 410. reward: 110.951287 iter 420. reward: 111.561078 iter 430. reward: 112.275285 iter 440. reward: 113.112587 iter 450. reward: 110.838887 iter 460. reward: 111.243782 iter 470. reward: 112.924874 iter 480. reward: 111.705677 iter 490. reward: 110.903074 iter 500. reward: 112.986871 time taken to train: 60.56475520133972 seconds
agent.buy()
day 0: buy 1 units at price 992.000000, total balance 9008.000000 day 1: buy 1 units at price 992.179993, total balance 8015.820007 day 2: buy 1 units at price 992.809998, total balance 7023.010009 day 3: buy 5 units at price 4922.250060, total balance 2100.759949 day 4, sell 5 units at price 4941.000060, investment 398.084683 %, total balance 7041.760009, day 5: buy 5 units at price 4842.250060, total balance 2199.509949 day 7: buy 5 units at price 4866.650085, total balance -2667.140136 day 9, sell 5 units at price 5096.350100, investment 413.651770 %, total balance 2429.209964, day 10: buy 5 units at price 5085.549925, total balance -2656.339961 day 12, sell 5 units at price 5127.500000, investment 416.463373 %, total balance 2471.160039, day 13, sell 5 units at price 5127.899780, investment 4.177962 %, total balance 7599.059819, day 14, sell 3 units at price 3097.439940, investment -36.033045 %, total balance 10696.499759, day 22: buy 1 units at price 1020.909973, total balance 9675.589786 day 24: buy 1 units at price 1019.090027, total balance 8656.499759 day 25: buy 5 units at price 5091.900025, total balance 3564.599734 day 27: buy 5 units at price 5179.799805, total balance -1615.200071 day 29, sell 5 units at price 5271.049805, investment 416.308974 %, total balance 3655.849734, day 30, sell 5 units at price 5237.050170, investment 413.894752 %, total balance 8892.899904, day 33: buy 5 units at price 5050.849915, total balance 3842.049989 day 35: buy 5 units at price 5025.750120, total balance -1183.700131 day 42, sell 5 units at price 5245.750120, investment 3.021467 %, total balance 4062.049989, day 43, sell 5 units at price 5320.949705, investment 2.725007 %, total balance 9382.999694, day 44, sell 2 units at price 2154.280030, investment -57.348168 %, total balance 11537.279724, day 45: buy 1 units at price 1070.680054, total balance 10466.599670 day 48: buy 1 units at price 1060.119995, total balance 9406.479675 day 51: buy 5 units at price 5240.700075, total balance 4165.779600 day 52: buy 5 units at price 5232.000120, total balance -1066.220520 day 56, sell 5 units at price 5511.149900, investment 9.658255 %, total balance 4444.929380, day 57, sell 5 units at price 5534.699705, investment 416.933110 %, total balance 9979.629085, day 58, sell 2 units at price 2212.520020, investment 108.704678 %, total balance 12192.149105, day 59: buy 5 units at price 5513.049925, total balance 6679.099180 day 60: buy 5 units at price 5527.600100, total balance 1151.499080 day 62: buy 5 units at price 5608.800050, total balance -4457.300970 day 69, sell 5 units at price 5851.849975, investment 11.661608 %, total balance 1394.549005, day 70, sell 5 units at price 5879.199830, investment 12.370025 %, total balance 7273.748835, day 71, sell 5 units at price 5877.899780, investment 6.617931 %, total balance 13151.648615, day 72: buy 5 units at price 5818.449705, total balance 7333.198910 day 73, sell 5 units at price 5849.699705, investment 5.827115 %, total balance 13182.898615, day 78: buy 5 units at price 5242.899780, total balance 7939.998835 day 79: buy 5 units at price 5007.600100, total balance 2932.398735 day 80: buy 5 units at price 5188.900145, total balance -2256.501410 day 87, sell 5 units at price 5556.699830, investment -0.928901 %, total balance 3300.198420, day 89: buy 1 units at price 1126.790039, total balance 2173.408381 day 90, sell 5 units at price 5718.750000, investment -1.713510 %, total balance 7892.158381, day 93: buy 5 units at price 5347.600100, total balance 2544.558281 day 96: buy 5 units at price 5475.300295, total balance -2930.742014 day 98, sell 5 units at price 5630.000000, investment 7.383323 %, total balance 2699.257986, day 99, sell 5 units at price 5800.200195, investment 15.827943 %, total balance 8499.458181, day 100, sell 5 units at price 5822.500000, investment 12.210677 %, total balance 14321.958181, day 101: buy 1 units at price 1138.170044, total balance 13183.788137 day 102, sell 2 units at price 2298.979980, investment 104.029136 %, total balance 15482.768117, day 111: buy 5 units at price 5025.499880, total balance 10457.268237 day 113: buy 5 units at price 5158.950195, total balance 5298.318042 day 114: buy 5 units at price 5032.349855, total balance 265.968187 day 116, sell 5 units at price 5125.700075, investment 1.993835 %, total balance 5391.668262, day 118: buy 1 units at price 1007.039978, total balance 4384.628284 day 119: buy 5 units at price 5077.250060, total balance -692.621776 day 126, sell 5 units at price 5360.399780, investment 3.904856 %, total balance 4667.778004, day 128, sell 5 units at price 5364.799805, investment 6.606257 %, total balance 10032.577809, day 129, sell 5 units at price 5337.249755, investment 429.993831 %, total balance 15369.827564, day 131, sell 1 units at price 1021.179993, investment -79.887144 %, total balance 16391.007557, day 132: buy 1 units at price 1040.040039, total balance 15350.967518 day 135, sell 1 units at price 1037.310059, investment -0.262488 %, total balance 16388.277577, day 136: buy 5 units at price 5121.900025, total balance 11266.377552 day 137: buy 1 units at price 1023.719971, total balance 10242.657581 day 138: buy 5 units at price 5241.049805, total balance 5001.607776 day 139: buy 5 units at price 5273.950195, total balance -272.342419 day 141, sell 5 units at price 5413.800050, investment 5.699057 %, total balance 5141.457631, day 142, sell 5 units at price 5487.849730, investment 436.069422 %, total balance 10629.307361, day 144: buy 1 units at price 1100.199951, total balance 9529.107410 day 147: buy 1 units at price 1078.589966, total balance 8450.517444 day 148: buy 5 units at price 5331.799925, total balance 3118.717519 day 150: buy 5 units at price 5348.649900, total balance -2229.932381 day 159, sell 5 units at price 5698.300170, investment 8.724404 %, total balance 3468.367789, day 161, sell 5 units at price 5619.299925, investment 6.548218 %, total balance 9087.667714, day 162: buy 5 units at price 5604.349975, total balance 3483.317739 day 168: buy 1 units at price 1173.459961, total balance 2309.857778 day 170, sell 5 units at price 5849.199830, investment 431.648799 %, total balance 8159.057608, day 171, sell 5 units at price 5788.300170, investment 436.654368 %, total balance 13947.357778, day 172, sell 4 units at price 4621.919920, investment -13.314078 %, total balance 18569.277698, day 173: buy 5 units at price 5624.050295, total balance 12945.227403 day 175: buy 5 units at price 5519.899900, total balance 7425.327503 day 176: buy 5 units at price 5571.099855, total balance 1854.227648 day 179: buy 5 units at price 5514.450075, total balance -3660.222427 day 184, sell 5 units at price 5769.500120, investment 7.868345 %, total balance 2109.277693, day 187: buy 5 units at price 5919.299925, total balance -3810.022232 day 194, sell 5 units at price 6318.499755, investment 12.742776 %, total balance 2508.477523, day 195, sell 5 units at price 6341.649780, investment 440.423192 %, total balance 8850.127303, day 196, sell 5 units at price 6192.500000, investment 10.107479 %, total balance 15042.627303, day 197, sell 5 units at price 6098.699950, investment 10.485698 %, total balance 21141.327253, day 204: buy 5 units at price 6228.049925, total balance 14913.277328 day 205: buy 5 units at price 6245.499880, total balance 8667.777448 day 206: buy 5 units at price 6188.049925, total balance 2479.727523 day 207, sell 5 units at price 6175.050050, investment -0.850987 %, total balance 8654.777573, day 208, sell 5 units at price 6210.499880, investment -0.560404 %, total balance 14865.277453, day 209: buy 5 units at price 6071.900025, total balance 8793.377428 day 210: buy 5 units at price 6032.449950, total balance 2760.927478 day 211: buy 5 units at price 6004.799805, total balance -3243.872327 day 219, sell 5 units at price 6246.500245, investment 0.944568 %, total balance 3002.627918, day 220, sell 5 units at price 6195.599975, investment 2.037253 %, total balance 9198.227893, day 221, sell 5 units at price 6090.949705, investment 0.969751 %, total balance 15289.177598, day 227: buy 1 units at price 1177.359985, total balance 14111.817613 day 229: buy 5 units at price 5876.649780, total balance 8235.167833 day 230: buy 5 units at price 5862.650145, total balance 2372.517688 day 231: buy 1 units at price 1156.050049, total balance 1216.467639 day 232: buy 1 units at price 1161.219971, total balance 55.247668 day 233, sell 5 units at price 5855.449830, investment -2.487177 %, total balance 5910.697498, day 234, sell 5 units at price 5934.349975, investment 404.038701 %, total balance 11845.047473, day 235: buy 5 units at price 5830.449830, total balance 6014.597643 day 238: buy 1 units at price 1180.489990, total balance 4834.107653 day 242, sell 5 units at price 6000.549925, investment 2.108347 %, total balance 10834.657578, day 243, sell 5 units at price 6014.749755, investment 2.594383 %, total balance 16849.407333, day 245, sell 4 units at price 4629.399904, investment 300.449782 %, total balance 21478.807237, total gained 11478.807237, total investment 114.788072 %