# Created by: Sergiu Iatco / 2021.10.01
# https://github.com/itsergiu/Predict-S-P-500-correction-with-Shiller-PE-Ratio
# How to predict S&P 500 correction
# Nadeem Walayat - The Market Oracle
# http://www.marketoracle.co.uk/Article69423.html
# Vitaliy Katsenelson
# https://contrarianedge.com/sideways-market/
# https://www.marketwatch.com/story/market-analysts-cant-agree-on-where-stocks-are-going-next-so-double-check-the-data-before-you-buy-or-sell-11632447577
# Purpose: to build a machine learning model to predict S&P 500 correction within next 6 months
Disclaimer:
Author do not assume and hereby disclaim any liability to any party for any loss, damage, or disruption caused by errors or omissions, whether such errors or omissions result from negligence, accident, or any other cause. The software is provided "as is", without warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose and noninfringement.
The below is a matter of opinion provided for general information purposes only and is not intended as investment advice. Information and analysis above are derived from sources and utilising methods believed to be reliable, but I cannot accept responsibility for any trading losses you may incur as a result of this analysis. Individuals should consult with their personal financial advisors before engaging in any trading activities.
# !pip install pandas
# !pip install dateparser
# !pip install xgboost
# !pip install sklearn
# !pip install numpy
# !pip install plotly_express
# !pip install fredapi
import pandas as pd
# import dateparser
import xgboost as xgb
from sklearn import model_selection
from sklearn.metrics import r2_score
import numpy as np
from fredapi import Fred
import plotly.express as px
from IPython.display import display
url_per='https://www.multpl.com/shiller-pe/table/by-month'
ls_tables = pd.read_html(url_per)
type(ls_tables)
list
len(ls_tables)
1
ls_tables[0]
Date | Value Value | |
---|---|---|
0 | Oct 1, 2021 | 37.63 |
1 | Aug 1, 2021 | 38.44 |
2 | Jul 1, 2021 | 37.68 |
3 | Jun 1, 2021 | 36.86 |
4 | May 1, 2021 | 36.62 |
... | ... | ... |
1803 | Jun 1, 1871 | 12.59 |
1804 | May 1, 1871 | 12.59 |
1805 | Apr 1, 1871 | 12.05 |
1806 | Mar 1, 1871 | 11.19 |
1807 | Feb 1, 1871 | 10.92 |
1808 rows × 2 columns
df_per=ls_tables[0]
df_per.dtypes
Date object Value Value float64 dtype: object
cols = df_per.columns.tolist()
cols
['Date', 'Value Value']
df_per.rename(
columns=({ cols[1]: 'PER'}),
inplace=True,)
df_per.head()
Date | PER | |
---|---|---|
0 | Oct 1, 2021 | 37.63 |
1 | Aug 1, 2021 | 38.44 |
2 | Jul 1, 2021 | 37.68 |
3 | Jun 1, 2021 | 36.86 |
4 | May 1, 2021 | 36.62 |
url_sp='https://www.multpl.com/s-p-500-historical-prices/table/by-month'
ls_tables = pd.read_html(url_sp)
len(ls_tables)
1
df_sp=ls_tables[0]
df_sp.head()
Date | Price Value | |
---|---|---|
0 | Oct 1, 2021 | 4357.04 |
1 | Aug 1, 2021 | 4450.37 |
2 | Jul 1, 2021 | 4358.13 |
3 | Jun 1, 2021 | 4238.49 |
4 | May 1, 2021 | 4167.85 |
cols = df_sp.columns.tolist()
cols
['Date', 'Price Value']
df_sp.rename(
columns=({ cols[1]: 'Price'}),
inplace=True,)
df_sp.head()
Date | Price | |
---|---|---|
0 | Oct 1, 2021 | 4357.04 |
1 | Aug 1, 2021 | 4450.37 |
2 | Jul 1, 2021 | 4358.13 |
3 | Jun 1, 2021 | 4238.49 |
4 | May 1, 2021 | 4167.85 |
df_per.index, df_sp.index
(RangeIndex(start=0, stop=1808, step=1), RangeIndex(start=0, stop=1809, step=1))
df = df_per.merge(df_sp, left_on='Date', right_on='Date')
df
Date | PER | Price | |
---|---|---|---|
0 | Oct 1, 2021 | 37.63 | 4357.04 |
1 | Aug 1, 2021 | 38.44 | 4450.37 |
2 | Jul 1, 2021 | 37.68 | 4358.13 |
3 | Jun 1, 2021 | 36.86 | 4238.49 |
4 | May 1, 2021 | 36.62 | 4167.85 |
... | ... | ... | ... |
1803 | Jun 1, 1871 | 12.59 | 4.82 |
1804 | May 1, 1871 | 12.59 | 4.86 |
1805 | Apr 1, 1871 | 12.05 | 4.74 |
1806 | Mar 1, 1871 | 11.19 | 4.61 |
1807 | Feb 1, 1871 | 10.92 | 4.50 |
1808 rows × 3 columns
df.dtypes
Date object PER float64 Price float64 dtype: object
df['Date']=pd.to_datetime(df['Date'])
df
Date | PER | Price | |
---|---|---|---|
0 | 2021-10-01 | 37.63 | 4357.04 |
1 | 2021-08-01 | 38.44 | 4450.37 |
2 | 2021-07-01 | 37.68 | 4358.13 |
3 | 2021-06-01 | 36.86 | 4238.49 |
4 | 2021-05-01 | 36.62 | 4167.85 |
... | ... | ... | ... |
1803 | 1871-06-01 | 12.59 | 4.82 |
1804 | 1871-05-01 | 12.59 | 4.86 |
1805 | 1871-04-01 | 12.05 | 4.74 |
1806 | 1871-03-01 | 11.19 | 4.61 |
1807 | 1871-02-01 | 10.92 | 4.50 |
1808 rows × 3 columns
df.head()
Date | PER | Price | |
---|---|---|---|
0 | 2021-10-01 | 37.63 | 4357.04 |
1 | 2021-08-01 | 38.44 | 4450.37 |
2 | 2021-07-01 | 37.68 | 4358.13 |
3 | 2021-06-01 | 36.86 | 4238.49 |
4 | 2021-05-01 | 36.62 | 4167.85 |
df.set_index(['Date'], inplace=True)
df['Date']=df.index
df
PER | Price | Date | |
---|---|---|---|
Date | |||
2021-10-01 | 37.63 | 4357.04 | 2021-10-01 |
2021-08-01 | 38.44 | 4450.37 | 2021-08-01 |
2021-07-01 | 37.68 | 4358.13 | 2021-07-01 |
2021-06-01 | 36.86 | 4238.49 | 2021-06-01 |
2021-05-01 | 36.62 | 4167.85 | 2021-05-01 |
... | ... | ... | ... |
1871-06-01 | 12.59 | 4.82 | 1871-06-01 |
1871-05-01 | 12.59 | 4.86 | 1871-05-01 |
1871-04-01 | 12.05 | 4.74 | 1871-04-01 |
1871-03-01 | 11.19 | 4.61 | 1871-03-01 |
1871-02-01 | 10.92 | 4.50 | 1871-02-01 |
1808 rows × 3 columns
# https://pandas.pydata.org/docs/reference/api/pandas.melt.html
df_per = pd.melt(df, id_vars=['Date'], value_vars=['PER'])
px.line(df_per, x='Date', y='value', color='variable')
df.describe()
PER | Price | |
---|---|---|
count | 1808.000000 | 1808.000000 |
mean | 16.864635 | 326.200664 |
std | 6.937856 | 680.270104 |
min | 4.780000 | 2.730000 |
25% | 11.682500 | 7.907500 |
50% | 15.855000 | 17.370000 |
75% | 20.472500 | 164.400000 |
max | 44.190000 | 4450.370000 |
df.dtypes
PER float64 Price float64 Date datetime64[ns] dtype: object
ls_price_p = ['Price'+str(i)+'F' for i in range(7)]
ls_price_p
['Price0F', 'Price1F', 'Price2F', 'Price3F', 'Price4F', 'Price5F', 'Price6F']
ls_price_p[1:]
['Price1F', 'Price2F', 'Price3F', 'Price4F', 'Price5F', 'Price6F']
ls_price_p[0]
'Price0F'
df.columns.tolist()
['PER', 'Price', 'Date']
df.head(10)
PER | Price | Date | |
---|---|---|---|
Date | |||
2021-10-01 | 37.63 | 4357.04 | 2021-10-01 |
2021-08-01 | 38.44 | 4450.37 | 2021-08-01 |
2021-07-01 | 37.68 | 4358.13 | 2021-07-01 |
2021-06-01 | 36.86 | 4238.49 | 2021-06-01 |
2021-05-01 | 36.62 | 4167.85 | 2021-05-01 |
2021-04-01 | 36.72 | 4141.18 | 2021-04-01 |
2021-03-01 | 35.04 | 3910.51 | 2021-03-01 |
2021-02-01 | 35.10 | 3883.43 | 2021-02-01 |
2021-01-01 | 34.51 | 3793.75 | 2021-01-01 |
2020-12-01 | 33.77 | 3695.31 | 2020-12-01 |
df.tail(10)
PER | Price | Date | |
---|---|---|---|
Date | |||
1871-11-01 | 11.60 | 4.64 | 1871-11-01 |
1871-10-01 | 11.47 | 4.59 | 1871-10-01 |
1871-09-01 | 12.31 | 4.84 | 1871-09-01 |
1871-08-01 | 12.55 | 4.79 | 1871-08-01 |
1871-07-01 | 12.27 | 4.73 | 1871-07-01 |
1871-06-01 | 12.59 | 4.82 | 1871-06-01 |
1871-05-01 | 12.59 | 4.86 | 1871-05-01 |
1871-04-01 | 12.05 | 4.74 | 1871-04-01 |
1871-03-01 | 11.19 | 4.61 | 1871-03-01 |
1871-02-01 | 10.92 | 4.50 | 1871-02-01 |
for i,e in enumerate(ls_price_p):
df[e]=df.Price.shift(i)
df['Price1P'] = df['Price'].shift(-1)
df['Price_Var']=df['Price']/df['Price1P']-1
df.head(10)
PER | Price | Date | Price0F | Price1F | Price2F | Price3F | Price4F | Price5F | Price6F | Price1P | Price_Var | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | ||||||||||||
2021-10-01 | 37.63 | 4357.04 | 2021-10-01 | 4357.04 | NaN | NaN | NaN | NaN | NaN | NaN | 4450.37 | -0.020971 |
2021-08-01 | 38.44 | 4450.37 | 2021-08-01 | 4450.37 | 4357.04 | NaN | NaN | NaN | NaN | NaN | 4358.13 | 0.021165 |
2021-07-01 | 37.68 | 4358.13 | 2021-07-01 | 4358.13 | 4450.37 | 4357.04 | NaN | NaN | NaN | NaN | 4238.49 | 0.028227 |
2021-06-01 | 36.86 | 4238.49 | 2021-06-01 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | NaN | NaN | NaN | 4167.85 | 0.016949 |
2021-05-01 | 36.62 | 4167.85 | 2021-05-01 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | NaN | NaN | 4141.18 | 0.006440 |
2021-04-01 | 36.72 | 4141.18 | 2021-04-01 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | NaN | 3910.51 | 0.058987 |
2021-03-01 | 35.04 | 3910.51 | 2021-03-01 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | 3883.43 | 0.006973 |
2021-02-01 | 35.10 | 3883.43 | 2021-02-01 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 3793.75 | 0.023639 |
2021-01-01 | 34.51 | 3793.75 | 2021-01-01 | 3793.75 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 3695.31 | 0.026639 |
2020-12-01 | 33.77 | 3695.31 | 2020-12-01 | 3695.31 | 3793.75 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 3548.99 | 0.041229 |
df['PriceFmin']=df[ls_price_p].min(axis=1)
df.head(10)
PER | Price | Date | Price0F | Price1F | Price2F | Price3F | Price4F | Price5F | Price6F | Price1P | Price_Var | PriceFmin | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | |||||||||||||
2021-10-01 | 37.63 | 4357.04 | 2021-10-01 | 4357.04 | NaN | NaN | NaN | NaN | NaN | NaN | 4450.37 | -0.020971 | 4357.04 |
2021-08-01 | 38.44 | 4450.37 | 2021-08-01 | 4450.37 | 4357.04 | NaN | NaN | NaN | NaN | NaN | 4358.13 | 0.021165 | 4357.04 |
2021-07-01 | 37.68 | 4358.13 | 2021-07-01 | 4358.13 | 4450.37 | 4357.04 | NaN | NaN | NaN | NaN | 4238.49 | 0.028227 | 4357.04 |
2021-06-01 | 36.86 | 4238.49 | 2021-06-01 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | NaN | NaN | NaN | 4167.85 | 0.016949 | 4238.49 |
2021-05-01 | 36.62 | 4167.85 | 2021-05-01 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | NaN | NaN | 4141.18 | 0.006440 | 4167.85 |
2021-04-01 | 36.72 | 4141.18 | 2021-04-01 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | NaN | 3910.51 | 0.058987 | 4141.18 |
2021-03-01 | 35.04 | 3910.51 | 2021-03-01 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | 3883.43 | 0.006973 | 3910.51 |
2021-02-01 | 35.10 | 3883.43 | 2021-02-01 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 3793.75 | 0.023639 | 3883.43 |
2021-01-01 | 34.51 | 3793.75 | 2021-01-01 | 3793.75 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 3695.31 | 0.026639 | 3793.75 |
2020-12-01 | 33.77 | 3695.31 | 2020-12-01 | 3695.31 | 3793.75 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 3548.99 | 0.041229 | 3695.31 |
col_y = 'Price_Corr_6M'
df[col_y]=1-df['PriceFmin']/df['Price0F']
df.head(10)
PER | Price | Date | Price0F | Price1F | Price2F | Price3F | Price4F | Price5F | Price6F | Price1P | Price_Var | PriceFmin | Price_Corr_6M | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | ||||||||||||||
2021-10-01 | 37.63 | 4357.04 | 2021-10-01 | 4357.04 | NaN | NaN | NaN | NaN | NaN | NaN | 4450.37 | -0.020971 | 4357.04 | 0.000000 |
2021-08-01 | 38.44 | 4450.37 | 2021-08-01 | 4450.37 | 4357.04 | NaN | NaN | NaN | NaN | NaN | 4358.13 | 0.021165 | 4357.04 | 0.020971 |
2021-07-01 | 37.68 | 4358.13 | 2021-07-01 | 4358.13 | 4450.37 | 4357.04 | NaN | NaN | NaN | NaN | 4238.49 | 0.028227 | 4357.04 | 0.000250 |
2021-06-01 | 36.86 | 4238.49 | 2021-06-01 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | NaN | NaN | NaN | 4167.85 | 0.016949 | 4238.49 | 0.000000 |
2021-05-01 | 36.62 | 4167.85 | 2021-05-01 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | NaN | NaN | 4141.18 | 0.006440 | 4167.85 | 0.000000 |
2021-04-01 | 36.72 | 4141.18 | 2021-04-01 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | NaN | 3910.51 | 0.058987 | 4141.18 | 0.000000 |
2021-03-01 | 35.04 | 3910.51 | 2021-03-01 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | 3883.43 | 0.006973 | 3910.51 | 0.000000 |
2021-02-01 | 35.10 | 3883.43 | 2021-02-01 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 3793.75 | 0.023639 | 3883.43 | 0.000000 |
2021-01-01 | 34.51 | 3793.75 | 2021-01-01 | 3793.75 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 3695.31 | 0.026639 | 3793.75 | 0.000000 |
2020-12-01 | 33.77 | 3695.31 | 2020-12-01 | 3695.31 | 3793.75 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 3548.99 | 0.041229 | 3695.31 | 0.000000 |
df[df.isna().any(axis=1)]
PER | Price | Date | Price0F | Price1F | Price2F | Price3F | Price4F | Price5F | Price6F | Price1P | Price_Var | PriceFmin | Price_Corr_6M | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | ||||||||||||||
2021-10-01 | 37.63 | 4357.04 | 2021-10-01 | 4357.04 | NaN | NaN | NaN | NaN | NaN | NaN | 4450.37 | -0.020971 | 4357.04 | 0.000000 |
2021-08-01 | 38.44 | 4450.37 | 2021-08-01 | 4450.37 | 4357.04 | NaN | NaN | NaN | NaN | NaN | 4358.13 | 0.021165 | 4357.04 | 0.020971 |
2021-07-01 | 37.68 | 4358.13 | 2021-07-01 | 4358.13 | 4450.37 | 4357.04 | NaN | NaN | NaN | NaN | 4238.49 | 0.028227 | 4357.04 | 0.000250 |
2021-06-01 | 36.86 | 4238.49 | 2021-06-01 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | NaN | NaN | NaN | 4167.85 | 0.016949 | 4238.49 | 0.000000 |
2021-05-01 | 36.62 | 4167.85 | 2021-05-01 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | NaN | NaN | 4141.18 | 0.006440 | 4167.85 | 0.000000 |
2021-04-01 | 36.72 | 4141.18 | 2021-04-01 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | NaN | 3910.51 | 0.058987 | 4141.18 | 0.000000 |
1871-02-01 | 10.92 | 4.50 | 1871-02-01 | 4.50 | 4.61 | 4.74 | 4.86 | 4.82 | 4.73 | 4.79 | NaN | NaN | 4.50 | 0.000000 |
df.loc[df.isna().any(axis=1),col_y]=np.nan # set to zero corrections with Nan rows
df[df.isna().any(axis=1)]
PER | Price | Date | Price0F | Price1F | Price2F | Price3F | Price4F | Price5F | Price6F | Price1P | Price_Var | PriceFmin | Price_Corr_6M | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | ||||||||||||||
2021-10-01 | 37.63 | 4357.04 | 2021-10-01 | 4357.04 | NaN | NaN | NaN | NaN | NaN | NaN | 4450.37 | -0.020971 | 4357.04 | NaN |
2021-08-01 | 38.44 | 4450.37 | 2021-08-01 | 4450.37 | 4357.04 | NaN | NaN | NaN | NaN | NaN | 4358.13 | 0.021165 | 4357.04 | NaN |
2021-07-01 | 37.68 | 4358.13 | 2021-07-01 | 4358.13 | 4450.37 | 4357.04 | NaN | NaN | NaN | NaN | 4238.49 | 0.028227 | 4357.04 | NaN |
2021-06-01 | 36.86 | 4238.49 | 2021-06-01 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | NaN | NaN | NaN | 4167.85 | 0.016949 | 4238.49 | NaN |
2021-05-01 | 36.62 | 4167.85 | 2021-05-01 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | NaN | NaN | 4141.18 | 0.006440 | 4167.85 | NaN |
2021-04-01 | 36.72 | 4141.18 | 2021-04-01 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | NaN | 3910.51 | 0.058987 | 4141.18 | NaN |
1871-02-01 | 10.92 | 4.50 | 1871-02-01 | 4.50 | 4.61 | 4.74 | 4.86 | 4.82 | 4.73 | 4.79 | NaN | NaN | 4.50 | NaN |
df_initial=df.copy()
df_initial.head()
PER | Price | Date | Price0F | Price1F | Price2F | Price3F | Price4F | Price5F | Price6F | Price1P | Price_Var | PriceFmin | Price_Corr_6M | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | ||||||||||||||
2021-10-01 | 37.63 | 4357.04 | 2021-10-01 | 4357.04 | NaN | NaN | NaN | NaN | NaN | NaN | 4450.37 | -0.020971 | 4357.04 | NaN |
2021-08-01 | 38.44 | 4450.37 | 2021-08-01 | 4450.37 | 4357.04 | NaN | NaN | NaN | NaN | NaN | 4358.13 | 0.021165 | 4357.04 | NaN |
2021-07-01 | 37.68 | 4358.13 | 2021-07-01 | 4358.13 | 4450.37 | 4357.04 | NaN | NaN | NaN | NaN | 4238.49 | 0.028227 | 4357.04 | NaN |
2021-06-01 | 36.86 | 4238.49 | 2021-06-01 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | NaN | NaN | NaN | 4167.85 | 0.016949 | 4238.49 | NaN |
2021-05-01 | 36.62 | 4167.85 | 2021-05-01 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | NaN | NaN | 4141.18 | 0.006440 | 4167.85 | NaN |
df.dropna(axis=0, how='any', inplace=True) # remove NaN rowss
df[df[col_y]>0].describe()
PER | Price | Price0F | Price1F | Price2F | Price3F | Price4F | Price5F | Price6F | Price1P | Price_Var | PriceFmin | Price_Corr_6M | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 1165.000000 | 1165.000000 | 1165.000000 | 1165.000000 | 1165.000000 | 1165.000000 | 1165.000000 | 1165.000000 | 1165.000000 | 1165.000000 | 1165.000000 | 1165.000000 | 1165.000000 |
mean | 16.717880 | 269.130712 | 269.130712 | 266.352798 | 264.965476 | 264.704695 | 264.783270 | 265.672489 | 266.778481 | 268.215039 | -0.000321 | 250.402266 | 0.073373 |
std | 6.885325 | 577.596722 | 577.596722 | 571.992545 | 569.557731 | 569.693467 | 570.415241 | 573.627227 | 577.305673 | 573.246742 | 0.041202 | 537.571101 | 0.074285 |
min | 4.780000 | 2.940000 | 2.940000 | 2.730000 | 2.730000 | 2.730000 | 2.730000 | 2.730000 | 2.730000 | 2.940000 | -0.239709 | 2.730000 | 0.000244 |
25% | 11.690000 | 7.090000 | 7.090000 | 6.850000 | 6.800000 | 6.700000 | 6.680000 | 6.700000 | 6.780000 | 7.180000 | -0.020408 | 6.260000 | 0.020737 |
50% | 15.810000 | 14.820000 | 14.820000 | 14.600000 | 14.330000 | 14.330000 | 14.300000 | 14.100000 | 13.970000 | 14.860000 | 0.001581 | 13.870000 | 0.050343 |
75% | 19.920000 | 107.700000 | 107.700000 | 107.200000 | 106.500000 | 106.500000 | 107.200000 | 107.400000 | 107.200000 | 107.400000 | 0.023299 | 103.000000 | 0.101518 |
max | 44.190000 | 3391.710000 | 3391.710000 | 3365.520000 | 3418.700000 | 3548.990000 | 3695.310000 | 3793.750000 | 3883.430000 | 3278.200000 | 0.502994 | 3365.520000 | 0.469682 |
last_months=240
df_per = pd.melt(df.head(last_months), id_vars='Date', value_vars=['Price'])
px.line(df_per, x='Date', y='value', color='variable')
# min_correction=0.15
min_correction=0.15 # take a look at big corrections
df[df[col_y]>min_correction].head(10)
PER | Price | Date | Price0F | Price1F | Price2F | Price3F | Price4F | Price5F | Price6F | Price1P | Price_Var | PriceFmin | Price_Corr_6M | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | ||||||||||||||
2020-02-01 | 30.73 | 3277.31 | 2020-02-01 | 3277.31 | 2652.39 | 2761.98 | 2919.61 | 3104.66 | 3207.62 | 3391.71 | 3278.20 | -0.000271 | 2652.39 | 0.190681 |
2020-01-01 | 30.99 | 3278.20 | 2020-01-01 | 3278.20 | 3277.31 | 2652.39 | 2761.98 | 2919.61 | 3104.66 | 3207.62 | 3176.75 | 0.031935 | 2652.39 | 0.190900 |
2019-12-01 | 30.33 | 3176.75 | 2019-12-01 | 3176.75 | 3278.20 | 3277.31 | 2652.39 | 2761.98 | 2919.61 | 3104.66 | 3104.90 | 0.023141 | 2652.39 | 0.165062 |
2008-10-01 | 16.39 | 968.80 | 2008-10-01 | 968.80 | 883.04 | 877.56 | 865.58 | 805.23 | 757.13 | 848.15 | 1216.95 | -0.203911 | 757.13 | 0.218487 |
2008-09-01 | 20.36 | 1216.95 | 2008-09-01 | 1216.95 | 968.80 | 883.04 | 877.56 | 865.58 | 805.23 | 757.13 | 1281.47 | -0.050348 | 757.13 | 0.377846 |
2008-08-01 | 21.40 | 1281.47 | 2008-08-01 | 1281.47 | 1216.95 | 968.80 | 883.04 | 877.56 | 865.58 | 805.23 | 1257.33 | 0.019199 | 805.23 | 0.371636 |
2008-07-01 | 20.91 | 1257.33 | 2008-07-01 | 1257.33 | 1281.47 | 1216.95 | 968.80 | 883.04 | 877.56 | 865.58 | 1341.25 | -0.062568 | 865.58 | 0.311573 |
2008-06-01 | 22.42 | 1341.25 | 2008-06-01 | 1341.25 | 1257.33 | 1281.47 | 1216.95 | 968.80 | 883.04 | 877.56 | 1403.22 | -0.044163 | 877.56 | 0.345715 |
2008-05-01 | 23.70 | 1403.22 | 2008-05-01 | 1403.22 | 1341.25 | 1257.33 | 1281.47 | 1216.95 | 968.80 | 883.04 | 1370.47 | 0.023897 | 883.04 | 0.370705 |
2008-04-01 | 23.36 | 1370.47 | 2008-04-01 | 1370.47 | 1403.22 | 1341.25 | 1257.33 | 1281.47 | 1216.95 | 968.80 | 1316.94 | 0.040647 | 968.80 | 0.293089 |
df[df[col_y]>min_correction][col_y].describe()
count 147.000000 mean 0.229457 std 0.072282 min 0.150127 25% 0.177002 50% 0.205638 75% 0.257215 max 0.469682 Name: Price_Corr_6M, dtype: float64
# plot PER & Price_correction
min_correction=0.05 # force learning above ?
# min_correction=0
if min_correction!=0:
df[col_y] = df[col_y].apply(lambda x: 0 if x<min_correction else x)
# df[col_y] = -df[col_y] # reverse sign
df_per_pc=df[['Date','PER',col_y]].copy()
df_per_pc[[col_y]]=100*df_per_pc[[col_y]]
df_per_pc[col_y]=-df_per_pc[col_y] # reverse sign for plotting
last_months=2400
df_plot = pd.melt(df_per_pc, id_vars='Date', value_vars=['PER',col_y])
px.line(df_plot, x='Date', y='value', color='variable')
pd.set_option('display.max_columns', 500)
df.head(10)
PER | Price | Date | Price0F | Price1F | Price2F | Price3F | Price4F | Price5F | Price6F | Price1P | Price_Var | PriceFmin | Price_Corr_6M | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | ||||||||||||||
2021-03-01 | 35.04 | 3910.51 | 2021-03-01 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | 3883.43 | 0.006973 | 3910.51 | 0.0 |
2021-02-01 | 35.10 | 3883.43 | 2021-02-01 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 3793.75 | 0.023639 | 3883.43 | 0.0 |
2021-01-01 | 34.51 | 3793.75 | 2021-01-01 | 3793.75 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 3695.31 | 0.026639 | 3793.75 | 0.0 |
2020-12-01 | 33.77 | 3695.31 | 2020-12-01 | 3695.31 | 3793.75 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 3548.99 | 0.041229 | 3695.31 | 0.0 |
2020-11-01 | 32.47 | 3548.99 | 2020-11-01 | 3548.99 | 3695.31 | 3793.75 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 3418.70 | 0.038111 | 3548.99 | 0.0 |
2020-10-01 | 31.28 | 3418.70 | 2020-10-01 | 3418.70 | 3548.99 | 3695.31 | 3793.75 | 3883.43 | 3910.51 | 4141.18 | 3365.52 | 0.015801 | 3418.70 | 0.0 |
2020-09-01 | 30.84 | 3365.52 | 2020-09-01 | 3365.52 | 3418.70 | 3548.99 | 3695.31 | 3793.75 | 3883.43 | 3910.51 | 3391.71 | -0.007722 | 3365.52 | 0.0 |
2020-08-01 | 31.16 | 3391.71 | 2020-08-01 | 3391.71 | 3365.52 | 3418.70 | 3548.99 | 3695.31 | 3793.75 | 3883.43 | 3207.62 | 0.057391 | 3365.52 | 0.0 |
2020-07-01 | 29.60 | 3207.62 | 2020-07-01 | 3207.62 | 3391.71 | 3365.52 | 3418.70 | 3548.99 | 3695.31 | 3793.75 | 3104.66 | 0.033163 | 3207.62 | 0.0 |
2020-06-01 | 28.84 | 3104.66 | 2020-06-01 | 3104.66 | 3207.62 | 3391.71 | 3365.52 | 3418.70 | 3548.99 | 3695.31 | 2919.61 | 0.063382 | 3104.66 | 0.0 |
df.head(36)
PER | Price | Date | Price0F | Price1F | Price2F | Price3F | Price4F | Price5F | Price6F | Price1P | Price_Var | PriceFmin | Price_Corr_6M | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | ||||||||||||||
2021-03-01 | 35.04 | 3910.51 | 2021-03-01 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | 3883.43 | 0.006973 | 3910.51 | 0.000000 |
2021-02-01 | 35.10 | 3883.43 | 2021-02-01 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 3793.75 | 0.023639 | 3883.43 | 0.000000 |
2021-01-01 | 34.51 | 3793.75 | 2021-01-01 | 3793.75 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 3695.31 | 0.026639 | 3793.75 | 0.000000 |
2020-12-01 | 33.77 | 3695.31 | 2020-12-01 | 3695.31 | 3793.75 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 3548.99 | 0.041229 | 3695.31 | 0.000000 |
2020-11-01 | 32.47 | 3548.99 | 2020-11-01 | 3548.99 | 3695.31 | 3793.75 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 3418.70 | 0.038111 | 3548.99 | 0.000000 |
2020-10-01 | 31.28 | 3418.70 | 2020-10-01 | 3418.70 | 3548.99 | 3695.31 | 3793.75 | 3883.43 | 3910.51 | 4141.18 | 3365.52 | 0.015801 | 3418.70 | 0.000000 |
2020-09-01 | 30.84 | 3365.52 | 2020-09-01 | 3365.52 | 3418.70 | 3548.99 | 3695.31 | 3793.75 | 3883.43 | 3910.51 | 3391.71 | -0.007722 | 3365.52 | 0.000000 |
2020-08-01 | 31.16 | 3391.71 | 2020-08-01 | 3391.71 | 3365.52 | 3418.70 | 3548.99 | 3695.31 | 3793.75 | 3883.43 | 3207.62 | 0.057391 | 3365.52 | 0.000000 |
2020-07-01 | 29.60 | 3207.62 | 2020-07-01 | 3207.62 | 3391.71 | 3365.52 | 3418.70 | 3548.99 | 3695.31 | 3793.75 | 3104.66 | 0.033163 | 3207.62 | 0.000000 |
2020-06-01 | 28.84 | 3104.66 | 2020-06-01 | 3104.66 | 3207.62 | 3391.71 | 3365.52 | 3418.70 | 3548.99 | 3695.31 | 2919.61 | 0.063382 | 3104.66 | 0.000000 |
2020-05-01 | 27.33 | 2919.61 | 2020-05-01 | 2919.61 | 3104.66 | 3207.62 | 3391.71 | 3365.52 | 3418.70 | 3548.99 | 2761.98 | 0.057071 | 2919.61 | 0.000000 |
2020-04-01 | 25.93 | 2761.98 | 2020-04-01 | 2761.98 | 2919.61 | 3104.66 | 3207.62 | 3391.71 | 3365.52 | 3418.70 | 2652.39 | 0.041317 | 2761.98 | 0.000000 |
2020-03-01 | 24.82 | 2652.39 | 2020-03-01 | 2652.39 | 2761.98 | 2919.61 | 3104.66 | 3207.62 | 3391.71 | 3365.52 | 3277.31 | -0.190681 | 2652.39 | 0.000000 |
2020-02-01 | 30.73 | 3277.31 | 2020-02-01 | 3277.31 | 2652.39 | 2761.98 | 2919.61 | 3104.66 | 3207.62 | 3391.71 | 3278.20 | -0.000271 | 2652.39 | 0.190681 |
2020-01-01 | 30.99 | 3278.20 | 2020-01-01 | 3278.20 | 3277.31 | 2652.39 | 2761.98 | 2919.61 | 3104.66 | 3207.62 | 3176.75 | 0.031935 | 2652.39 | 0.190900 |
2019-12-01 | 30.33 | 3176.75 | 2019-12-01 | 3176.75 | 3278.20 | 3277.31 | 2652.39 | 2761.98 | 2919.61 | 3104.66 | 3104.90 | 0.023141 | 2652.39 | 0.165062 |
2019-11-01 | 29.84 | 3104.90 | 2019-11-01 | 3104.90 | 3176.75 | 3278.20 | 3277.31 | 2652.39 | 2761.98 | 2919.61 | 2977.68 | 0.042725 | 2652.39 | 0.145741 |
2019-10-01 | 28.84 | 2977.68 | 2019-10-01 | 2977.68 | 3104.90 | 3176.75 | 3278.20 | 3277.31 | 2652.39 | 2761.98 | 2982.16 | -0.001502 | 2652.39 | 0.109243 |
2019-09-01 | 29.23 | 2982.16 | 2019-09-01 | 2982.16 | 2977.68 | 3104.90 | 3176.75 | 3278.20 | 3277.31 | 2652.39 | 2897.50 | 0.029218 | 2652.39 | 0.110581 |
2019-08-01 | 28.71 | 2897.50 | 2019-08-01 | 2897.50 | 2982.16 | 2977.68 | 3104.90 | 3176.75 | 3278.20 | 3277.31 | 2996.11 | -0.032913 | 2897.50 | 0.000000 |
2019-07-01 | 29.99 | 2996.11 | 2019-07-01 | 2996.11 | 2897.50 | 2982.16 | 2977.68 | 3104.90 | 3176.75 | 3278.20 | 2890.17 | 0.036655 | 2897.50 | 0.000000 |
2019-06-01 | 29.28 | 2890.17 | 2019-06-01 | 2890.17 | 2996.11 | 2897.50 | 2982.16 | 2977.68 | 3104.90 | 3176.75 | 2854.71 | 0.012422 | 2890.17 | 0.000000 |
2019-05-01 | 29.24 | 2854.71 | 2019-05-01 | 2854.71 | 2890.17 | 2996.11 | 2897.50 | 2982.16 | 2977.68 | 3104.90 | 2903.80 | -0.016905 | 2854.71 | 0.000000 |
2019-04-01 | 30.13 | 2903.80 | 2019-04-01 | 2903.80 | 2854.71 | 2890.17 | 2996.11 | 2897.50 | 2982.16 | 2977.68 | 2803.98 | 0.035599 | 2854.71 | 0.000000 |
2019-03-01 | 29.58 | 2803.98 | 2019-03-01 | 2803.98 | 2903.80 | 2854.71 | 2890.17 | 2996.11 | 2897.50 | 2982.16 | 2754.86 | 0.017830 | 2803.98 | 0.000000 |
2019-02-01 | 29.54 | 2754.86 | 2019-02-01 | 2754.86 | 2803.98 | 2903.80 | 2854.71 | 2890.17 | 2996.11 | 2897.50 | 2607.39 | 0.056558 | 2754.86 | 0.000000 |
2019-01-01 | 28.38 | 2607.39 | 2019-01-01 | 2607.39 | 2754.86 | 2803.98 | 2903.80 | 2854.71 | 2890.17 | 2996.11 | 2567.31 | 0.015612 | 2607.39 | 0.000000 |
2018-12-01 | 28.29 | 2567.31 | 2018-12-01 | 2567.31 | 2607.39 | 2754.86 | 2803.98 | 2903.80 | 2854.71 | 2890.17 | 2723.23 | -0.057256 | 2567.31 | 0.000000 |
2018-11-01 | 30.20 | 2723.23 | 2018-11-01 | 2723.23 | 2567.31 | 2607.39 | 2754.86 | 2803.98 | 2903.80 | 2854.71 | 2785.46 | -0.022341 | 2567.31 | 0.057256 |
2018-10-01 | 31.04 | 2785.46 | 2018-10-01 | 2785.46 | 2723.23 | 2567.31 | 2607.39 | 2754.86 | 2803.98 | 2903.80 | 2901.50 | -0.039993 | 2567.31 | 0.078317 |
2018-09-01 | 32.62 | 2901.50 | 2018-09-01 | 2901.50 | 2785.46 | 2723.23 | 2567.31 | 2607.39 | 2754.86 | 2803.98 | 2857.82 | 0.015284 | 2567.31 | 0.115178 |
2018-08-01 | 32.39 | 2857.82 | 2018-08-01 | 2857.82 | 2901.50 | 2785.46 | 2723.23 | 2567.31 | 2607.39 | 2754.86 | 2793.64 | 0.022974 | 2567.31 | 0.101654 |
2018-07-01 | 31.89 | 2793.64 | 2018-07-01 | 2793.64 | 2857.82 | 2901.50 | 2785.46 | 2723.23 | 2567.31 | 2607.39 | 2754.35 | 0.014265 | 2567.31 | 0.081016 |
2018-06-01 | 31.63 | 2754.35 | 2018-06-01 | 2754.35 | 2793.64 | 2857.82 | 2901.50 | 2785.46 | 2723.23 | 2567.31 | 2701.49 | 0.019567 | 2567.31 | 0.067907 |
2018-05-01 | 31.24 | 2701.49 | 2018-05-01 | 2701.49 | 2754.35 | 2793.64 | 2857.82 | 2901.50 | 2785.46 | 2723.23 | 2653.63 | 0.018036 | 2701.49 | 0.000000 |
2018-04-01 | 30.97 | 2653.63 | 2018-04-01 | 2653.63 | 2701.49 | 2754.35 | 2793.64 | 2857.82 | 2901.50 | 2785.46 | 2702.77 | -0.018181 | 2653.63 | 0.000000 |
def gen_features(df,cols, ref_period, band):
features = []
features.append(df['Date'].iloc[ref_period])
for col in cols:
segment = df[col].iloc[ref_period:ref_period+band]
features.append(segment.min())
features.append(segment.max())
for i in range(2,99,10):
features.append(np.quantile(segment, i/100))
features.append(segment.mean())
features.append(segment.std())
features.append(segment.mad())
features.append(segment.skew())
features.append(segment.kurtosis())
features.append(np.sqrt(np.mean(segment**2)))
features.append(np.abs(segment).mean())
features.append(np.abs(segment).std())
features.append(np.abs(np.diff(segment)).mean())
features.append(np.abs(np.diff(segment)).std())
return pd.Series(features)
def generate_dataset(df_f, cols, band=60, start_date='2000-01-01', periods = 0):
# global cols
df = df_f.copy()
X_train = pd.DataFrame()
y_train = pd.Series(dtype='float64', name=col_y)
# cols =['PER', 'Price', 'Price_Var']
if periods>0:
ref_periods = periods
else:
ref_periods = df[df.index>=start_date].shape[0]
for ref_period in range(ref_periods):
X = gen_features(df,cols, ref_period, band)
X_train = X_train.append(X, ignore_index=True)
y_train = y_train.append(pd.Series(df[col_y][ref_period], index=[df.index[ref_period]], name=col_y))
X_train = X_train.rename({0:'Date'}, axis='columns')
X_train = X_train.set_index('Date')
return X_train, y_train
start_date = '1950-01-01' # set your own date
band=60 # how many months from past to use to generate statistics
cols_features = ['PER', 'Price', 'Price_Var']
X_train, y_train = generate_dataset(df, cols_features, band, start_date)
y_train.shape, X_train.shape
((855,), (855, 66))
y_train.index
DatetimeIndex(['2021-03-01', '2021-02-01', '2021-01-01', '2020-12-01', '2020-11-01', '2020-10-01', '2020-09-01', '2020-08-01', '2020-07-01', '2020-06-01', ... '1950-10-01', '1950-09-01', '1950-08-01', '1950-07-01', '1950-06-01', '1950-05-01', '1950-04-01', '1950-03-01', '1950-02-01', '1950-01-01'], dtype='datetime64[ns]', length=855, freq=None)
X_train.index
DatetimeIndex(['2021-03-01', '2021-02-01', '2021-01-01', '2020-12-01', '2020-11-01', '2020-10-01', '2020-09-01', '2020-08-01', '2020-07-01', '2020-06-01', ... '1950-10-01', '1950-09-01', '1950-08-01', '1950-07-01', '1950-06-01', '1950-05-01', '1950-04-01', '1950-03-01', '1950-02-01', '1950-01-01'], dtype='datetime64[ns]', name='Date', length=855, freq=None)
dataset = pd.concat([y_train, X_train], axis=1, join="inner") # align to avoid random shifting
dataset.describe()
Price_Corr_6M | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 |
mean | 0.029442 | 14.431240 | 23.570515 | 14.777573 | 16.034851 | 17.090664 | 17.821839 | 18.497175 | 19.202284 | 19.946066 | 20.755240 | 21.613502 | 22.575595 | 19.098472 | 2.512827 | 2.085288 | -0.071771 | -0.390522 | 19.285351 | 19.098472 | 2.512827 | 0.488542 | 0.410447 | 384.463696 | 713.743170 | 394.766837 | 432.420474 | 468.538994 | 495.908749 | 522.235080 | 548.910676 | 577.287777 | 606.409720 | 638.616928 | 675.973329 | 545.985865 | 91.298120 | 76.474606 | 0.025165 | -0.619295 | 554.023079 | 545.985865 | 91.298120 | 14.235237 | 12.290918 | -0.100539 | 0.084752 | -0.068381 | -0.028274 | -0.014489 | -0.005086 | 0.003000 | 0.011045 | 0.018097 | 0.025443 | 0.034079 | 0.045030 | 0.006668 | 0.033679 | 0.025427 | -0.578491 | 1.979578 | 0.034516 | 0.026610 | 0.022078 | 0.032378 | 0.026856 |
std | 0.059236 | 5.390596 | 9.032074 | 5.425410 | 5.810988 | 6.207451 | 6.437289 | 6.735476 | 7.136561 | 7.559777 | 8.092159 | 8.437071 | 8.744541 | 7.055263 | 1.576082 | 1.386864 | 0.670917 | 1.033676 | 7.159203 | 7.055263 | 1.576082 | 0.211380 | 0.185295 | 475.162320 | 856.443386 | 486.178252 | 528.791032 | 568.013175 | 599.941679 | 631.712107 | 663.510627 | 696.218222 | 729.225963 | 765.972484 | 808.886759 | 658.069139 | 110.280034 | 93.514936 | 0.540819 | 0.647425 | 666.609814 | 658.069139 | 110.280034 | 17.021054 | 15.669683 | 0.040727 | 0.021875 | 0.023598 | 0.011524 | 0.008447 | 0.006091 | 0.005691 | 0.005557 | 0.005355 | 0.004835 | 0.005320 | 0.007283 | 0.004992 | 0.006853 | 0.004935 | 0.660493 | 2.311267 | 0.006286 | 0.004484 | 0.004871 | 0.005854 | 0.005227 |
min | 0.000000 | 6.640000 | 10.010000 | 6.649000 | 7.390800 | 7.839800 | 8.444000 | 8.757800 | 8.843600 | 8.979000 | 9.084800 | 9.260000 | 9.535600 | 8.579167 | 0.732325 | 0.594017 | -2.482265 | -1.599860 | 8.619747 | 8.579167 | 0.732325 | 0.221186 | 0.173401 | 13.930000 | 18.700000 | 13.945400 | 14.607200 | 14.780000 | 14.907600 | 15.153400 | 15.337600 | 15.765800 | 16.129200 | 16.842800 | 18.028400 | 15.689500 | 1.234354 | 0.991400 | -1.408928 | -1.727077 | 15.738145 | 15.689500 | 1.234354 | 0.455932 | 0.322354 | -0.203911 | 0.046303 | -0.113697 | -0.058984 | -0.037086 | -0.021644 | -0.009407 | -0.003258 | 0.005858 | 0.015197 | 0.023998 | 0.029321 | -0.005692 | 0.018549 | 0.014381 | -3.194508 | -0.619530 | 0.020565 | 0.015005 | 0.013153 | 0.018028 | 0.015464 |
25% | 0.000000 | 9.070000 | 18.330000 | 9.624400 | 10.062600 | 10.854900 | 11.632000 | 12.143900 | 12.651000 | 13.566600 | 14.505800 | 15.853200 | 17.248200 | 13.149917 | 1.609897 | 1.306261 | -0.505305 | -1.057436 | 13.392315 | 13.149917 | 1.609897 | 0.357627 | 0.299752 | 61.340000 | 95.810000 | 64.122700 | 70.159800 | 75.275800 | 80.003600 | 82.402100 | 84.944600 | 86.487700 | 88.739400 | 91.449900 | 92.779600 | 82.501500 | 8.457500 | 6.809078 | -0.396932 | -1.094689 | 82.970865 | 82.501500 | 8.457500 | 1.763644 | 1.388085 | -0.116780 | 0.068898 | -0.085138 | -0.034946 | -0.020583 | -0.008601 | -0.001513 | 0.006866 | 0.015007 | 0.021134 | 0.030060 | 0.039719 | 0.003053 | 0.028540 | 0.021893 | -0.998518 | 0.615426 | 0.029960 | 0.023818 | 0.018208 | 0.028348 | 0.023768 |
50% | 0.000000 | 13.670000 | 22.170000 | 14.067800 | 15.870000 | 17.027200 | 17.535600 | 18.011200 | 18.463600 | 19.491800 | 20.304400 | 20.608600 | 21.084800 | 18.702833 | 2.085849 | 1.680956 | -0.136785 | -0.603029 | 18.789576 | 18.702833 | 2.085849 | 0.453051 | 0.371721 | 109.400000 | 192.500000 | 109.700000 | 117.380000 | 126.446000 | 132.228000 | 135.414000 | 151.644000 | 157.658000 | 164.644000 | 166.628000 | 180.684000 | 146.948333 | 23.385346 | 20.608333 | 0.002311 | -0.703772 | 148.766839 | 146.948333 | 23.385346 | 3.781356 | 3.189377 | -0.105555 | 0.073924 | -0.059120 | -0.026887 | -0.013355 | -0.004159 | 0.003470 | 0.011680 | 0.017729 | 0.026024 | 0.033624 | 0.044658 | 0.007230 | 0.032639 | 0.025095 | -0.526425 | 1.129819 | 0.033443 | 0.026435 | 0.021073 | 0.031874 | 0.025729 |
75% | 0.052333 | 19.365000 | 27.550000 | 19.691300 | 20.400800 | 21.068700 | 21.621600 | 22.564300 | 23.678600 | 24.915200 | 25.747400 | 26.457800 | 27.291200 | 23.367000 | 2.680915 | 2.299289 | 0.352199 | -0.054330 | 23.579368 | 23.367000 | 2.680915 | 0.528390 | 0.438316 | 757.130000 | 1485.460000 | 812.955600 | 904.097200 | 992.479800 | 1073.387200 | 1105.174600 | 1136.067200 | 1190.914700 | 1273.358400 | 1354.531600 | 1423.463000 | 1133.277167 | 182.130724 | 148.246117 | 0.386897 | -0.268461 | 1147.898600 | 1133.277167 | 182.130724 | 29.947203 | 24.505543 | -0.065957 | 0.108439 | -0.051697 | -0.020761 | -0.006811 | -0.000925 | 0.007640 | 0.014019 | 0.021943 | 0.028942 | 0.038760 | 0.049991 | 0.009902 | 0.038566 | 0.028974 | -0.205180 | 3.044290 | 0.039175 | 0.029874 | 0.025503 | 0.036072 | 0.029917 |
max | 0.377846 | 27.670000 | 44.190000 | 28.657400 | 30.306800 | 32.317000 | 32.895200 | 34.569200 | 36.970400 | 38.803200 | 41.334400 | 42.320600 | 43.212800 | 36.634500 | 7.976569 | 7.214517 | 2.050546 | 7.139904 | 36.966912 | 36.634500 | 7.976569 | 1.190508 | 0.917348 | 2065.550000 | 3910.510000 | 2077.043000 | 2177.004400 | 2394.779400 | 2566.072800 | 2693.317000 | 2759.701600 | 2856.513800 | 2947.483600 | 3188.480600 | 3455.181200 | 2778.860500 | 463.023817 | 356.773533 | 1.612620 | 3.178645 | 2816.537511 | 2778.860500 | 463.023817 | 76.767288 | 86.528188 | -0.035747 | 0.120217 | -0.016349 | -0.007115 | -0.000274 | 0.010087 | 0.015437 | 0.024440 | 0.030495 | 0.036719 | 0.045736 | 0.062069 | 0.019740 | 0.050296 | 0.037867 | 1.828116 | 16.683022 | 0.049884 | 0.037836 | 0.033161 | 0.047505 | 0.039672 |
Y=dataset.pop(col_y) # target to predict
Y.describe()
count 855.000000 mean 0.029442 std 0.059236 min 0.000000 25% 0.000000 50% 0.000000 75% 0.052333 max 0.377846 Name: Price_Corr_6M, dtype: float64
X=dataset # features for prediction
X.describe()
1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 | 855.000000 |
mean | 14.431240 | 23.570515 | 14.777573 | 16.034851 | 17.090664 | 17.821839 | 18.497175 | 19.202284 | 19.946066 | 20.755240 | 21.613502 | 22.575595 | 19.098472 | 2.512827 | 2.085288 | -0.071771 | -0.390522 | 19.285351 | 19.098472 | 2.512827 | 0.488542 | 0.410447 | 384.463696 | 713.743170 | 394.766837 | 432.420474 | 468.538994 | 495.908749 | 522.235080 | 548.910676 | 577.287777 | 606.409720 | 638.616928 | 675.973329 | 545.985865 | 91.298120 | 76.474606 | 0.025165 | -0.619295 | 554.023079 | 545.985865 | 91.298120 | 14.235237 | 12.290918 | -0.100539 | 0.084752 | -0.068381 | -0.028274 | -0.014489 | -0.005086 | 0.003000 | 0.011045 | 0.018097 | 0.025443 | 0.034079 | 0.045030 | 0.006668 | 0.033679 | 0.025427 | -0.578491 | 1.979578 | 0.034516 | 0.026610 | 0.022078 | 0.032378 | 0.026856 |
std | 5.390596 | 9.032074 | 5.425410 | 5.810988 | 6.207451 | 6.437289 | 6.735476 | 7.136561 | 7.559777 | 8.092159 | 8.437071 | 8.744541 | 7.055263 | 1.576082 | 1.386864 | 0.670917 | 1.033676 | 7.159203 | 7.055263 | 1.576082 | 0.211380 | 0.185295 | 475.162320 | 856.443386 | 486.178252 | 528.791032 | 568.013175 | 599.941679 | 631.712107 | 663.510627 | 696.218222 | 729.225963 | 765.972484 | 808.886759 | 658.069139 | 110.280034 | 93.514936 | 0.540819 | 0.647425 | 666.609814 | 658.069139 | 110.280034 | 17.021054 | 15.669683 | 0.040727 | 0.021875 | 0.023598 | 0.011524 | 0.008447 | 0.006091 | 0.005691 | 0.005557 | 0.005355 | 0.004835 | 0.005320 | 0.007283 | 0.004992 | 0.006853 | 0.004935 | 0.660493 | 2.311267 | 0.006286 | 0.004484 | 0.004871 | 0.005854 | 0.005227 |
min | 6.640000 | 10.010000 | 6.649000 | 7.390800 | 7.839800 | 8.444000 | 8.757800 | 8.843600 | 8.979000 | 9.084800 | 9.260000 | 9.535600 | 8.579167 | 0.732325 | 0.594017 | -2.482265 | -1.599860 | 8.619747 | 8.579167 | 0.732325 | 0.221186 | 0.173401 | 13.930000 | 18.700000 | 13.945400 | 14.607200 | 14.780000 | 14.907600 | 15.153400 | 15.337600 | 15.765800 | 16.129200 | 16.842800 | 18.028400 | 15.689500 | 1.234354 | 0.991400 | -1.408928 | -1.727077 | 15.738145 | 15.689500 | 1.234354 | 0.455932 | 0.322354 | -0.203911 | 0.046303 | -0.113697 | -0.058984 | -0.037086 | -0.021644 | -0.009407 | -0.003258 | 0.005858 | 0.015197 | 0.023998 | 0.029321 | -0.005692 | 0.018549 | 0.014381 | -3.194508 | -0.619530 | 0.020565 | 0.015005 | 0.013153 | 0.018028 | 0.015464 |
25% | 9.070000 | 18.330000 | 9.624400 | 10.062600 | 10.854900 | 11.632000 | 12.143900 | 12.651000 | 13.566600 | 14.505800 | 15.853200 | 17.248200 | 13.149917 | 1.609897 | 1.306261 | -0.505305 | -1.057436 | 13.392315 | 13.149917 | 1.609897 | 0.357627 | 0.299752 | 61.340000 | 95.810000 | 64.122700 | 70.159800 | 75.275800 | 80.003600 | 82.402100 | 84.944600 | 86.487700 | 88.739400 | 91.449900 | 92.779600 | 82.501500 | 8.457500 | 6.809078 | -0.396932 | -1.094689 | 82.970865 | 82.501500 | 8.457500 | 1.763644 | 1.388085 | -0.116780 | 0.068898 | -0.085138 | -0.034946 | -0.020583 | -0.008601 | -0.001513 | 0.006866 | 0.015007 | 0.021134 | 0.030060 | 0.039719 | 0.003053 | 0.028540 | 0.021893 | -0.998518 | 0.615426 | 0.029960 | 0.023818 | 0.018208 | 0.028348 | 0.023768 |
50% | 13.670000 | 22.170000 | 14.067800 | 15.870000 | 17.027200 | 17.535600 | 18.011200 | 18.463600 | 19.491800 | 20.304400 | 20.608600 | 21.084800 | 18.702833 | 2.085849 | 1.680956 | -0.136785 | -0.603029 | 18.789576 | 18.702833 | 2.085849 | 0.453051 | 0.371721 | 109.400000 | 192.500000 | 109.700000 | 117.380000 | 126.446000 | 132.228000 | 135.414000 | 151.644000 | 157.658000 | 164.644000 | 166.628000 | 180.684000 | 146.948333 | 23.385346 | 20.608333 | 0.002311 | -0.703772 | 148.766839 | 146.948333 | 23.385346 | 3.781356 | 3.189377 | -0.105555 | 0.073924 | -0.059120 | -0.026887 | -0.013355 | -0.004159 | 0.003470 | 0.011680 | 0.017729 | 0.026024 | 0.033624 | 0.044658 | 0.007230 | 0.032639 | 0.025095 | -0.526425 | 1.129819 | 0.033443 | 0.026435 | 0.021073 | 0.031874 | 0.025729 |
75% | 19.365000 | 27.550000 | 19.691300 | 20.400800 | 21.068700 | 21.621600 | 22.564300 | 23.678600 | 24.915200 | 25.747400 | 26.457800 | 27.291200 | 23.367000 | 2.680915 | 2.299289 | 0.352199 | -0.054330 | 23.579368 | 23.367000 | 2.680915 | 0.528390 | 0.438316 | 757.130000 | 1485.460000 | 812.955600 | 904.097200 | 992.479800 | 1073.387200 | 1105.174600 | 1136.067200 | 1190.914700 | 1273.358400 | 1354.531600 | 1423.463000 | 1133.277167 | 182.130724 | 148.246117 | 0.386897 | -0.268461 | 1147.898600 | 1133.277167 | 182.130724 | 29.947203 | 24.505543 | -0.065957 | 0.108439 | -0.051697 | -0.020761 | -0.006811 | -0.000925 | 0.007640 | 0.014019 | 0.021943 | 0.028942 | 0.038760 | 0.049991 | 0.009902 | 0.038566 | 0.028974 | -0.205180 | 3.044290 | 0.039175 | 0.029874 | 0.025503 | 0.036072 | 0.029917 |
max | 27.670000 | 44.190000 | 28.657400 | 30.306800 | 32.317000 | 32.895200 | 34.569200 | 36.970400 | 38.803200 | 41.334400 | 42.320600 | 43.212800 | 36.634500 | 7.976569 | 7.214517 | 2.050546 | 7.139904 | 36.966912 | 36.634500 | 7.976569 | 1.190508 | 0.917348 | 2065.550000 | 3910.510000 | 2077.043000 | 2177.004400 | 2394.779400 | 2566.072800 | 2693.317000 | 2759.701600 | 2856.513800 | 2947.483600 | 3188.480600 | 3455.181200 | 2778.860500 | 463.023817 | 356.773533 | 1.612620 | 3.178645 | 2816.537511 | 2778.860500 | 463.023817 | 76.767288 | 86.528188 | -0.035747 | 0.120217 | -0.016349 | -0.007115 | -0.000274 | 0.010087 | 0.015437 | 0.024440 | 0.030495 | 0.036719 | 0.045736 | 0.062069 | 0.019740 | 0.050296 | 0.037867 | 1.828116 | 16.683022 | 0.049884 | 0.037836 | 0.033161 | 0.047505 | 0.039672 |
# PREDICTION
def generate_model(df, cols, start_date, verbosity = False):
best_score = 0
best_model = None
best_band = 0
test_size = 0.3
bands = 60
for band in range(12,bands+1,12): # [12,24,36,48,60] months
X_train, y_train = generate_dataset(df, cols, band, start_date)
dataset = pd.concat([y_train, X_train], axis=1, join="inner") # align to avoid random shifting
Y=dataset.pop(col_y)
X=dataset
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(X, Y, test_size=test_size, shuffle = True)
model = xgb.XGBRegressor()
model.fit(X_train,Y_train.values)
score = model.score(X_test, Y_test)
if best_score < score:
best_score = score
best_model = model
best_band = band
X_t = X_test
Y_t = Y_test
# if verbosity: # verbosit for each band
# print(score, band)
if verbosity: # verbosity for best score
print('retest score:', best_model.score(X_t, Y_t).round(3), best_score.round(3), 'features:', cols, 'X_train.shape:', X_train.shape)# retest best model score
return best_model, best_band, best_score
start_date = '1970-01-01'
cols_features =['PER', 'Price', 'Price_Var']
model, band, score = generate_model(df, cols_features, start_date, True)
retest score: 0.785 0.785 features: ['PER', 'Price', 'Price_Var'] X_train.shape: (430, 66)
# model.score(X_t, Y_t), best_score # retest best model score
df_initial['horizon'] = 'past'
df_initial.loc[np.isnan(df_initial['Price6F']),['horizon']]='future'
df_initial.head(10)
PER | Price | Date | Price0F | Price1F | Price2F | Price3F | Price4F | Price5F | Price6F | Price1P | Price_Var | PriceFmin | Price_Corr_6M | horizon | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | |||||||||||||||
2021-10-01 | 37.63 | 4357.04 | 2021-10-01 | 4357.04 | NaN | NaN | NaN | NaN | NaN | NaN | 4450.37 | -0.020971 | 4357.04 | NaN | future |
2021-08-01 | 38.44 | 4450.37 | 2021-08-01 | 4450.37 | 4357.04 | NaN | NaN | NaN | NaN | NaN | 4358.13 | 0.021165 | 4357.04 | NaN | future |
2021-07-01 | 37.68 | 4358.13 | 2021-07-01 | 4358.13 | 4450.37 | 4357.04 | NaN | NaN | NaN | NaN | 4238.49 | 0.028227 | 4357.04 | NaN | future |
2021-06-01 | 36.86 | 4238.49 | 2021-06-01 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | NaN | NaN | NaN | 4167.85 | 0.016949 | 4238.49 | NaN | future |
2021-05-01 | 36.62 | 4167.85 | 2021-05-01 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | NaN | NaN | 4141.18 | 0.006440 | 4167.85 | NaN | future |
2021-04-01 | 36.72 | 4141.18 | 2021-04-01 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | NaN | 3910.51 | 0.058987 | 4141.18 | NaN | future |
2021-03-01 | 35.04 | 3910.51 | 2021-03-01 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | 3883.43 | 0.006973 | 3910.51 | 0.0 | past |
2021-02-01 | 35.10 | 3883.43 | 2021-02-01 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 3793.75 | 0.023639 | 3883.43 | 0.0 | past |
2021-01-01 | 34.51 | 3793.75 | 2021-01-01 | 3793.75 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 3695.31 | 0.026639 | 3793.75 | 0.0 | past |
2020-12-01 | 33.77 | 3695.31 | 2020-12-01 | 3695.31 | 3793.75 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 3548.99 | 0.041229 | 3695.31 | 0.0 | past |
df_initial['Date']=df_initial.index # add required column
df_initial.head(24)
PER | Price | Date | Price0F | Price1F | Price2F | Price3F | Price4F | Price5F | Price6F | Price1P | Price_Var | PriceFmin | Price_Corr_6M | horizon | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | |||||||||||||||
2021-10-01 | 37.63 | 4357.04 | 2021-10-01 | 4357.04 | NaN | NaN | NaN | NaN | NaN | NaN | 4450.37 | -0.020971 | 4357.04 | NaN | future |
2021-08-01 | 38.44 | 4450.37 | 2021-08-01 | 4450.37 | 4357.04 | NaN | NaN | NaN | NaN | NaN | 4358.13 | 0.021165 | 4357.04 | NaN | future |
2021-07-01 | 37.68 | 4358.13 | 2021-07-01 | 4358.13 | 4450.37 | 4357.04 | NaN | NaN | NaN | NaN | 4238.49 | 0.028227 | 4357.04 | NaN | future |
2021-06-01 | 36.86 | 4238.49 | 2021-06-01 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | NaN | NaN | NaN | 4167.85 | 0.016949 | 4238.49 | NaN | future |
2021-05-01 | 36.62 | 4167.85 | 2021-05-01 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | NaN | NaN | 4141.18 | 0.006440 | 4167.85 | NaN | future |
2021-04-01 | 36.72 | 4141.18 | 2021-04-01 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | NaN | 3910.51 | 0.058987 | 4141.18 | NaN | future |
2021-03-01 | 35.04 | 3910.51 | 2021-03-01 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | 3883.43 | 0.006973 | 3910.51 | 0.000000 | past |
2021-02-01 | 35.10 | 3883.43 | 2021-02-01 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 3793.75 | 0.023639 | 3883.43 | 0.000000 | past |
2021-01-01 | 34.51 | 3793.75 | 2021-01-01 | 3793.75 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 3695.31 | 0.026639 | 3793.75 | 0.000000 | past |
2020-12-01 | 33.77 | 3695.31 | 2020-12-01 | 3695.31 | 3793.75 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 3548.99 | 0.041229 | 3695.31 | 0.000000 | past |
2020-11-01 | 32.47 | 3548.99 | 2020-11-01 | 3548.99 | 3695.31 | 3793.75 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 3418.70 | 0.038111 | 3548.99 | 0.000000 | past |
2020-10-01 | 31.28 | 3418.70 | 2020-10-01 | 3418.70 | 3548.99 | 3695.31 | 3793.75 | 3883.43 | 3910.51 | 4141.18 | 3365.52 | 0.015801 | 3418.70 | 0.000000 | past |
2020-09-01 | 30.84 | 3365.52 | 2020-09-01 | 3365.52 | 3418.70 | 3548.99 | 3695.31 | 3793.75 | 3883.43 | 3910.51 | 3391.71 | -0.007722 | 3365.52 | 0.000000 | past |
2020-08-01 | 31.16 | 3391.71 | 2020-08-01 | 3391.71 | 3365.52 | 3418.70 | 3548.99 | 3695.31 | 3793.75 | 3883.43 | 3207.62 | 0.057391 | 3365.52 | 0.007722 | past |
2020-07-01 | 29.60 | 3207.62 | 2020-07-01 | 3207.62 | 3391.71 | 3365.52 | 3418.70 | 3548.99 | 3695.31 | 3793.75 | 3104.66 | 0.033163 | 3207.62 | 0.000000 | past |
2020-06-01 | 28.84 | 3104.66 | 2020-06-01 | 3104.66 | 3207.62 | 3391.71 | 3365.52 | 3418.70 | 3548.99 | 3695.31 | 2919.61 | 0.063382 | 3104.66 | 0.000000 | past |
2020-05-01 | 27.33 | 2919.61 | 2020-05-01 | 2919.61 | 3104.66 | 3207.62 | 3391.71 | 3365.52 | 3418.70 | 3548.99 | 2761.98 | 0.057071 | 2919.61 | 0.000000 | past |
2020-04-01 | 25.93 | 2761.98 | 2020-04-01 | 2761.98 | 2919.61 | 3104.66 | 3207.62 | 3391.71 | 3365.52 | 3418.70 | 2652.39 | 0.041317 | 2761.98 | 0.000000 | past |
2020-03-01 | 24.82 | 2652.39 | 2020-03-01 | 2652.39 | 2761.98 | 2919.61 | 3104.66 | 3207.62 | 3391.71 | 3365.52 | 3277.31 | -0.190681 | 2652.39 | 0.000000 | past |
2020-02-01 | 30.73 | 3277.31 | 2020-02-01 | 3277.31 | 2652.39 | 2761.98 | 2919.61 | 3104.66 | 3207.62 | 3391.71 | 3278.20 | -0.000271 | 2652.39 | 0.190681 | past |
2020-01-01 | 30.99 | 3278.20 | 2020-01-01 | 3278.20 | 3277.31 | 2652.39 | 2761.98 | 2919.61 | 3104.66 | 3207.62 | 3176.75 | 0.031935 | 2652.39 | 0.190900 | past |
2019-12-01 | 30.33 | 3176.75 | 2019-12-01 | 3176.75 | 3278.20 | 3277.31 | 2652.39 | 2761.98 | 2919.61 | 3104.66 | 3104.90 | 0.023141 | 2652.39 | 0.165062 | past |
2019-11-01 | 29.84 | 3104.90 | 2019-11-01 | 3104.90 | 3176.75 | 3278.20 | 3277.31 | 2652.39 | 2761.98 | 2919.61 | 2977.68 | 0.042725 | 2652.39 | 0.145741 | past |
2019-10-01 | 28.84 | 2977.68 | 2019-10-01 | 2977.68 | 3104.90 | 3176.75 | 3278.20 | 3277.31 | 2652.39 | 2761.98 | 2982.16 | -0.001502 | 2652.39 | 0.109243 | past |
df.head()
PER | Price | Date | Price0F | Price1F | Price2F | Price3F | Price4F | Price5F | Price6F | Price1P | Price_Var | PriceFmin | Price_Corr_6M | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | ||||||||||||||
2021-03-01 | 35.04 | 3910.51 | 2021-03-01 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | 3883.43 | 0.006973 | 3910.51 | 0.0 |
2021-02-01 | 35.10 | 3883.43 | 2021-02-01 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 3793.75 | 0.023639 | 3883.43 | 0.0 |
2021-01-01 | 34.51 | 3793.75 | 2021-01-01 | 3793.75 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 4358.13 | 3695.31 | 0.026639 | 3793.75 | 0.0 |
2020-12-01 | 33.77 | 3695.31 | 2020-12-01 | 3695.31 | 3793.75 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 4238.49 | 3548.99 | 0.041229 | 3695.31 | 0.0 |
2020-11-01 | 32.47 | 3548.99 | 2020-11-01 | 3548.99 | 3695.31 | 3793.75 | 3883.43 | 3910.51 | 4141.18 | 4167.85 | 3418.70 | 0.038111 | 3548.99 | 0.0 |
periods=120 # predicted periods
df_pred = df_initial.copy()
cols_features =['PER', 'Price', 'Price_Var']
X_pred, y_dummy = generate_dataset(df_pred, cols_features, band, periods=periods)
X_pred.head() # generated features
1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
2021-10-01 | 24.82 | 38.44 | 25.597 | 28.446 | 29.113 | 29.548 | 29.945 | 30.41 | 31.025 | 32.730 | 35.082 | 37.014 | 31.360556 | 3.460154 | 2.770772 | 0.584275 | -0.427582 | 31.545595 | 31.360556 | 3.460154 | 0.918000 | 0.987452 | 2567.31 | 4450.37 | 2595.366 | 2756.284 | 2839.491 | 2906.962 | 2991.925 | 3182.924 | 3339.324 | 3578.254 | 3902.386 | 4262.200 | 3304.132222 | 565.618155 | 470.173827 | 0.708066 | -0.740779 | 3350.869568 | 3304.132222 | 565.618155 | 108.654286 | 102.967154 | -0.190681 | 0.063382 | -0.097283 | -0.022067 | -0.003368 | 0.008063 | 0.016605 | 0.023240 | 0.028921 | 0.035811 | 0.041291 | 0.057135 | 0.012434 | 0.045339 | 0.029242 | -2.734204 | 10.917498 | 0.046402 | 0.034132 | 0.031882 | 0.040542 | 0.046385 |
2021-08-01 | 24.82 | 38.44 | 25.597 | 28.446 | 29.113 | 29.548 | 29.945 | 30.41 | 31.025 | 32.500 | 34.881 | 36.748 | 31.221389 | 3.297733 | 2.580741 | 0.602872 | -0.164667 | 31.390255 | 31.221389 | 3.297733 | 0.940000 | 0.993361 | 2567.31 | 4450.37 | 2595.366 | 2756.284 | 2839.491 | 2901.960 | 2980.816 | 3119.270 | 3277.933 | 3444.758 | 3856.526 | 4181.978 | 3263.700556 | 539.629090 | 442.441204 | 0.816545 | -0.462824 | 3306.788774 | 3263.700556 | 539.629090 | 109.303143 | 102.940094 | -0.190681 | 0.063382 | -0.097283 | -0.021254 | -0.000641 | 0.012994 | 0.016605 | 0.023240 | 0.028921 | 0.035811 | 0.041291 | 0.057135 | 0.013441 | 0.044977 | 0.028058 | -2.859490 | 11.653228 | 0.046340 | 0.033974 | 0.031963 | 0.040917 | 0.046449 |
2021-07-01 | 24.82 | 37.68 | 25.597 | 28.446 | 29.113 | 29.548 | 29.945 | 30.41 | 31.025 | 32.406 | 34.288 | 36.640 | 31.053333 | 3.065322 | 2.362593 | 0.531582 | -0.041598 | 31.200075 | 31.053333 | 3.065322 | 0.924857 | 1.000007 | 2567.31 | 4358.13 | 2595.366 | 2756.284 | 2839.491 | 2898.300 | 2960.259 | 3104.708 | 3256.403 | 3397.108 | 3764.218 | 4146.514 | 3219.463056 | 503.646380 | 410.922068 | 0.872566 | -0.311079 | 3257.538325 | 3219.463056 | 503.646380 | 107.915714 | 103.486517 | -0.190681 | 0.063382 | -0.097283 | -0.021254 | -0.000641 | 0.012994 | 0.016605 | 0.023240 | 0.028921 | 0.035811 | 0.041291 | 0.057135 | 0.013491 | 0.044987 | 0.028091 | -2.861019 | 11.654473 | 0.046364 | 0.034024 | 0.031943 | 0.040935 | 0.046436 |
2021-06-01 | 24.82 | 36.86 | 25.597 | 28.446 | 29.113 | 29.548 | 29.945 | 30.41 | 31.025 | 31.990 | 33.425 | 35.404 | 30.892500 | 2.852182 | 2.170694 | 0.460551 | 0.144587 | 31.020244 | 30.892500 | 2.852182 | 0.915714 | 1.002384 | 2567.31 | 4238.49 | 2595.366 | 2756.284 | 2800.878 | 2891.636 | 2914.867 | 3017.820 | 3198.359 | 3370.758 | 3651.414 | 3956.644 | 3176.005000 | 468.885055 | 380.846944 | 0.920733 | -0.164513 | 3209.478762 | 3176.005000 | 468.885055 | 106.331143 | 103.719202 | -0.190681 | 0.063382 | -0.097283 | -0.021254 | -0.000641 | 0.012790 | 0.015744 | 0.023007 | 0.028445 | 0.035811 | 0.041291 | 0.057135 | 0.013104 | 0.044917 | 0.027833 | -2.848158 | 11.637258 | 0.046186 | 0.033636 | 0.032100 | 0.040862 | 0.046485 |
2021-05-01 | 24.82 | 36.72 | 25.597 | 28.446 | 29.113 | 29.548 | 29.945 | 30.41 | 31.025 | 31.682 | 32.575 | 35.052 | 30.747222 | 2.666705 | 2.006358 | 0.398347 | 0.425817 | 30.859447 | 30.747222 | 2.666705 | 0.916286 | 1.002004 | 2567.31 | 4167.85 | 2595.366 | 2754.452 | 2791.186 | 2864.290 | 2903.110 | 2984.950 | 3155.195 | 3295.664 | 3509.903 | 3888.846 | 3134.778889 | 436.956849 | 353.884753 | 0.976178 | 0.042582 | 3164.248145 | 3134.778889 | 436.956849 | 105.435429 | 104.158001 | -0.190681 | 0.063382 | -0.097283 | -0.021254 | -0.000641 | 0.012790 | 0.015744 | 0.023007 | 0.028445 | 0.035811 | 0.041291 | 0.057135 | 0.013176 | 0.044925 | 0.027881 | -2.851609 | 11.646643 | 0.046215 | 0.033709 | 0.032064 | 0.040713 | 0.046590 |
X_pred.shape
(120, 66)
# df_pred = df_pred.iloc[:periods]
# df_pred
y_pred = model.predict(X_pred).round(2) # readable
# y_pred = model.predict(X_pred)
# y_pred
df_pred = df_pred.iloc[:periods].copy()
df_pred[col_y+'Pred'] = np.clip(y_pred,0,0.5) # replace outliers
cols_pred = ['PER', 'Price', 'Price_Corr_6M', 'horizon', 'Price_Corr_6MPred']
df_pred[cols_pred].head(36) # one shoot prediction
PER | Price | Price_Corr_6M | horizon | Price_Corr_6MPred | |
---|---|---|---|---|---|
Date | |||||
2021-10-01 | 37.63 | 4357.04 | NaN | future | 0.04 |
2021-08-01 | 38.44 | 4450.37 | NaN | future | 0.05 |
2021-07-01 | 37.68 | 4358.13 | NaN | future | 0.04 |
2021-06-01 | 36.86 | 4238.49 | NaN | future | 0.03 |
2021-05-01 | 36.62 | 4167.85 | NaN | future | 0.02 |
2021-04-01 | 36.72 | 4141.18 | NaN | future | 0.02 |
2021-03-01 | 35.04 | 3910.51 | 0.000000 | past | 0.00 |
2021-02-01 | 35.10 | 3883.43 | 0.000000 | past | 0.00 |
2021-01-01 | 34.51 | 3793.75 | 0.000000 | past | 0.00 |
2020-12-01 | 33.77 | 3695.31 | 0.000000 | past | 0.00 |
2020-11-01 | 32.47 | 3548.99 | 0.000000 | past | 0.00 |
2020-10-01 | 31.28 | 3418.70 | 0.000000 | past | 0.00 |
2020-09-01 | 30.84 | 3365.52 | 0.000000 | past | 0.00 |
2020-08-01 | 31.16 | 3391.71 | 0.007722 | past | 0.00 |
2020-07-01 | 29.60 | 3207.62 | 0.000000 | past | 0.00 |
2020-06-01 | 28.84 | 3104.66 | 0.000000 | past | 0.00 |
2020-05-01 | 27.33 | 2919.61 | 0.000000 | past | 0.00 |
2020-04-01 | 25.93 | 2761.98 | 0.000000 | past | 0.00 |
2020-03-01 | 24.82 | 2652.39 | 0.000000 | past | 0.00 |
2020-02-01 | 30.73 | 3277.31 | 0.190681 | past | 0.19 |
2020-01-01 | 30.99 | 3278.20 | 0.190900 | past | 0.19 |
2019-12-01 | 30.33 | 3176.75 | 0.165062 | past | 0.17 |
2019-11-01 | 29.84 | 3104.90 | 0.145741 | past | 0.15 |
2019-10-01 | 28.84 | 2977.68 | 0.109243 | past | 0.11 |
2019-09-01 | 29.23 | 2982.16 | 0.110581 | past | 0.01 |
2019-08-01 | 28.71 | 2897.50 | 0.000000 | past | 0.00 |
2019-07-01 | 29.99 | 2996.11 | 0.032913 | past | 0.00 |
2019-06-01 | 29.28 | 2890.17 | 0.000000 | past | 0.00 |
2019-05-01 | 29.24 | 2854.71 | 0.000000 | past | 0.01 |
2019-04-01 | 30.13 | 2903.80 | 0.016905 | past | 0.03 |
2019-03-01 | 29.58 | 2803.98 | 0.000000 | past | 0.03 |
2019-02-01 | 29.54 | 2754.86 | 0.000000 | past | 0.05 |
2019-01-01 | 28.38 | 2607.39 | 0.000000 | past | 0.04 |
2018-12-01 | 28.29 | 2567.31 | 0.000000 | past | 0.04 |
2018-11-01 | 30.20 | 2723.23 | 0.057256 | past | 0.06 |
2018-10-01 | 31.04 | 2785.46 | 0.078317 | past | 0.08 |
ind_y = df_pred['horizon']=='past'
# ind_y
model.score(X_pred.loc[ind_y], df_pred.loc[ind_y,col_y]) # score for past - could contain seen data (X_train)
0.815055914768403
# ind_y[ind_y==True] # past months for score
# RANDOM CROSS FOLD
n_times = 10 # build model n times and predict n times
periods = 6 # predicted last months
df_pred = df_initial.copy()
df_pred_iter = df_pred.iloc[:periods].copy()
ls_pc6m_cols=[]
ls_score = []
cols_features =['PER', 'Price', 'Price_Var']
for i in range(n_times):
start_date = '1970-01-01'
model, band, score = generate_model(df, cols_features, start_date, True)
ls_score.append(score.round(3))
print('score:',score.round(3), 'band:', band)
X_pred, y_dummy = generate_dataset(df_pred, cols_features, band, periods=periods)
y_pred = model.predict(X_pred).round(2) # readable
pc6m_col = 'PC6M'+str(1+i)
ls_pc6m_cols.append(pc6m_col)
df_pred_iter[pc6m_col] = y_pred
retest score: 0.77 0.77 features: ['PER', 'Price', 'Price_Var'] X_train.shape: (430, 66) score: 0.77 band: 60 retest score: 0.752 0.752 features: ['PER', 'Price', 'Price_Var'] X_train.shape: (430, 66) score: 0.752 band: 36 retest score: 0.779 0.779 features: ['PER', 'Price', 'Price_Var'] X_train.shape: (430, 66) score: 0.779 band: 60 retest score: 0.767 0.767 features: ['PER', 'Price', 'Price_Var'] X_train.shape: (430, 66) score: 0.767 band: 24 retest score: 0.803 0.803 features: ['PER', 'Price', 'Price_Var'] X_train.shape: (430, 66) score: 0.803 band: 60 retest score: 0.769 0.769 features: ['PER', 'Price', 'Price_Var'] X_train.shape: (430, 66) score: 0.769 band: 60 retest score: 0.818 0.818 features: ['PER', 'Price', 'Price_Var'] X_train.shape: (430, 66) score: 0.818 band: 60 retest score: 0.719 0.719 features: ['PER', 'Price', 'Price_Var'] X_train.shape: (430, 66) score: 0.719 band: 60 retest score: 0.749 0.749 features: ['PER', 'Price', 'Price_Var'] X_train.shape: (430, 66) score: 0.749 band: 24 retest score: 0.71 0.71 features: ['PER', 'Price', 'Price_Var'] X_train.shape: (430, 66) score: 0.71 band: 60
cols_iter = ['PER', 'Price', 'Price_Var', 'horizon']
df_pred_iter=df_pred_iter[cols_iter+ls_pc6m_cols].copy()
df_pred_iter['PC6M_AVG']=df_pred_iter[ls_pc6m_cols].mean(axis=1) # average of all predictions
# RANDOM CROSS FOLD PREDICTION with ['PER', 'Price', 'Price_Var']
df_pred_iter
PER | Price | Price_Var | horizon | PC6M1 | PC6M2 | PC6M3 | PC6M4 | PC6M5 | PC6M6 | PC6M7 | PC6M8 | PC6M9 | PC6M10 | PC6M_AVG | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | |||||||||||||||
2021-10-01 | 37.63 | 4357.04 | -0.020971 | future | 0.01 | 0.08 | 0.02 | 0.09 | 0.01 | 0.02 | 0.04 | 0.01 | 0.05 | 0.01 | 0.034 |
2021-08-01 | 38.44 | 4450.37 | 0.021165 | future | 0.01 | 0.05 | 0.02 | 0.10 | 0.01 | 0.02 | 0.04 | 0.01 | 0.02 | 0.01 | 0.029 |
2021-07-01 | 37.68 | 4358.13 | 0.028227 | future | 0.01 | 0.05 | 0.02 | 0.07 | 0.01 | 0.02 | 0.04 | 0.01 | 0.02 | 0.00 | 0.025 |
2021-06-01 | 36.86 | 4238.49 | 0.016949 | future | 0.01 | 0.05 | 0.02 | 0.06 | 0.01 | 0.02 | 0.04 | 0.01 | 0.02 | 0.02 | 0.026 |
2021-05-01 | 36.62 | 4167.85 | 0.006440 | future | 0.01 | 0.05 | 0.00 | 0.06 | 0.01 | 0.02 | 0.04 | 0.01 | 0.02 | 0.02 | 0.024 |
2021-04-01 | 36.72 | 4141.18 | 0.058987 | future | 0.01 | 0.05 | 0.00 | 0.06 | 0.01 | 0.02 | 0.00 | 0.01 | 0.00 | 0.01 | 0.017 |
np.mean(ls_score), ls_score # model score mean and model score list
(0.7636, [0.77, 0.752, 0.779, 0.767, 0.803, 0.769, 0.818, 0.719, 0.749, 0.71])
# Buffet Indicator
# Get your api_key from FRED
# https://fred.stlouisfed.org/docs/api/api_key.html
# fred = Fred(api_key='abcdefghijklmnopqrstuvwxyz123456') # example # get your api_key from FRED
fred = Fred(api_key='your_api_key')
# wilshire = fred.get_series('WILL5000PRFC')
gdp_data = fred.get_series_latest_release('GDP')
gdp_data
1946-01-01 NaN 1946-04-01 NaN 1946-07-01 NaN 1946-10-01 NaN 1947-01-01 243.164 ... 2020-04-01 19477.444 2020-07-01 21138.574 2020-10-01 21477.597 2021-01-01 22038.226 2021-04-01 22740.959 Length: 302, dtype: float64
# Convert from quarter to day with interpolate
idx = pd.date_range(gdp_data.index.min(), gdp_data.index.max())
gdp_data = gdp_data.reindex(idx, fill_value=np.nan).copy()
gdp_data
1946-01-01 NaN 1946-01-02 NaN 1946-01-03 NaN 1946-01-04 NaN 1946-01-05 NaN ... 2021-03-28 NaN 2021-03-29 NaN 2021-03-30 NaN 2021-03-31 NaN 2021-04-01 22740.959 Freq: D, Length: 27485, dtype: float64
gdp_data = pd.to_numeric(gdp_data, errors='coerce').astype('float64').copy()
gdp_data.interpolate(inplace=True)
gdp_data
1946-01-01 NaN 1946-01-02 NaN 1946-01-03 NaN 1946-01-04 NaN 1946-01-05 NaN ... 2021-03-28 22709.726422 2021-03-29 22717.534567 2021-03-30 22725.342711 2021-03-31 22733.150856 2021-04-01 22740.959000 Freq: D, Length: 27485, dtype: float64
# %matplotlib inline
gdp_data.plot.line()
<AxesSubplot:>
wilshire = fred.get_series('WILL5000PRFC')
idx = pd.date_range(wilshire.index.min(), wilshire.index.max())
wilshire = wilshire.reindex(idx, fill_value=np.nan).copy()
wilshire
1970-12-31 830.27 1971-01-01 NaN 1971-01-02 NaN 1971-01-03 NaN 1971-01-04 NaN ... 2021-09-26 NaN 2021-09-27 46299.74 2021-09-28 45313.81 2021-09-29 45332.09 2021-09-30 44850.03 Freq: D, Length: 18537, dtype: float64
wilshire.interpolate(inplace=True)
wilshire
1970-12-31 830.270000 1971-01-01 831.754138 1971-01-02 833.238276 1971-01-03 834.722414 1971-01-04 836.206552 ... 2021-09-26 46320.236667 2021-09-27 46299.740000 2021-09-28 45313.810000 2021-09-29 45332.090000 2021-09-30 44850.030000 Freq: D, Length: 18537, dtype: float64
wilshire.plot.line()
<AxesSubplot:>
gdp_data.name='GDP'
wilshire.name='Wilshire 5000'
bf_ind = pd.concat([gdp_data, wilshire], axis=1) # Buffet Indicator
bf_ind
GDP | Wilshire 5000 | |
---|---|---|
1946-01-01 | NaN | NaN |
1946-01-02 | NaN | NaN |
1946-01-03 | NaN | NaN |
1946-01-04 | NaN | NaN |
1946-01-05 | NaN | NaN |
... | ... | ... |
2021-09-26 | NaN | 46320.236667 |
2021-09-27 | NaN | 46299.740000 |
2021-09-28 | NaN | 45313.810000 |
2021-09-29 | NaN | 45332.090000 |
2021-09-30 | NaN | 44850.030000 |
27667 rows × 2 columns
bf_ind.dropna(subset=['Wilshire 5000'], inplace=True) # drop missing GDP
bf_ind
GDP | Wilshire 5000 | |
---|---|---|
1970-12-31 | 1134.650043 | 830.270000 |
1971-01-01 | 1135.156000 | 831.754138 |
1971-01-02 | 1135.390611 | 833.238276 |
1971-01-03 | 1135.625222 | 834.722414 |
1971-01-04 | 1135.859833 | 836.206552 |
... | ... | ... |
2021-09-26 | NaN | 46320.236667 |
2021-09-27 | NaN | 46299.740000 |
2021-09-28 | NaN | 45313.810000 |
2021-09-29 | NaN | 45332.090000 |
2021-09-30 | NaN | 44850.030000 |
18537 rows × 2 columns
bf_ind.interpolate(method ='spline',order=2, inplace=True)
col_bf_ind='Buffet Indicator'
bf_ind[col_bf_ind] = bf_ind['Wilshire 5000'] / bf_ind['GDP']
bf_ind
GDP | Wilshire 5000 | Buffet Indicator | |
---|---|---|---|
1970-12-31 | 1134.650043 | 830.270000 | 0.731741 |
1971-01-01 | 1135.156000 | 831.754138 | 0.732722 |
1971-01-02 | 1135.390611 | 833.238276 | 0.733878 |
1971-01-03 | 1135.625222 | 834.722414 | 0.735033 |
1971-01-04 | 1135.859833 | 836.206552 | 0.736188 |
... | ... | ... | ... |
2021-09-26 | 24046.814696 | 46320.236667 | 1.926252 |
2021-09-27 | 24053.813469 | 46299.740000 | 1.924840 |
2021-09-28 | 24060.808396 | 45313.810000 | 1.883304 |
2021-09-29 | 24067.799478 | 45332.090000 | 1.883516 |
2021-09-30 | 24074.786715 | 44850.030000 | 1.862946 |
18537 rows × 3 columns
# https://www.longtermtrends.net/market-cap-to-gdp-the-buffett-indicator/
bf_ind[col_bf_ind].plot.line() # compare with chart from above link
<AxesSubplot:>
bf_ind.index.min(), bf_ind.index.max()
(Timestamp('1970-12-31 00:00:00', freq='D'), Timestamp('2021-09-30 00:00:00', freq='D'))
df_initial
PER | Price | Date | Price0F | Price1F | Price2F | Price3F | Price4F | Price5F | Price6F | Price1P | Price_Var | PriceFmin | Price_Corr_6M | horizon | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | |||||||||||||||
2021-10-01 | 37.63 | 4357.04 | 2021-10-01 | 4357.04 | NaN | NaN | NaN | NaN | NaN | NaN | 4450.37 | -0.020971 | 4357.04 | NaN | future |
2021-08-01 | 38.44 | 4450.37 | 2021-08-01 | 4450.37 | 4357.04 | NaN | NaN | NaN | NaN | NaN | 4358.13 | 0.021165 | 4357.04 | NaN | future |
2021-07-01 | 37.68 | 4358.13 | 2021-07-01 | 4358.13 | 4450.37 | 4357.04 | NaN | NaN | NaN | NaN | 4238.49 | 0.028227 | 4357.04 | NaN | future |
2021-06-01 | 36.86 | 4238.49 | 2021-06-01 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | NaN | NaN | NaN | 4167.85 | 0.016949 | 4238.49 | NaN | future |
2021-05-01 | 36.62 | 4167.85 | 2021-05-01 | 4167.85 | 4238.49 | 4358.13 | 4450.37 | 4357.04 | NaN | NaN | 4141.18 | 0.006440 | 4167.85 | NaN | future |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1871-06-01 | 12.59 | 4.82 | 1871-06-01 | 4.82 | 4.73 | 4.79 | 4.84 | 4.59 | 4.64 | 4.74 | 4.86 | -0.008230 | 4.59 | 0.047718 | past |
1871-05-01 | 12.59 | 4.86 | 1871-05-01 | 4.86 | 4.82 | 4.73 | 4.79 | 4.84 | 4.59 | 4.64 | 4.74 | 0.025316 | 4.59 | 0.055556 | past |
1871-04-01 | 12.05 | 4.74 | 1871-04-01 | 4.74 | 4.86 | 4.82 | 4.73 | 4.79 | 4.84 | 4.59 | 4.61 | 0.028200 | 4.59 | 0.031646 | past |
1871-03-01 | 11.19 | 4.61 | 1871-03-01 | 4.61 | 4.74 | 4.86 | 4.82 | 4.73 | 4.79 | 4.84 | 4.50 | 0.024444 | 4.61 | 0.000000 | past |
1871-02-01 | 10.92 | 4.50 | 1871-02-01 | 4.50 | 4.61 | 4.74 | 4.86 | 4.82 | 4.73 | 4.79 | NaN | NaN | 4.50 | NaN | past |
1808 rows × 15 columns
df_pred = df_initial.copy()
df_pred = pd.concat([df_pred, bf_ind], axis=1)
df_pred.sort_index(ascending=False, inplace=True)
cols_iter = ['Date','PER', 'Price', 'Price_Var', 'horizon', 'Buffet Indicator', 'Price_Corr_6M']
df_pred[col_bf_ind] = df_pred[col_bf_ind].interpolate(method='linear', limit_direction='backward')
df_pred = df_pred[cols_iter].copy()
df_pred.dropna(subset=['horizon', 'Buffet Indicator'], inplace=True)
df_pred
Date | PER | Price | Price_Var | horizon | Buffet Indicator | Price_Corr_6M | |
---|---|---|---|---|---|---|---|
2021-10-01 | 2021-10-01 | 37.63 | 4357.04 | -0.020971 | future | 1.862946 | NaN |
2021-08-01 | 2021-08-01 | 38.44 | 4450.37 | 0.021165 | future | 1.931096 | NaN |
2021-07-01 | 2021-07-01 | 37.68 | 4358.13 | 0.028227 | future | 1.934718 | NaN |
2021-06-01 | 2021-06-01 | 36.86 | 4238.49 | 0.016949 | future | 1.897103 | NaN |
2021-05-01 | 2021-05-01 | 36.62 | 4167.85 | 0.006440 | future | 1.910928 | NaN |
... | ... | ... | ... | ... | ... | ... | ... |
1971-05-01 | 1971-05-01 | 17.56 | 101.60 | -0.013592 | past | 0.820049 | 0.086811 |
1971-04-01 | 1971-04-01 | 17.92 | 103.00 | 0.034137 | past | 0.799294 | 0.055922 |
1971-03-01 | 1971-03-01 | 17.40 | 99.60 | 0.025641 | past | 0.773546 | 0.023695 |
1971-02-01 | 1971-02-01 | 17.03 | 97.11 | 0.038721 | past | 0.765564 | 0.000000 |
1971-01-01 | 1971-01-01 | 16.46 | 93.49 | 0.038201 | past | 0.732722 | 0.000000 |
609 rows × 7 columns
cols_train=['Date','PER', 'Price', 'Price_Var', 'Buffet Indicator', 'Price_Corr_6M']
df= df_pred[cols_train].copy()
df.dropna(inplace=True)
df
Date | PER | Price | Price_Var | Buffet Indicator | Price_Corr_6M | |
---|---|---|---|---|---|---|
2021-03-01 | 2021-03-01 | 35.04 | 3910.51 | 0.006973 | 1.846238 | 0.000000 |
2021-02-01 | 2021-02-01 | 35.10 | 3883.43 | 0.023639 | 1.798761 | 0.000000 |
2021-01-01 | 2021-01-01 | 34.51 | 3793.75 | 0.026639 | 1.783969 | 0.000000 |
2020-12-01 | 2020-12-01 | 33.77 | 3695.31 | 0.041229 | 1.748069 | 0.000000 |
2020-11-01 | 2020-11-01 | 32.47 | 3548.99 | 0.038111 | 1.568975 | 0.000000 |
... | ... | ... | ... | ... | ... | ... |
1971-05-01 | 1971-05-01 | 17.56 | 101.60 | -0.013592 | 0.820049 | 0.086811 |
1971-04-01 | 1971-04-01 | 17.92 | 103.00 | 0.034137 | 0.799294 | 0.055922 |
1971-03-01 | 1971-03-01 | 17.40 | 99.60 | 0.025641 | 0.773546 | 0.023695 |
1971-02-01 | 1971-02-01 | 17.03 | 97.11 | 0.038721 | 0.765564 | 0.000000 |
1971-01-01 | 1971-01-01 | 16.46 | 93.49 | 0.038201 | 0.732722 | 0.000000 |
603 rows × 6 columns
# RANDOM CROSS FOLD for Bootstrapping
ls_cols_features=['PER', 'Price', 'Price_Var','Buffet Indicator'] # test with one by one feature
ls_cols_features.append(['PER','Buffet Indicator']) # test with set of features
ls_cols_features.append(['PER', 'Price','Buffet Indicator']) # test with set of features
start_date = df.index[-periods]
for e in ls_cols_features:
n_times = 10 # build model n times and predict n times
periods = 6 # predicted last months
# df_pred = df_initial.copy()
df_pred_iter = df_pred.iloc[:periods].copy()
ls_pc6m_cols=[]
ls_score = []
cols_features = [e]
if isinstance(e, list):
cols_features = e # set of features
else:
cols_features = [e] # one feature
for i in range(n_times):
# start_date = '1974-01-01'
model, band, score = generate_model(df, cols_features, start_date, True)
score = score.round(3)
ls_score.append(score)
print('score:',score, 'band:', band)
X_pred, y_dummy = generate_dataset(df_pred, cols_features, band, periods=periods)
y_pred = model.predict(X_pred).round(2) # recols_features, adable
y_pred = np.clip(y_pred,0,0.5) # replace outliers
pc6m_col = 'PC6M'+str(1+i)
ls_pc6m_cols.append(pc6m_col)
df_pred_iter[pc6m_col] = y_pred
# RANDOM CROSS FOLD PREDICTION on PER and BUFFET INDICATOR
df_pred_iter['PC6M_AVG']=df_pred_iter[ls_pc6m_cols].mean(axis=1) # average of all predictions
display(df_pred_iter.loc[:,df_pred_iter.columns!=col_y])
print('scores:', np.mean(ls_score).round(3), ls_score) # model score mean and model score list)
retest score: 0.741 0.741 features: ['PER'] X_train.shape: (418, 22) score: 0.741 band: 60 retest score: 0.814 0.814 features: ['PER'] X_train.shape: (418, 22) score: 0.814 band: 60 retest score: 0.78 0.78 features: ['PER'] X_train.shape: (418, 22) score: 0.78 band: 36 retest score: 0.692 0.692 features: ['PER'] X_train.shape: (418, 22) score: 0.692 band: 60 retest score: 0.737 0.737 features: ['PER'] X_train.shape: (418, 22) score: 0.737 band: 60 retest score: 0.597 0.597 features: ['PER'] X_train.shape: (418, 22) score: 0.597 band: 60 retest score: 0.779 0.779 features: ['PER'] X_train.shape: (418, 22) score: 0.779 band: 36 retest score: 0.669 0.669 features: ['PER'] X_train.shape: (418, 22) score: 0.669 band: 48 retest score: 0.698 0.698 features: ['PER'] X_train.shape: (418, 22) score: 0.698 band: 48 retest score: 0.714 0.714 features: ['PER'] X_train.shape: (418, 22) score: 0.714 band: 48
Date | PER | Price | Price_Var | horizon | Buffet Indicator | PC6M1 | PC6M2 | PC6M3 | PC6M4 | PC6M5 | PC6M6 | PC6M7 | PC6M8 | PC6M9 | PC6M10 | PC6M_AVG | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2021-10-01 | 2021-10-01 | 37.63 | 4357.04 | -0.020971 | future | 1.862946 | 0.02 | 0.06 | 0.05 | 0.09 | 0.02 | 0.01 | 0.03 | 0.02 | 0.05 | 0.04 | 0.039 |
2021-08-01 | 2021-08-01 | 38.44 | 4450.37 | 0.021165 | future | 1.931096 | 0.02 | 0.06 | 0.01 | 0.09 | 0.02 | 0.02 | 0.02 | 0.01 | 0.05 | 0.04 | 0.034 |
2021-07-01 | 2021-07-01 | 37.68 | 4358.13 | 0.028227 | future | 1.934718 | 0.03 | 0.07 | 0.02 | 0.09 | 0.02 | 0.03 | 0.01 | 0.02 | 0.04 | 0.04 | 0.037 |
2021-06-01 | 2021-06-01 | 36.86 | 4238.49 | 0.016949 | future | 1.897103 | 0.01 | 0.04 | 0.00 | 0.09 | 0.00 | 0.00 | 0.00 | 0.02 | 0.04 | 0.07 | 0.027 |
2021-05-01 | 2021-05-01 | 36.62 | 4167.85 | 0.006440 | future | 1.910928 | 0.01 | 0.03 | 0.00 | 0.09 | 0.00 | 0.01 | 0.00 | 0.01 | 0.02 | 0.05 | 0.022 |
2021-04-01 | 2021-04-01 | 36.72 | 4141.18 | 0.058987 | future | 1.859243 | 0.01 | 0.01 | 0.00 | 0.09 | 0.00 | 0.01 | 0.01 | 0.00 | 0.06 | 0.04 | 0.023 |
scores: 0.722 [0.741, 0.814, 0.78, 0.692, 0.737, 0.597, 0.779, 0.669, 0.698, 0.714] retest score: 0.823 0.823 features: ['Price'] X_train.shape: (418, 22) score: 0.823 band: 48 retest score: 0.716 0.716 features: ['Price'] X_train.shape: (418, 22) score: 0.716 band: 60 retest score: 0.705 0.705 features: ['Price'] X_train.shape: (418, 22) score: 0.705 band: 24 retest score: 0.755 0.755 features: ['Price'] X_train.shape: (418, 22) score: 0.755 band: 48 retest score: 0.806 0.806 features: ['Price'] X_train.shape: (418, 22) score: 0.806 band: 24 retest score: 0.745 0.745 features: ['Price'] X_train.shape: (418, 22) score: 0.745 band: 60 retest score: 0.719 0.719 features: ['Price'] X_train.shape: (418, 22) score: 0.719 band: 60 retest score: 0.69 0.69 features: ['Price'] X_train.shape: (418, 22) score: 0.69 band: 12 retest score: 0.828 0.828 features: ['Price'] X_train.shape: (418, 22) score: 0.828 band: 48 retest score: 0.792 0.792 features: ['Price'] X_train.shape: (418, 22) score: 0.792 band: 48
Date | PER | Price | Price_Var | horizon | Buffet Indicator | PC6M1 | PC6M2 | PC6M3 | PC6M4 | PC6M5 | PC6M6 | PC6M7 | PC6M8 | PC6M9 | PC6M10 | PC6M_AVG | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2021-10-01 | 2021-10-01 | 37.63 | 4357.04 | -0.020971 | future | 1.862946 | 0.03 | 0.07 | 0.0 | 0.02 | 0.00 | 0.08 | 0.07 | 0.11 | 0.09 | 0.03 | 0.050 |
2021-08-01 | 2021-08-01 | 38.44 | 4450.37 | 0.021165 | future | 1.931096 | 0.03 | 0.07 | 0.0 | 0.01 | 0.00 | 0.08 | 0.07 | 0.11 | 0.09 | 0.02 | 0.048 |
2021-07-01 | 2021-07-01 | 37.68 | 4358.13 | 0.028227 | future | 1.934718 | 0.02 | 0.07 | 0.0 | 0.00 | 0.01 | 0.08 | 0.07 | 0.10 | 0.08 | 0.02 | 0.045 |
2021-06-01 | 2021-06-01 | 36.86 | 4238.49 | 0.016949 | future | 1.897103 | 0.02 | 0.07 | 0.0 | 0.00 | 0.02 | 0.08 | 0.07 | 0.10 | 0.08 | 0.02 | 0.046 |
2021-05-01 | 2021-05-01 | 36.62 | 4167.85 | 0.006440 | future | 1.910928 | 0.02 | 0.07 | 0.0 | 0.00 | 0.00 | 0.09 | 0.07 | 0.11 | 0.08 | 0.02 | 0.046 |
2021-04-01 | 2021-04-01 | 36.72 | 4141.18 | 0.058987 | future | 1.859243 | 0.02 | 0.06 | 0.0 | 0.00 | 0.01 | 0.08 | 0.07 | 0.10 | 0.08 | 0.02 | 0.044 |
scores: 0.758 [0.823, 0.716, 0.705, 0.755, 0.806, 0.745, 0.719, 0.69, 0.828, 0.792] retest score: 0.689 0.689 features: ['Price_Var'] X_train.shape: (418, 22) score: 0.689 band: 36 retest score: 0.763 0.763 features: ['Price_Var'] X_train.shape: (418, 22) score: 0.763 band: 60 retest score: 0.734 0.734 features: ['Price_Var'] X_train.shape: (418, 22) score: 0.734 band: 60 retest score: 0.749 0.749 features: ['Price_Var'] X_train.shape: (418, 22) score: 0.749 band: 36 retest score: 0.73 0.73 features: ['Price_Var'] X_train.shape: (418, 22) score: 0.73 band: 24 retest score: 0.745 0.745 features: ['Price_Var'] X_train.shape: (418, 22) score: 0.745 band: 36 retest score: 0.683 0.683 features: ['Price_Var'] X_train.shape: (418, 22) score: 0.683 band: 60 retest score: 0.686 0.686 features: ['Price_Var'] X_train.shape: (418, 22) score: 0.686 band: 48 retest score: 0.751 0.751 features: ['Price_Var'] X_train.shape: (418, 22) score: 0.751 band: 24 retest score: 0.697 0.697 features: ['Price_Var'] X_train.shape: (418, 22) score: 0.697 band: 60
Date | PER | Price | Price_Var | horizon | Buffet Indicator | PC6M1 | PC6M2 | PC6M3 | PC6M4 | PC6M5 | PC6M6 | PC6M7 | PC6M8 | PC6M9 | PC6M10 | PC6M_AVG | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2021-10-01 | 2021-10-01 | 37.63 | 4357.04 | -0.020971 | future | 1.862946 | 0.0 | 0.0 | 0.0 | 0.01 | 0.01 | 0.01 | 0.0 | 0.04 | 0.02 | 0.0 | 0.009 |
2021-08-01 | 2021-08-01 | 38.44 | 4450.37 | 0.021165 | future | 1.931096 | 0.0 | 0.0 | 0.0 | 0.05 | 0.01 | 0.02 | 0.0 | 0.04 | 0.02 | 0.0 | 0.014 |
2021-07-01 | 2021-07-01 | 37.68 | 4358.13 | 0.028227 | future | 1.934718 | 0.0 | 0.0 | 0.0 | 0.05 | 0.00 | 0.02 | 0.0 | 0.04 | 0.02 | 0.0 | 0.013 |
2021-06-01 | 2021-06-01 | 36.86 | 4238.49 | 0.016949 | future | 1.897103 | 0.0 | 0.0 | 0.0 | 0.05 | 0.00 | 0.02 | 0.0 | 0.03 | 0.02 | 0.0 | 0.012 |
2021-05-01 | 2021-05-01 | 36.62 | 4167.85 | 0.006440 | future | 1.910928 | 0.0 | 0.0 | 0.0 | 0.05 | 0.00 | 0.02 | 0.0 | 0.03 | 0.02 | 0.0 | 0.012 |
2021-04-01 | 2021-04-01 | 36.72 | 4141.18 | 0.058987 | future | 1.859243 | 0.0 | 0.0 | 0.0 | 0.05 | 0.00 | 0.01 | 0.0 | 0.03 | 0.00 | 0.0 | 0.009 |
scores: 0.723 [0.689, 0.763, 0.734, 0.749, 0.73, 0.745, 0.683, 0.686, 0.751, 0.697] retest score: 0.776 0.776 features: ['Buffet Indicator'] X_train.shape: (418, 22) score: 0.776 band: 36 retest score: 0.644 0.644 features: ['Buffet Indicator'] X_train.shape: (418, 22) score: 0.644 band: 36 retest score: 0.719 0.719 features: ['Buffet Indicator'] X_train.shape: (418, 22) score: 0.719 band: 36 retest score: 0.761 0.761 features: ['Buffet Indicator'] X_train.shape: (418, 22) score: 0.761 band: 36 retest score: 0.771 0.771 features: ['Buffet Indicator'] X_train.shape: (418, 22) score: 0.771 band: 60 retest score: 0.731 0.731 features: ['Buffet Indicator'] X_train.shape: (418, 22) score: 0.731 band: 60 retest score: 0.734 0.734 features: ['Buffet Indicator'] X_train.shape: (418, 22) score: 0.734 band: 36 retest score: 0.763 0.763 features: ['Buffet Indicator'] X_train.shape: (418, 22) score: 0.763 band: 36 retest score: 0.716 0.716 features: ['Buffet Indicator'] X_train.shape: (418, 22) score: 0.716 band: 36 retest score: 0.719 0.719 features: ['Buffet Indicator'] X_train.shape: (418, 22) score: 0.719 band: 36
Date | PER | Price | Price_Var | horizon | Buffet Indicator | PC6M1 | PC6M2 | PC6M3 | PC6M4 | PC6M5 | PC6M6 | PC6M7 | PC6M8 | PC6M9 | PC6M10 | PC6M_AVG | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2021-10-01 | 2021-10-01 | 37.63 | 4357.04 | -0.020971 | future | 1.862946 | 0.04 | 0.06 | 0.02 | 0.06 | 0.12 | 0.12 | 0.15 | 0.03 | 0.09 | 0.07 | 0.076 |
2021-08-01 | 2021-08-01 | 38.44 | 4450.37 | 0.021165 | future | 1.931096 | 0.04 | 0.03 | 0.02 | 0.05 | 0.10 | 0.12 | 0.10 | 0.01 | 0.07 | 0.02 | 0.056 |
2021-07-01 | 2021-07-01 | 37.68 | 4358.13 | 0.028227 | future | 1.934718 | 0.03 | 0.03 | 0.00 | 0.06 | 0.08 | 0.12 | 0.10 | 0.01 | 0.06 | 0.01 | 0.050 |
2021-06-01 | 2021-06-01 | 36.86 | 4238.49 | 0.016949 | future | 1.897103 | 0.03 | 0.00 | 0.00 | 0.02 | 0.08 | 0.09 | 0.07 | 0.00 | 0.05 | 0.00 | 0.034 |
2021-05-01 | 2021-05-01 | 36.62 | 4167.85 | 0.006440 | future | 1.910928 | 0.03 | 0.00 | 0.00 | 0.02 | 0.09 | 0.09 | 0.06 | 0.00 | 0.05 | 0.00 | 0.034 |
2021-04-01 | 2021-04-01 | 36.72 | 4141.18 | 0.058987 | future | 1.859243 | 0.03 | 0.01 | 0.00 | 0.00 | 0.01 | 0.03 | 0.06 | 0.00 | 0.05 | 0.00 | 0.019 |
scores: 0.733 [0.776, 0.644, 0.719, 0.761, 0.771, 0.731, 0.734, 0.763, 0.716, 0.719] retest score: 0.805 0.805 features: ['PER', 'Buffet Indicator'] X_train.shape: (418, 44) score: 0.805 band: 60 retest score: 0.74 0.74 features: ['PER', 'Buffet Indicator'] X_train.shape: (418, 44) score: 0.74 band: 48 retest score: 0.697 0.697 features: ['PER', 'Buffet Indicator'] X_train.shape: (418, 44) score: 0.697 band: 36 retest score: 0.749 0.749 features: ['PER', 'Buffet Indicator'] X_train.shape: (418, 44) score: 0.749 band: 24 retest score: 0.711 0.711 features: ['PER', 'Buffet Indicator'] X_train.shape: (418, 44) score: 0.711 band: 60 retest score: 0.73 0.73 features: ['PER', 'Buffet Indicator'] X_train.shape: (418, 44) score: 0.73 band: 48 retest score: 0.75 0.75 features: ['PER', 'Buffet Indicator'] X_train.shape: (418, 44) score: 0.75 band: 48 retest score: 0.795 0.795 features: ['PER', 'Buffet Indicator'] X_train.shape: (418, 44) score: 0.795 band: 12 retest score: 0.792 0.792 features: ['PER', 'Buffet Indicator'] X_train.shape: (418, 44) score: 0.792 band: 60 retest score: 0.757 0.757 features: ['PER', 'Buffet Indicator'] X_train.shape: (418, 44) score: 0.757 band: 24
Date | PER | Price | Price_Var | horizon | Buffet Indicator | PC6M1 | PC6M2 | PC6M3 | PC6M4 | PC6M5 | PC6M6 | PC6M7 | PC6M8 | PC6M9 | PC6M10 | PC6M_AVG | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2021-10-01 | 2021-10-01 | 37.63 | 4357.04 | -0.020971 | future | 1.862946 | 0.02 | 0.13 | 0.02 | 0.05 | 0.03 | 0.01 | 0.08 | 0.10 | 0.01 | 0.07 | 0.052 |
2021-08-01 | 2021-08-01 | 38.44 | 4450.37 | 0.021165 | future | 1.931096 | 0.03 | 0.07 | 0.03 | 0.02 | 0.03 | 0.01 | 0.09 | 0.10 | 0.01 | 0.08 | 0.047 |
2021-07-01 | 2021-07-01 | 37.68 | 4358.13 | 0.028227 | future | 1.934718 | 0.03 | 0.05 | 0.03 | 0.01 | 0.02 | 0.01 | 0.08 | 0.10 | 0.01 | 0.06 | 0.040 |
2021-06-01 | 2021-06-01 | 36.86 | 4238.49 | 0.016949 | future | 1.897103 | 0.02 | 0.05 | 0.00 | 0.04 | 0.01 | 0.01 | 0.08 | 0.12 | 0.00 | 0.07 | 0.040 |
2021-05-01 | 2021-05-01 | 36.62 | 4167.85 | 0.006440 | future | 1.910928 | 0.01 | 0.05 | 0.01 | 0.04 | 0.01 | 0.00 | 0.06 | 0.11 | 0.01 | 0.07 | 0.037 |
2021-04-01 | 2021-04-01 | 36.72 | 4141.18 | 0.058987 | future | 1.859243 | 0.00 | 0.05 | 0.00 | 0.03 | 0.01 | 0.00 | 0.06 | 0.10 | 0.01 | 0.07 | 0.033 |
scores: 0.753 [0.805, 0.74, 0.697, 0.749, 0.711, 0.73, 0.75, 0.795, 0.792, 0.757] retest score: 0.776 0.776 features: ['PER', 'Price', 'Buffet Indicator'] X_train.shape: (418, 66) score: 0.776 band: 12 retest score: 0.801 0.801 features: ['PER', 'Price', 'Buffet Indicator'] X_train.shape: (418, 66) score: 0.801 band: 36 retest score: 0.771 0.771 features: ['PER', 'Price', 'Buffet Indicator'] X_train.shape: (418, 66) score: 0.771 band: 60 retest score: 0.819 0.819 features: ['PER', 'Price', 'Buffet Indicator'] X_train.shape: (418, 66) score: 0.819 band: 48 retest score: 0.803 0.803 features: ['PER', 'Price', 'Buffet Indicator'] X_train.shape: (418, 66) score: 0.803 band: 24 retest score: 0.768 0.768 features: ['PER', 'Price', 'Buffet Indicator'] X_train.shape: (418, 66) score: 0.768 band: 60 retest score: 0.841 0.841 features: ['PER', 'Price', 'Buffet Indicator'] X_train.shape: (418, 66) score: 0.841 band: 60 retest score: 0.736 0.736 features: ['PER', 'Price', 'Buffet Indicator'] X_train.shape: (418, 66) score: 0.736 band: 48 retest score: 0.771 0.771 features: ['PER', 'Price', 'Buffet Indicator'] X_train.shape: (418, 66) score: 0.771 band: 60 retest score: 0.783 0.783 features: ['PER', 'Price', 'Buffet Indicator'] X_train.shape: (418, 66) score: 0.783 band: 48
Date | PER | Price | Price_Var | horizon | Buffet Indicator | PC6M1 | PC6M2 | PC6M3 | PC6M4 | PC6M5 | PC6M6 | PC6M7 | PC6M8 | PC6M9 | PC6M10 | PC6M_AVG | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2021-10-01 | 2021-10-01 | 37.63 | 4357.04 | -0.020971 | future | 1.862946 | 0.12 | 0.07 | 0.09 | 0.04 | 0.09 | 0.07 | 0.04 | 0.05 | 0.05 | 0.07 | 0.069 |
2021-08-01 | 2021-08-01 | 38.44 | 4450.37 | 0.021165 | future | 1.931096 | 0.12 | 0.01 | 0.09 | 0.04 | 0.05 | 0.07 | 0.04 | 0.05 | 0.05 | 0.04 | 0.056 |
2021-07-01 | 2021-07-01 | 37.68 | 4358.13 | 0.028227 | future | 1.934718 | 0.12 | 0.01 | 0.07 | 0.03 | 0.00 | 0.06 | 0.04 | 0.05 | 0.04 | 0.05 | 0.047 |
2021-06-01 | 2021-06-01 | 36.86 | 4238.49 | 0.016949 | future | 1.897103 | 0.08 | 0.01 | 0.06 | 0.03 | 0.00 | 0.07 | 0.03 | 0.05 | 0.04 | 0.05 | 0.042 |
2021-05-01 | 2021-05-01 | 36.62 | 4167.85 | 0.006440 | future | 1.910928 | 0.08 | 0.01 | 0.06 | 0.03 | 0.02 | 0.06 | 0.03 | 0.05 | 0.04 | 0.05 | 0.043 |
2021-04-01 | 2021-04-01 | 36.72 | 4141.18 | 0.058987 | future | 1.859243 | 0.09 | 0.01 | 0.06 | 0.01 | 0.02 | 0.05 | 0.03 | 0.03 | 0.04 | 0.03 | 0.037 |
scores: 0.787 [0.776, 0.801, 0.771, 0.819, 0.803, 0.768, 0.841, 0.736, 0.771, 0.783]
#