from utils.returns_data_class import ReturnsData
from utils.window_context import get_target_context_sets, Euclidean
# Constants and configurations

# Return periods to process; the context-set loop runs once per entry and
# passes the entry on as both the returns period and the window length.
WINDOW_LENGTHS = [5, 10, 2]
# Forwarded unchanged as the `stride` argument of get_target_context_sets.
STRIDE = None
# Intended value for the `context_size` argument of get_target_context_sets.
CONTEXT_SIZE = 32
# Argument handed to ReturnsData.train_test_split.
TRAIN_PCT = 1
# NOTE(review): not referenced in the visible part of the notebook -- confirm
# whether it is still needed.
GRANULARITY = 1
# Load the returns table together with the per-stock extras file.
# NOTE: the window-length loop that follows rebinds `data` on every pass, so
# this instance is only what later cells see if that loop never executes.
data = ReturnsData(
    extras_path="Data/historical_stocks.csv",
    daily_returns_path="Data/returns_df_611.csv",
)
# Pool the target/context index sets produced for every configured period
# into one flat list.
tgt_context_sets = []
for window_length in WINDOW_LENGTHS:
    # A fresh ReturnsData is loaded on every pass -- presumably because
    # change_returns_period modifies the instance in place (TODO confirm).
    # After the loop, `data` is the instance from the last window length;
    # later cells read ticker2idx / idx2ticker / tickers / sectors from it.
    data = ReturnsData(
        daily_returns_path="Data/returns_df_611.csv",
        extras_path="Data/historical_stocks.csv",
    )
    data.change_returns_period(window_length)
    data.train_test_split(TRAIN_PCT)
    tgt_context_sets.extend(
        get_target_context_sets(
            # Transposed so that the first axis of X follows the columns of
            # returns_df.
            X=data.returns_df.values.T,
            metric_class=Euclidean(),
            window_length=window_length,
            stride=STRIDE,
            # Read the module-level setting instead of repeating the literal,
            # so changing CONTEXT_SIZE actually changes the run.
            context_size=CONTEXT_SIZE,
            verbose=True,
        )
    )
No change made because period entered is 1
100%|██████████| 122/122 [00:00<00:00, 180.47it/s] 100%|██████████| 122/122 [00:00<00:00, 144.13it/s] 100%|██████████| 121/121 [00:00<00:00, 161.77it/s]
import numpy as np
import pandas as pd

# Which tickers show up most often in the context sets recorded for JPM?
i = data.ticker2idx["JPM"]
# Collect element 1 of every set whose element 0 equals JPM's index and
# flatten to 1-D (presumably element 0 is the target index and element 1 its
# context indices -- verify against get_target_context_sets).
temp = np.array(
    [pair[1] for pair in tgt_context_sets if pair[0] == i]
).flatten()
# value_counts() orders by descending frequency, so its index is already the
# ranking; map the indices back to ticker symbols and keep the first ten.
[
    data.idx2ticker[idx]
    for idx in pd.Series(temp).value_counts().index
][:10]
['USB', 'C', 'WFC', 'BAC', 'NTRS', 'FHN', 'BK', 'PNC', 'STI', 'GE']
import numpy as np
import pandas as pd

# Which tickers show up most often in the context sets recorded for JPM?
i = data.ticker2idx["JPM"]
# Collect element 1 of every set whose element 0 equals JPM's index and
# flatten to 1-D (presumably element 0 is the target index and element 1 its
# context indices -- verify against get_target_context_sets).
temp = np.array(
    [pair[1] for pair in tgt_context_sets if pair[0] == i]
).flatten()
# value_counts() orders by descending frequency, so its index is already the
# ranking; map the indices back to ticker symbols and keep the first ten.
[
    data.idx2ticker[idx]
    for idx in pd.Series(temp).value_counts().index
][:10]
['USB', 'C', 'WFC', 'BAC', 'PNC', 'NTRS', 'BK', 'STI', 'KEY', 'BBT']
import numpy as np
import pandas as pd

# Which tickers show up most often in the context sets recorded for JPM?
i = data.ticker2idx["JPM"]
# Collect element 1 of every set whose element 0 equals JPM's index and
# flatten to 1-D (presumably element 0 is the target index and element 1 its
# context indices -- verify against get_target_context_sets).
temp = np.array(
    [pair[1] for pair in tgt_context_sets if pair[0] == i]
).flatten()
# value_counts() orders by descending frequency, so its index is already the
# ranking; map the indices back to ticker symbols and keep the first ten.
[
    data.idx2ticker[idx]
    for idx in pd.Series(temp).value_counts().index
][:10]
['C', 'BAC', 'STI', 'WFC', 'PNC', 'HBAN', 'ZION', 'STT', 'KEY', 'CMA']
from models.embedding_models import ClassificationEmbeddings
# Value passed as the model's `embedding_dim`.
EMBEDDING_DIM = 20
# Size the model by the number of tickers held by the current `data` object.
model = ClassificationEmbeddings(
    embedding_dim=EMBEDDING_DIM,
    n_time_series=len(data.tickers),
)
from utils.training_helpers import train_embeddings_from_idx_combinations
# Maximum number of training epochs. The recorded run used 20 (and stopped
# early), so the constant now carries that value and is what the trainer
# receives; previously the constant said 3 while the call hard-coded 20,
# leaving anything that reports EPOCHS out of step with the actual training.
EPOCHS = 20
# Train on the pooled target/context sets; returns the trained model and the
# loss history.
model, losses = train_embeddings_from_idx_combinations(
    n_time_series=len(data.tickers),
    idx_combinations=tgt_context_sets,
    model=model,
    epochs=EPOCHS,
    verbose=True,
)
Training embeddings...
5%|▌ | 1/20 [00:40<12:58, 40.96s/it]
Epoch 0: Loss = 0.10374109499065473
10%|█ | 2/20 [01:21<12:14, 40.83s/it]
Epoch 1: Loss = 0.10121887932745742
15%|█▌ | 3/20 [02:02<11:32, 40.76s/it]
Epoch 2: Loss = 0.0995641483254468
20%|██ | 4/20 [02:43<10:51, 40.74s/it]
Epoch 3: Loss = 0.09868489761125318
25%|██▌ | 5/20 [03:23<10:11, 40.76s/it]
Epoch 4: Loss = 0.09819312226299613
30%|███ | 6/20 [04:05<09:32, 40.89s/it]
Epoch 5: Loss = 0.09788414273028445
35%|███▌ | 7/20 [04:46<08:52, 40.97s/it]
Epoch 6: Loss = 0.0976704512347089
35%|███▌ | 7/20 [05:27<10:07, 46.76s/it]
Epoch 7: Loss = 0.09751443594345513 Early stopping at epoch 7 due to minimal loss reduction.
from utils.sector_classification import get_sector_score
# Score the learned embedding matrix against the sector labels in `data`.
# Requires `model` and `data` from the earlier cells to exist in this kernel.
# .cpu() leaves a CPU tensor untouched and lets the NumPy conversion succeed
# when the weights live on another device (assumes `weight` is a torch
# tensor -- TODO confirm).
get_sector_score(
    model.embeddings.weight.detach().cpu().numpy(),
    sectors=data.sectors,
)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[2], line 3 1 from utils.sector_classification import get_sector_score ----> 3 get_sector_score(model.embeddings.weight.detach().numpy(), sectors=data.sectors) NameError: name 'model' is not defined
# Destination pattern for a saved embedding file. The {epochs},
# {context_size}, {embedding_dim}, {periods} and {train_pct} placeholders are
# filled in by whichever code formats the template (not visible here).
SAVE_PATH_TEMPLATE = "embeddings/abs_diff_E{epochs}_C{context_size}_D{embedding_dim}_P{periods}_train{train_pct}.pt"
# Switch for persisting the trained model; off by default.
SAVE_MODEL = False