#!/usr/bin/env python
# coding: utf-8

# In[4]:


from utils.returns_data_class import ReturnsData
from utils.window_context import get_target_context_sets, Euclidean

# Constants and configurations
TRAIN_PCT = 1
CONTEXT_SIZE = 32
GRANULARITY = 1
WINDOW_LENGTHS = [5, 10, 2]
STRIDE = None

# Build target/context index sets for each window length: re-aggregate the
# returns at that period, split, and collect the CONTEXT_SIZE nearest series
# (Euclidean distance between return windows) as the context for each target.
tgt_context_sets = []
for window_length in WINDOW_LENGTHS:
    data = ReturnsData(
        daily_returns_path="Data/returns_df_611.csv",
        extras_path="Data/historical_stocks.csv",
    )
    data.change_returns_period(window_length)
    data.train_test_split(TRAIN_PCT)
    tgt_context_sets += get_target_context_sets(
        X=data.returns_df.values.T,
        metric_class=Euclidean(),
        window_length=window_length,
        stride=STRIDE,
        context_size=CONTEXT_SIZE,
        verbose=True,
    )


# In[5]:


import numpy as np
import pandas as pd

# Sanity check: the ten tickers that appear most frequently in JPM's context sets.
i = data.ticker2idx["JPM"]
temp = np.array([xi[1] for xi in tgt_context_sets if xi[0] == i]).flatten()
[data.idx2ticker[xi] for xi in pd.Series(temp).value_counts().index][:10]


# In[ ]:


from models.embedding_models import ClassificationEmbeddings

EMBEDDING_DIM = 20
model = ClassificationEmbeddings(n_time_series=len(data.tickers), embedding_dim=EMBEDDING_DIM)


# In[ ]:


from utils.training_helpers import train_embeddings_from_idx_combinations

EPOCHS = 20
model, losses = train_embeddings_from_idx_combinations(
    n_time_series=len(data.tickers),
    idx_combinations=tgt_context_sets,
    model=model,
    epochs=EPOCHS,
    # embedding_dim=EMBEDDING_DIM,
    verbose=True,
)


# In[ ]:


from utils.sector_classification import get_sector_score

# Evaluate how well the learned embeddings separate the stocks by sector.
get_sector_score(model.embeddings.weight.detach().numpy(), sectors=data.sectors)


# In[ ]:


SAVE_MODEL = False
SAVE_PATH_TEMPLATE = "embeddings/abs_diff_E{epochs}_C{context_size}_D{embedding_dim}_P{periods}_train{train_pct}.pt"
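

# In[ ]:


# The cell above defines a save flag and a path template but never writes the
# model to disk. The sketch below is one way that save might look; it assumes
# ClassificationEmbeddings is a torch.nn.Module (its weights are read via
# model.embeddings.weight above) and that joining WINDOW_LENGTHS with "-" is an
# acceptable encoding for the {periods} field. Both are assumptions, not part
# of the original notebook.
import torch

if SAVE_MODEL:
    save_path = SAVE_PATH_TEMPLATE.format(
        epochs=EPOCHS,
        context_size=CONTEXT_SIZE,
        embedding_dim=EMBEDDING_DIM,
        periods="-".join(str(w) for w in WINDOW_LENGTHS),
        train_pct=TRAIN_PCT,
    )
    # Save only the weights; reload later with model.load_state_dict(torch.load(save_path)).
    torch.save(model.state_dict(), save_path)


# In[ ]:


# Hypothetical usage example (not in the original notebook): find the tickers
# closest to JPM in the learned embedding space via cosine similarity,
# mirroring the context-frequency check earlier. Assumes the embedding rows
# are indexed by the same indices as data.ticker2idx.
emb = model.embeddings.weight.detach().numpy()
emb = emb / np.linalg.norm(emb, axis=1, keepdims=True)  # L2-normalise rows
i = data.ticker2idx["JPM"]
sims = emb @ emb[i]                                      # cosine similarity to JPM
[data.idx2ticker[j] for j in np.argsort(-sims)[1:11]]    # top 10, excluding JPM itself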