import sys
sys.path.append('../scripts')
from pathlib import Path
import pandas as pd
import hydra
from hydra import compose, initialize
# Reset Hydra's process-wide singleton before initializing again
# (presumably so this code can be re-run in the same interpreter session
# without tripping over an earlier initialize() -- confirm against Hydra docs).
global_hydra = hydra.core.global_hydra.GlobalHydra.instance()
global_hydra.clear()

# Point Hydra at the parent directory and compose the experiment config.
initialize(version_base='1.1', job_name='foo', config_path=Path('..'))
config = compose(config_name='experiment.yaml')
from dataset import load_data
# The two TSV paths stored in the config are resolved against the parent
# directory before being handed to load_data.
base_path = Path('..')
cnf_tsv = base_path / config.data.cnf_tsv_path
controls_tsv = base_path / config.data.controls_tsv_path

# load_data returns three frames, unpacked here as train / val / test.
train_df, val_df, test_df = load_data(cnf_tsv, controls_tsv)

# Single frame with all three splits stacked, used for overall totals.
all_df = pd.concat((train_df, val_df, test_df))
# Summary counts per split, always in (train, val, test, all) order.
#
# A bare expression is only echoed in an interactive session; when this file
# runs as a script the values would be computed and thrown away, so each
# tuple is printed explicitly. The tuples that previously sat in the file as
# no-op statements were pasted outputs and are kept below as comments.
splits = (train_df, val_df, test_df, all_df)

# Number of rows in each split.
# Recorded output: (5336, 1201, 1159, 7696)
print(tuple(len(df) for df in splits))

# Ellipses: rows where `controls` is False. `~` is a logical NOT only for a
# boolean column; the recorded counts (ellipses + controls == rows) are
# consistent with `controls` being boolean.
# Recorded output: (2715, 590, 543, 3848)
print(tuple(int((~df.controls).sum()) for df in splits))

# Controls: rows where `controls` is True.
# int() keeps the printed tuple free of numpy scalar wrappers.
# Recorded output: (2621, 611, 616, 3848)
print(tuple(int(df.controls.sum()) for df in splits))