1 Install packages, imports and settings

In [ ]:
%pip install -qU tensorflow_addons
%pip install -qU gcsfs
In [ ]:
import os
from pathlib import Path
import logging
import json, joblib
from datetime import datetime
from collections import namedtuple
from functools import partial

# Numerical, stats and ML
import pandas as pd
import numpy as np
import dask.array as da
from scipy import signal
from sklearn.model_selection import StratifiedKFold, KFold
import tensorflow as tf
from tensorflow import keras
import tensorflow_addons as tfa
from tensorflow.keras.callbacks import ModelCheckpoint

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
tf.get_logger().setLevel('WARNING')
tf.config.list_logical_devices()
Out[ ]:
[LogicalDevice(name='/device:CPU:0', device_type='CPU'),
 LogicalDevice(name='/device:GPU:0', device_type='GPU')]
In [ ]:
class SignalFilter:
    """
    Cell 33 of https://www.gw-openscience.org/LVT151012data/LOSC_Event_tutorial_LVT151012.html
    https://scipy-cookbook.readthedocs.io/items/ButterworthBandpass.html
    """
    def __init__(self, scaling: np.ndarray, filter_type: str='highpass',
                 filter_order: int=5, f_lo: float=20.43, f_hi: float=None,
                 sampling_rate: int=2048):
        self.scaling = scaling
        self.filter_type = filter_type
        if filter_type.lower() == 'highpass':
            Wn = f_lo
            self.filter_window = signal.windows.tukey(4096, 0.1)
            self.filter_norm = np.sqrt(1 - f_lo / (sampling_rate / 2))
        elif filter_type.lower() == 'bandpass':
            Wn = [f_lo, f_hi]
            self.filter_window = signal.windows.tukey(4096, 0.1)
            self.filter_norm = np.sqrt((f_hi - f_lo) / (sampling_rate / 2))
        else:
            raise ValueError('Unknown filter type.')
        self.filter = signal.butter(N=filter_order, Wn=Wn, btype=filter_type, output='sos', fs=sampling_rate)

    def filt(self, X):
        X = self.scaling * X * self.filter_window
        if self.filter_type.lower() == 'bandpass':
            X = signal.sosfilt(self.filter, X)
        elif self.filter_type.lower() == 'highpass':
            X = signal.sosfiltfilt(self.filter, X)
        X *= self.filter_norm
        return X

class Data:
    """
    Dataset builder.
    """
    tfrec_format_train = {
        "wave": tf.io.FixedLenFeature([], tf.string),
        "target": tf.io.FixedLenFeature([], tf.int64),
        "wave_id": tf.io.FixedLenFeature([], tf.string),
    }

    tfrec_format_test = {
        "wave": tf.io.FixedLenFeature([], tf.string),
        "wave_id": tf.io.FixedLenFeature([], tf.string)
    }

    AUTO = tf.data.AUTOTUNE

    def __init__(self, config):
        self.config = config

        # Data file paths
        self.train_files = [config.data_path + f'train{i}.tfrecords' for i in range(20)]
        self.test_files = [config.data_path + f'test{i}.tfrecords' for i in range(10)]

        # Front-end signal filter
        self.filter = SignalFilter(**config.filter)

    def _preprocess(self, X, y, train_or_test=True):
        """
        Preprocess a batch of data: scaling, filtering, and transposing. Casting to tf.float32 is done in the wrapper.
        """
        X = X.numpy()
        X = self.filter.filt(X)
        X = np.transpose(X, axes=(0, 2, 1))
        if train_or_test:
            return X, y
        else:
            return X

    def _preprocess_wrapper(self, train_or_test=True):
        if train_or_test:
            def wrapper(X, y):
                return tf.py_function(
                    self._preprocess,
                    inp=[X, y], Tout=[tf.float32, tf.float32])
        else:
            def wrapper(X):
                return tf.py_function(
                    partial(self._preprocess, y=None, train_or_test=False),
                    inp=[X], Tout=tf.float32)
        return wrapper

    def _decode_train(self, tfrecord):
        tensor_dict = tf.io.parse_single_example(tfrecord, self.tfrec_format_train)
        X = tf.reshape(tf.io.decode_raw(tensor_dict['wave'], tf.float64), (3, 4096))
        y = tf.reshape(tf.cast(tensor_dict['target'], tf.float32), [1])
        # wave_ids = tensor_dict['wave_id']
        return X, y

    def _decode_test(self, tfrecord):
        tensor_dict = tf.io.parse_single_example(tfrecord, self.tfrec_format_test)
        X = tf.reshape(tf.io.decode_raw(tensor_dict['wave'], tf.float64), (3, 4096))
        return X

    def get_dataset(self, train_or_test=True, shuffle=True, file_indices=None):
        data_files = self.train_files if train_or_test else self.test_files
        if file_indices is not None:
            data_files = [data_files[i] for i in file_indices]
        ds = tf.data.TFRecordDataset(  # do not interleave test data files with parallel reads
            data_files, num_parallel_reads=self.AUTO if train_or_test else 1, compression_type="GZIP")
        if shuffle:
            options = tf.data.Options()
            options.experimental_deterministic = False
            ds = ds.shuffle(self.config.shuffle_buf_size).with_options(options)
        ds = ds.map(self._decode_train if train_or_test else self._decode_test, num_parallel_calls=self.AUTO)
        ds = ds.batch(self.config.batch_size).map(self._preprocess_wrapper(train_or_test), num_parallel_calls=self.AUTO)
        ds = ds.prefetch(self.AUTO)
        return ds
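
A minimal smoke test for the signal front end (illustrative only; the shapes and values below are assumed, not taken from the original run): the Tukey-windowed highpass filter preserves the (batch, detectors, samples) shape, and the transpose done in _preprocess produces the (batch, samples, detectors) layout the Conv1D stack expects.

In [ ]:
demo_filter = SignalFilter(scaling=1e20, filter_type='highpass')
demo_waves = np.random.randn(2, 3, 4096) * 1e-20      # two fake 3-detector strains
filtered = demo_filter.filt(demo_waves)
print(filtered.shape)                                 # (2, 3, 4096): filtering preserves shape
print(np.transpose(filtered, axes=(0, 2, 1)).shape)   # (2, 4096, 3): Conv1D layout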
In [ ]:
class GeM(keras.layers.Layer):
    def __init__(self, pool_size, p=3, eps=1e-6, **kwargs):
        super(GeM, self).__init__(**kwargs)
        self.pool_size = pool_size
        self.p = p
        self.eps = eps

    def call(self, x):
        x = tf.math.maximum(x, self.eps)
        x = tf.pow(x, self.p)
        x = tf.nn.avg_pool(x, self.pool_size, self.pool_size, 'VALID')
        x = tf.pow(x, 1./self.p)
        return x

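# Illustrative shape check for GeM (sizes assumed, not from the original
# notebook): with p=3 the layer is an average pool of x**3 followed by a cube
# root, so pool_size=8 shrinks the time axis 8x while keeping channels.
print(GeM(pool_size=8)(tf.random.uniform((1, 32, 4))).shape)  # -> (1, 4, 4)
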
def get_model():
    """
    Modified from
    https://journals.aps.org/prl/pdf/10.1103/PhysRevLett.120.141103
    """
    model = keras.models.Sequential([
        keras.layers.Conv1D(64, 64, padding='valid', input_shape=(4096, 3)),
        keras.layers.BatchNormalization(),
        keras.layers.Activation(tf.nn.silu),
        
        keras.layers.Conv1D(64, 32, padding='valid'),
        GeM(pool_size=8),
        keras.layers.BatchNormalization(),
        keras.layers.Activation(tf.nn.silu),

        keras.layers.Conv1D(128, 32, padding='valid'),
        keras.layers.BatchNormalization(),
        keras.layers.Activation(tf.nn.silu),
        
        keras.layers.Conv1D(128, 16, padding='valid'),
        GeM(pool_size=6),
        keras.layers.BatchNormalization(),
        keras.layers.Activation(tf.nn.silu),

        keras.layers.Conv1D(256, 16, padding='valid'),
        keras.layers.BatchNormalization(),
        keras.layers.Activation(tf.nn.silu),
        
        keras.layers.Conv1D(256, 16, padding='valid'),
        GeM(pool_size=4),
        keras.layers.BatchNormalization(),
        keras.layers.Activation(tf.nn.silu),

        keras.layers.Flatten(),
        # keras.layers.GlobalAveragePooling1D(),

        keras.layers.Dense(64),
        keras.layers.BatchNormalization(),
        keras.layers.Dropout(0.25),
        keras.layers.Activation(tf.nn.silu),

        keras.layers.Dense(64),
        keras.layers.BatchNormalization(),
        keras.layers.Dropout(0.25),
        keras.layers.Activation(tf.nn.silu),

        keras.layers.Dense(1, activation='sigmoid')
    ])

    return model
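
An optional sanity check (not part of the original run): build the network once to confirm that a (4096, 3) wave is mapped to a single sigmoid score.

In [ ]:
demo_model = get_model()
print(demo_model.output_shape)                # (None, 1): one probability per wave
print(f'{demo_model.count_params():,} parameters')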
In [ ]:
def seed_all(seed=42):
    np.random.seed(seed)
    tf.random.set_seed(seed)
In [ ]:
def get_logger(
    logger_name,
    log_path=None,
    file_level=logging.INFO,
    stream_level=logging.INFO,
):
    if log_path is None and stream_level is None:
        raise ValueError("Both file and stream logger is None.")
    logger = logging.getLogger(logger_name)
    logger.setLevel(logging.INFO)
    logger_format = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
    # Add file handler
    if log_path is not None and Path(log_path).expanduser().resolve().exists():
        logger_file = (
            Path(log_path).expanduser().resolve().joinpath(logger_name + ".log")
        )
        if logger_file.exists():
            logger_file.unlink()
        fh = logging.FileHandler(logger_file)
        fh.setLevel(file_level)
        fh.setFormatter(logger_format)
        logger.addHandler(fh)
    # Add stream handler
    if stream_level is not None:
        sh = logging.StreamHandler()
        sh.setLevel(stream_level)
        sh.setFormatter(logger_format)
        logger.addHandler(sh)
    return logger

class Config(dict):
    def __init__(self, *args, **kwargs):
        super().__init__(**kwargs)
        self.__dict__ = self

def check_save_config(config: Config):
    config.results_path = str(Path(config.results_parent_path).joinpath(config.name))
    p = Path(config.results_path).expanduser().resolve()
    try:
        p.mkdir(parents=True, exist_ok=False)
    except FileExistsError:
        if any(p.iterdir()):
            raise ValueError("Non-empty results directory.")
    joblib.dump(config, p.joinpath('config.pkl'))
    attrs = {k: v for k, v in vars(config).items() if not k.startswith('__')}
    with open(p.joinpath('config.json'), 'w') as f:  # Human readable JSON
        try:
            attrs['filter']['scaling'] = attrs['filter']['scaling'].tolist()
        except AttributeError:
            pass
        json.dump(attrs, f)

def train(config: Config, train_data, val_data, fold_k=None):
    """
    Train a single fold of training dataset.
    """
    checkpoint_path = Path(config.results_path).joinpath(
        'checkpoint' + ('' if fold_k is None else f"_fold{fold_k}" ))
    step = tf.Variable(0, trainable=False)
    lr_schedule_class = getattr(tf.keras.optimizers.schedules, config.lr_schedule)
    lr_schedule = lr_schedule_class(**config.lr_schedule_paras[config.lr_schedule])
    optimizer_class = getattr(tfa.optimizers, config.optimizer)
    # The schedule is evaluated once at step 0, so training runs at a constant initial learning rate.
    optimizer = optimizer_class(learning_rate=1 * lr_schedule(step), **config.optimizer_paras[config.optimizer])
    # weight_decay = lambda: config.optimizer_paras[config.optimizer]['weight_decay'] * lr_schedule(step)

    model = get_model()
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=[keras.metrics.AUC(name='AUC')])
    checkpoint_cb = ModelCheckpoint(checkpoint_path, monitor='val_AUC', verbose=0, save_best_only=True, mode='max')
    history = model.fit(train_data, epochs=config.epochs, callbacks=[checkpoint_cb], validation_data=val_data)
    pd.DataFrame(history.history).to_csv(checkpoint_path.joinpath('train_history.csv'))

    # return best model and the checkpoint path
    model = keras.models.load_model(checkpoint_path)
    return model, checkpoint_path

def get_score(y_true, y_pred):
    """
    Compute ROC AUC score on tensors.
    """
    auc = tf.keras.metrics.AUC()
    auc.update_state(y_true, y_pred)
    score = auc.result().numpy()
    return score

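# Illustrative call (values assumed): a perfectly ranked pair scores 1.0.
# get_score(tf.constant([0., 1.]), tf.constant([0.3, 0.8]))  # -> 1.0
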
def oof_predict(model, val_data):
    """
    Predict on the validation dataset of a single fold.
    """
    val_y_true = []
    val_y_pred = []
    for X, y in val_data:
        val_y_true.append(y)
        val_y_pred.append(model(X))
    val_y_true = tf.concat(val_y_true, axis=0)
    val_y_pred = tf.concat(val_y_pred, axis=0)
    val_score = get_score(val_y_true, val_y_pred)
    return val_y_true, val_y_pred, val_score

def make_inference(config, models, logger=None):
    """
    Make inference on test dataset and create submission file.
    """
    logger_func = logger.info if logger else print
    data = Data(config)
    test_data = data.get_dataset(train_or_test=False, shuffle=False)
    test_preds = []
    for k, model in zip(config.train_folds or range(config.K_fold), models):
        logger_func(f"Make inference by fold {k} model")
        y_pred = model.predict(test_data)
        test_preds.append(y_pred)
    test_preds = np.concatenate(test_preds, axis=1).mean(axis=1)
    sample = pd.read_csv(config.data_path + 'sample_submission.csv')
    test_preds_df = pd.DataFrame({'id': sample['id'].values, 'target': test_preds})
    p_submission = Path(config.results_path).joinpath(f'{config.name}_submission.csv')
    test_preds_df.to_csv(p_submission, index=False)
    logger_func(f'Test inference written to {p_submission}.')
    return test_preds_df

def train_K_folds_make_inference(config):
    """
    Train K folds of the training dataset with out-of-fold validation.
    """
    try:
        check_save_config(config)
    except FileExistsError:
        print(f"Results path: {config.results_path} exists and not empty. Quit.")
        return

    logger = get_logger(config.name, log_path=config.results_path)
    logger.info(f"{config.description}")
    logger.info(f"Results path: {config.results_path}")

    data = Data(config)
    kf = KFold(n_splits=config.K_fold, shuffle=True, random_state=config.seed)
    oof_models, oof_model_paths, oof_labels, oof_preds, k_fold_scores = [], [], [], [], []
    for k, (train_idx, val_idx) in enumerate(kf.split(data.train_files)):
        if config.train_folds and k not in config.train_folds:
            continue
        logger.info(f"--- Train fold {k} of {config.train_folds} ---")
        logger.info(f"Train: {train_idx}  Val: {val_idx}")
        train_data = data.get_dataset(train_or_test=True, shuffle=True, file_indices=train_idx)
        val_data = data.get_dataset(train_or_test=True, shuffle=False, file_indices=val_idx)
        model, model_path = train(config, train_data, val_data, fold_k=k)
        oof_models.append(model)
        oof_model_paths.append(model_path)
        # oof prediction
        val_y_true, val_y_pred, val_score = oof_predict(model, val_data)
        oof_labels.append(val_y_true)
        oof_preds.append(val_y_pred)
        k_fold_scores.append(val_score)
        logger.info(f"Fold {k} val score: {val_score}")
    oof_labels = tf.concat(oof_labels, axis=0)
    oof_preds = tf.concat(oof_preds, axis=0)
    oof_score = get_score(oof_labels, oof_preds)
    logger.info(f"OOF val score: {oof_score}")

    logger.info("--- Inference ---")
    test_preds_df = make_inference(config, oof_models, logger)
    
    return oof_models, oof_model_paths, oof_score, k_fold_scores, test_preds_df

def load_results_make_inference(results_path: str):
    """
    Load saved models and make inference on test dataset.
    """
    p = Path(results_path).expanduser().resolve()
    if not p.exists():
        print(f"Invalid results_path: {results_path}")
        return
    config = joblib.load(p.joinpath('config.pkl'))
    models = []
    for k in config.train_folds or range(config.K_fold):
        checkpoint_k = p.joinpath(f'checkpoint_fold{k}')
        model = keras.models.load_model(checkpoint_k)
        models.append(model)
    test_preds_df = make_inference(config, models)

    return test_preds_df
In [ ]:
config = Config(
    name = "CNN1d_GeM_SGDW_Highpass_Tukey_" + datetime.now().strftime("%Y-%m-%d_%H-%M"),
    description = "CNN1d GeM SGDW CosineDecay, 5 folds, 20 epochs, batch_size 128. Highpass signal filter.",

    # paths
    # data_path = "data/",  # Dataset on local drive
    data_path = "/content/drive/Shareddrives/ml/g2net/data/",  # Dataset on Google Drive
    # data_path = "gs://kds-8a5a5ceed201023b7b0d1880950ccc33c21b9bef067a7abe0dfb4aaa/",  # Dataset on Kaggle GCS
    # results_path = "results/",  # Save results on local drive
    results_parent_path = "/content/drive/Shareddrives/ml/g2net/results/",  # Save results on Google Drive
    results_path = None,  # results_parent_path + name, to be initialized in check_save_config().
    shuffle_buf_size = 2048,

    # train/test paras
    K_fold = 5,
    train_folds = [],  # If not empty, only train a subset of folds.
    batch_size = 128,
    epochs = 20,

    # warm start
    warm_start = False,
    warm_start_model_path = "",

    # algorithm/model paras
    filter = dict(
        scaling = 1e20,  # 1 / np.array([1.5e-20, 1.5e-20, 0.5e-20]).reshape(-1, 1),
        #filter_type = 'bandpass', filter_order=8, f_lo=25, f_hi=1000, sampling_rate=2048
        filter_type='highpass', filter_order=5, f_lo=20.43, f_hi=None, sampling_rate=2048
    ),
    lr_schedule = 'CosineDecay',
    lr_schedule_paras = {'CosineDecay': dict(initial_learning_rate=1e-1, decay_steps=5, alpha=1e-6)},  # initial_learning_rate = eta_max, alpha = eta_min / eta_max
    optimizer = 'SGDW',
    optimizer_paras = {
        'SGDW': dict(weight_decay=1e-4, momentum=0.9, nesterov=True),
        'AdamW': dict(weight_decay=1e-5),
    },

    # misc
    seed = 42,
)
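
Before launching the full K-fold run below, one can optionally peek at a single preprocessed batch (left commented out here, and hypothetical, since it requires the TFRecords to be mounted at config.data_path):

In [ ]:
# demo_ds = Data(config).get_dataset(train_or_test=True, shuffle=False, file_indices=[0])
# for X_batch, y_batch in demo_ds.take(1):
#     print(X_batch.shape, y_batch.shape)  # expect (128, 4096, 3) (128, 1)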
In [ ]:
oof_models, oof_model_paths, oof_score, k_fold_scores, test_preds_df = train_K_folds_make_inference(config)
2022-01-30 19:27:25,667 - INFO - CNN1d GeM SGDW CosineDecay, 5 folds, 20 epochs, batch_size 128. Highpass signal filter.
2022-01-30 19:27:25,669 - INFO - Results path: /content/drive/Shareddrives/ml/g2net/results/CNN1d_GeM_SGDW_Highpass_Tukey_2022-01-30_19-27
2022-01-30 19:27:25,679 - INFO - --- Train fold 0 of [] ---
2022-01-30 19:27:25,681 - INFO - Train: [ 2  3  4  5  6  7  8  9 10 11 12 13 14 16 18 19]  Val: [ 0  1 15 17]
Epoch 1/20
3500/3500 [==============================] - 625s 172ms/step - loss: 0.4740 - AUC: 0.8321 - val_loss: 0.4502 - val_AUC: 0.8599
Epoch 2/20
3500/3500 [==============================] - 612s 173ms/step - loss: 0.4322 - AUC: 0.8590 - val_loss: 0.4582 - val_AUC: 0.8628
Epoch 3/20
3500/3500 [==============================] - 608s 171ms/step - loss: 0.4257 - AUC: 0.8627 - val_loss: 0.4280 - val_AUC: 0.8661
Epoch 4/20
3500/3500 [==============================] - 598s 169ms/step - loss: 0.4229 - AUC: 0.8641 - val_loss: 0.4277 - val_AUC: 0.8624
Epoch 5/20
3500/3500 [==============================] - 605s 171ms/step - loss: 0.4211 - AUC: 0.8651 - val_loss: 0.4197 - val_AUC: 0.8676
Epoch 6/20
3500/3500 [==============================] - 604s 170ms/step - loss: 0.4195 - AUC: 0.8659 - val_loss: 0.4254 - val_AUC: 0.8630
Epoch 7/20
3500/3500 [==============================] - 618s 172ms/step - loss: 0.4178 - AUC: 0.8667 - val_loss: 0.4188 - val_AUC: 0.8671
Epoch 8/20
3500/3500 [==============================] - 610s 170ms/step - loss: 0.4166 - AUC: 0.8675 - val_loss: 0.4256 - val_AUC: 0.8667
Epoch 9/20
3500/3500 [==============================] - 620s 173ms/step - loss: 0.4149 - AUC: 0.8684 - val_loss: 0.4409 - val_AUC: 0.8673
Epoch 10/20
3500/3500 [==============================] - 619s 172ms/step - loss: 0.4144 - AUC: 0.8687 - val_loss: 0.4260 - val_AUC: 0.8651
Epoch 11/20
3500/3500 [==============================] - 614s 171ms/step - loss: 0.4130 - AUC: 0.8696 - val_loss: 0.4187 - val_AUC: 0.8682
Epoch 12/20
3500/3500 [==============================] - 608s 171ms/step - loss: 0.4124 - AUC: 0.8700 - val_loss: 0.4297 - val_AUC: 0.8666
Epoch 13/20
3500/3500 [==============================] - 599s 169ms/step - loss: 0.4110 - AUC: 0.8706 - val_loss: 0.4715 - val_AUC: 0.8671
Epoch 14/20
3500/3500 [==============================] - 622s 173ms/step - loss: 0.4106 - AUC: 0.8711 - val_loss: 0.4368 - val_AUC: 0.8645
Epoch 15/20
3500/3500 [==============================] - 622s 172ms/step - loss: 0.4095 - AUC: 0.8716 - val_loss: 0.4294 - val_AUC: 0.8662
Epoch 16/20
3500/3500 [==============================] - 612s 171ms/step - loss: 0.4090 - AUC: 0.8720 - val_loss: 0.4366 - val_AUC: 0.8622
Epoch 17/20
3500/3500 [==============================] - 609s 172ms/step - loss: 0.4080 - AUC: 0.8726 - val_loss: 0.4547 - val_AUC: 0.8636
Epoch 18/20
3500/3500 [==============================] - 610s 171ms/step - loss: 0.4075 - AUC: 0.8728 - val_loss: 0.4235 - val_AUC: 0.8666
Epoch 19/20
3500/3500 [==============================] - 618s 173ms/step - loss: 0.4063 - AUC: 0.8738 - val_loss: 0.4486 - val_AUC: 0.8659
Epoch 20/20
3500/3500 [==============================] - 620s 173ms/step - loss: 0.4059 - AUC: 0.8741 - val_loss: 0.4653 - val_AUC: 0.8636
2022-01-30 22:54:05,364 - INFO - Fold 0 val score: 0.8681674599647522
2022-01-30 22:54:05,366 - INFO - --- Train fold 1 of [] ---
2022-01-30 22:54:05,367 - INFO - Train: [ 0  1  2  4  6  7  9 10 12 13 14 15 16 17 18 19]  Val: [ 3  5  8 11]
Epoch 1/20
3500/3500 [==============================] - 604s 170ms/step - loss: 0.4752 - AUC: 0.8316 - val_loss: 0.4422 - val_AUC: 0.8604
Epoch 2/20
3500/3500 [==============================] - 612s 173ms/step - loss: 0.4311 - AUC: 0.8596 - val_loss: 0.4472 - val_AUC: 0.8658
Epoch 3/20
3500/3500 [==============================] - 605s 171ms/step - loss: 0.4241 - AUC: 0.8635 - val_loss: 0.4904 - val_AUC: 0.8635
Epoch 4/20
3500/3500 [==============================] - 604s 170ms/step - loss: 0.4210 - AUC: 0.8653 - val_loss: 0.4232 - val_AUC: 0.8658
Epoch 5/20
3500/3500 [==============================] - 607s 171ms/step - loss: 0.4196 - AUC: 0.8659 - val_loss: 0.4295 - val_AUC: 0.8608
Epoch 6/20
3500/3500 [==============================] - 608s 170ms/step - loss: 0.4177 - AUC: 0.8671 - val_loss: 0.4510 - val_AUC: 0.8658
Epoch 7/20
3500/3500 [==============================] - 597s 169ms/step - loss: 0.4164 - AUC: 0.8676 - val_loss: 0.4249 - val_AUC: 0.8650
Epoch 8/20
3500/3500 [==============================] - 627s 175ms/step - loss: 0.4148 - AUC: 0.8687 - val_loss: 0.4341 - val_AUC: 0.8660
Epoch 9/20
3500/3500 [==============================] - 612s 173ms/step - loss: 0.4138 - AUC: 0.8691 - val_loss: 0.4255 - val_AUC: 0.8667
Epoch 10/20
3500/3500 [==============================] - 615s 173ms/step - loss: 0.4139 - AUC: 0.8691 - val_loss: 0.4178 - val_AUC: 0.8670
Epoch 11/20
3500/3500 [==============================] - 610s 171ms/step - loss: 0.4122 - AUC: 0.8703 - val_loss: 0.4198 - val_AUC: 0.8665
Epoch 12/20
3500/3500 [==============================] - 619s 173ms/step - loss: 0.4114 - AUC: 0.8705 - val_loss: 0.4341 - val_AUC: 0.8652
Epoch 13/20
3500/3500 [==============================] - 609s 170ms/step - loss: 0.4105 - AUC: 0.8712 - val_loss: 0.4316 - val_AUC: 0.8631
Epoch 14/20
3500/3500 [==============================] - 619s 173ms/step - loss: 0.4093 - AUC: 0.8719 - val_loss: 0.4198 - val_AUC: 0.8658
Epoch 15/20
3500/3500 [==============================] - 618s 172ms/step - loss: 0.4084 - AUC: 0.8723 - val_loss: 0.4241 - val_AUC: 0.8633
Epoch 16/20
3500/3500 [==============================] - 607s 171ms/step - loss: 0.4068 - AUC: 0.8735 - val_loss: 0.4319 - val_AUC: 0.8656
Epoch 17/20
3500/3500 [==============================] - 622s 174ms/step - loss: 0.4065 - AUC: 0.8737 - val_loss: 0.4347 - val_AUC: 0.8622
Epoch 18/20
3500/3500 [==============================] - 616s 171ms/step - loss: 0.4060 - AUC: 0.8741 - val_loss: 0.4223 - val_AUC: 0.8638
Epoch 19/20
3500/3500 [==============================] - 620s 172ms/step - loss: 0.4046 - AUC: 0.8747 - val_loss: 0.4300 - val_AUC: 0.8623
Epoch 20/20
3500/3500 [==============================] - 620s 173ms/step - loss: 0.4037 - AUC: 0.8755 - val_loss: 0.4542 - val_AUC: 0.8624
2022-01-31 02:20:40,964 - INFO - Fold 1 val score: 0.8670347332954407
2022-01-31 02:20:40,966 - INFO - --- Train fold 2 of [] ---
2022-01-31 02:20:40,969 - INFO - Train: [ 0  1  3  4  5  6  7  8  9 10 11 12 14 15 17 19]  Val: [ 2 13 16 18]
Epoch 1/20
3500/3500 [==============================] - 604s 170ms/step - loss: 0.4754 - AUC: 0.8316 - val_loss: 0.4662 - val_AUC: 0.8558
Epoch 2/20
3500/3500 [==============================] - 611s 173ms/step - loss: 0.4314 - AUC: 0.8595 - val_loss: 0.4310 - val_AUC: 0.8614
Epoch 3/20
3500/3500 [==============================] - 613s 173ms/step - loss: 0.4251 - AUC: 0.8634 - val_loss: 0.4602 - val_AUC: 0.8623
Epoch 4/20
3500/3500 [==============================] - 611s 172ms/step - loss: 0.4218 - AUC: 0.8651 - val_loss: 0.4296 - val_AUC: 0.8636
Epoch 5/20
3500/3500 [==============================] - 614s 173ms/step - loss: 0.4199 - AUC: 0.8659 - val_loss: 0.4305 - val_AUC: 0.8655
Epoch 6/20
3500/3500 [==============================] - 606s 172ms/step - loss: 0.4183 - AUC: 0.8670 - val_loss: 0.4425 - val_AUC: 0.8656
Epoch 7/20
3500/3500 [==============================] - 611s 172ms/step - loss: 0.4164 - AUC: 0.8679 - val_loss: 0.4205 - val_AUC: 0.8660
Epoch 8/20
3500/3500 [==============================] - 607s 171ms/step - loss: 0.4150 - AUC: 0.8686 - val_loss: 0.4361 - val_AUC: 0.8645
Epoch 9/20
3500/3500 [==============================] - 617s 173ms/step - loss: 0.4135 - AUC: 0.8695 - val_loss: 0.4256 - val_AUC: 0.8636
Epoch 10/20
3500/3500 [==============================] - 621s 173ms/step - loss: 0.4127 - AUC: 0.8700 - val_loss: 0.4539 - val_AUC: 0.8640
Epoch 11/20
3500/3500 [==============================] - 610s 171ms/step - loss: 0.4114 - AUC: 0.8706 - val_loss: 0.4280 - val_AUC: 0.8637
Epoch 12/20
3500/3500 [==============================] - 609s 171ms/step - loss: 0.4102 - AUC: 0.8714 - val_loss: 0.4226 - val_AUC: 0.8641
Epoch 13/20
3500/3500 [==============================] - 621s 173ms/step - loss: 0.4094 - AUC: 0.8721 - val_loss: 0.4301 - val_AUC: 0.8617
Epoch 14/20
3500/3500 [==============================] - 618s 172ms/step - loss: 0.4088 - AUC: 0.8725 - val_loss: 0.4680 - val_AUC: 0.8651
Epoch 15/20
3500/3500 [==============================] - 626s 175ms/step - loss: 0.4077 - AUC: 0.8730 - val_loss: 0.4276 - val_AUC: 0.8618
Epoch 16/20
3500/3500 [==============================] - 601s 169ms/step - loss: 0.4068 - AUC: 0.8736 - val_loss: 0.4408 - val_AUC: 0.8635
Epoch 17/20
3500/3500 [==============================] - 624s 173ms/step - loss: 0.4062 - AUC: 0.8738 - val_loss: 0.4522 - val_AUC: 0.8636
Epoch 18/20
3500/3500 [==============================] - 616s 171ms/step - loss: 0.4052 - AUC: 0.8748 - val_loss: 0.4287 - val_AUC: 0.8605
Epoch 19/20
3500/3500 [==============================] - 620s 173ms/step - loss: 0.4041 - AUC: 0.8755 - val_loss: 0.4273 - val_AUC: 0.8612
Epoch 20/20
3500/3500 [==============================] - 621s 173ms/step - loss: 0.4031 - AUC: 0.8760 - val_loss: 0.4274 - val_AUC: 0.8602
2022-01-31 05:48:19,458 - INFO - Fold 2 val score: 0.8660010099411011
2022-01-31 05:48:19,460 - INFO - --- Train fold 3 of [] ---
2022-01-31 05:48:19,463 - INFO - Train: [ 0  1  2  3  5  6  7  8 10 11 13 14 15 16 17 18]  Val: [ 4  9 12 19]
Epoch 1/20
3500/3500 [==============================] - 607s 171ms/step - loss: 0.4695 - AUC: 0.8358 - val_loss: 0.4535 - val_AUC: 0.8583
Epoch 2/20
3500/3500 [==============================] - 605s 171ms/step - loss: 0.4316 - AUC: 0.8594 - val_loss: 0.4634 - val_AUC: 0.8637
Epoch 3/20
3500/3500 [==============================] - 612s 172ms/step - loss: 0.4251 - AUC: 0.8632 - val_loss: 0.4417 - val_AUC: 0.8651
Epoch 4/20
3500/3500 [==============================] - 607s 171ms/step - loss: 0.4220 - AUC: 0.8647 - val_loss: 0.4680 - val_AUC: 0.8643
Epoch 5/20
3500/3500 [==============================] - 617s 173ms/step - loss: 0.4202 - AUC: 0.8660 - val_loss: 0.4536 - val_AUC: 0.8662
Epoch 6/20
3500/3500 [==============================] - 604s 171ms/step - loss: 0.4184 - AUC: 0.8669 - val_loss: 0.4892 - val_AUC: 0.8671
Epoch 7/20
3500/3500 [==============================] - 608s 171ms/step - loss: 0.4173 - AUC: 0.8672 - val_loss: 0.4187 - val_AUC: 0.8659
Epoch 8/20
3500/3500 [==============================] - 628s 175ms/step - loss: 0.4158 - AUC: 0.8679 - val_loss: 0.4420 - val_AUC: 0.8674
Epoch 9/20
3500/3500 [==============================] - 608s 172ms/step - loss: 0.4147 - AUC: 0.8688 - val_loss: 0.4349 - val_AUC: 0.8667
Epoch 10/20
3500/3500 [==============================] - 620s 174ms/step - loss: 0.4136 - AUC: 0.8692 - val_loss: 0.4261 - val_AUC: 0.8641
Epoch 11/20
3500/3500 [==============================] - 627s 174ms/step - loss: 0.4127 - AUC: 0.8698 - val_loss: 0.4548 - val_AUC: 0.8661
Epoch 12/20
3500/3500 [==============================] - 611s 171ms/step - loss: 0.4118 - AUC: 0.8703 - val_loss: 0.4791 - val_AUC: 0.8664
Epoch 13/20
3500/3500 [==============================] - 611s 171ms/step - loss: 0.4107 - AUC: 0.8708 - val_loss: 0.4350 - val_AUC: 0.8666
Epoch 14/20
3500/3500 [==============================] - 624s 174ms/step - loss: 0.4101 - AUC: 0.8716 - val_loss: 0.4786 - val_AUC: 0.8667
Epoch 15/20
3500/3500 [==============================] - 618s 174ms/step - loss: 0.4087 - AUC: 0.8723 - val_loss: 0.4370 - val_AUC: 0.8639
Epoch 16/20
3500/3500 [==============================] - 624s 175ms/step - loss: 0.4078 - AUC: 0.8728 - val_loss: 0.4253 - val_AUC: 0.8658
Epoch 17/20
3500/3500 [==============================] - 607s 170ms/step - loss: 0.4068 - AUC: 0.8735 - val_loss: 0.4764 - val_AUC: 0.8616
Epoch 18/20
3500/3500 [==============================] - 619s 172ms/step - loss: 0.4060 - AUC: 0.8740 - val_loss: 0.4439 - val_AUC: 0.8658
Epoch 19/20
3500/3500 [==============================] - 620s 173ms/step - loss: 0.4053 - AUC: 0.8745 - val_loss: 0.4516 - val_AUC: 0.8640
Epoch 20/20
3500/3500 [==============================] - 618s 172ms/step - loss: 0.4040 - AUC: 0.8753 - val_loss: 0.4674 - val_AUC: 0.8627
2022-01-31 09:15:08,523 - INFO - Fold 3 val score: 0.8674059510231018
2022-01-31 09:15:08,525 - INFO - --- Train fold 4 of [] ---
2022-01-31 09:15:08,528 - INFO - Train: [ 0  1  2  3  4  5  8  9 11 12 13 15 16 17 18 19]  Val: [ 6  7 10 14]
Epoch 1/20
3500/3500 [==============================] - 603s 170ms/step - loss: 0.4783 - AUC: 0.8291 - val_loss: 0.4422 - val_AUC: 0.8587
Epoch 2/20
3500/3500 [==============================] - 612s 172ms/step - loss: 0.4315 - AUC: 0.8593 - val_loss: 0.4293 - val_AUC: 0.8632
Epoch 3/20
3500/3500 [==============================] - 607s 170ms/step - loss: 0.4249 - AUC: 0.8630 - val_loss: 0.4649 - val_AUC: 0.8630
Epoch 4/20
3500/3500 [==============================] - 609s 172ms/step - loss: 0.4221 - AUC: 0.8647 - val_loss: 0.4293 - val_AUC: 0.8640
Epoch 5/20
3500/3500 [==============================] - 613s 172ms/step - loss: 0.4198 - AUC: 0.8659 - val_loss: 0.4277 - val_AUC: 0.8644
Epoch 6/20
3500/3500 [==============================] - 610s 172ms/step - loss: 0.4178 - AUC: 0.8669 - val_loss: 0.4236 - val_AUC: 0.8657
Epoch 7/20
3500/3500 [==============================] - 614s 173ms/step - loss: 0.4163 - AUC: 0.8677 - val_loss: 0.4194 - val_AUC: 0.8665
Epoch 8/20
3500/3500 [==============================] - 618s 174ms/step - loss: 0.4151 - AUC: 0.8683 - val_loss: 0.4231 - val_AUC: 0.8673
Epoch 9/20
3500/3500 [==============================] - 600s 170ms/step - loss: 0.4141 - AUC: 0.8691 - val_loss: 0.4441 - val_AUC: 0.8636
Epoch 10/20
3500/3500 [==============================] - 620s 174ms/step - loss: 0.4129 - AUC: 0.8698 - val_loss: 0.4493 - val_AUC: 0.8645
Epoch 11/20
3500/3500 [==============================] - 620s 173ms/step - loss: 0.4117 - AUC: 0.8705 - val_loss: 0.4606 - val_AUC: 0.8635
Epoch 12/20
3500/3500 [==============================] - 617s 170ms/step - loss: 0.4109 - AUC: 0.8709 - val_loss: 0.4263 - val_AUC: 0.8660
Epoch 13/20
3500/3500 [==============================] - 605s 170ms/step - loss: 0.4100 - AUC: 0.8714 - val_loss: 0.4198 - val_AUC: 0.8659
Epoch 14/20
3500/3500 [==============================] - 618s 171ms/step - loss: 0.4095 - AUC: 0.8717 - val_loss: 0.4236 - val_AUC: 0.8636
Epoch 15/20
3500/3500 [==============================] - 619s 172ms/step - loss: 0.4084 - AUC: 0.8724 - val_loss: 0.4245 - val_AUC: 0.8637
Epoch 16/20
3500/3500 [==============================] - 622s 173ms/step - loss: 0.4074 - AUC: 0.8729 - val_loss: 0.4292 - val_AUC: 0.8620
Epoch 17/20
3500/3500 [==============================] - 619s 172ms/step - loss: 0.4062 - AUC: 0.8738 - val_loss: 0.4322 - val_AUC: 0.8611
Epoch 18/20
3500/3500 [==============================] - 611s 170ms/step - loss: 0.4054 - AUC: 0.8743 - val_loss: 0.4200 - val_AUC: 0.8654
Epoch 19/20
3500/3500 [==============================] - 617s 172ms/step - loss: 0.4047 - AUC: 0.8750 - val_loss: 0.4242 - val_AUC: 0.8629
Epoch 20/20
3500/3500 [==============================] - 619s 172ms/step - loss: 0.4029 - AUC: 0.8761 - val_loss: 0.4453 - val_AUC: 0.8634
2022-01-31 12:41:49,536 - INFO - Fold 4 val score: 0.867283284664154
2022-01-31 12:41:49,572 - INFO - OOF val score: 0.863306999206543
2022-01-31 12:41:49,573 - INFO - --- Inference ---
2022-01-31 12:41:49,743 - INFO - Make inference by fold 0 model
2022-01-31 12:45:37,653 - INFO - Make inference by fold 1 model
2022-01-31 12:49:15,957 - INFO - Make inference by fold 2 model
2022-01-31 12:53:03,120 - INFO - Make inference by fold 3 model
2022-01-31 12:56:50,739 - INFO - Make inference by fold 4 model
2022-01-31 13:00:39,477 - INFO - Test inference written to /content/drive/Shareddrives/ml/g2net/results/CNN1d_GeM_SGDW_Highpass_Tukey_2022-01-30_19-27/CNN1d_GeM_SGDW_Highpass_Tukey_2022-01-30_19-27_submission.csv.

Submission to Kaggle

Private score: 0.87249. Public score: 0.87414.

In [ ]:
#results_path = "/content/drive/Shareddrives/ml/g2net/results/CNN1d_GeM_SGDW_Highpass_Tukey"
#test_preds_df = load_results_make_inference(results_path)