This notebook tests experimental features that will likely change.¶

In [1]:

from lale.lib.rasl import SimpleImputer as SIM
from lale.lib.rasl import MinMaxScaler as MMS
from lale.lib.rasl import BatchedBaggingClassifier as BBC
from lale.lib.sklearn import SGDClassifier as SGD
from lale.lib.sklearn import RandomForestClassifier as RFC

def fresh_pipeline(final_est="sgd"):
    """Build a new, untrained ``SIM >> MMS >> estimator`` pipeline.

    A new pipeline is created on every call to guarantee fresh state after
    earlier fit/partial_fit calls, if any.

    Parameters
    ----------
    final_est : str, default "sgd"
        Selects the final estimator: "sgd" (SGD, random_state=42),
        "rfc" (RFC, random_state=42), or "bbc" (BBC with default arguments).

    Returns
    -------
    The pipeline ``SIM(strategy="mean") >> MMS() >> est``.

    Raises
    ------
    ValueError
        If ``final_est`` is not one of the supported names. Without this
        check an unknown name would leave ``est`` unbound and fail with a
        confusing UnboundLocalError at the return statement.
    """
    supported = ("sgd", "rfc", "bbc")
    # Validate before constructing anything so a typo fails fast and clearly.
    if final_est not in supported:
        raise ValueError(
            f"unknown final_est {final_est!r}, expected one of {supported}"
        )
    if final_est == "sgd":
        est = SGD(random_state=42)
    elif final_est == "rfc":
        est = RFC(random_state=42)
    else:  # "bbc"
        est = BBC()
    return SIM(strategy="mean") >> MMS() >> est

# Display the default pipeline (final_est="sgd") as a sanity check of its structure.
fresh_pipeline().visualize()

In [2]:

from lale.lib.rasl import is_associative, is_incremental, PrioBatch, PrioStep, PrioResourceAware
from lale.lib.rasl import mockup_data_loader, fit_with_batches, cross_val_score
from sklearn.model_selection import KFold

# Check the operator capabilities the examples in this notebook rely on:
# both preprocessing operators are associative, SGD is incremental,
# and RandomForest is not incremental.
assert is_associative(SIM)
assert is_associative(MMS)
assert is_incremental(SGD)
assert not is_incremental(RFC)

In [3]:

import lale.datasets.openml
import math
import pandas as pd
from lale.lib.lale import Project

# Fetch the credit-g classification dataset without Lale's built-in preprocessing.
(raw_train_X, train_y), (raw_test_X, test_y) = lale.datasets.openml.fetch("credit-g", "classification", preprocess=False)
# Select columns of type "number"; the projection is fit on the training split
# and then applied to both splits.
project = Project(columns={"type": "number"}).fit(raw_train_X)
train_X, test_X = project.transform(raw_train_X), project.transform(raw_test_X)
# The class labels are passed explicitly to the batched training calls later on.
unique_class_labels = list(train_y.unique())
print(f"unique_class_labels {unique_class_labels}")
# Combined pandas memory_usage() of training features and labels (bytes);
# later cells derive max_resident from this value.
train_data_space = train_X.memory_usage().sum() + train_y.memory_usage()
print(f"train_data_space {train_data_space}")
# Limit displayed frames to 6 rows so the preview below stays short.
pd.set_option('display.max_rows', 6)
# Preview the test split with the label as the first column.
pd.concat([test_y, test_X], axis=1)

unique_class_labels ['good', 'bad']
train_data_space 53600

Out[3]:

	class	duration	credit_amount	installment_commitment	residence_since	age	existing_credits	num_dependents
993	good	36.0	3959.0	4.0	3.0	30.0	1.0	1.0
859	good	9.0	3577.0	1.0	2.0	26.0	1.0	2.0
298	good	18.0	2515.0	3.0	4.0	43.0	1.0	1.0
...	...	...	...	...	...	...	...	...
503	bad	24.0	1216.0	4.0	4.0	38.0	2.0	2.0
263	good	12.0	2748.0	2.0	4.0	57.0	3.0	1.0
808	good	42.0	9283.0	1.0	2.0	55.0	1.0	1.0

330 rows × 8 columns

Example: no RASL¶

In [4]:

import lale.lib.rasl

# Scorer reused by every example below to evaluate on the held-out test split.
accuracy_scorer = lale.lib.rasl.get_scorer("accuracy")

# Baseline: share of the most frequent test label. Computed from the data
# instead of hard-coding the label 'good', so it stays correct if the dataset
# or its label encoding changes. The denominator is still len(test_y).
majority_fraction = test_y.value_counts().max() / len(test_y)
print(f"majority class {majority_fraction:.2%}")
# Plain fit on the full training data, without any RASL batching.
trained = fresh_pipeline().fit(train_X, train_y)
print(f"accuracy {accuracy_scorer(trained, test_X, test_y):.2%}")

majority class 70.91%
accuracy 70.91%

Example: no batching, no cross-validation¶

starting point
behavior equivalent to sklearn
purposely setting max_resident too low to demonstrate cache warnings

In [5]:

# A single batch, i.e. no batching.
n_batches = 1
batches = mockup_data_loader(train_X, train_y, n_batches, "pandas")
# Budget of exactly one batch: purposely too low, so the run demonstrates
# cache warnings (the "ensure_space() failed" messages in the output).
max_resident = 1 * math.ceil(train_data_space / n_batches)
# verbose=3 prints every scheduled task in addition to spill/load events and stats.
trained = fit_with_batches(fresh_pipeline(), batches, None, None, unique_class_labels,
                           max_resident, PrioStep(), partial_transform=False, verbose=3, progress_callback=None)
print(f"accuracy {accuracy_scorer(trained, test_X, test_y):.2%}")
trained.visualize()

scan INP(d0)

scan INP(d1)
spill /tmp/tmp5r63ky4x/X_-1_d0_None.pkl /tmp/tmp5r63ky4x/y_-1_d0_None.pkl

to_monoid SimpleImputer(d0)
load /tmp/tmp5r63ky4x/X_-1_d0_None.pkl /tmp/tmp5r63ky4x/y_-1_d0_None.pkl

combine SimpleImputer(d*)

aborted spill of batch -1_d0_None
ensure_space() failed, amount_needed 53600, no_spill_space 53600, min_resident 107200, max_resident 53600

transform SimpleImputer(d0)

to_monoid MinMaxScaler(d0)

combine MinMaxScaler(d*)

aborted spill of batch 0_d0_None
ensure_space() failed, amount_needed 53600, no_spill_space 53600, min_resident 107200, max_resident 53600

transform MinMaxScaler(d0)

partial_fit SGDClassifier(d0)

partial_fit SGDClassifier(d*)

{
    "spill_count": 1,
    "load_count": 1,
    "spill_space": 53600,
    "load_space": 53600,
    "min_resident": 107200,
    "max_resident": 53600,
    "train_count": 6,
    "apply_count": 4,
    "metric_count": 0,
    "train_time": 0.04813194274902344,
    "apply_time": 0.0444943904876709,
    "metric_time": 0,
    "critical_count": 1,
    "critical_time": 0.0268857479095459,
}
accuracy 63.33%

Example: batching using associativity where possible, non-incremental final estimator¶

In [6]:

# Three batches with a RandomForest final estimator, which is not incremental.
n_batches = 3
# Budget sized to n_batches batches, each rounded up.
max_resident = 3 * math.ceil(train_data_space / n_batches)
batches = mockup_data_loader(train_X, train_y, n_batches, "pandas")
trained = fit_with_batches(fresh_pipeline("rfc"), batches, None, None, unique_class_labels,
                           max_resident, PrioStep(), partial_transform=False, verbose=2, progress_callback=None)
print(f"accuracy {accuracy_scorer(trained, test_X, test_y):.2%}")

spill /tmp/tmplzol_tj8/X_-1_d1_None.pkl /tmp/tmplzol_tj8/y_-1_d1_None.pkl
spill /tmp/tmplzol_tj8/X_-1_d2_None.pkl /tmp/tmplzol_tj8/y_-1_d2_None.pkl
load /tmp/tmplzol_tj8/X_-1_d1_None.pkl /tmp/tmplzol_tj8/y_-1_d1_None.pkl
load /tmp/tmplzol_tj8/X_-1_d2_None.pkl /tmp/tmplzol_tj8/y_-1_d2_None.pkl
spill /tmp/tmplzol_tj8/X_-1_d2_None.pkl /tmp/tmplzol_tj8/y_-1_d2_None.pkl
spill /tmp/tmplzol_tj8/X_-1_d1_None.pkl /tmp/tmplzol_tj8/y_-1_d1_None.pkl
load /tmp/tmplzol_tj8/X_-1_d1_None.pkl /tmp/tmplzol_tj8/y_-1_d1_None.pkl
spill /tmp/tmplzol_tj8/X_0_d0_None.pkl /tmp/tmplzol_tj8/y_0_d0_None.pkl
load /tmp/tmplzol_tj8/X_-1_d2_None.pkl /tmp/tmplzol_tj8/y_-1_d2_None.pkl
load /tmp/tmplzol_tj8/X_0_d0_None.pkl /tmp/tmplzol_tj8/y_0_d0_None.pkl
spill /tmp/tmplzol_tj8/X_0_d2_None.pkl /tmp/tmplzol_tj8/y_0_d2_None.pkl
spill /tmp/tmplzol_tj8/X_0_d1_None.pkl /tmp/tmplzol_tj8/y_0_d1_None.pkl
load /tmp/tmplzol_tj8/X_0_d1_None.pkl /tmp/tmplzol_tj8/y_0_d1_None.pkl
spill /tmp/tmplzol_tj8/X_1_d0_None.pkl /tmp/tmplzol_tj8/y_1_d0_None.pkl
load /tmp/tmplzol_tj8/X_0_d2_None.pkl /tmp/tmplzol_tj8/y_0_d2_None.pkl
load /tmp/tmplzol_tj8/X_1_d0_None.pkl /tmp/tmplzol_tj8/y_1_d0_None.pkl

{
    "spill_count": 8,
    "load_count": 8,
    "spill_space": 142880,
    "load_space": 142880,
    "min_resident": 71520,
    "max_resident": 53601,
    "train_count": 9,
    "apply_count": 10,
    "metric_count": 0,
    "train_time": 0.2980036735534668,
    "apply_time": 0.08468341827392578,
    "metric_time": 0,
    "critical_count": 1,
    "critical_time": 0.21265220642089844,
}
accuracy 70.30%

Example: batching using associativity where possible, partial_fit otherwise¶

unlike our batching patent, which always used partial_fit
behavior still equivalent to sklearn
potential advantages: less resident data; parallelism
below are three runs with three different scheduling priorities

In [7]:

# Three batches, default SGD final estimator, scheduling priority PrioStep.
n_batches = 3
# Budget sized to n_batches batches, each rounded up.
max_resident = 3 * math.ceil(train_data_space / n_batches)
batches = mockup_data_loader(train_X, train_y, n_batches, "pandas")
trained = fit_with_batches(fresh_pipeline(), batches, None, None, unique_class_labels, max_resident,
                           PrioStep(), partial_transform=False, verbose=2, progress_callback=None)
print(f"accuracy {accuracy_scorer(trained, test_X, test_y):.2%}")

spill /tmp/tmp2ijsdu2b/X_-1_d1_None.pkl /tmp/tmp2ijsdu2b/y_-1_d1_None.pkl
spill /tmp/tmp2ijsdu2b/X_-1_d2_None.pkl /tmp/tmp2ijsdu2b/y_-1_d2_None.pkl
load /tmp/tmp2ijsdu2b/X_-1_d1_None.pkl /tmp/tmp2ijsdu2b/y_-1_d1_None.pkl
load /tmp/tmp2ijsdu2b/X_-1_d2_None.pkl /tmp/tmp2ijsdu2b/y_-1_d2_None.pkl
spill /tmp/tmp2ijsdu2b/X_-1_d2_None.pkl /tmp/tmp2ijsdu2b/y_-1_d2_None.pkl
spill /tmp/tmp2ijsdu2b/X_-1_d1_None.pkl /tmp/tmp2ijsdu2b/y_-1_d1_None.pkl
load /tmp/tmp2ijsdu2b/X_-1_d1_None.pkl /tmp/tmp2ijsdu2b/y_-1_d1_None.pkl
spill /tmp/tmp2ijsdu2b/X_0_d0_None.pkl /tmp/tmp2ijsdu2b/y_0_d0_None.pkl
load /tmp/tmp2ijsdu2b/X_-1_d2_None.pkl /tmp/tmp2ijsdu2b/y_-1_d2_None.pkl
load /tmp/tmp2ijsdu2b/X_0_d0_None.pkl /tmp/tmp2ijsdu2b/y_0_d0_None.pkl
spill /tmp/tmp2ijsdu2b/X_0_d2_None.pkl /tmp/tmp2ijsdu2b/y_0_d2_None.pkl
spill /tmp/tmp2ijsdu2b/X_0_d1_None.pkl /tmp/tmp2ijsdu2b/y_0_d1_None.pkl
load /tmp/tmp2ijsdu2b/X_0_d1_None.pkl /tmp/tmp2ijsdu2b/y_0_d1_None.pkl
spill /tmp/tmp2ijsdu2b/X_1_d0_None.pkl /tmp/tmp2ijsdu2b/y_1_d0_None.pkl
load /tmp/tmp2ijsdu2b/X_0_d2_None.pkl /tmp/tmp2ijsdu2b/y_0_d2_None.pkl
load /tmp/tmp2ijsdu2b/X_1_d0_None.pkl /tmp/tmp2ijsdu2b/y_1_d0_None.pkl

{
    "spill_count": 8,
    "load_count": 8,
    "spill_space": 142880,
    "load_space": 142880,
    "min_resident": 35840,
    "max_resident": 53601,
    "train_count": 12,
    "apply_count": 10,
    "metric_count": 0,
    "train_time": 0.10301017761230469,
    "apply_time": 0.10129833221435547,
    "metric_time": 0,
    "critical_count": 1,
    "critical_time": 0.0216214656829834,
}
accuracy 62.42%

In [8]:

# Three batches, default SGD final estimator, scheduling priority PrioBatch.
n_batches = 3
# Budget sized to n_batches batches, each rounded up.
max_resident = 3 * math.ceil(train_data_space / n_batches)
batches = mockup_data_loader(train_X, train_y, n_batches, "pandas")
trained = fit_with_batches(fresh_pipeline(), batches, None, None, unique_class_labels, max_resident,
                           PrioBatch(), partial_transform=False, verbose=2, progress_callback=None)
print(f"accuracy {accuracy_scorer(trained, test_X, test_y):.2%}")

spill /tmp/tmpocs5ad_r/X_-1_d1_None.pkl /tmp/tmpocs5ad_r/y_-1_d1_None.pkl
spill /tmp/tmpocs5ad_r/X_-1_d2_None.pkl /tmp/tmpocs5ad_r/y_-1_d2_None.pkl
load /tmp/tmpocs5ad_r/X_-1_d1_None.pkl /tmp/tmpocs5ad_r/y_-1_d1_None.pkl
spill /tmp/tmpocs5ad_r/X_0_d0_None.pkl /tmp/tmpocs5ad_r/y_0_d0_None.pkl
load /tmp/tmpocs5ad_r/X_-1_d2_None.pkl /tmp/tmpocs5ad_r/y_-1_d2_None.pkl
load /tmp/tmpocs5ad_r/X_0_d0_None.pkl /tmp/tmpocs5ad_r/y_0_d0_None.pkl
spill /tmp/tmpocs5ad_r/X_0_d2_None.pkl /tmp/tmpocs5ad_r/y_0_d2_None.pkl
spill /tmp/tmpocs5ad_r/X_0_d1_None.pkl /tmp/tmpocs5ad_r/y_0_d1_None.pkl
load /tmp/tmpocs5ad_r/X_0_d1_None.pkl /tmp/tmpocs5ad_r/y_0_d1_None.pkl
load /tmp/tmpocs5ad_r/X_0_d2_None.pkl /tmp/tmpocs5ad_r/y_0_d2_None.pkl

{
    "spill_count": 5,
    "load_count": 5,
    "spill_space": 89280,
    "load_space": 89280,
    "min_resident": 35840,
    "max_resident": 53601,
    "train_count": 12,
    "apply_count": 10,
    "metric_count": 0,
    "train_time": 0.10233783721923828,
    "apply_time": 0.07904672622680664,
    "metric_time": 0,
    "critical_count": 1,
    "critical_time": 0.025415658950805664,
}
accuracy 62.42%

In [9]:

# Three batches, default SGD final estimator, scheduling priority PrioResourceAware.
n_batches = 3
# Budget sized to n_batches batches, each rounded up.
max_resident = 3 * math.ceil(train_data_space / n_batches)
batches = mockup_data_loader(train_X, train_y, n_batches, "pandas")
trained = fit_with_batches(fresh_pipeline(), batches, None, None, unique_class_labels, max_resident,
                           PrioResourceAware(), partial_transform=False, verbose=2, progress_callback=None)
print(f"accuracy {accuracy_scorer(trained, test_X, test_y):.2%}")

spill /tmp/tmp32eeypzi/X_-1_d1_None.pkl /tmp/tmp32eeypzi/y_-1_d1_None.pkl
spill /tmp/tmp32eeypzi/X_-1_d2_None.pkl /tmp/tmp32eeypzi/y_-1_d2_None.pkl
load /tmp/tmp32eeypzi/X_-1_d1_None.pkl /tmp/tmp32eeypzi/y_-1_d1_None.pkl
spill /tmp/tmp32eeypzi/X_0_d0_None.pkl /tmp/tmp32eeypzi/y_0_d0_None.pkl
load /tmp/tmp32eeypzi/X_-1_d2_None.pkl /tmp/tmp32eeypzi/y_-1_d2_None.pkl
load /tmp/tmp32eeypzi/X_0_d0_None.pkl /tmp/tmp32eeypzi/y_0_d0_None.pkl
spill /tmp/tmp32eeypzi/X_1_d2_None.pkl /tmp/tmp32eeypzi/y_1_d2_None.pkl
load /tmp/tmp32eeypzi/X_1_d2_None.pkl /tmp/tmp32eeypzi/y_1_d2_None.pkl

{
    "spill_count": 4,
    "load_count": 4,
    "spill_space": 71440,
    "load_space": 71440,
    "min_resident": 35840,
    "max_resident": 53601,
    "train_count": 12,
    "apply_count": 10,
    "metric_count": 0,
    "train_time": 0.08783912658691406,
    "apply_time": 0.06755399703979492,
    "metric_time": 0,
    "critical_count": 1,
    "critical_time": 0.01776576042175293,
}
accuracy 62.42%

Example: frozen trained prefix¶

pre-train on the first batch, then freeze, and train again on remaining batches
in the second training, SGD partial_fit i happens before scan i+1
to test and demonstrate that, the second fit sets partial_transform="score"

In [10]:

class DemoProgressCallback:
    """Record the training score passed to each progress callback and print the history.

    Ideally, this should create a dynamically updating plot in Jupyter;
    for now it only prints one line per invocation.
    """

    def __init__(self):
        # Training scores in the order the callback received them.
        self.scores = []

    def __call__(self, score_train, score_valid, n_batches_scanned, end_of_scanned_batches):
        """Store ``score_train``, print all scores so far, and check the call count.

        ``score_valid`` is accepted but not used. The final assertion checks
        that the number of recorded scores equals ``n_batches_scanned``, plus
        one when ``end_of_scanned_batches`` is truthy.
        """
        self.scores.append(score_train)
        formatted = [f"{value:.2%}" for value in self.scores]
        history = ", ".join(formatted)
        print(f"scanned {n_batches_scanned}, found end {end_of_scanned_batches}, callback state {history}")
        expected_calls = n_batches_scanned
        if end_of_scanned_batches:
            expected_calls = expected_calls + 1
        assert len(self.scores) == expected_calls

n_batches = 3
batches = mockup_data_loader(train_X, train_y, n_batches, "pandas")
# NOTE(review): presumably `batches` is a one-shot iterator, so taking the first
# batch here leaves only the remaining batches for any later use of `batches`
# (the following fit reports just two scanned batches) — confirm.
first_batch = next(iter(batches))
trainable1 = fresh_pipeline()
# Pre-train the whole pipeline on the first batch only. With max_resident=None
# the stats report max_resident as 9223372036854775807 (2**63 - 1).
trained1 = fit_with_batches(trainable1, [first_batch], None, None, unique_class_labels, None,
                            PrioBatch(), partial_transform=False, verbose=2, progress_callback=None)

{
    "spill_count": 0,
    "load_count": 0,
    "spill_space": 0,
    "load_space": 0,
    "min_resident": 35840,
    "max_resident": 9223372036854775807,
    "train_count": 6,
    "apply_count": 4,
    "metric_count": 0,
    "train_time": 0.029951095581054688,
    "apply_time": 0.018345117568969727,
    "metric_time": 0,
    "critical_count": 1,
    "critical_time": 0.019626379013061523,
}

In [11]:

# Freeze everything except the last step of the pre-trained pipeline, then
# re-attach the (unfrozen) last step so that only it is trained further.
prefix2 = trained1.remove_last().freeze_trained()
suffix2 = trained1.get_last()
trainable2 = prefix2 >> suffix2
# NOTE(review): relies on `batches` from the pre-training cell having already
# yielded its first batch; re-running this cell alone presumably sees an
# exhausted or different set of batches — confirm.
trained = fit_with_batches(trainable2, batches, None, accuracy_scorer, unique_class_labels, None,
                           PrioBatch(), partial_transform="score", verbose=2, progress_callback=DemoProgressCallback())

scanned 1, found end False, callback state 69.96%
scanned 2, found end False, callback state 69.96%, 69.51%

{
    "spill_count": 0,
    "load_count": 0,
    "spill_space": 0,
    "load_space": 0,
    "min_resident": 35680,
    "max_resident": 9223372036854775807,
    "train_count": 3,
    "apply_count": 9,
    "metric_count": 3,
    "train_time": 0.0074999332427978516,
    "apply_time": 0.05312967300415039,
    "metric_time": 0.012913227081298828,
    "critical_count": 1,
    "critical_time": 0.009376764297485352,
}

Example: partial_transform, start using transformer before it has seen all data¶

behavior not equivalent to sklearn, instead more similar to DL
but may be "close enough" if operators are "convergent"
advantage: need much less resident data
amenable to early stopping, extracting a checkpoint

In [12]:

n_batches = 3
# Budget sized to n_batches batches, each rounded up.
max_resident = 3 * math.ceil(train_data_space / n_batches)
batches = mockup_data_loader(train_X, train_y, n_batches, "pandas")
# partial_transform=True: as the verbose=3 trace shows, each batch is transformed
# and fed to partial_fit right after it is scanned, before later batches are seen.
trained = fit_with_batches(fresh_pipeline(), batches, None, None, unique_class_labels,
                           max_resident, PrioBatch(), partial_transform=True, verbose=3, progress_callback=None)
print(f"accuracy {accuracy_scorer(trained, test_X, test_y):.2%}")

scan INP(d0)

to_monoid SimpleImputer(d0)

transform SimpleImputer(d0)

to_monoid MinMaxScaler(d0)

transform MinMaxScaler(d0)

partial_fit SGDClassifier(d0)

scan INP(d1)

to_monoid SimpleImputer(d1)

combine SimpleImputer(d0,d1)

transform SimpleImputer(d1)

to_monoid MinMaxScaler(d1)

combine MinMaxScaler(d0,d1)

transform MinMaxScaler(d1)

partial_fit SGDClassifier(d0,d1)

scan INP(d2)

to_monoid SimpleImputer(d2)

combine SimpleImputer(d0,d1,d2)

transform SimpleImputer(d2)

to_monoid MinMaxScaler(d2)

combine MinMaxScaler(d0,d1,d2)

transform MinMaxScaler(d2)

partial_fit SGDClassifier(d0,d1,d2)

scan INP(d3)

combine SimpleImputer(d*)

combine MinMaxScaler(d*)

partial_fit SGDClassifier(d*)

{
    "spill_count": 0,
    "load_count": 0,
    "spill_space": 0,
    "load_space": 0,
    "min_resident": 35840,
    "max_resident": 53601,
    "train_count": 16,
    "apply_count": 10,
    "metric_count": 0,
    "train_time": 0.11674237251281738,
    "apply_time": 0.092437744140625,
    "metric_time": 0,
    "critical_count": 1,
    "critical_time": 0.022518157958984375,
}
accuracy 70.30%

Example: learning curve during fit_with_batches with partial_transform¶

In [13]:

n_batches = 3
# Budget sized to n_batches batches, each rounded up.
max_resident = 3 * math.ceil(train_data_space / n_batches)
batches = mockup_data_loader(train_X, train_y, n_batches, "pandas")
# Passing a scorer and a progress callback makes the run report a training
# score as batches are scanned (printed by DemoProgressCallback).
trained = fit_with_batches(fresh_pipeline(), batches, None, accuracy_scorer, unique_class_labels,
                           max_resident, PrioBatch(), partial_transform=True, verbose=2, progress_callback=DemoProgressCallback())
print(f"accuracy {accuracy_scorer(trained, test_X, test_y):.2%}")

spill /tmp/tmpv3zynub2/X_-1_d0_None.pkl /tmp/tmpv3zynub2/y_-1_d0_None.pkl
load /tmp/tmpv3zynub2/X_-1_d0_None.pkl /tmp/tmpv3zynub2/y_-1_d0_None.pkl
scanned 1, found end False, callback state 68.75%
scanned 2, found end False, callback state 68.75%, 69.96%
scanned 3, found end False, callback state 68.75%, 69.96%, 71.30%

{
    "spill_count": 1,
    "load_count": 1,
    "spill_space": 17920,
    "load_space": 17920,
    "min_resident": 53760,
    "max_resident": 53601,
    "train_count": 16,
    "apply_count": 13,
    "metric_count": 4,
    "train_time": 0.09845256805419922,
    "apply_time": 0.10098576545715332,
    "metric_time": 0.025612354278564453,
    "critical_count": 1,
    "critical_time": 0.02614283561706543,
}
accuracy 70.30%

Example: cross-validation without batching, using same-fold data for training¶

behavior equivalent to sklearn
reuse of lifted coefficients limited to first operator
disambiguate some tasks by \f, where f is the left-out fold
using associativity where possible, partial_fit otherwise

In [14]:

# A single batch, i.e. cross-validation without batching.
n_batches = 1
# Budget of two batches' worth of data.
max_resident = 2 * math.ceil(train_data_space / n_batches)
batches = mockup_data_loader(train_X, train_y, n_batches, "pandas")
# 3-fold cross-validation with same_fold=True, using the "bbc" final estimator.
scores = cross_val_score(fresh_pipeline("bbc"), batches, accuracy_scorer, KFold(3),
                         unique_class_labels, max_resident, PrioResourceAware(), same_fold=True, verbose=2)
print("accuracies " + ", ".join(f"{score:.2%}" for score in scores))

spill /tmp/tmpaduz2lqh/X_-1_d0_None.pkl /tmp/tmpaduz2lqh/y_-1_d0_None.pkl
spill /tmp/tmpaduz2lqh/X_0_e0_f.pkl /tmp/tmpaduz2lqh/y_0_e0_f.pkl
spill /tmp/tmpaduz2lqh/X_-1_e0_None.pkl /tmp/tmpaduz2lqh/y_-1_e0_None.pkl
load /tmp/tmpaduz2lqh/X_-1_e0_None.pkl /tmp/tmpaduz2lqh/y_-1_e0_None.pkl
spill /tmp/tmpaduz2lqh/X_-1_f0_None.pkl /tmp/tmpaduz2lqh/y_-1_f0_None.pkl
spill /tmp/tmpaduz2lqh/X_0_f0_e.pkl /tmp/tmpaduz2lqh/y_0_f0_e.pkl
load /tmp/tmpaduz2lqh/X_-1_d0_None.pkl /tmp/tmpaduz2lqh/y_-1_d0_None.pkl
spill /tmp/tmpaduz2lqh/X_0_f0_f.pkl /tmp/tmpaduz2lqh/y_0_f0_f.pkl
spill /tmp/tmpaduz2lqh/X_-1_e0_None.pkl /tmp/tmpaduz2lqh/y_-1_e0_None.pkl
spill /tmp/tmpaduz2lqh/X_-1_d0_None.pkl /tmp/tmpaduz2lqh/y_-1_d0_None.pkl
load /tmp/tmpaduz2lqh/X_0_e0_f.pkl /tmp/tmpaduz2lqh/y_0_e0_f.pkl
load /tmp/tmpaduz2lqh/X_0_f0_e.pkl /tmp/tmpaduz2lqh/y_0_f0_e.pkl
load /tmp/tmpaduz2lqh/X_-1_e0_None.pkl /tmp/tmpaduz2lqh/y_-1_e0_None.pkl
load /tmp/tmpaduz2lqh/X_0_f0_f.pkl /tmp/tmpaduz2lqh/y_0_f0_f.pkl
load /tmp/tmpaduz2lqh/X_-1_f0_None.pkl /tmp/tmpaduz2lqh/y_-1_f0_None.pkl
load /tmp/tmpaduz2lqh/X_-1_d0_None.pkl /tmp/tmpaduz2lqh/y_-1_d0_None.pkl

{
    "spill_count": 8,
    "load_count": 8,
    "spill_space": 142880,
    "load_space": 142880,
    "min_resident": 107200,
    "max_resident": 107200,
    "train_count": 24,
    "apply_count": 26,
    "metric_count": 6,
    "train_time": 0.16528654098510742,
    "apply_time": 0.5534472465515137,
    "metric_time": 0.02211618423461914,
    "critical_count": 1,
    "critical_time": 0.1302642822265625,
}
accuracies 54.91%, 58.30%, 53.81%

Example: cross-validation with batching, using same-fold data for training¶

compared to cross-validation without batching, requires less resident memory

In [15]:

# Two batches, i.e. cross-validation with batching.
n_batches = 2
# Budget of two batches' worth of data.
max_resident = 2 * math.ceil(train_data_space / n_batches)
batches = mockup_data_loader(train_X, train_y, n_batches, "pandas")
# 3-fold cross-validation with same_fold=True, using the "bbc" final estimator.
scores = cross_val_score(fresh_pipeline("bbc"), batches, accuracy_scorer, KFold(3),
                         unique_class_labels, max_resident, PrioResourceAware(), same_fold=True, verbose=2)
print("accuracies " + ", ".join(f"{score:.2%}" for score in scores))

spill /tmp/tmpxmtt9rld/X_-1_*1_None.pkl /tmp/tmpxmtt9rld/y_-1_*1_None.pkl
spill /tmp/tmpxmtt9rld/X_-1_d0_None.pkl /tmp/tmpxmtt9rld/y_-1_d0_None.pkl
spill /tmp/tmpxmtt9rld/X_-1_e0_None.pkl /tmp/tmpxmtt9rld/y_-1_e0_None.pkl
load /tmp/tmpxmtt9rld/X_-1_*1_None.pkl /tmp/tmpxmtt9rld/y_-1_*1_None.pkl
spill /tmp/tmpxmtt9rld/X_-1_f0_None.pkl /tmp/tmpxmtt9rld/y_-1_f0_None.pkl
spill /tmp/tmpxmtt9rld/X_-1_d1_None.pkl /tmp/tmpxmtt9rld/y_-1_d1_None.pkl
spill /tmp/tmpxmtt9rld/X_0_e1_f.pkl /tmp/tmpxmtt9rld/y_0_e1_f.pkl
spill /tmp/tmpxmtt9rld/X_-1_e1_None.pkl /tmp/tmpxmtt9rld/y_-1_e1_None.pkl
load /tmp/tmpxmtt9rld/X_-1_f0_None.pkl /tmp/tmpxmtt9rld/y_-1_f0_None.pkl
spill /tmp/tmpxmtt9rld/X_-1_f1_None.pkl /tmp/tmpxmtt9rld/y_-1_f1_None.pkl
spill /tmp/tmpxmtt9rld/X_0_f1_f.pkl /tmp/tmpxmtt9rld/y_0_f1_f.pkl
spill /tmp/tmpxmtt9rld/X_0_f1_e.pkl /tmp/tmpxmtt9rld/y_0_f1_e.pkl
load /tmp/tmpxmtt9rld/X_-1_d0_None.pkl /tmp/tmpxmtt9rld/y_-1_d0_None.pkl
spill /tmp/tmpxmtt9rld/X_0_f1_d.pkl /tmp/tmpxmtt9rld/y_0_f1_d.pkl
spill /tmp/tmpxmtt9rld/X_-1_f0_None.pkl /tmp/tmpxmtt9rld/y_-1_f0_None.pkl
spill /tmp/tmpxmtt9rld/X_0_f0_d.pkl /tmp/tmpxmtt9rld/y_0_f0_d.pkl
spill /tmp/tmpxmtt9rld/X_0_f0_e.pkl /tmp/tmpxmtt9rld/y_0_f0_e.pkl
spill /tmp/tmpxmtt9rld/X_0_f0_f.pkl /tmp/tmpxmtt9rld/y_0_f0_f.pkl
load /tmp/tmpxmtt9rld/X_-1_d1_None.pkl /tmp/tmpxmtt9rld/y_-1_d1_None.pkl
spill /tmp/tmpxmtt9rld/X_-1_d0_None.pkl /tmp/tmpxmtt9rld/y_-1_d0_None.pkl
spill /tmp/tmpxmtt9rld/X_0_d0_d.pkl /tmp/tmpxmtt9rld/y_0_d0_d.pkl
spill /tmp/tmpxmtt9rld/X_0_d1_d.pkl /tmp/tmpxmtt9rld/y_0_d1_d.pkl
spill /tmp/tmpxmtt9rld/X_-1_d1_None.pkl /tmp/tmpxmtt9rld/y_-1_d1_None.pkl
load /tmp/tmpxmtt9rld/X_0_f0_e.pkl /tmp/tmpxmtt9rld/y_0_f0_e.pkl
load /tmp/tmpxmtt9rld/X_0_f1_e.pkl /tmp/tmpxmtt9rld/y_0_f1_e.pkl
load /tmp/tmpxmtt9rld/X_-1_e0_None.pkl /tmp/tmpxmtt9rld/y_-1_e0_None.pkl
spill /tmp/tmpxmtt9rld/X_0_d1_f.pkl /tmp/tmpxmtt9rld/y_0_d1_f.pkl
spill /tmp/tmpxmtt9rld/X_-1_e0_None.pkl /tmp/tmpxmtt9rld/y_-1_e0_None.pkl
load /tmp/tmpxmtt9rld/X_0_f0_f.pkl /tmp/tmpxmtt9rld/y_0_f0_f.pkl
load /tmp/tmpxmtt9rld/X_0_f1_f.pkl /tmp/tmpxmtt9rld/y_0_f1_f.pkl
load /tmp/tmpxmtt9rld/X_0_d1_f.pkl /tmp/tmpxmtt9rld/y_0_d1_f.pkl
load /tmp/tmpxmtt9rld/X_-1_e0_None.pkl /tmp/tmpxmtt9rld/y_-1_e0_None.pkl
load /tmp/tmpxmtt9rld/X_-1_e1_None.pkl /tmp/tmpxmtt9rld/y_-1_e1_None.pkl
spill /tmp/tmpxmtt9rld/X_1_f1_f.pkl /tmp/tmpxmtt9rld/y_1_f1_f.pkl
load /tmp/tmpxmtt9rld/X_0_f0_d.pkl /tmp/tmpxmtt9rld/y_0_f0_d.pkl
load /tmp/tmpxmtt9rld/X_0_f1_d.pkl /tmp/tmpxmtt9rld/y_0_f1_d.pkl
load /tmp/tmpxmtt9rld/X_0_d0_d.pkl /tmp/tmpxmtt9rld/y_0_d0_d.pkl
load /tmp/tmpxmtt9rld/X_-1_d0_None.pkl /tmp/tmpxmtt9rld/y_-1_d0_None.pkl
load /tmp/tmpxmtt9rld/X_0_d1_d.pkl /tmp/tmpxmtt9rld/y_0_d1_d.pkl
load /tmp/tmpxmtt9rld/X_-1_d1_None.pkl /tmp/tmpxmtt9rld/y_-1_d1_None.pkl
load /tmp/tmpxmtt9rld/X_0_e1_f.pkl /tmp/tmpxmtt9rld/y_0_e1_f.pkl
load /tmp/tmpxmtt9rld/X_-1_f0_None.pkl /tmp/tmpxmtt9rld/y_-1_f0_None.pkl
load /tmp/tmpxmtt9rld/X_1_f1_f.pkl /tmp/tmpxmtt9rld/y_1_f1_f.pkl
load /tmp/tmpxmtt9rld/X_-1_f1_None.pkl /tmp/tmpxmtt9rld/y_-1_f1_None.pkl

{
    "spill_count": 22,
    "load_count": 22,
    "spill_space": 214160,
    "load_space": 214160,
    "min_resident": 53600,
    "max_resident": 53600,
    "train_count": 39,
    "apply_count": 51,
    "metric_count": 9,
    "train_time": 0.24051737785339355,
    "apply_time": 0.9827837944030762,
    "metric_time": 0.03426671028137207,
    "critical_count": 1,
    "critical_time": 0.21920561790466309,
}
accuracies 66.07%, 62.05%, 63.06%

Example: cross-validation with out-of-fold samples¶

not sklearn compatible
might generalize better by following best practices for stacking ensembles
depends on transformers being convergent
fewer tasks overall than for cross-validation with in-fold samples

In [16]:

# A single batch, i.e. cross-validation without batching.
n_batches = 1
# Budget of two batches' worth of data.
max_resident = 2 * math.ceil(train_data_space / n_batches)
batches = mockup_data_loader(train_X, train_y, n_batches, "pandas")
# 3-fold cross-validation with same_fold=False (out-of-fold samples), "bbc" final estimator.
scores = cross_val_score(fresh_pipeline("bbc"), batches, accuracy_scorer, KFold(3),
                         unique_class_labels, max_resident, PrioResourceAware(), same_fold=False, verbose=2)
print("accuracies " + ", ".join(f"{score:.2%}" for score in scores))

spill /tmp/tmpwewe5gvy/X_-1_d0_None.pkl /tmp/tmpwewe5gvy/y_-1_d0_None.pkl
spill /tmp/tmpwewe5gvy/X_-1_e0_None.pkl /tmp/tmpwewe5gvy/y_-1_e0_None.pkl
load /tmp/tmpwewe5gvy/X_-1_e0_None.pkl /tmp/tmpwewe5gvy/y_-1_e0_None.pkl
load /tmp/tmpwewe5gvy/X_-1_d0_None.pkl /tmp/tmpwewe5gvy/y_-1_d0_None.pkl
spill /tmp/tmpwewe5gvy/X_-1_f0_None.pkl /tmp/tmpwewe5gvy/y_-1_f0_None.pkl
spill /tmp/tmpwewe5gvy/X_-1_e0_None.pkl /tmp/tmpwewe5gvy/y_-1_e0_None.pkl
load /tmp/tmpwewe5gvy/X_-1_e0_None.pkl /tmp/tmpwewe5gvy/y_-1_e0_None.pkl
load /tmp/tmpwewe5gvy/X_-1_f0_None.pkl /tmp/tmpwewe5gvy/y_-1_f0_None.pkl

{
    "spill_count": 4,
    "load_count": 4,
    "spill_space": 71440,
    "load_space": 71440,
    "min_resident": 107200,
    "max_resident": 107200,
    "train_count": 18,
    "apply_count": 14,
    "metric_count": 6,
    "train_time": 0.09293031692504883,
    "apply_time": 0.49991703033447266,
    "metric_time": 0.02008819580078125,
    "critical_count": 1,
    "critical_time": 0.1726703643798828,
}
accuracies 60.71%, 56.50%, 58.74%

In [17]:

# Two batches, i.e. cross-validation with batching.
n_batches = 2
# Budget of two batches' worth of data.
max_resident = 2 * math.ceil(train_data_space / n_batches)
batches = mockup_data_loader(train_X, train_y, n_batches, "pandas")
# 3-fold cross-validation with same_fold=False (out-of-fold samples), "bbc" final estimator.
scores = cross_val_score(fresh_pipeline("bbc"), batches, accuracy_scorer, KFold(3),
                         unique_class_labels, max_resident, PrioResourceAware(), same_fold=False, verbose=2)
print("accuracies " + ", ".join(f"{score:.2%}" for score in scores))

spill /tmp/tmpgwdyqcux/X_-1_*1_None.pkl /tmp/tmpgwdyqcux/y_-1_*1_None.pkl
spill /tmp/tmpgwdyqcux/X_-1_d0_None.pkl /tmp/tmpgwdyqcux/y_-1_d0_None.pkl
spill /tmp/tmpgwdyqcux/X_-1_e0_None.pkl /tmp/tmpgwdyqcux/y_-1_e0_None.pkl
load /tmp/tmpgwdyqcux/X_-1_*1_None.pkl /tmp/tmpgwdyqcux/y_-1_*1_None.pkl
spill /tmp/tmpgwdyqcux/X_-1_f0_None.pkl /tmp/tmpgwdyqcux/y_-1_f0_None.pkl
spill /tmp/tmpgwdyqcux/X_-1_d1_None.pkl /tmp/tmpgwdyqcux/y_-1_d1_None.pkl
spill /tmp/tmpgwdyqcux/X_-1_e1_None.pkl /tmp/tmpgwdyqcux/y_-1_e1_None.pkl
load /tmp/tmpgwdyqcux/X_-1_f0_None.pkl /tmp/tmpgwdyqcux/y_-1_f0_None.pkl
load /tmp/tmpgwdyqcux/X_-1_d0_None.pkl /tmp/tmpgwdyqcux/y_-1_d0_None.pkl
spill /tmp/tmpgwdyqcux/X_-1_f1_None.pkl /tmp/tmpgwdyqcux/y_-1_f1_None.pkl
load /tmp/tmpgwdyqcux/X_-1_d1_None.pkl /tmp/tmpgwdyqcux/y_-1_d1_None.pkl
spill /tmp/tmpgwdyqcux/X_0_f1_f.pkl /tmp/tmpgwdyqcux/y_0_f1_f.pkl
spill /tmp/tmpgwdyqcux/X_-1_f0_None.pkl /tmp/tmpgwdyqcux/y_-1_f0_None.pkl
spill /tmp/tmpgwdyqcux/X_0_f0_f.pkl /tmp/tmpgwdyqcux/y_0_f0_f.pkl
load /tmp/tmpgwdyqcux/X_-1_e0_None.pkl /tmp/tmpgwdyqcux/y_-1_e0_None.pkl
spill /tmp/tmpgwdyqcux/X_-1_d1_None.pkl /tmp/tmpgwdyqcux/y_-1_d1_None.pkl
spill /tmp/tmpgwdyqcux/X_-1_e0_None.pkl /tmp/tmpgwdyqcux/y_-1_e0_None.pkl
load /tmp/tmpgwdyqcux/X_-1_e1_None.pkl /tmp/tmpgwdyqcux/y_-1_e1_None.pkl
spill /tmp/tmpgwdyqcux/X_1_e0_e.pkl /tmp/tmpgwdyqcux/y_1_e0_e.pkl
spill /tmp/tmpgwdyqcux/X_-1_e1_None.pkl /tmp/tmpgwdyqcux/y_-1_e1_None.pkl
load /tmp/tmpgwdyqcux/X_0_f0_f.pkl /tmp/tmpgwdyqcux/y_0_f0_f.pkl
spill /tmp/tmpgwdyqcux/X_1_e1_e.pkl /tmp/tmpgwdyqcux/y_1_e1_e.pkl
load /tmp/tmpgwdyqcux/X_-1_f0_None.pkl /tmp/tmpgwdyqcux/y_-1_f0_None.pkl
load /tmp/tmpgwdyqcux/X_0_f1_f.pkl /tmp/tmpgwdyqcux/y_0_f1_f.pkl
load /tmp/tmpgwdyqcux/X_-1_f1_None.pkl /tmp/tmpgwdyqcux/y_-1_f1_None.pkl
load /tmp/tmpgwdyqcux/X_-1_d1_None.pkl /tmp/tmpgwdyqcux/y_-1_d1_None.pkl
load /tmp/tmpgwdyqcux/X_1_e0_e.pkl /tmp/tmpgwdyqcux/y_1_e0_e.pkl
load /tmp/tmpgwdyqcux/X_-1_e0_None.pkl /tmp/tmpgwdyqcux/y_-1_e0_None.pkl
load /tmp/tmpgwdyqcux/X_1_e1_e.pkl /tmp/tmpgwdyqcux/y_1_e1_e.pkl
load /tmp/tmpgwdyqcux/X_-1_e1_None.pkl /tmp/tmpgwdyqcux/y_-1_e1_None.pkl

{
    "spill_count": 15,
    "load_count": 15,
    "spill_space": 151840,
    "load_space": 151840,
    "min_resident": 53600,
    "max_resident": 53600,
    "train_count": 27,
    "apply_count": 27,
    "metric_count": 9,
    "train_time": 0.21664977073669434,
    "apply_time": 0.6339118480682373,
    "metric_time": 0.03318333625793457,
    "critical_count": 1,
    "critical_time": 0.10162663459777832,
}
accuracies 62.50%, 59.82%, 68.47%