from lale.lib.rasl import SimpleImputer as SIM
from lale.lib.rasl import MinMaxScaler as MMS
from lale.lib.rasl import BatchedBaggingClassifier as BBC
from lale.lib.sklearn import SGDClassifier as SGD
from lale.lib.sklearn import RandomForestClassifier as RFC
def fresh_pipeline(final_est="sgd"):
    """Build a brand-new, untrained preprocessing + estimator pipeline.

    A new pipeline object is created on every call to guarantee fresh state
    after any earlier fit/partial_fit on a previously returned pipeline.

    Args:
        final_est: Which final estimator to append: ``"sgd"`` (SGD with
            ``random_state=42``), ``"rfc"`` (RFC with ``random_state=42``),
            or ``"bbc"`` (BBC with default arguments).

    Returns:
        ``SIM(strategy="mean") >> MMS() >> <final estimator>``.

    Raises:
        ValueError: If ``final_est`` is not one of the supported names.
    """
    if final_est == "sgd":
        est = SGD(random_state=42)
    elif final_est == "rfc":
        est = RFC(random_state=42)
    elif final_est == "bbc":
        est = BBC()
    else:
        # Without this branch `est` would be unbound and the return statement
        # would fail with an UnboundLocalError that hides the real cause.
        raise ValueError(
            f"unknown final_est {final_est!r}; expected 'sgd', 'rfc', or 'bbc'"
        )
    return SIM(strategy="mean") >> MMS() >> est
# Build a pipeline with the default final estimator ("sgd") and call its
# visualize() method to display it.
fresh_pipeline().visualize()
from lale.lib.rasl import is_associative, is_incremental, PrioBatch, PrioStep, PrioResourceAware
from lale.lib.rasl import mockup_data_loader, fit_with_batches, cross_val_score
from sklearn.model_selection import KFold
# Sanity-check the operator properties reported by is_associative and
# is_incremental: both preprocessing operators (SIM, MMS) are associative,
# SGD is incremental, and RFC is not incremental.
assert is_associative(SIM)
assert is_associative(MMS)
assert is_incremental(SGD)
assert not is_incremental(RFC)
import lale.datasets.openml
import math
import pandas as pd
from lale.lib.lale import Project
# Fetch the "credit-g" classification dataset with preprocessing turned off.
(raw_train_X, train_y), (raw_test_X, test_y) = lale.datasets.openml.fetch("credit-g", "classification", preprocess=False)
# Fit a Project operator with a {"type": "number"} column schema on the raw
# training features, then apply it to both splits (presumably this keeps only
# the numeric columns -- confirm against the Project documentation).
project = Project(columns={"type": "number"}).fit(raw_train_X)
train_X, test_X = project.transform(raw_train_X), project.transform(raw_test_X)
# Distinct class labels of the training target; passed to the later
# fit_with_batches / cross_val_score calls.
unique_class_labels = list(train_y.unique())
print(f"unique_class_labels {unique_class_labels}")
# Combined pandas memory_usage() of the training features and labels; later
# cells derive their max_resident value from this number.
train_data_space = train_X.memory_usage().sum() + train_y.memory_usage()
print(f"train_data_space {train_data_space}")
# Limit pandas display to 6 rows, then show test labels next to test features.
pd.set_option('display.max_rows', 6)
pd.concat([test_y, test_X], axis=1)
unique_class_labels ['good', 'bad'] train_data_space 53600
class | duration | credit_amount | installment_commitment | residence_since | age | existing_credits | num_dependents | |
---|---|---|---|---|---|---|---|---|
993 | good | 36.0 | 3959.0 | 4.0 | 3.0 | 30.0 | 1.0 | 1.0 |
859 | good | 9.0 | 3577.0 | 1.0 | 2.0 | 26.0 | 1.0 | 2.0 |
298 | good | 18.0 | 2515.0 | 3.0 | 4.0 | 43.0 | 1.0 | 1.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
503 | bad | 24.0 | 1216.0 | 4.0 | 4.0 | 38.0 | 2.0 | 2.0 |
263 | good | 12.0 | 2748.0 | 2.0 | 4.0 | 57.0 | 3.0 | 1.0 |
808 | good | 42.0 | 9283.0 | 1.0 | 2.0 | 55.0 | 1.0 | 1.0 |
330 rows × 8 columns
import lale.lib.rasl
# Baseline: train the default pipeline with a single ordinary fit() call on the
# whole training set and compare its test accuracy with the share of 'good'
# labels in the test set.
accuracy_scorer = lale.lib.rasl.get_scorer("accuracy")
# Fraction of test rows labelled 'good', printed as the "majority class" rate.
print(f"majority class {len(test_y[test_y == 'good']) / len(test_y):.2%}")
trained = fresh_pipeline().fit(train_X, train_y)
print(f"accuracy {accuracy_scorer(trained, test_X, test_y):.2%}")
majority class 70.91% accuracy 70.91%
# Train the default pipeline through fit_with_batches, with the training data
# served by mockup_data_loader as a single "pandas" batch.
n_batches = 1
batches = mockup_data_loader(train_X, train_y, n_batches, "pandas")
# max_resident is one batch-sized share of the measured training-data
# footprint (with n_batches == 1 that is the whole footprint).
max_resident = 1 * math.ceil(train_data_space / n_batches)
# Uses the PrioStep() priority, partial_transform disabled, verbosity 3, and
# no scoring or progress callback.
trained = fit_with_batches(fresh_pipeline(), batches, None, None, unique_class_labels,
max_resident, PrioStep(), partial_transform=False, verbose=3, progress_callback=None)
print(f"accuracy {accuracy_scorer(trained, test_X, test_y):.2%}")
trained.visualize()
scan INP(d0)
scan INP(d1) spill /tmp/tmp5r63ky4x/X_-1_d0_None.pkl /tmp/tmp5r63ky4x/y_-1_d0_None.pkl
to_monoid SimpleImputer(d0) load /tmp/tmp5r63ky4x/X_-1_d0_None.pkl /tmp/tmp5r63ky4x/y_-1_d0_None.pkl
combine SimpleImputer(d*)
aborted spill of batch -1_d0_None ensure_space() failed, amount_needed 53600, no_spill_space 53600, min_resident 107200, max_resident 53600
transform SimpleImputer(d0)
to_monoid MinMaxScaler(d0)
combine MinMaxScaler(d*)
aborted spill of batch 0_d0_None ensure_space() failed, amount_needed 53600, no_spill_space 53600, min_resident 107200, max_resident 53600
transform MinMaxScaler(d0)
partial_fit SGDClassifier(d0)
partial_fit SGDClassifier(d*)
{ "spill_count": 1, "load_count": 1, "spill_space": 53600, "load_space": 53600, "min_resident": 107200, "max_resident": 53600, "train_count": 6, "apply_count": 4, "metric_count": 0, "train_time": 0.04813194274902344, "apply_time": 0.0444943904876709, "metric_time": 0, "critical_count": 1, "critical_time": 0.0268857479095459, } accuracy 63.33%
# Train the "rfc" pipeline variant (RFC is asserted above to be non-incremental)
# through fit_with_batches on three "pandas" batches.
n_batches = 3
# max_resident is three batch-sized shares of the training-data footprint;
# ceil() rounds each share up, so the total can slightly exceed the footprint.
max_resident = 3 * math.ceil(train_data_space / n_batches)
batches = mockup_data_loader(train_X, train_y, n_batches, "pandas")
# PrioStep() priority, partial_transform disabled, verbosity 2.
trained = fit_with_batches(fresh_pipeline("rfc"), batches, None, None, unique_class_labels,
max_resident, PrioStep(), partial_transform=False, verbose=2, progress_callback=None)
print(f"accuracy {accuracy_scorer(trained, test_X, test_y):.2%}")
spill /tmp/tmplzol_tj8/X_-1_d1_None.pkl /tmp/tmplzol_tj8/y_-1_d1_None.pkl spill /tmp/tmplzol_tj8/X_-1_d2_None.pkl /tmp/tmplzol_tj8/y_-1_d2_None.pkl load /tmp/tmplzol_tj8/X_-1_d1_None.pkl /tmp/tmplzol_tj8/y_-1_d1_None.pkl load /tmp/tmplzol_tj8/X_-1_d2_None.pkl /tmp/tmplzol_tj8/y_-1_d2_None.pkl spill /tmp/tmplzol_tj8/X_-1_d2_None.pkl /tmp/tmplzol_tj8/y_-1_d2_None.pkl spill /tmp/tmplzol_tj8/X_-1_d1_None.pkl /tmp/tmplzol_tj8/y_-1_d1_None.pkl load /tmp/tmplzol_tj8/X_-1_d1_None.pkl /tmp/tmplzol_tj8/y_-1_d1_None.pkl spill /tmp/tmplzol_tj8/X_0_d0_None.pkl /tmp/tmplzol_tj8/y_0_d0_None.pkl load /tmp/tmplzol_tj8/X_-1_d2_None.pkl /tmp/tmplzol_tj8/y_-1_d2_None.pkl load /tmp/tmplzol_tj8/X_0_d0_None.pkl /tmp/tmplzol_tj8/y_0_d0_None.pkl spill /tmp/tmplzol_tj8/X_0_d2_None.pkl /tmp/tmplzol_tj8/y_0_d2_None.pkl spill /tmp/tmplzol_tj8/X_0_d1_None.pkl /tmp/tmplzol_tj8/y_0_d1_None.pkl load /tmp/tmplzol_tj8/X_0_d1_None.pkl /tmp/tmplzol_tj8/y_0_d1_None.pkl spill /tmp/tmplzol_tj8/X_1_d0_None.pkl /tmp/tmplzol_tj8/y_1_d0_None.pkl load /tmp/tmplzol_tj8/X_0_d2_None.pkl /tmp/tmplzol_tj8/y_0_d2_None.pkl load /tmp/tmplzol_tj8/X_1_d0_None.pkl /tmp/tmplzol_tj8/y_1_d0_None.pkl
{ "spill_count": 8, "load_count": 8, "spill_space": 142880, "load_space": 142880, "min_resident": 71520, "max_resident": 53601, "train_count": 9, "apply_count": 10, "metric_count": 0, "train_time": 0.2980036735534668, "apply_time": 0.08468341827392578, "metric_time": 0, "critical_count": 1, "critical_time": 0.21265220642089844, } accuracy 70.30%
# Train the default ("sgd") pipeline through fit_with_batches on three
# "pandas" batches using the PrioStep() priority.
n_batches = 3
# max_resident is three batch-sized shares of the training-data footprint.
max_resident = 3 * math.ceil(train_data_space / n_batches)
batches = mockup_data_loader(train_X, train_y, n_batches, "pandas")
trained = fit_with_batches(fresh_pipeline(), batches, None, None, unique_class_labels, max_resident,
PrioStep(), partial_transform=False, verbose=2, progress_callback=None)
print(f"accuracy {accuracy_scorer(trained, test_X, test_y):.2%}")
spill /tmp/tmp2ijsdu2b/X_-1_d1_None.pkl /tmp/tmp2ijsdu2b/y_-1_d1_None.pkl spill /tmp/tmp2ijsdu2b/X_-1_d2_None.pkl /tmp/tmp2ijsdu2b/y_-1_d2_None.pkl load /tmp/tmp2ijsdu2b/X_-1_d1_None.pkl /tmp/tmp2ijsdu2b/y_-1_d1_None.pkl load /tmp/tmp2ijsdu2b/X_-1_d2_None.pkl /tmp/tmp2ijsdu2b/y_-1_d2_None.pkl spill /tmp/tmp2ijsdu2b/X_-1_d2_None.pkl /tmp/tmp2ijsdu2b/y_-1_d2_None.pkl spill /tmp/tmp2ijsdu2b/X_-1_d1_None.pkl /tmp/tmp2ijsdu2b/y_-1_d1_None.pkl load /tmp/tmp2ijsdu2b/X_-1_d1_None.pkl /tmp/tmp2ijsdu2b/y_-1_d1_None.pkl spill /tmp/tmp2ijsdu2b/X_0_d0_None.pkl /tmp/tmp2ijsdu2b/y_0_d0_None.pkl load /tmp/tmp2ijsdu2b/X_-1_d2_None.pkl /tmp/tmp2ijsdu2b/y_-1_d2_None.pkl load /tmp/tmp2ijsdu2b/X_0_d0_None.pkl /tmp/tmp2ijsdu2b/y_0_d0_None.pkl spill /tmp/tmp2ijsdu2b/X_0_d2_None.pkl /tmp/tmp2ijsdu2b/y_0_d2_None.pkl spill /tmp/tmp2ijsdu2b/X_0_d1_None.pkl /tmp/tmp2ijsdu2b/y_0_d1_None.pkl load /tmp/tmp2ijsdu2b/X_0_d1_None.pkl /tmp/tmp2ijsdu2b/y_0_d1_None.pkl spill /tmp/tmp2ijsdu2b/X_1_d0_None.pkl /tmp/tmp2ijsdu2b/y_1_d0_None.pkl load /tmp/tmp2ijsdu2b/X_0_d2_None.pkl /tmp/tmp2ijsdu2b/y_0_d2_None.pkl load /tmp/tmp2ijsdu2b/X_1_d0_None.pkl /tmp/tmp2ijsdu2b/y_1_d0_None.pkl
{ "spill_count": 8, "load_count": 8, "spill_space": 142880, "load_space": 142880, "min_resident": 35840, "max_resident": 53601, "train_count": 12, "apply_count": 10, "metric_count": 0, "train_time": 0.10301017761230469, "apply_time": 0.10129833221435547, "metric_time": 0, "critical_count": 1, "critical_time": 0.0216214656829834, } accuracy 62.42%
# Train the default ("sgd") pipeline through fit_with_batches on three
# "pandas" batches using the PrioBatch() priority.
n_batches = 3
# max_resident is three batch-sized shares of the training-data footprint.
max_resident = 3 * math.ceil(train_data_space / n_batches)
batches = mockup_data_loader(train_X, train_y, n_batches, "pandas")
trained = fit_with_batches(fresh_pipeline(), batches, None, None, unique_class_labels, max_resident,
PrioBatch(), partial_transform=False, verbose=2, progress_callback=None)
print(f"accuracy {accuracy_scorer(trained, test_X, test_y):.2%}")
spill /tmp/tmpocs5ad_r/X_-1_d1_None.pkl /tmp/tmpocs5ad_r/y_-1_d1_None.pkl spill /tmp/tmpocs5ad_r/X_-1_d2_None.pkl /tmp/tmpocs5ad_r/y_-1_d2_None.pkl load /tmp/tmpocs5ad_r/X_-1_d1_None.pkl /tmp/tmpocs5ad_r/y_-1_d1_None.pkl spill /tmp/tmpocs5ad_r/X_0_d0_None.pkl /tmp/tmpocs5ad_r/y_0_d0_None.pkl load /tmp/tmpocs5ad_r/X_-1_d2_None.pkl /tmp/tmpocs5ad_r/y_-1_d2_None.pkl load /tmp/tmpocs5ad_r/X_0_d0_None.pkl /tmp/tmpocs5ad_r/y_0_d0_None.pkl spill /tmp/tmpocs5ad_r/X_0_d2_None.pkl /tmp/tmpocs5ad_r/y_0_d2_None.pkl spill /tmp/tmpocs5ad_r/X_0_d1_None.pkl /tmp/tmpocs5ad_r/y_0_d1_None.pkl load /tmp/tmpocs5ad_r/X_0_d1_None.pkl /tmp/tmpocs5ad_r/y_0_d1_None.pkl load /tmp/tmpocs5ad_r/X_0_d2_None.pkl /tmp/tmpocs5ad_r/y_0_d2_None.pkl
{ "spill_count": 5, "load_count": 5, "spill_space": 89280, "load_space": 89280, "min_resident": 35840, "max_resident": 53601, "train_count": 12, "apply_count": 10, "metric_count": 0, "train_time": 0.10233783721923828, "apply_time": 0.07904672622680664, "metric_time": 0, "critical_count": 1, "critical_time": 0.025415658950805664, } accuracy 62.42%
# Train the default ("sgd") pipeline through fit_with_batches on three
# "pandas" batches using the PrioResourceAware() priority.
n_batches = 3
# max_resident is three batch-sized shares of the training-data footprint.
max_resident = 3 * math.ceil(train_data_space / n_batches)
batches = mockup_data_loader(train_X, train_y, n_batches, "pandas")
trained = fit_with_batches(fresh_pipeline(), batches, None, None, unique_class_labels, max_resident,
PrioResourceAware(), partial_transform=False, verbose=2, progress_callback=None)
print(f"accuracy {accuracy_scorer(trained, test_X, test_y):.2%}")
spill /tmp/tmp32eeypzi/X_-1_d1_None.pkl /tmp/tmp32eeypzi/y_-1_d1_None.pkl spill /tmp/tmp32eeypzi/X_-1_d2_None.pkl /tmp/tmp32eeypzi/y_-1_d2_None.pkl load /tmp/tmp32eeypzi/X_-1_d1_None.pkl /tmp/tmp32eeypzi/y_-1_d1_None.pkl spill /tmp/tmp32eeypzi/X_0_d0_None.pkl /tmp/tmp32eeypzi/y_0_d0_None.pkl load /tmp/tmp32eeypzi/X_-1_d2_None.pkl /tmp/tmp32eeypzi/y_-1_d2_None.pkl load /tmp/tmp32eeypzi/X_0_d0_None.pkl /tmp/tmp32eeypzi/y_0_d0_None.pkl spill /tmp/tmp32eeypzi/X_1_d2_None.pkl /tmp/tmp32eeypzi/y_1_d2_None.pkl load /tmp/tmp32eeypzi/X_1_d2_None.pkl /tmp/tmp32eeypzi/y_1_d2_None.pkl
{ "spill_count": 4, "load_count": 4, "spill_space": 71440, "load_space": 71440, "min_resident": 35840, "max_resident": 53601, "train_count": 12, "apply_count": 10, "metric_count": 0, "train_time": 0.08783912658691406, "apply_time": 0.06755399703979492, "metric_time": 0, "critical_count": 1, "critical_time": 0.01776576042175293, } accuracy 62.42%
class DemoProgressCallback:
    """Progress callback that records and prints every training score.

    Ideally this would drive a dynamically updating plot in Jupyter; for the
    demo it simply prints the accumulated history on each invocation.
    """

    def __init__(self):
        # History of score_train values, one entry per invocation.
        self.scores = []

    def __call__(self, score_train, score_valid, n_batches_scanned, end_of_scanned_batches):
        """Record ``score_train``, print the history, and check the call count.

        ``score_valid`` is accepted but not used.
        """
        self.scores.append(score_train)
        history = ", ".join([format(score, ".2%") for score in self.scores])
        print(f"scanned {n_batches_scanned}, found end {end_of_scanned_batches}, callback state {history}")
        # One score per scanned batch, plus one extra once the end is reached.
        expected_calls = n_batches_scanned + int(bool(end_of_scanned_batches))
        assert len(self.scores) == expected_calls
# Train a fresh default pipeline on only the first of three "pandas" batches.
n_batches = 3
batches = mockup_data_loader(train_X, train_y, n_batches, "pandas")
# NOTE(review): whether `batches` still yields this first batch afterwards
# depends on whether mockup_data_loader returns a re-iterable sequence or a
# one-shot iterator -- confirm.
first_batch = next(iter(batches))
trainable1 = fresh_pipeline()
# No memory limit is passed here (the max_resident argument is None).
trained1 = fit_with_batches(trainable1, [first_batch], None, None, unique_class_labels, None,
PrioBatch(), partial_transform=False, verbose=2, progress_callback=None)
{ "spill_count": 0, "load_count": 0, "spill_space": 0, "load_space": 0, "min_resident": 35840, "max_resident": 9223372036854775807, "train_count": 6, "apply_count": 4, "metric_count": 0, "train_time": 0.029951095581054688, "apply_time": 0.018345117568969727, "metric_time": 0, "critical_count": 1, "critical_time": 0.019626379013061523, }
# Split the pipeline trained on the first batch: everything except the last
# step is frozen in its trained state, and the last step is kept separately.
prefix2 = trained1.remove_last().freeze_trained()
suffix2 = trained1.get_last()
# Re-compose the frozen prefix with the last step and run fit_with_batches on
# `batches`, this time passing accuracy_scorer, partial_transform="score", and
# a DemoProgressCallback that prints the score history on every call.
trainable2 = prefix2 >> suffix2
trained = fit_with_batches(trainable2, batches, None, accuracy_scorer, unique_class_labels, None,
PrioBatch(), partial_transform="score", verbose=2, progress_callback=DemoProgressCallback())
scanned 1, found end False, callback state 69.96% scanned 2, found end False, callback state 69.96%, 69.51%
{ "spill_count": 0, "load_count": 0, "spill_space": 0, "load_space": 0, "min_resident": 35680, "max_resident": 9223372036854775807, "train_count": 3, "apply_count": 9, "metric_count": 3, "train_time": 0.0074999332427978516, "apply_time": 0.05312967300415039, "metric_time": 0.012913227081298828, "critical_count": 1, "critical_time": 0.009376764297485352, }
# Train the default pipeline on three "pandas" batches with
# partial_transform=True and verbosity 3, using the PrioBatch() priority.
n_batches = 3
# max_resident is three batch-sized shares of the training-data footprint.
max_resident = 3 * math.ceil(train_data_space / n_batches)
batches = mockup_data_loader(train_X, train_y, n_batches, "pandas")
trained = fit_with_batches(fresh_pipeline(), batches, None, None, unique_class_labels,
max_resident, PrioBatch(), partial_transform=True, verbose=3, progress_callback=None)
print(f"accuracy {accuracy_scorer(trained, test_X, test_y):.2%}")
scan INP(d0)
to_monoid SimpleImputer(d0)
transform SimpleImputer(d0)
to_monoid MinMaxScaler(d0)
transform MinMaxScaler(d0)
partial_fit SGDClassifier(d0)
scan INP(d1)
to_monoid SimpleImputer(d1)
combine SimpleImputer(d0,d1)
transform SimpleImputer(d1)
to_monoid MinMaxScaler(d1)
combine MinMaxScaler(d0,d1)
transform MinMaxScaler(d1)
partial_fit SGDClassifier(d0,d1)
scan INP(d2)
to_monoid SimpleImputer(d2)
combine SimpleImputer(d0,d1,d2)
transform SimpleImputer(d2)
to_monoid MinMaxScaler(d2)
combine MinMaxScaler(d0,d1,d2)
transform MinMaxScaler(d2)
partial_fit SGDClassifier(d0,d1,d2)
scan INP(d3)
combine SimpleImputer(d*)
combine MinMaxScaler(d*)
partial_fit SGDClassifier(d*)
{ "spill_count": 0, "load_count": 0, "spill_space": 0, "load_space": 0, "min_resident": 35840, "max_resident": 53601, "train_count": 16, "apply_count": 10, "metric_count": 0, "train_time": 0.11674237251281738, "apply_time": 0.092437744140625, "metric_time": 0, "critical_count": 1, "critical_time": 0.022518157958984375, } accuracy 70.30%
# Train the default pipeline on three "pandas" batches with
# partial_transform=True, additionally passing accuracy_scorer and a
# DemoProgressCallback so the score history is printed during training.
n_batches = 3
# max_resident is three batch-sized shares of the training-data footprint.
max_resident = 3 * math.ceil(train_data_space / n_batches)
batches = mockup_data_loader(train_X, train_y, n_batches, "pandas")
trained = fit_with_batches(fresh_pipeline(), batches, None, accuracy_scorer, unique_class_labels,
max_resident, PrioBatch(), partial_transform=True, verbose=2, progress_callback=DemoProgressCallback())
print(f"accuracy {accuracy_scorer(trained, test_X, test_y):.2%}")
spill /tmp/tmpv3zynub2/X_-1_d0_None.pkl /tmp/tmpv3zynub2/y_-1_d0_None.pkl load /tmp/tmpv3zynub2/X_-1_d0_None.pkl /tmp/tmpv3zynub2/y_-1_d0_None.pkl scanned 1, found end False, callback state 68.75% scanned 2, found end False, callback state 68.75%, 69.96% scanned 3, found end False, callback state 68.75%, 69.96%, 71.30%
{ "spill_count": 1, "load_count": 1, "spill_space": 17920, "load_space": 17920, "min_resident": 53760, "max_resident": 53601, "train_count": 16, "apply_count": 13, "metric_count": 4, "train_time": 0.09845256805419922, "apply_time": 0.10098576545715332, "metric_time": 0.025612354278564453, "critical_count": 1, "critical_time": 0.02614283561706543, } accuracy 70.30%
\f, where f is the left-out fold
n_batches = 1
# Cross-validate the "bbc" pipeline variant with a 3-fold KFold split and
# same_fold=True, using the PrioResourceAware() priority.
# max_resident is two batch-sized shares of the training-data footprint, based
# on the current value of n_batches.
max_resident = 2 * math.ceil(train_data_space / n_batches)
batches = mockup_data_loader(train_X, train_y, n_batches, "pandas")
scores = cross_val_score(fresh_pipeline("bbc"), batches, accuracy_scorer, KFold(3),
unique_class_labels, max_resident, PrioResourceAware(), same_fold=True, verbose=2)
# Print every score returned by cross_val_score as a percentage.
print("accuracies " + ", ".join(f"{score:.2%}" for score in scores))
spill /tmp/tmpaduz2lqh/X_-1_d0_None.pkl /tmp/tmpaduz2lqh/y_-1_d0_None.pkl spill /tmp/tmpaduz2lqh/X_0_e0_f.pkl /tmp/tmpaduz2lqh/y_0_e0_f.pkl spill /tmp/tmpaduz2lqh/X_-1_e0_None.pkl /tmp/tmpaduz2lqh/y_-1_e0_None.pkl load /tmp/tmpaduz2lqh/X_-1_e0_None.pkl /tmp/tmpaduz2lqh/y_-1_e0_None.pkl spill /tmp/tmpaduz2lqh/X_-1_f0_None.pkl /tmp/tmpaduz2lqh/y_-1_f0_None.pkl spill /tmp/tmpaduz2lqh/X_0_f0_e.pkl /tmp/tmpaduz2lqh/y_0_f0_e.pkl load /tmp/tmpaduz2lqh/X_-1_d0_None.pkl /tmp/tmpaduz2lqh/y_-1_d0_None.pkl spill /tmp/tmpaduz2lqh/X_0_f0_f.pkl /tmp/tmpaduz2lqh/y_0_f0_f.pkl spill /tmp/tmpaduz2lqh/X_-1_e0_None.pkl /tmp/tmpaduz2lqh/y_-1_e0_None.pkl spill /tmp/tmpaduz2lqh/X_-1_d0_None.pkl /tmp/tmpaduz2lqh/y_-1_d0_None.pkl load /tmp/tmpaduz2lqh/X_0_e0_f.pkl /tmp/tmpaduz2lqh/y_0_e0_f.pkl load /tmp/tmpaduz2lqh/X_0_f0_e.pkl /tmp/tmpaduz2lqh/y_0_f0_e.pkl load /tmp/tmpaduz2lqh/X_-1_e0_None.pkl /tmp/tmpaduz2lqh/y_-1_e0_None.pkl load /tmp/tmpaduz2lqh/X_0_f0_f.pkl /tmp/tmpaduz2lqh/y_0_f0_f.pkl load /tmp/tmpaduz2lqh/X_-1_f0_None.pkl /tmp/tmpaduz2lqh/y_-1_f0_None.pkl load /tmp/tmpaduz2lqh/X_-1_d0_None.pkl /tmp/tmpaduz2lqh/y_-1_d0_None.pkl
{ "spill_count": 8, "load_count": 8, "spill_space": 142880, "load_space": 142880, "min_resident": 107200, "max_resident": 107200, "train_count": 24, "apply_count": 26, "metric_count": 6, "train_time": 0.16528654098510742, "apply_time": 0.5534472465515137, "metric_time": 0.02211618423461914, "critical_count": 1, "critical_time": 0.1302642822265625, } accuracies 54.91%, 58.30%, 53.81%
# Cross-validate the "bbc" pipeline variant on two "pandas" batches with a
# 3-fold KFold split and same_fold=True, using PrioResourceAware().
n_batches = 2
# max_resident is two batch-sized shares of the training-data footprint.
max_resident = 2 * math.ceil(train_data_space / n_batches)
batches = mockup_data_loader(train_X, train_y, n_batches, "pandas")
scores = cross_val_score(fresh_pipeline("bbc"), batches, accuracy_scorer, KFold(3),
unique_class_labels, max_resident, PrioResourceAware(), same_fold=True, verbose=2)
# Print every score returned by cross_val_score as a percentage.
print("accuracies " + ", ".join(f"{score:.2%}" for score in scores))
spill /tmp/tmpxmtt9rld/X_-1_*1_None.pkl /tmp/tmpxmtt9rld/y_-1_*1_None.pkl spill /tmp/tmpxmtt9rld/X_-1_d0_None.pkl /tmp/tmpxmtt9rld/y_-1_d0_None.pkl spill /tmp/tmpxmtt9rld/X_-1_e0_None.pkl /tmp/tmpxmtt9rld/y_-1_e0_None.pkl load /tmp/tmpxmtt9rld/X_-1_*1_None.pkl /tmp/tmpxmtt9rld/y_-1_*1_None.pkl spill /tmp/tmpxmtt9rld/X_-1_f0_None.pkl /tmp/tmpxmtt9rld/y_-1_f0_None.pkl spill /tmp/tmpxmtt9rld/X_-1_d1_None.pkl /tmp/tmpxmtt9rld/y_-1_d1_None.pkl spill /tmp/tmpxmtt9rld/X_0_e1_f.pkl /tmp/tmpxmtt9rld/y_0_e1_f.pkl spill /tmp/tmpxmtt9rld/X_-1_e1_None.pkl /tmp/tmpxmtt9rld/y_-1_e1_None.pkl load /tmp/tmpxmtt9rld/X_-1_f0_None.pkl /tmp/tmpxmtt9rld/y_-1_f0_None.pkl spill /tmp/tmpxmtt9rld/X_-1_f1_None.pkl /tmp/tmpxmtt9rld/y_-1_f1_None.pkl spill /tmp/tmpxmtt9rld/X_0_f1_f.pkl /tmp/tmpxmtt9rld/y_0_f1_f.pkl spill /tmp/tmpxmtt9rld/X_0_f1_e.pkl /tmp/tmpxmtt9rld/y_0_f1_e.pkl load /tmp/tmpxmtt9rld/X_-1_d0_None.pkl /tmp/tmpxmtt9rld/y_-1_d0_None.pkl spill /tmp/tmpxmtt9rld/X_0_f1_d.pkl /tmp/tmpxmtt9rld/y_0_f1_d.pkl spill /tmp/tmpxmtt9rld/X_-1_f0_None.pkl /tmp/tmpxmtt9rld/y_-1_f0_None.pkl spill /tmp/tmpxmtt9rld/X_0_f0_d.pkl /tmp/tmpxmtt9rld/y_0_f0_d.pkl spill /tmp/tmpxmtt9rld/X_0_f0_e.pkl /tmp/tmpxmtt9rld/y_0_f0_e.pkl spill /tmp/tmpxmtt9rld/X_0_f0_f.pkl /tmp/tmpxmtt9rld/y_0_f0_f.pkl load /tmp/tmpxmtt9rld/X_-1_d1_None.pkl /tmp/tmpxmtt9rld/y_-1_d1_None.pkl spill /tmp/tmpxmtt9rld/X_-1_d0_None.pkl /tmp/tmpxmtt9rld/y_-1_d0_None.pkl spill /tmp/tmpxmtt9rld/X_0_d0_d.pkl /tmp/tmpxmtt9rld/y_0_d0_d.pkl spill /tmp/tmpxmtt9rld/X_0_d1_d.pkl /tmp/tmpxmtt9rld/y_0_d1_d.pkl spill /tmp/tmpxmtt9rld/X_-1_d1_None.pkl /tmp/tmpxmtt9rld/y_-1_d1_None.pkl load /tmp/tmpxmtt9rld/X_0_f0_e.pkl /tmp/tmpxmtt9rld/y_0_f0_e.pkl load /tmp/tmpxmtt9rld/X_0_f1_e.pkl /tmp/tmpxmtt9rld/y_0_f1_e.pkl load /tmp/tmpxmtt9rld/X_-1_e0_None.pkl /tmp/tmpxmtt9rld/y_-1_e0_None.pkl spill /tmp/tmpxmtt9rld/X_0_d1_f.pkl /tmp/tmpxmtt9rld/y_0_d1_f.pkl spill /tmp/tmpxmtt9rld/X_-1_e0_None.pkl /tmp/tmpxmtt9rld/y_-1_e0_None.pkl load 
/tmp/tmpxmtt9rld/X_0_f0_f.pkl /tmp/tmpxmtt9rld/y_0_f0_f.pkl load /tmp/tmpxmtt9rld/X_0_f1_f.pkl /tmp/tmpxmtt9rld/y_0_f1_f.pkl load /tmp/tmpxmtt9rld/X_0_d1_f.pkl /tmp/tmpxmtt9rld/y_0_d1_f.pkl load /tmp/tmpxmtt9rld/X_-1_e0_None.pkl /tmp/tmpxmtt9rld/y_-1_e0_None.pkl load /tmp/tmpxmtt9rld/X_-1_e1_None.pkl /tmp/tmpxmtt9rld/y_-1_e1_None.pkl spill /tmp/tmpxmtt9rld/X_1_f1_f.pkl /tmp/tmpxmtt9rld/y_1_f1_f.pkl load /tmp/tmpxmtt9rld/X_0_f0_d.pkl /tmp/tmpxmtt9rld/y_0_f0_d.pkl load /tmp/tmpxmtt9rld/X_0_f1_d.pkl /tmp/tmpxmtt9rld/y_0_f1_d.pkl load /tmp/tmpxmtt9rld/X_0_d0_d.pkl /tmp/tmpxmtt9rld/y_0_d0_d.pkl load /tmp/tmpxmtt9rld/X_-1_d0_None.pkl /tmp/tmpxmtt9rld/y_-1_d0_None.pkl load /tmp/tmpxmtt9rld/X_0_d1_d.pkl /tmp/tmpxmtt9rld/y_0_d1_d.pkl load /tmp/tmpxmtt9rld/X_-1_d1_None.pkl /tmp/tmpxmtt9rld/y_-1_d1_None.pkl load /tmp/tmpxmtt9rld/X_0_e1_f.pkl /tmp/tmpxmtt9rld/y_0_e1_f.pkl load /tmp/tmpxmtt9rld/X_-1_f0_None.pkl /tmp/tmpxmtt9rld/y_-1_f0_None.pkl load /tmp/tmpxmtt9rld/X_1_f1_f.pkl /tmp/tmpxmtt9rld/y_1_f1_f.pkl load /tmp/tmpxmtt9rld/X_-1_f1_None.pkl /tmp/tmpxmtt9rld/y_-1_f1_None.pkl
{ "spill_count": 22, "load_count": 22, "spill_space": 214160, "load_space": 214160, "min_resident": 53600, "max_resident": 53600, "train_count": 39, "apply_count": 51, "metric_count": 9, "train_time": 0.24051737785339355, "apply_time": 0.9827837944030762, "metric_time": 0.03426671028137207, "critical_count": 1, "critical_time": 0.21920561790466309, } accuracies 66.07%, 62.05%, 63.06%
# Cross-validate the "bbc" pipeline variant on a single "pandas" batch with a
# 3-fold KFold split and same_fold=False, using PrioResourceAware().
n_batches = 1
# max_resident is two batch-sized shares of the training-data footprint.
max_resident = 2 * math.ceil(train_data_space / n_batches)
batches = mockup_data_loader(train_X, train_y, n_batches, "pandas")
scores = cross_val_score(fresh_pipeline("bbc"), batches, accuracy_scorer, KFold(3),
unique_class_labels, max_resident, PrioResourceAware(), same_fold=False, verbose=2)
# Print every score returned by cross_val_score as a percentage.
print("accuracies " + ", ".join(f"{score:.2%}" for score in scores))
spill /tmp/tmpwewe5gvy/X_-1_d0_None.pkl /tmp/tmpwewe5gvy/y_-1_d0_None.pkl spill /tmp/tmpwewe5gvy/X_-1_e0_None.pkl /tmp/tmpwewe5gvy/y_-1_e0_None.pkl load /tmp/tmpwewe5gvy/X_-1_e0_None.pkl /tmp/tmpwewe5gvy/y_-1_e0_None.pkl load /tmp/tmpwewe5gvy/X_-1_d0_None.pkl /tmp/tmpwewe5gvy/y_-1_d0_None.pkl spill /tmp/tmpwewe5gvy/X_-1_f0_None.pkl /tmp/tmpwewe5gvy/y_-1_f0_None.pkl spill /tmp/tmpwewe5gvy/X_-1_e0_None.pkl /tmp/tmpwewe5gvy/y_-1_e0_None.pkl load /tmp/tmpwewe5gvy/X_-1_e0_None.pkl /tmp/tmpwewe5gvy/y_-1_e0_None.pkl load /tmp/tmpwewe5gvy/X_-1_f0_None.pkl /tmp/tmpwewe5gvy/y_-1_f0_None.pkl
{ "spill_count": 4, "load_count": 4, "spill_space": 71440, "load_space": 71440, "min_resident": 107200, "max_resident": 107200, "train_count": 18, "apply_count": 14, "metric_count": 6, "train_time": 0.09293031692504883, "apply_time": 0.49991703033447266, "metric_time": 0.02008819580078125, "critical_count": 1, "critical_time": 0.1726703643798828, } accuracies 60.71%, 56.50%, 58.74%
# Cross-validate the "bbc" pipeline variant on two "pandas" batches with a
# 3-fold KFold split and same_fold=False, using PrioResourceAware().
n_batches = 2
# max_resident is two batch-sized shares of the training-data footprint.
max_resident = 2 * math.ceil(train_data_space / n_batches)
batches = mockup_data_loader(train_X, train_y, n_batches, "pandas")
scores = cross_val_score(fresh_pipeline("bbc"), batches, accuracy_scorer, KFold(3),
unique_class_labels, max_resident, PrioResourceAware(), same_fold=False, verbose=2)
# Print every score returned by cross_val_score as a percentage.
print("accuracies " + ", ".join(f"{score:.2%}" for score in scores))
spill /tmp/tmpgwdyqcux/X_-1_*1_None.pkl /tmp/tmpgwdyqcux/y_-1_*1_None.pkl spill /tmp/tmpgwdyqcux/X_-1_d0_None.pkl /tmp/tmpgwdyqcux/y_-1_d0_None.pkl spill /tmp/tmpgwdyqcux/X_-1_e0_None.pkl /tmp/tmpgwdyqcux/y_-1_e0_None.pkl load /tmp/tmpgwdyqcux/X_-1_*1_None.pkl /tmp/tmpgwdyqcux/y_-1_*1_None.pkl spill /tmp/tmpgwdyqcux/X_-1_f0_None.pkl /tmp/tmpgwdyqcux/y_-1_f0_None.pkl spill /tmp/tmpgwdyqcux/X_-1_d1_None.pkl /tmp/tmpgwdyqcux/y_-1_d1_None.pkl spill /tmp/tmpgwdyqcux/X_-1_e1_None.pkl /tmp/tmpgwdyqcux/y_-1_e1_None.pkl load /tmp/tmpgwdyqcux/X_-1_f0_None.pkl /tmp/tmpgwdyqcux/y_-1_f0_None.pkl load /tmp/tmpgwdyqcux/X_-1_d0_None.pkl /tmp/tmpgwdyqcux/y_-1_d0_None.pkl spill /tmp/tmpgwdyqcux/X_-1_f1_None.pkl /tmp/tmpgwdyqcux/y_-1_f1_None.pkl load /tmp/tmpgwdyqcux/X_-1_d1_None.pkl /tmp/tmpgwdyqcux/y_-1_d1_None.pkl spill /tmp/tmpgwdyqcux/X_0_f1_f.pkl /tmp/tmpgwdyqcux/y_0_f1_f.pkl spill /tmp/tmpgwdyqcux/X_-1_f0_None.pkl /tmp/tmpgwdyqcux/y_-1_f0_None.pkl spill /tmp/tmpgwdyqcux/X_0_f0_f.pkl /tmp/tmpgwdyqcux/y_0_f0_f.pkl load /tmp/tmpgwdyqcux/X_-1_e0_None.pkl /tmp/tmpgwdyqcux/y_-1_e0_None.pkl spill /tmp/tmpgwdyqcux/X_-1_d1_None.pkl /tmp/tmpgwdyqcux/y_-1_d1_None.pkl spill /tmp/tmpgwdyqcux/X_-1_e0_None.pkl /tmp/tmpgwdyqcux/y_-1_e0_None.pkl load /tmp/tmpgwdyqcux/X_-1_e1_None.pkl /tmp/tmpgwdyqcux/y_-1_e1_None.pkl spill /tmp/tmpgwdyqcux/X_1_e0_e.pkl /tmp/tmpgwdyqcux/y_1_e0_e.pkl spill /tmp/tmpgwdyqcux/X_-1_e1_None.pkl /tmp/tmpgwdyqcux/y_-1_e1_None.pkl load /tmp/tmpgwdyqcux/X_0_f0_f.pkl /tmp/tmpgwdyqcux/y_0_f0_f.pkl spill /tmp/tmpgwdyqcux/X_1_e1_e.pkl /tmp/tmpgwdyqcux/y_1_e1_e.pkl load /tmp/tmpgwdyqcux/X_-1_f0_None.pkl /tmp/tmpgwdyqcux/y_-1_f0_None.pkl load /tmp/tmpgwdyqcux/X_0_f1_f.pkl /tmp/tmpgwdyqcux/y_0_f1_f.pkl load /tmp/tmpgwdyqcux/X_-1_f1_None.pkl /tmp/tmpgwdyqcux/y_-1_f1_None.pkl load /tmp/tmpgwdyqcux/X_-1_d1_None.pkl /tmp/tmpgwdyqcux/y_-1_d1_None.pkl load /tmp/tmpgwdyqcux/X_1_e0_e.pkl /tmp/tmpgwdyqcux/y_1_e0_e.pkl load /tmp/tmpgwdyqcux/X_-1_e0_None.pkl 
/tmp/tmpgwdyqcux/y_-1_e0_None.pkl load /tmp/tmpgwdyqcux/X_1_e1_e.pkl /tmp/tmpgwdyqcux/y_1_e1_e.pkl load /tmp/tmpgwdyqcux/X_-1_e1_None.pkl /tmp/tmpgwdyqcux/y_-1_e1_None.pkl
{ "spill_count": 15, "load_count": 15, "spill_space": 151840, "load_space": 151840, "min_resident": 53600, "max_resident": 53600, "train_count": 27, "apply_count": 27, "metric_count": 9, "train_time": 0.21664977073669434, "apply_time": 0.6339118480682373, "metric_time": 0.03318333625793457, "critical_count": 1, "critical_time": 0.10162663459777832, } accuracies 62.50%, 59.82%, 68.47%