# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
from IPython.display import display
from IPython.display import HTML
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
import warnings
pd.options.display.max_columns = 110
pd.options.display.max_rows = 400
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import PowerTransformer
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import QuantileTransformer
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from tensorflow.keras.regularizers import l1
from tensorflow.keras.regularizers import l2
from tensorflow.keras.backend import sigmoid
from tensorflow.keras import activations
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import initializers
from tensorflow.keras import layers
from tensorflow.keras import regularizers
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import InputLayer
from tensorflow.keras.constraints import max_norm
from tensorflow.keras.layers import LayerNormalization
from tensorflow.keras.layers.experimental.preprocessing import Normalization
from tensorflow.keras.callbacks import EarlyStopping
warnings.filterwarnings('ignore')
Open the dataset and get general statistics. To compute the mutual information values for the classifier I used my own function; to avoid cluttering the notebook with unnecessary code, I load the summary of mutual information values and general statistics from a csv file.
%%time
train = pd.read_csv("data/train.csv")
test = pd.read_csv("data/test.csv")
submission = pd.read_csv("data/sample_submission.csv")
train.set_index("id", inplace=True)
# Check nan values
print("The train has {} features with nan values.".format(list(train.isnull().sum().values > 0).count(True)))
print("The test has {} features with nan values.".format(list(test.isnull().sum().values > 0).count(True)))
print("The sample_submission has with {} features nan values.".format(list(submission.isnull().sum().values > 0).count(True)))
train_mutual_clf = pd.read_csv("train_mutual_clf.csv")
train_mutual_clf
The train has 0 features with nan values. The test has 0 features with nan values. The sample_submission has 0 features with nan values. CPU times: user 13.3 s, sys: 1.76 s, total: 15.1 s Wall time: 15.1 s
 | Unnamed: 0 | fs | cat | mi_val | raw_skew | raw_min | raw_max | raw_delta | raw_unique | raw_var | raw_mean | raw_std | pt_skew | pt_min | pt_max | pt_delta | pt_var | pt_mean | pt_std | qt_skew | qt_min | qt_max | qt_delta | qt_var | qt_mean | qt_std |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
0 | 0 | target | num | 1.000000 | -0.024042 | 0.000000 | 1.000000 | 1.000000 | 2 | 0.249964 | 0.506010 | 0.499964 | -0.024042 | -1.012093 | 0.988051 | 2.000144 | 1.0 | -2.540901e-16 | 1.0 | -0.024042 | -5.199338 | 5.199338 | 10.398675 | 27.029206 | 0.062496 | 5.198962 |
1 | 1 | f34 | num | 0.009754 | 0.023004 | -1.517720 | 6.582410 | 8.100130 | 410986 | 2.454143 | 2.463409 | 1.566571 | -0.124700 | -2.935852 | 2.438184 | 5.374036 | 1.0 | 3.257128e-16 | 1.0 | -0.007648 | -5.199338 | 5.199338 | 10.398675 | 0.999698 | -0.000759 | 0.999849 |
2 | 2 | f71 | num | 0.007264 | -0.037328 | -1.236730 | 6.516420 | 7.753150 | 401973 | 2.377366 | 2.632673 | 1.541871 | -0.134107 | -2.743451 | 2.399741 | 5.143192 | 1.0 | -1.046866e-16 | 1.0 | 0.001841 | -5.199338 | 5.199338 | 10.398675 | 1.004341 | -0.001740 | 1.002168 |
3 | 3 | f27 | num | 0.006615 | 2.722050 | -2.281050 | 3.752360 | 6.033410 | 468021 | 0.013026 | 0.057945 | 0.114129 | -0.371299 | -38.491148 | 17.259738 | 55.750886 | 1.0 | -1.515825e-18 | 1.0 | 0.010351 | -5.199338 | 5.199338 | 10.398675 | 0.996604 | -0.003690 | 0.998300 |
4 | 4 | f55 | num | 0.005759 | 0.025584 | -1.131580 | 6.351410 | 7.482990 | 397458 | 2.205884 | 2.571747 | 1.485222 | -0.119325 | -2.871718 | 2.364158 | 5.235876 | 1.0 | 9.915387e-16 | 1.0 | -0.004325 | -5.199338 | 5.199338 | 10.398675 | 0.993593 | 0.006420 | 0.996792 |
5 | 5 | f91 | num | 0.005177 | 0.047625 | -1.217700 | 6.573890 | 7.791590 | 409188 | 2.379330 | 2.444471 | 1.542508 | -0.119028 | -2.746521 | 2.458010 | 5.204531 | 1.0 | 2.041626e-16 | 1.0 | 0.009557 | -5.199338 | 5.199338 | 10.398675 | 0.999749 | 0.001106 | 0.999874 |
6 | 6 | f80 | num | 0.004990 | 0.009174 | -1.142840 | 6.388890 | 7.531730 | 409160 | 2.486275 | 2.550175 | 1.576793 | -0.136746 | -2.690576 | 2.256180 | 4.946755 | 1.0 | 1.229713e-16 | 1.0 | -0.000661 | -5.199338 | 5.199338 | 10.398675 | 0.994803 | -0.003480 | 0.997398 |
7 | 7 | f43 | num | 0.004927 | 0.074306 | -1.246410 | 6.507140 | 7.753550 | 413581 | 2.481095 | 2.500058 | 1.575149 | -0.122944 | -2.894138 | 2.295985 | 5.190123 | 1.0 | 2.084259e-16 | 1.0 | 0.004526 | -5.199338 | 5.199338 | 10.398675 | 1.001579 | -0.000571 | 1.000789 |
8 | 8 | f41 | num | 0.004660 | -0.027999 | -1.637800 | 6.414540 | 8.052340 | 402568 | 2.362558 | 2.582621 | 1.537062 | -0.126976 | -3.037688 | 2.378746 | 5.416434 | 1.0 | 5.082512e-16 | 1.0 | 0.007786 | -5.199338 | 5.199338 | 10.398675 | 1.003106 | 0.003532 | 1.001552 |
9 | 9 | f8 | num | 0.004198 | 0.046116 | -1.242020 | 6.389670 | 7.631690 | 403082 | 2.350047 | 2.538273 | 1.532986 | -0.120171 | -2.915410 | 2.308410 | 5.223820 | 1.0 | 5.445600e-16 | 1.0 | 0.005429 | -5.199338 | 5.199338 | 10.398675 | 0.997801 | -0.001002 | 0.998900 |
10 | 10 | f25 | num | 0.003575 | 0.007003 | -1.380430 | 6.192880 | 7.573310 | 410697 | 2.590722 | 2.580093 | 1.609572 | -0.140076 | -2.882624 | 2.088666 | 4.971291 | 1.0 | -5.760133e-16 | 1.0 | 0.002569 | -5.199338 | 5.199338 | 10.398675 | 0.993821 | 0.000411 | 0.996906 |
11 | 11 | f22 | num | 0.003416 | 0.020816 | -1.201070 | 6.306320 | 7.507390 | 405890 | 2.422674 | 2.559781 | 1.556494 | -0.126862 | -2.781458 | 2.239136 | 5.020594 | 1.0 | -6.491696e-16 | 1.0 | 0.002889 | -5.199338 | 5.199338 | 10.398675 | 0.996943 | 0.000461 | 0.998471 |
12 | 12 | f57 | num | 0.002946 | -0.112606 | -1.237060 | 6.396210 | 7.633270 | 399562 | 2.347374 | 2.604276 | 1.532114 | -0.155209 | -2.605263 | 2.420700 | 5.025962 | 1.0 | 3.920301e-16 | 1.0 | -0.006921 | -5.199338 | 5.199338 | 10.398675 | 0.993305 | 0.001547 | 0.996647 |
13 | 13 | f97 | num | 0.002809 | -0.117514 | -1.254730 | 6.145300 | 7.400030 | 399620 | 2.325899 | 2.590159 | 1.525090 | -0.158270 | -2.618379 | 2.283974 | 4.902353 | 1.0 | -1.477929e-17 | 1.0 | -0.009331 | -5.199338 | 5.199338 | 10.398675 | 0.996711 | 0.000508 | 0.998354 |
14 | 14 | f50 | num | 0.002721 | 0.003991 | -1.489400 | 6.223770 | 7.713170 | 411925 | 2.595396 | 2.558957 | 1.611023 | -0.142338 | -2.958789 | 2.114638 | 5.073426 | 1.0 | -5.078989e-16 | 1.0 | 0.009665 | -5.199338 | 5.199338 | 10.398675 | 1.003859 | -0.000714 | 1.001928 |
15 | 15 | f66 | num | 0.002584 | 0.018878 | -1.260920 | 6.307160 | 7.568080 | 405047 | 2.445530 | 2.577005 | 1.563819 | -0.125251 | -2.817235 | 2.226988 | 5.044223 | 1.0 | 1.743257e-16 | 1.0 | 0.002014 | -5.199338 | 5.199338 | 10.398675 | 1.002849 | -0.002932 | 1.001424 |
16 | 16 | f70 | num | 0.002295 | -0.025677 | -1.377760 | 6.521520 | 7.899280 | 410788 | 2.481350 | 2.494925 | 1.575230 | -0.141441 | -2.742190 | 2.404981 | 5.147171 | 1.0 | 2.076206e-16 | 1.0 | -0.005235 | -5.199338 | 5.199338 | 10.398675 | 0.993688 | -0.003624 | 0.996839 |
17 | 17 | f96 | num | 0.002203 | -0.025727 | -1.584740 | 6.254360 | 7.839100 | 407424 | 2.428691 | 2.547853 | 1.558426 | -0.142647 | -3.021004 | 2.242036 | 5.263040 | 1.0 | -5.818871e-16 | 1.0 | 0.005020 | -5.199338 | 5.199338 | 10.398675 | 1.002693 | -0.000319 | 1.001346 |
18 | 18 | f17 | num | 0.002076 | 0.046835 | -1.387720 | 6.377910 | 7.765630 | 415890 | 2.488364 | 2.483075 | 1.577455 | -0.134205 | -2.992054 | 2.241355 | 5.233409 | 1.0 | 2.069100e-16 | 1.0 | 0.015862 | -5.199338 | 5.199338 | 10.398675 | 0.991952 | -0.000129 | 0.995968 |
19 | 19 | f62 | num | 0.001964 | 8.176561 | -2.553080 | 8.583710 | 11.136790 | 494172 | 0.171429 | 0.124303 | 0.414039 | -6.035195 | -54.434135 | 6.443759 | 60.877893 | 1.0 | 1.610564e-18 | 1.0 | -0.000673 | -5.199338 | 5.199338 | 10.398675 | 0.991444 | -0.000692 | 0.995713 |
20 | 20 | f40 | num | 0.001947 | -0.071256 | -1.098840 | 6.306290 | 7.405130 | 404915 | 2.458388 | 2.646091 | 1.567925 | -0.157912 | -2.594561 | 2.230150 | 4.824710 | 1.0 | 1.435296e-16 | 1.0 | 0.001570 | -5.199338 | 5.199338 | 10.398675 | 0.994073 | -0.004350 | 0.997032 |
21 | 21 | f19 | num | 0.001909 | 12.705403 | -5.334110 | 11.687500 | 17.021610 | 472472 | 0.156272 | 0.108097 | 0.395313 | -10.017948 | -96.574945 | 11.386607 | 107.961552 | 1.0 | -3.344288e-17 | 1.0 | 0.008950 | -5.199338 | 5.199338 | 10.398675 | 1.002423 | 0.004164 | 1.001211 |
22 | 22 | f16 | num | 0.001901 | 9.102838 | -18.169600 | 41.469200 | 59.638800 | 503836 | 1.812660 | 0.362479 | 1.346351 | -8.713081 | -69.878336 | 13.114703 | 82.993039 | 1.0 | 1.669302e-16 | 1.0 | 0.001513 | -5.199338 | 5.199338 | 10.398675 | 0.998343 | 0.000053 | 0.999171 |
23 | 23 | f21 | num | 0.001830 | 1.928121 | -0.151519 | 0.526576 | 0.678095 | 463788 | 0.003003 | 0.062157 | 0.054796 | -0.077659 | -8.620859 | 3.730654 | 12.351512 | 1.0 | -1.300293e-17 | 1.0 | 0.003877 | -5.199338 | 5.199338 | 10.398675 | 0.999656 | 0.002837 | 0.999828 |
24 | 24 | f82 | num | 0.001825 | -0.036529 | -1.135490 | 6.803970 | 7.939460 | 403288 | 2.380152 | 2.598029 | 1.542774 | -0.140285 | -2.660951 | 2.570525 | 5.231476 | 1.0 | -1.345294e-16 | 1.0 | -0.007673 | -5.199338 | 5.199338 | 10.398675 | 0.988709 | 0.001634 | 0.994338 |
25 | 25 | f44 | num | 0.001759 | 9.036670 | -42.443200 | 131.565000 | 174.008200 | 462319 | 29.314140 | 3.227368 | 5.414253 | -1.745393 | -42.573895 | 13.291213 | 55.865108 | 1.0 | 6.745419e-17 | 1.0 | -0.005582 | -5.199338 | 5.199338 | 10.398675 | 0.991658 | 0.000946 | 0.995820 |
26 | 26 | f10 | num | 0.001711 | 0.059222 | -1.309730 | 6.508760 | 7.818490 | 411710 | 2.574058 | 2.579987 | 1.604387 | -0.131162 | -3.033922 | 2.201569 | 5.235491 | 1.0 | 1.280872e-16 | 1.0 | -0.002992 | -5.199338 | 5.199338 | 10.398675 | 0.995310 | -0.000088 | 0.997652 |
27 | 27 | f81 | num | 0.001647 | 2.505458 | -8.519100 | 17.195300 | 25.714400 | 499979 | 1.783854 | 0.902983 | 1.335610 | -2.196082 | -65.812350 | 4.373625 | 70.185975 | 1.0 | 2.827013e-16 | 1.0 | 0.000636 | -5.199338 | 5.199338 | 10.398675 | 0.998035 | -0.000144 | 0.999017 |
28 | 28 | f24 | num | 0.001345 | 4.611215 | -2.645250 | 6.101230 | 8.746480 | 500666 | 0.117222 | 0.141055 | 0.342376 | -6.808852 | -92.350992 | 4.966076 | 97.317068 | 1.0 | -5.049590e-17 | 1.0 | 0.012083 | -5.199338 | 5.199338 | 10.398675 | 0.997257 | 0.000369 | 0.998628 |
29 | 29 | f54 | num | 0.001342 | 0.056442 | -1.322530 | 6.234570 | 7.557100 | 408755 | 2.351773 | 2.455216 | 1.533549 | -0.121740 | -2.954965 | 2.253913 | 5.208877 | 1.0 | 1.841727e-16 | 1.0 | -0.006677 | -5.199338 | 5.199338 | 10.398675 | 0.995397 | -0.002081 | 0.997696 |
30 | 30 | f49 | num | 0.001262 | 4.408824 | -2.247330 | 7.234150 | 9.481480 | 503783 | 0.387649 | 0.244993 | 0.622615 | -3.365907 | -40.198464 | 3.771973 | 43.970438 | 1.0 | 2.974806e-17 | 1.0 | -0.007123 | -5.199338 | 5.199338 | 10.398675 | 1.003724 | -0.007833 | 1.001860 |
31 | 31 | f26 | num | 0.001250 | 0.075467 | -1.147260 | 6.966730 | 8.113990 | 412946 | 2.458279 | 2.459101 | 1.567890 | -0.121591 | -2.777318 | 2.555890 | 5.333208 | 1.0 | -6.085562e-16 | 1.0 | 0.000268 | -5.199338 | 5.199338 | 10.398675 | 1.003201 | 0.000691 | 1.001599 |
32 | 32 | f9 | num | 0.001219 | 6.395390 | -2.577840 | 7.078460 | 9.656300 | 489350 | 0.177992 | 0.134370 | 0.421891 | -4.731829 | -53.212971 | 5.922621 | 59.135591 | 1.0 | -1.286556e-16 | 1.0 | 0.008054 | -5.199338 | 5.199338 | 10.398675 | 1.002136 | -0.001826 | 1.001067 |
33 | 33 | f83 | num | 0.001174 | 4.108394 | -1.516430 | 4.805270 | 6.321700 | 493627 | 0.087516 | 0.162773 | 0.295831 | -4.853791 | -74.424971 | 3.589484 | 78.014456 | 1.0 | 3.808509e-17 | 1.0 | 0.005273 | -5.199338 | 5.199338 | 10.398675 | 0.992302 | -0.000706 | 0.996144 |
34 | 34 | f58 | num | 0.001164 | 20.324865 | -14.032200 | 24.792000 | 38.824200 | 473621 | 0.348225 | 0.099612 | 0.590106 | -9.807700 | -74.526152 | 24.925373 | 99.451525 | 1.0 | -1.638985e-17 | 1.0 | 0.018220 | -5.199338 | 5.199338 | 10.398675 | 0.992201 | -0.002441 | 0.996093 |
35 | 35 | f47 | num | 0.001109 | 0.024528 | -1.351300 | 6.819640 | 8.170940 | 400176 | 2.256048 | 2.537154 | 1.502015 | -0.112905 | -2.944431 | 2.653541 | 5.597972 | 1.0 | -1.089499e-16 | 1.0 | 0.000990 | -5.199338 | 5.199338 | 10.398675 | 1.000445 | 0.001116 | 1.000222 |
36 | 36 | f37 | num | 0.001101 | -0.008390 | -1.210370 | 6.255900 | 7.466270 | 405523 | 2.441420 | 2.595079 | 1.562504 | -0.135174 | -2.753911 | 2.201936 | 4.955848 | 1.0 | -3.835983e-16 | 1.0 | -0.001804 | -5.199338 | 5.199338 | 10.398675 | 0.999562 | 0.008116 | 0.999781 |
37 | 37 | f4 | num | 0.001066 | 4.643932 | -3.206210 | 8.265470 | 11.471680 | 502075 | 0.174296 | 0.177850 | 0.417488 | -5.810944 | -61.938365 | 5.804490 | 67.742855 | 1.0 | 1.552773e-16 | 1.0 | 0.007123 | -5.199338 | 5.199338 | 10.398675 | 1.006434 | -0.000438 | 1.003212 |
38 | 38 | f13 | num | 0.001040 | 0.044583 | -1.295160 | 6.401460 | 7.696620 | 403995 | 2.303392 | 2.477244 | 1.517693 | -0.111684 | -2.860350 | 2.399806 | 5.260156 | 1.0 | 1.640406e-16 | 1.0 | 0.000616 | -5.199338 | 5.199338 | 10.398675 | 1.002802 | -0.000816 | 1.001400 |
39 | 39 | f94 | num | 0.000989 | 8.413375 | -3.101500 | 8.623270 | 11.724770 | 481499 | 0.213457 | 0.144932 | 0.462014 | -6.467638 | -68.130272 | 6.432864 | 74.563136 | 1.0 | -1.406875e-17 | 1.0 | 0.002618 | -5.199338 | 5.199338 | 10.398675 | 1.000795 | 0.002532 | 1.000398 |
40 | 40 | f79 | num | 0.000981 | 6.082862 | -4.397590 | 12.738500 | 17.136090 | 494930 | 0.356206 | 0.349875 | 0.596830 | -4.370017 | -66.775980 | 6.042122 | 72.818101 | 1.0 | -1.676762e-16 | 1.0 | 0.006330 | -5.199338 | 5.199338 | 10.398675 | 0.993421 | 0.001164 | 0.996705 |
41 | 41 | f20 | num | 0.000953 | 12.108015 | -2.858550 | 8.749520 | 11.608070 | 477361 | 0.095442 | 0.093783 | 0.308938 | -8.746207 | -65.697509 | 9.813520 | 75.511029 | 1.0 | 6.664891e-17 | 1.0 | 0.004698 | -5.199338 | 5.199338 | 10.398675 | 1.000398 | -0.001393 | 1.000199 |
42 | 42 | f11 | num | 0.000863 | -0.127316 | -1.451570 | 6.456620 | 7.908190 | 395261 | 2.387690 | 2.759273 | 1.545215 | -0.154772 | -2.804413 | 2.359543 | 5.163955 | 1.0 | 1.477929e-16 | 1.0 | 0.019414 | -5.199338 | 5.199338 | 10.398675 | 0.998970 | -0.001452 | 0.999485 |
43 | 43 | f42 | num | 0.000817 | 10.132594 | -3.255920 | 8.292860 | 11.548780 | 482157 | 0.126768 | 0.108999 | 0.356045 | -6.998476 | -67.864419 | 8.530777 | 76.395196 | 1.0 | 3.600083e-17 | 1.0 | 0.001391 | -5.199338 | 5.199338 | 10.398675 | 1.003704 | -0.003478 | 1.001850 |
44 | 44 | f7 | num | 0.000810 | -0.004024 | -1.281970 | 6.258770 | 7.540740 | 403969 | 2.400516 | 2.571593 | 1.549360 | -0.126757 | -2.786038 | 2.247795 | 5.033833 | 1.0 | 3.330077e-16 | 1.0 | 0.004515 | -5.199338 | 5.199338 | 10.398675 | 0.992474 | -0.001397 | 0.996230 |
45 | 45 | f63 | num | 0.000806 | 3.884359 | -5.153130 | 10.866600 | 16.019730 | 511206 | 0.388143 | 0.340198 | 0.623011 | -5.176650 | -73.295881 | 5.496163 | 78.792044 | 1.0 | -1.578115e-16 | 1.0 | 0.002953 | -5.199338 | 5.199338 | 10.398675 | 0.995565 | 0.005148 | 0.997780 |
46 | 46 | f23 | num | 0.000717 | 10.854096 | -2.721660 | 8.408950 | 11.130610 | 494695 | 0.111282 | 0.097061 | 0.333589 | -7.556650 | -61.259198 | 8.504263 | 69.763461 | 1.0 | 1.449507e-17 | 1.0 | -0.001259 | -5.199338 | 5.199338 | 10.398675 | 1.001658 | -0.003847 | 1.000829 |
47 | 47 | f3 | num | 0.000702 | -0.055741 | -1.368560 | 6.521150 | 7.889710 | 398271 | 2.385567 | 2.647901 | 1.544528 | -0.130376 | -2.787181 | 2.418412 | 5.205594 | 1.0 | -7.437014e-18 | 1.0 | -0.007233 | -5.199338 | 5.199338 | 10.398675 | 1.000187 | 0.002869 | 1.000094 |
48 | 48 | f31 | num | 0.000671 | 8.925872 | -3.273000 | 8.646590 | 11.919590 | 503328 | 0.169984 | 0.107798 | 0.412291 | -6.182716 | -52.331789 | 8.161700 | 60.493490 | 1.0 | 8.147557e-18 | 1.0 | -0.006577 | -5.199338 | 5.199338 | 10.398675 | 0.997456 | -0.000768 | 0.998727 |
49 | 49 | f84 | num | 0.000666 | 9.786382 | -9.791940 | 45.224700 | 55.016640 | 503958 | 5.589865 | 0.501982 | 2.364289 | -10.953929 | -99.963949 | 6.087219 | 106.051169 | 1.0 | -9.668118e-17 | 1.0 | -0.010389 | -5.199338 | 5.199338 | 10.398675 | 1.001125 | -0.002029 | 1.000562 |
50 | 50 | f33 | num | 0.000609 | 19.735574 | -3.318420 | 5.487420 | 8.805840 | 473584 | 0.012026 | 0.054776 | 0.109661 | -8.828982 | -107.098562 | 24.442614 | 131.541176 | 1.0 | 8.474406e-17 | 1.0 | -0.004334 | -5.199338 | 5.199338 | 10.398675 | 1.000651 | 0.005826 | 1.000326 |
51 | 51 | f48 | num | 0.000586 | 10.486610 | -3.477470 | 7.917030 | 11.394500 | 494814 | 0.085836 | 0.084997 | 0.292977 | -8.141951 | -70.289607 | 10.512749 | 80.802356 | 1.0 | 1.048998e-16 | 1.0 | -0.001165 | -5.199338 | 5.199338 | 10.398675 | 0.997138 | -0.004494 | 0.998568 |
52 | 52 | f1 | num | 0.000577 | 0.003215 | -1.223960 | 6.226720 | 7.450680 | 409963 | 2.414966 | 2.497590 | 1.554016 | -0.140788 | -2.766490 | 2.225557 | 4.992048 | 1.0 | 1.401190e-16 | 1.0 | 0.005937 | -5.199338 | 5.199338 | 10.398675 | 0.996485 | 0.001440 | 0.998241 |
53 | 53 | f18 | num | 0.000540 | 0.022661 | -1.221020 | 6.354660 | 7.575680 | 408418 | 2.376438 | 2.471319 | 1.541570 | -0.124440 | -2.727742 | 2.343561 | 5.071302 | 1.0 | 1.321609e-16 | 1.0 | -0.001203 | -5.199338 | 5.199338 | 10.398675 | 0.999668 | 0.003605 | 0.999834 |
54 | 54 | f90 | num | 0.000539 | 7.155654 | -3.676990 | 6.482940 | 10.159930 | 486746 | 0.012691 | 0.071252 | 0.112654 | -4.463037 | -75.125777 | 27.886814 | 103.012591 | 1.0 | 2.094680e-16 | 1.0 | 0.001083 | -5.199338 | 5.199338 | 10.398675 | 0.997554 | -0.003803 | 0.998776 |
55 | 55 | f32 | num | 0.000532 | 5.308662 | -1.954520 | 5.176510 | 7.131030 | 497622 | 0.194141 | 0.170434 | 0.440615 | -4.182299 | -60.566804 | 3.982112 | 64.548915 | 1.0 | 1.433402e-16 | 1.0 | 0.000668 | -5.199338 | 5.199338 | 10.398675 | 0.995713 | 0.003870 | 0.997854 |
56 | 56 | f99 | num | 0.000532 | 5.087507 | -2.783380 | 5.988110 | 8.771490 | 499969 | 0.070170 | 0.123048 | 0.264896 | -5.464131 | -72.492706 | 6.884818 | 79.377524 | 1.0 | -4.547474e-17 | 1.0 | -0.005623 | -5.199338 | 5.199338 | 10.398675 | 0.991623 | 0.003841 | 0.995803 |
57 | 57 | f69 | num | 0.000530 | 15.430224 | -11.650200 | 25.699800 | 37.350000 | 495146 | 0.554353 | 0.189364 | 0.744549 | -8.603893 | -68.187433 | 17.108467 | 85.295900 | 1.0 | 7.960447e-17 | 1.0 | 0.011778 | -5.199338 | 5.199338 | 10.398675 | 1.002036 | 0.003507 | 1.001017 |
58 | 58 | f64 | num | 0.000510 | 7.228653 | -3.498950 | 6.186820 | 9.685770 | 477696 | 0.046659 | 0.086385 | 0.216007 | -17.974021 | -195.841275 | 8.619073 | 204.460349 | 1.0 | 5.608551e-17 | 1.0 | -0.000105 | -5.199338 | 5.199338 | 10.398675 | 0.994023 | 0.004226 | 0.997007 |
59 | 59 | f89 | num | 0.000340 | 47.220475 | -4.421610 | 12.347600 | 16.769210 | 462090 | 0.016540 | 0.056866 | 0.128607 | -24.851315 | -164.934501 | 42.446757 | 207.381259 | 1.0 | -1.182343e-16 | 1.0 | 0.007141 | -5.199338 | 5.199338 | 10.398675 | 1.003083 | -0.000303 | 1.001540 |
60 | 60 | f28 | num | 0.000332 | 6.842681 | -4.596700 | 9.021170 | 13.617870 | 461373 | 0.115138 | 0.216789 | 0.339319 | -3.919446 | -61.269148 | 10.501833 | 71.770981 | 1.0 | 8.526513e-19 | 1.0 | -0.004718 | -5.199338 | 5.199338 | 10.398675 | 0.996626 | -0.001770 | 0.998312 |
61 | 61 | f67 | num | 0.000328 | 0.055022 | -1.059910 | 6.270830 | 7.330740 | 406066 | 2.347992 | 2.522315 | 1.532316 | -0.120164 | -2.747502 | 2.243433 | 4.990935 | 1.0 | 7.803654e-16 | 1.0 | 0.006439 | -5.199338 | 5.199338 | 10.398675 | 0.994265 | 0.000575 | 0.997128 |
62 | 62 | f0 | num | 0.000318 | 3.228543 | -3.797450 | 8.781500 | 12.578950 | 516483 | 0.272953 | 0.306508 | 0.522449 | -3.598121 | -66.741044 | 4.764844 | 71.505888 | 1.0 | 6.471860e-17 | 1.0 | -0.011752 | -5.199338 | 5.199338 | 10.398675 | 0.998948 | 0.005463 | 0.999474 |
63 | 63 | f92 | num | 0.000311 | 13.831340 | -9.761770 | 18.412800 | 28.174570 | 489279 | 0.300739 | 0.155260 | 0.548396 | -8.436891 | -70.974893 | 16.820566 | 87.795459 | 1.0 | -1.980046e-17 | 1.0 | 0.009430 | -5.199338 | 5.199338 | 10.398675 | 0.998379 | 0.001588 | 0.999189 |
64 | 64 | f12 | num | 0.000271 | 3.489836 | -0.678145 | 2.361230 | 3.039375 | 487380 | 0.044462 | 0.128958 | 0.210861 | -1.132124 | -33.850966 | 2.987201 | 36.838167 | 1.0 | -2.678272e-16 | 1.0 | -0.003708 | -5.199338 | 5.199338 | 10.398675 | 0.992869 | 0.001031 | 0.996428 |
65 | 65 | f15 | num | 0.000269 | -0.027524 | -1.450540 | 6.774750 | 8.225290 | 399560 | 2.344161 | 2.631040 | 1.531065 | -0.132876 | -2.982384 | 2.553688 | 5.536073 | 1.0 | -8.052818e-18 | 1.0 | 0.008152 | -5.199338 | 5.199338 | 10.398675 | 0.996790 | 0.003228 | 0.998394 |
66 | 66 | f60 | num | 0.000241 | 5.170038 | -1.736520 | 3.697070 | 5.433590 | 500768 | 0.023102 | 0.088858 | 0.151993 | -4.594667 | -90.352555 | 6.448992 | 96.801547 | 1.0 | 1.256240e-16 | 1.0 | -0.009993 | -5.199338 | 5.199338 | 10.398675 | 0.996720 | 0.002991 | 0.998359 |
67 | 67 | f36 | num | 0.000200 | 1.286776 | -2.888940 | 13.029000 | 15.917940 | 470902 | 3.188761 | 1.769510 | 1.785710 | -0.032915 | -10.074323 | 3.053464 | 13.127787 | 1.0 | -1.998520e-16 | 1.0 | -0.003141 | -5.199338 | 5.199338 | 10.398675 | 1.007152 | -0.003715 | 1.003570 |
68 | 68 | f61 | num | 0.000179 | 5.328209 | -1.326570 | 4.305100 | 5.631670 | 489110 | 0.142855 | 0.144262 | 0.377962 | -4.353915 | -58.285719 | 3.689689 | 61.975408 | 1.0 | -1.159132e-16 | 1.0 | -0.003061 | -5.199338 | 5.199338 | 10.398675 | 1.001801 | 0.003585 | 1.000900 |
69 | 69 | f56 | num | 0.000158 | 5.437561 | -3.247350 | 10.710600 | 13.957950 | 509895 | 0.726495 | 0.270019 | 0.852347 | -5.022595 | -51.236193 | 4.172564 | 55.408757 | 1.0 | 2.453741e-17 | 1.0 | -0.002649 | -5.199338 | 5.199338 | 10.398675 | 0.998047 | 0.002005 | 0.999023 |
70 | 70 | f76 | num | 0.000131 | 14.336178 | -18.456000 | 38.755000 | 57.211000 | 488819 | 1.455101 | 0.231822 | 1.206275 | -10.181363 | -76.566735 | 16.210437 | 92.777172 | 1.0 | 3.746929e-17 | 1.0 | 0.009794 | -5.199338 | 5.199338 | 10.398675 | 0.997907 | 0.002163 | 0.998953 |
71 | 71 | Total | NaN | NaN | 338.827941 | 262.976034 | 775.577536 | 1038.553570 | 31566176 | 126.213955 | 93.934214 | 75.691241 | 249.026592 | 2753.693424 | 463.791196 | 3217.484619 | 71.0 | 1.322472e-14 | 71.0 | 0.416619 | 369.152968 | 369.152968 | 738.305937 | 96.894647 | 0.225887 | 75.131506 |
As seen above, this is a classic binary classification task: continuous numeric x values and binary y values (1 or 0). Formally, only 70 features have mutual information other than zero, and deleting the zero-valued features should increase accuracy; however, I found experimentally that deleting them decreases accuracy by 3-6%. The file train_mutual_clf.csv
with the mutual information values can be downloaded here. Let's determine the number of outliers.
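For reference, a minimal sketch of how such values could be computed with scikit-learn's mutual_info_classif (this is my illustration, not the custom function that produced train_mutual_clf.csv; subsampling is assumed to keep the runtime reasonable on 600,000 rows):
from sklearn.feature_selection import mutual_info_classif
# Minimal sketch: estimate mutual information between each feature and
# the binary target (last column) on a subsample of the train dataframe.
sample = train.sample(50_000, random_state=42)
mi = pd.Series(mutual_info_classif(sample.iloc[:, :-1], sample.iloc[:, -1],
                                   random_state=42),
               index=sample.columns[:-1])
print(mi.sort_values(ascending=False).head(10))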
%%time
def dfoutlsidx(dataframe):
    """
    Find the indexes of rows containing outliers, i.e. values below
    quartile 25% - 1.5*IQR or above quartile 75% + 1.5*IQR, for the
    continuous features.
    Parameters
    ----------
    dataframe : pandas dataframe to test.
    Returns
    -------
    list of indexes of rows with outlier values.
    """
    df = dataframe.copy()
    outliers = set()
    features = list(df.columns)[:-1]
    for feature in features:
        quant_25 = df[feature].quantile(0.25)
        quant_75 = df[feature].quantile(0.75)
        delta = 1.5*(quant_75 - quant_25)
        feature_idx = set(df[(df[feature] < quant_25 - delta)
                             | (df[feature] > quant_75 + delta)].index)
        outliers |= feature_idx
    return list(outliers)
outls_idx = dfoutlsidx(train)
print("Train dataset contains {:,} rows with outlier values out of {:,} rows.\n\
Share of outliers {:.3f}%".format(len(outls_idx), train.shape[0],
                                  len(outls_idx)/train.shape[0]*100.0))
Train dataset contains 596,529 rows with outlier values out of 600,000 rows. Share of outliers 99.421% CPU times: user 5.39 s, sys: 251 ms, total: 5.65 s Wall time: 5.67 s
As seen above, almost every row of the train dataset contains at least one outlier.
To be honest, I was surprised that DL, like classical ML, has no clear criteria for building a model: the number of hidden layers, the total number of neurons, and the number of neurons per layer all have to be chosen empirically, based on rules of thumb and your own experience or imagination. For example:
I have a few rules of thumb that I use to choose hidden layers. There are many rule-of-thumb methods for determining an acceptable number of neurons to use in the hidden layers, such as the following:
1. The number of hidden neurons should be between the size of the input layer and the size of the output layer.
2. The number of hidden neurons should be 2/3 the size of the input layer, plus the size of the output layer.
3. The number of hidden neurons should be less than twice the size of the input layer.
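Applied to this dataset (100 input features, 1 output), the three rules give rough bounds; a quick illustrative calculation of my own:
n_in, n_out = 100, 1
rule_1 = (n_out, n_in)                # between output and input size: 1..100
rule_2 = round(2 / 3 * n_in) + n_out  # 2/3 of input plus output: 67 + 1 = 68
rule_3 = 2 * n_in                     # strictly below twice the input: < 200
print(rule_1, rule_2, rule_3)
The 32-unit hidden layers that AutoKeras settles on below sit comfortably inside all three bounds.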
In the first iteration I tried xgboost, random forest, and SVM from scikit-learn to solve this binary classification problem. When, after more than 12 hours, the cross-validation pipeline with these three algorithms on the train dataset still had not finished, I decided to use tensorflow. The maximum accuracy came from SVM (train:test ratio = 1:4, 3 trials), ~0.64.
In the second iteration I started with Tensorflow and tried to write my own functions to determine the optimal number of neurons, hidden layers, activation functions, etc. I tried KerasClassifier, but I could not connect the validation loss metric to it; as a result, I got a pile of spaghetti code with monstrous time costs. Realizing that I was sinking into an abyss of time-consuming function writing, I searched the Internet and found AutoKeras.
With the help of AutoKeras and Keras Tuner I created 3 models in about five hours:
* automl_clf - with default settings from AutoKeras.
* automl_tuner - automl_clf regularized with l1 and l2.
* automl_regr - a standard linear regression model with default settings from AutoKeras.
The code for finding all three models can be found at this link. After finishing the work, these models were exported to json or inserted into a text file using the get_config method and typed in manually. With batch_size = 1024-2048, the calculation speed for defining models and hyperparameters is acceptable. Experimentally, the maximum validation_accuracy and minimum validation_loss were given only by StandardScaler with the automl_clf model.
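These exports carry the architecture but not the weights; purely as an illustration (my sketch, not the notebook's actual export code), round-tripping a compiled model such as the automl_clf defined below looks roughly like this:
from tensorflow.keras.models import model_from_json
model = automl_clf(train.shape[1]-1)
cfg_json = model.to_json()           # architecture as a JSON string
cfg_dict = model.get_config()        # architecture as a Python dict
rebuilt = model_from_json(cfg_json)  # rebuilds the layers, not the weights
Load required functions: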
%%time
def dfsplit(dataframe, scaler=None):
    """
    Split dataframe into x_train, y_train, x_test, y_test with ratio 4:1.
    Possible scale/transform options for the x features:
    1. None - no scaling or transform
    2. "ptbc" - PowerTransformer by Box-Cox
    3. "ptyj" - PowerTransformer by Yeo-Johnson
    4. "rb" - RobustScaler
    5. "ss" - StandardScaler
    To prevent data leakage, a separate scaler/transformer instance is
    fitted on each of the train and test parts.
    Parameters
    ----------
    dataframe : pandas dataframe with numeric values of features.
    scaler : None or str, optional. The default is None.
    Returns
    -------
    x_train, x_test, y_train, y_test - numpy arrays.
    """
    scalers = [None, "ptbc", "ptyj", "rb", "ss"]
    if scaler not in scalers:
        return "Value error for 'scaler'!", "Enter None or", \
            "'ptbc' or", "'ptyj' or 'rb' or 'ss' value for scaler!"
    df = dataframe.copy()
    # Box-Cox needs strictly positive inputs, hence MinMaxScaler to (1, 2)
    mms_train = MinMaxScaler(feature_range=(1, 2))
    mms_test = MinMaxScaler(feature_range=(1, 2))
    ptbc_train = PowerTransformer(method='box-cox')
    ptbc_test = PowerTransformer(method='box-cox')
    ptyj_train = PowerTransformer()
    ptyj_test = PowerTransformer()
    rb_train = RobustScaler(unit_variance=True)
    rb_test = RobustScaler(unit_variance=True)
    ss_train = StandardScaler()
    ss_test = StandardScaler()
    # split dataframe into train and test x and y nparrays
    x_all, y_all = df.iloc[:, :-1].values, np.ravel(df.iloc[:, [-1]].values)
    x_train, x_test, y_train, y_test = train_test_split(x_all, y_all,
                                                        test_size=0.2,
                                                        random_state=42,
                                                        stratify=y_all)
    # Transform or scale
    if scaler == "ptbc":
        x_train, x_test = \
            ptbc_train.fit_transform(mms_train.fit_transform(x_train)), \
            ptbc_test.fit_transform(mms_test.fit_transform(x_test))
    elif scaler == "ptyj":
        x_train, x_test = \
            ptyj_train.fit_transform(x_train), \
            ptyj_test.fit_transform(x_test)
    elif scaler == "rb":
        x_train, x_test = \
            rb_train.fit_transform(x_train), \
            rb_test.fit_transform(x_test)
    elif scaler == "ss":
        x_train, x_test = \
            ss_train.fit_transform(x_train), \
            ss_test.fit_transform(x_test)
    return x_train, x_test, y_train, y_test
def df_transform(dataframe, scaler=None, y=True):
    """
    Transform a whole dataframe into x (and optionally y) numpy arrays.
    Possible scale/transform options for the x features:
    1. None - no scaling or transform
    2. "ptbc" - PowerTransformer by Box-Cox
    3. "ptyj" - PowerTransformer by Yeo-Johnson
    4. "rb" - RobustScaler
    5. "ss" - StandardScaler
    Parameters
    ----------
    dataframe : pandas dataframe with numeric values of features.
    scaler : None or str, optional. The default is None.
    y : bool, whether the last column is the target. The default is True.
    Returns
    -------
    If y==True: x_all, y_all - numpy arrays.
    If y==False: x_all - numpy array.
    """
    scalers = [None, "ptbc", "ptyj", "rb", "ss"]
    if scaler not in scalers:
        return "Value error for 'scaler'!", "Enter None or", \
            "'ptbc' or", "'ptyj' or 'rb' or 'ss' value for scaler!"
    if y not in [True, False]:
        return "Y value error!", "Enter True or False!"
    df = dataframe.copy()
    mms_all = MinMaxScaler(feature_range=(1, 2))
    ptbc_all = PowerTransformer(method='box-cox')
    ptyj_all = PowerTransformer()
    rb_all = RobustScaler(unit_variance=True)
    ss_all = StandardScaler()
    # split dataframe into x and y nparrays
    if y:
        x_all, y_all = df.iloc[:, :-1].values, np.ravel(df.iloc[:, [-1]].values)
    else:
        x_all = df.iloc[:, :].values
    # Transform or scale x_all
    if scaler == "ptbc":
        x_all = ptbc_all.fit_transform(mms_all.fit_transform(x_all))
    elif scaler == "ptyj":
        x_all = ptyj_all.fit_transform(x_all)
    elif scaler == "rb":
        x_all = rb_all.fit_transform(x_all)
    elif scaler == "ss":
        x_all = ss_all.fit_transform(x_all)
    if y:
        return x_all, y_all
    return x_all
def automl_clf(shape_x, learn_rate=0.01):
    """
    Model created manually from the json file of the auto-keras model.
    Parameters
    ----------
    shape_x : integer, equal to the number of features in the dataset.
    learn_rate : float, value for learning_rate of the optimizer.
        The default is 0.01.
    Returns
    -------
    model : the keras model
    """
    model = Sequential()
    # 0. Input
    model.add(InputLayer(input_shape=(shape_x,), dtype='float64', name="input_1"))
    # Normalization of input == StandardScaler
    model.add(Normalization(name='normalization'))
    # Hidden layer 1
    # 1.1 Linear part of the first hidden layer
    model.add(layers.Dense(units=32, kernel_initializer="GlorotUniform",
                           bias_initializer='zeros', name="layer_1"))
    # 1.2 Activation for the first hidden layer
    model.add(layers.Activation(activations.relu, name="relu_1"))
    model.add(layers.Dropout(.25))
    # Hidden layer 2
    # 2.1 Linear part of the second hidden layer
    model.add(layers.Dense(units=32, kernel_initializer="GlorotUniform",
                           bias_initializer='zeros', name="layer_2"))
    # 2.2 Activation for the second hidden layer
    model.add(layers.Activation(activations.relu, name="relu_2"))
    model.add(layers.Dropout(.25))
    # Hidden layer 3
    # 3.1 Linear part of the third hidden layer
    model.add(layers.Dense(units=32, kernel_initializer="GlorotUniform",
                           bias_initializer='zeros', name="layer_3"))
    # 3.2 Activation for the third hidden layer
    model.add(layers.Activation(activations.relu, name="relu_3"))
    model.add(layers.Dropout(.25))
    # 4. Final sigmoid
    model.add(layers.Dense(units=1, kernel_initializer="GlorotUniform",
                           bias_initializer='zeros', name="layer_4"))
    model.add(layers.Activation(activations.sigmoid, name="sigmoid_1"))
    model.compile(loss='binary_crossentropy',
                  optimizer=tf.keras.optimizers.Adam(learning_rate=learn_rate),
                  metrics=['accuracy', tf.keras.metrics.AUC(name='auc')])
    return model
def train_model(model, dataframe, batch_sz=16384, stop_no=30, scaler=None,
                estimator="clf"):
    """
    Scale / transform the numeric features, then fit and train the model.
    Parameters
    ----------
    model : keras model for fitting the data.
    dataframe : pandas dataframe with numeric values of x and y.
    batch_sz : integer, size of the batch, optional. The default is 16384.
    stop_no : integer, patience (in epochs) for the early-stopping
        callback, optional. The default is 30.
    scaler : None or str, available values - None, "ptbc", "ptyj",
        "rb", "ss", optional. The default is None.
    estimator : str, "clf" for a classifier or "regr" for a regressor,
        optional. The default is "clf".
    Returns
    -------
    model : keras fitted and trained model
    hist_stat : pandas dataframe with the metric values per epoch,
        sorted from best to worst validation metric.
    """
    estimators = ["clf", "regr"]
    if estimator not in estimators:
        return "Estimator value error!", "Enter 'clf' or 'regr'!"
    callbacks = [EarlyStopping(monitor='val_loss', mode='min',
                               patience=stop_no, restore_best_weights=True)]
    df = dataframe.copy()
    # split and scale or transform features
    x_train, x_test, y_train, y_test = dfsplit(df, scaler=scaler)
    # Fit and train the model
    history = model.fit(x_train, y_train,
                        batch_size=batch_sz,
                        epochs=10000,
                        validation_data=(x_test, y_test),
                        callbacks=callbacks,
                        verbose=0)
    # Export history to a dataframe
    hist_stat = pd.DataFrame(history.history)
    hist_stat["epochs"] = np.array(list(hist_stat.index)) + 1
    if estimator == "clf":
        hist_stat.sort_values("val_accuracy", ascending=False, inplace=True)
    elif estimator == "regr":
        hist_stat.sort_values("val_mean_squared_error", ascending=True,
                              inplace=True)
    hist_stat.reset_index(drop=True, inplace=True)
    return model, hist_stat
# Get model and model history
automl_clf_ss, automl_stat_clf_ss = train_model(automl_clf(train.shape[1]-1),
train, batch_sz=2048, scaler='ss')
automl_stat_clf_ss
CPU times: user 3min 36s, sys: 23.7 s, total: 4min Wall time: 2min 2s
 | loss | accuracy | auc | val_loss | val_accuracy | val_auc | epochs |
---|---|---|---|---|---|---|---
0 | 0.572249 | 0.742875 | 0.751303 | 0.566111 | 0.745800 | 0.751328 | 61 |
1 | 0.572767 | 0.742058 | 0.749230 | 0.566262 | 0.745733 | 0.750550 | 31 |
2 | 0.572764 | 0.742960 | 0.749994 | 0.566017 | 0.745725 | 0.751667 | 52 |
3 | 0.572399 | 0.742979 | 0.750811 | 0.567126 | 0.745533 | 0.752069 | 57 |
4 | 0.572383 | 0.742588 | 0.751405 | 0.566872 | 0.745400 | 0.751181 | 63 |
5 | 0.572000 | 0.742829 | 0.751501 | 0.566055 | 0.745400 | 0.751388 | 66 |
6 | 0.572843 | 0.742823 | 0.750721 | 0.566150 | 0.745367 | 0.751166 | 55 |
7 | 0.572729 | 0.742723 | 0.750325 | 0.568322 | 0.745367 | 0.751661 | 51 |
8 | 0.572665 | 0.742725 | 0.750836 | 0.566882 | 0.745358 | 0.750852 | 58 |
9 | 0.571894 | 0.742948 | 0.751994 | 0.567372 | 0.745333 | 0.751586 | 65 |
10 | 0.572918 | 0.741852 | 0.749467 | 0.566658 | 0.745308 | 0.752903 | 34 |
11 | 0.572576 | 0.742590 | 0.751180 | 0.567077 | 0.745200 | 0.751381 | 64 |
12 | 0.572423 | 0.742306 | 0.749117 | 0.566411 | 0.745158 | 0.752193 | 38 |
13 | 0.572387 | 0.742481 | 0.751792 | 0.567532 | 0.745150 | 0.750670 | 73 |
14 | 0.573101 | 0.742681 | 0.749941 | 0.566866 | 0.745142 | 0.752578 | 54 |
15 | 0.572095 | 0.742200 | 0.752135 | 0.567248 | 0.745133 | 0.750780 | 77 |
16 | 0.572287 | 0.742219 | 0.751834 | 0.568474 | 0.745092 | 0.750504 | 67 |
17 | 0.572447 | 0.742288 | 0.751649 | 0.567554 | 0.745058 | 0.750744 | 74 |
18 | 0.572518 | 0.742675 | 0.751478 | 0.567038 | 0.745050 | 0.751757 | 56 |
19 | 0.573895 | 0.741990 | 0.749593 | 0.566818 | 0.745050 | 0.750900 | 48 |
20 | 0.572971 | 0.742342 | 0.750535 | 0.567284 | 0.745042 | 0.751149 | 60 |
21 | 0.572600 | 0.741996 | 0.751341 | 0.568255 | 0.744992 | 0.751208 | 80 |
22 | 0.572667 | 0.742006 | 0.749105 | 0.566638 | 0.744900 | 0.750165 | 30 |
23 | 0.572484 | 0.742529 | 0.751072 | 0.566947 | 0.744858 | 0.750195 | 68 |
24 | 0.573046 | 0.741479 | 0.750917 | 0.567996 | 0.744842 | 0.750792 | 82 |
25 | 0.573016 | 0.741946 | 0.748944 | 0.566424 | 0.744825 | 0.753039 | 35 |
26 | 0.573147 | 0.742619 | 0.750118 | 0.567434 | 0.744792 | 0.751644 | 49 |
27 | 0.572210 | 0.742950 | 0.751231 | 0.567147 | 0.744758 | 0.751273 | 62 |
28 | 0.572756 | 0.742950 | 0.750213 | 0.566577 | 0.744758 | 0.751110 | 53 |
29 | 0.573276 | 0.742354 | 0.749500 | 0.567602 | 0.744750 | 0.750876 | 46 |
30 | 0.572557 | 0.741667 | 0.749384 | 0.566904 | 0.744742 | 0.750006 | 22 |
31 | 0.572326 | 0.742823 | 0.751194 | 0.568027 | 0.744742 | 0.751130 | 59 |
32 | 0.572861 | 0.742581 | 0.749754 | 0.567374 | 0.744733 | 0.751843 | 50 |
33 | 0.573019 | 0.741385 | 0.751105 | 0.567278 | 0.744708 | 0.750715 | 76 |
34 | 0.571707 | 0.743375 | 0.751542 | 0.567175 | 0.744683 | 0.750387 | 69 |
35 | 0.571842 | 0.742585 | 0.752318 | 0.567484 | 0.744675 | 0.750624 | 79 |
36 | 0.572465 | 0.742333 | 0.751466 | 0.567651 | 0.744667 | 0.750562 | 71 |
37 | 0.573175 | 0.741950 | 0.748546 | 0.566544 | 0.744617 | 0.752975 | 32 |
38 | 0.572759 | 0.742238 | 0.749554 | 0.567501 | 0.744617 | 0.751355 | 37 |
39 | 0.572553 | 0.742448 | 0.749680 | 0.569217 | 0.744583 | 0.751434 | 40 |
40 | 0.573192 | 0.742233 | 0.749129 | 0.566629 | 0.744558 | 0.752250 | 45 |
41 | 0.573341 | 0.741100 | 0.747934 | 0.566518 | 0.744558 | 0.752920 | 20 |
42 | 0.573462 | 0.741231 | 0.747909 | 0.566732 | 0.744558 | 0.752283 | 16 |
43 | 0.573001 | 0.741956 | 0.749188 | 0.567709 | 0.744483 | 0.751466 | 27 |
44 | 0.572733 | 0.742331 | 0.749740 | 0.566984 | 0.744475 | 0.752007 | 42 |
45 | 0.573353 | 0.741631 | 0.748042 | 0.566738 | 0.744475 | 0.752803 | 25 |
46 | 0.573543 | 0.741125 | 0.747935 | 0.566885 | 0.744450 | 0.751534 | 21 |
47 | 0.574474 | 0.740108 | 0.747124 | 0.567090 | 0.744433 | 0.751523 | 14 |
48 | 0.572400 | 0.742169 | 0.749808 | 0.567581 | 0.744425 | 0.752294 | 36 |
49 | 0.572895 | 0.741850 | 0.749065 | 0.567436 | 0.744392 | 0.751382 | 26 |
50 | 0.572521 | 0.742102 | 0.751419 | 0.567536 | 0.744358 | 0.750444 | 70 |
51 | 0.573760 | 0.741071 | 0.747520 | 0.567493 | 0.744350 | 0.749793 | 17 |
52 | 0.573553 | 0.741704 | 0.749417 | 0.566894 | 0.744283 | 0.752147 | 44 |
53 | 0.572921 | 0.742052 | 0.748629 | 0.566992 | 0.744267 | 0.750628 | 39 |
54 | 0.573385 | 0.742600 | 0.749405 | 0.568246 | 0.744242 | 0.750961 | 47 |
55 | 0.573370 | 0.742562 | 0.749721 | 0.569269 | 0.744225 | 0.751155 | 41 |
56 | 0.573080 | 0.741519 | 0.748720 | 0.568265 | 0.744200 | 0.750020 | 23 |
57 | 0.572593 | 0.742096 | 0.751550 | 0.568130 | 0.744175 | 0.750661 | 78 |
58 | 0.573009 | 0.742046 | 0.748605 | 0.567240 | 0.744150 | 0.750471 | 29 |
59 | 0.573350 | 0.741492 | 0.748024 | 0.567294 | 0.744133 | 0.752180 | 24 |
60 | 0.573085 | 0.741817 | 0.749058 | 0.567212 | 0.744108 | 0.752934 | 28 |
61 | 0.572230 | 0.742444 | 0.751803 | 0.569144 | 0.743992 | 0.749834 | 75 |
62 | 0.574209 | 0.740269 | 0.747588 | 0.567868 | 0.743983 | 0.751172 | 9 |
63 | 0.573387 | 0.741119 | 0.748346 | 0.567258 | 0.743925 | 0.751552 | 19 |
64 | 0.574026 | 0.740444 | 0.747679 | 0.567574 | 0.743917 | 0.750286 | 11 |
65 | 0.574263 | 0.740946 | 0.747417 | 0.567054 | 0.743875 | 0.750504 | 15 |
66 | 0.572238 | 0.742400 | 0.749595 | 0.567204 | 0.743833 | 0.752923 | 33 |
67 | 0.574306 | 0.739992 | 0.747847 | 0.567695 | 0.743783 | 0.749901 | 10 |
68 | 0.573567 | 0.740933 | 0.747373 | 0.567772 | 0.743692 | 0.750011 | 18 |
69 | 0.573636 | 0.741112 | 0.747618 | 0.567726 | 0.743575 | 0.750249 | 12 |
70 | 0.572546 | 0.742058 | 0.751438 | 0.568405 | 0.743417 | 0.750262 | 81 |
71 | 0.572155 | 0.742240 | 0.751943 | 0.568617 | 0.743292 | 0.750082 | 72 |
72 | 0.573078 | 0.741948 | 0.748951 | 0.570107 | 0.743258 | 0.750700 | 43 |
73 | 0.574041 | 0.740725 | 0.746969 | 0.567848 | 0.743075 | 0.751922 | 13 |
74 | 0.575316 | 0.739306 | 0.745837 | 0.568366 | 0.742867 | 0.749600 | 6 |
75 | 0.574476 | 0.739456 | 0.747433 | 0.569568 | 0.742842 | 0.750158 | 7 |
76 | 0.575532 | 0.738715 | 0.746318 | 0.568063 | 0.742808 | 0.751733 | 5 |
77 | 0.574444 | 0.739808 | 0.747420 | 0.568355 | 0.742633 | 0.749971 | 8 |
78 | 0.576279 | 0.738240 | 0.746043 | 0.569435 | 0.742183 | 0.749328 | 4 |
79 | 0.576734 | 0.737181 | 0.746870 | 0.569223 | 0.741300 | 0.749261 | 3 |
80 | 0.579014 | 0.735090 | 0.745487 | 0.569913 | 0.739967 | 0.751876 | 2 |
81 | 0.600305 | 0.710004 | 0.736945 | 0.574612 | 0.738367 | 0.748244 | 1 |
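For completeness, a hypothetical sketch of scoring the test set with the fitted classifier (this step is not shown in the original section; the "id"/"target" column names and the use of df_transform with y=False are my assumptions):
# Scale the test features with a fresh StandardScaler, as df_transform does,
# then predict probabilities and write a submission file.
x_submit = df_transform(test.set_index("id"), scaler="ss", y=False)
submission["target"] = automl_clf_ss.predict(x_submit).ravel()
submission.to_csv("submission.csv", index=False)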
automl_tuner
%%time
def automl_tuner(shape_x):
    """
    automl_clf regularized with l1/l2; the regularization and
    learning-rate values were found with Keras Tuner.
    """
    learning_rate = 0.0012589254117941675
    l1_kernel = 0.0023713737056616554
    l2_bias = 0.0007943282347242813
    l1_val = 0.0001258925411794166
    model = Sequential()
    # 0. Input
    model.add(InputLayer(input_shape=(shape_x,), dtype='float64', name="input_1"))
    model.add(Normalization(name='normalization'))
    # Hidden layer 1
    # 1.1 Linear part of the first hidden layer
    model.add(layers.Dense(units=32, kernel_initializer="GlorotUniform",
                           bias_initializer='zeros', name="layer_1"))
    # l1/l2 regularization
    model.add(layers.Dense(
        units=32, kernel_regularizer=tf.keras.regularizers.l1(l1_kernel),
        bias_regularizer=tf.keras.regularizers.l2(l2_bias),
        activity_regularizer=tf.keras.regularizers.l1(l1_val)))
    # 1.2 Activation for the first hidden layer
    model.add(layers.Activation(activations.relu, name="relu_1"))
    model.add(layers.Dropout(.25))
    # Hidden layer 2
    # 2.1 Linear part of the second hidden layer
    model.add(layers.Dense(units=32, kernel_initializer="GlorotUniform",
                           bias_initializer='zeros', name="layer_2"))
    # l1/l2 regularization
    model.add(layers.Dense(
        units=32, kernel_regularizer=tf.keras.regularizers.l1(l1_kernel),
        bias_regularizer=tf.keras.regularizers.l2(l2_bias),
        activity_regularizer=tf.keras.regularizers.l1(l1_val)))
    # 2.2 Activation for the second hidden layer
    model.add(layers.Activation(activations.relu, name="relu_2"))
    model.add(layers.Dropout(.25))
    # Hidden layer 3
    # 3.1 Linear part of the third hidden layer
    model.add(layers.Dense(units=32, kernel_initializer="GlorotUniform",
                           bias_initializer='zeros', name="layer_3"))
    # l1/l2 regularization
    model.add(layers.Dense(
        units=32, kernel_regularizer=tf.keras.regularizers.l1(l1_kernel),
        bias_regularizer=tf.keras.regularizers.l2(l2_bias),
        activity_regularizer=tf.keras.regularizers.l1(l1_val)))
    # 3.2 Activation for the third hidden layer
    model.add(layers.Activation(activations.relu, name="relu_3"))
    model.add(layers.Dropout(.25))
    # 4. Final sigmoid
    model.add(layers.Dense(units=1, kernel_initializer="GlorotUniform",
                           bias_initializer='zeros', name="layer_4"))
    model.add(layers.Activation(activations.sigmoid, name="sigmoid_1"))
    model.compile(loss='binary_crossentropy',
                  optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  metrics=['accuracy', tf.keras.metrics.AUC(name='auc')])
    return model
automl_clf_ss_tuner, automl_stat_clf_ss_tuner = train_model(automl_tuner(train.shape[1]-1),
train, batch_sz=2048, scaler='ss')
automl_stat_clf_ss_tuner
CPU times: user 7min 52s, sys: 48.2 s, total: 8min 40s Wall time: 4min 28s
 | loss | accuracy | auc | val_loss | val_accuracy | val_auc | epochs |
---|---|---|---|---|---|---|---
0 | 0.576827 | 0.745129 | 0.747650 | 0.569928 | 0.748158 | 0.749932 | 101 |
1 | 0.576777 | 0.745017 | 0.748339 | 0.570304 | 0.748083 | 0.749636 | 85 |
2 | 0.576838 | 0.745348 | 0.747420 | 0.571168 | 0.748067 | 0.749590 | 111 |
3 | 0.576545 | 0.745310 | 0.747895 | 0.570381 | 0.747817 | 0.749620 | 115 |
4 | 0.576606 | 0.745267 | 0.748264 | 0.570141 | 0.747767 | 0.749373 | 108 |
5 | 0.576974 | 0.745260 | 0.747963 | 0.570880 | 0.747767 | 0.750217 | 93 |
6 | 0.577697 | 0.744756 | 0.748278 | 0.572759 | 0.747767 | 0.750664 | 69 |
7 | 0.576882 | 0.745200 | 0.748125 | 0.571151 | 0.747742 | 0.749463 | 97 |
8 | 0.576331 | 0.745273 | 0.748320 | 0.570086 | 0.747742 | 0.749627 | 119 |
9 | 0.576441 | 0.745529 | 0.748171 | 0.571553 | 0.747700 | 0.750024 | 120 |
10 | 0.577146 | 0.745025 | 0.747596 | 0.570742 | 0.747650 | 0.749735 | 95 |
11 | 0.577197 | 0.745306 | 0.747669 | 0.570508 | 0.747633 | 0.750315 | 83 |
12 | 0.576039 | 0.745371 | 0.748358 | 0.572854 | 0.747617 | 0.750084 | 130 |
13 | 0.576720 | 0.745335 | 0.747923 | 0.570326 | 0.747608 | 0.750183 | 102 |
14 | 0.578297 | 0.744579 | 0.748126 | 0.571785 | 0.747600 | 0.750785 | 55 |
15 | 0.576840 | 0.745196 | 0.748040 | 0.570593 | 0.747575 | 0.750646 | 98 |
16 | 0.577299 | 0.745144 | 0.748008 | 0.570812 | 0.747492 | 0.749621 | 86 |
17 | 0.576229 | 0.745252 | 0.748091 | 0.570517 | 0.747483 | 0.750285 | 112 |
18 | 0.577589 | 0.744825 | 0.747642 | 0.571034 | 0.747467 | 0.750248 | 72 |
19 | 0.578495 | 0.744577 | 0.747393 | 0.571788 | 0.747458 | 0.750294 | 56 |
20 | 0.577092 | 0.745096 | 0.748169 | 0.570329 | 0.747458 | 0.750269 | 90 |
21 | 0.576569 | 0.745404 | 0.748073 | 0.570582 | 0.747458 | 0.750623 | 107 |
22 | 0.577253 | 0.744950 | 0.747691 | 0.570675 | 0.747442 | 0.749862 | 82 |
23 | 0.576801 | 0.745285 | 0.747777 | 0.570914 | 0.747425 | 0.750332 | 105 |
24 | 0.578037 | 0.745092 | 0.747985 | 0.572456 | 0.747425 | 0.749659 | 131 |
25 | 0.576180 | 0.745310 | 0.748140 | 0.569953 | 0.747400 | 0.750081 | 127 |
26 | 0.577962 | 0.744785 | 0.747454 | 0.572193 | 0.747400 | 0.750675 | 66 |
27 | 0.577320 | 0.744927 | 0.747990 | 0.571312 | 0.747383 | 0.749765 | 75 |
28 | 0.576860 | 0.745175 | 0.747512 | 0.570320 | 0.747350 | 0.750363 | 96 |
29 | 0.576707 | 0.745252 | 0.748086 | 0.570769 | 0.747350 | 0.749934 | 94 |
30 | 0.578757 | 0.744300 | 0.748489 | 0.572868 | 0.747342 | 0.751071 | 48 |
31 | 0.576384 | 0.745279 | 0.748292 | 0.570420 | 0.747333 | 0.750321 | 116 |
32 | 0.576570 | 0.745425 | 0.748164 | 0.570095 | 0.747300 | 0.749540 | 123 |
33 | 0.580042 | 0.743700 | 0.748607 | 0.573350 | 0.747283 | 0.751127 | 35 |
34 | 0.577008 | 0.745202 | 0.747792 | 0.570529 | 0.747275 | 0.749803 | 99 |
35 | 0.577379 | 0.745019 | 0.748296 | 0.570867 | 0.747275 | 0.749853 | 76 |
36 | 0.576928 | 0.744946 | 0.748328 | 0.570831 | 0.747258 | 0.750464 | 92 |
37 | 0.577844 | 0.744685 | 0.748132 | 0.571456 | 0.747250 | 0.750513 | 64 |
38 | 0.577350 | 0.745062 | 0.747677 | 0.570909 | 0.747250 | 0.750222 | 79 |
39 | 0.576420 | 0.745387 | 0.747654 | 0.570317 | 0.747225 | 0.750451 | 118 |
40 | 0.576288 | 0.745379 | 0.748267 | 0.570023 | 0.747217 | 0.750308 | 114 |
41 | 0.577524 | 0.744821 | 0.747766 | 0.571366 | 0.747208 | 0.750134 | 73 |
42 | 0.577423 | 0.744883 | 0.748435 | 0.571959 | 0.747192 | 0.750662 | 68 |
43 | 0.576455 | 0.745542 | 0.747925 | 0.570341 | 0.747192 | 0.750409 | 121 |
44 | 0.576646 | 0.745117 | 0.747743 | 0.570979 | 0.747183 | 0.749937 | 109 |
45 | 0.576342 | 0.745273 | 0.748367 | 0.569934 | 0.747167 | 0.749827 | 126 |
46 | 0.577974 | 0.744725 | 0.748126 | 0.571555 | 0.747158 | 0.750022 | 61 |
47 | 0.576664 | 0.745358 | 0.747789 | 0.570519 | 0.747158 | 0.749600 | 104 |
48 | 0.576337 | 0.745404 | 0.748101 | 0.570175 | 0.747150 | 0.749744 | 117 |
49 | 0.577764 | 0.744885 | 0.748087 | 0.571743 | 0.747142 | 0.749877 | 65 |
50 | 0.579325 | 0.744169 | 0.747934 | 0.572926 | 0.747133 | 0.750974 | 42 |
51 | 0.578113 | 0.744885 | 0.748136 | 0.571795 | 0.747117 | 0.749958 | 70 |
52 | 0.576193 | 0.745342 | 0.748276 | 0.570660 | 0.747117 | 0.750407 | 110 |
53 | 0.578035 | 0.744735 | 0.748323 | 0.571689 | 0.747100 | 0.750300 | 60 |
54 | 0.577371 | 0.745008 | 0.747375 | 0.570576 | 0.747092 | 0.749559 | 84 |
55 | 0.577416 | 0.744858 | 0.748582 | 0.571681 | 0.747092 | 0.750144 | 71 |
56 | 0.579076 | 0.744117 | 0.748115 | 0.572303 | 0.747083 | 0.750833 | 44 |
57 | 0.578785 | 0.744362 | 0.747402 | 0.571949 | 0.747075 | 0.750390 | 52 |
58 | 0.578059 | 0.744585 | 0.748270 | 0.571969 | 0.747075 | 0.749627 | 57 |
59 | 0.577494 | 0.744873 | 0.747821 | 0.570901 | 0.747075 | 0.750615 | 74 |
60 | 0.576833 | 0.745079 | 0.747662 | 0.570994 | 0.747058 | 0.750137 | 124 |
61 | 0.577613 | 0.744840 | 0.748223 | 0.571565 | 0.747042 | 0.750784 | 62 |
62 | 0.576928 | 0.744867 | 0.748674 | 0.571660 | 0.747033 | 0.750287 | 80 |
63 | 0.576630 | 0.745256 | 0.747880 | 0.571012 | 0.747017 | 0.750261 | 125 |
64 | 0.576683 | 0.745217 | 0.748683 | 0.570710 | 0.747008 | 0.750355 | 88 |
65 | 0.576408 | 0.745298 | 0.747916 | 0.570342 | 0.747008 | 0.749417 | 113 |
66 | 0.576537 | 0.745302 | 0.748386 | 0.571964 | 0.746992 | 0.750495 | 106 |
67 | 0.578814 | 0.744446 | 0.747928 | 0.572279 | 0.746983 | 0.751151 | 49 |
68 | 0.578645 | 0.744581 | 0.747568 | 0.572624 | 0.746983 | 0.750256 | 53 |
69 | 0.576376 | 0.745313 | 0.748420 | 0.570683 | 0.746967 | 0.750094 | 103 |
70 | 0.580356 | 0.743967 | 0.748068 | 0.574479 | 0.746958 | 0.750931 | 33 |
71 | 0.578820 | 0.744444 | 0.747811 | 0.572394 | 0.746942 | 0.749828 | 50 |
72 | 0.576381 | 0.745252 | 0.748933 | 0.570939 | 0.746925 | 0.750285 | 91 |
73 | 0.580101 | 0.744054 | 0.747802 | 0.573637 | 0.746925 | 0.750868 | 36 |
74 | 0.578152 | 0.744721 | 0.747269 | 0.571721 | 0.746908 | 0.751088 | 63 |
75 | 0.578869 | 0.744044 | 0.748282 | 0.573015 | 0.746900 | 0.750570 | 47 |
76 | 0.579016 | 0.744267 | 0.748176 | 0.572387 | 0.746883 | 0.750805 | 45 |
77 | 0.576376 | 0.745550 | 0.747436 | 0.570685 | 0.746875 | 0.750100 | 129 |
78 | 0.577174 | 0.745183 | 0.748232 | 0.571940 | 0.746875 | 0.750647 | 78 |
79 | 0.579461 | 0.744254 | 0.747774 | 0.573289 | 0.746875 | 0.749895 | 41 |
80 | 0.580608 | 0.743894 | 0.748329 | 0.574103 | 0.746850 | 0.750523 | 32 |
81 | 0.577352 | 0.745067 | 0.747397 | 0.571169 | 0.746842 | 0.749793 | 81 |
82 | 0.576621 | 0.745179 | 0.748219 | 0.570384 | 0.746817 | 0.750109 | 100 |
83 | 0.578446 | 0.744633 | 0.748199 | 0.571839 | 0.746808 | 0.750826 | 51 |
84 | 0.581902 | 0.743335 | 0.747872 | 0.574934 | 0.746800 | 0.751172 | 26 |
85 | 0.577849 | 0.744794 | 0.747970 | 0.571574 | 0.746792 | 0.750618 | 67 |
86 | 0.579009 | 0.744317 | 0.747742 | 0.572602 | 0.746792 | 0.751027 | 46 |
87 | 0.580917 | 0.743660 | 0.748471 | 0.574635 | 0.746767 | 0.751015 | 30 |
88 | 0.580795 | 0.743656 | 0.748622 | 0.574454 | 0.746767 | 0.750944 | 31 |
89 | 0.576901 | 0.745256 | 0.747993 | 0.571238 | 0.746750 | 0.749861 | 87 |
90 | 0.585136 | 0.741912 | 0.748155 | 0.577663 | 0.746750 | 0.751334 | 16 |
91 | 0.579418 | 0.743998 | 0.748315 | 0.572922 | 0.746725 | 0.750782 | 40 |
92 | 0.576407 | 0.745354 | 0.747509 | 0.571384 | 0.746717 | 0.749565 | 128 |
93 | 0.581425 | 0.743508 | 0.748474 | 0.574768 | 0.746692 | 0.751375 | 27 |
94 | 0.585068 | 0.742348 | 0.747726 | 0.577343 | 0.746642 | 0.751366 | 17 |
95 | 0.583173 | 0.742710 | 0.748262 | 0.575827 | 0.746567 | 0.751295 | 22 |
96 | 0.577279 | 0.744956 | 0.748200 | 0.571250 | 0.746567 | 0.749985 | 77 |
97 | 0.579935 | 0.744135 | 0.748088 | 0.573488 | 0.746567 | 0.750537 | 37 |
98 | 0.579674 | 0.743906 | 0.748169 | 0.573339 | 0.746542 | 0.750402 | 39 |
99 | 0.578206 | 0.744521 | 0.748146 | 0.573074 | 0.746492 | 0.750686 | 59 |
100 | 0.582154 | 0.742988 | 0.748564 | 0.575500 | 0.746450 | 0.751228 | 24 |
101 | 0.578343 | 0.744552 | 0.748170 | 0.572862 | 0.746425 | 0.750928 | 54 |
102 | 0.575943 | 0.745615 | 0.748723 | 0.570527 | 0.746425 | 0.750114 | 122 |
103 | 0.577064 | 0.745152 | 0.747605 | 0.570899 | 0.746400 | 0.750333 | 89 |
104 | 0.577719 | 0.744675 | 0.748294 | 0.571974 | 0.746267 | 0.750746 | 58 |
105 | 0.586183 | 0.741710 | 0.748288 | 0.578479 | 0.746250 | 0.750957 | 14 |
106 | 0.580663 | 0.743781 | 0.747750 | 0.574192 | 0.746233 | 0.750903 | 34 |
107 | 0.582160 | 0.743213 | 0.748303 | 0.575149 | 0.746183 | 0.750850 | 25 |
108 | 0.581577 | 0.743377 | 0.747708 | 0.575009 | 0.746158 | 0.751211 | 28 |
109 | 0.581363 | 0.743344 | 0.748201 | 0.574980 | 0.746133 | 0.751114 | 29 |
110 | 0.589631 | 0.740598 | 0.748049 | 0.580849 | 0.746117 | 0.750851 | 10 |
111 | 0.584410 | 0.742383 | 0.748255 | 0.577790 | 0.746117 | 0.751056 | 18 |
112 | 0.583764 | 0.742640 | 0.748062 | 0.577420 | 0.746100 | 0.750591 | 20 |
113 | 0.579802 | 0.744002 | 0.747745 | 0.573701 | 0.746025 | 0.750024 | 38 |
114 | 0.584101 | 0.742604 | 0.748128 | 0.577321 | 0.746000 | 0.751188 | 19 |
115 | 0.586995 | 0.741606 | 0.748368 | 0.579095 | 0.745992 | 0.751477 | 13 |
116 | 0.579311 | 0.744140 | 0.747740 | 0.573727 | 0.745917 | 0.750727 | 43 |
117 | 0.587632 | 0.741571 | 0.748039 | 0.580163 | 0.745833 | 0.751543 | 12 |
118 | 0.583908 | 0.742390 | 0.747153 | 0.577511 | 0.745725 | 0.750858 | 21 |
119 | 0.596595 | 0.737694 | 0.746528 | 0.585425 | 0.745592 | 0.751378 | 6 |
120 | 0.585882 | 0.742096 | 0.748063 | 0.578875 | 0.745367 | 0.751613 | 15 |
121 | 0.582879 | 0.743087 | 0.747568 | 0.576513 | 0.745225 | 0.751450 | 23 |
122 | 0.594040 | 0.738858 | 0.746949 | 0.584165 | 0.745017 | 0.751874 | 7 |
123 | 0.588586 | 0.741098 | 0.747670 | 0.580824 | 0.744975 | 0.751153 | 11 |
124 | 0.599079 | 0.736867 | 0.746497 | 0.587461 | 0.744958 | 0.751264 | 5 |
125 | 0.603323 | 0.734644 | 0.746093 | 0.590287 | 0.744867 | 0.750738 | 4 |
126 | 0.592293 | 0.739537 | 0.747403 | 0.583830 | 0.744775 | 0.751367 | 8 |
127 | 0.590773 | 0.740194 | 0.747871 | 0.582991 | 0.744225 | 0.750923 | 9 |
128 | 0.609227 | 0.732608 | 0.746633 | 0.595014 | 0.743075 | 0.750949 | 3 |
129 | 1.152743 | 0.663369 | 0.706438 | 0.673564 | 0.742058 | 0.750840 | 1 |
130 | 0.632776 | 0.727704 | 0.745449 | 0.603399 | 0.741925 | 0.750797 | 2 |
As seen above, the regularized classifier model does not radically improve validation_accuracy; the gain is within the statistical error, while the number of training iterations roughly doubles compared to the non-regularized classifier model.
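As a quick check of the "within the statistical error" claim, the spread of the cross-validated scores can be compared directly. A minimal sketch - the frame name automl_clf_ss_reg_stat and the val_accuracy/epochs column names are my assumptions about the stats frames used above:
for name, stat in [("plain", automl_clf_ss_stat),
                   ("regularized", automl_clf_ss_reg_stat)]:
    va = stat["val_accuracy"]
    print("{}: val_accuracy {:.4f} +/- {:.4f}, median epochs {:.0f}".format(
        name, va.mean(), va.std(), stat["epochs"].median()))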
automl_regr
%%time
def automl_regr(shape_x, learn_rate=0.001):
    """
    Regression model created manually from the json model file exported by auto-keras.
    Parameters
    ----------
    shape_x : integer, the number of features in the dataset.
    learn_rate : float, value for the learning_rate of the optimizer.
        Default value of learn_rate = 0.001.
    Returns
    -------
    model : the compiled keras model
    """
    model = Sequential()
    # 0. Input layer sized to the number of dataset features
    model.add(InputLayer(input_shape=(shape_x,), dtype='float64', name="input_1"))
    # Normalization of the input
    model.add(Normalization(name='normalization'))
    # Hidden layer 1
    # 1.1 Linear part of the first hidden layer
    model.add(layers.Dense(units=32, kernel_initializer="glorot_uniform",
                           bias_initializer='zeros', name="layer_1"))
    # 1.2 Activation of the first hidden layer
    model.add(layers.Activation(activations.relu, name="relu_1"))
    # Hidden layer 2
    # 2.1 Linear part of the second hidden layer
    model.add(layers.Dense(units=32, kernel_initializer="glorot_uniform",
                           bias_initializer='zeros', name="layer_2"))
    # 2.2 Activation of the second hidden layer
    model.add(layers.Activation(activations.relu, name="relu_2"))
    model.add(layers.Dropout(.25))
    # 3. Final linear regression head
    model.add(layers.Dense(units=1, kernel_initializer="glorot_uniform",
                           bias_initializer='zeros', name="layer_3"))
    model.add(layers.Activation(activations.linear, name="regression_head_1"))
    model.compile(loss='mean_squared_error',
                  optimizer=tf.keras.optimizers.Adam(learning_rate=learn_rate),
                  metrics=['mean_squared_error'])
    return model
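Before the cross-validated run below, the rebuilt layer stack can be sanity-checked against the auto-keras json export by printing the model summary:
automl_regr(train.shape[1]-1).summary()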
automl_regr_ss, automl_regr_ss_stat = train_model(automl_regr(train.shape[1]-1),
train, batch_sz=2048, scaler='ss',
estimator="regr")
automl_regr_ss_stat
CPU times: user 1min 38s, sys: 9.37 s, total: 1min 48s Wall time: 58 s
loss | mean_squared_error | val_loss | val_mean_squared_error | epochs | |
---|---|---|---|---|---|
0 | 0.190724 | 0.190724 | 0.188966 | 0.188966 | 26 |
1 | 0.190984 | 0.190984 | 0.188989 | 0.188989 | 22 |
2 | 0.191103 | 0.191103 | 0.189021 | 0.189021 | 21 |
3 | 0.190979 | 0.190979 | 0.189066 | 0.189066 | 23 |
4 | 0.190800 | 0.190800 | 0.189071 | 0.189071 | 24 |
5 | 0.191058 | 0.191058 | 0.189071 | 0.189071 | 20 |
6 | 0.190123 | 0.190123 | 0.189078 | 0.189078 | 31 |
7 | 0.190840 | 0.190840 | 0.189102 | 0.189102 | 25 |
8 | 0.191267 | 0.191267 | 0.189125 | 0.189125 | 19 |
9 | 0.190347 | 0.190347 | 0.189131 | 0.189131 | 29 |
10 | 0.190410 | 0.190410 | 0.189139 | 0.189139 | 27 |
11 | 0.190044 | 0.190044 | 0.189144 | 0.189144 | 33 |
12 | 0.190009 | 0.190009 | 0.189159 | 0.189159 | 36 |
13 | 0.191809 | 0.191809 | 0.189207 | 0.189207 | 16 |
14 | 0.189603 | 0.189603 | 0.189217 | 0.189217 | 43 |
15 | 0.190269 | 0.190269 | 0.189221 | 0.189221 | 30 |
16 | 0.190487 | 0.190487 | 0.189230 | 0.189230 | 28 |
17 | 0.191857 | 0.191857 | 0.189234 | 0.189234 | 15 |
18 | 0.191313 | 0.191313 | 0.189256 | 0.189256 | 18 |
19 | 0.190161 | 0.190161 | 0.189261 | 0.189261 | 32 |
20 | 0.189844 | 0.189844 | 0.189279 | 0.189279 | 40 |
21 | 0.189773 | 0.189773 | 0.189298 | 0.189298 | 42 |
22 | 0.190026 | 0.190026 | 0.189300 | 0.189300 | 35 |
23 | 0.189246 | 0.189246 | 0.189305 | 0.189305 | 56 |
24 | 0.189472 | 0.189472 | 0.189314 | 0.189314 | 47 |
25 | 0.189840 | 0.189840 | 0.189325 | 0.189325 | 41 |
26 | 0.189859 | 0.189859 | 0.189328 | 0.189328 | 38 |
27 | 0.190044 | 0.190044 | 0.189329 | 0.189329 | 34 |
28 | 0.189482 | 0.189482 | 0.189346 | 0.189346 | 48 |
29 | 0.190084 | 0.190084 | 0.189428 | 0.189428 | 37 |
30 | 0.189565 | 0.189565 | 0.189438 | 0.189438 | 44 |
31 | 0.189560 | 0.189560 | 0.189456 | 0.189456 | 46 |
32 | 0.189237 | 0.189237 | 0.189465 | 0.189465 | 55 |
33 | 0.189278 | 0.189278 | 0.189469 | 0.189469 | 51 |
34 | 0.189468 | 0.189468 | 0.189471 | 0.189471 | 45 |
35 | 0.189291 | 0.189291 | 0.189475 | 0.189475 | 54 |
36 | 0.191642 | 0.191642 | 0.189482 | 0.189482 | 17 |
37 | 0.189466 | 0.189466 | 0.189498 | 0.189498 | 49 |
38 | 0.189530 | 0.189530 | 0.189527 | 0.189527 | 50 |
39 | 0.189800 | 0.189800 | 0.189528 | 0.189528 | 39 |
40 | 0.192106 | 0.192106 | 0.189596 | 0.189596 | 14 |
41 | 0.192173 | 0.192173 | 0.189608 | 0.189608 | 13 |
42 | 0.189372 | 0.189372 | 0.189645 | 0.189645 | 52 |
43 | 0.192544 | 0.192544 | 0.189649 | 0.189649 | 12 |
44 | 0.189419 | 0.189419 | 0.189805 | 0.189805 | 53 |
45 | 0.192778 | 0.192778 | 0.189866 | 0.189866 | 11 |
46 | 0.193137 | 0.193137 | 0.190025 | 0.190025 | 10 |
47 | 0.193664 | 0.193664 | 0.190699 | 0.190699 | 9 |
48 | 0.194320 | 0.194320 | 0.191104 | 0.191104 | 8 |
49 | 0.195501 | 0.195501 | 0.191550 | 0.191550 | 7 |
50 | 0.198140 | 0.198140 | 0.192865 | 0.192865 | 6 |
51 | 0.204408 | 0.204408 | 0.196676 | 0.196676 | 5 |
52 | 0.212652 | 0.212652 | 0.204608 | 0.204608 | 4 |
53 | 0.220876 | 0.220876 | 0.211292 | 0.211292 | 3 |
54 | 0.235060 | 0.235060 | 0.217287 | 0.217287 | 2 |
55 | 0.384971 | 0.384971 | 0.228658 | 0.228658 | 1 |
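The runs above are sorted by val_mean_squared_error, so the top row marks the best early-stopping point (26 epochs here). A minimal sketch to extract it programmatically from the stats frame:
best = automl_regr_ss_stat.sort_values("val_mean_squared_error").iloc[0]
print("Best run: {:.0f} epochs, val MSE {:.6f}.".format(
    best["epochs"], best["val_mean_squared_error"]))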
%%time
# Transform the train dataset and split it into x and y
x_all, y_all = df_transform(train, scaler="ss")
# Predict on the full train set with all three models; the best estimator
# is selected below using the accuracy metric
y_pred_automl_clf = automl_clf_ss.predict(x_all, batch_size=2048, verbose=1)
# Convert raw scores to binary labels with a 0.5 threshold
y_pred_automl_clf_conv = np.where(y_pred_automl_clf < 0.5, 0, 1)
y_pred_automl_clf_tuner = automl_clf_ss_tuner.predict(x_all, batch_size=2048,
                                                      verbose=1)
y_pred_automl_clf_tuner_conv = np.where(y_pred_automl_clf_tuner < 0.5, 0, 1)
y_pred_automl_regr_ss = automl_regr_ss.predict(x_all, batch_size=2048,
                                               verbose=1)
y_pred_automl_regr_ss_conv = np.where(y_pred_automl_regr_ss < 0.5, 0, 1)
293/293 [==============================] - 1s 2ms/step 293/293 [==============================] - 1s 2ms/step 293/293 [==============================] - 1s 2ms/step CPU times: user 4.36 s, sys: 1.12 s, total: 5.48 s Wall time: 4.68 s
Compare accuracy:
%%time
accuracy_automl_clf = accuracy_score(y_all, y_pred_automl_clf_conv)
accuracy_automl_clf_tuner = accuracy_score(y_all, y_pred_automl_clf_tuner_conv)
accuracy_automl_regr = accuracy_score(y_all, y_pred_automl_regr_ss_conv)
CPU times: user 177 ms, sys: 6.77 ms, total: 184 ms Wall time: 186 ms
print(f"Accuracy for `automl_clf` model: {accuracy_automl_clf:.4f}.")
print(f"Accuracy for `automl_clf_tuner` model: {accuracy_automl_clf_tuner:.4f}.")
print(f"Accuracy for `automl_regr` model: {accuracy_automl_regr:.4f}.")
Accuracy for `automl_clf` model: 0.7456. Accuracy for `automl_clf_tuner` model: 0.7473. Accuracy for `automl_regr` model: 0.7470.
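Accuracy depends on the fixed 0.5 threshold, so a threshold-free cross-check on the raw scores is also useful. A minimal sketch using ROC AUC:
from sklearn.metrics import roc_auc_score
for name, pred in [("automl_clf", y_pred_automl_clf),
                   ("automl_clf_tuner", y_pred_automl_clf_tuner),
                   ("automl_regr", y_pred_automl_regr_ss)]:
    print("ROC AUC for `{}` model: {:.4f}.".format(
        name, roc_auc_score(y_all, pred.ravel())))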
As seen above, the best accuracy is achieved by the automl_regr model.
automl_regr model
# Open and read test dataset
test = pd.read_csv("data/test.csv")
test.set_index("id", inplace=True)
# Keep `id` as a regular column so it is written to the submission file
# submission.set_index("id", inplace=True)
# Convert test x values with StandardScaler
test_ss = df_transform(test, scaler="ss", y=False)
# Predict target and convert to binary format
predict_target = automl_regr_ss.predict(test_ss, batch_size=2048,
verbose=1)
predict_target_conv = np.where(predict_target < 0.5, 0, 1)
# Fill `target` column
submission["target"] = predict_target_conv
# Save predictions to the submission file
submission.to_csv("submission_pred.csv")
submission.head(18)
264/264 [==============================] - 0s 1ms/step
id | target | |
---|---|---|
0 | 600000 | 1 |
1 | 600001 | 1 |
2 | 600002 | 1 |
3 | 600003 | 1 |
4 | 600004 | 1 |
5 | 600005 | 0 |
6 | 600006 | 0 |
7 | 600007 | 1 |
8 | 600008 | 1 |
9 | 600009 | 0 |
10 | 600010 | 1 |
11 | 600011 | 1 |
12 | 600012 | 0 |
13 | 600013 | 0 |
14 | 600014 | 0 |
15 | 600015 | 0 |
16 | 600016 | 1 |
17 | 600017 | 0 |
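Before submitting, a quick sanity check that the predicted class balance looks plausible and the row count matches the test set:
print(submission["target"].value_counts(normalize=True))
print("Rows in submission: {}, rows in test: {}.".format(len(submission), len(test)))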
Auto-Keras produces a completely workable model and may, in the future, eliminate the manual work of building an optimal architecture and selecting hyperparameters, which currently takes a lot of time.
In this case, the l1/l2 regularization of the classifier model did not bring any visible improvement and only increased the execution time. It turned out that a simple regression head reaches accuracy comparable to the l1/l2-regularized classifier, so regularization is not always a silver bullet. A possible reason is the large number of wild outliers.
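For reference, this is how a combined penalty is attached to a layer in Keras; the penalty values below are placeholders, not the ones used in the experiments above:
from tensorflow.keras import layers, regularizers
# Dense layer with a combined l1/l2 penalty on the kernel weights
reg_layer = layers.Dense(units=32,
                         kernel_regularizer=regularizers.l1_l2(l1=1e-5, l2=1e-4),
                         name="layer_1_reg")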
I was pleased with the speed of the old GTX 1050 with 2 GB RAM: a dataset of 600K rows and 100 columns (60M cells) takes only several minutes to process and cross-validate. As I wrote earlier, when trying to choose the optimal algorithm from classic ML among boosting, random forest and SVM, cross-validation took more than 12 hours. ML is dead, long live DL: classic ML is justified on small datasets with several thousand rows, but as the amount of data grows, ML loses to DL in speed. Advice for those who do not have a modern video card - lower the TF version. For example, the GTX 1050 with 2 GB RAM works quite correctly on this dataset with TF 2.5; with higher TF versions, out-of-memory problems begin.
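Before downgrading TF, it may also be worth enabling GPU memory growth so TF allocates the 2 GB on demand instead of reserving it all up front - a suggestion on my part, not something verified in this notebook:
# Allocate GPU memory on demand instead of reserving it all at start-up
for gpu in tf.config.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)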
This dataset itself turned out to be a tough nut to crack. High accuracy was obtained only with StandardScaler; all other transformations from classical ML - MinMaxScaler, RobustScaler, QuantileTransformer, PowerTransformer, KBinsDiscretizer, Normalizer - gave lower accuracy and higher execution times. Also, the outlier-removal algorithms from scikit-learn do not work on this dataset.
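A minimal sketch of how such a scaler comparison can be looped over the raw features; in the notebook itself each variant goes through df_transform and train_model, while here only the resulting scale of the transformed data is inspected:
from sklearn.preprocessing import (MinMaxScaler, PowerTransformer,
                                   QuantileTransformer, RobustScaler,
                                   StandardScaler)
scalers = {"ss": StandardScaler(), "mm": MinMaxScaler(), "rs": RobustScaler(),
           "qt": QuantileTransformer(output_distribution="normal"),
           "pt": PowerTransformer()}
x_raw = train.drop(columns="target")
for key, sc in scalers.items():
    x_scaled = sc.fit_transform(x_raw)
    print(key, round(x_scaled.mean(), 4), round(x_scaled.std(), 4))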
I avoided data leakage everywhere; however, fitting the StandardScaler on the entire train dataset (instead of the training split only) increases accuracy by roughly 0.005.
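A minimal sketch of the leakage-free variant on a plain holdout split - the scaler statistics are computed on the training part only and then applied to the validation part (and later to test):
x_tr, x_val, y_tr, y_val = train_test_split(train.drop(columns="target"),
                                            train["target"], test_size=0.2,
                                            random_state=42)
sc = StandardScaler().fit(x_tr)  # fit on the training split only
x_tr_ss = sc.transform(x_tr)
x_val_ss = sc.transform(x_val)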
Created on March 08, 2022
@author: Vadim Maklakov, used some ideas from public Internet resources.
© 3-clause BSD License
Software environment: Debian 11, Python 3.8.12, TensorFlow 2.5.1 for the notebook, TensorFlow 2.8 for defining the model with Auto-Keras.
See the required installed and imported Python modules in cell No. 1.