# Import H2O and other libraries that will be used in this tutorial
import h2o
import matplotlib.pyplot as plt

# Import the estimators
from h2o.estimators.glm import H2OGeneralizedLinearEstimator
from h2o.estimators import H2ORandomForestEstimator
from h2o.estimators.gbm import H2OGradientBoostingEstimator

# Import H2O grid search
import h2o.grid
from h2o.grid.grid_search import H2OGridSearch
h2o.init(max_mem_size=16)  # ask for a 16 GB JVM; ignored here because an existing cluster is reused
Checking whether there is an H2O instance running at http://localhost:54321 . connected.
H2O cluster uptime: | 21 hours 35 mins |
H2O cluster timezone: | Etc/UTC |
H2O data parsing timezone: | UTC |
H2O cluster version: | 3.28.0.2 |
H2O cluster version age: | 1 month and 14 days |
H2O cluster name: | H2O_from_python_unknownUser_b8im2o |
H2O cluster total nodes: | 1 |
H2O cluster free memory: | 2.931 Gb |
H2O cluster total cores: | 4 |
H2O cluster allowed cores: | 4 |
H2O cluster status: | locked, healthy |
H2O connection url: | http://localhost:54321 |
H2O connection proxy: | {'http': None, 'https': None} |
H2O internal security: | False |
H2O API Extensions: | Amazon S3, XGBoost, Algos, AutoML, Core V3, TargetEncoder, Core V4 |
Python version: | 3.6.10 final |
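The call above connected to an H2O cluster that was already running (note the roughly 21-hour uptime), so the max_mem_size request did not change anything for this session. As an optional sketch, not part of the original notebook, the attached cluster can be re-inspected at any time:

# Optional: re-check the status of the cluster this session is attached to
h2o.cluster().show_status()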
loan_level = h2o.import_file("https://s3.amazonaws.com/data.h2o.ai/DAI-Tutorials/loan_level_500k.csv")
Parse progress: |█████████████████████████████████████████████████████████| 100%
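Before splitting, a quick look at the response column is useful; the sketch below (not in the original run) tabulates DELINQUENT and summarizes the frame, and it is this heavy class imbalance that shows up later as the ~3.6 % average response rate in the gains/lift tables.

# Level counts for the response column, plus a frame summary
loan_level["DELINQUENT"].table()
loan_level.describe()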
train, valid, test = loan_level.split_frame([0.7, 0.15], seed=42)
print("train:%d valid:%d test:%d" % (train.nrows, valid.nrows, test.nrows))
y = "DELINQUENT"
ignore = ["DELINQUENT", "PREPAID", "PREPAYMENT_PENALTY_MORTGAGE_FLAG", "PRODUCT_TYPE"]
x = list(set(train.names) - set(ignore))
train:350268 valid:74971 test:74898
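split_frame() assigns rows to the splits approximately, so the counts land near, rather than exactly on, the requested 70/15/15. A small sanity check of the realized fractions:

# Realized split fractions (approximate by design)
total = loan_level.nrows
print("train: %.3f  valid: %.3f  test: %.3f" % (train.nrows / total, valid.nrows / total, test.nrows / total))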
# Cartesian grid: 4 alpha values x 4 lambda values = 16 GLMs
glm_grid = h2o.grid.H2OGridSearch(
    H2OGeneralizedLinearEstimator(
        family="binomial",
        lambda_search=True),
    hyper_params={
        "alpha": [i * 0.01 for i in range(0, 4)],
        "lambda": [i * 1e-6 for i in range(0, 4)],
    },
    grid_id="glm_grid_2",
)
%time glm_grid.train(x=x, y=y, training_frame=train, validation_frame = valid)
glm Grid Build progress: |████████████████████████████████████████████████| 100%
CPU times: user 755 ms, sys: 55.9 ms, total: 811 ms
Wall time: 35.5 s
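The Cartesian grid above builds every alpha/lambda combination, 16 GLMs in total. A quick way to compare them, sketched here rather than taken from the original run, is the grid's sorted summary table:

# Summary of the 16 Cartesian-grid models, sorted by the default metric
print(glm_grid.sorted_metric_table())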
# Random search over the 100 x 1000 alpha/lambda combinations,
# capped at 100 models or 300 seconds, whichever comes first
glm_grid = h2o.grid.H2OGridSearch(
    H2OGeneralizedLinearEstimator(
        family="binomial",
        lambda_search=True),
    hyper_params={
        "alpha": [i * 0.01 for i in range(0, 100)],
        "lambda": [i * 1e-6 for i in range(0, 1000)],
    },
    grid_id="glm_grid",
    search_criteria={
        "strategy": "RandomDiscrete",
        "max_models": 100,
        "max_runtime_secs": 300,
        "seed": 42
    }
)
%time glm_grid.train(x=x, y=y, training_frame=train, validation_frame = valid)
glm Grid Build progress: |████████████████████████████████████████████████| 100%
CPU times: user 4.73 s, sys: 504 ms, total: 5.23 s
Wall time: 3min 26s
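The RandomDiscrete search samples the 100 × 1000 combination space and stops at whichever limit is hit first; the roughly 3.5-minute wall time shows the 100-model cap was reached before the 300-second budget ran out. A quick check of how many models were actually built:

# How many models did the random search build?
print(len(glm_grid.model_ids), "models in grid", glm_grid.grid_id)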
h2o.ls()
 | key |
---|---|
0 | GLM_model_python_1583377547966_1 |
1 | GLM_model_python_1583377547966_11 |
2 | GLM_model_python_1583377547966_14 |
3 | GLM_model_python_1583377547966_3 |
4 | GLM_model_python_1583377547966_5 |
5 | GLM_model_python_1583377547966_8 |
6 | glm_grid |
7 | glm_grid_2 |
8 | glm_grid_2_model_1 |
9 | glm_grid_2_model_10 |
10 | glm_grid_2_model_11 |
11 | glm_grid_2_model_12 |
12 | glm_grid_2_model_13 |
13 | glm_grid_2_model_14 |
14 | glm_grid_2_model_15 |
15 | glm_grid_2_model_16 |
16 | glm_grid_2_model_2 |
17 | glm_grid_2_model_3 |
18 | glm_grid_2_model_4 |
19 | glm_grid_2_model_5 |
20 | glm_grid_2_model_6 |
21 | glm_grid_2_model_7 |
22 | glm_grid_2_model_8 |
23 | glm_grid_2_model_9 |
24 | glm_grid_model_1 |
25 | glm_grid_model_10 |
26 | glm_grid_model_100 |
27 | glm_grid_model_11 |
28 | glm_grid_model_12 |
29 | glm_grid_model_13 |
... | ... |
358 | modelmetrics_glm_grid_model_96@-6013562878500035200_on_py_16_sid_9... |
359 | modelmetrics_glm_grid_model_97@-6125460407153251328_on_py_15_sid_9... |
360 | modelmetrics_glm_grid_model_97@-6125460407153251328_on_py_16_sid_9... |
361 | modelmetrics_glm_grid_model_98@-2192349973378340480_on_py_15_sid_9... |
362 | modelmetrics_glm_grid_model_98@-2192349973378340480_on_py_16_sid_9... |
363 | modelmetrics_glm_grid_model_99@-3110742583558476800_on_py_15_sid_9... |
364 | modelmetrics_glm_grid_model_99@-3110742583558476800_on_py_16_sid_9... |
365 | modelmetrics_glm_grid_model_9@9000166489408089856_on_py_15_sid_966... |
366 | modelmetrics_glm_grid_model_9@9000166489408089856_on_py_16_sid_966... |
367 | prostate.hex |
368 | prostate1.hex |
369 | prostate2.hex |
370 | prostate3.hex |
371 | prostate4.hex |
372 | prostate5.hex |
373 | prostate6.hex |
374 | prostate7.hex |
375 | py_15_sid_9664 |
376 | py_16_sid_9664 |
377 | py_17_sid_9664 |
378 | py_1_sid_9fcd |
379 | py_27_sid_9317 |
380 | py_28_sid_9317 |
381 | py_29_sid_9317 |
382 | py_2_sid_9fcd |
383 | py_30_sid_9317 |
384 | py_31_sid_9317 |
385 | py_32_sid_9317 |
386 | py_3_sid_9fcd |
387 | py_4_sid_9fcd |
388 rows × 1 columns
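Because the cluster has been up for a while, h2o.ls() also lists keys left over from earlier work (for example the prostate frames) next to the two grids and their models. A minimal cleanup sketch, assuming those older frames are no longer needed:

# h2o.ls() returns a pandas DataFrame with a single 'key' column
for key in h2o.ls()["key"]:
    if key.startswith("prostate"):
        h2o.remove(key)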
help(h2o.grid.H2OGridSearch)
Help on class H2OGridSearch in module h2o.grid.grid_search: class H2OGridSearch(H2OGridSearch) | Grid Search of a Hyper-Parameter Space for a Model | | :param model: The type of model to be explored initialized with optional parameters that will be | unchanged across explored models. | :param hyper_params: A dictionary of string parameters (keys) and a list of values to be explored by grid | search (values). | :param str grid_id: The unique id assigned to the resulting grid object. If none is given, an id will | automatically be generated. | :param search_criteria: The optional dictionary of directives which control the search of the hyperparameter space. | The dictionary can include values for: ``strategy``, ``max_models``, ``max_runtime_secs``, ``stopping_metric``, | ``stopping_tolerance``, ``stopping_rounds`` and ``seed``. The default strategy, "Cartesian", covers the entire space of | hyperparameter combinations. If you want to use cartesian grid search, you can leave the search_criteria | argument unspecified. Specify the "RandomDiscrete" strategy to get random search of all the combinations of | your hyperparameters with three ways of specifying when to stop the search: max number of models, max time, and | metric-based early stopping (e.g., stop if MSE hasn’t improved by 0.0001 over the 5 best models). | Examples below:: | | >>> criteria = {"strategy": "RandomDiscrete", "max_runtime_secs": 600, | ... "max_models": 100, "stopping_metric": "AUTO", | ... "stopping_tolerance": 0.00001, "stopping_rounds": 5, | ... "seed": 123456} | >>> criteria = {"strategy": "RandomDiscrete", "max_models": 42, | ... "max_runtime_secs": 28800, "seed": 1234} | >>> criteria = {"strategy": "RandomDiscrete", "stopping_metric": "AUTO", | ... "stopping_tolerance": 0.001, "stopping_rounds": 10} | >>> criteria = {"strategy": "RandomDiscrete", "stopping_rounds": 5, | ... "stopping_metric": "misclassification", | ... "stopping_tolerance": 0.00001} | :param parallelism: Level of parallelism during grid model building. 1 = sequential building (default). | Use the value of 0 for adaptive parallelism - decided by H2O. Any number > 1 sets the exact number of models | built in parallel. | :returns: a new H2OGridSearch instance | | Examples | -------- | >>> from h2o.grid.grid_search import H2OGridSearch | >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator | >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]} | >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family='binomial'), hyper_parameters) | >>> training_data = h2o.import_file("smalldata/logreg/benign.csv") | >>> gs.train(x=range(3) + range(4,11),y=3, training_frame=training_data) | >>> gs.show() | | Method resolution order: | H2OGridSearch | H2OGridSearch | builtins.object | | Methods defined here: | | __getattr__(self, name) | | __getitem__(self, item) | | __init__(self, *args, **kwargs) | | __iter__(self) | | __len__(self) | | __repr__(self) | Return repr(self). | | aic(self, train=False, valid=False, xval=False) | Get the AIC(s). | | If all are False (default), then return the training metric value. | If more than one options is set to True, then return a dictionary of metrics where the keys are "train", | "valid", and "xval". | | :param bool train: If train is True, then return the AIC value for the training data. | :param bool valid: If valid is True, then return the AIC value for the validation data. | :param bool xval: If xval is True, then return the AIC value for the validation data. | | :returns: The AIC. 
| | auc(self, train=False, valid=False, xval=False) | Get the AUC(s). | | If all are False (default), then return the training metric value. | If more than one options is set to True, then return a dictionary of metrics where the keys are "train", | "valid", and "xval". | | :param bool train: If train is True, then return the AUC value for the training data. | :param bool valid: If valid is True, then return the AUC value for the validation data. | :param bool xval: If xval is True, then return the AUC value for the validation data. | | :returns: The AUC. | | aucpr(self, train=False, valid=False, xval=False) | Get the aucPR (Area Under PRECISION RECALL Curve). | | If all are False (default), then return the training metric value. | If more than one options is set to True, then return a dictionary of metrics where the keys are "train", | "valid", and "xval". | | :param bool train: If train is True, then return the aucpr value for the training data. | :param bool valid: If valid is True, then return the aucpr value for the validation data. | :param bool xval: If xval is True, then return the aucpr value for the validation data. | | :returns: The AUCPR for the models in this grid. | | biases(self, vector_id=0) | Return the frame for the respective bias vector. | | :param: vector_id: an integer, ranging from 0 to number of layers, that specifies the bias vector to return. | :returns: an H2OFrame which represents the bias vector identified by vector_id | | build_model(self, algo_params) | (internal) | | catoffsets(self) | Categorical offsets for one-hot encoding | | coef(self) | Return the coefficients that can be applied to the non-standardized data. | | Note: standardize = True by default. If set to False, then coef() returns the coefficients that are fit directly. | | coef_norm(self) | Return coefficients fitted on the standardized data (requires standardize = True, which is on by default). These coefficients can be used to evaluate variable importance. | | deepfeatures(self, test_data, layer) | Obtain a hidden layer's details on a dataset. | | :param test_data: Data to create a feature space on. | :param int layer: Index of the hidden layer. | :returns: A dictionary of hidden layer details for each model. | | get_grid(self, sort_by=None, decreasing=None) | Retrieve an H2OGridSearch instance. | | Optionally specify a metric by which to sort models and a sort order. | Note that if neither cross-validation nor a validation frame is used in the grid search, then the | training metrics will display in the "get grid" output. If a validation frame is passed to the grid, and | ``nfolds = 0``, then the validation metrics will display. However, if ``nfolds`` > 1, then cross-validation | metrics will display even if a validation frame is provided. | | :param str sort_by: A metric by which to sort the models in the grid space. Choices are: ``"logloss"``, | ``"residual_deviance"``, ``"mse"``, ``"auc"``, ``"r2"``, ``"accuracy"``, ``"precision"``, ``"recall"``, | ``"f1"``, etc. | :param bool decreasing: Sort the models in decreasing order of metric if true, otherwise sort in increasing | order (default). | | :returns: A new H2OGridSearch instance optionally sorted on the specified metric. | | get_hyperparams(self, id, display=True) | Get the hyperparameters of a model explored by grid search. | | :param str id: The model id of the model with hyperparameters of interest. | :param bool display: Flag to indicate whether to display the hyperparameter names. 
| | :returns: A list of the hyperparameters for the specified model. | | get_hyperparams_dict(self, id, display=True) | Derived and returned the model parameters used to train the particular grid search model. | | :param str id: The model id of the model with hyperparameters of interest. | :param bool display: Flag to indicate whether to display the hyperparameter names. | | :returns: A dict of model pararmeters derived from the hyper-parameters used to train this particular model. | | get_xval_models(self, key=None) | Return a Model object. | | :param str key: If None, return all cross-validated models; otherwise return the model | specified by the key. | :returns: A model or a list of models. | | gini(self, train=False, valid=False, xval=False) | Get the Gini Coefficient(s). | | If all are False (default), then return the training metric value. | If more than one options is set to True, then return a dictionary of metrics where the keys are "train", | "valid", and "xval". | | :param bool train: If train is True, then return the Gini Coefficient value for the training data. | :param bool valid: If valid is True, then return the Gini Coefficient value for the validation data. | :param bool xval: If xval is True, then return the Gini Coefficient value for the cross validation data. | | :returns: The Gini Coefficient for the models in this grid. | | is_cross_validated(self) | Return True if the model was cross-validated. | | join(self) | Wait until grid finishes computing. | | logloss(self, train=False, valid=False, xval=False) | Get the Log Loss(s). | | If all are False (default), then return the training metric value. | If more than one options is set to True, then return a dictionary of metrics where the keys are "train", | "valid", and "xval". | | :param bool train: If train is True, then return the Log Loss value for the training data. | :param bool valid: If valid is True, then return the Log Loss value for the validation data. | :param bool xval: If xval is True, then return the Log Loss value for the cross validation data. | | :returns: The Log Loss for this binomial model. | | mae(self, train=False, valid=False, xval=False) | | mean_residual_deviance(self, train=False, valid=False, xval=False) | Get the Mean Residual Deviances(s). | | If all are False (default), then return the training metric value. | If more than one options is set to True, then return a dictionary of metrics where the keys are "train", | "valid", and "xval". | | :param bool train: If train is True, then return the Mean Residual Deviance value for the training data. | :param bool valid: If valid is True, then return the Mean Residual Deviance value for the validation data. | :param bool xval: If xval is True, then return the Mean Residual Deviance value for the cross validation data. | :returns: The Mean Residual Deviance for this regression model. | | model_performance(self, test_data=None, train=False, valid=False, xval=False) | Generate model metrics for this model on test_data. | | :param test_data: Data set for which model metrics shall be computed against. All three of train, valid | and xval arguments are ignored if test_data is not None. | :param train: Report the training metrics for the model. | :param valid: Report the validation metrics for the model. | :param xval: Report the validation metrics for the model. | :return: An object of class H2OModelMetrics. | | mse(self, train=False, valid=False, xval=False) | Get the MSE(s). | | If all are False (default), then return the training metric value. 
| If more than one options is set to True, then return a dictionary of metrics where the keys are "train", | "valid", and "xval". | | :param bool train: If train is True, then return the MSE value for the training data. | :param bool valid: If valid is True, then return the MSE value for the validation data. | :param bool xval: If xval is True, then return the MSE value for the cross validation data. | :returns: The MSE for this regression model. | | normmul(self) | Normalization/Standardization multipliers for numeric predictors. | | normsub(self) | Normalization/Standardization offsets for numeric predictors. | | null_degrees_of_freedom(self, train=False, valid=False, xval=False) | Retreive the null degress of freedom if this model has the attribute, or None otherwise. | | :param bool train: Get the null dof for the training set. If both train and valid are False, then train is | selected by default. | :param bool valid: Get the null dof for the validation set. If both train and valid are True, then train is | selected by default. | :param bool xval: Get the null dof for the cross-validated models. | | :returns: the null dof, or None if it is not present. | | null_deviance(self, train=False, valid=False, xval=False) | Retreive the null deviance if this model has the attribute, or None otherwise. | | :param bool train: Get the null deviance for the training set. If both train and valid are False, then | train is selected by default. | :param bool valid: Get the null deviance for the validation set. If both train and valid are True, then | train is selected by default. | :param bool xval: Get the null deviance for the cross-validated models. | | :returns: the null deviance, or None if it is not present. | | pprint_coef(self) | Pretty print the coefficents table (includes normalized coefficients). | | pr_auc(self) | H2OGridSearch.pr_auc is deprecated, please use ``H2OGridSearch.aucpr`` instead. | | predict(self, test_data) | Predict on a dataset. | | :param H2OFrame test_data: Data to be predicted on. | :returns: H2OFrame filled with predictions. | | r2(self, train=False, valid=False, xval=False) | Return the R^2 for this regression model. | | The R^2 value is defined to be ``1 - MSE/var``, where ``var`` is computed as ``sigma^2``. | | If all are False (default), then return the training metric value. | If more than one options is set to True, then return a dictionary of metrics where the keys are "train", | "valid", and "xval". | | :param bool train: If train is True, then return the R^2 value for the training data. | :param bool valid: If valid is True, then return the R^2 value for the validation data. | :param bool xval: If xval is True, then return the R^2 value for the cross validation data. | | :returns: The R^2 for this regression model. | | residual_degrees_of_freedom(self, train=False, valid=False, xval=False) | Retreive the residual degress of freedom if this model has the attribute, or None otherwise. | | :param bool train: Get the residual dof for the training set. If both train and valid are False, then | train is selected by default. | :param bool valid: Get the residual dof for the validation set. If both train and valid are True, then | train is selected by default. | :param bool xval: Get the residual dof for the cross-validated models. | | :returns: the residual degrees of freedom, or None if they are not present. | | residual_deviance(self, train=False, valid=False, xval=False) | Retreive the residual deviance if this model has the attribute, or None otherwise. 
| | :param bool train: Get the residual deviance for the training set. If both train and valid are False, | then train is selected by default. | :param bool valid: Get the residual deviance for the validation set. If both train and valid are True, | then train is selected by default. | :param bool xval: Get the residual deviance for the cross-validated models. | | :returns: the residual deviance, or None if it is not present. | | respmul(self) | Normalization/Standardization multipliers for numeric response. | | respsub(self) | Normalization/Standardization offsets for numeric response. | | rmse(self, train=False, valid=False, xval=False) | | rmsle(self, train=False, valid=False, xval=False) | | scoring_history(self) | Retrieve model scoring history. | | :returns: Score history (H2OTwoDimTable) | | show(self) | Print models sorted by metric. | | sort_by(self, metric, increasing=True) | grid.sort_by() is deprecated; use grid.get_grid() instead | | Deprecated since 2016-12-12, use grid.get_grid() instead. | | sorted_metric_table(self) | Retrieve summary table of an H2O Grid Search. | | :returns: The summary table as an H2OTwoDimTable or a Pandas DataFrame. | | start(self, x, y=None, training_frame=None, offset_column=None, fold_column=None, weights_column=None, validation_frame=None, **params) | Asynchronous model build by specifying the predictor columns, response column, and any | additional frame-specific values. | | To block for results, call :meth:`join`. | | :param x: A list of column names or indices indicating the predictor columns. | :param y: An index or a column name indicating the response column. | :param training_frame: The H2OFrame having the columns indicated by x and y (as well as any | additional columns specified by fold, offset, and weights). | :param offset_column: The name or index of the column in training_frame that holds the offsets. | :param fold_column: The name or index of the column in training_frame that holds the per-row fold | assignments. | :param weights_column: The name or index of the column in training_frame that holds the per-row weights. | :param validation_frame: H2OFrame with validation data to be scored on while training. | | summary(self, header=True) | Print a detailed summary of the explored models. | | train(self, x=None, y=None, training_frame=None, offset_column=None, fold_column=None, weights_column=None, validation_frame=None, **params) | Train the model synchronously (i.e. do not return until the model finishes training). | | To train asynchronously call :meth:`start`. | | :param x: A list of column names or indices indicating the predictor columns. | :param y: An index or a column name indicating the response column. | :param training_frame: The H2OFrame having the columns indicated by x and y (as well as any | additional columns specified by fold, offset, and weights). | :param offset_column: The name or index of the column in training_frame that holds the offsets. | :param fold_column: The name or index of the column in training_frame that holds the per-row fold | assignments. | :param weights_column: The name or index of the column in training_frame that holds the per-row weights. | :param validation_frame: H2OFrame with validation data to be scored on while training. | | varimp(self, use_pandas=False) | Pretty print the variable importances, or return them in a list/pandas DataFrame. | | :param bool use_pandas: If True, then the variable importances will be returned as a pandas data frame. 
| | :returns: A dictionary of lists or Pandas DataFrame instances. | | weights(self, matrix_id=0) | Return the frame for the respective weight matrix. | | :param: matrix_id: an integer, ranging from 0 to number of layers, that specifies the weight matrix to return. | :returns: an H2OFrame which represents the weight matrix identified by matrix_id | | xval_keys(self) | Model keys for the cross-validated model. | | xvals(self) | Return the list of cross-validated models. | | ---------------------------------------------------------------------- | Data descriptors defined here: | | __dict__ | dictionary for instance variables (if defined) | | __weakref__ | list of weak references to the object (if defined) | | failed_params | | failed_raw_params | | failure_details | | failure_stack_traces | | grid_id | A key that identifies this grid search object in H2O. | | hyper_names | | model_ids
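The help text above also documents a parallelism argument. As a sketch only (the grid id below is made up and this variant was not run in the notebook), the same random search could let H2O build several models concurrently:

parallel_grid = H2OGridSearch(
    H2OGeneralizedLinearEstimator(family="binomial", lambda_search=True),
    hyper_params={"alpha": [i * 0.01 for i in range(0, 100)],
                  "lambda": [i * 1e-6 for i in range(0, 1000)]},
    grid_id="glm_grid_parallel",  # hypothetical id, not used elsewhere in this notebook
    search_criteria={"strategy": "RandomDiscrete", "max_models": 100,
                     "max_runtime_secs": 300, "seed": 42},
    parallelism=0,  # 0 = adaptive parallelism, decided by H2O
)
# parallel_grid.train(x=x, y=y, training_frame=train, validation_frame=valid)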
glm_grid.get_grid(sort_by='auc',decreasing=True)
 | alpha | lambda | model_ids | auc |
---|---|---|---|---|
0 | [0.87] | [4.9999999999999996E-6] | glm_grid_model_61 | 0.8460502420206815 |
1 | [0.4] | [1.8E-5] | glm_grid_model_46 | 0.8459231218911992 |
2 | [0.07] | [3.7E-5] | glm_grid_model_48 | 0.8458460074966513 |
3 | [0.07] | [5.6E-5] | glm_grid_model_72 | 0.84578969349 |
4 | [0.48] | [2.9E-5] | glm_grid_model_96 | 0.845718421725669 |
5 | [0.88] | [3.1E-5] | glm_grid_model_37 | 0.8454975463710519 |
6 | [0.18] | [8.099999999999999E-5] | glm_grid_model_86 | 0.8454049383408971 |
7 | [0.15] | [1.3099999999999999E-4] | glm_grid_model_30 | 0.845125597663872 |
8 | [0.1] | [1.59E-4] | glm_grid_model_38 | 0.844990163622469 |
9 | [0.06] | [2.12E-4] | glm_grid_model_78 | 0.8447922633836474 |
10 | [0.53] | [9.499999999999999E-5] | glm_grid_model_28 | 0.8446763576704011 |
11 | [0.41000000000000003] | [1.37E-4] | glm_grid_model_67 | 0.8443068880489722 |
12 | [0.65] | [1.07E-4] | glm_grid_model_34 | 0.8442450331686066 |
13 | [0.93] | [7.7E-5] | glm_grid_model_100 | 0.8441362707601581 |
14 | [0.52] | [1.37E-4] | glm_grid_model_42 | 0.8440820898598193 |
15 | [0.31] | [2.2999999999999998E-4] | glm_grid_model_22 | 0.8436148511314989 |
16 | [0.56] | [1.64E-4] | glm_grid_model_82 | 0.8436089096500138 |
17 | [0.47000000000000003] | [1.8899999999999999E-4] | glm_grid_model_51 | 0.8435708987360638 |
18 | [0.05] | [4.17E-4] | glm_grid_model_16 | 0.843518778104261 |
19 | [0.18] | [3.47E-4] | glm_grid_model_17 | 0.8432349813161218 |
20 | [0.92] | [1.45E-4] | glm_grid_model_93 | 0.8426307867571391 |
21 | [0.11] | [4.88E-4] | glm_grid_model_73 | 0.8426180947745795 |
22 | [0.16] | [4.93E-4] | glm_grid_model_14 | 0.8421399824067635 |
23 | [0.75] | [1.8099999999999998E-4] | glm_grid_model_11 | 0.8416523933239516 |
24 | [0.09] | [6.68E-4] | glm_grid_model_1 | 0.8416254797655253 |
25 | [0.78] | [1.88E-4] | glm_grid_model_2 | 0.8415763740974008 |
26 | [0.07] | [7.32E-4] | glm_grid_model_79 | 0.841510364862424 |
27 | [0.17] | [5.459999999999999E-4] | glm_grid_model_98 | 0.8414721822594293 |
28 | [0.65] | [2.26E-4] | glm_grid_model_83 | 0.8414650545627264 |
29 | [0.02] | [8.87E-4] | glm_grid_model_92 | 0.8413966312757275 |
... | ... | ... | ... | ... |
70 | [0.55] | [6.129999999999999E-4] | glm_grid_model_59 | 0.8367248032825947 |
71 | [0.86] | [4.86E-4] | glm_grid_model_21 | 0.836457462629252 |
72 | [0.63] | [5.75E-4] | glm_grid_model_49 | 0.836414769286811 |
73 | [0.5] | [6.929999999999999E-4] | glm_grid_model_6 | 0.8362693772808174 |
74 | [0.73] | [5.53E-4] | glm_grid_model_13 | 0.8362422374050139 |
75 | [0.65] | [5.88E-4] | glm_grid_model_89 | 0.8362208751256985 |
76 | [0.87] | [5.099999999999999E-4] | glm_grid_model_19 | 0.8361702996952978 |
77 | [0.32] | [9.5E-4] | glm_grid_model_95 | 0.835997372408428 |
78 | [0.88] | [5.18E-4] | glm_grid_model_63 | 0.835987266166935 |
79 | [0.42] | [8.129999999999999E-4] | glm_grid_model_74 | 0.8359603526085087 |
80 | [0.43] | [8.42E-4] | glm_grid_model_40 | 0.8355779204642072 |
81 | [0.66] | [6.59E-4] | glm_grid_model_18 | 0.8355752566826656 |
82 | [0.59] | [7.31E-4] | glm_grid_model_9 | 0.8353492462648034 |
83 | [0.41000000000000003] | [8.91E-4] | glm_grid_model_24 | 0.8353201996000644 |
84 | [0.54] | [7.8E-4] | glm_grid_model_5 | 0.8352437797657427 |
85 | [0.41000000000000003] | [9.559999999999999E-4] | glm_grid_model_76 | 0.8348899104352362 |
86 | [0.9400000000000001] | [5.09E-4] | glm_grid_model_62 | 0.834719503855704 |
87 | [0.41000000000000003] | [9.93E-4] | glm_grid_model_50 | 0.8346606457295727 |
88 | [0.6] | [8.16E-4] | glm_grid_model_32 | 0.8339922654813755 |
89 | [0.55] | [8.759999999999999E-4] | glm_grid_model_97 | 0.8338713885898811 |
90 | [0.52] | [9.45E-4] | glm_grid_model_91 | 0.8336982271815869 |
91 | [0.84] | [6.74E-4] | glm_grid_model_57 | 0.8335408845775446 |
92 | [0.5700000000000001] | [9.53E-4] | glm_grid_model_69 | 0.8330923235362049 |
93 | [0.65] | [9.379999999999999E-4] | glm_grid_model_56 | 0.8328250505179406 |
94 | [0.9500000000000001] | [7.41E-4] | glm_grid_model_85 | 0.8325835828833598 |
95 | [0.86] | [8.06E-4] | glm_grid_model_39 | 0.8324377694588019 |
96 | [0.92] | [8.6E-4] | glm_grid_model_99 | 0.8318016979671923 |
97 | [0.74] | [9.87E-4] | glm_grid_model_15 | 0.8317961336813356 |
98 | [0.88] | [9.209999999999999E-4] | glm_grid_model_23 | 0.8315479207483787 |
99 | [0.88] | [9.48E-4] | glm_grid_model_7 | 0.8313888092269064 |

[100 rows x 5 columns]
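get_grid() accepts other metrics as well, and for error-style metrics the sort direction flips. For example, the same grid sorted by validation logloss in increasing order:

# Lower logloss is better, so sort ascending
glm_grid.get_grid(sort_by="logloss", decreasing=False)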
glm_grid.models[0]
Model Details
=============
H2OGeneralizedLinearEstimator : Generalized Linear Modeling
Model Key: glm_grid_model_61

GLM Model: summary
 | family | link | regularization | lambda_search | number_of_predictors_total | number_of_active_predictors | number_of_iterations | training_frame |
---|---|---|---|---|---|---|---|---|
0 | binomial | logit | Elastic Net (alpha = 0.87, lambda = 5.0E-6 ) | nlambda = 100, lambda.max = 0.03808, lambda.min = 5.0E-6, lambda.1se = -1.0 | 161 | 143 | 7 | py_15_sid_9664 |
ModelMetricsBinomialGLM: glm
** Reported on train data. **

MSE: 0.031344275605674536
RMSE: 0.17704314616972477
LogLoss: 0.12279979897845819
Null degrees of freedom: 350267
Residual degrees of freedom: 350124
Null deviance: 108932.13150368733
Residual deviance: 86025.67997717319
AIC: 86313.67997717319
AUC: 0.8519842670925402
AUCPR: 0.21046685420921254
Gini: 0.7039685341850803

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.13993235618594693:
 | | FALSE | TRUE | Error | Rate |
---|---|---|---|---|---|
0 | FALSE | 323805.0 | 13802.0 | 0.0409 | (13802.0/337607.0) |
1 | TRUE | 8129.0 | 4532.0 | 0.6421 | (8129.0/12661.0) |
2 | Total | 331934.0 | 18334.0 | 0.0626 | (21931.0/350268.0) |
Maximum Metrics: Maximum metrics at their respective thresholds
 | metric | threshold | value | idx |
---|---|---|---|---|
0 | max f1 | 0.139932 | 0.292434 | 200.0 |
1 | max f2 | 0.068698 | 0.389555 | 264.0 |
2 | max f0point5 | 0.212665 | 0.290398 | 157.0 |
3 | max accuracy | 0.981772 | 0.963851 | 0.0 |
4 | max precision | 0.562303 | 0.421203 | 45.0 |
5 | max recall | 0.000789 | 1.000000 | 398.0 |
6 | max specificity | 0.981772 | 0.999997 | 0.0 |
7 | max absolute_mcc | 0.099058 | 0.269921 | 234.0 |
8 | max min_per_class_accuracy | 0.038519 | 0.774240 | 305.0 |
9 | max mean_per_class_accuracy | 0.036145 | 0.775428 | 309.0 |
10 | max tns | 0.981772 | 337606.000000 | 0.0 |
11 | max fns | 0.981772 | 12661.000000 | 0.0 |
12 | max fps | 0.000498 | 337607.000000 | 399.0 |
13 | max tps | 0.000789 | 12661.000000 | 398.0 |
14 | max tnr | 0.981772 | 0.999997 | 0.0 |
15 | max fnr | 0.981772 | 1.000000 | 0.0 |
16 | max fpr | 0.000498 | 1.000000 | 399.0 |
17 | max tpr | 0.000789 | 1.000000 | 398.0 |
Gains/Lift Table: Avg response rate: 3.61 %, avg score: 3.61 %
 | group | cumulative_data_fraction | lower_threshold | lift | cumulative_lift | response_rate | score | cumulative_response_rate | cumulative_score | capture_rate | cumulative_capture_rate | gain | cumulative_gain |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0.010001 | 0.332250 | 10.724871 | 10.724871 | 0.387668 | 0.466160 | 0.387668 | 0.466160 | 0.107259 | 0.107259 | 972.487123 | 972.487123 | |
1 | 2 | 0.020002 | 0.241718 | 7.629032 | 9.176952 | 0.275764 | 0.280435 | 0.331716 | 0.373297 | 0.076297 | 0.183556 | 662.903211 | 817.695167 | |
2 | 3 | 0.030003 | 0.195284 | 6.452297 | 8.268734 | 0.233229 | 0.216827 | 0.298887 | 0.321140 | 0.064529 | 0.248085 | 545.229734 | 726.873356 | |
3 | 4 | 0.040001 | 0.165076 | 5.166472 | 7.493334 | 0.186750 | 0.179199 | 0.270859 | 0.285663 | 0.051655 | 0.299739 | 416.647177 | 649.333418 | |
4 | 5 | 0.050002 | 0.143363 | 4.770119 | 6.948660 | 0.172424 | 0.153562 | 0.251170 | 0.259241 | 0.047706 | 0.347445 | 377.011946 | 594.866013 | |
5 | 6 | 0.100001 | 0.087455 | 3.342624 | 5.145694 | 0.120825 | 0.110895 | 0.185999 | 0.185070 | 0.167127 | 0.514572 | 234.262433 | 414.569371 | |
6 | 7 | 0.150002 | 0.062108 | 2.397833 | 4.229723 | 0.086674 | 0.073579 | 0.152890 | 0.147906 | 0.119896 | 0.634468 | 139.783271 | 322.972261 | |
7 | 8 | 0.200001 | 0.047315 | 1.600226 | 3.572367 | 0.057843 | 0.054179 | 0.129129 | 0.124475 | 0.080009 | 0.714478 | 60.022611 | 257.236725 | |
8 | 9 | 0.300002 | 0.030452 | 1.121548 | 2.755427 | 0.040540 | 0.037965 | 0.099599 | 0.095638 | 0.112155 | 0.826633 | 12.154798 | 175.542749 | |
9 | 10 | 0.399999 | 0.020842 | 0.645303 | 2.227908 | 0.023326 | 0.025266 | 0.080531 | 0.078045 | 0.064529 | 0.891162 | -35.469658 | 122.790777 | |
10 | 11 | 0.500000 | 0.014542 | 0.399650 | 1.862254 | 0.014446 | 0.017478 | 0.067314 | 0.065932 | 0.039965 | 0.931127 | -60.034981 | 86.225417 | |
11 | 12 | 0.600001 | 0.010215 | 0.280387 | 1.598608 | 0.010135 | 0.012267 | 0.057784 | 0.056988 | 0.028039 | 0.959166 | -71.961301 | 59.860838 | |
12 | 13 | 0.699998 | 0.007058 | 0.198251 | 1.398561 | 0.007166 | 0.008555 | 0.050553 | 0.050069 | 0.019825 | 0.978991 | -80.174889 | 39.856142 | |
13 | 14 | 0.799999 | 0.004607 | 0.116104 | 1.238253 | 0.004197 | 0.005790 | 0.044759 | 0.044534 | 0.011610 | 0.990601 | -88.389609 | 23.825309 | |
14 | 15 | 0.899999 | 0.002610 | 0.057657 | 1.107075 | 0.002084 | 0.003583 | 0.040017 | 0.039984 | 0.005766 | 0.996367 | -94.234296 | 10.707492 | |
15 | 16 | 1.000000 | 0.000095 | 0.036332 | 1.000000 | 0.001313 | 0.001619 | 0.036147 | 0.036147 | 0.003633 | 1.000000 | -96.366816 | 0.000000 |
ModelMetricsBinomialGLM: glm
** Reported on validation data. **

MSE: 0.031018805729749764
RMSE: 0.17612156520355413
LogLoss: 0.12242815235268398
Null degrees of freedom: 74970
Residual degrees of freedom: 74827
Null deviance: 22974.597464481732
Residual deviance: 18357.12202006614
AIC: 18645.12202006614
AUC: 0.8460502420206815
AUCPR: 0.2009137545141779
Gini: 0.6921004840413629

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.13091994899463488:
 | | FALSE | TRUE | Error | Rate |
---|---|---|---|---|---|
0 | FALSE | 69013.0 | 3300.0 | 0.0456 | (3300.0/72313.0) |
1 | TRUE | 1672.0 | 986.0 | 0.629 | (1672.0/2658.0) |
2 | Total | 70685.0 | 4286.0 | 0.0663 | (4972.0/74971.0) |
Maximum Metrics: Maximum metrics at their respective thresholds
 | metric | threshold | value | idx |
---|---|---|---|---|
0 | max f1 | 0.130920 | 0.283986 | 201.0 |
1 | max f2 | 0.071763 | 0.375903 | 256.0 |
2 | max f0point5 | 0.242376 | 0.295791 | 133.0 |
3 | max accuracy | 0.974097 | 0.964533 | 0.0 |
4 | max precision | 0.376628 | 0.396364 | 83.0 |
5 | max recall | 0.000735 | 1.000000 | 398.0 |
6 | max specificity | 0.974097 | 0.999986 | 0.0 |
7 | max absolute_mcc | 0.119399 | 0.260856 | 210.0 |
8 | max min_per_class_accuracy | 0.037620 | 0.764108 | 304.0 |
9 | max mean_per_class_accuracy | 0.034897 | 0.767316 | 309.0 |
10 | max tns | 0.974097 | 72312.000000 | 0.0 |
11 | max fns | 0.974097 | 2658.000000 | 0.0 |
12 | max fps | 0.000475 | 72313.000000 | 399.0 |
13 | max tps | 0.000735 | 2658.000000 | 398.0 |
14 | max tnr | 0.974097 | 0.999986 | 0.0 |
15 | max fnr | 0.974097 | 1.000000 | 0.0 |
16 | max fpr | 0.000475 | 1.000000 | 399.0 |
17 | max tpr | 0.000735 | 1.000000 | 398.0 |
Gains/Lift Table: Avg response rate: 3.55 %, avg score: 3.61 %
 | group | cumulative_data_fraction | lower_threshold | lift | cumulative_lift | response_rate | score | cumulative_response_rate | cumulative_score | capture_rate | cumulative_capture_rate | gain | cumulative_gain |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0.010004 | 0.334125 | 10.793417 | 10.793417 | 0.382667 | 0.473002 | 0.382667 | 0.473002 | 0.107976 | 0.107976 | 979.341711 | 979.341711 | |
1 | 2 | 0.020008 | 0.243096 | 8.348915 | 9.571166 | 0.296000 | 0.282541 | 0.339333 | 0.377771 | 0.083521 | 0.191497 | 734.891497 | 857.116604 | |
2 | 3 | 0.030012 | 0.193143 | 5.829197 | 8.323843 | 0.206667 | 0.216035 | 0.295111 | 0.323859 | 0.058315 | 0.249812 | 482.919739 | 732.384316 | |
3 | 4 | 0.040002 | 0.162847 | 4.895532 | 7.467623 | 0.173565 | 0.176602 | 0.264755 | 0.287082 | 0.048909 | 0.298721 | 389.553164 | 646.762264 | |
4 | 5 | 0.050006 | 0.141925 | 3.948811 | 6.763673 | 0.140000 | 0.152013 | 0.239797 | 0.260061 | 0.039503 | 0.338224 | 294.881114 | 576.367262 | |
5 | 6 | 0.100012 | 0.087175 | 3.310363 | 5.037018 | 0.117365 | 0.110232 | 0.178581 | 0.185146 | 0.165538 | 0.503762 | 231.036257 | 403.701760 | |
6 | 7 | 0.150005 | 0.061800 | 2.227565 | 4.100700 | 0.078975 | 0.073379 | 0.145385 | 0.147897 | 0.111362 | 0.615124 | 122.756536 | 310.070006 | |
7 | 8 | 0.200011 | 0.047553 | 1.707846 | 3.502447 | 0.060549 | 0.054259 | 0.124175 | 0.124486 | 0.085403 | 0.700527 | 70.784615 | 250.244669 | |
8 | 9 | 0.300009 | 0.030349 | 1.106110 | 2.703703 | 0.039216 | 0.038070 | 0.095856 | 0.095682 | 0.110609 | 0.811136 | 10.610956 | 170.370316 | |
9 | 10 | 0.400008 | 0.020669 | 0.714833 | 2.206502 | 0.025343 | 0.025116 | 0.078229 | 0.078041 | 0.071482 | 0.882619 | -28.516729 | 120.650213 | |
10 | 11 | 0.500007 | 0.014493 | 0.470285 | 1.859268 | 0.016673 | 0.017395 | 0.065918 | 0.065912 | 0.047028 | 0.929646 | -52.971532 | 85.926790 | |
11 | 12 | 0.600005 | 0.010143 | 0.297220 | 1.598932 | 0.010538 | 0.012191 | 0.056688 | 0.056959 | 0.029722 | 0.959368 | -70.278008 | 59.893236 | |
12 | 13 | 0.700004 | 0.007006 | 0.180589 | 1.396316 | 0.006403 | 0.008512 | 0.049505 | 0.050038 | 0.018059 | 0.977427 | -81.941068 | 39.631578 | |
13 | 14 | 0.800003 | 0.004578 | 0.131680 | 1.238239 | 0.004669 | 0.005761 | 0.043900 | 0.044504 | 0.013168 | 0.990594 | -86.832029 | 23.823891 | |
14 | 15 | 0.900001 | 0.002583 | 0.045147 | 1.105675 | 0.001601 | 0.003547 | 0.039200 | 0.039953 | 0.004515 | 0.995109 | -95.485267 | 10.567514 | |
15 | 16 | 1.000000 | 0.000078 | 0.048910 | 1.000000 | 0.001734 | 0.001600 | 0.035454 | 0.036118 | 0.004891 | 1.000000 | -95.109039 | 0.000000 |
Scoring History:
 | timestamp | duration | iteration | lambda | predictors | deviance_train | deviance_test |
---|---|---|---|---|---|---|---|
0 | 2020-03-06 00:44:12 | 0.000 sec | 7 | .5E-5 | 144 | 0.2456 | 0.244856 |
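The numbers printed above can also be pulled programmatically from the model object instead of being read off the report, for example the training and validation AUC of the leading model:

best_glm = glm_grid.models[0]  # glm_grid_model_61, the top model in the sorted grid
print("train AUC: %.4f   valid AUC: %.4f" % (best_glm.auc(train=True), best_glm.auc(valid=True)))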
glm_grid.summary()
Grid Summary:
Model Id | family | link | regularization | lambda_search | number_of_predictors_total | number_of_active_predictors | number_of_iterations | training_frame |
---|---|---|---|---|---|---|---|---|
glm_grid_model_61 | binomial | logit | Elastic Net (alpha = 0.87, lambda = 5.0E-6 ) | nlambda = 100, lambda.max = 0.03808, lambda.min = 5.0E-6, lambda.1se = -1.0 | 161 | 143 | 7 | py_15_sid_9664 |
glm_grid_model_46 | binomial | logit | Elastic Net (alpha = 0.4, lambda = 1.8E-5 ) | nlambda = 100, lambda.max = 0.08282, lambda.min = 1.8E-5, lambda.1se = -1.0 | 161 | 137 | 7 | py_15_sid_9664 |
glm_grid_model_48 | binomial | logit | Elastic Net (alpha = 0.07, lambda = 3.7E-5 ) | nlambda = 100, lambda.max = 0.4733, lambda.min = 3.7E-5, lambda.1se = -1.0 | 161 | 145 | 5 | py_15_sid_9664 |
glm_grid_model_96 | binomial | logit | Elastic Net (alpha = 0.48, lambda = 2.9E-5 ) | nlambda = 100, lambda.max = 0.06902, lambda.min = 2.9E-5, lambda.1se = -1.0 | 161 | 110 | 7 | py_15_sid_9664 |
glm_grid_model_72 | binomial | logit | Elastic Net (alpha = 0.07, lambda = 5.6E-5 ) | nlambda = 100, lambda.max = 0.4733, lambda.min = 5.6E-5, lambda.1se = -1.0 | 161 | 137 | 5 | py_15_sid_9664 |
glm_grid_model_37 | binomial | logit | Elastic Net (alpha = 0.88, lambda = 3.1E-5 ) | nlambda = 100, lambda.max = 0.03765, lambda.min = 3.1E-5, lambda.1se = -1.0 | 161 | 93 | 7 | py_15_sid_9664 |
glm_grid_model_86 | binomial | logit | Elastic Net (alpha = 0.18, lambda = 8.1E-5 ) | nlambda = 100, lambda.max = 0.184, lambda.min = 8.1E-5, lambda.1se = -1.0 | 161 | 108 | 7 | py_15_sid_9664 |
glm_grid_model_30 | binomial | logit | Elastic Net (alpha = 0.15, lambda = 1.31E-4 ) | nlambda = 100, lambda.max = 0.2209, lambda.min = 1.31E-4, lambda.1se = -1.0 | 161 | 105 | 7 | py_15_sid_9664 |
glm_grid_model_38 | binomial | logit | Elastic Net (alpha = 0.1, lambda = 1.59E-4 ) | nlambda = 100, lambda.max = 0.3313, lambda.min = 1.59E-4, lambda.1se = -1.0 | 161 | 110 | 7 | py_15_sid_9664 |
glm_grid_model_28 | binomial | logit | Elastic Net (alpha = 0.53, lambda = 9.5E-5 ) | nlambda = 100, lambda.max = 0.06251, lambda.min = 9.5E-5, lambda.1se = -1.0 | 161 | 80 | 7 | py_15_sid_9664 |
glm_grid_model_78 | binomial | logit | Elastic Net (alpha = 0.06, lambda = 2.12E-4 ) | nlambda = 100, lambda.max = 0.5521, lambda.min = 2.12E-4, lambda.1se = -1.0 | 161 | 119 | 7 | py_15_sid_9664 |
glm_grid_model_67 | binomial | logit | Elastic Net (alpha = 0.41, lambda = 1.37E-4 ) | nlambda = 100, lambda.max = 0.0808, lambda.min = 1.37E-4, lambda.1se = -1.0 | 161 | 77 | 7 | py_15_sid_9664 |
glm_grid_model_100 | binomial | logit | Elastic Net (alpha = 0.93, lambda = 7.7E-5 ) | nlambda = 100, lambda.max = 0.03562, lambda.min = 7.7E-5, lambda.1se = -1.0 | 161 | 71 | 7 | py_15_sid_9664 |
glm_grid_model_34 | binomial | logit | Elastic Net (alpha = 0.65, lambda = 1.07E-4 ) | nlambda = 100, lambda.max = 0.05097, lambda.min = 1.07E-4, lambda.1se = -1.0 | 161 | 69 | 7 | py_15_sid_9664 |
glm_grid_model_42 | binomial | logit | Elastic Net (alpha = 0.52, lambda = 1.37E-4 ) | nlambda = 100, lambda.max = 0.06371, lambda.min = 1.37E-4, lambda.1se = -1.0 | 161 | 70 | 7 | py_15_sid_9664 |
glm_grid_model_82 | binomial | logit | Elastic Net (alpha = 0.56, lambda = 1.64E-4 ) | nlambda = 100, lambda.max = 0.05916, lambda.min = 1.64E-4, lambda.1se = -1.0 | 161 | 68 | 7 | py_15_sid_9664 |
glm_grid_model_51 | binomial | logit | Elastic Net (alpha = 0.47, lambda = 1.89E-4 ) | nlambda = 100, lambda.max = 0.07048, lambda.min = 1.89E-4, lambda.1se = -1.0 | 161 | 69 | 7 | py_15_sid_9664 |
glm_grid_model_22 | binomial | logit | Elastic Net (alpha = 0.31, lambda = 2.3E-4 ) | nlambda = 100, lambda.max = 0.1069, lambda.min = 2.3E-4, lambda.1se = -1.0 | 161 | 72 | 7 | py_15_sid_9664 |
glm_grid_model_16 | binomial | logit | Elastic Net (alpha = 0.05, lambda = 4.17E-4 ) | nlambda = 100, lambda.max = 0.6626, lambda.min = 4.17E-4, lambda.1se = -1.0 | 161 | 110 | 6 | py_15_sid_9664 |
glm_grid_model_17 | binomial | logit | Elastic Net (alpha = 0.18, lambda = 3.47E-4 ) | nlambda = 100, lambda.max = 0.184, lambda.min = 3.47E-4, lambda.1se = -1.0 | 161 | 80 | 7 | py_15_sid_9664 |
glm_grid_model_93 | binomial | logit | Elastic Net (alpha = 0.92, lambda = 1.45E-4 ) | nlambda = 100, lambda.max = 0.03601, lambda.min = 1.45E-4, lambda.1se = -1.0 | 161 | 58 | 8 | py_15_sid_9664 |
glm_grid_model_73 | binomial | logit | Elastic Net (alpha = 0.11, lambda = 4.88E-4 ) | nlambda = 100, lambda.max = 0.3012, lambda.min = 4.88E-4, lambda.1se = -1.0 | 161 | 85 | 6 | py_15_sid_9664 |
glm_grid_model_11 | binomial | logit | Elastic Net (alpha = 0.75, lambda = 1.81E-4 ) | nlambda = 100, lambda.max = 0.04417, lambda.min = 1.81E-4, lambda.1se = -1.0 | 161 | 53 | 7 | py_15_sid_9664 |
glm_grid_model_2 | binomial | logit | Elastic Net (alpha = 0.78, lambda = 1.88E-4 ) | nlambda = 100, lambda.max = 0.04247, lambda.min = 1.88E-4, lambda.1se = -1.0 | 161 | 53 | 8 | py_15_sid_9664 |
glm_grid_model_14 | binomial | logit | Elastic Net (alpha = 0.16, lambda = 4.93E-4 ) | nlambda = 100, lambda.max = 0.207, lambda.min = 4.93E-4, lambda.1se = -1.0 | 161 | 77 | 6 | py_15_sid_9664 |
glm_grid_model_83 | binomial | logit | Elastic Net (alpha = 0.65, lambda = 2.26E-4 ) | nlambda = 100, lambda.max = 0.05097, lambda.min = 2.26E-4, lambda.1se = -1.0 | 161 | 53 | 8 | py_15_sid_9664 |
glm_grid_model_20 | binomial | logit | Elastic Net (alpha = 0.53, lambda = 2.9E-4 ) | nlambda = 100, lambda.max = 0.06251, lambda.min = 2.9E-4, lambda.1se = -1.0 | 161 | 54 | 8 | py_15_sid_9664 |
glm_grid_model_1 | binomial | logit | Elastic Net (alpha = 0.09, lambda = 6.68E-4 ) | nlambda = 100, lambda.max = 0.3681, lambda.min = 6.68E-4, lambda.1se = -1.0 | 161 | 84 | 6 | py_15_sid_9664 |
glm_grid_model_98 | binomial | logit | Elastic Net (alpha = 0.17, lambda = 5.46E-4 ) | nlambda = 100, lambda.max = 0.1949, lambda.min = 5.46E-4, lambda.1se = -1.0 | 161 | 72 | 6 | py_15_sid_9664 |
glm_grid_model_54 | binomial | logit | Elastic Net (alpha = 0.13, lambda = 6.33E-4 ) | nlambda = 100, lambda.max = 0.2548, lambda.min = 6.33E-4, lambda.1se = -1.0 | 161 | 79 | 7 | py_15_sid_9664 |
glm_grid_model_79 | binomial | logit | Elastic Net (alpha = 0.07, lambda = 7.32E-4 ) | nlambda = 100, lambda.max = 0.4733, lambda.min = 7.32E-4, lambda.1se = -1.0 | 161 | 88 | 6 | py_15_sid_9664 |
glm_grid_model_35 | binomial | logit | Elastic Net (alpha = 0.99, lambda = 2.06E-4 ) | nlambda = 100, lambda.max = 0.03346, lambda.min = 2.06E-4, lambda.1se = -1.0 | 161 | 46 | 7 | py_15_sid_9664 |
glm_grid_model_92 | binomial | logit | Elastic Net (alpha = 0.02, lambda = 8.87E-4 ) | nlambda = 100, lambda.max = 1.6564, lambda.min = 8.87E-4, lambda.1se = -1.0 | 161 | 116 | 5 | py_15_sid_9664 |
glm_grid_model_60 | binomial | logit | Elastic Net (alpha = 0.05, lambda = 8.0E-4 ) | nlambda = 100, lambda.max = 0.6626, lambda.min = 8.0E-4, lambda.1se = -1.0 | 161 | 90 | 6 | py_15_sid_9664 |
glm_grid_model_90 | binomial | logit | Elastic Net (alpha = 0.02, lambda = 9.92E-4 ) | nlambda = 100, lambda.max = 1.6564, lambda.min = 9.92E-4, lambda.1se = -1.0 | 161 | 114 | 5 | py_15_sid_9664 |
glm_grid_model_71 | binomial | logit | Elastic Net (alpha = 0.09, lambda = 7.94E-4 ) | nlambda = 100, lambda.max = 0.3681, lambda.min = 7.94E-4, lambda.1se = -1.0 | 161 | 81 | 6 | py_15_sid_9664 |
glm_grid_model_26 | binomial | logit | Elastic Net (alpha = 0.93, lambda = 2.45E-4 ) | nlambda = 100, lambda.max = 0.03562, lambda.min = 2.45E-4, lambda.1se = -1.0 | 161 | 44 | 7 | py_15_sid_9664 |
glm_grid_model_80 | binomial | logit | Elastic Net (alpha = 0.23, lambda = 5.77E-4 ) | nlambda = 100, lambda.max = 0.144, lambda.min = 5.77E-4, lambda.1se = -1.0 | 161 | 64 | 6 | py_15_sid_9664 |
glm_grid_model_77 | binomial | logit | Elastic Net (alpha = 0.6, lambda = 3.29E-4 ) | nlambda = 100, lambda.max = 0.05521, lambda.min = 3.29E-4, lambda.1se = -1.0 | 161 | 47 | 7 | py_15_sid_9664 |
glm_grid_model_87 | binomial | logit | Elastic Net (alpha = 0.52, lambda = 3.66E-4 ) | nlambda = 100, lambda.max = 0.06371, lambda.min = 3.66E-4, lambda.1se = -1.0 | 161 | 48 | 6 | py_15_sid_9664 |
glm_grid_model_45 | binomial | logit | Elastic Net (alpha = 0.25, lambda = 5.9E-4 ) | nlambda = 100, lambda.max = 0.1325, lambda.min = 5.9E-4, lambda.1se = -1.0 | 161 | 59 | 6 | py_15_sid_9664 |
glm_grid_model_81 | binomial | logit | Elastic Net (alpha = 0.8, lambda = 3.02E-4 ) | nlambda = 100, lambda.max = 0.04141, lambda.min = 3.02E-4, lambda.1se = -1.0 | 161 | 43 | 7 | py_15_sid_9664 |
glm_grid_model_33 | binomial | logit | Elastic Net (alpha = 0.64, lambda = 3.47E-4 ) | nlambda = 100, lambda.max = 0.05176, lambda.min = 3.47E-4, lambda.1se = -1.0 | 161 | 45 | 6 | py_15_sid_9664 |
glm_grid_model_43 | binomial | logit | Elastic Net (alpha = 0.1, lambda = 8.66E-4 ) | nlambda = 100, lambda.max = 0.3313, lambda.min = 8.66E-4, lambda.1se = -1.0 | 161 | 75 | 6 | py_15_sid_9664 |
glm_grid_model_55 | binomial | logit | Elastic Net (alpha = 0.66, lambda = 3.52E-4 ) | nlambda = 100, lambda.max = 0.05019, lambda.min = 3.52E-4, lambda.1se = -1.0 | 161 | 43 | 6 | py_15_sid_9664 |
glm_grid_model_12 | binomial | logit | Elastic Net (alpha = 0.11, lambda = 8.43E-4 ) | nlambda = 100, lambda.max = 0.3012, lambda.min = 8.43E-4, lambda.1se = -1.0 | 161 | 73 | 6 | py_15_sid_9664 |
glm_grid_model_65 | binomial | logit | Elastic Net (alpha = 0.86, lambda = 3.02E-4 ) | nlambda = 100, lambda.max = 0.03852, lambda.min = 3.02E-4, lambda.1se = -1.0 | 161 | 40 | 7 | py_15_sid_9664 |
glm_grid_model_10 | binomial | logit | Elastic Net (alpha = 0.83, lambda = 3.23E-4 ) | nlambda = 100, lambda.max = 0.03991, lambda.min = 3.23E-4, lambda.1se = -1.0 | 161 | 38 | 6 | py_15_sid_9664 |
glm_grid_model_8 | binomial | logit | Elastic Net (alpha = 0.94, lambda = 3.05E-4 ) | nlambda = 100, lambda.max = 0.03524, lambda.min = 3.05E-4, lambda.1se = -1.0 | 161 | 36 | 8 | py_15_sid_9664 |
glm_grid_model_3 | binomial | logit | Elastic Net (alpha = 0.69, lambda = 3.77E-4 ) | nlambda = 100, lambda.max = 0.04801, lambda.min = 3.77E-4, lambda.1se = -1.0 | 161 | 39 | 6 | py_15_sid_9664 |
glm_grid_model_70 | binomial | logit | Elastic Net (alpha = 0.71, lambda = 3.86E-4 ) | nlambda = 100, lambda.max = 0.04666, lambda.min = 3.86E-4, lambda.1se = -1.0 | 161 | 38 | 6 | py_15_sid_9664 |
glm_grid_model_44 | binomial | logit | Elastic Net (alpha = 0.19, lambda = 7.88E-4 ) | nlambda = 100, lambda.max = 0.1744, lambda.min = 7.88E-4, lambda.1se = -1.0 | 161 | 60 | 6 | py_15_sid_9664 |
glm_grid_model_41 | binomial | logit | Elastic Net (alpha = 0.67, lambda = 3.99E-4 ) | nlambda = 100, lambda.max = 0.04944, lambda.min = 3.99E-4, lambda.1se = -1.0 | 161 | 38 | 6 | py_15_sid_9664 |
glm_grid_model_36 | binomial | logit | Elastic Net (alpha = 0.94, lambda = 3.4E-4 ) | nlambda = 100, lambda.max = 0.03524, lambda.min = 3.4E-4, lambda.1se = -1.0 | 161 | 35 | 7 | py_15_sid_9664 |
glm_grid_model_94 | binomial | logit | Elastic Net (alpha = 0.15, lambda = 9.21E-4 ) | nlambda = 100, lambda.max = 0.2209, lambda.min = 9.21E-4, lambda.1se = -1.0 | 161 | 64 | 6 | py_15_sid_9664 |
glm_grid_model_29 | binomial | logit | Elastic Net (alpha = 0.59, lambda = 4.59E-4 ) | nlambda = 100, lambda.max = 0.05615, lambda.min = 4.59E-4, lambda.1se = -1.0 | 161 | 39 | 6 | py_15_sid_9664 |
glm_grid_model_25 | binomial | logit | Elastic Net (alpha = 0.3, lambda = 6.75E-4 ) | nlambda = 100, lambda.max = 0.1104, lambda.min = 6.75E-4, lambda.1se = -1.0 | 161 | 47 | 7 | py_15_sid_9664 |
glm_grid_model_52 | binomial | logit | Elastic Net (alpha = 0.34, lambda = 6.66E-4 ) | nlambda = 100, lambda.max = 0.09743, lambda.min = 6.66E-4, lambda.1se = -1.0 | 161 | 45 | 7 | py_15_sid_9664 |
glm_grid_model_47 | binomial | logit | Elastic Net (alpha = 0.19, lambda = 8.82E-4 ) | nlambda = 100, lambda.max = 0.1744, lambda.min = 8.82E-4, lambda.1se = -1.0 | 161 | 56 | 6 | py_15_sid_9664 |
glm_grid_model_31 | binomial | logit | Elastic Net (alpha = 0.58, lambda = 5.09E-4 ) | nlambda = 100, lambda.max = 0.05712, lambda.min = 5.09E-4, lambda.1se = -1.0 | 161 | 37 | 6 | py_15_sid_9664 |
glm_grid_model_27 | binomial | logit | Elastic Net (alpha = 0.81, lambda = 4.25E-4 ) | nlambda = 100, lambda.max = 0.0409, lambda.min = 4.25E-4, lambda.1se = -1.0 | 161 | 34 | 6 | py_15_sid_9664 |
glm_grid_model_75 | binomial | logit | Elastic Net (alpha = 0.59, lambda = 5.18E-4 ) | nlambda = 100, lambda.max = 0.05615, lambda.min = 5.18E-4, lambda.1se = -1.0 | 161 | 36 | 7 | py_15_sid_9664 |
glm_grid_model_84 | binomial | logit | Elastic Net (alpha = 0.96, lambda = 3.8E-4 ) | nlambda = 100, lambda.max = 0.03451, lambda.min = 3.8E-4, lambda.1se = -1.0 | 161 | 30 | 6 | py_15_sid_9664 |
glm_grid_model_64 | binomial | logit | Elastic Net (alpha = 0.29, lambda = 7.81E-4 ) | nlambda = 100, lambda.max = 0.1142, lambda.min = 7.81E-4, lambda.1se = -1.0 | 161 | 45 | 7 | py_15_sid_9664 |
glm_grid_model_68 | binomial | logit | Elastic Net (alpha = 0.36, lambda = 6.96E-4 ) | nlambda = 100, lambda.max = 0.09202, lambda.min = 6.96E-4, lambda.1se = -1.0 | 161 | 42 | 7 | py_15_sid_9664 |
glm_grid_model_66 | binomial | logit | Elastic Net (alpha = 0.48, lambda = 5.93E-4 ) | nlambda = 100, lambda.max = 0.06902, lambda.min = 5.93E-4, lambda.1se = -1.0 | 161 | 36 | 6 | py_15_sid_9664 |
glm_grid_model_53 | binomial | logit | Elastic Net (alpha = 0.6, lambda = 5.69E-4 ) | nlambda = 100, lambda.max = 0.05521, lambda.min = 5.69E-4, lambda.1se = -1.0 | 161 | 35 | 6 | py_15_sid_9664 |
glm_grid_model_58 | binomial | logit | Elastic Net (alpha = 0.43, lambda = 6.94E-4 ) | nlambda = 100, lambda.max = 0.07704, lambda.min = 6.94E-4, lambda.1se = -1.0 | 161 | 36 | 6 | py_15_sid_9664 |
glm_grid_model_59 | binomial | logit | Elastic Net (alpha = 0.55, lambda = 6.13E-4 ) | nlambda = 100, lambda.max = 0.06023, lambda.min = 6.13E-4, lambda.1se = -1.0 | 161 | 35 | 6 | py_15_sid_9664 |
glm_grid_model_4 | binomial | logit | Elastic Net (alpha = 0.22, lambda = 9.91E-4 ) | nlambda = 100, lambda.max = 0.1506, lambda.min = 9.91E-4, lambda.1se = -1.0 | 161 | 46 | 6 | py_15_sid_9664 |
glm_grid_model_21 | binomial | logit | Elastic Net (alpha = 0.86, lambda = 4.86E-4 ) | nlambda = 100, lambda.max = 0.03852, lambda.min = 4.86E-4, lambda.1se = -1.0 | 161 | 30 | 6 | py_15_sid_9664 |
glm_grid_model_49 | binomial | logit | Elastic Net (alpha = 0.63, lambda = 5.75E-4 ) | nlambda = 100, lambda.max = 0.05258, lambda.min = 5.75E-4, lambda.1se = -1.0 | 161 | 31 | 6 | py_15_sid_9664 |
glm_grid_model_88 | binomial | logit | Elastic Net (alpha = 0.29, lambda = 8.94E-4 ) | nlambda = 100, lambda.max = 0.1142, lambda.min = 8.94E-4, lambda.1se = -1.0 | 161 | 40 | 7 | py_15_sid_9664 |
glm_grid_model_13 | binomial | logit | Elastic Net (alpha = 0.73, lambda = 5.53E-4 ) | nlambda = 100, lambda.max = 0.04538, lambda.min = 5.53E-4, lambda.1se = -1.0 | 161 | 31 | 6 | py_15_sid_9664 |
glm_grid_model_89 | binomial | logit | Elastic Net (alpha = 0.65, lambda = 5.88E-4 ) | nlambda = 100, lambda.max = 0.05097, lambda.min = 5.88E-4, lambda.1se = -1.0 | 161 | 31 | 6 | py_15_sid_9664 |
glm_grid_model_6 | binomial | logit | Elastic Net (alpha = 0.5, lambda = 6.93E-4 ) | nlambda = 100, lambda.max = 0.06626, lambda.min = 6.93E-4, lambda.1se = -1.0 | 161 | 33 | 6 | py_15_sid_9664 |
glm_grid_model_19 | binomial | logit | Elastic Net (alpha = 0.87, lambda = 5.1E-4 ) | nlambda = 100, lambda.max = 0.03808, lambda.min = 5.1E-4, lambda.1se = -1.0 | 161 | 30 | 6 | py_15_sid_9664 |
glm_grid_model_63 | binomial | logit | Elastic Net (alpha = 0.88, lambda = 5.18E-4 ) | nlambda = 100, lambda.max = 0.03765, lambda.min = 5.18E-4, lambda.1se = -1.0 | 161 | 30 | 6 | py_15_sid_9664 |
glm_grid_model_74 | binomial | logit | Elastic Net (alpha = 0.42, lambda = 8.13E-4 ) | nlambda = 100, lambda.max = 0.07888, lambda.min = 8.13E-4, lambda.1se = -1.0 | 161 | 33 | 6 | py_15_sid_9664 |
glm_grid_model_95 | binomial | logit | Elastic Net (alpha = 0.32, lambda = 9.5E-4 ) | nlambda = 100, lambda.max = 0.1035, lambda.min = 9.5E-4, lambda.1se = -1.0 | 161 | 35 | 6 | py_15_sid_9664 |
glm_grid_model_18 | binomial | logit | Elastic Net (alpha = 0.66, lambda = 6.59E-4 ) | nlambda = 100, lambda.max = 0.05019, lambda.min = 6.59E-4, lambda.1se = -1.0 | 161 | 30 | 6 | py_15_sid_9664 |
glm_grid_model_40 | binomial | logit | Elastic Net (alpha = 0.43, lambda = 8.42E-4 ) | nlambda = 100, lambda.max = 0.07704, lambda.min = 8.42E-4, lambda.1se = -1.0 | 161 | 31 | 6 | py_15_sid_9664 |
glm_grid_model_9 | binomial | logit | Elastic Net (alpha = 0.59, lambda = 7.31E-4 ) | nlambda = 100, lambda.max = 0.05615, lambda.min = 7.31E-4, lambda.1se = -1.0 | 161 | 30 | 6 | py_15_sid_9664 |
glm_grid_model_24 | binomial | logit | Elastic Net (alpha = 0.41, lambda = 8.91E-4 ) | nlambda = 100, lambda.max = 0.0808, lambda.min = 8.91E-4, lambda.1se = -1.0 | 161 | 30 | 6 | py_15_sid_9664 |
glm_grid_model_62 | binomial | logit | Elastic Net (alpha = 0.94, lambda = 5.09E-4 ) | nlambda = 100, lambda.max = 0.03524, lambda.min = 5.09E-4, lambda.1se = -1.0 | 161 | 28 | 6 | py_15_sid_9664 |
glm_grid_model_5 | binomial | logit | Elastic Net (alpha = 0.54, lambda = 7.8E-4 ) | nlambda = 100, lambda.max = 0.06135, lambda.min = 7.8E-4, lambda.1se = -1.0 | 161 | 30 | 6 | py_15_sid_9664 |
glm_grid_model_76 | binomial | logit | Elastic Net (alpha = 0.41, lambda = 9.56E-4 ) | nlambda = 100, lambda.max = 0.0808, lambda.min = 9.56E-4, lambda.1se = -1.0 | 161 | 30 | 6 | py_15_sid_9664 |
glm_grid_model_50 | binomial | logit | Elastic Net (alpha = 0.41, lambda = 9.93E-4 ) | nlambda = 100, lambda.max = 0.0808, lambda.min = 9.93E-4, lambda.1se = -1.0 | 161 | 30 | 6 | py_15_sid_9664 |
glm_grid_model_32 | binomial | logit | Elastic Net (alpha = 0.6, lambda = 8.16E-4 ) | nlambda = 100, lambda.max = 0.05521, lambda.min = 8.16E-4, lambda.1se = -1.0 | 161 | 28 | 7 | py_15_sid_9664 |
glm_grid_model_97 | binomial | logit | Elastic Net (alpha = 0.55, lambda = 8.76E-4 ) | nlambda = 100, lambda.max = 0.06023, lambda.min = 8.76E-4, lambda.1se = -1.0 | 161 | 28 | 6 | py_15_sid_9664 |
glm_grid_model_57 | binomial | logit | Elastic Net (alpha = 0.84, lambda = 6.74E-4 ) | nlambda = 100, lambda.max = 0.03944, lambda.min = 6.74E-4, lambda.1se = -1.0 | 161 | 25 | 6 | py_15_sid_9664 |
glm_grid_model_91 | binomial | logit | Elastic Net (alpha = 0.52, lambda = 9.45E-4 ) | nlambda = 100, lambda.max = 0.06371, lambda.min = 9.45E-4, lambda.1se = -1.0 | 161 | 28 | 7 | py_15_sid_9664 |
glm_grid_model_69 | binomial | logit | Elastic Net (alpha = 0.57, lambda = 9.53E-4 ) | nlambda = 100, lambda.max = 0.05812, lambda.min = 9.53E-4, lambda.1se = -1.0 | 161 | 26 | 6 | py_15_sid_9664 |
glm_grid_model_56 | binomial | logit | Elastic Net (alpha = 0.65, lambda = 9.38E-4 ) | nlambda = 100, lambda.max = 0.05097, lambda.min = 9.38E-4, lambda.1se = -1.0 | 161 | 25 | 6 | py_15_sid_9664 |
glm_grid_model_85 | binomial | logit | Elastic Net (alpha = 0.95, lambda = 7.41E-4 ) | nlambda = 100, lambda.max = 0.03487, lambda.min = 7.41E-4, lambda.1se = -1.0 | 161 | 21 | 6 | py_15_sid_9664 |
glm_grid_model_39 | binomial | logit | Elastic Net (alpha = 0.86, lambda = 8.06E-4 ) | nlambda = 100, lambda.max = 0.03852, lambda.min = 8.06E-4, lambda.1se = -1.0 | 161 | 21 | 6 | py_15_sid_9664 |
glm_grid_model_99 | binomial | logit | Elastic Net (alpha = 0.92, lambda = 8.6E-4 ) | nlambda = 100, lambda.max = 0.03601, lambda.min = 8.6E-4, lambda.1se = -1.0 | 161 | 19 | 7 | py_15_sid_9664 |
glm_grid_model_15 | binomial | logit | Elastic Net (alpha = 0.74, lambda = 9.87E-4 ) | nlambda = 100, lambda.max = 0.04477, lambda.min = 9.87E-4, lambda.1se = -1.0 | 161 | 21 | 6 | py_15_sid_9664 |
glm_grid_model_23 | binomial | logit | Elastic Net (alpha = 0.88, lambda = 9.21E-4 ) | nlambda = 100, lambda.max = 0.03765, lambda.min = 9.21E-4, lambda.1se = -1.0 | 161 | 19 | 6 | py_15_sid_9664 |
glm_grid_model_7 | binomial | logit | Elastic Net (alpha = 0.88, lambda = 9.48E-4 ) | nlambda = 100, lambda.max = 0.03765, lambda.min = 9.48E-4, lambda.1se = -1.0 | 161 | 19 | 6 | py_15_sid_9664 |
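When only the searched hyper-parameters of one model are of interest, get_hyperparams_dict() (documented in the help output earlier) returns just those, a lighter alternative to the full actual_params dump in the next cell:

glm_grid.get_hyperparams_dict("glm_grid_model_61")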
sorted_glm_grid = glm_grid.get_grid(sort_by='auc',decreasing=True)
sorted_glm_grid[0].actual_params
{'model_id': 'glm_grid_model_61', 'training_frame': 'py_15_sid_9664', 'validation_frame': 'py_16_sid_9664', 'nfolds': 0, 'seed': 202, 'keep_cross_validation_models': True, 'keep_cross_validation_predictions': False, 'keep_cross_validation_fold_assignment': False, 'fold_assignment': 'AUTO', 'fold_column': None, 'response_column': 'DELINQUENT', 'ignored_columns': ['PRODUCT_TYPE', 'PREPAYMENT_PENALTY_MORTGAGE_FLAG', 'PREPAID'], 'random_columns': None, 'ignore_const_cols': True, 'score_each_iteration': False, 'offset_column': None, 'weights_column': None, 'family': 'binomial', 'rand_family': None, 'tweedie_variance_power': 0.0, 'tweedie_link_power': 1.0, 'theta': 1e-10, 'solver': 'COORDINATE_DESCENT', 'alpha': [0.87], 'lambda': [4.9999999999999996e-06], 'lambda_search': True, 'early_stopping': True, 'nlambdas': 100, 'standardize': True, 'missing_values_handling': 'MeanImputation', 'plug_values': None, 'compute_p_values': False, 'remove_collinear_columns': False, 'intercept': True, 'non_negative': False, 'max_iterations': 1000, 'objective_epsilon': 0.0001, 'beta_epsilon': 0.0001, 'gradient_epsilon': 1.0000000000000002e-06, 'link': 'logit', 'rand_link': None, 'startval': None, 'calc_like': False, 'HGLM': False, 'prior': -1.0, 'lambda_min_ratio': 0.0001, 'beta_constraints': None, 'max_active_predictors': 5000, 'interactions': None, 'interaction_pairs': None, 'obj_reg': 2.854956775954412e-06, 'export_checkpoints_dir': None, 'balance_classes': False, 'class_sampling_factors': None, 'max_after_balance_size': 5.0, 'max_confusion_matrix_size': 20, 'max_hit_ratio_k': 0, 'max_runtime_secs': 179.878, 'custom_metric_func': None}
# [threshold, F1] at the F1-maximizing threshold for the two best models
print(sorted_glm_grid[0].F1())
sorted_glm_grid[1].F1()
[[0.13993235618594693, 0.29243426359090174]]
[[0.1388919375261923, 0.29188911043931304]]
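The two leaders are nearly indistinguishable on max F1. To compare a few of the top models on validation AUC as well, a loop over the sorted grid works; this is a sketch using the standard binomial-model accessors auc(valid=True) and F1(valid=True).

# Sketch: report validation AUC and max-F1 for the three best grid models.
for m in sorted_glm_grid.models[:3]:
    print(m.model_id, m.auc(valid=True), m.F1(valid=True))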
sorted_glm_grid[0].model_performance(test)  # test AUC ≈ 0.8524, versus 0.8523 for the untuned model
ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.031143376101575086
RMSE: 0.17647485968708146
LogLoss: 0.12199693111453563
Null degrees of freedom: 74897
Residual degrees of freedom: 74754
Null deviance: 23061.156287645877
Residual deviance: 18274.652293232975
AIC: 18562.652293232975
AUC: 0.8524158062119054
AUCPR: 0.20258611034476104
Gini: 0.7048316124238108

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.13069466877003805:
| | (actual) | FALSE | TRUE | Error | Rate |
|---|---|---|---|---|---|
0 | FALSE | 68851.0 | 3375.0 | 0.0467 | (3375.0/72226.0) |
1 | TRUE | 1676.0 | 996.0 | 0.6272 | (1676.0/2672.0) |
2 | Total | 70527.0 | 4371.0 | 0.0674 | (5051.0/74898.0) |
Maximum Metrics: Maximum metrics at their respective thresholds
| | metric | threshold | value | idx |
|---|---|---|---|---|
0 | max f1 | 0.130695 | 0.282834 | 202.0 |
1 | max f2 | 0.064974 | 0.386713 | 264.0 |
2 | max f0point5 | 0.206442 | 0.283731 | 151.0 |
3 | max accuracy | 0.939516 | 0.964311 | 0.0 |
4 | max precision | 0.602416 | 0.438596 | 32.0 |
5 | max recall | 0.000945 | 1.000000 | 398.0 |
6 | max specificity | 0.939516 | 0.999986 | 0.0 |
7 | max absolute_mcc | 0.072018 | 0.263347 | 255.0 |
8 | max min_per_class_accuracy | 0.038326 | 0.773336 | 305.0 |
9 | max mean_per_class_accuracy | 0.032702 | 0.777303 | 315.0 |
10 | max tns | 0.939516 | 72225.000000 | 0.0 |
11 | max fns | 0.939516 | 2672.000000 | 0.0 |
12 | max fps | 0.000566 | 72226.000000 | 399.0 |
13 | max tps | 0.000945 | 2672.000000 | 398.0 |
14 | max tnr | 0.939516 | 0.999986 | 0.0 |
15 | max fnr | 0.939516 | 1.000000 | 0.0 |
16 | max fpr | 0.000566 | 1.000000 | 399.0 |
17 | max tpr | 0.000945 | 1.000000 | 398.0 |
Gains/Lift Table: Avg response rate: 3.57 %, avg score: 3.60 %
| | group | cumulative_data_fraction | lower_threshold | lift | cumulative_lift | response_rate | score | cumulative_response_rate | cumulative_score | capture_rate | cumulative_capture_rate | gain | cumulative_gain |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0.010000 | 0.323153 | 9.954824 | 9.954824 | 0.355140 | 0.464324 | 0.355140 | 0.464324 | 0.099551 | 0.099551 | 895.482400 | 895.482400 | |
1 | 2 | 0.020001 | 0.238585 | 7.859072 | 8.906948 | 0.280374 | 0.275583 | 0.317757 | 0.369954 | 0.078593 | 0.178144 | 685.907158 | 790.694779 | |
2 | 3 | 0.030001 | 0.194006 | 6.362106 | 8.058667 | 0.226969 | 0.214539 | 0.287494 | 0.318149 | 0.063623 | 0.241766 | 536.210556 | 705.866705 | |
3 | 4 | 0.040001 | 0.164906 | 4.939988 | 7.278997 | 0.176235 | 0.178489 | 0.259680 | 0.283234 | 0.049401 | 0.291168 | 393.998785 | 627.899725 | |
4 | 5 | 0.050001 | 0.143184 | 4.378626 | 6.698923 | 0.156208 | 0.153711 | 0.238985 | 0.257329 | 0.043787 | 0.334955 | 337.862559 | 569.892292 | |
5 | 6 | 0.100003 | 0.087476 | 3.428052 | 5.063488 | 0.122296 | 0.111145 | 0.180641 | 0.184237 | 0.171407 | 0.506362 | 242.805217 | 406.348754 | |
6 | 7 | 0.150004 | 0.062147 | 2.492448 | 4.206475 | 0.088919 | 0.073591 | 0.150067 | 0.147355 | 0.124626 | 0.630988 | 149.244841 | 320.647450 | |
7 | 8 | 0.200005 | 0.047494 | 1.639178 | 3.564650 | 0.058478 | 0.054272 | 0.127170 | 0.124084 | 0.081961 | 0.712949 | 63.917779 | 256.465032 | |
8 | 9 | 0.300008 | 0.030300 | 1.205058 | 2.778119 | 0.042991 | 0.037927 | 0.099110 | 0.095365 | 0.120509 | 0.833458 | 20.505764 | 177.811943 | |
9 | 10 | 0.399997 | 0.020667 | 0.591381 | 2.231489 | 0.021098 | 0.025088 | 0.079609 | 0.077797 | 0.059132 | 0.892590 | -40.861947 | 123.148945 | |
10 | 11 | 0.500000 | 0.014547 | 0.389211 | 1.863024 | 0.013885 | 0.017407 | 0.066464 | 0.065719 | 0.038922 | 0.931512 | -61.078884 | 86.302395 | |
11 | 12 | 0.600003 | 0.010226 | 0.336817 | 1.608651 | 0.012016 | 0.012272 | 0.057389 | 0.056811 | 0.033683 | 0.965195 | -66.318265 | 60.865053 | |
12 | 13 | 0.699992 | 0.007069 | 0.157202 | 1.401321 | 0.005608 | 0.008589 | 0.049992 | 0.049923 | 0.015719 | 0.980913 | -84.279758 | 40.132057 | |
13 | 14 | 0.799995 | 0.004617 | 0.108530 | 1.239716 | 0.003872 | 0.005802 | 0.044227 | 0.044408 | 0.010853 | 0.991766 | -89.146996 | 23.971636 | |
14 | 15 | 0.899997 | 0.002619 | 0.063621 | 1.109035 | 0.002270 | 0.003584 | 0.039565 | 0.039872 | 0.006362 | 0.998129 | -93.637894 | 10.903523 | |
15 | 16 | 1.000000 | 0.000105 | 0.018712 | 1.000000 | 0.000668 | 0.001628 | 0.035675 | 0.036047 | 0.001871 | 1.000000 | -98.128792 | 0.000000 |
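At this point the tuned GLM can be persisted so it does not have to be retrained; a minimal sketch using h2o.save_model, where the target path "./models" is an arbitrary example.

# Sketch: save the best GLM from the grid to disk; it can be reloaded later
# with h2o.load_model(). The path is an assumption, not from the notebook.
best_glm_path = h2o.save_model(model=sorted_glm_grid[0], path="./models", force=True)
print(best_glm_path)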
# Grid search uses the Cartesian strategy by default (i.e., when no search_criteria is specified);
# here RandomDiscrete search criteria turn it into a random search.
rf_grid = h2o.grid.H2OGridSearch(
H2ORandomForestEstimator(nfolds=10),
hyper_params = {
"ntrees": [50,100],
"max_depth": [10,20],
},
search_criteria = {
"strategy":"RandomDiscrete", # Random Search
"max_models":100,
"max_runtime_secs":300,
"seed":42
},
grid_id = "rf_grid_2",
)
%time rf_grid.train(x=x, y=y, training_frame=train, validation_frame = valid)
drf Grid Build progress: |████████████████████████████████████████████████| 100% CPU times: user 2.33 s, sys: 432 ms, total: 2.76 s Wall time: 6min 45s
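With only 2 × 2 = 4 combinations in hyper_params, an exhaustive Cartesian search is equally practical here. For comparison, a minimal sketch that simply omits search_criteria (the grid_id is an assumption); it is not run in this notebook.

# Sketch: the same RF grid as a Cartesian (exhaustive) search; with no
# search_criteria the default strategy trains every combination.
rf_grid_cartesian = h2o.grid.H2OGridSearch(
    H2ORandomForestEstimator(nfolds=10),
    hyper_params={
        "ntrees": [50, 100],
        "max_depth": [10, 20],
    },
    grid_id="rf_grid_cartesian",
)
# rf_grid_cartesian.train(x=x, y=y, training_frame=train, validation_frame=valid)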
rf_grid.get_grid(sort_by='auc', decreasing=True)
| | max_depth | ntrees | model_ids | auc |
|---|---|---|---|---|
| 0 | 20 | 28 | rf_grid_2_model_1 | 0.818864830103598 |
best_model = rf_grid.get_grid(sort_by="auc", decreasing=True)[0]
# Continue training from the best grid model via checkpointing
rf = H2ORandomForestEstimator(seed=42, model_id='default_random_forest', checkpoint=best_model.model_id)
%time rf.train(x=x, y=y, training_frame=train, validation_frame=valid)
drf Model Build progress: |███████████████████████████████████████████████| 100% CPU times: user 347 ms, sys: 109 ms, total: 456 ms Wall time: 39.7 s
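A note on checkpointing: a checkpointed DRF build can only add trees, so the new ntrees must be larger than the number of trees already in the checkpointed model. The grid listing above reported 28 trees for the best model (presumably cut short by the 300-second budget), and since the DRF default is ntrees=50, this continued build stops at 50 trees, as the summary below confirms. To grow the forest further, request more trees explicitly; a hedged sketch (the value 200 and the model_id are assumptions).

# Sketch: continue from the same checkpoint but ask for a larger forest;
# ntrees must exceed the number of trees already in the checkpointed model.
rf_bigger = H2ORandomForestEstimator(
    seed=42,
    model_id='rf_checkpoint_200_trees',   # hypothetical id
    checkpoint=best_model.model_id,
    ntrees=200,
)
# rf_bigger.train(x=x, y=y, training_frame=train, validation_frame=valid)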
rf.summary()
Model Summary:
| | number_of_trees | number_of_internal_trees | model_size_in_bytes | min_depth | max_depth | mean_depth | min_leaves | max_leaves | mean_leaves |
|---|---|---|---|---|---|---|---|---|---|
0 | 50.0 | 50.0 | 7675073.0 | 20.0 | 20.0 | 8.8 | 9881.0 | 11724.0 | 4780.06 |
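To finish, the checkpoint-continued forest can be scored on the held-out test frame just like the GLM above; a minimal sketch.

# Sketch: evaluate the continued random forest on the test set.
rf_test_perf = rf.model_performance(test)
print(rf_test_perf.auc())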