# Import H2O and other libraries that will be used in this tutorial
import h2o
import matplotlib.pyplot as plt

# Import the estimators
from h2o.estimators.glm import H2OGeneralizedLinearEstimator
from h2o.estimators import H2ORandomForestEstimator
from h2o.estimators.gbm import H2OGradientBoostingEstimator

# Import H2O grid search
import h2o.grid
from h2o.grid.grid_search import H2OGridSearch
h2o.init(max_mem_size=16)  # ask for a 16 GB JVM; ignored here because an existing cluster is reused
Checking whether there is an H2O instance running at http://localhost:54321 . connected.
H2O cluster uptime: | 21 hours 35 mins |
H2O cluster timezone: | Etc/UTC |
H2O data parsing timezone: | UTC |
H2O cluster version: | 3.28.0.2 |
H2O cluster version age: | 1 month and 14 days |
H2O cluster name: | H2O_from_python_unknownUser_b8im2o |
H2O cluster total nodes: | 1 |
H2O cluster free memory: | 2.931 Gb |
H2O cluster total cores: | 4 |
H2O cluster allowed cores: | 4 |
H2O cluster status: | locked, healthy |
H2O connection url: | http://localhost:54321 |
H2O connection proxy: | {'http': None, 'https': None} |
H2O internal security: | False |
H2O API Extensions: | Amazon S3, XGBoost, Algos, AutoML, Core V3, TargetEncoder, Core V4 |
Python version: | 3.6.10 final |
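The call above connected to an H2O cluster that was already running (note the roughly 21-hour uptime), so the max_mem_size request did not change anything for this session. As an optional sketch, not part of the original notebook, the attached cluster can be re-inspected at any time:

# Optional: re-check the status of the cluster this session is attached to
h2o.cluster().show_status()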
loan_level = h2o.import_file("https://s3.amazonaws.com/data.h2o.ai/DAI-Tutorials/loan_level_500k.csv")
Parse progress: |█████████████████████████████████████████████████████████| 100%
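Before splitting, a quick look at the response column is useful; the sketch below (not in the original run) tabulates DELINQUENT and summarizes the frame, and it is this heavy class imbalance that shows up later as the ~3.6 % average response rate in the gains/lift tables.

# Level counts for the response column, plus a frame summary
loan_level["DELINQUENT"].table()
loan_level.describe()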
train, valid, test = loan_level.split_frame([0.7, 0.15], seed=42)
print("train:%d valid:%d test:%d" % (train.nrows, valid.nrows, test.nrows))
y = "DELINQUENT"
ignore = ["DELINQUENT", "PREPAID", "PREPAYMENT_PENALTY_MORTGAGE_FLAG", "PRODUCT_TYPE"]
x = list(set(train.names) - set(ignore))
train:350268 valid:74971 test:74898
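split_frame() assigns rows to the splits approximately, so the counts land near, rather than exactly on, the requested 70/15/15. A small sanity check of the realized fractions:

# Realized split fractions (approximate by design)
total = loan_level.nrows
print("train: %.3f  valid: %.3f  test: %.3f" % (train.nrows / total, valid.nrows / total, test.nrows / total))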
# Cartesian grid: 4 alpha values x 4 lambda values = 16 GLMs
glm_grid = h2o.grid.H2OGridSearch(
    H2OGeneralizedLinearEstimator(
        family="binomial",
        lambda_search=True),
    hyper_params={
        "alpha": [i * 0.01 for i in range(0, 4)],
        "lambda": [i * 1e-6 for i in range(0, 4)],
    },
    grid_id="glm_grid_2",
)
%time glm_grid.train(x=x, y=y, training_frame=train, validation_frame = valid)
glm Grid Build progress: |████████████████████████████████████████████████| 100%
CPU times: user 755 ms, sys: 55.9 ms, total: 811 ms
Wall time: 35.5 s
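The Cartesian grid above builds every alpha/lambda combination, 16 GLMs in total. A quick way to compare them, sketched here rather than taken from the original run, is the grid's sorted summary table:

# Summary of the 16 Cartesian-grid models, sorted by the default metric
print(glm_grid.sorted_metric_table())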
# Random search over the 100 x 1000 alpha/lambda combinations,
# capped at 100 models or 300 seconds, whichever comes first
glm_grid = h2o.grid.H2OGridSearch(
    H2OGeneralizedLinearEstimator(
        family="binomial",
        lambda_search=True),
    hyper_params={
        "alpha": [i * 0.01 for i in range(0, 100)],
        "lambda": [i * 1e-6 for i in range(0, 1000)],
    },
    grid_id="glm_grid",
    search_criteria={
        "strategy": "RandomDiscrete",
        "max_models": 100,
        "max_runtime_secs": 300,
        "seed": 42
    }
)
%time glm_grid.train(x=x, y=y, training_frame=train, validation_frame = valid)
glm Grid Build progress: |████████████████████████████████████████████████| 100%
CPU times: user 4.73 s, sys: 504 ms, total: 5.23 s
Wall time: 3min 26s
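The RandomDiscrete search samples the 100 × 1000 combination space and stops at whichever limit is hit first; the roughly 3.5-minute wall time shows the 100-model cap was reached before the 300-second budget ran out. A quick check of how many models were actually built:

# How many models did the random search build?
print(len(glm_grid.model_ids), "models in grid", glm_grid.grid_id)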
h2o.ls()
 | key |
---|---|
0 | GLM_model_python_1583377547966_1 |
1 | GLM_model_python_1583377547966_11 |
2 | GLM_model_python_1583377547966_14 |
3 | GLM_model_python_1583377547966_3 |
4 | GLM_model_python_1583377547966_5 |
5 | GLM_model_python_1583377547966_8 |
6 | glm_grid |
7 | glm_grid_2 |
8 | glm_grid_2_model_1 |
9 | glm_grid_2_model_10 |
10 | glm_grid_2_model_11 |
11 | glm_grid_2_model_12 |
12 | glm_grid_2_model_13 |
13 | glm_grid_2_model_14 |
14 | glm_grid_2_model_15 |
15 | glm_grid_2_model_16 |
16 | glm_grid_2_model_2 |
17 | glm_grid_2_model_3 |
18 | glm_grid_2_model_4 |
19 | glm_grid_2_model_5 |
20 | glm_grid_2_model_6 |
21 | glm_grid_2_model_7 |
22 | glm_grid_2_model_8 |
23 | glm_grid_2_model_9 |
24 | glm_grid_model_1 |
25 | glm_grid_model_10 |
26 | glm_grid_model_100 |
27 | glm_grid_model_11 |
28 | glm_grid_model_12 |
29 | glm_grid_model_13 |
... | ... |
358 | modelmetrics_glm_grid_model_96@-6013562878500035200_on_py_16_sid_9... |
359 | modelmetrics_glm_grid_model_97@-6125460407153251328_on_py_15_sid_9... |
360 | modelmetrics_glm_grid_model_97@-6125460407153251328_on_py_16_sid_9... |
361 | modelmetrics_glm_grid_model_98@-2192349973378340480_on_py_15_sid_9... |
362 | modelmetrics_glm_grid_model_98@-2192349973378340480_on_py_16_sid_9... |
363 | modelmetrics_glm_grid_model_99@-3110742583558476800_on_py_15_sid_9... |
364 | modelmetrics_glm_grid_model_99@-3110742583558476800_on_py_16_sid_9... |
365 | modelmetrics_glm_grid_model_9@9000166489408089856_on_py_15_sid_966... |
366 | modelmetrics_glm_grid_model_9@9000166489408089856_on_py_16_sid_966... |
367 | prostate.hex |
368 | prostate1.hex |
369 | prostate2.hex |
370 | prostate3.hex |
371 | prostate4.hex |
372 | prostate5.hex |
373 | prostate6.hex |
374 | prostate7.hex |
375 | py_15_sid_9664 |
376 | py_16_sid_9664 |
377 | py_17_sid_9664 |
378 | py_1_sid_9fcd |
379 | py_27_sid_9317 |
380 | py_28_sid_9317 |
381 | py_29_sid_9317 |
382 | py_2_sid_9fcd |
383 | py_30_sid_9317 |
384 | py_31_sid_9317 |
385 | py_32_sid_9317 |
386 | py_3_sid_9fcd |
387 | py_4_sid_9fcd |
388 rows × 1 columns
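Because the cluster has been up for a while, h2o.ls() also lists keys left over from earlier work (for example the prostate frames) next to the two grids and their models. A minimal cleanup sketch, assuming those older frames are no longer needed:

# h2o.ls() returns a pandas DataFrame with a single 'key' column
for key in h2o.ls()["key"]:
    if key.startswith("prostate"):
        h2o.remove(key)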
help(h2o.grid.H2OGridSearch)
Help on class H2OGridSearch in module h2o.grid.grid_search: class H2OGridSearch(H2OGridSearch) | Grid Search of a Hyper-Parameter Space for a Model | | :param model: The type of model to be explored initialized with optional parameters that will be | unchanged across explored models. | :param hyper_params: A dictionary of string parameters (keys) and a list of values to be explored by grid | search (values). | :param str grid_id: The unique id assigned to the resulting grid object. If none is given, an id will | automatically be generated. | :param search_criteria: The optional dictionary of directives which control the search of the hyperparameter space. | The dictionary can include values for: ``strategy``, ``max_models``, ``max_runtime_secs``, ``stopping_metric``, | ``stopping_tolerance``, ``stopping_rounds`` and ``seed``. The default strategy, "Cartesian", covers the entire space of | hyperparameter combinations. If you want to use cartesian grid search, you can leave the search_criteria | argument unspecified. Specify the "RandomDiscrete" strategy to get random search of all the combinations of | your hyperparameters with three ways of specifying when to stop the search: max number of models, max time, and | metric-based early stopping (e.g., stop if MSE hasn’t improved by 0.0001 over the 5 best models). | Examples below:: | | >>> criteria = {"strategy": "RandomDiscrete", "max_runtime_secs": 600, | ... "max_models": 100, "stopping_metric": "AUTO", | ... "stopping_tolerance": 0.00001, "stopping_rounds": 5, | ... "seed": 123456} | >>> criteria = {"strategy": "RandomDiscrete", "max_models": 42, | ... "max_runtime_secs": 28800, "seed": 1234} | >>> criteria = {"strategy": "RandomDiscrete", "stopping_metric": "AUTO", | ... "stopping_tolerance": 0.001, "stopping_rounds": 10} | >>> criteria = {"strategy": "RandomDiscrete", "stopping_rounds": 5, | ... "stopping_metric": "misclassification", | ... "stopping_tolerance": 0.00001} | :param parallelism: Level of parallelism during grid model building. 1 = sequential building (default). | Use the value of 0 for adaptive parallelism - decided by H2O. Any number > 1 sets the exact number of models | built in parallel. | :returns: a new H2OGridSearch instance | | Examples | -------- | >>> from h2o.grid.grid_search import H2OGridSearch | >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator | >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]} | >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family='binomial'), hyper_parameters) | >>> training_data = h2o.import_file("smalldata/logreg/benign.csv") | >>> gs.train(x=range(3) + range(4,11),y=3, training_frame=training_data) | >>> gs.show() | | Method resolution order: | H2OGridSearch | H2OGridSearch | builtins.object | | Methods defined here: | | __getattr__(self, name) | | __getitem__(self, item) | | __init__(self, *args, **kwargs) | | __iter__(self) | | __len__(self) | | __repr__(self) | Return repr(self). | | aic(self, train=False, valid=False, xval=False) | Get the AIC(s). | | If all are False (default), then return the training metric value. | If more than one options is set to True, then return a dictionary of metrics where the keys are "train", | "valid", and "xval". | | :param bool train: If train is True, then return the AIC value for the training data. | :param bool valid: If valid is True, then return the AIC value for the validation data. | :param bool xval: If xval is True, then return the AIC value for the validation data. | | :returns: The AIC. 
| | auc(self, train=False, valid=False, xval=False) | Get the AUC(s). | | If all are False (default), then return the training metric value. | If more than one options is set to True, then return a dictionary of metrics where the keys are "train", | "valid", and "xval". | | :param bool train: If train is True, then return the AUC value for the training data. | :param bool valid: If valid is True, then return the AUC value for the validation data. | :param bool xval: If xval is True, then return the AUC value for the validation data. | | :returns: The AUC. | | aucpr(self, train=False, valid=False, xval=False) | Get the aucPR (Area Under PRECISION RECALL Curve). | | If all are False (default), then return the training metric value. | If more than one options is set to True, then return a dictionary of metrics where the keys are "train", | "valid", and "xval". | | :param bool train: If train is True, then return the aucpr value for the training data. | :param bool valid: If valid is True, then return the aucpr value for the validation data. | :param bool xval: If xval is True, then return the aucpr value for the validation data. | | :returns: The AUCPR for the models in this grid. | | biases(self, vector_id=0) | Return the frame for the respective bias vector. | | :param: vector_id: an integer, ranging from 0 to number of layers, that specifies the bias vector to return. | :returns: an H2OFrame which represents the bias vector identified by vector_id | | build_model(self, algo_params) | (internal) | | catoffsets(self) | Categorical offsets for one-hot encoding | | coef(self) | Return the coefficients that can be applied to the non-standardized data. | | Note: standardize = True by default. If set to False, then coef() returns the coefficients that are fit directly. | | coef_norm(self) | Return coefficients fitted on the standardized data (requires standardize = True, which is on by default). These coefficients can be used to evaluate variable importance. | | deepfeatures(self, test_data, layer) | Obtain a hidden layer's details on a dataset. | | :param test_data: Data to create a feature space on. | :param int layer: Index of the hidden layer. | :returns: A dictionary of hidden layer details for each model. | | get_grid(self, sort_by=None, decreasing=None) | Retrieve an H2OGridSearch instance. | | Optionally specify a metric by which to sort models and a sort order. | Note that if neither cross-validation nor a validation frame is used in the grid search, then the | training metrics will display in the "get grid" output. If a validation frame is passed to the grid, and | ``nfolds = 0``, then the validation metrics will display. However, if ``nfolds`` > 1, then cross-validation | metrics will display even if a validation frame is provided. | | :param str sort_by: A metric by which to sort the models in the grid space. Choices are: ``"logloss"``, | ``"residual_deviance"``, ``"mse"``, ``"auc"``, ``"r2"``, ``"accuracy"``, ``"precision"``, ``"recall"``, | ``"f1"``, etc. | :param bool decreasing: Sort the models in decreasing order of metric if true, otherwise sort in increasing | order (default). | | :returns: A new H2OGridSearch instance optionally sorted on the specified metric. | | get_hyperparams(self, id, display=True) | Get the hyperparameters of a model explored by grid search. | | :param str id: The model id of the model with hyperparameters of interest. | :param bool display: Flag to indicate whether to display the hyperparameter names. 
| | :returns: A list of the hyperparameters for the specified model. | | get_hyperparams_dict(self, id, display=True) | Derived and returned the model parameters used to train the particular grid search model. | | :param str id: The model id of the model with hyperparameters of interest. | :param bool display: Flag to indicate whether to display the hyperparameter names. | | :returns: A dict of model pararmeters derived from the hyper-parameters used to train this particular model. | | get_xval_models(self, key=None) | Return a Model object. | | :param str key: If None, return all cross-validated models; otherwise return the model | specified by the key. | :returns: A model or a list of models. | | gini(self, train=False, valid=False, xval=False) | Get the Gini Coefficient(s). | | If all are False (default), then return the training metric value. | If more than one options is set to True, then return a dictionary of metrics where the keys are "train", | "valid", and "xval". | | :param bool train: If train is True, then return the Gini Coefficient value for the training data. | :param bool valid: If valid is True, then return the Gini Coefficient value for the validation data. | :param bool xval: If xval is True, then return the Gini Coefficient value for the cross validation data. | | :returns: The Gini Coefficient for the models in this grid. | | is_cross_validated(self) | Return True if the model was cross-validated. | | join(self) | Wait until grid finishes computing. | | logloss(self, train=False, valid=False, xval=False) | Get the Log Loss(s). | | If all are False (default), then return the training metric value. | If more than one options is set to True, then return a dictionary of metrics where the keys are "train", | "valid", and "xval". | | :param bool train: If train is True, then return the Log Loss value for the training data. | :param bool valid: If valid is True, then return the Log Loss value for the validation data. | :param bool xval: If xval is True, then return the Log Loss value for the cross validation data. | | :returns: The Log Loss for this binomial model. | | mae(self, train=False, valid=False, xval=False) | | mean_residual_deviance(self, train=False, valid=False, xval=False) | Get the Mean Residual Deviances(s). | | If all are False (default), then return the training metric value. | If more than one options is set to True, then return a dictionary of metrics where the keys are "train", | "valid", and "xval". | | :param bool train: If train is True, then return the Mean Residual Deviance value for the training data. | :param bool valid: If valid is True, then return the Mean Residual Deviance value for the validation data. | :param bool xval: If xval is True, then return the Mean Residual Deviance value for the cross validation data. | :returns: The Mean Residual Deviance for this regression model. | | model_performance(self, test_data=None, train=False, valid=False, xval=False) | Generate model metrics for this model on test_data. | | :param test_data: Data set for which model metrics shall be computed against. All three of train, valid | and xval arguments are ignored if test_data is not None. | :param train: Report the training metrics for the model. | :param valid: Report the validation metrics for the model. | :param xval: Report the validation metrics for the model. | :return: An object of class H2OModelMetrics. | | mse(self, train=False, valid=False, xval=False) | Get the MSE(s). | | If all are False (default), then return the training metric value. 
| If more than one options is set to True, then return a dictionary of metrics where the keys are "train", | "valid", and "xval". | | :param bool train: If train is True, then return the MSE value for the training data. | :param bool valid: If valid is True, then return the MSE value for the validation data. | :param bool xval: If xval is True, then return the MSE value for the cross validation data. | :returns: The MSE for this regression model. | | normmul(self) | Normalization/Standardization multipliers for numeric predictors. | | normsub(self) | Normalization/Standardization offsets for numeric predictors. | | null_degrees_of_freedom(self, train=False, valid=False, xval=False) | Retreive the null degress of freedom if this model has the attribute, or None otherwise. | | :param bool train: Get the null dof for the training set. If both train and valid are False, then train is | selected by default. | :param bool valid: Get the null dof for the validation set. If both train and valid are True, then train is | selected by default. | :param bool xval: Get the null dof for the cross-validated models. | | :returns: the null dof, or None if it is not present. | | null_deviance(self, train=False, valid=False, xval=False) | Retreive the null deviance if this model has the attribute, or None otherwise. | | :param bool train: Get the null deviance for the training set. If both train and valid are False, then | train is selected by default. | :param bool valid: Get the null deviance for the validation set. If both train and valid are True, then | train is selected by default. | :param bool xval: Get the null deviance for the cross-validated models. | | :returns: the null deviance, or None if it is not present. | | pprint_coef(self) | Pretty print the coefficents table (includes normalized coefficients). | | pr_auc(self) | H2OGridSearch.pr_auc is deprecated, please use ``H2OGridSearch.aucpr`` instead. | | predict(self, test_data) | Predict on a dataset. | | :param H2OFrame test_data: Data to be predicted on. | :returns: H2OFrame filled with predictions. | | r2(self, train=False, valid=False, xval=False) | Return the R^2 for this regression model. | | The R^2 value is defined to be ``1 - MSE/var``, where ``var`` is computed as ``sigma^2``. | | If all are False (default), then return the training metric value. | If more than one options is set to True, then return a dictionary of metrics where the keys are "train", | "valid", and "xval". | | :param bool train: If train is True, then return the R^2 value for the training data. | :param bool valid: If valid is True, then return the R^2 value for the validation data. | :param bool xval: If xval is True, then return the R^2 value for the cross validation data. | | :returns: The R^2 for this regression model. | | residual_degrees_of_freedom(self, train=False, valid=False, xval=False) | Retreive the residual degress of freedom if this model has the attribute, or None otherwise. | | :param bool train: Get the residual dof for the training set. If both train and valid are False, then | train is selected by default. | :param bool valid: Get the residual dof for the validation set. If both train and valid are True, then | train is selected by default. | :param bool xval: Get the residual dof for the cross-validated models. | | :returns: the residual degrees of freedom, or None if they are not present. | | residual_deviance(self, train=False, valid=False, xval=False) | Retreive the residual deviance if this model has the attribute, or None otherwise. 
| | :param bool train: Get the residual deviance for the training set. If both train and valid are False, | then train is selected by default. | :param bool valid: Get the residual deviance for the validation set. If both train and valid are True, | then train is selected by default. | :param bool xval: Get the residual deviance for the cross-validated models. | | :returns: the residual deviance, or None if it is not present. | | respmul(self) | Normalization/Standardization multipliers for numeric response. | | respsub(self) | Normalization/Standardization offsets for numeric response. | | rmse(self, train=False, valid=False, xval=False) | | rmsle(self, train=False, valid=False, xval=False) | | scoring_history(self) | Retrieve model scoring history. | | :returns: Score history (H2OTwoDimTable) | | show(self) | Print models sorted by metric. | | sort_by(self, metric, increasing=True) | grid.sort_by() is deprecated; use grid.get_grid() instead | | Deprecated since 2016-12-12, use grid.get_grid() instead. | | sorted_metric_table(self) | Retrieve summary table of an H2O Grid Search. | | :returns: The summary table as an H2OTwoDimTable or a Pandas DataFrame. | | start(self, x, y=None, training_frame=None, offset_column=None, fold_column=None, weights_column=None, validation_frame=None, **params) | Asynchronous model build by specifying the predictor columns, response column, and any | additional frame-specific values. | | To block for results, call :meth:`join`. | | :param x: A list of column names or indices indicating the predictor columns. | :param y: An index or a column name indicating the response column. | :param training_frame: The H2OFrame having the columns indicated by x and y (as well as any | additional columns specified by fold, offset, and weights). | :param offset_column: The name or index of the column in training_frame that holds the offsets. | :param fold_column: The name or index of the column in training_frame that holds the per-row fold | assignments. | :param weights_column: The name or index of the column in training_frame that holds the per-row weights. | :param validation_frame: H2OFrame with validation data to be scored on while training. | | summary(self, header=True) | Print a detailed summary of the explored models. | | train(self, x=None, y=None, training_frame=None, offset_column=None, fold_column=None, weights_column=None, validation_frame=None, **params) | Train the model synchronously (i.e. do not return until the model finishes training). | | To train asynchronously call :meth:`start`. | | :param x: A list of column names or indices indicating the predictor columns. | :param y: An index or a column name indicating the response column. | :param training_frame: The H2OFrame having the columns indicated by x and y (as well as any | additional columns specified by fold, offset, and weights). | :param offset_column: The name or index of the column in training_frame that holds the offsets. | :param fold_column: The name or index of the column in training_frame that holds the per-row fold | assignments. | :param weights_column: The name or index of the column in training_frame that holds the per-row weights. | :param validation_frame: H2OFrame with validation data to be scored on while training. | | varimp(self, use_pandas=False) | Pretty print the variable importances, or return them in a list/pandas DataFrame. | | :param bool use_pandas: If True, then the variable importances will be returned as a pandas data frame. 
| | :returns: A dictionary of lists or Pandas DataFrame instances. | | weights(self, matrix_id=0) | Return the frame for the respective weight matrix. | | :param: matrix_id: an integer, ranging from 0 to number of layers, that specifies the weight matrix to return. | :returns: an H2OFrame which represents the weight matrix identified by matrix_id | | xval_keys(self) | Model keys for the cross-validated model. | | xvals(self) | Return the list of cross-validated models. | | ---------------------------------------------------------------------- | Data descriptors defined here: | | __dict__ | dictionary for instance variables (if defined) | | __weakref__ | list of weak references to the object (if defined) | | failed_params | | failed_raw_params | | failure_details | | failure_stack_traces | | grid_id | A key that identifies this grid search object in H2O. | | hyper_names | | model_ids
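The help text above also documents a parallelism argument. As a sketch only (the grid id below is made up and this variant was not run in the notebook), the same random search could let H2O build several models concurrently:

parallel_grid = H2OGridSearch(
    H2OGeneralizedLinearEstimator(family="binomial", lambda_search=True),
    hyper_params={"alpha": [i * 0.01 for i in range(0, 100)],
                  "lambda": [i * 1e-6 for i in range(0, 1000)]},
    grid_id="glm_grid_parallel",  # hypothetical id, not used elsewhere in this notebook
    search_criteria={"strategy": "RandomDiscrete", "max_models": 100,
                     "max_runtime_secs": 300, "seed": 42},
    parallelism=0,  # 0 = adaptive parallelism, decided by H2O
)
# parallel_grid.train(x=x, y=y, training_frame=train, validation_frame=valid)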
glm_grid.get_grid(sort_by='auc',decreasing=True)
 | alpha | lambda | model_ids | auc |
---|---|---|---|---|
0 | [0.87] | [4.9999999999999996E-6] | glm_grid_model_61 | 0.8460502420206815 |
1 | [0.4] | [1.8E-5] | glm_grid_model_46 | 0.8459231218911992 |
2 | [0.07] | [3.7E-5] | glm_grid_model_48 | 0.8458460074966513 |
3 | [0.07] | [5.6E-5] | glm_grid_model_72 | 0.84578969349 |
4 | [0.48] | [2.9E-5] | glm_grid_model_96 | 0.845718421725669 |
5 | [0.88] | [3.1E-5] | glm_grid_model_37 | 0.8454975463710519 |
6 | [0.18] | [8.099999999999999E-5] | glm_grid_model_86 | 0.8454049383408971 |
7 | [0.15] | [1.3099999999999999E-4] | glm_grid_model_30 | 0.845125597663872 |
8 | [0.1] | [1.59E-4] | glm_grid_model_38 | 0.844990163622469 |
9 | [0.06] | [2.12E-4] | glm_grid_model_78 | 0.8447922633836474 |
10 | [0.53] | [9.499999999999999E-5] | glm_grid_model_28 | 0.8446763576704011 |
11 | [0.41000000000000003] | [1.37E-4] | glm_grid_model_67 | 0.8443068880489722 |
12 | [0.65] | [1.07E-4] | glm_grid_model_34 | 0.8442450331686066 |
13 | [0.93] | [7.7E-5] | glm_grid_model_100 | 0.8441362707601581 |
14 | [0.52] | [1.37E-4] | glm_grid_model_42 | 0.8440820898598193 |
15 | [0.31] | [2.2999999999999998E-4] | glm_grid_model_22 | 0.8436148511314989 |
16 | [0.56] | [1.64E-4] | glm_grid_model_82 | 0.8436089096500138 |
17 | [0.47000000000000003] | [1.8899999999999999E-4] | glm_grid_model_51 | 0.8435708987360638 |
18 | [0.05] | [4.17E-4] | glm_grid_model_16 | 0.843518778104261 |
19 | [0.18] | [3.47E-4] | glm_grid_model_17 | 0.8432349813161218 |
20 | [0.92] | [1.45E-4] | glm_grid_model_93 | 0.8426307867571391 |
21 | [0.11] | [4.88E-4] | glm_grid_model_73 | 0.8426180947745795 |
22 | [0.16] | [4.93E-4] | glm_grid_model_14 | 0.8421399824067635 |
23 | [0.75] | [1.8099999999999998E-4] | glm_grid_model_11 | 0.8416523933239516 |
24 | [0.09] | [6.68E-4] | glm_grid_model_1 | 0.8416254797655253 |
25 | [0.78] | [1.88E-4] | glm_grid_model_2 | 0.8415763740974008 |
26 | [0.07] | [7.32E-4] | glm_grid_model_79 | 0.841510364862424 |
27 | [0.17] | [5.459999999999999E-4] | glm_grid_model_98 | 0.8414721822594293 |
28 | [0.65] | [2.26E-4] | glm_grid_model_83 | 0.8414650545627264 |
29 | [0.02] | [8.87E-4] | glm_grid_model_92 | 0.8413966312757275 |
... | ... | ... | ... | ... |
70 | [0.55] | [6.129999999999999E-4] | glm_grid_model_59 | 0.8367248032825947 |
71 | [0.86] | [4.86E-4] | glm_grid_model_21 | 0.836457462629252 |
72 | [0.63] | [5.75E-4] | glm_grid_model_49 | 0.836414769286811 |
73 | [0.5] | [6.929999999999999E-4] | glm_grid_model_6 | 0.8362693772808174 |
74 | [0.73] | [5.53E-4] | glm_grid_model_13 | 0.8362422374050139 |
75 | [0.65] | [5.88E-4] | glm_grid_model_89 | 0.8362208751256985 |
76 | [0.87] | [5.099999999999999E-4] | glm_grid_model_19 | 0.8361702996952978 |
77 | [0.32] | [9.5E-4] | glm_grid_model_95 | 0.835997372408428 |
78 | [0.88] | [5.18E-4] | glm_grid_model_63 | 0.835987266166935 |
79 | [0.42] | [8.129999999999999E-4] | glm_grid_model_74 | 0.8359603526085087 |
80 | [0.43] | [8.42E-4] | glm_grid_model_40 | 0.8355779204642072 |
81 | [0.66] | [6.59E-4] | glm_grid_model_18 | 0.8355752566826656 |
82 | [0.59] | [7.31E-4] | glm_grid_model_9 | 0.8353492462648034 |
83 | [0.41000000000000003] | [8.91E-4] | glm_grid_model_24 | 0.8353201996000644 |
84 | [0.54] | [7.8E-4] | glm_grid_model_5 | 0.8352437797657427 |
85 | [0.41000000000000003] | [9.559999999999999E-4] | glm_grid_model_76 | 0.8348899104352362 |
86 | [0.9400000000000001] | [5.09E-4] | glm_grid_model_62 | 0.834719503855704 |
87 | [0.41000000000000003] | [9.93E-4] | glm_grid_model_50 | 0.8346606457295727 |
88 | [0.6] | [8.16E-4] | glm_grid_model_32 | 0.8339922654813755 |
89 | [0.55] | [8.759999999999999E-4] | glm_grid_model_97 | 0.8338713885898811 |
90 | [0.52] | [9.45E-4] | glm_grid_model_91 | 0.8336982271815869 |
91 | [0.84] | [6.74E-4] | glm_grid_model_57 | 0.8335408845775446 |
92 | [0.5700000000000001] | [9.53E-4] | glm_grid_model_69 | 0.8330923235362049 |
93 | [0.65] | [9.379999999999999E-4] | glm_grid_model_56 | 0.8328250505179406 |
94 | [0.9500000000000001] | [7.41E-4] | glm_grid_model_85 | 0.8325835828833598 |
95 | [0.86] | [8.06E-4] | glm_grid_model_39 | 0.8324377694588019 |
96 | [0.92] | [8.6E-4] | glm_grid_model_99 | 0.8318016979671923 |
97 | [0.74] | [9.87E-4] | glm_grid_model_15 | 0.8317961336813356 |
98 | [0.88] | [9.209999999999999E-4] | glm_grid_model_23 | 0.8315479207483787 |
99 | [0.88] | [9.48E-4] | glm_grid_model_7 | 0.8313888092269064 |

[100 rows x 5 columns]
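get_grid() accepts other metrics as well, and for error-style metrics the sort direction flips. For example, the same grid sorted by validation logloss in increasing order:

# Lower logloss is better, so sort ascending
glm_grid.get_grid(sort_by="logloss", decreasing=False)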
glm_grid.models[0]
Model Details
=============
H2OGeneralizedLinearEstimator : Generalized Linear Modeling
Model Key: glm_grid_model_61

GLM Model: summary
 | family | link | regularization | lambda_search | number_of_predictors_total | number_of_active_predictors | number_of_iterations | training_frame |
---|---|---|---|---|---|---|---|---|
0 | binomial | logit | Elastic Net (alpha = 0.87, lambda = 5.0E-6 ) | nlambda = 100, lambda.max = 0.03808, lambda.min = 5.0E-6, lambda.1se = -1.0 | 161 | 143 | 7 | py_15_sid_9664 |
ModelMetricsBinomialGLM: glm
** Reported on train data. **

MSE: 0.031344275605674536
RMSE: 0.17704314616972477
LogLoss: 0.12279979897845819
Null degrees of freedom: 350267
Residual degrees of freedom: 350124
Null deviance: 108932.13150368733
Residual deviance: 86025.67997717319
AIC: 86313.67997717319
AUC: 0.8519842670925402
AUCPR: 0.21046685420921254
Gini: 0.7039685341850803

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.13993235618594693:
 | | FALSE | TRUE | Error | Rate |
---|---|---|---|---|---|
0 | FALSE | 323805.0 | 13802.0 | 0.0409 | (13802.0/337607.0) |
1 | TRUE | 8129.0 | 4532.0 | 0.6421 | (8129.0/12661.0) |
2 | Total | 331934.0 | 18334.0 | 0.0626 | (21931.0/350268.0) |
Maximum Metrics: Maximum metrics at their respective thresholds
 | metric | threshold | value | idx |
---|---|---|---|---|
0 | max f1 | 0.139932 | 0.292434 | 200.0 |
1 | max f2 | 0.068698 | 0.389555 | 264.0 |
2 | max f0point5 | 0.212665 | 0.290398 | 157.0 |
3 | max accuracy | 0.981772 | 0.963851 | 0.0 |
4 | max precision | 0.562303 | 0.421203 | 45.0 |
5 | max recall | 0.000789 | 1.000000 | 398.0 |
6 | max specificity | 0.981772 | 0.999997 | 0.0 |
7 | max absolute_mcc | 0.099058 | 0.269921 | 234.0 |
8 | max min_per_class_accuracy | 0.038519 | 0.774240 | 305.0 |
9 | max mean_per_class_accuracy | 0.036145 | 0.775428 | 309.0 |
10 | max tns | 0.981772 | 337606.000000 | 0.0 |
11 | max fns | 0.981772 | 12661.000000 | 0.0 |
12 | max fps | 0.000498 | 337607.000000 | 399.0 |
13 | max tps | 0.000789 | 12661.000000 | 398.0 |
14 | max tnr | 0.981772 | 0.999997 | 0.0 |
15 | max fnr | 0.981772 | 1.000000 | 0.0 |
16 | max fpr | 0.000498 | 1.000000 | 399.0 |
17 | max tpr | 0.000789 | 1.000000 | 398.0 |
Gains/Lift Table: Avg response rate: 3.61 %, avg score: 3.61 %
 | group | cumulative_data_fraction | lower_threshold | lift | cumulative_lift | response_rate | score | cumulative_response_rate | cumulative_score | capture_rate | cumulative_capture_rate | gain | cumulative_gain |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0.010001 | 0.332250 | 10.724871 | 10.724871 | 0.387668 | 0.466160 | 0.387668 | 0.466160 | 0.107259 | 0.107259 | 972.487123 | 972.487123 | |
1 | 2 | 0.020002 | 0.241718 | 7.629032 | 9.176952 | 0.275764 | 0.280435 | 0.331716 | 0.373297 | 0.076297 | 0.183556 | 662.903211 | 817.695167 | |
2 | 3 | 0.030003 | 0.195284 | 6.452297 | 8.268734 | 0.233229 | 0.216827 | 0.298887 | 0.321140 | 0.064529 | 0.248085 | 545.229734 | 726.873356 | |
3 | 4 | 0.040001 | 0.165076 | 5.166472 | 7.493334 | 0.186750 | 0.179199 | 0.270859 | 0.285663 | 0.051655 | 0.299739 | 416.647177 | 649.333418 | |
4 | 5 | 0.050002 | 0.143363 | 4.770119 | 6.948660 | 0.172424 | 0.153562 | 0.251170 | 0.259241 | 0.047706 | 0.347445 | 377.011946 | 594.866013 | |
5 | 6 | 0.100001 | 0.087455 | 3.342624 | 5.145694 | 0.120825 | 0.110895 | 0.185999 | 0.185070 | 0.167127 | 0.514572 | 234.262433 | 414.569371 | |
6 | 7 | 0.150002 | 0.062108 | 2.397833 | 4.229723 | 0.086674 | 0.073579 | 0.152890 | 0.147906 | 0.119896 | 0.634468 | 139.783271 | 322.972261 | |
7 | 8 | 0.200001 | 0.047315 | 1.600226 | 3.572367 | 0.057843 | 0.054179 | 0.129129 | 0.124475 | 0.080009 | 0.714478 | 60.022611 | 257.236725 | |
8 | 9 | 0.300002 | 0.030452 | 1.121548 | 2.755427 | 0.040540 | 0.037965 | 0.099599 | 0.095638 | 0.112155 | 0.826633 | 12.154798 | 175.542749 | |
9 | 10 | 0.399999 | 0.020842 | 0.645303 | 2.227908 | 0.023326 | 0.025266 | 0.080531 | 0.078045 | 0.064529 | 0.891162 | -35.469658 | 122.790777 | |
10 | 11 | 0.500000 | 0.014542 | 0.399650 | 1.862254 | 0.014446 | 0.017478 | 0.067314 | 0.065932 | 0.039965 | 0.931127 | -60.034981 | 86.225417 | |
11 | 12 | 0.600001 | 0.010215 | 0.280387 | 1.598608 | 0.010135 | 0.012267 | 0.057784 | 0.056988 | 0.028039 | 0.959166 | -71.961301 | 59.860838 | |
12 | 13 | 0.699998 | 0.007058 | 0.198251 | 1.398561 | 0.007166 | 0.008555 | 0.050553 | 0.050069 | 0.019825 | 0.978991 | -80.174889 | 39.856142 | |
13 | 14 | 0.799999 | 0.004607 | 0.116104 | 1.238253 | 0.004197 | 0.005790 | 0.044759 | 0.044534 | 0.011610 | 0.990601 | -88.389609 | 23.825309 | |
14 | 15 | 0.899999 | 0.002610 | 0.057657 | 1.107075 | 0.002084 | 0.003583 | 0.040017 | 0.039984 | 0.005766 | 0.996367 | -94.234296 | 10.707492 | |
15 | 16 | 1.000000 | 0.000095 | 0.036332 | 1.000000 | 0.001313 | 0.001619 | 0.036147 | 0.036147 | 0.003633 | 1.000000 | -96.366816 | 0.000000 |
ModelMetricsBinomialGLM: glm
** Reported on validation data. **

MSE: 0.031018805729749764
RMSE: 0.17612156520355413
LogLoss: 0.12242815235268398
Null degrees of freedom: 74970
Residual degrees of freedom: 74827
Null deviance: 22974.597464481732
Residual deviance: 18357.12202006614
AIC: 18645.12202006614
AUC: 0.8460502420206815
AUCPR: 0.2009137545141779
Gini: 0.6921004840413629

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.13091994899463488:
 | | FALSE | TRUE | Error | Rate |
---|---|---|---|---|---|
0 | FALSE | 69013.0 | 3300.0 | 0.0456 | (3300.0/72313.0) |
1 | TRUE | 1672.0 | 986.0 | 0.629 | (1672.0/2658.0) |
2 | Total | 70685.0 | 4286.0 | 0.0663 | (4972.0/74971.0) |
Maximum Metrics: Maximum metrics at their respective thresholds
 | metric | threshold | value | idx |
---|---|---|---|---|
0 | max f1 | 0.130920 | 0.283986 | 201.0 |
1 | max f2 | 0.071763 | 0.375903 | 256.0 |
2 | max f0point5 | 0.242376 | 0.295791 | 133.0 |
3 | max accuracy | 0.974097 | 0.964533 | 0.0 |
4 | max precision | 0.376628 | 0.396364 | 83.0 |
5 | max recall | 0.000735 | 1.000000 | 398.0 |
6 | max specificity | 0.974097 | 0.999986 | 0.0 |
7 | max absolute_mcc | 0.119399 | 0.260856 | 210.0 |
8 | max min_per_class_accuracy | 0.037620 | 0.764108 | 304.0 |
9 | max mean_per_class_accuracy | 0.034897 | 0.767316 | 309.0 |
10 | max tns | 0.974097 | 72312.000000 | 0.0 |
11 | max fns | 0.974097 | 2658.000000 | 0.0 |
12 | max fps | 0.000475 | 72313.000000 | 399.0 |
13 | max tps | 0.000735 | 2658.000000 | 398.0 |
14 | max tnr | 0.974097 | 0.999986 | 0.0 |
15 | max fnr | 0.974097 | 1.000000 | 0.0 |
16 | max fpr | 0.000475 | 1.000000 | 399.0 |
17 | max tpr | 0.000735 | 1.000000 | 398.0 |
Gains/Lift Table: Avg response rate: 3.55 %, avg score: 3.61 %
 | group | cumulative_data_fraction | lower_threshold | lift | cumulative_lift | response_rate | score | cumulative_response_rate | cumulative_score | capture_rate | cumulative_capture_rate | gain | cumulative_gain |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0.010004 | 0.334125 | 10.793417 | 10.793417 | 0.382667 | 0.473002 | 0.382667 | 0.473002 | 0.107976 | 0.107976 | 979.341711 | 979.341711 | |
1 | 2 | 0.020008 | 0.243096 | 8.348915 | 9.571166 | 0.296000 | 0.282541 | 0.339333 | 0.377771 | 0.083521 | 0.191497 | 734.891497 | 857.116604 | |
2 | 3 | 0.030012 | 0.193143 | 5.829197 | 8.323843 | 0.206667 | 0.216035 | 0.295111 | 0.323859 | 0.058315 | 0.249812 | 482.919739 | 732.384316 | |
3 | 4 | 0.040002 | 0.162847 | 4.895532 | 7.467623 | 0.173565 | 0.176602 | 0.264755 | 0.287082 | 0.048909 | 0.298721 | 389.553164 | 646.762264 | |
4 | 5 | 0.050006 | 0.141925 | 3.948811 | 6.763673 | 0.140000 | 0.152013 | 0.239797 | 0.260061 | 0.039503 | 0.338224 | 294.881114 | 576.367262 | |
5 | 6 | 0.100012 | 0.087175 | 3.310363 | 5.037018 | 0.117365 | 0.110232 | 0.178581 | 0.185146 | 0.165538 | 0.503762 | 231.036257 | 403.701760 | |
6 | 7 | 0.150005 | 0.061800 | 2.227565 | 4.100700 | 0.078975 | 0.073379 | 0.145385 | 0.147897 | 0.111362 | 0.615124 | 122.756536 | 310.070006 | |
7 | 8 | 0.200011 | 0.047553 | 1.707846 | 3.502447 | 0.060549 | 0.054259 | 0.124175 | 0.124486 | 0.085403 | 0.700527 | 70.784615 | 250.244669 | |
8 | 9 | 0.300009 | 0.030349 | 1.106110 | 2.703703 | 0.039216 | 0.038070 | 0.095856 | 0.095682 | 0.110609 | 0.811136 | 10.610956 | 170.370316 | |
9 | 10 | 0.400008 | 0.020669 | 0.714833 | 2.206502 | 0.025343 | 0.025116 | 0.078229 | 0.078041 | 0.071482 | 0.882619 | -28.516729 | 120.650213 | |
10 | 11 | 0.500007 | 0.014493 | 0.470285 | 1.859268 | 0.016673 | 0.017395 | 0.065918 | 0.065912 | 0.047028 | 0.929646 | -52.971532 | 85.926790 | |
11 | 12 | 0.600005 | 0.010143 | 0.297220 | 1.598932 | 0.010538 | 0.012191 | 0.056688 | 0.056959 | 0.029722 | 0.959368 | -70.278008 | 59.893236 | |
12 | 13 | 0.700004 | 0.007006 | 0.180589 | 1.396316 | 0.006403 | 0.008512 | 0.049505 | 0.050038 | 0.018059 | 0.977427 | -81.941068 | 39.631578 | |
13 | 14 | 0.800003 | 0.004578 | 0.131680 | 1.238239 | 0.004669 | 0.005761 | 0.043900 | 0.044504 | 0.013168 | 0.990594 | -86.832029 | 23.823891 | |
14 | 15 | 0.900001 | 0.002583 | 0.045147 | 1.105675 | 0.001601 | 0.003547 | 0.039200 | 0.039953 | 0.004515 | 0.995109 | -95.485267 | 10.567514 | |
15 | 16 | 1.000000 | 0.000078 | 0.048910 | 1.000000 | 0.001734 | 0.001600 | 0.035454 | 0.036118 | 0.004891 | 1.000000 | -95.109039 | 0.000000 |
Scoring History:
 | timestamp | duration | iteration | lambda | predictors | deviance_train | deviance_test |
---|---|---|---|---|---|---|---|
0 | 2020-03-06 00:44:12 | 0.000 sec | 7 | .5E-5 | 144 | 0.2456 | 0.244856 |
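The numbers printed above can also be pulled programmatically from the model object instead of being read off the report, for example the training and validation AUC of the leading model:

best_glm = glm_grid.models[0]  # glm_grid_model_61, the top model in the sorted grid
print("train AUC: %.4f   valid AUC: %.4f" % (best_glm.auc(train=True), best_glm.auc(valid=True)))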
glm_grid.summary()
Grid Summary:
Model Id | family | link | regularization | lambda_search | number_of_predictors_total | number_of_active_predictors | number_of_iterations | training_frame |
---|---|---|---|---|---|---|---|---|
glm_grid_model_61 | binomial | logit | Elastic Net (alpha = 0.87, lambda = 5.0E-6 ) | nlambda = 100, lambda.max = 0.03808, lambda.min = 5.0E-6, lambda.1se = -1.0 | 161 | 143 | 7 | py_15_sid_9664 |
glm_grid_model_46 | binomial | logit | Elastic Net (alpha = 0.4, lambda = 1.8E-5 ) | nlambda = 100, lambda.max = 0.08282, lambda.min = 1.8E-5, lambda.1se = -1.0 | 161 | 137 | 7 | py_15_sid_9664 |
glm_grid_model_48 | binomial | logit | Elastic Net (alpha = 0.07, lambda = 3.7E-5 ) | nlambda = 100, lambda.max = 0.4733, lambda.min = 3.7E-5, lambda.1se = -1.0 | 161 | 145 | 5 | py_15_sid_9664 |
glm_grid_model_96 | binomial | logit | Elastic Net (alpha = 0.48, lambda = 2.9E-5 ) | nlambda = 100, lambda.max = 0.06902, lambda.min = 2.9E-5, lambda.1se = -1.0 | 161 | 110 | 7 | py_15_sid_9664 |
glm_grid_model_72 | binomial | logit | Elastic Net (alpha = 0.07, lambda = 5.6E-5 ) | nlambda = 100, lambda.max = 0.4733, lambda.min = 5.6E-5, lambda.1se = -1.0 | 161 | 137 | 5 | py_15_sid_9664 |
glm_grid_model_37 | binomial | logit | Elastic Net (alpha = 0.88, lambda = 3.1E-5 ) | nlambda = 100, lambda.max = 0.03765, lambda.min = 3.1E-5, lambda.1se = -1.0 | 161 | 93 | 7 | py_15_sid_9664 |
glm_grid_model_86 | binomial | logit | Elastic Net (alpha = 0.18, lambda = 8.1E-5 ) | nlambda = 100, lambda.max = 0.184, lambda.min = 8.1E-5, lambda.1se = -1.0 | 161 | 108 | 7 | py_15_sid_9664 |
glm_grid_model_30 | binomial | logit | Elastic Net (alpha = 0.15, lambda = 1.31E-4 ) | nlambda = 100, lambda.max = 0.2209, lambda.min = 1.31E-4, lambda.1se = -1.0 | 161 | 105 | 7 | py_15_sid_9664 |
glm_grid_model_38 | binomial | logit | Elastic Net (alpha = 0.1, lambda = 1.59E-4 ) | nlambda = 100, lambda.max = 0.3313, lambda.min = 1.59E-4, lambda.1se = -1.0 | 161 | 110 | 7 | py_15_sid_9664 |
glm_grid_model_28 | binomial | logit | Elastic Net (alpha = 0.53, lambda = 9.5E-5 ) | nlambda = 100, lambda.max = 0.06251, lambda.min = 9.5E-5, lambda.1se = -1.0 | 161 | 80 | 7 | py_15_sid_9664 |
glm_grid_model_78 | binomial | logit | Elastic Net (alpha = 0.06, lambda = 2.12E-4 ) | nlambda = 100, lambda.max = 0.5521, lambda.min = 2.12E-4, lambda.1se = -1.0 | 161 | 119 | 7 | py_15_sid_9664 |
glm_grid_model_67 | binomial | logit | Elastic Net (alpha = 0.41, lambda = 1.37E-4 ) | nlambda = 100, lambda.max = 0.0808, lambda.min = 1.37E-4, lambda.1se = -1.0 | 161 | 77 | 7 | py_15_sid_9664 |
glm_grid_model_100 | binomial | logit | Elastic Net (alpha = 0.93, lambda = 7.7E-5 ) | nlambda = 100, lambda.max = 0.03562, lambda.min = 7.7E-5, lambda.1se = -1.0 | 161 | 71 | 7 | py_15_sid_9664 |
glm_grid_model_34 | binomial | logit | Elastic Net (alpha = 0.65, lambda = 1.07E-4 ) | nlambda = 100, lambda.max = 0.05097, lambda.min = 1.07E-4, lambda.1se = -1.0 | 161 | 69 | 7 | py_15_sid_9664 |
glm_grid_model_42 | binomial | logit | Elastic Net (alpha = 0.52, lambda = 1.37E-4 ) | nlambda = 100, lambda.max = 0.06371, lambda.min = 1.37E-4, lambda.1se = -1.0 | 161 | 70 | 7 | py_15_sid_9664 |
glm_grid_model_82 | binomial | logit | Elastic Net (alpha = 0.56, lambda = 1.64E-4 ) | nlambda = 100, lambda.max = 0.05916, lambda.min = 1.64E-4, lambda.1se = -1.0 | 161 | 68 | 7 | py_15_sid_9664 |
glm_grid_model_51 | binomial | logit | Elastic Net (alpha = 0.47, lambda = 1.89E-4 ) | nlambda = 100, lambda.max = 0.07048, lambda.min = 1.89E-4, lambda.1se = -1.0 | 161 | 69 | 7 | py_15_sid_9664 |
glm_grid_model_22 | binomial | logit | Elastic Net (alpha = 0.31, lambda = 2.3E-4 ) | nlambda = 100, lambda.max = 0.1069, lambda.min = 2.3E-4, lambda.1se = -1.0 | 161 | 72 | 7 | py_15_sid_9664 |
glm_grid_model_16 | binomial | logit | Elastic Net (alpha = 0.05, lambda = 4.17E-4 ) | nlambda = 100, lambda.max = 0.6626, lambda.min = 4.17E-4, lambda.1se = -1.0 | 161 | 110 | 6 | py_15_sid_9664 |
glm_grid_model_17 | binomial | logit | Elastic Net (alpha = 0.18, lambda = 3.47E-4 ) | nlambda = 100, lambda.max = 0.184, lambda.min = 3.47E-4, lambda.1se = -1.0 | 161 | 80 | 7 | py_15_sid_9664 |
glm_grid_model_93 | binomial | logit | Elastic Net (alpha = 0.92, lambda = 1.45E-4 ) | nlambda = 100, lambda.max = 0.03601, lambda.min = 1.45E-4, lambda.1se = -1.0 | 161 | 58 | 8 | py_15_sid_9664 |
glm_grid_model_73 | binomial | logit | Elastic Net (alpha = 0.11, lambda = 4.88E-4 ) | nlambda = 100, lambda.max = 0.3012, lambda.min = 4.88E-4, lambda.1se = -1.0 | 161 | 85 | 6 | py_15_sid_9664 |
glm_grid_model_11 | binomial | logit | Elastic Net (alpha = 0.75, lambda = 1.81E-4 ) | nlambda = 100, lambda.max = 0.04417, lambda.min = 1.81E-4, lambda.1se = -1.0 | 161 | 53 | 7 | py_15_sid_9664 |
glm_grid_model_2 | binomial | logit | Elastic Net (alpha = 0.78, lambda = 1.88E-4 ) | nlambda = 100, lambda.max = 0.04247, lambda.min = 1.88E-4, lambda.1se = -1.0 | 161 | 53 | 8 | py_15_sid_9664 |
glm_grid_model_14 | binomial | logit | Elastic Net (alpha = 0.16, lambda = 4.93E-4 ) | nlambda = 100, lambda.max = 0.207, lambda.min = 4.93E-4, lambda.1se = -1.0 | 161 | 77 | 6 | py_15_sid_9664 |
glm_grid_model_83 | binomial | logit | Elastic Net (alpha = 0.65, lambda = 2.26E-4 ) | nlambda = 100, lambda.max = 0.05097, lambda.min = 2.26E-4, lambda.1se = -1.0 | 161 | 53 | 8 | py_15_sid_9664 |
glm_grid_model_20 | binomial | logit | Elastic Net (alpha = 0.53, lambda = 2.9E-4 ) | nlambda = 100, lambda.max = 0.06251, lambda.min = 2.9E-4, lambda.1se = -1.0 | 161 | 54 | 8 | py_15_sid_9664 |
glm_grid_model_1 | binomial | logit | Elastic Net (alpha = 0.09, lambda = 6.68E-4 ) | nlambda = 100, lambda.max = 0.3681, lambda.min = 6.68E-4, lambda.1se = -1.0 | 161 | 84 | 6 | py_15_sid_9664 |
glm_grid_model_98 | binomial | logit | Elastic Net (alpha = 0.17, lambda = 5.46E-4 ) | nlambda = 100, lambda.max = 0.1949, lambda.min = 5.46E-4, lambda.1se = -1.0 | 161 | 72 | 6 | py_15_sid_9664 |
glm_grid_model_54 | binomial | logit | Elastic Net (alpha = 0.13, lambda = 6.33E-4 ) | nlambda = 100, lambda.max = 0.2548, lambda.min = 6.33E-4, lambda.1se = -1.0 | 161 | 79 | 7 | py_15_sid_9664 |
glm_grid_model_79 | binomial | logit | Elastic Net (alpha = 0.07, lambda = 7.32E-4 ) | nlambda = 100, lambda.max = 0.4733, lambda.min = 7.32E-4, lambda.1se = -1.0 | 161 | 88 | 6 | py_15_sid_9664 |
glm_grid_model_35 | binomial | logit | Elastic Net (alpha = 0.99, lambda = 2.06E-4 ) | nlambda = 100, lambda.max = 0.03346, lambda.min = 2.06E-4, lambda.1se = -1.0 | 161 | 46 | 7 | py_15_sid_9664 |
glm_grid_model_92 | binomial | logit | Elastic Net (alpha = 0.02, lambda = 8.87E-4 ) | nlambda = 100, lambda.max = 1.6564, lambda.min = 8.87E-4, lambda.1se = -1.0 | 161 | 116 | 5 | py_15_sid_9664 |
glm_grid_model_60 | binomial | logit | Elastic Net (alpha = 0.05, lambda = 8.0E-4 ) | nlambda = 100, lambda.max = 0.6626, lambda.min = 8.0E-4, lambda.1se = -1.0 | 161 | 90 | 6 | py_15_sid_9664 |
glm_grid_model_90 | binomial | logit | Elastic Net (alpha = 0.02, lambda = 9.92E-4 ) | nlambda = 100, lambda.max = 1.6564, lambda.min = 9.92E-4, lambda.1se = -1.0 | 161 | 114 | 5 | py_15_sid_9664 |
glm_grid_model_71 | binomial | logit | Elastic Net (alpha = 0.09, lambda = 7.94E-4 ) | nlambda = 100, lambda.max = 0.3681, lambda.min = 7.94E-4, lambda.1se = -1.0 | 161 | 81 | 6 | py_15_sid_9664 |
glm_grid_model_26 | binomial | logit | Elastic Net (alpha = 0.93, lambda = 2.45E-4 ) | nlambda = 100, lambda.max = 0.03562, lambda.min = 2.45E-4, lambda.1se = -1.0 | 161 | 44 | 7 | py_15_sid_9664 |
glm_grid_model_80 | binomial | logit | Elastic Net (alpha = 0.23, lambda = 5.77E-4 ) | nlambda = 100, lambda.max = 0.144, lambda.min = 5.77E-4, lambda.1se = -1.0 | 161 | 64 | 6 | py_15_sid_9664 |
glm_grid_model_77 | binomial | logit | Elastic Net (alpha = 0.6, lambda = 3.29E-4 ) | nlambda = 100, lambda.max = 0.05521, lambda.min = 3.29E-4, lambda.1se = -1.0 | 161 | 47 | 7 | py_15_sid_9664 |
glm_grid_model_87 | binomial | logit | Elastic Net (alpha = 0.52, lambda = 3.66E-4 ) | nlambda = 100, lambda.max = 0.06371, lambda.min = 3.66E-4, lambda.1se = -1.0 | 161 | 48 | 6 | py_15_sid_9664 |
glm_grid_model_45 | binomial | logit | Elastic Net (alpha = 0.25, lambda = 5.9E-4 ) | nlambda = 100, lambda.max = 0.1325, lambda.min = 5.9E-4, lambda.1se = -1.0 | 161 | 59 | 6 | py_15_sid_9664 |
glm_grid_model_81 | binomial | logit | Elastic Net (alpha = 0.8, lambda = 3.02E-4 ) | nlambda = 100, lambda.max = 0.04141, lambda.min = 3.02E-4, lambda.1se = -1.0 | 161 | 43 | 7 | py_15_sid_9664 |
glm_grid_model_33 | binomial | logit | Elastic Net (alpha = 0.64, lambda = 3.47E-4 ) | nlambda = 100, lambda.max = 0.05176, lambda.min = 3.47E-4, lambda.1se = -1.0 | 161 | 45 | 6 | py_15_sid_9664 |
glm_grid_model_43 | binomial | logit | Elastic Net (alpha = 0.1, lambda = 8.66E-4 ) | nlambda = 100, lambda.max = 0.3313, lambda.min = 8.66E-4, lambda.1se = -1.0 | 161 | 75 | 6 | py_15_sid_9664 |
glm_grid_model_55 | binomial | logit | Elastic Net (alpha = 0.66, lambda = 3.52E-4 ) | nlambda = 100, lambda.max = 0.05019, lambda.min = 3.52E-4, lambda.1se = -1.0 | 161 | 43 | 6 | py_15_sid_9664 |
glm_grid_model_12 | binomial | logit | Elastic Net (alpha = 0.11, lambda = 8.43E-4 ) | nlambda = 100, lambda.max = 0.3012, lambda.min = 8.43E-4, lambda.1se = -1.0 | 161 | 73 | 6 | py_15_sid_9664 |
glm_grid_model_65 | binomial | logit | Elastic Net (alpha = 0.86, lambda = 3.02E-4 ) | nlambda = 100, lambda.max = 0.03852, lambda.min = 3.02E-4, lambda.1se = -1.0 | 161 | 40 | 7 | py_15_sid_9664 |
glm_grid_model_10 | binomial | logit | Elastic Net (alpha = 0.83, lambda = 3.23E-4 ) | nlambda = 100, lambda.max = 0.03991, lambda.min = 3.23E-4, lambda.1se = -1.0 | 161 | 38 | 6 | py_15_sid_9664 |
glm_grid_model_8 | binomial | logit | Elastic Net (alpha = 0.94, lambda = 3.05E-4 ) | nlambda = 100, lambda.max = 0.03524, lambda.min = 3.05E-4, lambda.1se = -1.0 | 161 | 36 | 8 | py_15_sid_9664 |
glm_grid_model_3 | binomial | logit | Elastic Net (alpha = 0.69, lambda = 3.77E-4 ) | nlambda = 100, lambda.max = 0.04801, lambda.min = 3.77E-4, lambda.1se = -1.0 | 161 | 39 | 6 | py_15_sid_9664 |
glm_grid_model_70 | binomial | logit | Elastic Net (alpha = 0.71, lambda = 3.86E-4 ) | nlambda = 100, lambda.max = 0.04666, lambda.min = 3.86E-4, lambda.1se = -1.0 | 161 | 38 | 6 | py_15_sid_9664 |
glm_grid_model_44 | binomial | logit | Elastic Net (alpha = 0.19, lambda = 7.88E-4 ) | nlambda = 100, lambda.max = 0.1744, lambda.min = 7.88E-4, lambda.1se = -1.0 | 161 | 60 | 6 | py_15_sid_9664 |
glm_grid_model_41 | binomial | logit | Elastic Net (alpha = 0.67, lambda = 3.99E-4 ) | nlambda = 100, lambda.max = 0.04944, lambda.min = 3.99E-4, lambda.1se = -1.0 | 161 | 38 | 6 | py_15_sid_9664 |
glm_grid_model_36 | binomial | logit | Elastic Net (alpha = 0.94, lambda = 3.4E-4 ) | nlambda = 100, lambda.max = 0.03524, lambda.min = 3.4E-4, lambda.1se = -1.0 | 161 | 35 | 7 | py_15_sid_9664 |
glm_grid_model_94 | binomial | logit | Elastic Net (alpha = 0.15, lambda = 9.21E-4 ) | nlambda = 100, lambda.max = 0.2209, lambda.min = 9.21E-4, lambda.1se = -1.0 | 161 | 64 | 6 | py_15_sid_9664 |
glm_grid_model_29 | binomial | logit | Elastic Net (alpha = 0.59, lambda = 4.59E-4 ) | nlambda = 100, lambda.max = 0.05615, lambda.min = 4.59E-4, lambda.1se = -1.0 | 161 | 39 | 6 | py_15_sid_9664 |
glm_grid_model_25 | binomial | logit | Elastic Net (alpha = 0.3, lambda = 6.75E-4 ) | nlambda = 100, lambda.max = 0.1104, lambda.min = 6.75E-4, lambda.1se = -1.0 | 161 | 47 | 7 | py_15_sid_9664 |
glm_grid_model_52 | binomial | logit | Elastic Net (alpha = 0.34, lambda = 6.66E-4 ) | nlambda = 100, lambda.max = 0.09743, lambda.min = 6.66E-4, lambda.1se = -1.0 | 161 | 45 | 7 | py_15_sid_9664 |
glm_grid_model_47 | binomial | logit | Elastic Net (alpha = 0.19, lambda = 8.82E-4 ) | nlambda = 100, lambda.max = 0.1744, lambda.min = 8.82E-4, lambda.1se = -1.0 | 161 | 56 | 6 | py_15_sid_9664 |
glm_grid_model_31 | binomial | logit | Elastic Net (alpha = 0.58, lambda = 5.09E-4 ) | nlambda = 100, lambda.max = 0.05712, lambda.min = 5.09E-4, lambda.1se = -1.0 | 161 | 37 | 6 | py_15_sid_9664 |
glm_grid_model_27 | binomial | logit | Elastic Net (alpha = 0.81, lambda = 4.25E-4 ) | nlambda = 100, lambda.max = 0.0409, lambda.min = 4.25E-4, lambda.1se = -1.0 | 161 | 34 | 6 | py_15_sid_9664 |
glm_grid_model_75 | binomial | logit | Elastic Net (alpha = 0.59, lambda = 5.18E-4 ) | nlambda = 100, lambda.max = 0.05615, lambda.min = 5.18E-4, lambda.1se = -1.0 | 161 | 36 | 7 | py_15_sid_9664 |
glm_grid_model_84 | binomial | logit | Elastic Net (alpha = 0.96, lambda = 3.8E-4 ) | nlambda = 100, lambda.max = 0.03451, lambda.min = 3.8E-4, lambda.1se = -1.0 | 161 | 30 | 6 | py_15_sid_9664 |
glm_grid_model_64 | binomial | logit | Elastic Net (alpha = 0.29, lambda = 7.81E-4 ) | nlambda = 100, lambda.max = 0.1142, lambda.min = 7.81E-4, lambda.1se = -1.0 | 161 | 45 | 7 | py_15_sid_9664 |
glm_grid_model_68 | binomial | logit | Elastic Net (alpha = 0.36, lambda = 6.96E-4 ) | nlambda = 100, lambda.max = 0.09202, lambda.min = 6.96E-4, lambda.1se = -1.0 | 161 | 42 | 7 | py_15_sid_9664 |
glm_grid_model_66 | binomial | logit | Elastic Net (alpha = 0.48, lambda = 5.93E-4 ) | nlambda = 100, lambda.max = 0.06902, lambda.min = 5.93E-4, lambda.1se = -1.0 | 161 | 36 | 6 | py_15_sid_9664 |
glm_grid_model_53 | binomial | logit | Elastic Net (alpha = 0.6, lambda = 5.69E-4 ) | nlambda = 100, lambda.max = 0.05521, lambda.min = 5.69E-4, lambda.1se = -1.0 | 161 | 35 | 6 | py_15_sid_9664 |
glm_grid_model_58 | binomial | logit | Elastic Net (alpha = 0.43, lambda = 6.94E-4 ) | nlambda = 100, lambda.max = 0.07704, lambda.min = 6.94E-4, lambda.1se = -1.0 | 161 | 36 | 6 | py_15_sid_9664 |
glm_grid_model_59 | binomial | logit | Elastic Net (alpha = 0.55, lambda = 6.13E-4 ) | nlambda = 100, lambda.max = 0.06023, lambda.min = 6.13E-4, lambda.1se = -1.0 | 161 | 35 | 6 | py_15_sid_9664 |
glm_grid_model_4 | binomial | logit | Elastic Net (alpha = 0.22, lambda = 9.91E-4 ) | nlambda = 100, lambda.max = 0.1506, lambda.min = 9.91E-4, lambda.1se = -1.0 | 161 | 46 | 6 | py_15_sid_9664 |
glm_grid_model_21 | binomial | logit | Elastic Net (alpha = 0.86, lambda = 4.86E-4 ) | nlambda = 100, lambda.max = 0.03852, lambda.min = 4.86E-4, lambda.1se = -1.0 | 161 | 30 | 6 | py_15_sid_9664 |
glm_grid_model_49 | binomial | logit | Elastic Net (alpha = 0.63, lambda = 5.75E-4 ) | nlambda = 100, lambda.max = 0.05258, lambda.min = 5.75E-4, lambda.1se = -1.0 | 161 | 31 | 6 | py_15_sid_9664 |
glm_grid_model_88 | binomial | logit | Elastic Net (alpha = 0.29, lambda = 8.94E-4 ) | nlambda = 100, lambda.max = 0.1142, lambda.min = 8.94E-4, lambda.1se = -1.0 | 161 | 40 | 7 | py_15_sid_9664 |
glm_grid_model_13 | binomial | logit | Elastic Net (alpha = 0.73, lambda = 5.53E-4 ) | nlambda = 100, lambda.max = 0.04538, lambda.min = 5.53E-4, lambda.1se = -1.0 | 161 | 31 | 6 | py_15_sid_9664 |
glm_grid_model_89 | binomial | logit | Elastic Net (alpha = 0.65, lambda = 5.88E-4 ) | nlambda = 100, lambda.max = 0.05097, lambda.min = 5.88E-4, lambda.1se = -1.0 | 161 | 31 | 6 | py_15_sid_9664 |
glm_grid_model_6 | binomial | logit | Elastic Net (alpha = 0.5, lambda = 6.93E-4 ) | nlambda = 100, lambda.max = 0.06626, lambda.min = 6.93E-4, lambda.1se = -1.0 | 161 | 33 | 6 | py_15_sid_9664 |
glm_grid_model_19 | binomial | logit | Elastic Net (alpha = 0.87, lambda = 5.1E-4 ) | nlambda = 100, lambda.max = 0.03808, lambda.min = 5.1E-4, lambda.1se = -1.0 | 161 | 30 | 6 | py_15_sid_9664 |
glm_grid_model_63 | binomial | logit | Elastic Net (alpha = 0.88, lambda = 5.18E-4 ) | nlambda = 100, lambda.max = 0.03765, lambda.min = 5.18E-4, lambda.1se = -1.0 | 161 | 30 | 6 | py_15_sid_9664 |
glm_grid_model_74 | binomial | logit | Elastic Net (alpha = 0.42, lambda = 8.13E-4 ) | nlambda = 100, lambda.max = 0.07888, lambda.min = 8.13E-4, lambda.1se = -1.0 | 161 | 33 | 6 | py_15_sid_9664 |
glm_grid_model_95 | binomial | logit | Elastic Net (alpha = 0.32, lambda = 9.5E-4 ) | nlambda = 100, lambda.max = 0.1035, lambda.min = 9.5E-4, lambda.1se = -1.0 | 161 | 35 | 6 | py_15_sid_9664 |
glm_grid_model_18 | binomial | logit | Elastic Net (alpha = 0.66, lambda = 6.59E-4 ) | nlambda = 100, lambda.max = 0.05019, lambda.min = 6.59E-4, lambda.1se = -1.0 | 161 | 30 | 6 | py_15_sid_9664 |
glm_grid_model_40 | binomial | logit | Elastic Net (alpha = 0.43, lambda = 8.42E-4 ) | nlambda = 100, lambda.max = 0.07704, lambda.min = 8.42E-4, lambda.1se = -1.0 | 161 | 31 | 6 | py_15_sid_9664 |
glm_grid_model_9 | binomial | logit | Elastic Net (alpha = 0.59, lambda = 7.31E-4 ) | nlambda = 100, lambda.max = 0.05615, lambda.min = 7.31E-4, lambda.1se = -1.0 | 161 | 30 | 6 | py_15_sid_9664 |
glm_grid_model_24 | binomial | logit | Elastic Net (alpha = 0.41, lambda = 8.91E-4 ) | nlambda = 100, lambda.max = 0.0808, lambda.min = 8.91E-4, lambda.1se = -1.0 | 161 | 30 | 6 | py_15_sid_9664 |
glm_grid_model_62 | binomial | logit | Elastic Net (alpha = 0.94, lambda = 5.09E-4 ) | nlambda = 100, lambda.max = 0.03524, lambda.min = 5.09E-4, lambda.1se = -1.0 | 161 | 28 | 6 | py_15_sid_9664 |
glm_grid_model_5 | binomial | logit | Elastic Net (alpha = 0.54, lambda = 7.8E-4 ) | nlambda = 100, lambda.max = 0.06135, lambda.min = 7.8E-4, lambda.1se = -1.0 | 161 | 30 | 6 | py_15_sid_9664 |
glm_grid_model_76 | binomial | logit | Elastic Net (alpha = 0.41, lambda = 9.56E-4 ) | nlambda = 100, lambda.max = 0.0808, lambda.min = 9.56E-4, lambda.1se = -1.0 | 161 | 30 | 6 | py_15_sid_9664 |
glm_grid_model_50 | binomial | logit | Elastic Net (alpha = 0.41, lambda = 9.93E-4 ) | nlambda = 100, lambda.max = 0.0808, lambda.min = 9.93E-4, lambda.1se = -1.0 | 161 | 30 | 6 | py_15_sid_9664 |
glm_grid_model_32 | binomial | logit | Elastic Net (alpha = 0.6, lambda = 8.16E-4 ) | nlambda = 100, lambda.max = 0.05521, lambda.min = 8.16E-4, lambda.1se = -1.0 | 161 | 28 | 7 | py_15_sid_9664 |
glm_grid_model_97 | binomial | logit | Elastic Net (alpha = 0.55, lambda = 8.76E-4 ) | nlambda = 100, lambda.max = 0.06023, lambda.min = 8.76E-4, lambda.1se = -1.0 | 161 | 28 | 6 | py_15_sid_9664 |
glm_grid_model_57 | binomial | logit | Elastic Net (alpha = 0.84, lambda = 6.74E-4 ) | nlambda = 100, lambda.max = 0.03944, lambda.min = 6.74E-4, lambda.1se = -1.0 | 161 | 25 | 6 | py_15_sid_9664 |
glm_grid_model_91 | binomial | logit | Elastic Net (alpha = 0.52, lambda = 9.45E-4 ) | nlambda = 100, lambda.max = 0.06371, lambda.min = 9.45E-4, lambda.1se = -1.0 | 161 | 28 | 7 | py_15_sid_9664 |
glm_grid_model_69 | binomial | logit | Elastic Net (alpha = 0.57, lambda = 9.53E-4 ) | nlambda = 100, lambda.max = 0.05812, lambda.min = 9.53E-4, lambda.1se = -1.0 | 161 | 26 | 6 | py_15_sid_9664 |
glm_grid_model_56 | binomial | logit | Elastic Net (alpha = 0.65, lambda = 9.38E-4 ) | nlambda = 100, lambda.max = 0.05097, lambda.min = 9.38E-4, lambda.1se = -1.0 | 161 | 25 | 6 | py_15_sid_9664 |
glm_grid_model_85 | binomial | logit | Elastic Net (alpha = 0.95, lambda = 7.41E-4 ) | nlambda = 100, lambda.max = 0.03487, lambda.min = 7.41E-4, lambda.1se = -1.0 | 161 | 21 | 6 | py_15_sid_9664 |
glm_grid_model_39 | binomial | logit | Elastic Net (alpha = 0.86, lambda = 8.06E-4 ) | nlambda = 100, lambda.max = 0.03852, lambda.min = 8.06E-4, lambda.1se = -1.0 | 161 | 21 | 6 | py_15_sid_9664 |
glm_grid_model_99 | binomial | logit | Elastic Net (alpha = 0.92, lambda = 8.6E-4 ) | nlambda = 100, lambda.max = 0.03601, lambda.min = 8.6E-4, lambda.1se = -1.0 | 161 | 19 | 7 | py_15_sid_9664 |
glm_grid_model_15 | binomial | logit | Elastic Net (alpha = 0.74, lambda = 9.87E-4 ) | nlambda = 100, lambda.max = 0.04477, lambda.min = 9.87E-4, lambda.1se = -1.0 | 161 | 21 | 6 | py_15_sid_9664 |
glm_grid_model_23 | binomial | logit | Elastic Net (alpha = 0.88, lambda = 9.21E-4 ) | nlambda = 100, lambda.max = 0.03765, lambda.min = 9.21E-4, lambda.1se = -1.0 | 161 | 19 | 6 | py_15_sid_9664 |
glm_grid_model_7 | binomial | logit | Elastic Net (alpha = 0.88, lambda = 9.48E-4 ) | nlambda = 100, lambda.max = 0.03765, lambda.min = 9.48E-4, lambda.1se = -1.0 | 161 | 19 | 6 | py_15_sid_9664 |
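When only the searched hyper-parameters of one model are of interest, get_hyperparams_dict() (documented in the help output earlier) returns just those, a lighter alternative to the full actual_params dump in the next cell:

glm_grid.get_hyperparams_dict("glm_grid_model_61")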
sorted_glm_grid = glm_grid.get_grid(sort_by='auc',decreasing=True)
sorted_glm_grid[0].actual_params
{'model_id': 'glm_grid_model_61', 'training_frame': 'py_15_sid_9664', 'validation_frame': 'py_16_sid_9664', 'nfolds': 0, 'seed': 202, 'keep_cross_validation_models': True, 'keep_cross_validation_predictions': False, 'keep_cross_validation_fold_assignment': False, 'fold_assignment': 'AUTO', 'fold_column': None, 'response_column': 'DELINQUENT', 'ignored_columns': ['PRODUCT_TYPE', 'PREPAYMENT_PENALTY_MORTGAGE_FLAG', 'PREPAID'], 'random_columns': None, 'ignore_const_cols': True, 'score_each_iteration': False, 'offset_column': None, 'weights_column': None, 'family': 'binomial', 'rand_family': None, 'tweedie_variance_power': 0.0, 'tweedie_link_power': 1.0, 'theta': 1e-10, 'solver': 'COORDINATE_DESCENT', 'alpha': [0.87], 'lambda': [4.9999999999999996e-06], 'lambda_search': True, 'early_stopping': True, 'nlambdas': 100, 'standardize': True, 'missing_values_handling': 'MeanImputation', 'plug_values': None, 'compute_p_values': False, 'remove_collinear_columns': False, 'intercept': True, 'non_negative': False, 'max_iterations': 1000, 'objective_epsilon': 0.0001, 'beta_epsilon': 0.0001, 'gradient_epsilon': 1.0000000000000002e-06, 'link': 'logit', 'rand_link': None, 'startval': None, 'calc_like': False, 'HGLM': False, 'prior': -1.0, 'lambda_min_ratio': 0.0001, 'beta_constraints': None, 'max_active_predictors': 5000, 'interactions': None, 'interaction_pairs': None, 'obj_reg': 2.854956775954412e-06, 'export_checkpoints_dir': None, 'balance_classes': False, 'class_sampling_factors': None, 'max_after_balance_size': 5.0, 'max_confusion_matrix_size': 20, 'max_hit_ratio_k': 0, 'max_runtime_secs': 179.878, 'custom_metric_func': None}
# [threshold, F1] at the F1-maximizing threshold for the two best models
print(sorted_glm_grid[0].F1())
sorted_glm_grid[1].F1()
[[0.13993235618594693, 0.29243426359090174]]
[[0.1388919375261923, 0.29188911043931304]]
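The two leaders are nearly indistinguishable on max F1. To compare a few of the top models on validation AUC as well, a loop over the sorted grid works; this is a sketch using the standard binomial-model accessors auc(valid=True) and F1(valid=True).

# Sketch: report validation AUC and max-F1 for the three best grid models.
for m in sorted_glm_grid.models[:3]:
    print(m.model_id, m.auc(valid=True), m.F1(valid=True))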
sorted_glm_grid[0].model_performance(test)  # test AUC ≈ 0.8524, versus 0.8523 for the untuned model
ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.031143376101575086
RMSE: 0.17647485968708146
LogLoss: 0.12199693111453563
Null degrees of freedom: 74897
Residual degrees of freedom: 74754
Null deviance: 23061.156287645877
Residual deviance: 18274.652293232975
AIC: 18562.652293232975
AUC: 0.8524158062119054
AUCPR: 0.20258611034476104
Gini: 0.7048316124238108

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.13069466877003805:
| | (actual) | FALSE | TRUE | Error | Rate |
|---|---|---|---|---|---|
0 | FALSE | 68851.0 | 3375.0 | 0.0467 | (3375.0/72226.0) |
1 | TRUE | 1676.0 | 996.0 | 0.6272 | (1676.0/2672.0) |
2 | Total | 70527.0 | 4371.0 | 0.0674 | (5051.0/74898.0) |
Maximum Metrics: Maximum metrics at their respective thresholds
| | metric | threshold | value | idx |
|---|---|---|---|---|
0 | max f1 | 0.130695 | 0.282834 | 202.0 |
1 | max f2 | 0.064974 | 0.386713 | 264.0 |
2 | max f0point5 | 0.206442 | 0.283731 | 151.0 |
3 | max accuracy | 0.939516 | 0.964311 | 0.0 |
4 | max precision | 0.602416 | 0.438596 | 32.0 |
5 | max recall | 0.000945 | 1.000000 | 398.0 |
6 | max specificity | 0.939516 | 0.999986 | 0.0 |
7 | max absolute_mcc | 0.072018 | 0.263347 | 255.0 |
8 | max min_per_class_accuracy | 0.038326 | 0.773336 | 305.0 |
9 | max mean_per_class_accuracy | 0.032702 | 0.777303 | 315.0 |
10 | max tns | 0.939516 | 72225.000000 | 0.0 |
11 | max fns | 0.939516 | 2672.000000 | 0.0 |
12 | max fps | 0.000566 | 72226.000000 | 399.0 |
13 | max tps | 0.000945 | 2672.000000 | 398.0 |
14 | max tnr | 0.939516 | 0.999986 | 0.0 |
15 | max fnr | 0.939516 | 1.000000 | 0.0 |
16 | max fpr | 0.000566 | 1.000000 | 399.0 |
17 | max tpr | 0.000945 | 1.000000 | 398.0 |
Gains/Lift Table: Avg response rate: 3.57 %, avg score: 3.60 %
| | group | cumulative_data_fraction | lower_threshold | lift | cumulative_lift | response_rate | score | cumulative_response_rate | cumulative_score | capture_rate | cumulative_capture_rate | gain | cumulative_gain |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0.010000 | 0.323153 | 9.954824 | 9.954824 | 0.355140 | 0.464324 | 0.355140 | 0.464324 | 0.099551 | 0.099551 | 895.482400 | 895.482400 | |
1 | 2 | 0.020001 | 0.238585 | 7.859072 | 8.906948 | 0.280374 | 0.275583 | 0.317757 | 0.369954 | 0.078593 | 0.178144 | 685.907158 | 790.694779 | |
2 | 3 | 0.030001 | 0.194006 | 6.362106 | 8.058667 | 0.226969 | 0.214539 | 0.287494 | 0.318149 | 0.063623 | 0.241766 | 536.210556 | 705.866705 | |
3 | 4 | 0.040001 | 0.164906 | 4.939988 | 7.278997 | 0.176235 | 0.178489 | 0.259680 | 0.283234 | 0.049401 | 0.291168 | 393.998785 | 627.899725 | |
4 | 5 | 0.050001 | 0.143184 | 4.378626 | 6.698923 | 0.156208 | 0.153711 | 0.238985 | 0.257329 | 0.043787 | 0.334955 | 337.862559 | 569.892292 | |
5 | 6 | 0.100003 | 0.087476 | 3.428052 | 5.063488 | 0.122296 | 0.111145 | 0.180641 | 0.184237 | 0.171407 | 0.506362 | 242.805217 | 406.348754 | |
6 | 7 | 0.150004 | 0.062147 | 2.492448 | 4.206475 | 0.088919 | 0.073591 | 0.150067 | 0.147355 | 0.124626 | 0.630988 | 149.244841 | 320.647450 | |
7 | 8 | 0.200005 | 0.047494 | 1.639178 | 3.564650 | 0.058478 | 0.054272 | 0.127170 | 0.124084 | 0.081961 | 0.712949 | 63.917779 | 256.465032 | |
8 | 9 | 0.300008 | 0.030300 | 1.205058 | 2.778119 | 0.042991 | 0.037927 | 0.099110 | 0.095365 | 0.120509 | 0.833458 | 20.505764 | 177.811943 | |
9 | 10 | 0.399997 | 0.020667 | 0.591381 | 2.231489 | 0.021098 | 0.025088 | 0.079609 | 0.077797 | 0.059132 | 0.892590 | -40.861947 | 123.148945 | |
10 | 11 | 0.500000 | 0.014547 | 0.389211 | 1.863024 | 0.013885 | 0.017407 | 0.066464 | 0.065719 | 0.038922 | 0.931512 | -61.078884 | 86.302395 | |
11 | 12 | 0.600003 | 0.010226 | 0.336817 | 1.608651 | 0.012016 | 0.012272 | 0.057389 | 0.056811 | 0.033683 | 0.965195 | -66.318265 | 60.865053 | |
12 | 13 | 0.699992 | 0.007069 | 0.157202 | 1.401321 | 0.005608 | 0.008589 | 0.049992 | 0.049923 | 0.015719 | 0.980913 | -84.279758 | 40.132057 | |
13 | 14 | 0.799995 | 0.004617 | 0.108530 | 1.239716 | 0.003872 | 0.005802 | 0.044227 | 0.044408 | 0.010853 | 0.991766 | -89.146996 | 23.971636 | |
14 | 15 | 0.899997 | 0.002619 | 0.063621 | 1.109035 | 0.002270 | 0.003584 | 0.039565 | 0.039872 | 0.006362 | 0.998129 | -93.637894 | 10.903523 | |
15 | 16 | 1.000000 | 0.000105 | 0.018712 | 1.000000 | 0.000668 | 0.001628 | 0.035675 | 0.036047 | 0.001871 | 1.000000 | -98.128792 | 0.000000 |
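At this point the tuned GLM can be persisted so it does not have to be retrained; a minimal sketch using h2o.save_model, where the target path "./models" is an arbitrary example.

# Sketch: save the best GLM from the grid to disk; it can be reloaded later
# with h2o.load_model(). The path is an assumption, not from the notebook.
best_glm_path = h2o.save_model(model=sorted_glm_grid[0], path="./models", force=True)
print(best_glm_path)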
# Grid search uses the Cartesian strategy by default (i.e., when no search_criteria is specified);
# here RandomDiscrete search criteria turn it into a random search.
rf_grid = h2o.grid.H2OGridSearch(
H2ORandomForestEstimator(nfolds=10),
hyper_params = {
"ntrees": [50,100],
"max_depth": [10,20],
},
search_criteria = {
"strategy":"RandomDiscrete", # Random Search
"max_models":100,
"max_runtime_secs":300,
"seed":42
},
grid_id = "rf_grid_2",
)
%time rf_grid.train(x=x, y=y, training_frame=train, validation_frame = valid)
drf Grid Build progress: |████████████████████████████████████████████████| 100% CPU times: user 2.33 s, sys: 432 ms, total: 2.76 s Wall time: 6min 45s
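With only 2 × 2 = 4 combinations in hyper_params, an exhaustive Cartesian search is equally practical here. For comparison, a minimal sketch that simply omits search_criteria (the grid_id is an assumption); it is not run in this notebook.

# Sketch: the same RF grid as a Cartesian (exhaustive) search; with no
# search_criteria the default strategy trains every combination.
rf_grid_cartesian = h2o.grid.H2OGridSearch(
    H2ORandomForestEstimator(nfolds=10),
    hyper_params={
        "ntrees": [50, 100],
        "max_depth": [10, 20],
    },
    grid_id="rf_grid_cartesian",
)
# rf_grid_cartesian.train(x=x, y=y, training_frame=train, validation_frame=valid)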
rf_grid.get_grid(sort_by='auc', decreasing=True)
| | max_depth | ntrees | model_ids | auc |
|---|---|---|---|---|
| 0 | 20 | 28 | rf_grid_2_model_1 | 0.818864830103598 |
best_model = rf_grid.get_grid(sort_by="auc", decreasing=True)[0]
# Continue training from the best grid model via checkpointing
rf = H2ORandomForestEstimator(seed=42, model_id='default_random_forest', checkpoint=best_model.model_id)
%time rf.train(x=x, y=y, training_frame=train, validation_frame=valid)
drf Model Build progress: |███████████████████████████████████████████████| 100% CPU times: user 347 ms, sys: 109 ms, total: 456 ms Wall time: 39.7 s
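A note on checkpointing: a checkpointed DRF build can only add trees, so the new ntrees must be larger than the number of trees already in the checkpointed model. The grid listing above reported 28 trees for the best model (presumably cut short by the 300-second budget), and since the DRF default is ntrees=50, this continued build stops at 50 trees, as the summary below confirms. To grow the forest further, request more trees explicitly; a hedged sketch (the value 200 and the model_id are assumptions).

# Sketch: continue from the same checkpoint but ask for a larger forest;
# ntrees must exceed the number of trees already in the checkpointed model.
rf_bigger = H2ORandomForestEstimator(
    seed=42,
    model_id='rf_checkpoint_200_trees',   # hypothetical id
    checkpoint=best_model.model_id,
    ntrees=200,
)
# rf_bigger.train(x=x, y=y, training_frame=train, validation_frame=valid)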
rf.summary()
Model Summary:
| | number_of_trees | number_of_internal_trees | model_size_in_bytes | min_depth | max_depth | mean_depth | min_leaves | max_leaves | mean_leaves |
|---|---|---|---|---|---|---|---|---|---|
0 | 50.0 | 50.0 | 7675073.0 | 20.0 | 20.0 | 8.8 | 9881.0 | 11724.0 | 4780.06 |
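To finish, the checkpoint-continued forest can be scored on the held-out test frame just like the GLM above; a minimal sketch.

# Sketch: evaluate the continued random forest on the test set.
rf_test_perf = rf.model_performance(test)
print(rf_test_perf.auc())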