import h2o
from h2o.estimators.random_forest import H2ORandomForestEstimator
import mlflow
import mlflow.h2o
# Connect to an H2O instance, starting a local one if none is running.
h2o.init()

# Load the wine-quality dataset into an H2OFrame.
wine = h2o.import_file(path="wine-quality.csv")

# One random draw per row, used to partition the rows into train and test.
r = wine["quality"].runif()

# Roughly 70/30 split on the same draw. The two masks must be complementary:
# the earlier `0.3 <= r` test mask overlapped `r < 0.7`, so rows with r in
# [0.3, 0.7) appeared in both frames and leaked training data into evaluation.
train = wine[r < 0.7]
test = wine[r >= 0.7]
Checking whether there is an H2O instance running at http://localhost:54321..... not found. Attempting to start a local H2O server... Java Version: openjdk version "1.8.0_181"; OpenJDK Runtime Environment (build 1.8.0_181-8u181-b13-2~deb9u1-b13); OpenJDK 64-Bit Server VM (build 25.181-b13, mixed mode) Starting server from /opt/conda/lib/python2.7/site-packages/h2o/backend/bin/h2o.jar Ice root: /tmp/tmpz8qTmm JVM stdout: /tmp/tmpz8qTmm/h2o_unknownUser_started_from_python.out JVM stderr: /tmp/tmpz8qTmm/h2o_unknownUser_started_from_python.err Server is running at http://127.0.0.1:54321 Connecting to H2O server at http://127.0.0.1:54321... successful.
H2O cluster uptime: | 01 secs |
H2O cluster timezone: | Etc/UTC |
H2O data parsing timezone: | UTC |
H2O cluster version: | 3.22.1.1 |
H2O cluster version age: | 23 days |
H2O cluster name: | H2O_from_python_unknownUser_ukj9f9 |
H2O cluster total nodes: | 1 |
H2O cluster free memory: | 3.042 Gb |
H2O cluster total cores: | 7 |
H2O cluster allowed cores: | 7 |
H2O cluster status: | accepting new members, healthy |
H2O connection url: | http://127.0.0.1:54321 |
H2O connection proxy: | None |
H2O internal security: | False |
H2O API Extensions: | XGBoost, Algos, AutoML, Core V3, Core V4 |
Python version: | 2.7.15 final |
Parse progress: |█████████████████████████████████████████████████████████| 100%
def train_random_forest(ntrees):
    """Train one H2O random forest inside an MLflow run and log the results.

    Fits an ``H2ORandomForestEstimator`` with ``ntrees`` trees to predict the
    "quality" column from every other column of the module-level ``wine``
    frame, using the module-level ``train`` and ``test`` frames as training
    and validation data. Logs the ``ntrees`` parameter, three metrics and the
    fitted model to the active MLflow run.

    Args:
        ntrees: Number of trees passed to the estimator.
    """
    target = "quality"
    with mlflow.start_run():
        predictors = [name for name in wine.col_names if name != target]
        forest = H2ORandomForestEstimator(ntrees=ntrees)
        forest.train(
            x=predictors,
            y=target,
            training_frame=train,
            validation_frame=test,
        )

        mlflow.log_param("ntrees", ntrees)
        # NOTE(review): the metric accessors are called without train/valid
        # flags -- confirm which data split these values describe.
        for metric_name in ("rmse", "r2", "mae"):
            mlflow.log_metric(metric_name, getattr(forest, metric_name)())
        mlflow.h2o.log_model(forest, "model")
# Sweep over forest sizes; each call records its own MLflow run.
for ntrees in (10, 20, 50, 100, 200):
    train_random_forest(ntrees)
drf Model Build progress: |███████████████████████████████████████████████| 100% drf Model Build progress: |███████████████████████████████████████████████| 100% drf Model Build progress: |███████████████████████████████████████████████| 100% drf Model Build progress: |███████████████████████████████████████████████| 100% drf Model Build progress: |███████████████████████████████████████████████| 100%
import yaml
# Notebook-style inspection: the bare attribute reference only makes an
# interactive session display the function object; nothing is called or bound.
yaml.safe_dump
<function yaml.safe_dump>