In [1]:

import h2o

In [2]:

# Connect to a cluster
h2o.init()

H2O cluster uptime:	17 seconds 548 milliseconds
H2O cluster version:	3.1.0.99999
H2O cluster name:	anqi_fu
H2O cluster total nodes:	1
H2O cluster total memory:	1.78 GB
H2O cluster total cores:	8
H2O cluster allowed cores:	8
H2O cluster healthy:	True
H2O Connection ip:	127.0.0.1
H2O Connection port:	54321

In [3]:

weather_path = h2o.locate("smalldata/chicago/chicagoAllWeather.csv")
census_path = h2o.locate("smalldata/chicago/chicagoCensus.csv")
crimes_path = h2o.locate("smalldata/chicago/chicagoCrimes10k.csv.zip")

print "Import and Parse weather data"
weather = h2o.import_frame(path=weather_path)
weather.drop("date")
weather.describe()

print "Import and Parse census data"
census = h2o.import_frame(path=census_path)
census.describe()

print "Import and Parse crimes data"
crimes = h2o.import_frame(path=crimes_path)
crimes.describe()

Import and Parse weather data

Parse Progress: [##################################################] 100%
Imported  /Users/anqi_fu/Documents/workspace/h2o-3/smalldata/chicago/chicagoAllWeather.csv . Parsed 5,162 rows and 7 cols
Rows: 5,162 Cols: 7

Chunk compression summary:

chunk_type	chunk_name	count	count_percentage	size	size_percentage
C1N	1-Byte Integers (w/o NAs)	2	28.57143	10.2 KB	11.221008
C1S	1-Byte Fractions	4	57.14286	20.5 KB	22.510675
CStr	String	1	14.285715	60.3 KB	66.26832

Frame distribution summary:

	size	number_of_rows	number_of_chunks_per_column	number_of_chunks
172.16.2.17:54321	91.0 KB	5162.0	1.0	7.0
mean	91.0 KB	5162.0	1.0	7.0
min	91.0 KB	5162.0	1.0	7.0
max	91.0 KB	5162.0	1.0	7.0
stddev	0 B	0.0	0.0	0.0
total	91.0 KB	5162.0	1.0	7.0

Column-by-Column Summary:

	date	month	day	year	maxTemp	meanTemp	minTemp
type	string	int	int	int	int	int	int
mins	NaN	1.0	1.0	2001.0	-2.0	-9.0	-18.0
maxs	NaN	12.0	31.0	2015.0	103.0	93.0	82.0
sigma	NaN	3.46905171694	8.79895173997	4.0773409057	21.4829777237	19.9302399266	19.0207297123
zero_count	0	0	0	0	0	2	16
missing_count	0	0	0	0	13	13	13

Import and Parse census data

Parse Progress: [##################################################] 100%
Imported  /Users/anqi_fu/Documents/workspace/h2o-3/smalldata/chicago/chicagoCensus.csv . Parsed 79 rows and 9 cols
Rows: 79 Cols: 9

Chunk compression summary:

chunk_type	chunk_name	count	count_percentage	size	size_percentage
C1	1-Byte Integers	2	22.222223	294 B	9.312638
C1S	1-Byte Fractions	1	11.111112	163 B	5.1631293
C2S	2-Byte Fractions	4	44.444447	968 B	30.662022
C4	4-Byte Integers	1	11.111112	384 B	12.163446
CStr	String	1	11.111112	1.3 KB	42.698765

Frame distribution summary:

	size	number_of_rows	number_of_chunks_per_column	number_of_chunks
172.16.2.17:54321	3.1 KB	79.0	1.0	9.0
mean	3.1 KB	79.0	1.0	9.0
min	3.1 KB	79.0	1.0	9.0
max	3.1 KB	79.0	1.0	9.0
stddev	0 B	0.0	0.0	0.0
total	3.1 KB	79.0	1.0	9.0

Column-by-Column Summary:

	Community Area Number	COMMUNITY AREA NAME	PERCENT OF HOUSING CROWDED	PERCENT HOUSEHOLDS BELOW POVERTY	PERCENT AGED 16 UNEMPLOYED	PERCENT AGED 25 WITHOUT HIGH SCHOOL DIPLOMA	PERCENT AGED UNDER 18 OR OVER 64	PER CAPITA INCOME	HARDSHIP INDEX
type	int	string	real	real	real	real	real	int	int
mins	1.0	NaN	0.3	3.3	4.7	2.5	13.5	8201.0	1.0
maxs	77.0	NaN	15.8	56.5	35.9	54.8	51.5	88669.0	98.0
sigma	22.3718573212	NaN	3.65898144135	11.457230913	7.49949670861	11.7465143511	7.28442108494	15196.4055413	28.6905556516
zero_count	0	0	0	0	0	0	0	0	0
missing_count	2	0	1	1	1	1	1	1	2

Import and Parse crimes data

Parse Progress: [##################################################] 100%
Imported  /Users/anqi_fu/Documents/workspace/h2o-3/smalldata/chicago/chicagoCrimes10k.csv.zip . Parsed 9,999 rows and 22 cols
Rows: 9,999 Cols: 22

Chunk compression summary:

chunk_type	chunk_name	count	count_percentage	size	size_percentage
C0L	Constant Integers	4	4.5454545	320 B	0.03695244
C1	1-Byte Integers	32	36.363636	80.2 KB	9.488462
C1N	1-Byte Integers (w/o NAs)	8	9.090909	20.1 KB	2.3721156
C2	2-Byte Integers	16	18.181818	79.2 KB	9.362824
C4	4-Byte Integers	12	13.636364	118.0 KB	13.950008
CStr	String	8	9.090909	391.1 KB	46.252445
C8D	64-bit Reals	8	9.090909	156.8 KB	18.537191

Frame distribution summary:

	size	number_of_rows	number_of_chunks_per_column	number_of_chunks
172.16.2.17:54321	845.7 KB	9999.0	4.0	88.0
mean	845.7 KB	9999.0	4.0	88.0
min	845.7 KB	9999.0	4.0	88.0
max	845.7 KB	9999.0	4.0	88.0
stddev	0 B	0.0	0.0	0.0
total	845.7 KB	9999.0	4.0	88.0

Column-by-Column Summary:

	ID	Case Number	Date	Block	IUCR	Primary Type	Description	Location Description	Arrest	Domestic	Beat	District	Ward	Community Area	FBI Code	X Coordinate	Y Coordinate	Year	Updated On	Latitude	Longitude	Location
type	int	string	string	enum	int	enum	enum	enum	enum	enum	int	int	int	int	int	int	int	int	enum	real	real	enum
mins	21735.0	NaN	NaN	0.0	110.0	0.0	0.0	0.0	0.0	0.0	111.0	1.0	1.0	1.0	2.0	1100317.0	1814255.0	2015.0	0.0	41.64507243	-87.906463888	0.0
maxs	9962898.0	NaN	NaN	6517.0	5131.0	26.0	198.0	90.0	1.0	1.0	2535.0	25.0	50.0	77.0	26.0	1205069.0	1951533.0	2015.0	32.0	42.022646183	-87.524773286	8603.0
sigma	396787.564221	NaN	NaN	1915.88517194	927.751435583	9.16241735944	60.1059382029	25.5963972463	0.455083515588	0.35934414686	695.76029875	6.94547493301	13.6495661144	21.2748762223	7.57423857911	16496.4493681	31274.0163199	0.0	10.0824464345	0.0860186579359	0.0600357970653	2469.64729385
zero_count	0	0	0	3	0	11	933	19	7071	8476	0	0	0	0	0	0	0	0	603	0	0	1
missing_count	0	0	0	0	419	0	0	6	0	0	0	162	0	0	2557	162	162	0	0	162	162	162

In [4]:

def refine_date_col(data, col, pattern):
    data[col]         = data[col].as_date(pattern)
    data["Day"]       = data[col].day()
    data["Month"]     = data[col].month() + 1     # Since H2O indexes from 0
    data["Year"]      = data[col].year() + 1900   # Start of epoch is 1900
    data["WeekNum"]   = data[col].week()
    data["WeekDay"]   = data[col].dayOfWeek()
    data["HourOfDay"] = data[col].hour()
    
    data.describe()  # HACK: Force evaluation before ifelse and cut. See PUBDEV-1425.
    
    # Create weekend and season cols
    # Spring = Mar, Apr, May. Summer = Jun, Jul, Aug. Autumn = Sep, Oct. Winter = Nov, Dec, Jan, Feb.
    # data["Weekend"]   = [1 if x in ("Sun", "Sat") else 0 for x in data["WeekDay"]]
    data["Weekend"] = h2o.ifelse(data["WeekDay"] == "Sun" or data["WeekDay"] == "Sat", 1, 0)[0]
    data["Season"] = data["Month"].cut([0, 2, 5, 7, 10, 12], ["Winter", "Spring", "Summer", "Autumn", "Winter"])
    
refine_date_col(crimes, "Date", "%m/%d/%Y %I:%M:%S %p")
crimes = crimes.drop("Date")
crimes.describe()

Rows: 9,999 Cols: 27

Chunk compression summary:

chunk_type	chunk_name	count	count_percentage	size	size_percentage
C0L	Constant Integers	9	8.333334	720 B	0.10067465
C1	1-Byte Integers	32	29.62963	80.2 KB	11.489216
C1N	1-Byte Integers (w/o NAs)	23	21.296297	57.9 KB	8.29671
C2	2-Byte Integers	16	14.814815	79.2 KB	11.337085
C4	4-Byte Integers	12	11.111112	118.0 KB	16.891531
C8	64-bit Integers	4	3.7037036	78.4 KB	11.222987
CStr	String	4	3.7037036	127.2 KB	18.215822
C8D	64-bit Reals	8	7.4074073	156.8 KB	22.445974

Frame distribution summary:

	size	number_of_rows	number_of_chunks_per_column	number_of_chunks
172.16.2.17:54321	698.4 KB	9999.0	4.0	108.0
mean	698.4 KB	9999.0	4.0	108.0
min	698.4 KB	9999.0	4.0	108.0
max	698.4 KB	9999.0	4.0	108.0
stddev	0 B	0.0	0.0	0.0
total	698.4 KB	9999.0	4.0	108.0

Column-by-Column Summary:

	ID	Case Number	Date	Block	IUCR	Primary Type	Description	Location Description	Arrest	Domestic	Beat	District	Ward	Community Area	FBI Code	X Coordinate	Y Coordinate	Year	Updated On	Latitude	Longitude	Location	Day	Month	WeekNum	WeekDay	HourOfDay
type	int	string	int	enum	int	enum	enum	enum	enum	enum	int	int	int	int	int	int	int	int	enum	real	real	enum	int	int	int	enum	int
mins	21735.0	NaN	1.42203063e+12	0.0	110.0	0.0	0.0	0.0	0.0	0.0	111.0	1.0	1.0	1.0	2.0	1100317.0	1814255.0	3915.0	0.0	41.64507243	-87.906463888	0.0	1.0	2.0	4.0	0.0	0.0
maxs	9962898.0	NaN	1.42346782e+12	6517.0	5131.0	26.0	198.0	90.0	1.0	1.0	2535.0	25.0	50.0	77.0	26.0	1205069.0	1951533.0	3915.0	32.0	42.022646183	-87.524773286	8603.0	31.0	3.0	6.0	6.0	23.0
sigma	396787.564221	NaN	433879245.188	1915.88517194	927.751435583	9.16241735944	60.1059382029	25.5963972463	0.455083515588	0.35934414686	695.76029875	6.94547493301	13.6495661144	21.2748762223	7.57423857911	16496.4493681	31274.0163199	0.0	10.0824464345	0.0860186579359	0.0600357970653	2469.64729385	11.1801043358	0.493492406787	0.738929830409	1.93284056432	6.47321735807
zero_count	0	0	0	3	0	11	933	19	7071	8476	0	0	0	0	0	0	0	0	603	0	0	1	0	0	0	1038	374
missing_count	0	0	0	0	419	0	0	6	0	0	0	162	0	0	2557	162	162	0	0	162	162	162	0	0	0	0	0

Rows: 9,999 Cols: 28

Chunk compression summary:

chunk_type	chunk_name	count	count_percentage	size	size_percentage
C0L	Constant Integers	13	11.607142	1.0 KB	0.16332634
CBS	Bits	4	3.5714288	1.5 KB	0.2404352
C1	1-Byte Integers	32	28.57143	80.2 KB	12.9040365
C1N	1-Byte Integers (w/o NAs)	23	20.535715	57.9 KB	9.318395
C2	2-Byte Integers	16	14.285715	79.2 KB	12.733171
C4	4-Byte Integers	12	10.714286	118.0 KB	18.97161
CStr	String	4	3.5714288	127.2 KB	20.458979
C8D	64-bit Reals	8	7.1428576	156.8 KB	25.210047

Frame distribution summary:

	size	number_of_rows	number_of_chunks_per_column	number_of_chunks
172.16.2.17:54321	621.8 KB	9999.0	4.0	112.0
mean	621.8 KB	9999.0	4.0	112.0
min	621.8 KB	9999.0	4.0	112.0
max	621.8 KB	9999.0	4.0	112.0
stddev	0 B	0.0	0.0	0.0
total	621.8 KB	9999.0	4.0	112.0

Column-by-Column Summary:

	ID	Case Number	Block	IUCR	Primary Type	Description	Location Description	Arrest	Domestic	Beat	District	Ward	Community Area	FBI Code	X Coordinate	Y Coordinate	Year	Updated On	Latitude	Longitude	Location	Day	Month	WeekNum	WeekDay	HourOfDay	Weekend	Season
type	int	string	enum	int	enum	enum	enum	enum	enum	int	int	int	int	int	int	int	int	enum	real	real	enum	int	int	int	enum	int	int	enum
mins	21735.0	NaN	0.0	110.0	0.0	0.0	0.0	0.0	0.0	111.0	1.0	1.0	1.0	2.0	1100317.0	1814255.0	3915.0	0.0	41.64507243	-87.906463888	0.0	1.0	2.0	4.0	0.0	0.0	0.0	0.0
maxs	9962898.0	NaN	6517.0	5131.0	26.0	198.0	90.0	1.0	1.0	2535.0	25.0	50.0	77.0	26.0	1205069.0	1951533.0	3915.0	32.0	42.022646183	-87.524773286	8603.0	31.0	3.0	6.0	6.0	23.0	1.0	1.0
sigma	396787.564221	NaN	1915.88517194	927.751435583	9.16241735944	60.1059382029	25.5963972463	0.455083515588	0.35934414686	695.76029875	6.94547493301	13.6495661144	21.2748762223	7.57423857911	16496.4493681	31274.0163199	0.0	10.0824464345	0.0860186579359	0.0600357970653	2469.64729385	11.1801043358	0.493492406787	0.738929830409	1.93284056432	6.47321735807	0.365802434041	0.493492406787
zero_count	0	0	3	0	11	933	19	7071	8476	0	0	0	0	0	0	0	0	603	0	0	1	0	0	0	1038	374	8408	5805
missing_count	0	0	0	419	0	0	6	0	0	0	162	0	0	2557	162	162	0	0	162	162	162	0	0	0	0	0	0	0

In [5]:

# Merge crimes data with weather and census
census["Community Area Number"]._name = "Community Area"
weather["month"]._name = "Month"
weather["day"]  ._name = "Day"
weather["year"] ._name = "Year"
crimes.merge(census, allLeft=True, allRite=False)
crimes.merge(weather, allLeft=True, allRite=False)

---------------------------------------------------------------------------
EnvironmentError                          Traceback (most recent call last)
<ipython-input-5-e946a6af6204> in <module>()
      4 weather["day"]  ._name = "Day"
      5 weather["year"] ._name = "Year"
----> 6 crimes.merge(census, allLeft=True, allRite=False)
      7 crimes.merge(weather, allLeft=True, allRite=False)

/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/frame.pyc in merge(self, other, allLeft, allRite)
   1022     expr2 = "(, "+expr+" (del %"+lkey+" #0) (del %"+rkey+" #0) )"
   1023 
-> 1024     h2o.rapids(expr2)       # merge in h2o
   1025     # Make backing H2OVecs for the remote h2o vecs
   1026     j = h2o.frame(tmp_key)  # Fetch the frame as JSON

/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/h2o.pyc in rapids(expr)
    487   :return: The JSON response of the Rapids execution
    488   """
--> 489   result = H2OConnection.post_json("Rapids", ast=urllib.quote(expr), _rest_version=99)
    490   if result['error'] is not None:
    491     raise EnvironmentError("rapids expression not evaluated: {0}".format(str(result['error'])))

/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/connection.pyc in post_json(url_suffix, file_upload_info, **kwargs)
    360     if __H2OCONN__ is None:
    361       raise ValueError("No h2o connection. Did you run `h2o.init()` ?")
--> 362     return __H2OCONN__._rest_json(url_suffix, "POST", file_upload_info, **kwargs)
    363 
    364   def _rest_json(self, url_suffix, method, file_upload_info, **kwargs):

/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/connection.pyc in _rest_json(self, url_suffix, method, file_upload_info, **kwargs)
    363 
    364   def _rest_json(self, url_suffix, method, file_upload_info, **kwargs):
--> 365     raw_txt = self._do_raw_rest(url_suffix, method, file_upload_info, **kwargs)
    366     return self._process_tables(raw_txt.json())
    367 

/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/connection.pyc in _do_raw_rest(self, url_suffix, method, file_upload_info, **kwargs)
    429       raise EnvironmentError(("h2o-py got an unexpected HTTP status code:\n {} {} (method = {}; url = {}). \n"+ \
    430                               "detailed error messages: {}")
--> 431                              .format(http_result.status_code,http_result.reason,method,url,detailed_error_msgs))
    432 
    433     # TODO: is.logging? -> write to logs

EnvironmentError: h2o-py got an unexpected HTTP status code:
 412 Precondition Failed (method = POST; url = http://localhost:54321/99/Rapids). 
detailed error messages: water.DException$DistributedException: from /172.16.2.17:54321; by class water.rapids.ASTMerge$MergeSet$MakeHash; class water.exceptions.H2OIllegalArgumentException: unimplemented

In [12]:

# Create test/train split
data_split = h2o.split_frame(data, ratios = [0.8,0.2])
train = data_split[1]
test  = data_split[2]

# Simple GBM - Predict Arrest
data_gbm = h2o.gbm(x              =train.drop("Arrest"),
                   y              =train     ["Arrest"],
                   validation_x   =test .drop("Arrest"),
                   validation_y   =test      ["Arrest"],
                   ntrees         =10,
                   max_depth      =6,
                   distribution   ="bernoulli")

# Simple Deep Learning
data_dl = h2o.deeplearning(x                   =train.drop("Arrest"),
                           y                   =train     ["Arrest"],
                           validation_x        =test .drop("Arrest"),
                           validation_y        =test      ["Arrest"],
                           variable_importances=True,
                           loss                ="Automatic")

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-12-347776b381b3> in <module>()
      1 # Create test/train split
----> 2 data_split = h2o.split_frame(data, ratios = [0.8,0.2])
      3 train = data_split[1]
      4 test  = data_split[2]
      5 

NameError: name 'data' is not defined

In [2]:

# GBM performance on train/test data
train_auc_gbm = data_gbm.model_performance(train).auc()
test_auc_gbm  = data_gbm.model_performance(test) .auc()

# Deep Learning performance on train/test data
train_auc_dl = data_dl.model_performance(train).auc()
test_auc_dl  = data_dl.model_performance(test) .auc()

# Make a pretty HTML table printout of the results
header = ["Model", "AUC Train", "AUC Test"]
table  = [
           ["GBM", train_auc_gbm, test_auc_gbm],
           ["DL ", train_auc_dl,  test_auc_dl]
         ]
h2o.H2ODisplay(table, header)

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-2-f7c2ab3a3e26> in <module>()
      1 # GBM performance on train/test data
----> 2 train_auc_gbm = data_gbm.model_performance(train).auc()
      3 test_auc_gbm  = data_gbm.model_performance(test) .auc()
      4 
      5 # Deep Learning performance on train/test data

NameError: name 'data_gbm' is not defined

In [6]:

# Create new H2OFrame of crime observations
examples = {
            "Date":                 ["02/08/2015 11:43:58 PM", "02/08/2015 11:00:39 PM"],
            "IUCR":                 [1811, 1150],
            "Primary.Type":         ["NARCOTICS", "DECEPTIVE PRACTICE"],
            "Location.Description": ["STREET", "RESIDENCE"],
            "Domestic":             ["false", "false"],
            "Beat":                 [422, 923],
            "District":             [4, 9],
            "Ward":                 [7, 14],
            "Community.Area":       [46, 63],
            "FBI.Code":             [18, 11]
            }

crime_examples = h2o.H2OFrame(python_obj = examples)

# Refine date column and merge with census data
refine_date_col(crime_examples, "Date", "%m/%d/%Y %I:%M:%S %p")
crime_examples.drop("Date")
crime_examples.merge(census, allLeft=True, allRite=False)

Parse Progress: [##################################################] 100%
Uploaded py634b18a9-7e84-40ca-b265-b2fe43e064aa into cluster with 2 rows and 10 cols
Rows: 2 Cols: 16

Chunk compression summary:

chunk_type	chunk_name	count	count_percentage	size	size_percentage
C0L	Constant Integers	7	43.75	560 B	43.818466
C1N	1-Byte Integers (w/o NAs)	4	25.0	280 B	21.909233
C2	2-Byte Integers	2	12.5	144 B	11.267606
C2S	2-Byte Fractions	1	6.25	88 B	6.885759
CStr	String	2	12.5	206 B	16.118937

Frame distribution summary:

	size	number_of_rows	number_of_chunks_per_column	number_of_chunks
172.16.2.17:54321	1.2 KB	2.0	1.0	16.0
mean	1.2 KB	2.0	1.0	16.0
min	1.2 KB	2.0	1.0	16.0
max	1.2 KB	2.0	1.0	16.0
stddev	0 B	0.0	0.0	0.0
total	1.2 KB	2.0	1.0	16.0

Column-by-Column Summary:

	Location.Description	FBI.Code	Primary.Type	Community.Area	District	Beat	Domestic	IUCR	Date	Ward	Day	Month	Year	WeekNum	WeekDay	HourOfDay
type	string	int	string	int	int	int	enum	int	int	int	int	int	int	int	enum	int
mins	NaN	11.0	NaN	46.0	4.0	422.0	0.0	1150.0	1.423465239e+12	7.0	8.0	3.0	3915.0	6.0	6.0	23.0
maxs	NaN	18.0	NaN	63.0	9.0	923.0	0.0	1811.0	1.423467838e+12	14.0	8.0	3.0	3915.0	6.0	6.0	23.0
sigma	NaN	4.94974746831	NaN	12.0208152802	3.53553390593	354.260497374	0.0	467.397582364	1837770.5243	4.94974746831	0.0	0.0	0.0	0.0	0.0	0.0
zero_count	0	0	0	0	0	0	2	0	0	0	0	0	0	0	0	0
missing_count	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0

---------------------------------------------------------------------------
EnvironmentError                          Traceback (most recent call last)
<ipython-input-6-85bb7c75c897> in <module>()
     16 
     17 # Refine date column and merge with census data
---> 18 refine_date_col(crime_examples, "Date", "%m/%d/%Y %I:%M:%S %p")
     19 crime_examples.drop("Date")
     20 crime_examples.merge(census, allLeft=True, allRite=False)

<ipython-input-4-c2702228f9f1> in refine_date_col(data, col, pattern)
     15     # data["Weekend"] = h2o.ifelse(data["WeekDay"] in ("Sun", "Sat"), 1, 0)[0]
     16     data["Weekend"] = h2o.ifelse(data["WeekDay"] == "Sun" or data["WeekDay"] == "Sat", 1, 0)[0]
---> 17     data["Season"] = data["Month"].cut([0, 2, 5, 7, 10, 12], ["Winter", "Spring", "Summer", "Autumn", "Winter"])
     18 
     19 refine_date_col(crimes, "Date", "%m/%d/%Y %I:%M:%S %p")

/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/frame.pyc in cut(self, breaks, labels, include_lowest, right, dig_lab)
   1256 
   1257     expr = "(cut '{}' {} {} {} {} #{}".format(self.key(), breaks_list, labels_list, "%TRUE" if include_lowest else "%FALSE", "%TRUE" if right else "%FALSE", dig_lab)
-> 1258     res = h2o.rapids(expr)
   1259     return H2OVec(self._name, Expr(op=res["vec_ids"][0]["name"], length=res["num_rows"]))
   1260 

/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/h2o.pyc in rapids(expr)
    487   :return: The JSON response of the Rapids execution
    488   """
--> 489   result = H2OConnection.post_json("Rapids", ast=urllib.quote(expr), _rest_version=99)
    490   if result['error'] is not None:
    491     raise EnvironmentError("rapids expression not evaluated: {0}".format(str(result['error'])))

/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/connection.pyc in post_json(url_suffix, file_upload_info, **kwargs)
    360     if __H2OCONN__ is None:
    361       raise ValueError("No h2o connection. Did you run `h2o.init()` ?")
--> 362     return __H2OCONN__._rest_json(url_suffix, "POST", file_upload_info, **kwargs)
    363 
    364   def _rest_json(self, url_suffix, method, file_upload_info, **kwargs):

/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/connection.pyc in _rest_json(self, url_suffix, method, file_upload_info, **kwargs)
    363 
    364   def _rest_json(self, url_suffix, method, file_upload_info, **kwargs):
--> 365     raw_txt = self._do_raw_rest(url_suffix, method, file_upload_info, **kwargs)
    366     return self._process_tables(raw_txt.json())
    367 

/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/connection.pyc in _do_raw_rest(self, url_suffix, method, file_upload_info, **kwargs)
    429       raise EnvironmentError(("h2o-py got an unexpected HTTP status code:\n {} {} (method = {}; url = {}). \n"+ \
    430                               "detailed error messages: {}")
--> 431                              .format(http_result.status_code,http_result.reason,method,url,detailed_error_msgs))
    432 
    433     # TODO: is.logging? -> write to logs

EnvironmentError: h2o-py got an unexpected HTTP status code:
 412 Precondition Failed (method = POST; url = http://localhost:54321/99/Rapids). 
detailed error messages: Data vector is constant!

In [ ]:

# Predict probability of arrest from new observations
gbm_pred = data_gbm.predict(crime_examples)
dl_pred  = data_dl .predict(crime_examples)

# TODO: Replace with a pretty HTML table
gbm_pred.describe()
dl_pred.describe()