#----------------------------------------------------------------------
# Try to slice by using != factor_level
#----------------------------------------------------------------------
import h2o
# Initialize the h2o client. In the recorded run this reported a healthy
# single-node cluster reachable at 127.0.0.1:54321 (see the status lines
# that follow).
h2o.init()
H2O cluster uptime: | 4 minutes 78 milliseconds |
H2O cluster version: | 3.1.0.99999 |
H2O cluster name: | ece |
H2O cluster total nodes: | 1 |
H2O cluster total memory: | 4.44 GB |
H2O cluster total cores: | 8 |
H2O cluster allowed cores: | 8 |
H2O cluster healthy: | True |
H2O Connection ip: | 127.0.0.1 |
H2O Connection port: | 54321 |
# Import the airlines sample archive as a frame. The path is given relative
# to the data directory and resolved through h2o.locate before the import.
air = h2o.import_frame(
    path=h2o.locate("smalldata/airlines/allyears2k_headers.zip")
)
Parse Progress: [##################################################] 100% Imported /Users/ece/0xdata/h2o-dev/smalldata/airlines/allyears2k_headers.zip . Parsed 43,978 rows and 31 cols
# air.dim() is unpacked as (row count, column count) for the whole frame.
rows, cols = air.dim()
# Parenthesized single-argument print: identical output under Python 2 and
# valid syntax under Python 3 (the bare print statement is Python 2 only).
print([rows, cols])
[43978, 31]
#
# Split the flights on the "Origin" column by comparing against the level
# "SFO": rows selected with != (not departing from SFO) and rows selected
# with == (departing from SFO).
#
not_sfo = air[air["Origin"] != "SFO"]
sfo = air[air["Origin"] == "SFO"]

# Dimensions of each slice, unpacked as (row count, column count).
no_rows, no_cols = not_sfo.dim()
yes_rows, yes_cols = sfo.dim()

# Parenthesized single-argument print: identical output under Python 2 and
# valid syntax under Python 3 (the bare print statement is Python 2 only).
print("no_rows: {0}".format(no_rows))
print("yes_rows: {0}".format(yes_rows))
print("no_cols: {0}".format(no_cols))
print("yes_cols: {0}".format(yes_cols))
no_rows: 42434
yes_rows: 1544
no_cols: 31
yes_cols: 31