#!/usr/bin/env python
# coding: utf-8

# # IsoTree to TreeLite
#
# This is a short example of converting an Isolation Forest model generated
# through the [isotree](https://github.com/david-cortes/isotree) library to
# [treelite](https://treelite.readthedocs.io/en/latest/index.html) format,
# which can then be passed to the
# [tl2cgen](https://tl2cgen.readthedocs.io/en/latest/) library to compile
# these trees to a standalone runtime library which is oftentimes faster at
# making predictions.
#
# NOTE: this file is a notebook export; the `get_ipython()` calls further
# down only resolve when it is executed under IPython / Jupyter.
#
# ** *
# ### Getting some medium-size data from scikit-learn to fit a model

# In[1]:


from sklearn.datasets import fetch_california_housing

# Only the feature matrix `X` is used below; `y` is fetched but not needed.
X, y = fetch_california_housing(return_X_y=True)
print(X.shape)


# ### Fitting an isolation forest model through isotree
#
# *Note: only models that use `ndim=1` can be exported to `treelite` format.*

# In[2]:


from isotree import IsolationForest

iso = IsolationForest(ndim=1, ntrees=100, sample_size=256, missing_action="impute", max_depth=8)
iso.fit(X)

### Now convert
treelite_model = iso.to_treelite()

### OPTIONAL: add annotations for better branch prediction
import tl2cgen

# Writes the branch annotations to a JSON file; the same file name is passed
# back in as "annotate_in" when the library is compiled in the next cell.
tl2cgen.annotate_branch(
    model=treelite_model,
    dmat=tl2cgen.DMatrix(X),
    verbose=False,
    path="iso_branches_annotation.json"
)


# ### Compiling the treelite model
#
# These models need to be compiled into a shared library in order to be used:

# In[3]:


# `%%capture` cell: builds './predictor.so' with the "clang" toolchain and
# loads it as `treelite_predictor`, with the cell's output captured.
get_ipython().run_cell_magic('capture', '', 'import tl2cgen\nimport multiprocessing\n\ntl2cgen.export_lib(\n model=treelite_model,\n toolchain="clang",\n libpath=\'./predictor.so\',\n params={\n "parallel_comp": multiprocessing.cpu_count(),\n "annotate_in": "iso_branches_annotation.json"\n }\n)\ntreelite_predictor = tl2cgen.Predictor("predictor.so")\n')


# Now verify that they make the same predictions:

# In[4]:


iso.predict(X[:10])


# In[5]:


treelite_predictor.predict(tl2cgen.DMatrix(X[:10]))


# *Note: some small disagreement between the two is expected due to loss of
# precision when converting. See the documentation in `isotree` for more
# details.*

# ### Comparing prediction times

# In[6]:


# `%%timeit` cell: times `iso.predict(X)` after setting `nthreads` to the
# number of physical cores reported by joblib.
get_ipython().run_cell_magic('timeit', '', "import joblib\n### see docs for 'IsolationForest.predict' about this part\niso.set_params(nthreads=joblib.cpu_count(only_physical_cores=True))\niso.predict(X)\n")


# In[7]:


# `%%timeit` cell: times the compiled predictor on the same data.
get_ipython().run_cell_magic('timeit', '', 'treelite_predictor.predict(tl2cgen.DMatrix(X))\n')