from rubicon_ml.schema import registry
# Ask the schema registry which schemas it knows about; the result is a
# list of schema names (see the cell output below).
available_schema = registry.available_schema()
# Bare expression so the notebook displays the list.
available_schema
['sklearn__RandomForestClassifier', 'xgboost__XGBClassifier', 'xgboost__DaskXGBClassifier']
Load a schema
import pprint
# Look up a single schema by one of the names listed by
# `registry.available_schema()`.
rfc_schema = registry.get_schema("sklearn__RandomForestClassifier")
# Pretty-print the nested schema so its sections are easier to read.
pprint.pprint(rfc_schema)
{'artifacts': ['self'], 'compatibility': {'scikit-learn': {'max_version': None, 'min_version': '1.0.2'}}, 'docs_url': 'https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html', 'features': [{'importances_attr': 'feature_importances_', 'names_attr': 'feature_names_in_', 'optional': True}], 'metrics': [{'name': 'classes', 'value_attr': 'classes_'}, {'name': 'n_classes', 'value_attr': 'n_classes_'}, {'name': 'n_features_in', 'value_attr': 'n_features_in_'}, {'name': 'n_outputs', 'value_attr': 'n_outputs_'}, {'name': 'oob_decision_function', 'optional': True, 'value_attr': 'oob_decision_function_'}, {'name': 'oob_score', 'optional': True, 'value_attr': 'oob_score_'}], 'name': 'sklearn__RandomForestClassifier', 'parameters': [{'name': 'bootstrap', 'value_attr': 'bootstrap'}, {'name': 'ccp_alpha', 'value_attr': 'ccp_alpha'}, {'name': 'class_weight', 'value_attr': 'class_weight'}, {'name': 'criterion', 'value_attr': 'criterion'}, {'name': 'max_depth', 'value_attr': 'max_depth'}, {'name': 'max_features', 'value_attr': 'max_features'}, {'name': 'min_impurity_decrease', 'value_attr': 'min_impurity_decrease'}, {'name': 'max_leaf_nodes', 'value_attr': 'max_leaf_nodes'}, {'name': 'max_samples', 'value_attr': 'max_samples'}, {'name': 'min_samples_split', 'value_attr': 'min_samples_split'}, {'name': 'min_samples_leaf', 'value_attr': 'min_samples_leaf'}, {'name': 'min_weight_fraction_leaf', 'value_attr': 'min_weight_fraction_leaf'}, {'name': 'n_estimators', 'value_attr': 'n_estimators'}, {'name': 'oob_score', 'value_attr': 'oob_score'}, {'name': 'random_state', 'value_attr': 'random_state'}], 'verison': '1.0.0'}
Create a `rubicon_ml` project
from rubicon_ml import Rubicon
# Create a client configured with in-memory persistence for this example.
rubicon = Rubicon(persistence="memory")
# Create the project that the schema will be applied to.
project = rubicon.create_project(name="apply schema")
# Bare expression so the notebook displays the project object.
project
<rubicon_ml.client.project.Project at 0x134d4fd50>
Set the schema on the project
# Attach the schema to the project so it is used explicitly rather than inferred.
project.set_schema(rfc_schema)
Now, `log_with_schema` will leverage the schema `rfc_schema` instead of trying to infer one.