# NOTE(review): "!pip install" is IPython/Jupyter shell magic — this file is a
# notebook export and is not importable as a plain Python module.
!pip install comet_ml
import comet_ml
import getpass, os
# Prompt interactively for the Comet API key so it is never hard-coded in the file.
os.environ["COMET_API_KEY"] = getpass.getpass("Paste your COMET API KEY: ")
# Load the scikit-learn wine classification dataset (numeric features, 3-class target).
from sklearn.datasets import load_wine as load_data
dataset = load_data()
X, y = dataset.data, dataset.target
featurecols = dataset.feature_names
import pandas as pd
# Assemble features plus target into a single DataFrame for inspection and logging.
df = pd.DataFrame(X, columns=featurecols)
df["target"] = y
df.head()  # display-only in a notebook; has no effect in a plain script
# Create a Comet experiment and log the full dataset as a JSON table.
# orient="records" serializes the DataFrame as one JSON object per row.
experiment = comet_ml.Experiment(project_name="comet-vega")
experiment.add_tag("dataset")
experiment.log_table("wine.json", df, headers=False, **{"orient": "records"})
experiment.end()  # close this experiment before the optimizer sweep below
df.shape  # display-only in a notebook
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
# NOTE(review): OneHotEncoder is imported but never used in this file — confirm
# before removing the import.
from sklearn.preprocessing import OneHotEncoder
y.shape  # display-only in a notebook
RANDOM_STATE = 1  # fixed seed for a reproducible split and model fits
# Hold out 20% of the rows as a test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RANDOM_STATE)
y_test.shape  # display-only in a notebook
# Search-space definition for the Comet hyperparameter Optimizer.
optimizer_config = {
# Use random search over the discrete grid declared below.
"algorithm": "random",
# Declare your hyperparameters in the Vizier-inspired format:
"parameters": {
"n_estimators": {"type": "discrete", "values": [10, 100, 500]},
"max_depth": {"type": "discrete", "values": [4, 6, 8]}
},
# Declare what we will be optimizing, and how:
"spec": {
"metric": "accuracy",
"objective": "maximize",
},
}
optimizer = comet_ml.Optimizer(optimizer_config)
def create_feature_importance_df(model, feature_names):
    """Pair each feature name with the fitted model's importance score.

    Returns a two-column DataFrame with columns ``feature_name`` and
    ``feature_importance``, in that order.
    """
    return pd.DataFrame(
        {
            "feature_name": feature_names,
            "feature_importance": model.feature_importances_,
        }
    )
# Run one trial per hyperparameter combination served by the Comet Optimizer.
# Each iteration fits a random forest, logs per-label classification metrics,
# and logs the model's feature importances as a JSON table.
# (The previous `feature_importance_df = pd.DataFrame()` accumulator was never
# used anywhere and has been removed.)
for experiment in optimizer.get_experiments(project_name="comet-vega"):
    model = RandomForestClassifier(
        random_state=RANDOM_STATE,
        max_depth=experiment.get_parameter("max_depth"),
        n_estimators=experiment.get_parameter("n_estimators"))
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    # output_dict=True yields per-label metric dicts plus scalar aggregates
    # (e.g. "accuracy"); log each with matching granularity.
    report = classification_report(y_test, predictions, output_dict=True)
    for metric_name, value in report.items():
        if isinstance(value, dict):
            experiment.log_metrics(value, prefix=f"label_{metric_name}")
        else:
            experiment.log_metric(metric_name, value)
    feature_importance = create_feature_importance_df(model, featurecols)
    experiment.log_table("importance.json", feature_importance, headers=False, **{"orient": "records"})
    # End each trial explicitly so its metrics are flushed before the next
    # experiment in the sweep starts.
    experiment.end()