!pip install -U pip
!pip install tensorflow-model-analysis
!pip install comet_ml
import getpass, os
import comet_ml
import tensorflow_model_analysis as tfma
os.environ["COMET_API_KEY"] = getpass.getpass("Paste your COMET API KEY: ")
# This setup was tested with TF 2.3 and TFMA 0.24 (on Colab), but it should
# also work with more recent releases.
import sys
# Confirm that we're using Python 3
assert sys.version_info.major == 3, "This notebook must be run using Python 3."
print("Installing TensorFlow")
import tensorflow as tf
print("TF version: {}".format(tf.__version__))
# Get the Data
import os, tempfile
TAR_NAME = "saved_models-2.2"
BASE_DIR = tempfile.mkdtemp()
DATA_DIR = os.path.join(BASE_DIR, TAR_NAME, "data")
MODELS_DIR = os.path.join(BASE_DIR, TAR_NAME, "models")
SCHEMA = os.path.join(BASE_DIR, TAR_NAME, "schema.pbtxt")
OUTPUT_DIR = os.path.join(BASE_DIR, "output")
MODEL_VERSION = "1"
!curl -O https://storage.googleapis.com/artifacts.tfx-oss-public.appspot.com/datasets/{TAR_NAME}.tar
!tar xf {TAR_NAME}.tar
!mv {TAR_NAME} {BASE_DIR}
!rm {TAR_NAME}.tar
print("Here's what we downloaded:")
!ls -R {BASE_DIR}
# Setup Data Schema
import tensorflow as tf
from google.protobuf import text_format
from tensorflow.python.lib.io import file_io
from tensorflow_metadata.proto.v0 import schema_pb2
from tensorflow.core.example import example_pb2
schema = schema_pb2.Schema()
contents = file_io.read_file_to_string(SCHEMA)
schema = text_format.Parse(contents, schema)
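# Optional sanity check (a sketch, not part of the original flow): list the
# feature names and types parsed from schema.pbtxt, so the CSV-to-Example
# conversion below is easier to follow.
for feature in schema.feature:
    print(feature.name, schema_pb2.FeatureType.Name(feature.type))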
# Encode Data to TFRecords format using the Schema
import csv
datafile = os.path.join(DATA_DIR, "eval", "data.csv")
examples = []
with open(datafile, "r") as f:
    reader = csv.DictReader(f)
    for line in reader:
        example = example_pb2.Example()
        # Copy each feature defined in the schema into the Example proto.
        for feature in schema.feature:
            key = feature.name
            if feature.type == schema_pb2.FLOAT:
                example.features.feature[key].float_list.value[:] = (
                    [float(line[key])] if len(line[key]) > 0 else []
                )
            elif feature.type == schema_pb2.INT:
                example.features.feature[key].int64_list.value[:] = (
                    [int(line[key])] if len(line[key]) > 0 else []
                )
            elif feature.type == schema_pb2.BYTES:
                example.features.feature[key].bytes_list.value[:] = (
                    [line[key].encode("utf8")] if len(line[key]) > 0 else []
                )
        # Add the label: big_tipper is True when tips exceed 20% of the fare.
        big_tipper = float(line["tips"]) > float(line["fare"]) * 0.2
        example.features.feature["big_tipper"].float_list.value[:] = [big_tipper]
        examples.append(example)
tfrecord_file = os.path.join(BASE_DIR, "train_data.rio")
with tf.io.TFRecordWriter(tfrecord_file) as writer:
    for example in examples:
        writer.write(example.SerializeToString())
!ls {tfrecord_file}
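# Optional read-back check (a sketch): decode the first serialized record with
# tf.data.TFRecordDataset to confirm the file round-trips correctly.
raw_dataset = tf.data.TFRecordDataset([tfrecord_file])
for raw_record in raw_dataset.take(1):
    parsed = example_pb2.Example.FromString(raw_record.numpy())
    print(parsed.features.feature["big_tipper"])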
import tensorflow_model_analysis as tfma
# Setup tfma.EvalConfig settings
keras_eval_config = text_format.Parse(
    """
    ## Model information
    model_specs {
      # For keras (and serving models) we need to add a `label_key`.
      label_key: "big_tipper"
    }

    ## Post training metric information. These will be merged with any built-in
    ## metrics from training.
    metrics_specs {
      metrics { class_name: "ExampleCount" }
      metrics { class_name: "BinaryAccuracy" }
      metrics { class_name: "BinaryCrossentropy" }
      metrics { class_name: "AUC" }
      metrics { class_name: "AUCPrecisionRecall" }
      metrics { class_name: "Precision" }
      metrics { class_name: "Recall" }
      metrics { class_name: "MeanLabel" }
      metrics { class_name: "MeanPrediction" }
      metrics { class_name: "Calibration" }
      metrics { class_name: "CalibrationPlot" }
      metrics { class_name: "ConfusionMatrixPlot" }
      # ... add additional metrics and plots ...
    }

    ## Slicing information
    slicing_specs {}  # overall slice
    slicing_specs {
      feature_keys: ["trip_start_hour"]
    }
    slicing_specs {
      feature_keys: ["trip_start_day"]
    }
    slicing_specs {
      feature_values: {
        key: "trip_start_month"
        value: "1"
      }
    }
    slicing_specs {
      feature_keys: ["trip_start_hour", "trip_start_day"]
    }
    """,
    tfma.EvalConfig(),
)
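# The same config can also be built programmatically instead of via
# text_format. A minimal sketch (covering only a subset of the metrics and
# slices above) using the public tfma.EvalConfig / tfma.ModelSpec /
# tfma.SlicingSpec helpers:
programmatic_config = tfma.EvalConfig(
    model_specs=[tfma.ModelSpec(label_key="big_tipper")],
    metrics_specs=tfma.metrics.specs_from_metrics(
        [tf.keras.metrics.BinaryAccuracy(), tf.keras.metrics.AUC()]
    ),
    slicing_specs=[
        tfma.SlicingSpec(),  # overall slice
        tfma.SlicingSpec(feature_keys=["trip_start_hour"]),
    ],
)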
# Create a tfma.EvalSharedModel that points at our keras model.
keras_model_path = os.path.join(MODELS_DIR, "keras", MODEL_VERSION)
keras_eval_shared_model = tfma.default_eval_shared_model(
    eval_saved_model_path=keras_model_path, eval_config=keras_eval_config
)
keras_output_path = os.path.join(OUTPUT_DIR, "keras")
# Run TFMA
results = tfma.run_model_analysis(
    eval_shared_model=keras_eval_shared_model,
    eval_config=keras_eval_config,
    data_location=tfrecord_file,
    output_path=keras_output_path,
)
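# The results are persisted under keras_output_path, so they can be reloaded
# in a later session without re-running the analysis (a sketch using the
# public TFMA API):
reloaded_results = tfma.load_eval_result(output_path=keras_output_path)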
%env COMET_PROJECT_NAME=tf-model-analysis
%env COMET_AUTO_LOG_TFMA=1
experiment = comet_ml.start()
experiment.log_parameter("model_version", MODEL_VERSION)
tfma.view.render_slicing_metrics(results)
tfma.view.render_slicing_metrics(results, slicing_column="trip_start_day")
tfma.view.render_plot(results)
experiment.end()
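# For programmatic access (a sketch): EvalResult.slicing_metrics is a list of
# (slice_key, metrics) pairs, one entry per slice produced above.
for slice_key, metric_values in results.slicing_metrics[:2]:
    print(slice_key)
    print(metric_values)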