!pip install -U pip
!pip install tensorflow-model-analysis
!pip install comet_ml
import getpass, os
import comet_ml
import tensorflow_model_analysis as tfma
os.environ["COMET_API_KEY"] = getpass.getpass("Paste your COMET API KEY: ")
# This setup was tested with TF 2.3 and TFMA 0.24 (on Colab), but it should
# also work with more recent releases.
import sys
# Confirm that we're using Python 3
assert sys.version_info.major == 3, "This notebook must be run using Python 3."
print("Installing TensorFlow")
import tensorflow as tf
print("TF version: {}".format(tf.__version__))
# Get the Data
import os, tempfile
TAR_NAME = "saved_models-2.2"
BASE_DIR = tempfile.mkdtemp()
DATA_DIR = os.path.join(BASE_DIR, TAR_NAME, "data")
MODELS_DIR = os.path.join(BASE_DIR, TAR_NAME, "models")
SCHEMA = os.path.join(BASE_DIR, TAR_NAME, "schema.pbtxt")
OUTPUT_DIR = os.path.join(BASE_DIR, "output")
MODEL_VERSION = "1"
!curl -O https://storage.googleapis.com/artifacts.tfx-oss-public.appspot.com/datasets/{TAR_NAME}.tar
!tar xf {TAR_NAME}.tar
!mv {TAR_NAME} {BASE_DIR}
!rm {TAR_NAME}.tar
print("Here's what we downloaded:")
!ls -R {BASE_DIR}
# Setup Data Schema
import tensorflow as tf
from google.protobuf import text_format
from tensorflow.python.lib.io import file_io
from tensorflow_metadata.proto.v0 import schema_pb2
from tensorflow.core.example import example_pb2
schema = schema_pb2.Schema()
contents = file_io.read_file_to_string(SCHEMA)
schema = text_format.Parse(contents, schema)
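# Optional sanity check (a sketch, not part of the original flow): list the
# feature names and types parsed from schema.pbtxt, so the CSV-to-Example
# conversion below is easier to follow.
for feature in schema.feature:
    print(feature.name, schema_pb2.FeatureType.Name(feature.type))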
# Encode Data to TFRecords format using the Schema
import csv
datafile = os.path.join(DATA_DIR, "eval", "data.csv")
examples = []
with open(datafile, "r") as f:
    reader = csv.DictReader(f)
    for line in reader:
        example = example_pb2.Example()
        # Copy each feature defined in the schema into the Example proto.
        for feature in schema.feature:
            key = feature.name
            if feature.type == schema_pb2.FLOAT:
                example.features.feature[key].float_list.value[:] = (
                    [float(line[key])] if len(line[key]) > 0 else []
                )
            elif feature.type == schema_pb2.INT:
                example.features.feature[key].int64_list.value[:] = (
                    [int(line[key])] if len(line[key]) > 0 else []
                )
            elif feature.type == schema_pb2.BYTES:
                example.features.feature[key].bytes_list.value[:] = (
                    [line[key].encode("utf8")] if len(line[key]) > 0 else []
                )
        # Add the label: big_tipper is True when tips exceed 20% of the fare.
        big_tipper = float(line["tips"]) > float(line["fare"]) * 0.2
        example.features.feature["big_tipper"].float_list.value[:] = [big_tipper]
        examples.append(example)
tfrecord_file = os.path.join(BASE_DIR, "train_data.rio")
with tf.io.TFRecordWriter(tfrecord_file) as writer:
    for example in examples:
        writer.write(example.SerializeToString())
!ls {tfrecord_file}
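# Optional read-back check (a sketch): decode the first serialized record with
# tf.data.TFRecordDataset to confirm the file round-trips correctly.
raw_dataset = tf.data.TFRecordDataset([tfrecord_file])
for raw_record in raw_dataset.take(1):
    parsed = example_pb2.Example.FromString(raw_record.numpy())
    print(parsed.features.feature["big_tipper"])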
import tensorflow_model_analysis as tfma
# Setup tfma.EvalConfig settings
keras_eval_config = text_format.Parse(
    """
    ## Model information
    model_specs {
      # For keras (and serving models) we need to add a `label_key`.
      label_key: "big_tipper"
    }

    ## Post training metric information. These will be merged with any built-in
    ## metrics from training.
    metrics_specs {
      metrics { class_name: "ExampleCount" }
      metrics { class_name: "BinaryAccuracy" }
      metrics { class_name: "BinaryCrossentropy" }
      metrics { class_name: "AUC" }
      metrics { class_name: "AUCPrecisionRecall" }
      metrics { class_name: "Precision" }
      metrics { class_name: "Recall" }
      metrics { class_name: "MeanLabel" }
      metrics { class_name: "MeanPrediction" }
      metrics { class_name: "Calibration" }
      metrics { class_name: "CalibrationPlot" }
      metrics { class_name: "ConfusionMatrixPlot" }
      # ... add additional metrics and plots ...
    }

    ## Slicing information
    slicing_specs {}  # overall slice
    slicing_specs {
      feature_keys: ["trip_start_hour"]
    }
    slicing_specs {
      feature_keys: ["trip_start_day"]
    }
    slicing_specs {
      feature_values: {
        key: "trip_start_month"
        value: "1"
      }
    }
    slicing_specs {
      feature_keys: ["trip_start_hour", "trip_start_day"]
    }
    """,
    tfma.EvalConfig(),
)
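# The same config can also be built programmatically instead of via
# text_format. A minimal sketch (covering only a subset of the metrics and
# slices above) using the public tfma.EvalConfig / tfma.ModelSpec /
# tfma.SlicingSpec helpers:
programmatic_config = tfma.EvalConfig(
    model_specs=[tfma.ModelSpec(label_key="big_tipper")],
    metrics_specs=tfma.metrics.specs_from_metrics(
        [tf.keras.metrics.BinaryAccuracy(), tf.keras.metrics.AUC()]
    ),
    slicing_specs=[
        tfma.SlicingSpec(),  # overall slice
        tfma.SlicingSpec(feature_keys=["trip_start_hour"]),
    ],
)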
# Create a tfma.EvalSharedModel that points at our keras model.
keras_model_path = os.path.join(MODELS_DIR, "keras", MODEL_VERSION)
keras_eval_shared_model = tfma.default_eval_shared_model(
    eval_saved_model_path=keras_model_path, eval_config=keras_eval_config
)
keras_output_path = os.path.join(OUTPUT_DIR, "keras")
# Run TFMA
results = tfma.run_model_analysis(
    eval_shared_model=keras_eval_shared_model,
    eval_config=keras_eval_config,
    data_location=tfrecord_file,
    output_path=keras_output_path,
)
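# The results are persisted under keras_output_path, so they can be reloaded
# in a later session without re-running the analysis (a sketch using the
# public TFMA API):
reloaded_results = tfma.load_eval_result(output_path=keras_output_path)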
%env COMET_PROJECT_NAME=tf-model-analysis
%env COMET_AUTO_LOG_TFMA=1
experiment = comet_ml.start()
experiment.log_parameter("model_version", MODEL_VERSION)
tfma.view.render_slicing_metrics(results)
tfma.view.render_slicing_metrics(results, slicing_column="trip_start_day")
tfma.view.render_plot(results)
experiment.end()
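# For programmatic access (a sketch): EvalResult.slicing_metrics is a list of
# (slice_key, metrics) pairs, one entry per slice produced above.
for slice_key, metric_values in results.slicing_metrics[:2]:
    print(slice_key)
    print(metric_values)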