#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Warning: This notebook is designed to be run in Google Colab only. It installs packages on the system and requires root access. If you want to run it in a local Jupyter notebook, please proceed with caution.
Note: You can run this example right now in a Jupyter-style notebook, no setup required! Just click "Run in Google Colab".
This guide trains a neural network model to classify images of clothing, like sneakers and shirts, saves the trained model, and then serves it with TensorFlow Serving. The focus is on TensorFlow Serving rather than on modeling and training in TensorFlow; for a complete example that focuses on modeling and training, see the Basic Classification example.
This guide uses tf.keras, a high-level API to build and train models in TensorFlow.
# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras
# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
import os
import subprocess
print(tf.__version__)
1.13.1
This guide uses the Fashion MNIST dataset which contains 70,000 grayscale images in 10 categories. The images show individual articles of clothing at low resolution (28 by 28 pixels), as seen here:
Figure 1. Fashion-MNIST samples (by Zalando, MIT License).
Fashion MNIST is intended as a drop-in replacement for the classic MNIST dataset, often used as the "Hello, World" of machine learning programs for computer vision. You can access Fashion MNIST directly from TensorFlow; just import and load the data.
Note: Although these are really images, they are loaded as NumPy arrays and not binary image objects.
fashion_mnist = keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
# scale the values to 0.0 to 1.0
train_images = train_images / 255.0
test_images = test_images / 255.0
# reshape for feeding into the model
train_images = train_images.reshape(train_images.shape[0], 28, 28, 1)
test_images = test_images.reshape(test_images.shape[0], 28, 28, 1)
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
print('\ntrain_images.shape: {}, of {}'.format(train_images.shape, train_images.dtype))
print('test_images.shape: {}, of {}'.format(test_images.shape, test_images.dtype))
train_images.shape: (60000, 28, 28, 1), of float64
test_images.shape: (10000, 28, 28, 1), of float64
Let's use the simplest possible CNN, since we're not focused on the modeling part.
model = keras.Sequential([
  keras.layers.Conv2D(input_shape=(28,28,1), filters=8, kernel_size=3,
                      strides=2, activation='relu', name='Conv1'),
  keras.layers.Flatten(),
  keras.layers.Dense(10, activation=tf.nn.softmax, name='Softmax')
])
model.summary()
testing = False
epochs = 5
model.compile(optimizer=tf.train.AdamOptimizer(),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(train_images, train_labels, epochs=epochs)
test_loss, test_acc = model.evaluate(test_images, test_labels)
print('\nTest accuracy: {}'.format(test_acc))
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
Conv1 (Conv2D)               (None, 13, 13, 8)         80
_________________________________________________________________
flatten_2 (Flatten)          (None, 1352)              0
_________________________________________________________________
Softmax (Dense)              (None, 10)                13530
=================================================================
Total params: 13,610
Trainable params: 13,610
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
60000/60000 [==============================] - 7s 119us/sample - loss: 0.5402 - acc: 0.8132
Epoch 2/5
60000/60000 [==============================] - 7s 117us/sample - loss: 0.3924 - acc: 0.8632
Epoch 3/5
60000/60000 [==============================] - 7s 117us/sample - loss: 0.3524 - acc: 0.8761
Epoch 4/5
60000/60000 [==============================] - 7s 117us/sample - loss: 0.3293 - acc: 0.8825
Epoch 5/5
60000/60000 [==============================] - 7s 116us/sample - loss: 0.3147 - acc: 0.8880
10000/10000 [==============================] - 1s 86us/sample - loss: 0.3446 - acc: 0.8792

Test accuracy: 0.879199981689
To load our trained model into TensorFlow Serving, we first need to save it in the SavedModel format. This creates a protobuf file in a well-defined directory hierarchy and includes a version number. TensorFlow Serving allows us to select which version of a model, or "servable," we want to use when we make inference requests. Each version is exported to a different sub-directory under the given path.
# Fetch the Keras session and save the model
# The signature definition is defined by the input and output tensors,
# and stored with the default serving key
import tempfile
MODEL_DIR = tempfile.gettempdir()
version = 1
export_path = os.path.join(MODEL_DIR, str(version))
print('export_path = {}\n'.format(export_path))
if os.path.isdir(export_path):
  print('\nAlready saved a model, cleaning up\n')
  !rm -r {export_path}
tf.saved_model.simple_save(
    keras.backend.get_session(),
    export_path,
    inputs={'input_image': model.input},
    outputs={t.name: t for t in model.outputs})
print('\nSaved model:')
!ls -l {export_path}
export_path = /tmp/1

WARNING:tensorflow:From <ipython-input-10-71938700dab5>:15: simple_save (from tensorflow.python.saved_model.simple_save) is deprecated and will be removed in a future version.
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.simple_save.
WARNING:tensorflow:From /usr/local/lib/python2.7/dist-packages/tensorflow/python/saved_model/signature_def_utils_impl.py:205: build_tensor_info (from tensorflow.python.saved_model.utils_impl) is deprecated and will be removed in a future version.
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.
INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: /tmp/1/saved_model.pb

Saved model:
total 344
-rw-r--r-- 1 root root 344543 Mar  3 01:21 saved_model.pb
drwxr-xr-x 2 root root   4096 Mar  3 01:21 variables
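As a quick sanity check, you can also walk the export directory from Python to confirm the version sub-directory layout described above. This is a minimal sketch using only the standard library:
# Walk the export directory; we expect saved_model.pb plus a variables/
# sub-directory holding the checkpoint shards.
for root, dirs, files in os.walk(export_path):
  for filename in files:
    print(os.path.join(root, filename))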
We'll use the command-line utility saved_model_cli to look at the MetaGraphDefs (the models) and SignatureDefs (the methods you can call) in our SavedModel. See this discussion of the SavedModel CLI in the TensorFlow Guide.
!saved_model_cli show --dir {export_path} --all
MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:

signature_def['serving_default']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['input_image'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 28, 28, 1)
        name: Conv1_input_2:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['Softmax_2/Softmax:0'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 10)
        name: Softmax_2/Softmax:0
  Method name is: tensorflow/serving/predict
That tells us a lot about our model! In this case we just trained our model, so we already know the inputs and outputs, but if we didn't, this would be important information. It doesn't tell us everything (for example, that this is grayscale image data), but it's a great start.
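If you only need one signature, saved_model_cli can also show it on its own; the --tag_set and --signature_def flags below are standard saved_model_cli options:
!saved_model_cli show --dir {export_path} --tag_set serve --signature_def serving_default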
We're preparing to install TensorFlow Serving using Aptitude since this Colab runs in a Debian environment. We'll add the tensorflow-model-server package to the list of packages that Aptitude knows about. Note that we're running as root.
Note: This example is running TensorFlow Serving natively, but you can also run it in a Docker container, which is one of the easiest ways to get started using TensorFlow Serving.
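For reference, a Docker-based equivalent might look like the following sketch. It won't run in this Colab; it assumes Docker and the public tensorflow/serving image, and mounts MODEL_DIR (here /tmp) as the model base path:
# docker run -t --rm -p 8501:8501 \
#   -v "/tmp:/models/fashion_model" \
#   -e MODEL_NAME=fashion_model \
#   tensorflow/serving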
# This is the same as you would do from your command line, but without the [arch=amd64], and no sudo
# You would instead do:
# echo "deb [arch=amd64] http://storage.googleapis.com/tensorflow-serving-apt stable tensorflow-model-server tensorflow-model-server-universal" | sudo tee /etc/apt/sources.list.d/tensorflow-serving.list && \
# curl https://storage.googleapis.com/tensorflow-serving-apt/tensorflow-serving.release.pub.gpg | sudo apt-key add -
!echo "deb http://storage.googleapis.com/tensorflow-serving-apt stable tensorflow-model-server tensorflow-model-server-universal" | tee /etc/apt/sources.list.d/tensorflow-serving.list && \
curl https://storage.googleapis.com/tensorflow-serving-apt/tensorflow-serving.release.pub.gpg | apt-key add -
!apt update
deb http://storage.googleapis.com/tensorflow-serving-apt stable tensorflow-model-server tensorflow-model-server-universal
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  2343  100  2343    0     0  93720      0 --:--:-- --:--:-- --:--:-- 93720
OK
Get:1 http://storage.googleapis.com/tensorflow-serving-apt stable InRelease [3,012 B]
Hit:2 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu bionic InRelease
Get:3 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB]
Hit:4 http://archive.ubuntu.com/ubuntu bionic InRelease
Get:5 http://archive.ubuntu.com/ubuntu bionic-updates InRelease [88.7 kB]
Hit:6 http://ppa.launchpad.net/marutter/c2d4u3.5/ubuntu bionic InRelease
Get:7 http://archive.ubuntu.com/ubuntu bionic-backports InRelease [74.6 kB]
Get:8 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran35/ InRelease [3,609 B]
Get:9 http://storage.googleapis.com/tensorflow-serving-apt stable/tensorflow-model-server-universal amd64 Packages [360 B]
Get:10 http://storage.googleapis.com/tensorflow-serving-apt stable/tensorflow-model-server amd64 Packages [355 B]
Ign:11 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 InRelease
Get:12 http://security.ubuntu.com/ubuntu bionic-security/universe amd64 Packages [156 kB]
Get:13 http://security.ubuntu.com/ubuntu bionic-security/main amd64 Packages [345 kB]
Get:14 http://archive.ubuntu.com/ubuntu bionic-updates/universe amd64 Packages [943 kB]
Get:15 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 Packages [691 kB]
Ign:16 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 InRelease
Get:17 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 Release [564 B]
Get:18 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 Release [564 B]
Get:19 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 Release.gpg [801 B]
Get:20 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 Release.gpg [801 B]
Get:21 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 Packages [64.9 kB]
Get:22 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 Packages [5,867 B]
Fetched 2,467 kB in 2s (1,205 kB/s)
Reading package lists... Done
Building dependency tree
Reading state information... Done
38 packages can be upgraded. Run 'apt list --upgradable' to see them.
This is all you need: one command line!
!apt-get install tensorflow-model-server
Reading package lists... Done
Building dependency tree
Reading state information... Done
The following NEW packages will be installed:
  tensorflow-model-server
0 upgraded, 1 newly installed, 0 to remove and 38 not upgraded.
Need to get 136 MB of archives.
After this operation, 0 B of additional disk space will be used.
Get:1 http://storage.googleapis.com/tensorflow-serving-apt stable/tensorflow-model-server amd64 tensorflow-model-server all 1.13.0 [136 MB]
Fetched 136 MB in 2s (60.5 MB/s)
Selecting previously unselected package tensorflow-model-server.
(Reading database ... 131322 files and directories currently installed.)
Preparing to unpack .../tensorflow-model-server_1.13.0_all.deb ...
Unpacking tensorflow-model-server (1.13.0) ...
Setting up tensorflow-model-server (1.13.0) ...
This is where we start running TensorFlow Serving and load our model. After it loads we can start making inference requests using REST. There are some important parameters:
rest_api_port: The port that you'll use for REST requests.
model_name: You'll use this in the URL of REST requests. It can be anything.
model_base_path: This is the path to the directory where you've saved your model.

os.environ["MODEL_DIR"] = MODEL_DIR
%%bash --bg
nohup tensorflow_model_server \
--rest_api_port=8501 \
--model_name=fashion_model \
--model_base_path="${MODEL_DIR}" >server.log 2>&1
Starting job # 0 in a separate thread.
!tail server.log
2019-03-03 01:22:12.062676: I external/org_tensorflow/tensorflow/cc/saved_model/reader.cc:31] Reading SavedModel from: /tmp/1
2019-03-03 01:22:12.066819: I external/org_tensorflow/tensorflow/cc/saved_model/reader.cc:54] Reading meta graph with tags { serve }
2019-03-03 01:22:12.070614: I external/org_tensorflow/tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2019-03-03 01:22:12.091723: I external/org_tensorflow/tensorflow/cc/saved_model/loader.cc:182] Restoring SavedModel bundle.
2019-03-03 01:22:12.126355: I external/org_tensorflow/tensorflow/cc/saved_model/loader.cc:285] SavedModel load for tags { serve }; Status: success. Took 63670 microseconds.
2019-03-03 01:22:12.126511: I tensorflow_serving/servables/tensorflow/saved_model_warmup.cc:101] No warmup data file found at /tmp/1/assets.extra/tf_serving_warmup_requests
2019-03-03 01:22:12.126621: I tensorflow_serving/core/loader_harness.cc:86] Successfully loaded servable version {name: fashion_model version: 1}
2019-03-03 01:22:12.127975: I tensorflow_serving/model_servers/server.cc:313] Running gRPC ModelServer at 0.0.0.0:8500 ...
2019-03-03 01:22:12.128750: I tensorflow_serving/model_servers/server.cc:333] Exporting HTTP/REST API at:localhost:8501 ...
[evhttp_server.cc : 237] RAW: Entering the event loop ...
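Before sending predictions, you can confirm the model loaded by querying TensorFlow Serving's model status REST API; the GET /v1/models/... endpoint below is part of its standard REST API:
!curl http://localhost:8501/v1/models/fashion_model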
First, let's take a look at a random example from our test data.
def show(idx, title):
  plt.figure()
  plt.imshow(test_images[idx].reshape(28,28))
  plt.axis('off')
  plt.title('\n\n{}'.format(title), fontdict={'size': 16})
import random
rando = random.randint(0,len(test_images)-1)
show(rando, 'An Example Image: {}'.format(class_names[test_labels[rando]]))
Ok, that looks interesting. How hard is that for you to recognize? Now let's create the JSON object for a batch of three inference requests, and see how well our model recognizes things:
import json
data = json.dumps({"signature_name": "serving_default", "instances": test_images[0:3].tolist()})
print('Data: {} ... {}'.format(data[:50], data[len(data)-52:]))
Data: {"instances": [[[[0.0], [0.0], [0.0], [0.0], [0.0] ... 0.0], [0.0]]]], "signature_name": "serving_default"}
We'll send a predict request as a POST to our server's REST endpoint, and pass it three examples. We'll ask our server to give us the latest version of our servable by not specifying a particular version.
!pip install -q requests
import requests
headers = {"content-type": "application/json"}
json_response = requests.post('http://localhost:8501/v1/models/fashion_model:predict', data=data, headers=headers)
predictions = json.loads(json_response.text)['predictions']
show(0, 'The model thought this was a {} (class {}), and it was actually a {} (class {})'.format(
  class_names[np.argmax(predictions[0])], np.argmax(predictions[0]),
  class_names[test_labels[0]], test_labels[0]))
Now let's specify a particular version of our servable. Since we only have one, let's select version 1. We'll also look at all three results.
headers = {"content-type": "application/json"}
json_response = requests.post('http://localhost:8501/v1/models/fashion_model/versions/1:predict', data=data, headers=headers)
predictions = json.loads(json_response.text)['predictions']
for i in range(0,3):
  show(i, 'The model thought this was a {} (class {}), and it was actually a {} (class {})'.format(
    class_names[np.argmax(predictions[i])], np.argmax(predictions[i]),
    class_names[test_labels[i]], test_labels[i]))
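The server log above also shows a gRPC ModelServer listening on port 8500. As a final sketch, the same batch could be classified over gRPC instead of REST. This assumes the tensorflow-serving-api pip package, which this notebook does not install (you may need to pin its version to match your TensorFlow Serving version):
!pip install -q tensorflow-serving-api
import grpc
from tensorflow_serving.apis import predict_pb2, prediction_service_pb2_grpc
# Open a channel to the gRPC port reported in the server log.
channel = grpc.insecure_channel('localhost:8500')
stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
# Build a PredictRequest mirroring the REST request above.
request = predict_pb2.PredictRequest()
request.model_spec.name = 'fashion_model'
request.model_spec.signature_name = 'serving_default'
request.inputs['input_image'].CopyFrom(
    tf.make_tensor_proto(test_images[0:3].astype(np.float32)))
result = stub.Predict(request, 10.0)  # 10-second timeout
print(result.outputs)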