Chapter 10 – Introduction to Artificial Neural Networks with Keras
This notebook contains all the sample code and solutions to the exercises in Chapter 10.
First, let's import a few common modules, make sure Matplotlib plots figures inline, and prepare a function to save the figures. We also check that Python 3.5 or later is installed (even though Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead), as well as Scikit-Learn ≥ 0.20.
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)
# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"
try:
    # %tensorflow_version only exists in Colab.
    %tensorflow_version 2.x
except Exception:
    pass
# TensorFlow ≥2.0 is required
import tensorflow as tf
assert tf.__version__ >= "2.0"
# Common imports
import numpy as np
import os
# to make this notebook's output stable across runs
np.random.seed(42)
# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)
# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "ann"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)
def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)
Note: we set max_iter and tol explicitly to avoid warnings about the fact that their default values will change in future versions of Scikit-Learn.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron
iris = load_iris()
X = iris.data[:, (2, 3)] # petal length, petal width
y = (iris.target == 0).astype(int)  # np.int is deprecated, use the builtin int
per_clf = Perceptron(max_iter=1000, tol=1e-3, random_state=42)
per_clf.fit(X, y)
y_pred = per_clf.predict([[2, 0.5]])
y_pred
array([1])
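Note: the Perceptron is equivalent to an SGDClassifier with a perceptron loss, a constant learning rate of 1, and no regularization penalty. A quick sketch:
from sklearn.linear_model import SGDClassifier
# Equivalent to the Perceptron above: perceptron loss, constant learning
# rate (eta0=1), and no regularization.
sgd_clf = SGDClassifier(loss="perceptron", learning_rate="constant",
                        eta0=1, penalty=None, random_state=42)
sgd_clf.fit(X, y)
sgd_clf.predict([[2, 0.5]])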
a = -per_clf.coef_[0][0] / per_clf.coef_[0][1]
b = -per_clf.intercept_ / per_clf.coef_[0][1]
axes = [0, 5, 0, 2]
x0, x1 = np.meshgrid(
    np.linspace(axes[0], axes[1], 500).reshape(-1, 1),
    np.linspace(axes[2], axes[3], 200).reshape(-1, 1),
)
X_new = np.c_[x0.ravel(), x1.ravel()]
y_predict = per_clf.predict(X_new)
zz = y_predict.reshape(x0.shape)
plt.figure(figsize=(10, 4))
plt.plot(X[y==0, 0], X[y==0, 1], "bs", label="Not Iris-Setosa")
plt.plot(X[y==1, 0], X[y==1, 1], "yo", label="Iris-Setosa")
plt.plot([axes[0], axes[1]], [a * axes[0] + b, a * axes[1] + b], "k-", linewidth=3)
from matplotlib.colors import ListedColormap
custom_cmap = ListedColormap(['#9898ff', '#fafab0'])
plt.contourf(x0, x1, zz, cmap=custom_cmap)
plt.xlabel("Chiều dài cánh hoa", fontsize=14)
plt.ylabel("Chiều rộng cánh hoa", fontsize=14)
plt.legend(loc="lower right", fontsize=14)
plt.axis(axes)
save_fig("perceptron_iris_plot")
plt.show()
Saving figure perceptron_iris_plot
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def relu(z):
    return np.maximum(0, z)

def derivative(f, z, eps=0.000001):
    return (f(z + eps) - f(z - eps)) / (2 * eps)
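As a quick sanity check, this centered difference should match the analytical derivative sigmoid'(0) = sigmoid(0) × (1 − sigmoid(0)) = 0.25:
derivative(sigmoid, np.array([0.0]))  # ≈ array([0.25])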
z = np.linspace(-5, 5, 200)
plt.figure(figsize=(11,4))
plt.subplot(121)
plt.plot(z, np.sign(z), "r-", linewidth=1, label="Step")
plt.plot(z, sigmoid(z), "g--", linewidth=2, label="Sigmoid")
plt.plot(z, np.tanh(z), "b-", linewidth=2, label="Tanh")
plt.plot(z, relu(z), "m-.", linewidth=2, label="ReLU")
plt.grid(True)
plt.legend(loc="center right", fontsize=14)
plt.title("Hàm Kích hoạt", fontsize=14)
plt.axis([-5, 5, -1.2, 1.2])
plt.subplot(122)
plt.plot(z, derivative(np.sign, z), "r-", linewidth=1, label="Step")
plt.plot(0, 0, "ro", markersize=5)
plt.plot(0, 0, "rx", markersize=10)
plt.plot(z, derivative(sigmoid, z), "g--", linewidth=2, label="Sigmoid")
plt.plot(z, derivative(np.tanh, z), "b-", linewidth=2, label="Tanh")
plt.plot(z, derivative(relu, z), "m-.", linewidth=2, label="ReLU")
plt.grid(True)
#plt.legend(loc="center right", fontsize=14)
plt.title("Đạo Hàm", fontsize=14)
plt.axis([-5, 5, -0.2, 1.2])
save_fig("activation_functions_plot")
plt.show()
Saving figure activation_functions_plot
def heaviside(z):
    return (z >= 0).astype(z.dtype)

def mlp_xor(x1, x2, activation=heaviside):
    return activation(-activation(x1 + x2 - 1.5) + activation(x1 + x2 - 0.5) - 0.5)
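As a quick sanity check, this two-layer MLP should reproduce the XOR truth table on the four corner points:
a1 = np.array([0., 0., 1., 1.])
a2 = np.array([0., 1., 0., 1.])
mlp_xor(a1, a2)  # expected: array([0., 1., 1., 0.])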
x1s = np.linspace(-0.2, 1.2, 100)
x2s = np.linspace(-0.2, 1.2, 100)
x1, x2 = np.meshgrid(x1s, x2s)
z1 = mlp_xor(x1, x2, activation=heaviside)
z2 = mlp_xor(x1, x2, activation=sigmoid)
plt.figure(figsize=(10,4))
plt.subplot(121)
plt.contourf(x1, x2, z1)
plt.plot([0, 1], [0, 1], "gs", markersize=20)
plt.plot([0, 1], [1, 0], "y^", markersize=20)
plt.title("Hàm Kích hoạt: heaviside", fontsize=14)
plt.grid(True)
plt.subplot(122)
plt.contourf(x1, x2, z2)
plt.plot([0, 1], [0, 1], "gs", markersize=20)
plt.plot([0, 1], [1, 0], "y^", markersize=20)
plt.title("Hàm Kích hoạt: sigmoid", fontsize=14)
plt.grid(True)
First let's import TensorFlow and Keras.
import tensorflow as tf
from tensorflow import keras
tf.__version__
'2.4.1'
keras.__version__
'2.4.0'
Let's start by loading the Fashion MNIST dataset. Keras provides utilities to load common datasets in keras.datasets. This dataset is already split into a training set and a test set for us, but it can be useful to split the training set further to get a validation set:
fashion_mnist = keras.datasets.fashion_mnist
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist.load_data()
The training set contains 60,000 grayscale images, each 28x28 pixels:
X_train_full.shape
(60000, 28, 28)
Each pixel intensity is represented as a byte (0 to 255):
X_train_full.dtype
dtype('uint8')
Let's split the full training set into a validation set and a (smaller) training set. We also scale the pixel intensities down to the 0-1 range and convert them to floats, by dividing by 255.
X_valid, X_train = X_train_full[:5000] / 255., X_train_full[5000:] / 255.
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]
X_test = X_test / 255.
We can display an image using Matplotlib's imshow() function, with a 'binary' color map:
plt.imshow(X_train[0], cmap="binary")
plt.axis('off')
plt.show()
The labels are the class IDs (represented as uint8), from 0 to 9:
y_train
array([4, 0, 7, ..., 3, 0, 5], dtype=uint8)
Here are the corresponding class names:
class_names = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
"Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"]
So the first image in the training set is a coat:
class_names[y_train[0]]
'Coat'
The validation set contains 5,000 images, and the test set contains 10,000 images:
X_valid.shape
(5000, 28, 28)
X_test.shape
(10000, 28, 28)
Let's take a look at a sample of the images in the dataset:
n_rows = 4
n_cols = 10
plt.figure(figsize=(n_cols * 1.2, n_rows * 1.2))
for row in range(n_rows):
    for col in range(n_cols):
        index = n_cols * row + col
        plt.subplot(n_rows, n_cols, index + 1)
        plt.imshow(X_train[index], cmap="binary", interpolation="nearest")
        plt.axis('off')
        plt.title(class_names[y_train[index]], fontsize=12)
plt.subplots_adjust(wspace=0.2, hspace=0.5)
save_fig('fashion_mnist_plot', tight_layout=False)
plt.show()
Saving figure fashion_mnist_plot
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dense(300, activation="relu"))
model.add(keras.layers.Dense(100, activation="relu"))
model.add(keras.layers.Dense(10, activation="softmax"))
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(300, activation="relu"),
    keras.layers.Dense(100, activation="relu"),
    keras.layers.Dense(10, activation="softmax")
])
model.layers
[<tensorflow.python.keras.layers.core.Flatten at 0x7fd9891fef90>,
 <tensorflow.python.keras.layers.core.Dense at 0x7fd989205290>,
 <tensorflow.python.keras.layers.core.Dense at 0x7fd989205610>,
 <tensorflow.python.keras.layers.core.Dense at 0x7fd989205a10>]
model.summary()
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= flatten (Flatten) (None, 784) 0 _________________________________________________________________ dense (Dense) (None, 300) 235500 _________________________________________________________________ dense_1 (Dense) (None, 100) 30100 _________________________________________________________________ dense_2 (Dense) (None, 10) 1010 ================================================================= Total params: 266,610 Trainable params: 266,610 Non-trainable params: 0 _________________________________________________________________
keras.utils.plot_model(model, "my_fashion_mnist_model.png", show_shapes=True)
hidden1 = model.layers[1]
hidden1.name
'dense'
model.get_layer(hidden1.name) is hidden1
True
weights, biases = hidden1.get_weights()
weights
array([[ 0.02448617, -0.00877795, -0.02189048, ..., -0.02766046,
         0.03859074, -0.06889391],
       [ 0.00476504, -0.03105379, -0.0586676 , ...,  0.00602964,
        -0.02763776, -0.04165364],
       [-0.06189284, -0.06901957,  0.07102345, ..., -0.04238207,
         0.07121518, -0.07331658],
       ...,
       [-0.03048757,  0.02155137, -0.05400612, ..., -0.00113463,
         0.00228987,  0.05581069],
       [ 0.07061854, -0.06960931,  0.07038955, ..., -0.00384101,
         0.00034875,  0.02878492],
       [-0.06022581,  0.01577859, -0.02585464, ..., -0.00527829,
         0.00272203, -0.06793761]], dtype=float32)
weights.shape
(784, 300)
biases
array([0., 0., 0., ..., 0., 0., 0.], dtype=float32)  # all 300 biases are zero
biases.shape
(300,)
model.compile(loss="sparse_categorical_crossentropy",
optimizer="sgd",
metrics=["accuracy"])
This is equivalent to:
model.compile(loss=keras.losses.sparse_categorical_crossentropy,
              optimizer=keras.optimizers.SGD(),
              metrics=[keras.metrics.sparse_categorical_accuracy])
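We use sparse_categorical_crossentropy because the labels are integer class IDs. If you had one-hot label vectors instead, you would use categorical_crossentropy. A minimal sketch of the conversion (illustrative only; the rest of this notebook keeps the sparse labels):
y_train_one_hot = keras.utils.to_categorical(y_train, num_classes=10)
y_train_one_hot[0]  # class 4 ("Coat") becomes a vector with a 1 at index 4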
history = model.fit(X_train, y_train, epochs=30,
                    validation_data=(X_valid, y_valid))
Epoch 1/30
1719/1719 [==============================] - 2s 1ms/step - loss: 1.0187 - accuracy: 0.6807 - val_loss: 0.5207 - val_accuracy: 0.8234
<<56 more lines>>
Epoch 30/30
1719/1719 [==============================] - 2s 916us/step - loss: 0.2252 - accuracy: 0.9211 - val_loss: 0.3004 - val_accuracy: 0.8920
history.params
{'verbose': 1, 'epochs': 30, 'steps': 1719}
print(history.epoch)
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
history.history.keys()
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
import pandas as pd
pd.DataFrame(history.history).plot(figsize=(8, 5))
plt.grid(True)
plt.gca().set_ylim(0, 1)
save_fig("keras_learning_curves_plot")
plt.show()
Saving figure keras_learning_curves_plot
model.evaluate(X_test, y_test)
313/313 [==============================] - 0s 639us/step - loss: 0.3357 - accuracy: 0.8837
[0.3357059359550476, 0.8837000131607056]
X_new = X_test[:3]
y_proba = model.predict(X_new)
y_proba.round(2)
array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0.03, 0.  , 0.96],
       [0.  , 0.  , 0.99, 0.  , 0.01, 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 1.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ]], dtype=float32)
Warning: model.predict_classes(X_new) is deprecated. It has been replaced with np.argmax(model.predict(X_new), axis=-1).
#y_pred = model.predict_classes(X_new) # deprecated
y_pred = np.argmax(model.predict(X_new), axis=-1)
y_pred
array([9, 2, 1])
np.array(class_names)[y_pred]
array(['Ankle boot', 'Pullover', 'Trouser'], dtype='<U11')
y_new = y_test[:3]
y_new
array([9, 2, 1], dtype=uint8)
plt.figure(figsize=(7.2, 2.4))
for index, image in enumerate(X_new):
    plt.subplot(1, 3, index + 1)
    plt.imshow(image, cmap="binary", interpolation="nearest")
    plt.axis('off')
    plt.title(class_names[y_test[index]], fontsize=12)
plt.subplots_adjust(wspace=0.2, hspace=0.5)
save_fig('fashion_mnist_images_plot', tight_layout=False)
plt.show()
Saving figure fashion_mnist_images_plot
Let's load, split and scale the California housing dataset (the original one, not the modified one used in chapter 2):
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
housing = fetch_california_housing()
X_train_full, X_test, y_train_full, y_test = train_test_split(housing.data, housing.target, random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)
X_test = scaler.transform(X_test)
np.random.seed(42)
tf.random.set_seed(42)
model = keras.models.Sequential([
    keras.layers.Dense(30, activation="relu", input_shape=X_train.shape[1:]),
    keras.layers.Dense(1)
])
model.compile(loss="mean_squared_error", optimizer=keras.optimizers.SGD(learning_rate=1e-3))
history = model.fit(X_train, y_train, epochs=20, validation_data=(X_valid, y_valid))
mse_test = model.evaluate(X_test, y_test)
X_new = X_test[:3]
y_pred = model.predict(X_new)
Epoch 1/20
363/363 [==============================] - 0s 893us/step - loss: 2.2656 - val_loss: 0.8560
<<36 more lines>>
Epoch 20/20
363/363 [==============================] - 0s 646us/step - loss: 0.4128 - val_loss: 0.3969
162/162 [==============================] - 0s 428us/step - loss: 0.4212
plt.plot(pd.DataFrame(history.history))
plt.grid(True)
plt.gca().set_ylim(0, 1)
plt.show()
y_pred
array([[0.3885664],
       [1.6792021],
       [3.1022797]], dtype=float32)
Not all neural network models are simply sequential. Some may have complex topologies. Some may have multiple inputs and/or multiple outputs. For example, a Wide & Deep neural network (see the paper) connects all or part of the inputs directly to the output layer.
np.random.seed(42)
tf.random.set_seed(42)
input_ = keras.layers.Input(shape=X_train.shape[1:])
hidden1 = keras.layers.Dense(30, activation="relu")(input_)
hidden2 = keras.layers.Dense(30, activation="relu")(hidden1)
concat = keras.layers.concatenate([input_, hidden2])
output = keras.layers.Dense(1)(concat)
model = keras.models.Model(inputs=[input_], outputs=[output])
model.summary()
Model: "model" __________________________________________________________________________________________________ Layer (type) Output Shape Param # Connected to ================================================================================================== input_1 (InputLayer) [(None, 8)] 0 __________________________________________________________________________________________________ dense_5 (Dense) (None, 30) 270 input_1[0][0] __________________________________________________________________________________________________ dense_6 (Dense) (None, 30) 930 dense_5[0][0] __________________________________________________________________________________________________ concatenate (Concatenate) (None, 38) 0 input_1[0][0] dense_6[0][0] __________________________________________________________________________________________________ dense_7 (Dense) (None, 1) 39 concatenate[0][0] ================================================================================================== Total params: 1,239 Trainable params: 1,239 Non-trainable params: 0 __________________________________________________________________________________________________
model.compile(loss="mean_squared_error", optimizer=keras.optimizers.SGD(learning_rate=1e-3))
history = model.fit(X_train, y_train, epochs=20,
                    validation_data=(X_valid, y_valid))
mse_test = model.evaluate(X_test, y_test)
y_pred = model.predict(X_new)
Epoch 1/20
363/363 [==============================] - 1s 887us/step - loss: 1.9731 - val_loss: 3.3940
<<36 more lines>>
Epoch 20/20
363/363 [==============================] - 0s 662us/step - loss: 0.3939 - val_loss: 0.4054
162/162 [==============================] - 0s 423us/step - loss: 0.4032
What if you want to send different subsets of the input features through the wide or deep paths? We will send 5 features through the wide path (features 0 to 4), and 6 features through the deep path (features 2 to 7). Note that 3 features will go through both paths (features 2, 3 and 4).
np.random.seed(42)
tf.random.set_seed(42)
input_A = keras.layers.Input(shape=[5], name="wide_input")
input_B = keras.layers.Input(shape=[6], name="deep_input")
hidden1 = keras.layers.Dense(30, activation="relu")(input_B)
hidden2 = keras.layers.Dense(30, activation="relu")(hidden1)
concat = keras.layers.concatenate([input_A, hidden2])
output = keras.layers.Dense(1, name="output")(concat)
model = keras.models.Model(inputs=[input_A, input_B], outputs=[output])
model.compile(loss="mse", optimizer=keras.optimizers.SGD(learning_rate=1e-3))
X_train_A, X_train_B = X_train[:, :5], X_train[:, 2:]
X_valid_A, X_valid_B = X_valid[:, :5], X_valid[:, 2:]
X_test_A, X_test_B = X_test[:, :5], X_test[:, 2:]
X_new_A, X_new_B = X_test_A[:3], X_test_B[:3]
history = model.fit((X_train_A, X_train_B), y_train, epochs=20,
                    validation_data=((X_valid_A, X_valid_B), y_valid))
mse_test = model.evaluate((X_test_A, X_test_B), y_test)
y_pred = model.predict((X_new_A, X_new_B))
Epoch 1/20
363/363 [==============================] - 1s 934us/step - loss: 3.1941 - val_loss: 0.8072
<<36 more lines>>
Epoch 20/20
363/363 [==============================] - 0s 701us/step - loss: 0.4078 - val_loss: 0.4204
162/162 [==============================] - 0s 447us/step - loss: 0.4219
Let's add an auxiliary output for regularization:
np.random.seed(42)
tf.random.set_seed(42)
input_A = keras.layers.Input(shape=[5], name="wide_input")
input_B = keras.layers.Input(shape=[6], name="deep_input")
hidden1 = keras.layers.Dense(30, activation="relu")(input_B)
hidden2 = keras.layers.Dense(30, activation="relu")(hidden1)
concat = keras.layers.concatenate([input_A, hidden2])
output = keras.layers.Dense(1, name="main_output")(concat)
aux_output = keras.layers.Dense(1, name="aux_output")(hidden2)
model = keras.models.Model(inputs=[input_A, input_B],
                           outputs=[output, aux_output])
model.compile(loss=["mse", "mse"], loss_weights=[0.9, 0.1], optimizer=keras.optimizers.SGD(learning_rate=1e-3))
history = model.fit([X_train_A, X_train_B], [y_train, y_train], epochs=20,
                    validation_data=([X_valid_A, X_valid_B], [y_valid, y_valid]))
Epoch 1/20
363/363 [==============================] - 1s 1ms/step - loss: 3.4633 - main_output_loss: 3.3289 - aux_output_loss: 4.6732 - val_loss: 1.6233 - val_main_output_loss: 0.8468 - val_aux_output_loss: 8.6117
<<36 more lines>>
Epoch 20/20
363/363 [==============================] - 0s 870us/step - loss: 0.4589 - main_output_loss: 0.4072 - aux_output_loss: 0.9243 - val_loss: 0.4655 - val_main_output_loss: 0.4048 - val_aux_output_loss: 1.0118
total_loss, main_loss, aux_loss = model.evaluate(
    [X_test_A, X_test_B], [y_test, y_test])
y_pred_main, y_pred_aux = model.predict([X_new_A, X_new_B])
162/162 [==============================] - 0s 546us/step - loss: 0.4668 - main_output_loss: 0.4178 - aux_output_loss: 0.9082
WARNING:tensorflow:5 out of the last 6 calls to <function Model.make_predict_function.<locals>.predict_function at 0x7fd97a1a24d0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.
class WideAndDeepModel(keras.models.Model):
    def __init__(self, units=30, activation="relu", **kwargs):
        super().__init__(**kwargs)
        self.hidden1 = keras.layers.Dense(units, activation=activation)
        self.hidden2 = keras.layers.Dense(units, activation=activation)
        self.main_output = keras.layers.Dense(1)
        self.aux_output = keras.layers.Dense(1)

    def call(self, inputs):
        input_A, input_B = inputs
        hidden1 = self.hidden1(input_B)
        hidden2 = self.hidden2(hidden1)
        concat = keras.layers.concatenate([input_A, hidden2])
        main_output = self.main_output(concat)
        aux_output = self.aux_output(hidden2)
        return main_output, aux_output
model = WideAndDeepModel(30, activation="relu")
model.compile(loss="mse", loss_weights=[0.9, 0.1], optimizer=keras.optimizers.SGD(learning_rate=1e-3))
history = model.fit((X_train_A, X_train_B), (y_train, y_train), epochs=10,
                    validation_data=((X_valid_A, X_valid_B), (y_valid, y_valid)))
total_loss, main_loss, aux_loss = model.evaluate((X_test_A, X_test_B), (y_test, y_test))
y_pred_main, y_pred_aux = model.predict((X_new_A, X_new_B))
Epoch 1/10
363/363 [==============================] - 1s 1ms/step - loss: 3.3855 - output_1_loss: 3.3304 - output_2_loss: 3.8821 - val_loss: 2.1435 - val_output_1_loss: 1.1581 - val_output_2_loss: 11.0117
<<16 more lines>>
Epoch 10/10
363/363 [==============================] - 0s 863us/step - loss: 0.6160 - output_1_loss: 0.5456 - output_2_loss: 1.2495 - val_loss: 0.6849 - val_output_1_loss: 0.5014 - val_output_2_loss: 2.3370
162/162 [==============================] - 0s 546us/step - loss: 0.5841 - output_1_loss: 0.5188 - output_2_loss: 1.1722
WARNING:tensorflow:6 out of the last 7 calls to <function Model.make_predict_function.<locals>.predict_function at 0x7fd9725c2320> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.
np.random.seed(42)
tf.random.set_seed(42)
model = keras.models.Sequential([
    keras.layers.Dense(30, activation="relu", input_shape=[8]),
    keras.layers.Dense(30, activation="relu"),
    keras.layers.Dense(1)
])
model.compile(loss="mse", optimizer=keras.optimizers.SGD(learning_rate=1e-3))
history = model.fit(X_train, y_train, epochs=10, validation_data=(X_valid, y_valid))
mse_test = model.evaluate(X_test, y_test)
Epoch 1/10
363/363 [==============================] - 0s 882us/step - loss: 3.3697 - val_loss: 0.7126
<<16 more lines>>
Epoch 10/10
363/363 [==============================] - 0s 688us/step - loss: 0.4549 - val_loss: 0.4379
162/162 [==============================] - 0s 497us/step - loss: 0.4382
model.save("my_keras_model.h5")
model = keras.models.load_model("my_keras_model.h5")
model.predict(X_new)
WARNING:tensorflow:7 out of the last 8 calls to <function Model.make_predict_function.<locals>.predict_function at 0x7fd9725c28c0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.
array([[0.5400236],
       [1.6505969],
       [3.0098243]], dtype=float32)
model.save_weights("my_keras_weights.ckpt")
model.load_weights("my_keras_weights.ckpt")
<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fd9890c2990>
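Note: saving to a path without the .h5 extension uses TensorFlow's SavedModel format (a directory) instead of HDF5. A minimal sketch (the directory name is just an example):
model.save("my_keras_model_tf")  # writes a SavedModel directory
model = keras.models.load_model("my_keras_model_tf")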
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)
model = keras.models.Sequential([
    keras.layers.Dense(30, activation="relu", input_shape=[8]),
    keras.layers.Dense(30, activation="relu"),
    keras.layers.Dense(1)
])
model.compile(loss="mse", optimizer=keras.optimizers.SGD(learning_rate=1e-3))
checkpoint_cb = keras.callbacks.ModelCheckpoint("my_keras_model.h5", save_best_only=True)
history = model.fit(X_train, y_train, epochs=10,
                    validation_data=(X_valid, y_valid),
                    callbacks=[checkpoint_cb])
model = keras.models.load_model("my_keras_model.h5") # rollback to best model
mse_test = model.evaluate(X_test, y_test)
Epoch 1/10
363/363 [==============================] - 0s 846us/step - loss: 3.3697 - val_loss: 0.7126
<<16 more lines>>
Epoch 10/10
363/363 [==============================] - 0s 655us/step - loss: 0.4549 - val_loss: 0.4379
162/162 [==============================] - 0s 460us/step - loss: 0.4382
model.compile(loss="mse", optimizer=keras.optimizers.SGD(learning_rate=1e-3))
early_stopping_cb = keras.callbacks.EarlyStopping(patience=10,
                                                  restore_best_weights=True)
history = model.fit(X_train, y_train, epochs=100,
                    validation_data=(X_valid, y_valid),
                    callbacks=[checkpoint_cb, early_stopping_cb])
mse_test = model.evaluate(X_test, y_test)
Epoch 1/100
363/363 [==============================] - 0s 878us/step - loss: 0.4578 - val_loss: 0.4110
<<188 more lines>>
Epoch 96/100
363/363 [==============================] - 0s 676us/step - loss: 0.3341 - val_loss: 0.3213
162/162 [==============================] - 0s 440us/step - loss: 0.3310
class PrintValTrainRatioCallback(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs):
        print("\nval/train: {:.2f}".format(logs["val_loss"] / logs["loss"]))
val_train_ratio_cb = PrintValTrainRatioCallback()
history = model.fit(X_train, y_train, epochs=1,
                    validation_data=(X_valid, y_valid),
                    callbacks=[val_train_ratio_cb])
363/363 [==============================] - 0s 799us/step - loss: 0.3302 - val_loss: 0.3556
val/train: 1.08
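The Callback class exposes several other hooks besides on_epoch_end, such as on_train_begin, on_epoch_begin, on_batch_end and on_train_end. A small sketch (the print messages are purely illustrative):
class TrainingLoggerCallback(keras.callbacks.Callback):
    def on_train_begin(self, logs=None):
        print("Training is starting")
    def on_epoch_begin(self, epoch, logs=None):
        print("Starting epoch", epoch)
    def on_batch_end(self, batch, logs=None):
        pass  # runs after every training batch
    def on_train_end(self, logs=None):
        print("Training is over")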
root_logdir = os.path.join(os.curdir, "my_logs")
def get_run_logdir():
    import time
    run_id = time.strftime("run_%Y_%m_%d-%H_%M_%S")
    return os.path.join(root_logdir, run_id)
run_logdir = get_run_logdir()
run_logdir
'./my_logs/run_2021_02_13-18_39_20'
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)
model = keras.models.Sequential([
    keras.layers.Dense(30, activation="relu", input_shape=[8]),
    keras.layers.Dense(30, activation="relu"),
    keras.layers.Dense(1)
])
model.compile(loss="mse", optimizer=keras.optimizers.SGD(learning_rate=1e-3))
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)
history = model.fit(X_train, y_train, epochs=30,
                    validation_data=(X_valid, y_valid),
                    callbacks=[checkpoint_cb, tensorboard_cb])
Epoch 1/30
363/363 [==============================] - 1s 927us/step - loss: 3.3697 - val_loss: 0.7126
<<56 more lines>>
Epoch 30/30
363/363 [==============================] - 0s 684us/step - loss: 0.3843 - val_loss: 0.3548
To start the TensorBoard server, one option is to open a terminal, activate the virtualenv where you installed TensorBoard if needed, go to this notebook's directory, then type:
$ tensorboard --logdir=./my_logs --port=6006
You can then open your web browser to localhost:6006 and use TensorBoard. Once you are done, press Ctrl-C in the terminal window: this will shut down the TensorBoard server.
Alternatively, you can load TensorBoard's Jupyter extension and run it like this:
%load_ext tensorboard
%tensorboard --logdir=./my_logs --port=6006
run_logdir2 = get_run_logdir()
run_logdir2
'./my_logs/run_2021_02_13-18_39_31'
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)
model = keras.models.Sequential([
    keras.layers.Dense(30, activation="relu", input_shape=[8]),
    keras.layers.Dense(30, activation="relu"),
    keras.layers.Dense(1)
])
model.compile(loss="mse", optimizer=keras.optimizers.SGD(learning_rate=0.05))
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir2)
history = model.fit(X_train, y_train, epochs=30,
                    validation_data=(X_valid, y_valid),
                    callbacks=[checkpoint_cb, tensorboard_cb])
Epoch 1/30
363/363 [==============================] - 1s 1ms/step - loss: 0.7645 - val_loss: 302.8536
Epoch 2/30
363/363 [==============================] - 0s 713us/step - loss: 8159520618.2209 - val_loss: 1.3230
<<54 more lines>>
Epoch 30/30
363/363 [==============================] - 0s 698us/step - loss: 1.3583 - val_loss: 1.3174
Notice how TensorBoard now sees two runs, and you can compare the learning curves.
Check out the other available logging options:
help(keras.callbacks.TensorBoard.__init__)
Help on function __init__ in module tensorflow.python.keras.callbacks:

__init__(self, log_dir='logs', histogram_freq=0, write_graph=True, write_images=False, update_freq='epoch', profile_batch=2, embeddings_freq=0, embeddings_metadata=None, **kwargs)
    Initialize self.  See help(type(self)) for accurate signature.
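For example, histogram_freq=1 makes the callback log weight histograms at every epoch; a quick sketch (the variable name below is just for illustration):
tb_cb_with_histograms = keras.callbacks.TensorBoard(run_logdir2, histogram_freq=1)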
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)
def build_model(n_hidden=1, n_neurons=30, learning_rate=3e-3, input_shape=[8]):
    model = keras.models.Sequential()
    model.add(keras.layers.InputLayer(input_shape=input_shape))
    for layer in range(n_hidden):
        model.add(keras.layers.Dense(n_neurons, activation="relu"))
    model.add(keras.layers.Dense(1))
    optimizer = keras.optimizers.SGD(learning_rate=learning_rate)
    model.compile(loss="mse", optimizer=optimizer)
    return model
keras_reg = keras.wrappers.scikit_learn.KerasRegressor(build_model)
keras_reg.fit(X_train, y_train, epochs=100,
              validation_data=(X_valid, y_valid),
              callbacks=[keras.callbacks.EarlyStopping(patience=10)])
Epoch 1/100
363/363 [==============================] - 0s 905us/step - loss: 1.5673 - val_loss: 20.7721
<<intermediate epochs truncated; loss falls to about 0.32 before val_loss becomes unstable>>
Epoch 100/100
363/363 [==============================] - 0s 671us/step - loss: 0.3706 - val_loss: 1.9265
<tensorflow.python.keras.callbacks.History at 0x7fd97a00df90>
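Note that keras.callbacks.EarlyStopping interrupts training but, by default, does not roll the model back to its best weights. Recent versions of Keras support a restore_best_weights option for that; a variant of the callback above, not used in this notebook:
keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)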
mse_test = keras_reg.score(X_test, y_test)
162/162 [==============================] - 0s 417us/step - loss: 0.3409
y_pred = keras_reg.predict(X_new)
WARNING:tensorflow:8 out of the last 9 calls to <function Model.make_predict_function.<locals>.predict_function at 0x7fd98963b7a0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.
np.random.seed(42)
tf.random.set_seed(42)
Warning: the cell below crashes at the end of training. This appears to be due to Keras issue #13586, caused by a recent change in Scikit-Learn. Pull Request #13598 seems to fix the problem, so this issue should be resolved soon. In the meantime, I added .tolist() and .rvs(1000).tolist() as a workaround.
from scipy.stats import reciprocal
from sklearn.model_selection import RandomizedSearchCV
param_distribs = {
    "n_hidden": [0, 1, 2, 3],
    "n_neurons": np.arange(1, 100).tolist(),
    "learning_rate": reciprocal(3e-4, 3e-2).rvs(1000).tolist(),
}
rnd_search_cv = RandomizedSearchCV(keras_reg, param_distribs, n_iter=10, cv=3, verbose=2)
rnd_search_cv.fit(X_train, y_train, epochs=100,
                  validation_data=(X_valid, y_valid),
                  callbacks=[keras.callbacks.EarlyStopping(patience=10)])
Fitting 3 folds for each of 10 candidates, totalling 30 fits
Epoch 1/100
242/242 [==============================] - 0s 1ms/step - loss: 1.3827 - val_loss: 0.4703
<<logs for the 30 cross-validation fits and the final refit truncated>>
Epoch 28/100
363/363 [==============================] - 0s 721us/step - loss: 0.3058 - val_loss: 0.5106
RandomizedSearchCV(cv=3, estimator=<tensorflow.python.keras.wrappers.scikit_learn.KerasRegressor object at 0x7fd939643c10>, param_distributions={'learning_rate': [0.001683454924600351, 0.02390836445593178, 0.008731907739399206, 0.004725396149933917, 0.0006154014789262348, 0.0006153331256530192, 0.0003920021771415983, 0.01619845322936229, 0.004779156784872302, 0.0... 0.005021425736625637, 0.0005703073595961105, 0.001151888789941251, 0.001621231156394198, 0.0024505367684280487, 0.011155092541719619, 0.0007524347058135697, 0.0032032448128444043, 0.004591455636549438, 0.0003715541189658278, ...], 'n_hidden': [0, 1, 2, 3], 'n_neurons': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, ...]}, verbose=2)
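As an aside, the reciprocal(3e-4, 3e-2) distribution used for the learning rate in param_distribs is log-uniform: it spreads its samples evenly across orders of magnitude, which is appropriate when we do not even know the scale of the optimal value. A quick illustration (this cell is not in the original notebook):
from scipy.stats import reciprocal
samples = reciprocal(3e-4, 3e-2).rvs(10000, random_state=42)
print((samples < 3e-3).mean())  # about 0.5: half the samples fall within [3e-4, 3e-3]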
rnd_search_cv.best_params_
{'n_neurons': 74, 'n_hidden': 3, 'learning_rate': 0.005803602934201024}
rnd_search_cv.best_score_
-0.32039451599121094
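Note that the score is negative: Scikit-Learn assumes that higher scores are better, so the regressor's score is the negative of the MSE (here about 0.32).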
rnd_search_cv.best_estimator_
<tensorflow.python.keras.wrappers.scikit_learn.KerasRegressor at 0x7fd972969310>
rnd_search_cv.score(X_test, y_test)
162/162 [==============================] - 0s 436us/step - loss: 0.3029
-0.3028871417045593
model = rnd_search_cv.best_estimator_.model
model
<tensorflow.python.keras.engine.sequential.Sequential at 0x7fd988eafa10>
model.evaluate(X_test, y_test)
162/162 [==============================] - 0s 446us/step - loss: 0.3029
0.3028871417045593
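Since we now have direct access to the underlying Keras model, we could persist it for later use. A minimal sketch (the filename is hypothetical; this cell is not in the original notebook):
model.save("my_best_housing_model.h5")
model = keras.models.load_model("my_best_housing_model.h5")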
Exercise solutions: for exercises 1 to 9, see Appendix A.
Exercise: Train a deep MLP on the MNIST dataset (you can load it using keras.datasets.mnist.load_data()). See if you can get over 98% accuracy. Try searching for the optimal learning rate by using the approach presented in this chapter (i.e., by growing the learning rate exponentially, plotting the loss, and finding the point where the loss shoots up). Try adding checkpoints, use early stopping, and plot the learning curves using TensorBoard.
Let's load the dataset:
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.mnist.load_data()
Just like for the Fashion MNIST dataset, the MNIST training set contains 60,000 grayscale images, each 28x28 pixels:
X_train_full.shape
(60000, 28, 28)
Each pixel intensity is also represented as a byte (0 to 255):
X_train_full.dtype
dtype('uint8')
Let's split the full training set into a validation set and a (smaller) training set. We also scale the pixel intensities down to the 0-1 range and convert them to floats, by dividing by 255, just like we did for Fashion MNIST:
X_valid, X_train = X_train_full[:5000] / 255., X_train_full[5000:] / 255.
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]
X_test = X_test / 255.
Let's plot an image using Matplotlib's imshow() function, with a 'binary' color map:
plt.imshow(X_train[0], cmap="binary")
plt.axis('off')
plt.show()
The labels are the class IDs (represented as uint8), from 0 to 9. Conveniently, the class IDs correspond to the digits represented in the images, so we don't need a class_names array:
y_train
array([7, 3, 4, ..., 5, 6, 8], dtype=uint8)
The validation set contains 5,000 images, and the test set contains 10,000 images:
X_valid.shape
(5000, 28, 28)
X_test.shape
(10000, 28, 28)
Let's take a look at a sample of the images in the dataset:
n_rows = 4
n_cols = 10
plt.figure(figsize=(n_cols * 1.2, n_rows * 1.2))
for row in range(n_rows):
    for col in range(n_cols):
        index = n_cols * row + col
        plt.subplot(n_rows, n_cols, index + 1)
        plt.imshow(X_train[index], cmap="binary", interpolation="nearest")
        plt.axis('off')
        plt.title(y_train[index], fontsize=12)
plt.subplots_adjust(wspace=0.2, hspace=0.5)
plt.show()
Let's build a simple dense network and find the optimal learning rate. We will need a callback to grow the learning rate at each iteration. It will also record the learning rate and the loss at each iteration:
K = keras.backend
class ExponentialLearningRate(keras.callbacks.Callback):
    def __init__(self, factor):
        self.factor = factor
        self.rates = []   # learning rate recorded at the end of each batch
        self.losses = []  # training loss recorded at the end of each batch
    def on_batch_end(self, batch, logs):
        self.rates.append(K.get_value(self.model.optimizer.learning_rate))
        self.losses.append(logs["loss"])
        # multiply the learning rate by the factor after every batch
        K.set_value(self.model.optimizer.learning_rate,
                    self.model.optimizer.learning_rate * self.factor)
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(300, activation="relu"),
    keras.layers.Dense(100, activation="relu"),
    keras.layers.Dense(10, activation="softmax")
])
We will start with a small learning rate of 1e-3, and grow it by 0.5% at each iteration:
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=keras.optimizers.SGD(learning_rate=1e-3),
              metrics=["accuracy"])
expon_lr = ExponentialLearningRate(factor=1.005)
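With the default batch size of 32, one epoch over the 55,000 training images takes ceil(55,000 / 32) = 1,719 steps, so by the end of a single epoch the learning rate will have grown to roughly 1e-3 × 1.005^1719 ≈ 5.3.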
Now let's train the model for just 1 epoch:
history = model.fit(X_train, y_train, epochs=1,
                    validation_data=(X_valid, y_valid),
                    callbacks=[expon_lr])
1719/1719 [==============================] - 2s 1ms/step - loss: 4.6604 - accuracy: 0.4887 - val_loss: 2.3911 - val_accuracy: 0.1126
We can now plot the loss as a function of the learning rate:
plt.plot(expon_lr.rates, expon_lr.losses)
plt.gca().set_xscale('log')
plt.hlines(min(expon_lr.losses), min(expon_lr.rates), max(expon_lr.rates))
plt.axis([min(expon_lr.rates), max(expon_lr.rates), 0, expon_lr.losses[0]])
plt.grid()
plt.xlabel("Learning rate")
plt.ylabel("Loss")
Text(0, 0.5, 'Loss')
The loss starts shooting back up violently when the learning rate goes over 6e-1, so let's try using half of that, at 3e-1:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(300, activation="relu"),
    keras.layers.Dense(100, activation="relu"),
    keras.layers.Dense(10, activation="softmax")
])
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=keras.optimizers.SGD(learning_rate=3e-1),
              metrics=["accuracy"])
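As an aside, instead of eyeballing the curve, the heuristic could be automated. A minimal sketch (not part of the original notebook, and only one variant of the rule of thumb): take the learning rate at which the recorded loss was lowest, then halve it:
losses = np.array(expon_lr.losses)
rates = np.array(expon_lr.rates)
best_rate = rates[losses.argmin()]  # rate at which the recorded loss was lowest
print(best_rate / 2)                # candidate training rate under this variant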
run_index = 1 # increment this at every run
run_logdir = os.path.join(os.curdir, "my_mnist_logs", "run_{:03d}".format(run_index))
run_logdir
'./my_mnist_logs/run_001'
early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)
checkpoint_cb = keras.callbacks.ModelCheckpoint("my_mnist_model.h5", save_best_only=True)
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)
history = model.fit(X_train, y_train, epochs=100,
                    validation_data=(X_valid, y_valid),
                    callbacks=[checkpoint_cb, early_stopping_cb, tensorboard_cb])
Epoch 1/100
1719/1719 [==============================] - 3s 2ms/step - loss: 0.4195 - accuracy: 0.8677 - val_loss: 0.0995 - val_accuracy: 0.9724
<<intermediate epochs truncated; the best val_loss (0.0718) is reached at epoch 8>>
Epoch 28/100
1719/1719 [==============================] - 2s 973us/step - loss: 4.2216e-05 - accuracy: 1.0000 - val_loss: 0.0911 - val_accuracy: 0.9862
model = keras.models.load_model("my_mnist_model.h5") # rollback to best model
model.evaluate(X_test, y_test)
313/313 [==============================] - 0s 701us/step - loss: 0.0804 - accuracy: 0.9806
[0.08043695986270905, 0.9805999994277954]
We got over 98% accuracy. Finally, let's look at the learning curves using TensorBoard:
%tensorboard --logdir=./my_mnist_logs --port=6006