#!/usr/bin/env python
# coding: utf-8

# # CS 20 : TensorFlow for Deep Learning Research
# ## Lecture 04 : Eager execution
# ### Custom training basics
# * Reference
#     + https://www.tensorflow.org/tutorials/eager/custom_training?hl=ko

# ### Setup

# In[1]:

from __future__ import absolute_import, division, print_function

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')

tf.enable_eager_execution()
print(tf.__version__)

# ### Variables
# Tensors in TensorFlow are immutable, stateless objects. Machine learning models, however, need changing state: as a model trains, the same code that computes predictions should behave differently over time (hopefully producing a lower loss!). One way to represent this changing state is to rely on the fact that Python itself is a stateful programming language:

# In[2]:

# Using Python state
x = tf.zeros([10, 10])
x += 2  # Equivalent to x = x + 2; it does not mutate the original value of x
print(x)

# ***TensorFlow, however, has stateful operations built in, and these are often more pleasant to use than low-level Python representations of your state.*** To represent weights in a model, for example, it is often convenient and efficient ***to use TensorFlow variables.***
# 
# ***Computations using Variables are automatically traced when computing gradients.*** For Variables representing embeddings, TensorFlow does sparse updates by default, which are more computation- and memory-efficient. Using Variables is also a quick way to let a reader of your code know that this piece of state is mutable.

# In[3]:

v = tf.Variable(1.0)
print(v)

# Re-assign the value
v.assign(3.0)
print(v)

# Use `v` in a TensorFlow operation like tf.square() and reassign
v.assign(tf.square(v))
print(v.numpy())
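# The cell below is a minimal sketch of the claim above: a `tf.Variable` used inside a `tf.GradientTape` context is traced automatically, so no explicit `tape.watch` call is needed before asking for its gradient.

# In[ ]:

# Sketch: gradients w.r.t. a Variable are traced automatically by GradientTape
u = tf.Variable(2.0)
with tf.GradientTape() as g:
    y = tf.square(u)   # y = u^2, traced because u is a Variable
du = g.gradient(y, u)  # dy/du = 2 * u = 4.0
print(du.numpy())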
# ### Example: Fitting a linear model
# 1. Define the model
# 2. Define a loss function
# 3. Obtain training data
# 4. Run through the training data and use an "optimizer" to adjust the variables to fit the data

# #### Define the model

# In[4]:

class Model():
    def __init__(self):
        self.w = tf.Variable(tf.random_normal(shape=[]))
        self.b = tf.Variable(0.)

    def __call__(self, x):
        return self.w * x + self.b

model = Model()

# #### Define a loss function

# In[5]:

def loss_fn(predicted_y, desired_y):
    return tf.reduce_mean(tf.square(predicted_y - desired_y))

# In[6]:

# Obtain training data
true_w = 3.0
true_b = 2.0
num_examples = 1000

inputs = tf.random_normal(shape=[num_examples])
noise = tf.random_normal(shape=[num_examples])
outputs = inputs * true_w + true_b + noise

# In[7]:

plt.scatter(inputs, outputs, c='b')
plt.scatter(inputs, model(inputs), c='r')
plt.show()

print('Current loss: {:.3f}'.format(loss_fn(model(inputs), outputs).numpy()))

# #### Run through the training data and use an "optimizer" to adjust the variables to fit the data

# In[8]:

epochs = 10
batch_size = 64
learning_rate = .1

data = tf.data.Dataset.from_tensor_slices((inputs, outputs))
data = data.shuffle(500)
data = data.batch(batch_size=batch_size)

# In[9]:

# When using tf.train optimizers, see the eager guide (https://www.tensorflow.org/guide/eager)
w_hist = []
b_hist = []

for epoch in range(epochs):
    avg_loss = 0
    tr_step = 0
    for mb_x, mb_y in data:
        with tf.GradientTape() as tape:
            mb_yhat = model(mb_x)
            mb_loss = loss_fn(mb_yhat, mb_y)
        dw, db = tape.gradient(target=mb_loss, sources=[model.w, model.b])
        # Manual gradient-descent update of the variables
        model.w.assign_sub(learning_rate * dw)
        model.b.assign_sub(learning_rate * db)
        tr_step += 1
        avg_loss += mb_loss

    w_hist.append(model.w.numpy())
    b_hist.append(model.b.numpy())
    avg_loss /= tr_step

    print('epoch: {:2}, w: {:.2f}, b: {:.2f}, mse_loss: {:.3f}'.format(epoch + 1, w_hist[-1],
                                                                       b_hist[-1], avg_loss))

# In[10]:

# Let's plot it all
plt.plot(range(epochs), w_hist, 'r',
         range(epochs), b_hist, 'b')
plt.plot([true_w] * len(range(epochs)), 'r--',
         [true_b] * len(range(epochs)), 'b--')
plt.legend(['w', 'b', 'true_w', 'true_b'])
plt.show()
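# As a follow-up to the manual `assign_sub` updates above, the cell below is a minimal sketch (reusing the `model`, `loss_fn`, `data`, and `learning_rate` defined earlier) of how the same update step could be written with a `tf.train` optimizer, in the spirit of the eager guide linked in the comment above.

# In[ ]:

# Sketch: one pass over the data using tf.train.GradientDescentOptimizer
# instead of manually calling assign_sub on each variable.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)

for mb_x, mb_y in data:
    with tf.GradientTape() as tape:
        mb_loss = loss_fn(model(mb_x), mb_y)
    grads = tape.gradient(mb_loss, [model.w, model.b])
    optimizer.apply_gradients(zip(grads, [model.w, model.b]))

print('w: {:.2f}, b: {:.2f}'.format(model.w.numpy(), model.b.numpy()))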