%matplotlib inline
# Lab 3 Minimizing Cost
import tensorflow as tf
import matplotlib.pyplot as plt
X = [1, 2, 3]
Y = [1, 2, 3]
W = tf.placeholder(tf.float32)
# Our hypothesis for linear model X * W
hypothesis = X * W
# cost/loss function
cost = tf.reduce_mean(tf.square(hypothesis - Y))
# Launch the graph in a session.
sess = tf.Session()
# Initializes global variables in the graph.
sess.run(tf.global_variables_initializer())
# Variables for plotting cost function
W_history = []
cost_history = []
for i in range(-30, 50):
    curr_W = i * 0.1
    curr_cost = sess.run(cost, feed_dict={W: curr_W})
    W_history.append(curr_W)
    cost_history.append(curr_cost)
# Show the cost function
plt.plot(W_history, cost_history)
plt.show()
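For reference, the curve plotted above is the cost written out from the code, a parabola in W with its minimum at W = 1:

\mathrm{cost}(W) = \frac{1}{m} \sum_{i=1}^{m} (W x_i - y_i)^2

Gradient descent minimizes this cost by repeatedly applying the update rule below (the constant factor 2 of the true derivative is omitted here, since it only rescales the effective learning rate \alpha):

W := W - \alpha \cdot \frac{1}{m} \sum_{i=1}^{m} (W x_i - y_i)\, x_i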
The update rule above can be coded as follows.
# Lab 3 Minimizing Cost
import tensorflow as tf
tf.set_random_seed(777) # for reproducibility
x_data = [1, 2, 3]
y_data = [1, 2, 3]
# Try to find a value for W that computes y_data = W * x_data
# We know that W should be 1
# But let's use TensorFlow to figure it out
W = tf.Variable(tf.random_normal([1]), name='weight')
X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)
# Our hypothesis for linear model X * W
hypothesis = X * W
# cost/loss function
cost = tf.reduce_mean(tf.square(hypothesis - Y))
# Minimize: Gradient Descent using derivative: W -= learning_rate * derivative
learning_rate = 0.1
gradient = tf.reduce_mean((W * X - Y) * X)
descent = W - learning_rate * gradient
update = W.assign(descent)
# Launch the graph in a session.
sess = tf.Session()
# Initializes global variables in the graph.
sess.run(tf.global_variables_initializer())
for step in range(21):
    sess.run(update, feed_dict={X: x_data, Y: y_data})
    print(step, sess.run(cost, feed_dict={X: x_data, Y: y_data}), sess.run(W))
0 0.000196199 [ 1.00648403]
1 5.58083e-05 [ 1.00345814]
2 1.58727e-05 [ 1.00184429]
3 4.51458e-06 [ 1.0009836]
4 1.28461e-06 [ 1.00052464]
5 3.65236e-07 [ 1.00027978]
6 1.03953e-07 [ 1.00014925]
7 2.95924e-08 [ 1.00007963]
8 8.40479e-09 [ 1.00004244]
9 2.39406e-09 [ 1.00002265]
10 6.79378e-10 [ 1.00001204]
11 1.93381e-10 [ 1.00000644]
12 5.66018e-11 [ 1.00000346]
13 1.44998e-11 [ 1.00000179]
14 4.24431e-12 [ 1.00000095]
15 1.06108e-12 [ 1.00000048]
16 2.65269e-13 [ 1.00000024]
17 9.9476e-14 [ 1.00000012]
18 0.0 [ 1.]
19 0.0 [ 1.]
20 0.0 [ 1.]
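Note that the hand-coded gradient drops the constant factor 2 of the true derivative, so with learning_rate = 0.1 each update multiplies the error W − 1 by 1 − 0.1 · mean(x²) = 1 − 0.1 · 14/3 ≈ 0.533, and the cost, which is proportional to (W − 1)², shrinks by a factor of about 0.28 per step, consistent with the log above.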
Gradient descent can be done much more simply, as shown below.
A program that finds the W that minimizes cost when W is initialized to 5.0.
# Lab 3 Minimizing Cost
import tensorflow as tf
# tf Graph Input
X = [1, 2, 3]
Y = [1, 2, 3]
# Set wrong model weights
W = tf.Variable(5.0)
# Linear model
hypothesis = X * W
# cost/loss function
cost = tf.reduce_mean(tf.square(hypothesis - Y))
# Minimize: Gradient Descent Magic
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
train = optimizer.minimize(cost)
# Launch the graph in a session.
sess = tf.Session()
# Initializes global variables in the graph.
sess.run(tf.global_variables_initializer())
#for step in range(100):
for step in range(10):
    print(step, sess.run(W))
    sess.run(train)
0 5.0
1 1.26667
2 1.01778
3 1.00119
4 1.00008
5 1.00001
6 1.0
7 1.0
8 1.0
9 1.0
A program that finds the W that minimizes cost when W is initialized to -3.0.
# Lab 3 Minimizing Cost
import tensorflow as tf
# tf Graph Input
X = [1, 2, 3]
Y = [1, 2, 3]
# Set wrong model weights
W = tf.Variable(-3.0)
# Linear model
hypothesis = X * W
# cost/loss function
cost = tf.reduce_mean(tf.square(hypothesis - Y))
# Minimize: Gradient Descent Magic
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
train = optimizer.minimize(cost)
# Launch the graph in a session.
sess = tf.Session()
# Initializes global variables in the graph.
sess.run(tf.global_variables_initializer())
#for step in range(100):
for step in range(10):
    print(step, sess.run(W))
    sess.run(train)
0 -3.0
1 0.733334
2 0.982222
3 0.998815
4 0.999921
5 0.999995
6 1.0
7 1.0
8 1.0
9 1.0
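Both runs converge this fast because the optimizer uses the full derivative 2 · mean((W·x − y)·x) = 2(W − 1) · mean(x²), and mean(x²) = (1² + 2² + 3²)/3 = 14/3. With learning_rate = 0.1 each step therefore replaces the error W − 1 by (1 − 2 · 0.1 · 14/3)(W − 1) = (W − 1)/15: starting from 5.0 gives 1 + 4/15 ≈ 1.26667, and starting from -3.0 gives 1 − 4/15 ≈ 0.733333, matching the first steps printed above.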
Check that the gradient computed by hand matches the gradient (gvs) returned by the TensorFlow optimizer.
# Lab 3 Minimizing Cost
# This is optional
import tensorflow as tf
# tf Graph Input
X = [1, 2, 3]
Y = [1, 2, 3]
# Set wrong model weights
W = tf.Variable(5.)
# Linear model
hypothesis = X * W
# Manual gradient
gradient = tf.reduce_mean((W * X - Y) * X) * 2
# cost/loss function
cost = tf.reduce_mean(tf.square(hypothesis - Y))
# Minimize: Gradient Descent Magic
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
train = optimizer.minimize(cost)
# Get gradients
gvs = optimizer.compute_gradients(cost, [W])
# Optional: modify gradient if necessary
# gvs = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gvs]
# Apply gradients
apply_gradients = optimizer.apply_gradients(gvs)
# Launch the graph in a session.
sess = tf.Session()
# Initializes global variables in the graph.
sess.run(tf.global_variables_initializer())
for step in range(100):
    print(step, sess.run([gradient, W, gvs]))
    sess.run(apply_gradients)
0 [37.333332, 5.0, [(37.333336, 5.0)]]
1 [33.848888, 4.6266665, [(33.848888, 4.6266665)]]
2 [30.689657, 4.2881775, [(30.689657, 4.2881775)]]
3 [27.825287, 3.9812808, [(27.825287, 3.9812808)]]
4 [25.228262, 3.703028, [(25.228264, 3.703028)]]
5 [22.873621, 3.4507453, [(22.873623, 3.4507453)]]
6 [20.738752, 3.2220092, [(20.738752, 3.2220092)]]
7 [18.803137, 3.0146217, [(18.803137, 3.0146217)]]
8 [17.048176, 2.8265903, [(17.048176, 2.8265903)]]
9 [15.457013, 2.6561086, [(15.457014, 2.6561086)]]
10 [14.014359, 2.5015385, [(14.01436, 2.5015385)]]
11 [12.706352, 2.3613949, [(12.706352, 2.3613949)]]
12 [11.520427, 2.2343314, [(11.520427, 2.2343314)]]
13 [10.445186, 2.119127, [(10.445186, 2.119127)]]
14 [9.4703016, 2.0146751, [(9.4703016, 2.0146751)]]
15 [8.5864067, 1.9199722, [(8.5864067, 1.9199722)]]
16 [7.7850089, 1.8341081, [(7.7850089, 1.8341081)]]
17 [7.0584083, 1.756258, [(7.0584083, 1.756258)]]
18 [6.3996239, 1.685674, [(6.3996239, 1.685674)]]
19 [5.8023257, 1.6216778, [(5.8023257, 1.6216778)]]
20 [5.260776, 1.5636545, [(5.260776, 1.5636545)]]
21 [4.7697697, 1.5110468, [(4.7697697, 1.5110468)]]
22 [4.3245912, 1.4633491, [(4.3245912, 1.4633491)]]
23 [3.9209633, 1.4201032, [(3.9209635, 1.4201032)]]
24 [3.5550067, 1.3808936, [(3.5550067, 1.3808936)]]
25 [3.2232056, 1.3453435, [(3.2232056, 1.3453435)]]
26 [2.9223735, 1.3131114, [(2.9223738, 1.3131114)]]
27 [2.6496189, 1.2838877, [(2.6496186, 1.2838877)]]
28 [2.4023216, 1.2573916, [(2.4023218, 1.2573916)]]
29 [2.1781051, 1.2333684, [(2.1781051, 1.2333684)]]
30 [1.9748148, 1.2115873, [(1.9748147, 1.2115873)]]
31 [1.7904993, 1.1918392, [(1.7904994, 1.1918392)]]
32 [1.623386, 1.1739342, [(1.6233861, 1.1739342)]]
33 [1.4718695, 1.1577003, [(1.4718695, 1.1577003)]]
34 [1.3344955, 1.1429816, [(1.3344957, 1.1429816)]]
35 [1.2099417, 1.1296366, [(1.2099419, 1.1296366)]]
36 [1.0970144, 1.1175373, [(1.0970144, 1.1175373)]]
37 [0.9946267, 1.1065671, [(0.9946267, 1.1065671)]]
38 [0.90179497, 1.0966209, [(0.90179503, 1.0966209)]]
39 [0.81762749, 1.087603, [(0.81762755, 1.087603)]]
40 [0.74131513, 1.0794266, [(0.74131513, 1.0794266)]]
41 [0.67212623, 1.0720135, [(0.67212629, 1.0720135)]]
42 [0.60939401, 1.0652922, [(0.60939401, 1.0652922)]]
43 [0.55251688, 1.0591983, [(0.55251688, 1.0591983)]]
44 [0.50094914, 1.0536731, [(0.50094914, 1.0536731)]]
45 [0.45419374, 1.0486636, [(0.45419377, 1.0486636)]]
46 [0.41180158, 1.0441216, [(0.41180158, 1.0441216)]]
47 [0.37336722, 1.0400037, [(0.37336725, 1.0400037)]]
48 [0.33851996, 1.03627, [(0.33851999, 1.03627)]]
49 [0.30692515, 1.0328848, [(0.30692515, 1.0328848)]]
50 [0.27827826, 1.0298156, [(0.27827829, 1.0298156)]]
51 [0.25230527, 1.0270327, [(0.25230527, 1.0270327)]]
52 [0.2287569, 1.0245097, [(0.2287569, 1.0245097)]]
53 [0.20740573, 1.022222, [(0.20740573, 1.022222)]]
54 [0.18804836, 1.020148, [(0.18804836, 1.020148)]]
55 [0.17049654, 1.0182675, [(0.17049655, 1.0182675)]]
56 [0.15458433, 1.0165626, [(0.15458435, 1.0165626)]]
57 [0.14015675, 1.0150168, [(0.14015675, 1.0150168)]]
58 [0.12707591, 1.0136153, [(0.12707591, 1.0136153)]]
59 [0.11521538, 1.0123445, [(0.11521538, 1.0123445)]]
60 [0.10446167, 1.0111923, [(0.10446167, 1.0111923)]]
61 [0.094712019, 1.0101477, [(0.094712019, 1.0101477)]]
62 [0.085872017, 1.0092006, [(0.085872017, 1.0092006)]]
63 [0.077858053, 1.0083419, [(0.077858053, 1.0083419)]]
64 [0.070591293, 1.0075634, [(0.070591293, 1.0075634)]]
65 [0.064002357, 1.0068574, [(0.064002357, 1.0068574)]]
66 [0.05802846, 1.0062174, [(0.05802846, 1.0062174)]]
67 [0.052612226, 1.005637, [(0.052612226, 1.005637)]]
68 [0.047702473, 1.005111, [(0.047702473, 1.005111)]]
69 [0.043249767, 1.0046339, [(0.043249767, 1.0046339)]]
70 [0.039213181, 1.0042014, [(0.039213181, 1.0042014)]]
71 [0.035553534, 1.0038093, [(0.035553537, 1.0038093)]]
72 [0.032236177, 1.0034539, [(0.032236181, 1.0034539)]]
73 [0.029227654, 1.0031315, [(0.029227655, 1.0031315)]]
74 [0.02649951, 1.0028392, [(0.02649951, 1.0028392)]]
75 [0.024025917, 1.0025742, [(0.024025917, 1.0025742)]]
76 [0.021783749, 1.002334, [(0.021783751, 1.002334)]]
77 [0.01975123, 1.0021162, [(0.019751232, 1.0021162)]]
78 [0.017907381, 1.0019187, [(0.017907381, 1.0019187)]]
79 [0.016236702, 1.0017396, [(0.016236704, 1.0017396)]]
80 [0.014720838, 1.0015773, [(0.014720838, 1.0015773)]]
81 [0.01334699, 1.00143, [(0.013346991, 1.00143)]]
82 [0.012100856, 1.0012965, [(0.012100856, 1.0012965)]]
83 [0.010971785, 1.0011755, [(0.010971785, 1.0011755)]]
84 [0.0099481745, 1.0010659, [(0.0099481754, 1.0010659)]]
85 [0.009018898, 1.0009663, [(0.009018898, 1.0009663)]]
86 [0.0081768828, 1.0008761, [(0.0081768837, 1.0008761)]]
87 [0.0074131489, 1.0007943, [(0.0074131489, 1.0007943)]]
88 [0.0067215762, 1.0007201, [(0.0067215762, 1.0007201)]]
89 [0.0060940585, 1.0006529, [(0.0060940585, 1.0006529)]]
90 [0.0055252709, 1.000592, [(0.0055252714, 1.000592)]]
91 [0.0050098896, 1.0005368, [(0.0050098896, 1.0005368)]]
92 [0.0045425892, 1.0004867, [(0.0045425892, 1.0004867)]]
93 [0.0041189194, 1.0004413, [(0.0041189194, 1.0004413)]]
94 [0.0037339528, 1.0004001, [(0.003733953, 1.0004001)]]
95 [0.0033854644, 1.0003628, [(0.0033854644, 1.0003628)]]
96 [0.0030694802, 1.0003289, [(0.0030694804, 1.0003289)]]
97 [0.0027837753, 1.0002983, [(0.0027837753, 1.0002983)]]
98 [0.0025234222, 1.0002704, [(0.0025234222, 1.0002704)]]
99 [0.0022875469, 1.0002451, [(0.0022875469, 1.0002451)]]
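The first and third values printed on each line agree (up to float32 rounding) because the hand-coded formula is exactly the derivative TensorFlow computes: d/dW [ mean((W·x − y)²) ] = 2 · mean((W·x − y)·x), which is the `gradient` node above, and compute_gradients(cost, [W]) returns the same quantity paired with W. As a sanity check, here is a minimal sketch in plain NumPy (not part of the lab code; the names are illustrative) that compares this analytic gradient with a centered finite difference at the initial W = 5.0.

import numpy as np

x = np.array([1., 2., 3.])
y = np.array([1., 2., 3.])

def cost(w):
    # mean squared error, same as tf.reduce_mean(tf.square(hypothesis - Y))
    return np.mean((w * x - y) ** 2)

w = 5.0
analytic = 2 * np.mean((w * x - y) * x)                 # same formula as `gradient`
eps = 1e-4
numeric = (cost(w + eps) - cost(w - eps)) / (2 * eps)   # centered finite difference
print(analytic, numeric)  # both approximately 37.3333, matching step 0 above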