#!/usr/bin/env python

import glob
import random
import os.path as op
import pickle

import numpy as np

# Save parameters every few thousand SGD iterations as a fail-safe.
SAVE_PARAMS_EVERY = 5000
def load_saved_params():
    """
    A helper function that loads previously saved parameters and resets
    the iteration start.
    """
    st = 0
    for f in glob.glob("saved_params_*.npy"):
        iter_ = int(op.splitext(op.basename(f))[0].split("_")[2])
        if iter_ > st:
            st = iter_

    if st > 0:
        print("Resuming from iteration %d" % st)
        with open("saved_params_%d.npy" % st, "rb") as f:
            params = pickle.load(f)
            state = pickle.load(f)
        return st, params, state
    else:
        return st, None, None
def save_params(iter_, params):
    with open("saved_params_%d.npy" % iter_, "wb") as f:
        pickle.dump(params, f)
        # Also save the RNG state so a resumed run draws the same samples.
        pickle.dump(random.getstate(), f)
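# A minimal round-trip sketch (illustrative, not part of the original script):
# the two pickle.dump calls above must be mirrored by two pickle.load calls in
# the same order, which is exactly what load_saved_params does.
#
#   save_params(100, np.zeros(3))             # writes saved_params_100.npy
#   st, params, state = load_saved_params()   # -> 100, the array, the RNG state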
def sgd(f, x0, step, iterations, postprocessing=None, useSaved=False,
        PRINT_EVERY=10):
    """ Stochastic Gradient Descent

    Implement the stochastic gradient descent method in this function.

    Arguments:
    f -- the function to optimize; it should take a single
         argument and yield two outputs, a cost and the gradient
         with respect to the arguments
    x0 -- the initial point to start SGD from
    step -- the step size for SGD
    iterations -- total iterations to run SGD for
    postprocessing -- postprocessing function for the parameters
         if necessary. In the case of word2vec we will need to
         normalize the word vectors to have unit length.
    PRINT_EVERY -- specifies how many iterations to output loss

    Return:
    x -- the parameter value after SGD finishes
    """
    # Anneal learning rate every several iterations
    ANNEAL_EVERY = 20000

    if useSaved:
        start_iter_, oldx, state = load_saved_params()
        if start_iter_ > 0:
            x0 = oldx
            # Integer division: halve the step once per completed annealing
            # period (plain / would be float division in Python 3).
            step *= 0.5 ** (start_iter_ // ANNEAL_EVERY)
        if state:
            random.setstate(state)
    else:
        start_iter_ = 0

    x = x0
    if not postprocessing:
        postprocessing = lambda x: x

    expcost = None
    for iter_ in range(start_iter_ + 1, iterations + 1):
        # Don't forget to apply the postprocessing after every iteration!
        # You might want to print the progress every few iterations.
        cost = None
        ### YOUR CODE HERE
        # The mini-batching and learning-rate decay are already handled;
        # just take one gradient step: evaluate f, then move x against the
        # gradient scaled by the learning rate.
        cost, grad = f(x)
        x -= step * grad
        x = postprocessing(x)
        ### END YOUR CODE
        if iter_ % PRINT_EVERY == 0:
            # Report an exponential moving average of the cost so the printed
            # loss is less noisy than a single minibatch estimate.
            if expcost is None:
                expcost = cost
            else:
                expcost = .95 * expcost + .05 * cost
            print("iter %d: %f" % (iter_, expcost))

        try:
            if iter_ % SAVE_PARAMS_EVERY == 0 and useSaved:
                save_params(iter_, x)
                print("saved!")
            if iter_ % ANNEAL_EVERY == 0:
                step *= 0.5
        except Exception as e:
            print(str(e))

    return x
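# The sgd docstring mentions normalizing word vectors to unit length as the
# word2vec postprocessing step. A minimal sketch of such a function follows;
# the name normalize_rows and the epsilon guard are assumptions, not part of
# the original assignment code.
def normalize_rows(x):
    """Scale each row of the matrix x to have unit L2 norm."""
    norms = np.sqrt(np.sum(x ** 2, axis=1, keepdims=True))
    # Guard against division by zero for all-zero rows (an assumption).
    return x / np.maximum(norms, 1e-12)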
def sanity_check():
    quad = lambda x: (np.sum(x ** 2), x * 2)

    print("Running sanity checks...")
    t1 = sgd(quad, 0.5, 0.01, 1000, PRINT_EVERY=100)
    print("test 1 result:", t1)
    assert abs(t1) <= 1e-6

    t2 = sgd(quad, 0.0, 0.01, 1000, PRINT_EVERY=100)
    print("test 2 result:", t2)
    assert abs(t2) <= 1e-6

    t3 = sgd(quad, -1.5, 0.01, 1000, PRINT_EVERY=100)
    print("test 3 result:", t3)
    assert abs(t3) <= 1e-6

    print("")
if __name__ == "__main__":
    sanity_check()
    # your_sanity_checks()
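# A hedged usage sketch (not from the original script): optimizing a
# matrix-valued quadratic while keeping rows unit-length via the
# postprocessing hook. Shapes and step size are illustrative assumptions.
#
#   x0 = np.random.randn(4, 3)
#   quad_mat = lambda x: (np.sum(x ** 2), 2 * x)
#   x_unit = sgd(quad_mat, x0, 0.01, 1000,
#                postprocessing=normalize_rows, PRINT_EVERY=100)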
# Expected output:
#
# Running sanity checks...
# iter 100: 0.004578
# iter 200: 0.004353
# iter 300: 0.004136
# iter 400: 0.003929
# iter 500: 0.003733
# iter 600: 0.003546
# iter 700: 0.003369
# iter 800: 0.003200
# iter 900: 0.003040
# iter 1000: 0.002888
# test 1 result: 8.414836786079764e-10
# iter 100: 0.000000
# ... (the loss for test 2 stays at 0.000000 through iter 1000)
# test 2 result: 0.0
# iter 100: 0.041205
# iter 200: 0.039181
# iter 300: 0.037222
# iter 400: 0.035361
# iter 500: 0.033593
# iter 600: 0.031913
# iter 700: 0.030318
# iter 800: 0.028802
# iter 900: 0.027362
# iter 1000: 0.025994
# test 3 result: -2.524451035823933e-09