# After installing and starting Julia, run the following to install the required packages (Julia 0.6 Pkg API):
# Pkg.init(); Pkg.update()
# for p in ("CUDAdrv","IJulia","PyCall","JLD2","Knet"); Pkg.add(p); end
# Pkg.checkout("Knet","ilkarman") # make sure we have the right Knet version
# Pkg.build("Knet")
using Knet
True = true # define True so the Python params file below also parses as Julia
include("common/params_lstm.py");
println("OS: ", Sys.KERNEL)
println("Julia: ", VERSION)
println("Knet: ", Pkg.installed("Knet"))
println("GPU: ", readstring(`nvidia-smi --query-gpu=name --format=csv,noheader`))
OS: Linux
Julia: 0.6.1
Knet: 0.8.5+
GPU: Tesla K80
# define model
function initmodel()
    rnnSpec, rnnWeights = rnninit(EMBEDSIZE, NUMHIDDEN; rnnType=:gru)
    inputMatrix = KnetArray(xavier(Float32, EMBEDSIZE, MAXFEATURES))
    outputMatrix = KnetArray(xavier(Float32, 2, NUMHIDDEN))
    return rnnSpec, (rnnWeights, inputMatrix, outputMatrix)
end;
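# Sketch (not part of the original notebook): a quick sanity check of the
# initialized weight shapes; assumes a CUDA GPU, since initmodel allocates KnetArrays.
testspec, (testR, testI, testO) = initmodel()
@assert size(testI) == (EMBEDSIZE, MAXFEATURES)  # embedding lookup table
@assert size(testO) == (2, NUMHIDDEN)            # hidden-to-output weights
testspec = testR = testI = testO = nothing;      # let the GPU arrays be collected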
# define loss and its gradient
function predict(weights, inputs, rnnSpec)
    rnnWeights, inputMatrix, outputMatrix = weights       # (1,1,W), (X,V), (2,H)
    indices = hcat(inputs...)'                            # (B,T)
    rnnInput = inputMatrix[:,indices]                     # (X,B,T)
    rnnOutput = rnnforw(rnnSpec, rnnWeights, rnnInput)[1] # (H,B,T)
    return outputMatrix * rnnOutput[:,:,end]              # (2,H) * (H,B) = (2,B)
end
loss(w,x,y,r) = nll(predict(w,x,r), y)  # negative log likelihood of the gold labels
lossgradient = grad(loss);              # gradient of loss w.r.t. its first argument
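# Sketch (illustrative, not in the original notebook): one forward pass on a
# tiny fake batch to confirm the (2,B) score shape; assumes a GPU is present.
spec0, w0 = initmodel()
fake = [rand(1:MAXFEATURES, MAXLEN) for _ in 1:4]  # B=4 sequences of length T=MAXLEN
@assert size(predict(w0, fake, spec0)) == (2, 4)
spec0 = w0 = fake = nothing;                       # free before the real run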
# load data
include(Knet.dir("data","imdb.jl"))
@time (xtrn,ytrn,xtst,ytst,imdbdict)=imdb(maxlen=MAXLEN,maxval=MAXFEATURES)
for d in (xtrn,ytrn,xtst,ytst); println(summary(d)); end
INFO: Loading IMDB...
 10.266185 seconds (15.94 M allocations: 835.780 MiB, 3.98% gc time)
25000-element Array{Array{Int32,1},1}
25000-element Array{Int8,1}
25000-element Array{Array{Int32,1},1}
25000-element Array{Int8,1}
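# Sketch (not in the original notebook): imdbdict maps word => index, so
# inverting it decodes a review back to words; ids without an entry (special
# tokens introduced by the loader) fall back to "?".
vocab = Dict(v => k for (k, v) in imdbdict)
println(join([get(vocab, i, "?") for i in xtrn[1]], " "))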
# prepare for training
weights = nothing; knetgc(); # Reclaim memory from previous run
rnnSpec,weights = initmodel()
optim = optimizers(weights, Adam; lr=LR, beta1=BETA_1, beta2=BETA_2, eps=EPS); # one Adam state per weight array
# cold start: run one epoch first to absorb JIT-compilation and GPU-initialization overhead, then re-initialize before the timed run
@time for (x,y) in minibatch(xtrn, ytrn, BATCHSIZE; shuffle=true)
    grads = lossgradient(weights, x, y, rnnSpec)
    update!(weights, grads, optim)
end
14.319533 seconds (2.08 M allocations: 138.579 MiB, 3.58% gc time)
# prepare for training: re-initialize so the timed run starts from fresh weights
weights = nothing; knetgc(); # Reclaim memory from previous run
rnnSpec,weights = initmodel()
optim = optimizers(weights, Adam; lr=LR, beta1=BETA_1, beta2=BETA_2, eps=EPS);
# full training run; ~29s total on this GPU (timed below)
info("Training...")
@time for epoch in 1:EPOCHS
    @time for (x,y) in minibatch(xtrn, ytrn, BATCHSIZE; shuffle=true)
        grads = lossgradient(weights, x, y, rnnSpec)
        update!(weights, grads, optim)
    end
end
INFO: Training...
  9.776101 seconds (356.68 k allocations: 45.007 MiB, 4.79% gc time)
  9.786896 seconds (352.22 k allocations: 44.658 MiB, 5.91% gc time)
  9.732747 seconds (352.94 k allocations: 44.669 MiB, 5.92% gc time)
 29.298876 seconds (1.07 M allocations: 134.572 MiB, 5.54% gc time)
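# Sketch (hypothetical addition): to monitor convergence, one could also
# report the mean minibatch loss on the training data after each epoch:
# println("mean loss: ", mean(loss(weights, x, y, rnnSpec) for (x, y) in minibatch(xtrn, ytrn, BATCHSIZE)))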
info("Testing...")
@time accuracy(weights, minibatch(xtst,ytst,BATCHSIZE), (w,x)->predict(w,x,rnnSpec))
INFO: Testing...
2.999301 seconds (70.50 k allocations: 34.680 MiB, 11.61% gc time)
0.844511217948718
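# Sketch (hypothetical): score a single held-out review; indmax picks the
# class with the higher score (how class indices map to sentiment follows
# the labels in ytst, so compare against the gold label).
scores = Array(predict(weights, [xtst[1]], rnnSpec))  # (2,1) class scores on the CPU
println("predicted class: ", indmax(scores), ", gold label: ", ytst[1])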