# After installing and starting Julia, run the following to install the required packages (Julia 0.6 Pkg API):
# Pkg.init(); Pkg.update()
# for p in ("CUDAdrv","IJulia","PyCall","JLD2","Knet"); Pkg.add(p); end
# Pkg.checkout("Knet","ilkarman") # make sure we have the right Knet version
# Pkg.build("Knet")
using Knet
True = true # define True so the Python params file below also parses as Julia
include("common/params_lstm.py");
println("OS: ", Sys.KERNEL)
println("Julia: ", VERSION)
println("Knet: ", Pkg.installed("Knet"))
println("GPU: ", readstring(`nvidia-smi --query-gpu=name --format=csv,noheader`))
OS: Linux
Julia: 0.6.1
Knet: 0.8.5+
GPU: Tesla K80
# define model
function initmodel()
    rnnSpec, rnnWeights = rnninit(EMBEDSIZE, NUMHIDDEN; rnnType=:gru)
    inputMatrix = KnetArray(xavier(Float32, EMBEDSIZE, MAXFEATURES))
    outputMatrix = KnetArray(xavier(Float32, 2, NUMHIDDEN))
    return rnnSpec, (rnnWeights, inputMatrix, outputMatrix)
end;
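# Sketch (not part of the original notebook): a quick sanity check of the
# initialized weight shapes; assumes a CUDA GPU, since initmodel allocates KnetArrays.
testspec, (testR, testI, testO) = initmodel()
@assert size(testI) == (EMBEDSIZE, MAXFEATURES)  # embedding lookup table
@assert size(testO) == (2, NUMHIDDEN)            # hidden-to-output weights
testspec = testR = testI = testO = nothing;      # let the GPU arrays be collected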
# define loss and its gradient
function predict(weights, inputs, rnnSpec)
    rnnWeights, inputMatrix, outputMatrix = weights       # (1,1,W), (X,V), (2,H)
    indices = hcat(inputs...)'                            # (B,T)
    rnnInput = inputMatrix[:,indices]                     # (X,B,T)
    rnnOutput = rnnforw(rnnSpec, rnnWeights, rnnInput)[1] # (H,B,T)
    return outputMatrix * rnnOutput[:,:,end]              # (2,H) * (H,B) = (2,B)
end
loss(w,x,y,r) = nll(predict(w,x,r), y)  # negative log likelihood of the gold labels
lossgradient = grad(loss);              # gradient of loss w.r.t. its first argument
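# Sketch (illustrative, not in the original notebook): one forward pass on a
# tiny fake batch to confirm the (2,B) score shape; assumes a GPU is present.
spec0, w0 = initmodel()
fake = [rand(1:MAXFEATURES, MAXLEN) for _ in 1:4]  # B=4 sequences of length T=MAXLEN
@assert size(predict(w0, fake, spec0)) == (2, 4)
spec0 = w0 = fake = nothing;                       # free before the real run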
# load data
include(Knet.dir("data","imdb.jl"))
@time (xtrn,ytrn,xtst,ytst,imdbdict)=imdb(maxlen=MAXLEN,maxval=MAXFEATURES)
for d in (xtrn,ytrn,xtst,ytst); println(summary(d)); end
INFO: Loading IMDB...
 10.266185 seconds (15.94 M allocations: 835.780 MiB, 3.98% gc time)
25000-element Array{Array{Int32,1},1}
25000-element Array{Int8,1}
25000-element Array{Array{Int32,1},1}
25000-element Array{Int8,1}
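# Sketch (not in the original notebook): imdbdict maps word => index, so
# inverting it decodes a review back to words; ids without an entry (special
# tokens introduced by the loader) fall back to "?".
vocab = Dict(v => k for (k, v) in imdbdict)
println(join([get(vocab, i, "?") for i in xtrn[1]], " "))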
# prepare for training
weights = nothing; knetgc(); # Reclaim memory from previous run
rnnSpec,weights = initmodel()
optim = optimizers(weights, Adam; lr=LR, beta1=BETA_1, beta2=BETA_2, eps=EPS); # one Adam state per weight array
# cold start: run one epoch first to absorb JIT-compilation and GPU-initialization overhead, then re-initialize before the timed run
@time for (x,y) in minibatch(xtrn, ytrn, BATCHSIZE; shuffle=true)
    grads = lossgradient(weights, x, y, rnnSpec)
    update!(weights, grads, optim)
end
14.319533 seconds (2.08 M allocations: 138.579 MiB, 3.58% gc time)
# prepare for training: re-initialize so the timed run starts from fresh weights
weights = nothing; knetgc(); # Reclaim memory from previous run
rnnSpec,weights = initmodel()
optim = optimizers(weights, Adam; lr=LR, beta1=BETA_1, beta2=BETA_2, eps=EPS);
# full training run; ~29s total on this GPU (timed below)
info("Training...")
@time for epoch in 1:EPOCHS
    @time for (x,y) in minibatch(xtrn, ytrn, BATCHSIZE; shuffle=true)
        grads = lossgradient(weights, x, y, rnnSpec)
        update!(weights, grads, optim)
    end
end
INFO: Training...
  9.776101 seconds (356.68 k allocations: 45.007 MiB, 4.79% gc time)
  9.786896 seconds (352.22 k allocations: 44.658 MiB, 5.91% gc time)
  9.732747 seconds (352.94 k allocations: 44.669 MiB, 5.92% gc time)
 29.298876 seconds (1.07 M allocations: 134.572 MiB, 5.54% gc time)
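# Sketch (hypothetical addition): to monitor convergence, one could also
# report the mean minibatch loss on the training data after each epoch:
# println("mean loss: ", mean(loss(weights, x, y, rnnSpec) for (x, y) in minibatch(xtrn, ytrn, BATCHSIZE)))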
info("Testing...")
@time accuracy(weights, minibatch(xtst,ytst,BATCHSIZE), (w,x)->predict(w,x,rnnSpec))
INFO: Testing...
2.999301 seconds (70.50 k allocations: 34.680 MiB, 11.61% gc time)
0.844511217948718
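# Sketch (hypothetical): score a single held-out review; indmax picks the
# class with the higher score (how class indices map to sentiment follows
# the labels in ytst, so compare against the gold label).
scores = Array(predict(weights, [xtst[1]], rnnSpec))  # (2,1) class scores on the CPU
println("predicted class: ", indmax(scores), ", gold label: ", ytst[1])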