# After installing and starting Julia run the following to install the required packages:
# Pkg.init(); Pkg.update()
# for p in ("CUDAdrv","IJulia","PyCall","JLD2","Knet"); Pkg.add(p); end
# Pkg.checkout("Knet","ilkarman") # make sure we have the right Knet version
# Pkg.build("Knet")

using Knet
True = true # so the Python params file below also parses as Julia
include("common/params_lstm.py");

println("OS: ", Sys.KERNEL)
println("Julia: ", VERSION)
println("Knet: ", Pkg.installed("Knet"))
println("GPU: ", readstring(`nvidia-smi --query-gpu=name --format=csv,noheader`))

# Define the model: a GRU over learned word embeddings, followed by a dense
# layer that maps the final hidden state to the two sentiment classes.
function initmodel()
    rnnSpec, rnnWeights = rnninit(EMBEDSIZE, NUMHIDDEN; rnnType=:gru)
    inputMatrix  = KnetArray(xavier(Float32, EMBEDSIZE, MAXFEATURES))
    outputMatrix = KnetArray(xavier(Float32, 2, NUMHIDDEN))
    return rnnSpec, (rnnWeights, inputMatrix, outputMatrix)
end;

# Define the loss and its gradient.
function predict(weights, inputs, rnnSpec)
    rnnWeights, inputMatrix, outputMatrix = weights         # (1,1,W), (X,V), (2,H)
    indices   = hcat(inputs...)'                            # (B,T)
    rnnInput  = inputMatrix[:, indices]                     # (X,B,T)
    rnnOutput = rnnforw(rnnSpec, rnnWeights, rnnInput)[1]   # (H,B,T)
    return outputMatrix * rnnOutput[:, :, end]              # (2,H) * (H,B) = (2,B)
end

loss(w, x, y, r) = nll(predict(w, x, r), y)
lossgradient = grad(loss);

# Load the IMDB data.
include(Knet.dir("data", "imdb.jl"))
@time (xtrn, ytrn, xtst, ytst, imdbdict) = imdb(maxlen=MAXLEN, maxval=MAXFEATURES)
for d in (xtrn, ytrn, xtst, ytst); println(summary(d)); end

# Prepare for training.
weights = nothing; knetgc(); # reclaim memory from a previous run
rnnSpec, weights = initmodel()
optim = optimizers(weights, Adam; lr=LR, beta1=BETA_1, beta2=BETA_2, eps=EPS);

# Cold start: run one pass so GPU kernels are compiled before the timed run.
@time for (x, y) in minibatch(xtrn, ytrn, BATCHSIZE; shuffle=true)
    grads = lossgradient(weights, x, y, rnnSpec)
    update!(weights, grads, optim)
end

# Re-initialize the model and optimizer so the timed run starts from scratch.
weights = nothing; knetgc(); # reclaim memory from the cold-start run
rnnSpec, weights = initmodel()
optim = optimizers(weights, Adam; lr=LR, beta1=BETA_1, beta2=BETA_2, eps=EPS);

info("Training...") # ~29s
@time for epoch in 1:EPOCHS
    @time for (x, y) in minibatch(xtrn, ytrn, BATCHSIZE; shuffle=true)
        grads = lossgradient(weights, x, y, rnnSpec)
        update!(weights, grads, optim)
    end
end

info("Testing...")
@time accuracy(weights, minibatch(xtst, ytst, BATCHSIZE), (w, x) -> predict(w, x, rnnSpec))
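
# Illustrative extra (not part of the original notebook): score a single review
# with the trained weights. This is a minimal sketch assuming `imdbdict` maps
# lowercase words to the integer ids used by imdb(); the out-of-vocabulary id
# (2 here) and any index offsets for special tokens are assumptions, so check
# imdb.jl for the exact convention. `predictsentiment` is a hypothetical helper.
function predictsentiment(weights, rnnSpec, words)
    ids = [ get(imdbdict, lowercase(w), 2) for w in words ] # word -> integer id
    scores = predict(weights, [ids], rnnSpec)               # (2,1) class scores
    return indmax(Array(scores))                            # predicted class: 1 or 2
end
# Example: predictsentiment(weights, rnnSpec, split("this movie was great"))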