Copyright (c) 2019 Lirimy
Released under the MIT license
https://opensource.org/licenses/MIT
Simulation of Ising spin model using CUDA
http://www.vinkovic.org/Projects/MindExercises/radnje/2011_Jurisic.pdf
CPU Implimentation by Gen Kuroki
https://nbviewer.jupyter.org/gist/genkuroki/4fa46c68c56ee0f3b1a6fc8ec628b9d7
versioninfo()
# GeForce GTX 980
Julia Version 1.1.1 Commit 55e36cc308 (2019-05-16 04:10 UTC) Platform Info: OS: Linux (x86_64-pc-linux-gnu) CPU: Intel(R) Core(TM) i5-2500K CPU @ 3.30GHz WORD_SIZE: 64 LIBM: libopenlibm LLVM: libLLVM-6.0.1 (ORCJIT, sandybridge)
ENV["JULIA_DEBUG"] = "CUDAnative";
using CUDAnative
#using CuArrays
#using CUDAdrv
using Random: rand!
using CuArrays.CURAND
using Parameters
# Show results
# ]add ImageMagick
# ]add https://github.com/Lirimy/ImageContainers.jl
# ]add https://github.com/Lirimy/SimpleHeatmaps.jl
using SimpleHeatmaps
function shrinkshow(s, ratio::Int=4)
r = ratio
ss = Array(s)
m, n = size(ss)
@assert mod(m, r) == 0
@assert mod(n, r) == 0
shrinked = r^2 \ reshape(
sum(sum(
reshape(ss, (r, m÷r, r, n÷r)),
dims=1), dims=3),
(m÷r, n÷r)
)
matshow(@. 2 \ (shrinked + 1))
end
shrinkshow (generic function with 2 methods)
struct IsingPlan
rnd
m
n
β::Float32
threads
blocks
end
function genplan(m=1024, n=1024, β=log(1+sqrt(2)))
rnd = curand(m, n)
s = @. Int32(2 * (rnd < 0.5f0) - 1)
β = Float32(β)
threads = (32, 32)
blocks = @. ceil(Int, (m, n) / threads)
(s, IsingPlan(rnd, m, n, β, threads, blocks))
end
genplan (generic function with 4 methods)
s, iplan = genplan(3072, 4096)
┌ Debug: Initializing CUDA after call to cuMemAlloc └ @ CUDAnative /home/lirimy/.julia/packages/CUDAnative/ytV2j/src/init.jl:32 ┌ Debug: (Re)compiling function │ job = CUDAnative.CompilerJob(getfield(GPUArrays, Symbol("##23#24"))(), Tuple{CuArrays.CuKernelState,CuDeviceArray{Int32,2,CUDAnative.AS.Global},Base.Broadcast.Broadcasted{Nothing,Tuple{Base.OneTo{Int64},Base.OneTo{Int64}},getfield(CuArrays, Symbol("##39#40")){Int32},Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.ArrayStyle{CuArrays.CuArray},Nothing,typeof(-),Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.ArrayStyle{CuArrays.CuArray},Nothing,typeof(*),Tuple{Int64,Base.Broadcast.Broadcasted{Base.Broadcast.ArrayStyle{CuArrays.CuArray},Nothing,typeof(<),Tuple{Base.Broadcast.Extruded{CuDeviceArray{Float32,2,CUDAnative.AS.Global},Tuple{Bool,Bool},Tuple{Int64,Int64}},Float32}}}},Int64}}}}}, v"5.2.0", true, nothing, nothing, nothing, nothing, nothing) └ @ CUDAnative /home/lirimy/.julia/packages/CUDAnative/ytV2j/src/compiler/driver.jl:36 ┌ Debug: Compiled #23 to PTX 5.2.0 for SM 5.2.0 using 36 registers. │ Memory usage: 8 bytes local, 0 bytes shared, 256 bytes constant └ @ CUDAnative /home/lirimy/.julia/packages/CUDAnative/ytV2j/src/execution.jl:382
(Int32[-1 -1 … 1 1; -1 1 … -1 1; … ; -1 -1 … -1 -1; -1 -1 … 1 -1], IsingPlan(Float32[0.740219 0.595785 … 0.410974 0.055439; 0.920994 0.00549043 … 0.807365 0.0405582; … ; 0.601098 0.929978 … 0.950703 0.76796; 0.677859 0.886094 … 0.0388521 0.772511], 3072, 4096, 0.8813736f0, (32, 32), (96, 128)))
function _update!(s, rnd, m, n, β, white=false)
i = (blockIdx().x-1) * blockDim().x + threadIdx().x
j = (blockIdx().y-1) * blockDim().y + threadIdx().y
if i ≤ m && j ≤ n && iseven(i+j) == white
@inbounds ajs = (
s[ifelse(i+1 ≤ m, i+1, 1), j] +
s[ifelse(i-1 ≥ 1, i-1, m), j] +
s[i, ifelse(j+1 ≤ n, j+1, 1)] +
s[i, ifelse(j-1 ≥ 1, j-1, n)]
)
@inbounds prob = CUDAnative.exp_fast(-β * s[i, j] * ajs)
@inbounds s[i, j] = ifelse(rnd[i, j] < prob, -s[i, j], s[i, j])
end
return
end
_update! (generic function with 2 methods)
function update!(s, iplan)
@unpack rnd, m, n, β, threads, blocks = iplan
rand!(rnd)
@cuda blocks=blocks threads=threads _update!(s, rnd, m, n, β, true)
@cuda blocks=blocks threads=threads _update!(s, rnd, m, n, β, false)
nothing
end
update! (generic function with 1 method)
# compile
update!(s, iplan)
┌ Debug: (Re)compiling function │ job = CUDAnative.CompilerJob(_update!, Tuple{CuDeviceArray{Int32,2,CUDAnative.AS.Global},CuDeviceArray{Float32,2,CUDAnative.AS.Global},Int64,Int64,Float32,Bool}, v"5.2.0", true, nothing, nothing, nothing, nothing, nothing) └ @ CUDAnative /home/lirimy/.julia/packages/CUDAnative/ytV2j/src/compiler/driver.jl:36 ┌ Debug: Compiled _update! to PTX 5.2.0 for SM 5.2.0 using 26 registers. │ Memory usage: 0 bytes local, 0 bytes shared, 256 bytes constant └ @ CUDAnative /home/lirimy/.julia/packages/CUDAnative/ytV2j/src/execution.jl:382
# Main
@time for i in 1:10000
update!(s, iplan)
end
shrinkshow(s, 8)
22.377500 seconds (1.77 M allocations: 60.425 MiB, 0.07% gc time)
# Animation
anim = @time openanim(:gif) do aio
for i in 1:10 #frames
for j in 1:16
update!(s, iplan)
end
img = shrinkshow(s, 16)
addframe(aio, img)
end
end
3.389458 seconds (8.64 M allocations: 971.415 MiB, 11.64% gc time)