Ising Model (CUDAnative.jl)¶

Copyright (c) 2019 Lirimy
Released under the MIT license
https://opensource.org/licenses/MIT

Reference¶

Simulation of Ising spin model using CUDA
http://www.vinkovic.org/Projects/MindExercises/radnje/2011_Jurisic.pdf

CPU Implimentation by Gen Kuroki
https://nbviewer.jupyter.org/gist/genkuroki/4fa46c68c56ee0f3b1a6fc8ec628b9d7

In [ ]:

In [1]:

versioninfo()
# GeForce GTX 980

Julia Version 1.1.1
Commit 55e36cc308 (2019-05-16 04:10 UTC)
Platform Info:
  OS: Linux (x86_64-pc-linux-gnu)
  CPU: Intel(R) Core(TM) i5-2500K CPU @ 3.30GHz
  WORD_SIZE: 64
  LIBM: libopenlibm
  LLVM: libLLVM-6.0.1 (ORCJIT, sandybridge)

In [2]:

ENV["JULIA_DEBUG"] = "CUDAnative";

In [3]:

using CUDAnative
#using CuArrays
#using CUDAdrv

In [4]:

using Random: rand!
using CuArrays.CURAND

In [5]:

using Parameters

In [6]:

# Show results

# ]add ImageMagick
# ]add https://github.com/Lirimy/ImageContainers.jl
# ]add https://github.com/Lirimy/SimpleHeatmaps.jl

using SimpleHeatmaps

In [7]:

function shrinkshow(s, ratio::Int=4)
    
    r = ratio
    ss = Array(s)
    m, n = size(ss)
    @assert mod(m, r) == 0
    @assert mod(n, r) == 0
    
    shrinked = r^2 \ reshape(
        sum(sum(
            reshape(ss, (r, m÷r, r, n÷r)),
        dims=1), dims=3),
        (m÷r, n÷r)
    )
    
    matshow(@. 2 \ (shrinked + 1))
end

Out[7]:

shrinkshow (generic function with 2 methods)

In [ ]:

In [8]:

struct IsingPlan
    rnd
    m
    n
    β::Float32
    threads
    blocks
end

function genplan(m=1024, n=1024, β=log(1+sqrt(2)))
    
    rnd = curand(m, n)
    s = @. Int32(2 * (rnd < 0.5f0) - 1)
    β = Float32(β)
    threads = (32, 32)
    blocks = @. ceil(Int, (m, n) / threads)
    
    (s, IsingPlan(rnd, m, n, β, threads, blocks))
end

Out[8]:

genplan (generic function with 4 methods)

In [9]:

s, iplan = genplan(3072, 4096)

┌ Debug: Initializing CUDA after call to cuMemAlloc
└ @ CUDAnative /home/lirimy/.julia/packages/CUDAnative/ytV2j/src/init.jl:32
┌ Debug: (Re)compiling function
│   job = CUDAnative.CompilerJob(getfield(GPUArrays, Symbol("##23#24"))(), Tuple{CuArrays.CuKernelState,CuDeviceArray{Int32,2,CUDAnative.AS.Global},Base.Broadcast.Broadcasted{Nothing,Tuple{Base.OneTo{Int64},Base.OneTo{Int64}},getfield(CuArrays, Symbol("##39#40")){Int32},Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.ArrayStyle{CuArrays.CuArray},Nothing,typeof(-),Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.ArrayStyle{CuArrays.CuArray},Nothing,typeof(*),Tuple{Int64,Base.Broadcast.Broadcasted{Base.Broadcast.ArrayStyle{CuArrays.CuArray},Nothing,typeof(<),Tuple{Base.Broadcast.Extruded{CuDeviceArray{Float32,2,CUDAnative.AS.Global},Tuple{Bool,Bool},Tuple{Int64,Int64}},Float32}}}},Int64}}}}}, v"5.2.0", true, nothing, nothing, nothing, nothing, nothing)
└ @ CUDAnative /home/lirimy/.julia/packages/CUDAnative/ytV2j/src/compiler/driver.jl:36
┌ Debug: Compiled #23 to PTX 5.2.0 for SM 5.2.0 using 36 registers.
│ Memory usage: 8 bytes local, 0 bytes shared, 256 bytes constant
└ @ CUDAnative /home/lirimy/.julia/packages/CUDAnative/ytV2j/src/execution.jl:382

Out[9]:

(Int32[-1 -1 … 1 1; -1 1 … -1 1; … ; -1 -1 … -1 -1; -1 -1 … 1 -1], IsingPlan(Float32[0.740219 0.595785 … 0.410974 0.055439; 0.920994 0.00549043 … 0.807365 0.0405582; … ; 0.601098 0.929978 … 0.950703 0.76796; 0.677859 0.886094 … 0.0388521 0.772511], 3072, 4096, 0.8813736f0, (32, 32), (96, 128)))

In [ ]:

In [10]:

function _update!(s, rnd, m, n, β, white=false)
    
    i = (blockIdx().x-1) * blockDim().x + threadIdx().x
    j = (blockIdx().y-1) * blockDim().y + threadIdx().y
    
    if i ≤ m && j ≤ n && iseven(i+j) == white
        
        @inbounds ajs = (
            s[ifelse(i+1 ≤ m, i+1, 1), j] +
            s[ifelse(i-1 ≥ 1, i-1, m), j] +
            s[i, ifelse(j+1 ≤ n, j+1, 1)] +
            s[i, ifelse(j-1 ≥ 1, j-1, n)]
        )
        @inbounds prob = CUDAnative.exp_fast(-β * s[i, j] * ajs)
        @inbounds s[i, j] = ifelse(rnd[i, j] < prob, -s[i, j], s[i, j])
    end
    
    return
end

Out[10]:

_update! (generic function with 2 methods)

In [11]:

function update!(s, iplan)
    @unpack rnd, m, n, β, threads, blocks = iplan
    rand!(rnd)
    @cuda blocks=blocks threads=threads _update!(s, rnd, m, n, β, true)
    @cuda blocks=blocks threads=threads _update!(s, rnd, m, n, β, false)
    nothing
end

Out[11]:

update! (generic function with 1 method)

In [12]:

# compile
update!(s, iplan)

┌ Debug: (Re)compiling function
│   job = CUDAnative.CompilerJob(_update!, Tuple{CuDeviceArray{Int32,2,CUDAnative.AS.Global},CuDeviceArray{Float32,2,CUDAnative.AS.Global},Int64,Int64,Float32,Bool}, v"5.2.0", true, nothing, nothing, nothing, nothing, nothing)
└ @ CUDAnative /home/lirimy/.julia/packages/CUDAnative/ytV2j/src/compiler/driver.jl:36
┌ Debug: Compiled _update! to PTX 5.2.0 for SM 5.2.0 using 26 registers.
│ Memory usage: 0 bytes local, 0 bytes shared, 256 bytes constant
└ @ CUDAnative /home/lirimy/.julia/packages/CUDAnative/ytV2j/src/execution.jl:382

In [13]:

# Main

@time for i in 1:10000
    update!(s, iplan)
end

shrinkshow(s, 8)

 22.377500 seconds (1.77 M allocations: 60.425 MiB, 0.07% gc time)

Out[13]:

In [14]:

# Animation

anim = @time openanim(:gif) do aio
    
    for i in 1:10 #frames
        
        for j in 1:16
            update!(s, iplan)
        end
        
        img = shrinkshow(s, 16)
        addframe(aio, img)
    end
end

  3.389458 seconds (8.64 M allocations: 971.415 MiB, 11.64% gc time)

Out[14]:

In [ ]: