import OpenCL
const cl = OpenCL
OpenCL
using PyPlot
w = 2048 * 2;
h = 2048 * 2;
@printf("Size %i MB\n", sizeof(Complex64) * w * h / 1024 / 1024)
Size 128 MB
# julia set
# (the familiar mandelbrot set is obtained by setting c==z initially)
function julia(z; maxiter=200)
c = complex64(-0.5, 0.75)
for n = 1:maxiter
if abs2(z) > 4.0
return n-1
end
z = z*z + c
end
return maxiter
end
julia (generic function with 1 method)
q = [complex64(r,i) for i=1:-(2.0/w):-1, r=-1.5:(3.0/h):1.5];
# TODO: is is a fair comparison?
function ordinary_julia(q)
(h, w) = size(q)
m = Array(Uint8, (h, w));
for i in 1:w
for j in 1:h
@inbounds v = q[j, i]
@inbounds m[j, i] = julia(v)
end
end
return m
end
ordinary_julia (generic function with 1 method)
#code_llvm(ordinary_julia, (Array{Complex64},))
@time m = ordinary_julia(q);
elapsed time: 0.281128794 seconds (16785584 bytes allocated)
get_cmap("RdGy")
imshow(m, cmap="RdGy", extent=[-1.5,1.5,-1,1]);
julia_source = "
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
__kernel void julia(__global float2 *q,
__global ushort *output,
ushort const maxiter)
{
int gid = get_global_id(0);
float nreal = 0;
float real = q[gid].x;
float imag = q[gid].y;
output[gid] = 0;
for(int curiter = 0; curiter < maxiter; curiter++) {
if (real*real + imag*imag > 4.0f) {
output[gid] = curiter;
}
nreal = real*real - imag*imag + -0.5f;
imag = 2*real*imag + 0.75f;
real = nreal;
}
}";
function julia_opencl(q::Array{Complex64}, maxiter::Int64)
ctx = cl.Context(cl.devices()[4])
queue = cl.CmdQueue(ctx)
out = Array(Uint16, size(q))
q_buff = cl.Buffer(Complex64, ctx, (:r, :copy), hostbuf=q)
o_buff = cl.Buffer(Uint16, ctx, :w, length(out))
prg = cl.Program(ctx, source=julia_source) |> cl.build!
k = cl.Kernel(prg, "julia")
cl.call(queue, k, length(q), nothing, q_buff, o_buff, uint16(maxiter))
cl.copy!(queue, out, o_buff)
return out
end
julia_opencl (generic function with 1 method)
@time m = julia_opencl(q, 200);
elapsed time: 0.166367052 seconds (33580160 bytes allocated)
imshow(m, cmap="RdGy", extent=[-1.5,1.5,-1,1]);
mandel_source = "
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
__kernel void mandelbrot(__global float2 *q,
__global ushort *output,
ushort const maxiter)
{
int gid = get_global_id(0);
float nreal, real = 0;
float imag = 0;
output[gid] = 0;
for(int curiter = 0; curiter < maxiter; curiter++) {
nreal = real*real - imag*imag + q[gid].x;
imag = 2* real*imag + q[gid].y;
real = nreal;
if (real*real + imag*imag > 4.0f)
output[gid] = curiter;
}
}";
function mandel_opencl(q::Array{Complex64}, maxiter::Int64)
ctx = cl.Context(cl.devices()[4])
queue = cl.CmdQueue(ctx)
out = Array(Uint16, size(q))
q_buff = cl.Buffer(Complex64, ctx, (:r, :copy), hostbuf=q)
o_buff = cl.Buffer(Uint16, ctx, :w, length(out))
prg = cl.Program(ctx, source=mandel_source) |> cl.build!
k = cl.Kernel(prg, "mandelbrot")
cl.call(queue, k, length(out), nothing, q_buff, o_buff, uint16(maxiter))
cl.copy!(queue, out, o_buff)
return out
end
mandel_opencl (generic function with 1 method)
y1 = -1.0
y2 = 1.0
x1 = -1.5
x2 = 0.5
q = Array(Complex64, (h, w))
for x in 1:w
for y in 1:h
xx = x1 + x * ((x2 - x1) / w)
yy = y1 + y * ((y2 - y1) / h)
@inbounds q[y, x] = complex64(xx, yy)
end
end
@time m = mandel_opencl(q, 30);
elapsed time: 0.127250342 seconds (33563760 bytes allocated)
imshow(m, cmap="RdGy")
PyObject <matplotlib.image.AxesImage object at 0x7fbb3b84fcd0>