%pylab nbagg
Populating the interactive namespace from numpy and matplotlib
import os
#os.environ["PYOPENCL_CTX"] = "0:1"
os.environ["PYOPENCL_COMPILER_OUTPUT"]="1"
import pyopencl,numpy
from pyopencl import array
shape=1024,1024
ctx=pyopencl.create_some_context()
queue=pyopencl.CommandQueue(ctx, properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
data=numpy.exp(20*numpy.random.random(shape)**2)
x = numpy.random.randint(0, shape[1], size=10*shape[1])
y = numpy.random.randint(0, shape[0], size=10*shape[0])
data[y,x] = numpy.NaN
datag=array.to_device(queue, data.astype("float32"))
prg=pyopencl.Program(ctx, open("../pyFAI/resources/openCL/sigma_clip.cl").read()).build();
m=array.empty(queue,1024,"float32");
d=array.empty(queue,1024,"float32");
ws=shape[0]//8;
local_mem = pyopencl.LocalMemory(ws * 20);
print(abs(numpy.nanmean(data, dtype="float32",axis=0)-numpy.nanmean(data, dtype="float64",axis=0)).max(),
abs(numpy.nanmean(data, dtype="float32",axis=-1)-numpy.nanmean(data, dtype="float64",axis=-1)).max(),
abs(numpy.nanstd(data, dtype="float32",axis=0)-numpy.nanstd(data, dtype="float64",axis=0)).max(),
abs(numpy.nanstd(data, dtype="float32",axis=-1)-numpy.nanstd(data, dtype="float64",axis=-1)).max())
(28.443244431167841, 27.278686532750726, 326.02058730274439, 319.114636272192)
/usr/lib/python2.7/dist-packages/pkg_resources/__init__.py:1869: UserWarning: /usr/lib/pymodules/python2.7/rpl-1.5.5.egg-info could not be properly decoded in UTF-8 warnings.warn(msg)
e=prg.mean_std_horizontal(queue, (1024, 1024//8), (1, 1024//8), datag.data, m.data, d.data, numpy.float32(-1), local_mem);
print(abs(numpy.nanmean(data, axis=1, dtype="float64")-m.get()).max(), abs(numpy.nanstd(data, axis=1, dtype="float64")-d.get()).max())
print(1e-6 * (e.profile.end - e.profile.start),"ms")
print(m)
print(d)
(3.2379647828638554, 9.2297753766179085) (0.109792, 'ms') [ 13357314. 15665482. 10535890. ..., 14345746. 11944847. 11272129.] [ 55772528. 59189568. 44902948. ..., 58260868. 51066760. 49634728.]
e = prg.mean_std_vertical(queue, (1024//8, 1024), (1024//8, 1), datag.data, m.data, d.data, numpy.float32(-1), local_mem)
print(abs(numpy.nanmean(data, axis=0, dtype="float64")-m.get()).max(),abs(numpy.nanstd(data, axis=0, dtype="float64")-d.get()).max())
print(1e-6 * (e.profile.end - e.profile.start),"ms")
print(m)
print(d)
(2.6332576386630535, 9.4150413945317268) (0.233376, 'ms') [ 9952937. 13959638. 15037384. ..., 12514131. 13894042. 13005303.] [ 46625852. 56019904. 60556288. ..., 54266764. 58167156. 55265980.]
import warnings
as_strided = numpy.lib.stride_tricks.as_strided
def sigma_clip(image, sigma_lo=3, sigma_hi=3, max_iter=5, axis=0):
image=image.copy()
mask = numpy.logical_not(numpy.isfinite(image))
dummies = mask.sum()
image[mask] = numpy.NaN
mean = numpy.nanmean(image, axis=axis)
std = numpy.nanstd(image, axis=axis)
for i in range(max_iter):
if axis==0:
mean2d = as_strided(mean, image.shape, (0, mean.strides[0]))
std2d = as_strided(std, image.shape, (0, std.strides[0]))
else:
mean2d = as_strided(mean, image.shape, (mean.strides[0], 0))
std2d = as_strided(std, image.shape, (std.strides[0], 0))
with warnings.catch_warnings():
warnings.simplefilter("ignore")
delta = (image - mean2d) / std2d
mask = numpy.logical_or(delta > sigma_hi,
delta < -sigma_lo)
dummies = mask.sum()
if dummies == 0:
break
image[mask] = numpy.NaN
mean = numpy.nanmean(image, axis=axis)
std = numpy.nanstd(image, axis=axis)
return mean, std
#work on the vertical direction
%timeit sigma_clip(data, max_iter=5)
mean_n, std_n = sigma_clip(data, max_iter=5)
print(mean_n)
print(std_n)
10 loops, best of 3: 117 ms per loop [ 92733.44112401 159155.93516633 158456.13784525 ..., 159711.39281794 159685.13550263 191007.00782656] [ 343436.52753755 557663.12703671 577658.10171051 ..., 602293.83770182 550346.87793645 630643.26263446]
e = prg.sigma_clip_vertical(queue, (1024//8, 1024), (1024//8, 1),
datag.data, m.data, d.data,
numpy.float32(-1), numpy.float32(3.0), numpy.float32(3.0), numpy.int32(5), local_mem)
e.wait()
#print(abs(data.mean(axis=0, dtype="float64")-m.get()).max(),abs(data.std(axis=0, dtype="float64")-d.get()).max())
print(1e-6 * (e.profile.end - e.profile.start),"ms")
print(m)
print(d)
print("error:", abs(mean_n-m.get()).max(),abs(std_n-d.get()).max())
(0.514336, 'ms') [ 92733.4375 159155.9375 158456.140625 ..., 159711.390625 159685.140625 191007.03125 ] [ 343436.5625 557663.125 577658.125 ..., 602293.8125 550346.875 630643.3125] ('error:', 0.025501322001218796, 0.10890190454665571)
print("Vertical speed-up", 113/(1e-6 * (e.profile.end - e.profile.start)))
('Vertical speed-up', 219.7007403720525)
#work on the horizontal direction
%timeit sigma_clip(data, max_iter=5, axis=1)
mean_n, std_n = sigma_clip(data, max_iter=5, axis=1)
print(mean_n)
print(std_n)
10 loops, best of 3: 125 ms per loop [ 151751.73710174 136632.06422316 102139.70055282 ..., 109451.53472975 117544.86283701 98161.13061407] [ 510740.43844003 471928.27273414 356969.13879386 ..., 419526.83165962 447362.00584674 354266.71424213]
e = prg.sigma_clip_horizontal(queue, (1024, 1024//8), (1, 1024//8),
datag.data, m.data, d.data,
numpy.float32(-1), numpy.float32(3.0), numpy.float32(3.0), numpy.int32(5), local_mem)
e.wait()
print(1e-6 * (e.profile.end - e.profile.start),"ms")
print(m)
print(d)
print("error:", abs(mean_n-m.get()).max(),abs(std_n-d.get()).max())
(0.419968, 'ms') [ 151751.734375 136632.0625 102139.6953125 ..., 109451.53125 117544.875 98161.1328125] [ 510740.40625 471928.3125 356969.15625 ..., 419526.84375 447361.96875 354266.75 ] ('error:', 0.028056760405888781, 0.10537082934752107)
print("Horizontal speed-up", 124/(1e-6 * (e.profile.end - e.profile.start)))
('Horizontal speed-up', 295.2605912831454)
%%timeit
prg.sigma_clip_horizontal(queue, (1024, 1024//8), (1, 1024//8),
datag.data, m.data, d.data,
numpy.float32(-1), numpy.float32(3.0), numpy.float32(3.0), numpy.int32(5), local_mem).wait()
1000 loops, best of 3: 453 µs per loop
%%timeit
prg.sigma_clip_vertical(queue, (1024//8, 1024), (1024//8, 1),
datag.data, m.data, d.data,
numpy.float32(-1), numpy.float32(3.0), numpy.float32(3.0), numpy.int32(5), local_mem).wait()
1000 loops, best of 3: 548 µs per loop