#!/usr/bin/env python # coding: utf-8 # In[1]: get_ipython().run_line_magic('matplotlib', 'nbagg') # In[2]: import os os.environ["PYOPENCL_COMPILER_OUTPUT"]="1" import numpy import fabio import pyopencl from pyopencl import array as cla from matplotlib.pyplot import subplots # In[3]: ctx = pyopencl.create_some_context(interactive=True) queue = pyopencl.CommandQueue(ctx, properties=pyopencl.command_queue_properties.PROFILING_ENABLE) ctx # In[4]: image = fabio.open("/users/kieffer/workspace-400/tmp/pyFAI/test/testimages/Pilatus6M.cbf").data mask = (image<0).astype("int8") # In[5]: fig, ax = subplots() ax.imshow(image.clip(0,100)) # In[6]: get_ipython().run_line_magic('load_ext', 'pyopencl.ipython_ext') # In[10]: get_ipython().run_cell_magic('cl_kernel', '', '\n//read withou caching\nfloat inline read_simple(global int *img, \n int height,\n int width,\n int row,\n int col){\n //This kernel reads the value and returns it without active caching\n float value = NAN;\n \n // Read\n if ((col>=0) && (col=0) && (row=0) && (col0) && (row=-half_wind_width) && (col<=width+half_wind_width) && (row>-half_wind_height) && (row<=height+half_wind_height)){\n line_size = get_local_size(0) + 2 * half_wind_width;\n idx_line = (half_wind_height+row)%(2*half_wind_height+1);\n write_pos = line_size*idx_line + half_wind_width + col - get_group_id(0)*get_local_size(0);\n storage[write_pos] = value;\n }\n //return value\n}\n\n//Store a complete line\nvoid inline store_line(global int *img, \n int height,\n int width,\n int row,\n int half_wind_height,\n int half_wind_width,\n local float* storage){\n read_and_store(img, height, width, \n row, get_global_id(0), \n half_wind_height, half_wind_width, storage);\n if (get_local_id(0)=-half_wind_width) && (col<=width+half_wind_width) && (row>-half_wind_height) && (row<=height+half_wind_height)){\n line_size = get_local_size(0) + 2 * half_wind_width;\n idx_line = (half_wind_height+row)%(2*half_wind_height+1);\n write_pos = line_size*idx_line + half_wind_width + col - get_group_id(0)*get_local_size(0);\n value = storage[write_pos]; \n }\n return value;\n}\n\n// workgroup size of kernel: 32 to 128, cache_read needs to be (wg+2*half_wind_width)*(2*half_wind_height+1)*sizeof(float)\nkernel void spot_finder(global int *img, \n int height,\n int width,\n int half_wind_height,\n int half_wind_width,\n float threshold,\n float radius,\n global int *cnt_high, //output\n global int *high, //output\n int high_size,\n local float *cache_read,\n local int *local_high,\n int local_size){\n //decaration of variables\n int col, row, cnt, i, j, where;\n float value, sum, std, centroid_r, centroid_c, dist, mean;\n col = get_global_id(0);\n \n local int local_cnt_high[1];\n local_cnt_high[0] = 0;\n for (i=0; ithreshold*std){\n where = atomic_inc(local_cnt_high);\n if (where0) && (cnthigh_size){\n cnt = high_size-where; //store what we can\n }\n for (i=0; ithreshold*std) && (fabs(centroid_r)0) && (cnthigh_size){\n cnt = high_size-where; //store what we can\n }\n local_cnt_high[0] = cnt;\n local_cnt_high[1] = where;\n }\n }\n barrier(CLK_LOCAL_MEM_FENCE);\n //copy the data from local to global memory\n for (i=0; i