+
+ ++y;
+ if( in_x < in_width )
+ {
+ tmp_sum = column_sum[local_id0];
+ tmp_sum -= in[(column_index_start)*in_width+in_x];
+ tmp_sum += in[(column_index_end+1)*in_width+in_x];
+ ++column_index_start;
+ ++column_index_end;
+ column_sum[local_id0] = tmp_sum;
+ }
+
+ barrier( CLK_LOCAL_MEM_FENCE );
+ }
+}
diff --git a/opencl/box-blur.cl.h b/opencl/box-blur.cl.h
index bfed601..8f6aa81 100644
--- a/opencl/box-blur.cl.h
+++ b/opencl/box-blur.cl.h
@@ -44,4 +44,70 @@ static const char* box_blur_cl_source =
" out[out_index] = mean / (float)(2 * radius + 1); \n"
" } \n"
"} \n"
+" \n"
+"__kernel void kernel_box_blur_fast(const __global float4 *in, \n"
+" __global float4 *out, \n"
+" __local float4 *column_sum, \n"
+" const int width, \n"
+" const int height, \n"
+" const int radius, \n"
+" const int size) \n"
+"{ \n"
+" const int local_id0 = get_local_id(0); \n"
+" const int twice_radius = 2 * radius; \n"
+" const int in_width = twice_radius + width; \n"
+" const int in_height = twice_radius + height; \n"
+" const float4 area = (float4)( (twice_radius+1) * (twice_radius+1) ); \n"
+" int column_index_start,column_index_end; \n"
+" int y = get_global_id(1) * size; \n"
+" const int out_x = get_group_id(0) \n"
+" * ( get_local_size(0) - twice_radius ) + local_id0 - radius; \n"
+" const int in_x = out_x + radius; \n"
+" int tmp_size = size; \n"
+" int tmp_index = 0; \n"
+" float4 tmp_sum = (float4)0.0f; \n"
+" float4 total_sum = (float4)0.0f; \n"
+" if( in_x < in_width ) \n"
+" { \n"
+" column_index_start = y; \n"
+" column_index_end = y + twice_radius; \n"
+" for( int i=0; i<twice_radius+1; ++i ) \n"
+" tmp_sum+=in[(y+i)*in_width+in_x]; \n"
+" column_sum[local_id0] = tmp_sum; \n"
+" } \n"
+" \n"
+" barrier( CLK_LOCAL_MEM_FENCE ); \n"
+" \n"
+" while(1) \n"
+" { \n"
+" if( out_x < width ) \n"
+" { \n"
+" if( local_id0 >= radius \n"
+" && local_id0 < get_local_size(0) - radius ) \n"
+" { \n"
+" total_sum = (float4)0.0f; \n"
+" for( int i=0; i<twice_radius+1; ++i ) \n"
+" total_sum += column_sum[local_id0-radius+i]; \n"
+" out[y*width+out_x] = total_sum/area; \n"
+" } \n"
+" } \n"
+" if( --tmp_size ==0 || y == height - 1 ) \n"
+" break; \n"
+" \n"
+" barrier( CLK_LOCAL_MEM_FENCE ); \n"
+" \n"
+" ++y; \n"
+" if( in_x < in_width ) \n"
+" { \n"
+" tmp_sum = column_sum[local_id0]; \n"
+" tmp_sum -= in[(column_index_start)*in_width+in_x]; \n"
+" tmp_sum += in[(column_index_end+1)*in_width+in_x]; \n"
+" ++column_index_start; \n"
+" ++column_index_end; \n"
+" column_sum[local_id0] = tmp_sum; \n"
+" } \n"
+" \n"
+" barrier( CLK_LOCAL_MEM_FENCE ); \n"
+" } \n"
+"} \n"
;
diff --git a/operations/common/box-blur.c b/operations/common/box-blur.c
index afc19ea..cb77ec0 100644
--- a/operations/common/box-blur.c
+++ b/operations/common/box-blur.c
@@ -180,9 +180,7 @@ static void prepare (GeglOperation *operation)
#include "buffer/gegl-buffer-cl-iterator.h"
#include "opencl/box-blur.cl.h"
-
static GeglClRunData *cl_data = NULL;
-
static gboolean
cl_box_blur (cl_mem in_tex,
cl_mem aux_tex,
@@ -192,57 +190,82 @@ cl_box_blur (cl_mem in_tex,
gint radius)
{
cl_int cl_err = 0;
- size_t global_ws_hor[2], global_ws_ver[2];
- size_t local_ws_hor[2], local_ws_ver[2];
-
+ size_t global_ws_hor[2], global_ws_ver[2], global_ws[2];
+ size_t local_ws_hor[2], local_ws_ver[2], local_ws[2];
+ size_t step_size ;
if (!cl_data)
{
- const char *kernel_name[] = {"kernel_blur_hor", "kernel_blur_ver", NULL};
+ const char *kernel_name[] = { "kernel_blur_hor", "kernel_blur_ver","kernel_box_blur_fast", NULL};
cl_data = gegl_cl_compile_and_build (box_blur_cl_source, kernel_name);
}
if (!cl_data)
return TRUE;
-
- local_ws_hor[0] = 1;
- local_ws_hor[1] = 256;
- global_ws_hor[0] = roi->height + 2 * radius;
- global_ws_hor[1] = ((roi->width + local_ws_hor[1] -1)/local_ws_hor[1]) * local_ws_hor[1];
-
- local_ws_ver[0] = 1;
- local_ws_ver[1] = 256;
- global_ws_ver[0] = roi->height;
- global_ws_ver[1] = ((roi->width + local_ws_ver[1] -1)/local_ws_ver[1]) * local_ws_ver[1];
-
-
- cl_err = gegl_cl_set_kernel_args (cl_data->kernel[0],
- sizeof(cl_mem), (void*)&in_tex,
- sizeof(cl_mem), (void*)&aux_tex,
- sizeof(cl_int), (void*)&roi->width,
- sizeof(cl_int), (void*)&radius,
- NULL);
- CL_CHECK;
-
- cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (),
- cl_data->kernel[0], 2,
- NULL, global_ws_hor, local_ws_hor,
- 0, NULL, NULL);
- CL_CHECK;
-
-
- cl_err = gegl_cl_set_kernel_args (cl_data->kernel[1],
- sizeof(cl_mem), (void*)&aux_tex,
- sizeof(cl_mem), (void*)&out_tex,
- sizeof(cl_int), (void*)&roi->width,
- sizeof(cl_int), (void*)&radius,
- NULL);
- CL_CHECK;
-
- cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (),
- cl_data->kernel[1], 2,
- NULL, global_ws_ver, local_ws_ver,
- 0, NULL, NULL);
- CL_CHECK;
+ step_size = 64;
+ local_ws[0]=256;
+ local_ws[1]=1;
+
+
+ if( radius <=110 )
+ {
+ global_ws[0] = (roi->width + local_ws[0] - 2 * radius - 1) / ( local_ws[0] - 2 * radius ) *
local_ws[0];
+ global_ws[1] = (roi->height + step_size - 1) / step_size;
+ cl_err = gegl_cl_set_kernel_args(cl_data->kernel[2],
+ sizeof(cl_mem), (void *)&in_tex,
+ sizeof(cl_mem), (void *)&out_tex,
+ sizeof(cl_float4)*local_ws[0], (void *)NULL,
+ sizeof(cl_int), (void *)&roi->width,
+ sizeof(cl_int), (void *)&roi->height,
+ sizeof(cl_int), (void *)&radius,
+ sizeof(cl_int), (void *)&step_size, NULL);
+ CL_CHECK;
+ cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue(),
+ cl_data->kernel[2], 2,
+ NULL, global_ws, local_ws, 0, NULL, NULL );
+ CL_CHECK;
+
+ }
+ else
+ {
+ local_ws_hor[0] = 1;
+ local_ws_hor[1] = 256;
+ global_ws_hor[0] = roi->height + 2 * radius;
+ global_ws_hor[1] = ((roi->width + local_ws_hor[1] -1)/local_ws_hor[1]) * local_ws_hor[1];
+
+ local_ws_ver[0] = 1;
+ local_ws_ver[1] = 256;
+ global_ws_ver[0] = roi->height;
+ global_ws_ver[1] = ((roi->width + local_ws_ver[1] -1)/local_ws_ver[1]) * local_ws_ver[1];
+
+
+ cl_err = gegl_cl_set_kernel_args (cl_data->kernel[0],
+ sizeof(cl_mem), (void*)&in_tex,
+ sizeof(cl_mem), (void*)&aux_tex,
+ sizeof(cl_int), (void*)&roi->width,
+ sizeof(cl_int), (void*)&radius,
+ NULL);
+ CL_CHECK;
+ cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (),
+ cl_data->kernel[0], 2,
+ NULL, global_ws_hor, local_ws_hor,
+ 0, NULL, NULL);
+ CL_CHECK;
+
+
+ cl_err = gegl_cl_set_kernel_args (cl_data->kernel[1],
+ sizeof(cl_mem), (void*)&aux_tex,
+ sizeof(cl_mem), (void*)&out_tex,
+ sizeof(cl_int), (void*)&roi->width,
+ sizeof(cl_int), (void*)&radius,
+ NULL);
+ CL_CHECK;
+
+ cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (),
+ cl_data->kernel[1], 2,
+ NULL, global_ws_ver, local_ws_ver,
+ 0, NULL, NULL);
+ CL_CHECK;
+ }
return FALSE;
--
1.8.3.2
_______________________________________________
gegl-developer-list mailing list
List address: gegl-developer-list gnome org
List membership: https://mail.gnome.org/mailman/listinfo/gegl-developer-list