[Gegl-developer] [PATCH] Refine OpenCL implementation of operation weighted-blend



Remove the cl_copy_weigthed_blend kernel and use clEnqueueCopyBuffer
instead. In my tests on Intel Beignet, the buffer copy is three times
faster than executing the cl_copy_weigthed_blend kernel.

Signed-off-by: Yongjia Zhang <yongjia zhang intel com>
---
 opencl/weighted-blend.cl           |  8 --------
 opencl/weighted-blend.cl.h         |  8 --------
 operations/common/weighted-blend.c | 17 +++++------------
 3 files changed, 5 insertions(+), 28 deletions(-)

diff --git a/opencl/weighted-blend.cl b/opencl/weighted-blend.cl
index 68cec63..fd410c8 100644
--- a/opencl/weighted-blend.cl
+++ b/opencl/weighted-blend.cl
@@ -1,11 +1,3 @@
-__kernel void cl_copy_weigthed_blend(__global const float4 *in,
-                                     __global       float4 *out)
-{
-  int gid = get_global_id(0);
-  float4 in_v = in[gid];
-  out[gid] = in_v;
-}
-
 __kernel void cl_weighted_blend(__global const float4 *in,
                                 __global const float4 *aux,
                                 __global       float4 *out)
diff --git a/opencl/weighted-blend.cl.h b/opencl/weighted-blend.cl.h
index d516482..f849f10 100644
--- a/opencl/weighted-blend.cl.h
+++ b/opencl/weighted-blend.cl.h
@@ -1,12 +1,4 @@
 static const char* weighted_blend_cl_source =
-"__kernel void cl_copy_weigthed_blend(__global const float4 *in,               \n"
-"                                     __global       float4 *out)              \n"
-"{                                                                             \n"
-"  int gid = get_global_id(0);                                                 \n"
-"  float4 in_v = in[gid];                                                      \n"
-"  out[gid] = in_v;                                                            \n"
-"}                                                                             \n"
-"                                                                              \n"
 "__kernel void cl_weighted_blend(__global const float4 *in,                    \n"
 "                                __global const float4 *aux,                   \n"
 "                                __global       float4 *out)                   \n"
diff --git a/operations/common/weighted-blend.c b/operations/common/weighted-blend.c
index 707429f..91d8114 100644
--- a/operations/common/weighted-blend.c
+++ b/operations/common/weighted-blend.c
@@ -56,9 +56,7 @@ cl_process (GeglOperation       *self,
 
   if (!cl_data)
     {
-      const char *kernel_name[] = {"cl_copy_weigthed_blend",
-                                   "cl_weighted_blend",
-                                   NULL};
+      const char *kernel_name[] = {"cl_weighted_blend", NULL};
       cl_data = gegl_cl_compile_and_build (weighted_blend_cl_source,
                                            kernel_name);
     }
@@ -67,15 +65,10 @@ cl_process (GeglOperation       *self,
 
   if (!aux_tex)
     {
-      cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex);
-      CL_CHECK;
-      cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem), (void*)&out_tex);
-      CL_CHECK;
-
-      cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
-                                           cl_data->kernel[0], 1,
-                                           NULL, &global_worksize, NULL,
-                                           0, NULL, NULL);
+      cl_err = gegl_clEnqueueCopyBuffer(gegl_cl_get_command_queue(),
+                                        in_tex, out_tex, 0, 0,
+                                        sizeof(cl_float4)*global_worksize,
+                                        0, NULL, NULL);
       CL_CHECK;
     }
   else
-- 
1.8.3.2




[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]