[gegl/gsoc2011-opencl: 17/46] gegl:over is working (very slowly) with OpenCL



commit 06a119e970ba8eb34c90275cbedbf92f3fc71088
Author: Victor Oliveira <victormatheus gmail com>
Date:   Thu May 19 14:29:00 2011 -0300

    gegl:over is working (very slowly) with OpenCL

 operations/common/over.c |   36 ++++++++++++++++++------------------
 1 files changed, 18 insertions(+), 18 deletions(-)
---
diff --git a/operations/common/over.c b/operations/common/over.c
index 0fb9167..c3d2358 100644
--- a/operations/common/over.c
+++ b/operations/common/over.c
@@ -65,22 +65,22 @@ cl_process (GeglOperation        *op,
 
   const char* kernel_source[] =
   {
-  "__kernel void kernel_over (__global float4* in,                \n",
-  "                           __global float4* aux,               \n",
-  "                           __global float4* out,               \n",
-  "                           uint n_pixels)                      \n",
-  "{                                                              \n",
-  "    size_t gid = get_global_id(0);                             \n",
-  "    if (gid < n_pixels)                                        \n",
-  "    {                                                          \n",
-  "        float4 _in  = in[gid];                                 \n",
-  "        float4 _aux = aux[gid];                                \n",
-  "        out[gid] = (float4)(_in.x + _aux.x * (1.0f - _in.w),   \n",
-  "                            _in.x + _aux.x * (1.0f - _in.w),   \n",
-  "                            _in.x + _aux.x * (1.0f - _in.w),   \n",
-  "                            _in.w + _aux.w - _in.w * _aux.w);  \n",
-  "    }                                                          \n",
-  "}                                                              \n",
+  "__kernel void kernel_over (__global float4* in,               \n",
+  "                           __global float4* aux,              \n",
+  "                           __global float4* out,              \n",
+  "                           uint n_pixels)                     \n",
+  "{                                                             \n",
+  "    size_t gid = get_global_id(0);                            \n",
+  "    if (gid < n_pixels)                                       \n",
+  "    {                                                         \n",
+  "        float4 _in  = in[gid];                                \n",
+  "        float4 _aux = aux[gid];                               \n",
+  "        out[gid] = (float4)(_aux.x + _in.x * (1.0f - _aux.w), \n",
+  "                            _aux.y + _in.y * (1.0f - _aux.w), \n",
+  "                            _aux.z + _in.z * (1.0f - _aux.w), \n",
+  "                            _aux.w + _in.w - _aux.w * _in.w); \n",
+  "    }                                                         \n", 
+  "}                                                             \n",
   };
 
   size_t len;
@@ -110,7 +110,7 @@ cl_process (GeglOperation        *op,
   CL_SAFE_CALL( errcode = gegl_clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, NULL) );
   CL_SAFE_CALL( ctx     = gegl_clCreateContext(0, 1, &device, NULL, NULL, &errcode) );
   CL_SAFE_CALL( cq      = gegl_clCreateCommandQueue(ctx, device, 0, &errcode) );
-  CL_SAFE_CALL( program = gegl_clCreateProgramWithSource(ctx, 11, (const char **)&kernel_source, NULL, &errcode) );
+  CL_SAFE_CALL( program = gegl_clCreateProgramWithSource(ctx, 16, (const char **)&kernel_source, NULL, &errcode) );
   errcode = gegl_clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
   if (errcode != CL_SUCCESS)
     {
@@ -133,7 +133,7 @@ cl_process (GeglOperation        *op,
   /* -- Running -- */
 
   CL_SAFE_CALL( errcode = gegl_clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &local_worksize, NULL) );
-  global_worksize = MAX( (((_n_pixels+local_worksize-1) / local_worksize) * local_worksize) / 4, local_worksize );
+  global_worksize = MAX( ((_n_pixels+local_worksize-1) / local_worksize) * local_worksize, local_worksize );
 
   CL_SAFE_CALL( errcode = gegl_clEnqueueWriteBuffer(cq, d_in,  CL_FALSE, 0, sizeof(cl_float4) * _n_pixels, in,  0, NULL, NULL) );
   CL_SAFE_CALL( errcode = gegl_clEnqueueWriteBuffer(cq, d_aux, CL_FALSE, 0, sizeof(cl_float4) * _n_pixels, aux, 0, NULL, NULL) );



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]