[gegl] Add OpenCL support for gegl:c2g

From: Victor Matheus de Araujo Oliveira <vmaolive src gnome org>
To: commits-list gnome org
Cc:
Subject: [gegl] Add OpenCL support for gegl:c2g
Date: Sun, 1 Apr 2012 18:02:50 +0000 (UTC)
commit 969feed68f04e6e1daa2f8ef97f12410f944866f
Author: Zhang Peixuan <zhangpeixuan cn gmail com>
Date:   Thu Mar 15 12:18:44 2012 -0300

    Add OpenCL support for gegl:c2g

 operations/common/c2g.c |  310 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 305 insertions(+), 5 deletions(-)
---
diff --git a/operations/common/c2g.c b/operations/common/c2g.c
index 0317977..52962ce 100644
--- a/operations/common/c2g.c
+++ b/operations/common/c2g.c
@@ -95,18 +95,18 @@ static void c2g (GeglBuffer          *src,
              */
 
             gfloat nominator = 0;
- 	    gfloat denominator = 0;
+            gfloat denominator = 0;
             gint c;
- 	    for (c=0; c<3; c++)
- 	      {
+            for (c=0; c<3; c++)
+              {
                 nominator   += (pixel[c] - min[c]) * (pixel[c] - min[c]);
                 denominator += (pixel[c] - max[c]) * (pixel[c] - max[c]);
- 	      }
+              }
 
             nominator = sqrt (nominator);
             denominator = sqrt (denominator);
             denominator = nominator + denominator;
- 	
+
             if (denominator>0.000)
               {
                 dst_buf[dst_offset+0] = nominator/denominator;
@@ -133,6 +133,8 @@ static void prepare (GeglOperation *operation)
   GeglOperationAreaFilter *area = GEGL_OPERATION_AREA_FILTER (operation);
   area->left = area->right = area->top = area->bottom =
       ceil (GEGL_CHANT_PROPERTIES (operation)->radius);
+
+  gegl_operation_set_format (operation, "input", babl_format ("RGBA float"));
   gegl_operation_set_format (operation, "output", babl_format ("YA float"));
 }
 
@@ -147,6 +149,297 @@ get_bounding_box (GeglOperation *operation)
   return *in_rect;
 }
 
+#include "opencl/gegl-cl.h"
+#include "buffer/gegl-buffer-cl-iterator.h"
+
+static const char* kernel_source =
+"#define TRUE true                                                     \n"
+"                                                                      \n"
+"#define FALSE false                                                   \n"
+"#define ANGLE_PRIME 95273                                             \n"
+"#define RADIUS_PRIME 29537                                            \n"
+"                                                                      \n"
+"void sample_min_max(const __global   float4 *src_buf,                 \n"
+"                                     int     src_width,               \n"
+"                                     int     src_height,              \n"
+"                    const __global   float  *radiuses,                \n"
+"                    const __global   float  *lut_cos,                 \n"
+"                    const __global   float  *lut_sin,                 \n"
+"                                     int     x,                       \n"
+"                                     int     y,                       \n"
+"                                     int     radius,                  \n"
+"                                     int     samples,                 \n"
+"                                     float4 *min,                     \n"
+"                                     float4 *max,                     \n"
+"                                     int     j,                       \n"
+"                                     int     iterations)              \n"
+"{                                                                     \n"
+"    float4 best_min;                                                  \n"
+"    float4 best_max;                                                  \n"
+"    float4 center_pix = *(src_buf + src_width * y + x);               \n"
+"    int i;                                                            \n"
+"                                                                      \n"
+"    best_min = center_pix;                                            \n"
+"    best_max = center_pix;                                            \n"
+"                                                                      \n"
+"    int angle_no  = (src_width * y + x) * (iterations) *              \n"
+"                       samples + j * samples;                         \n"
+"    int radius_no = angle_no;                                         \n"
+"    angle_no  %= ANGLE_PRIME;                                         \n"
+"    radius_no %= RADIUS_PRIME;                                        \n"
+"    for(i=0; i<samples; i++)                                          \n"
+"    {                                                                 \n"
+"        int angle;                                                    \n"
+"        float rmag;                                                   \n"
+"        /* if we've sampled outside the valid image                   \n"
+"           area, we grab another sample instead, this                 \n"
+"           should potentially work better than mirroring              \n"
+"           or extending the image */                                  \n"
+"                                                                      \n"
+"         angle = angle_no++;                                          \n"
+"         rmag  = radiuses[radius_no++] * radius;                      \n"
+"                                                                      \n"
+"         if( angle_no  >= ANGLE_PRIME)                                \n"
+"             angle_no   = 0;                                          \n"
+"         if( radius_no >= RADIUS_PRIME)                               \n"
+"             radius_no  = 0;                                          \n"
+"                                                                      \n"
+"         int u = x + rmag * lut_cos[angle];                           \n"
+"         int v = y + rmag * lut_sin[angle];                           \n"
+"                                                                      \n"
+"         if(u>=src_width || u <0 || v>=src_height || v<0)             \n"
+"         {                                                            \n"
+"             //--i;                                                   \n"
+"             continue;                                                \n"
+"         }                                                            \n"
+"         float4 pixel = *(src_buf + (src_width * v + u));             \n"
+"         if(pixel.w<=0.0f)                                            \n"
+"         {                                                            \n"
+"             //--i;                                                   \n"
+"             continue;                                                \n"
+"         }                                                            \n"
+"                                                                      \n"
+"         best_min = pixel < best_min ? pixel : best_min;              \n"
+"         best_max = pixel > best_max ? pixel : best_max;              \n"
+"    }                                                                 \n"
+"                                                                      \n"
+"    (*min).xyz = best_min.xyz;                                        \n"
+"    (*max).xyz = best_max.xyz;                                        \n"
+"}                                                                     \n"
+"                                                                      \n"
+"void compute_envelopes_CL(const __global  float4 *src_buf,            \n"
+"                                          int     src_width,          \n"
+"                                          int     src_height,         \n"
+"                          const __global  float  *radiuses,           \n"
+"                          const __global  float  *lut_cos,            \n"
+"                          const __global  float  *lut_sin,            \n"
+"                                          int     x,                  \n"
+"                                          int     y,                  \n"
+"                                          int     radius,             \n"
+"                                          int     samples,            \n"
+"                                          int     iterations,         \n"
+"                                          float4 *min_envelope,       \n"
+"                                          float4 *max_envelope)       \n"
+"{                                                                     \n"
+"    float4 range_sum = 0;                                             \n"
+"    float4 relative_brightness_sum = 0;                               \n"
+"    float4 pixel = *(src_buf + src_width * y + x);                    \n"
+"                                                                      \n"
+"    int i;                                                            \n"
+"    for(i =0; i<iterations; i++)                                      \n"
+"    {                                                                 \n"
+"        float4 min,max;                                               \n"
+"        float4 range, relative_brightness;                            \n"
+"                                                                      \n"
+"        sample_min_max(src_buf, src_width, src_height,                \n"
+"                        radiuses, lut_cos, lut_sin, x, y,             \n"
+"                        radius,samples,&min,&max,i,iterations);       \n"
+"        range = max - min;                                            \n"
+"        relative_brightness = range <= 0.0f ?                         \n"
+"                               0.5f : (pixel - min) / range;          \n"
+"        relative_brightness_sum += relative_brightness;               \n"
+"        range_sum += range;                                           \n"
+"    }                                                                 \n"
+"                                                                      \n"
+"    float4 relative_brightness = relative_brightness_sum / iterations;\n"
+"    float4 range = range_sum / iterations;                            \n"
+"                                                                      \n"
+"    if(max_envelope)                                                  \n"
+"        *max_envelope = pixel + (1.0f - relative_brightness) * range; \n"
+"                                                                      \n"
+"    if(min_envelope)                                                  \n"
+"        *min_envelope = pixel - relative_brightness * range;          \n"
+"}                                                                     \n"
+"                                                                      \n"
+"__kernel void C2g_CL(const __global float4 *src_buf,                  \n"
+"                                    int     src_width,                \n"
+"                                    int     src_height,               \n"
+"                     const __global float  *radiuses,                 \n"
+"                     const __global float  *lut_cos,                  \n"
+"                     const __global float  *lut_sin,                  \n"
+"                           __global float2 *dst_buf,                  \n"
+"                                    int     radius,                   \n"
+"                                    int     samples,                  \n"
+"                                    int     iterations)               \n"
+"{                                                                     \n"
+"    int gidx = get_global_id(0);                                      \n"
+"    int gidy = get_global_id(1);                                      \n"
+"                                                                      \n"
+"    int x = gidx + radius;                                            \n"
+"    int y = gidy + radius;                                            \n"
+"                                                                      \n"
+"    int src_offset = (src_width * y + x);                             \n"
+"    int dst_offset = gidx + get_global_size(0) * gidy;                \n"
+"    float4 min,max;                                                   \n"
+"                                                                      \n"
+"    compute_envelopes_CL(src_buf, src_width, src_height,              \n"
+"                         radiuses, lut_cos, lut_sin, x, y,            \n"
+"                         radius, samples, iterations, &min, &max);    \n"
+"                                                                      \n"
+"    float4 pixel = *(src_buf + src_offset);                           \n"
+"                                                                      \n"
+"    float nominator=0, denominator=0;                                 \n"
+"    float4 t1 = (pixel - min) * (pixel - min);                        \n"
+"    float4 t2 = (pixel - max) * (pixel - max);                        \n"
+"                                                                      \n"
+"    nominator   = t1.x + t1.y + t1.z;                                 \n"
+"    denominator = t2.x + t2.y + t2.z;                                 \n"
+"                                                                      \n"
+"    nominator   = sqrt(nominator);                                    \n"
+"    denominator = sqrt(denominator);                                  \n"
+"    denominator+= nominator + denominator;                            \n"
+"                                                                      \n"
+"    dst_buf[dst_offset].x = (denominator > 0.000f)                    \n"
+"                             ? (nominator / denominator) : 0.5f;      \n"
+"    dst_buf[dst_offset].y =  src_buf[src_offset].w;                   \n"
+"}                                                                     \n"
+"                                                                      \n";
+
+static gegl_cl_run_data *cl_data = NULL;
+
+static cl_int
+cl_c2g (cl_mem                in_tex,
+    cl_mem                    out_tex,
+    size_t                    global_worksize,
+    const GeglRectangle      *src_roi,
+    const GeglRectangle      *roi,
+    gint                      radius,
+    gint                      samples,
+    gint                      iterations,
+    gdouble                   rgamma)
+{
+  cl_int cl_err = 0;
+  cl_mem cl_lut_cos, cl_lut_sin, cl_radiuses;
+  const size_t gbl_size[2] = {roi->width, roi->height};
+
+  if (!cl_data)
+    {
+      const char *kernel_name[] ={"C2g_CL", NULL};
+      cl_data = gegl_cl_compile_and_build(kernel_source, kernel_name);
+    }
+  if (!cl_data)  return 0;
+
+  compute_luts(rgamma);
+
+  cl_lut_cos = gegl_clCreateBuffer(gegl_cl_get_context(),
+                                   CL_MEM_READ_ONLY,
+                                   ANGLE_PRIME * sizeof(cl_float), NULL, &cl_err);
+
+  cl_err |= gegl_clEnqueueWriteBuffer(gegl_cl_get_command_queue(), cl_lut_cos,
+                                      CL_TRUE, 0, ANGLE_PRIME * sizeof(cl_float), lut_cos,
+                                      0, NULL, NULL);
+  if (CL_SUCCESS != cl_err)   return cl_err;
+
+  cl_lut_sin = gegl_clCreateBuffer(gegl_cl_get_context(),
+                                   CL_MEM_READ_ONLY,
+                                   ANGLE_PRIME * sizeof(cl_float), NULL, &cl_err);
+
+  cl_err |= gegl_clEnqueueWriteBuffer(gegl_cl_get_command_queue(), cl_lut_sin,
+                                      CL_TRUE, 0, ANGLE_PRIME * sizeof(cl_float), lut_sin,
+                                      0, NULL, NULL);
+  if (CL_SUCCESS != cl_err)    return cl_err;
+
+  cl_radiuses = gegl_clCreateBuffer(gegl_cl_get_context(),
+                                    CL_MEM_READ_ONLY,
+                                    RADIUS_PRIME * sizeof(cl_float), NULL, &cl_err);
+
+  cl_err |= gegl_clEnqueueWriteBuffer(gegl_cl_get_command_queue(), cl_radiuses,
+                                      CL_TRUE, 0, RADIUS_PRIME * sizeof(cl_float), radiuses,
+                                      0, NULL, NULL);
+  if (CL_SUCCESS != cl_err)    return cl_err;
+
+  {
+  cl_int cl_src_width  = src_roi->width;
+  cl_int cl_src_height = src_roi->height;
+  cl_int cl_radius     = radius;
+  cl_int cl_samples    = samples;
+  cl_int cl_iterations = iterations;
+
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex);
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_int), (void*)&cl_src_width);
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_int), (void*)&cl_src_height);
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_mem), (void*)&cl_radiuses);
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_mem), (void*)&cl_lut_cos);
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_mem), (void*)&cl_lut_sin);
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 6, sizeof(cl_mem), (void*)&out_tex);
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 7, sizeof(cl_int), (void*)&cl_radius);
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 8, sizeof(cl_int), (void*)&cl_samples);
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 9, sizeof(cl_int), (void*)&cl_iterations);
+  if (cl_err != CL_SUCCESS) return cl_err;
+  }
+
+  cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue(), cl_data->kernel[0],
+                                       2, NULL, gbl_size, NULL,
+                                       0, NULL, NULL);
+  if (cl_err != CL_SUCCESS) return cl_err;
+
+  cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
+  if (CL_SUCCESS != cl_err) return cl_err;
+
+  gegl_clFinish(gegl_cl_get_command_queue ());
+
+  gegl_clReleaseMemObject(cl_radiuses);
+  gegl_clReleaseMemObject(cl_lut_cos);
+  gegl_clReleaseMemObject(cl_lut_sin);
+
+  return cl_err;
+}
+
+static gboolean
+cl_process (GeglOperation       *operation,
+      GeglBuffer          *input,
+      GeglBuffer          *output,
+      const GeglRectangle *result)
+{
+  const Babl *in_format  = babl_format("RGBA float");
+  const Babl *out_format = gegl_operation_get_format (operation, "output");
+  gint err;
+  gint j;
+  cl_int cl_err;
+
+  GeglOperationAreaFilter *op_area = GEGL_OPERATION_AREA_FILTER (operation);
+  GeglChantO *o = GEGL_CHANT_PROPERTIES (operation);
+
+  GeglBufferClIterator *i = gegl_buffer_cl_iterator_new (output,result, out_format, GEGL_CL_BUFFER_WRITE, GEGL_ABYSS_NONE);
+                gint read = gegl_buffer_cl_iterator_add_2 (i, input, result, in_format, GEGL_CL_BUFFER_READ,
+                                                           op_area->left, op_area->right, op_area->top, op_area->bottom, GEGL_ABYSS_NONE);
+  while (gegl_buffer_cl_iterator_next (i, &err))
+  {
+    if (err) return FALSE;
+    for (j=0; j < i->n; j++)
+    {
+      cl_err = cl_c2g(i->tex[read][j], i->tex[0][j],i->size[0][j], &i->roi[read][j],&i->roi[0][j],
+                      o->radius,o->samples,o->iterations,RGAMMA);
+      if (cl_err != CL_SUCCESS)
+      {
+        g_warning("[OpenCL] Error in gegl:c2g: %s", gegl_cl_errstring(cl_err));
+        return FALSE;
+      }
+    }
+  }
+  return TRUE;
+}
+
 static gboolean
 process (GeglOperation       *operation,
          GeglBuffer          *input,
@@ -158,6 +451,10 @@ process (GeglOperation       *operation,
   GeglRectangle compute;
   compute = gegl_operation_get_required_for_output (operation, "input",result);
 
+  if (o->radius < 500 && gegl_cl_is_accelerated ())
+    if(cl_process(operation, input, output, result))
+      return TRUE;
+
   c2g (input, &compute, output, result,
        o->radius,
        o->samples,
@@ -179,6 +476,7 @@ gegl_chant_class_init (GeglChantClass *klass)
 
   filter_class->process = process;
   operation_class->prepare  = prepare;
+
   /* we override defined region to avoid growing the size of what is defined
    * by the filter. This also allows the tricks used to treat alpha==0 pixels
    * in the image as source data not to be skipped by the stochastic sampling
@@ -186,6 +484,8 @@ gegl_chant_class_init (GeglChantClass *klass)
    */
   operation_class->get_bounding_box = get_bounding_box;
 
+  operation_class->opencl_support = TRUE;
+
   gegl_operation_class_set_keys (operation_class,
       "name",        "gegl:c2g",
       "categories",  "enhance",
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]