[gegl] performance optimization in box-blur filter.



commit d2af5dc22dbab3ebd9542a8bfa6b106d39bc3efa
Author: RPG <roshin scriptumplus ru>
Date:   Mon Dec 31 16:30:45 2012 -0200

    performance optimization in box-blur filter.

 operations/common/box-blur.c |  238 +++++++++++++++++-------------------------
 1 files changed, 98 insertions(+), 140 deletions(-)
---
diff --git a/operations/common/box-blur.c b/operations/common/box-blur.c
index 6c0b255..eae2136 100644
--- a/operations/common/box-blur.c
+++ b/operations/common/box-blur.c
@@ -14,6 +14,7 @@
  * License along with GEGL; if not, see <http://www.gnu.org/licenses/>.
  *
  * Copyright 2006 Øyvind Kolås <pippin gimp org>
+ *           2012 Pavel Roschin <roshin scriptumplus ru>
  */
 
 #include "config.h"
@@ -34,91 +35,8 @@ gegl_chant_double_ui (radius, _("Radius"), 0.0, 1000.0, 4.0, 0.0, 100.0, 1.5,
 #include <stdio.h>
 #include <math.h>
 
-#ifdef USE_DEAD_CODE
-static inline float
-get_mean_component (gfloat *buf,
-                    gint    buf_width,
-                    gint    buf_height,
-                    gint    x0,
-                    gint    y0,
-                    gint    width,
-                    gint    height,
-                    gint    component)
-{
-  gint    x, y;
-  gdouble acc=0;
-  gint    count=0;
-
-  gint offset = (y0 * buf_width + x0) * 4 + component;
-
-  for (y=y0; y<y0+height; y++)
-    {
-    for (x=x0; x<x0+width; x++)
-      {
-        if (x>=0 && x<buf_width &&
-            y>=0 && y<buf_height)
-          {
-            acc += buf [offset];
-            count++;
-          }
-        offset+=4;
-      }
-      offset+= (buf_width * 4) - 4 * width;
-    }
-   if (count)
-     return acc/count;
-   return 0.0;
-}
-#endif
-
-static inline void
-get_mean_components (gfloat *buf,
-                     gint    buf_width,
-                     gint    buf_height,
-                     gint    x0,
-                     gint    y0,
-                     gint    width,
-                     gint    height,
-                     gfloat *components)
-{
-  gint    y;
-  gdouble acc[4]={0,0,0,0};
-  gint    count[4]={0,0,0,0};
-
-  gint offset = (y0 * buf_width + x0) * 4;
+#define SRC_OFFSET (row + u + radius * 2) * 4
 
-  for (y=y0; y<y0+height; y++)
-    {
-    gint x;
-    for (x=x0; x<x0+width; x++)
-      {
-        if (x>=0 && x<buf_width &&
-            y>=0 && y<buf_height)
-          {
-            gint c;
-            for (c=0;c<4;c++)
-              {
-                acc[c] += buf [offset+c];
-                count[c]++;
-              }
-          }
-        offset+=4;
-      }
-      offset+= (buf_width * 4) - 4 * width;
-    }
-    {
-      gint c;
-      for (c=0;c<4;c++)
-        {
-         if (count[c])
-           components[c] = acc[c]/count[c];
-         else
-           components[c] = 0.0;
-        }
-    }
-}
-
-/* expects src and dst buf to have the same extent */
 static void
 hor_blur (GeglBuffer          *src,
           const GeglRectangle *src_rect,
@@ -127,44 +45,58 @@ hor_blur (GeglBuffer          *src,
           gint                 radius)
 {
   gint u,v;
+  gint i;
   gint offset;
+  gint src_offset;
+  gint prev_rad = radius * 4 + 4;
+  gint next_rad = radius * 4;
+  gint row;
   gfloat *src_buf;
   gfloat *dst_buf;
+  gfloat rad1 = 1.0 / (gfloat)(radius * 2 + 1);
 
-  /* src == dst for hor blur */
   src_buf = g_new0 (gfloat, src_rect->width * src_rect->height * 4);
   dst_buf = g_new0 (gfloat, dst_rect->width * dst_rect->height * 4);
 
-  gegl_buffer_get (src, src_rect, 1.0, babl_format ("RaGaBaA float"), src_buf, GEGL_AUTO_ROWSTRIDE,
-                   GEGL_ABYSS_NONE);
+  gegl_buffer_get (src, src_rect, 1.0, babl_format ("RaGaBaA float"),
+                   src_buf, GEGL_AUTO_ROWSTRIDE, GEGL_ABYSS_CLAMP);
 
   offset = 0;
-  for (v=0; v<dst_rect->height; v++)
-    for (u=0; u<dst_rect->width; u++)
-      {
-        gint i;
-        gfloat components[4];
-
-        get_mean_components (src_buf,
-                             src_rect->width,
-                             src_rect->height,
-                             u - radius,
-                             v,
-                             1 + radius*2,
-                             1,
-                             components);
-
-        for (i=0; i<4; i++)
-          dst_buf [offset++] = components[i];
-      }
-
-  gegl_buffer_set (dst, dst_rect, 0, babl_format ("RaGaBaA float"), dst_buf, GEGL_AUTO_ROWSTRIDE);
+  for (v = 0; v < dst_rect->height; v++)
+    {
+      /* offset rows by radius only (not radius * 2 as in ver_blur):
+       * the destination rect was already enlarged by radius along y */
+      row = (v + radius) * src_rect->width;
+      /* prepare - set first column of pixels */
+      for (u = -radius; u <= radius; u++)
+        {
+          src_offset = SRC_OFFSET;
+          for (i = 0; i < 4; i++)
+            dst_buf[offset + i] += src_buf[src_offset + i] * rad1;
+        }
+      offset += 4;
+      /* iterate other pixels by moving a window - very fast */
+      for (u = 1; u < dst_rect->width; u++)
+        {
+          src_offset = SRC_OFFSET;
+          for (i = 0; i < 4; i++)
+          {
+            dst_buf[offset] = dst_buf[offset - 4]
+                            - src_buf[src_offset - prev_rad] * rad1
+                            + src_buf[src_offset + next_rad] * rad1;
+            src_offset++;
+            offset++;
+          }
+        }
+    }
+
+  gegl_buffer_set (dst, dst_rect, 0, babl_format ("RaGaBaA float"),
+                   dst_buf, GEGL_AUTO_ROWSTRIDE);
+
   g_free (src_buf);
   g_free (dst_buf);
 }
 
-
-/* expects dst buf to be radius smaller than src buf */
 static void
 ver_blur (GeglBuffer          *src,
           const GeglRectangle *src_rect,
@@ -172,42 +104,62 @@ ver_blur (GeglBuffer          *src,
           const GeglRectangle *dst_rect,
           gint                 radius)
 {
-  gint u,v;
+  gint u, v;
+  gint i;
   gint offset;
+  gint src_offset;
+  gint prev_rad = (radius * 4 + 4) * src_rect->width;
+  gint next_rad = (radius * 4) * src_rect->width;
+  gint row;
   gfloat *src_buf;
   gfloat *dst_buf;
+  gfloat rad1 = 1.0 / (gfloat)(radius * 2 + 1);
 
   src_buf = g_new0 (gfloat, src_rect->width * src_rect->height * 4);
   dst_buf = g_new0 (gfloat, dst_rect->width * dst_rect->height * 4);
 
-  gegl_buffer_get (src, src_rect, 1.0, babl_format ("RaGaBaA float"), src_buf, GEGL_AUTO_ROWSTRIDE,
-                   GEGL_ABYSS_NONE);
-
-  offset=0;
-  for (v=0; v<dst_rect->height; v++)
-    for (u=0; u<dst_rect->width; u++)
-      {
-        gfloat components[4];
-        gint c;
-
-        get_mean_components (src_buf,
-                             src_rect->width,
-                             src_rect->height,
-                             u + radius,  /* 1x radius is the offset between the bufs */
-                             v - radius + radius, /* 1x radius is the offset between the bufs */
-                             1,
-                             1 + radius * 2,
-                             components);
-
-        for (c=0; c<4; c++)
-          dst_buf [offset++] = components[c];
-      }
-
-  gegl_buffer_set (dst, dst_rect, 0, babl_format ("RaGaBaA float"), dst_buf, GEGL_AUTO_ROWSTRIDE);
+  gegl_buffer_get (src, src_rect, 1.0, babl_format ("RaGaBaA float"),
+                   src_buf, GEGL_AUTO_ROWSTRIDE, GEGL_ABYSS_CLAMP);
+
+  /* prepare: set first row of pixels */
+  for (v = -radius; v <= radius; v++)
+    {
+      row = (v + radius * 2) * src_rect->width;
+      for (u = 0; u < dst_rect->width; u++)
+        {
+          src_offset = SRC_OFFSET;
+          for (i = 0; i < 4; i++)
+            dst_buf[u * 4 + i] += src_buf[src_offset + i] * rad1;
+        }
+    }
+  /* skip first row */
+  offset = dst_rect->width * 4;
+  for (v = 1; v < dst_rect->height; v++)
+    {
+      row = (v + radius * 2) * src_rect->width;
+      for (u = 0; u < dst_rect->width; u++)
+        {
+          src_offset = SRC_OFFSET;
+          for (i = 0; i < 4; i++)
+          {
+            dst_buf[offset] = dst_buf[offset - 4 * dst_rect->width]
+                            - src_buf[src_offset - prev_rad] * rad1
+                            + src_buf[src_offset + next_rad] * rad1;
+            src_offset++;
+            offset++;
+          }
+        }
+    }
+
+  gegl_buffer_set (dst, dst_rect, 0, babl_format ("RaGaBaA float"),
+                   dst_buf, GEGL_AUTO_ROWSTRIDE);
+
   g_free (src_buf);
   g_free (dst_buf);
 }
 
+#undef SRC_OFFSET
+
 static void prepare (GeglOperation *operation)
 {
   GeglChantO              *o;
@@ -378,6 +330,7 @@ process (GeglOperation       *operation,
          gint                 level)
 {
   GeglRectangle rect;
+  GeglRectangle tmprect;
   GeglChantO *o = GEGL_CHANT_PROPERTIES (operation);
   GeglBuffer *temp;
   GeglOperationAreaFilter *op_area;
@@ -388,16 +341,21 @@ process (GeglOperation       *operation,
       return TRUE;
 
   rect = *result;
+  tmprect = *result;
 
-  rect.x-=op_area->left;
-  rect.y-=op_area->top;
-  rect.width+=op_area->left + op_area->right;
-  rect.height+=op_area->top + op_area->bottom;
+  rect.x       -= op_area->left * 2;
+  rect.y       -= op_area->top * 2;
+  rect.width   += (op_area->left + op_area->right) * 2;
+  rect.height  += (op_area->top + op_area->bottom) * 2;
+  /* very tricky: enlarge temp buffer to avoid seams in second pass */
+  tmprect.y      -= o->radius;
+  tmprect.height += o->radius * 2;
 
-  temp  = gegl_buffer_new (&rect,
+  temp  = gegl_buffer_new (&tmprect,
                            babl_format ("RaGaBaA float"));
 
-  hor_blur (input, &rect, temp, &rect, o->radius);
+  /* doing second pass in separate gegl op may be significantly faster */
+  hor_blur (input, &rect, temp, &tmprect, o->radius);
   ver_blur (temp, &rect, output, result, o->radius);
 
   g_object_unref (temp);



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]