[gegl/openmp: 1/5] 2x2-downscale: unroll per-component loops and add OpenMP support



commit 827c494bd7267e1cdf39a380a49ab8d0f062ec8e
Author: Øyvind Kolås <pippin gimp org>
Date:   Wed Jun 4 09:32:05 2014 +0200

    2x2-downscale: unroll per-component loops and add OpenMP support

 gegl/gegl-algorithms-2x2-downscale.inc |  256 +++++++++++++++++++++++++++++---
 1 files changed, 237 insertions(+), 19 deletions(-)
---
diff --git a/gegl/gegl-algorithms-2x2-downscale.inc b/gegl/gegl-algorithms-2x2-downscale.inc
index 5f5ba63..d725997 100644
--- a/gegl/gegl-algorithms-2x2-downscale.inc
+++ b/gegl/gegl-algorithms-2x2-downscale.inc
@@ -1,3 +1,6 @@
+#include "gegl-init.h"
+#include "gegl-config.h"
+
 void
 DOWNSCALE_FUNCNAME (gint    bpp,
                     gint    src_width,
@@ -8,36 +11,251 @@ DOWNSCALE_FUNCNAME (gint    bpp,
                     gint    dst_rowstride)
 {
   gint y;
+  gint diag = src_rowstride + bpp;
   const gint components = bpp / sizeof(DOWNSCALE_TYPE);
 
   if (!src_data || !dst_data)
     return;
 
-  for (y = 0; y < src_height / 2; y++)
-    {
-      gint    x;
-      guchar *src = src_data;
-      guchar *dst = dst_data;
+#ifdef HAVE_OPENMP
+  if (gegl_config()->use_openmp)
+  switch (components)
+  {
+    case 1:
+#pragma omp parallel for
+      for (y = 0; y < src_height / 2; y++)
+        {
+          gint    x;
+          guchar *src = src_data + src_rowstride * y * 2;
+          guchar *dst = dst_data + dst_rowstride * y;
+
+          for (x = 0; x < src_width / 2; x++)
+            {
+              DOWNSCALE_TYPE * aa = ((DOWNSCALE_TYPE *)(src));
+              DOWNSCALE_TYPE * ab = ((DOWNSCALE_TYPE *)(src + bpp));
+              DOWNSCALE_TYPE * ba = ((DOWNSCALE_TYPE *)(src + src_rowstride));
+              DOWNSCALE_TYPE * bb = ((DOWNSCALE_TYPE *)(src + diag));
+
+              ((DOWNSCALE_TYPE *)dst)[0] = (aa[0] + ab[0] + ba[0] + bb[0]) / DOWNSCALE_DIVISOR;
+
+              dst += bpp;
+              src += bpp * 2;
+            }
+        }
+    break;
+    case 2:
+#pragma omp parallel for
+      for (y = 0; y < src_height / 2; y++)
+        {
+          gint    x;
+          guchar *src = src_data + src_rowstride * y * 2;
+          guchar *dst = dst_data + dst_rowstride * y;
+
+          for (x = 0; x < src_width / 2; x++)
+            {
+              DOWNSCALE_TYPE * aa = ((DOWNSCALE_TYPE *)(src));
+              DOWNSCALE_TYPE * ab = ((DOWNSCALE_TYPE *)(src + bpp));
+              DOWNSCALE_TYPE * ba = ((DOWNSCALE_TYPE *)(src + src_rowstride));
+              DOWNSCALE_TYPE * bb = ((DOWNSCALE_TYPE *)(src + diag));
+
+              ((DOWNSCALE_TYPE *)dst)[0] = (aa[0] + ab[0] + ba[0] + bb[0]) / DOWNSCALE_DIVISOR;
+              ((DOWNSCALE_TYPE *)dst)[1] = (aa[1] + ab[1] + ba[1] + bb[1]) / DOWNSCALE_DIVISOR;
+
+              dst += bpp;
+              src += bpp * 2;
+            }
+        }
+    break;
+    case 3:
+#pragma omp parallel for
+      for (y = 0; y < src_height / 2; y++)
+        {
+          gint    x;
+          guchar *src = src_data + src_rowstride * y * 2;
+          guchar *dst = dst_data + dst_rowstride * y;
+
+          for (x = 0; x < src_width / 2; x++)
+            {
+              DOWNSCALE_TYPE * aa = ((DOWNSCALE_TYPE *)(src));
+              DOWNSCALE_TYPE * ab = ((DOWNSCALE_TYPE *)(src + bpp));
+              DOWNSCALE_TYPE * ba = ((DOWNSCALE_TYPE *)(src + src_rowstride));
+              DOWNSCALE_TYPE * bb = ((DOWNSCALE_TYPE *)(src + diag));
+
+              ((DOWNSCALE_TYPE *)dst)[0] = (aa[0] + ab[0] + ba[0] + bb[0]) / DOWNSCALE_DIVISOR;
+              ((DOWNSCALE_TYPE *)dst)[1] = (aa[1] + ab[1] + ba[1] + bb[1]) / DOWNSCALE_DIVISOR;
+              ((DOWNSCALE_TYPE *)dst)[2] = (aa[2] + ab[2] + ba[2] + bb[2]) / DOWNSCALE_DIVISOR;
+
+              dst += bpp;
+              src += bpp * 2;
+            }
+        }
+    break;
+    case 4:
+#pragma omp parallel for
+      for (y = 0; y < src_height / 2; y++)
+        {
+          gint    x;
+          guchar *src = src_data + src_rowstride * y * 2;
+          guchar *dst = dst_data + dst_rowstride * y;
+
+          for (x = 0; x < src_width / 2; x++)
+            {
+              DOWNSCALE_TYPE * aa = ((DOWNSCALE_TYPE *)(src));
+              DOWNSCALE_TYPE * ab = ((DOWNSCALE_TYPE *)(src + bpp));
+              DOWNSCALE_TYPE * ba = ((DOWNSCALE_TYPE *)(src + src_rowstride));
+              DOWNSCALE_TYPE * bb = ((DOWNSCALE_TYPE *)(src + diag));
+
+              ((DOWNSCALE_TYPE *)dst)[0] = (aa[0] + ab[0] + ba[0] + bb[0]) / DOWNSCALE_DIVISOR;
+              ((DOWNSCALE_TYPE *)dst)[1] = (aa[1] + ab[1] + ba[1] + bb[1]) / DOWNSCALE_DIVISOR;
+              ((DOWNSCALE_TYPE *)dst)[2] = (aa[2] + ab[2] + ba[2] + bb[2]) / DOWNSCALE_DIVISOR;
+              ((DOWNSCALE_TYPE *)dst)[3] = (aa[3] + ab[3] + ba[3] + bb[3]) / DOWNSCALE_DIVISOR;
+
+              dst += bpp;
+              src += bpp * 2;
+            }
+        }
+    break;
+    default:
+#pragma omp parallel for
+      for (y = 0; y < src_height / 2; y++)
+        {
+          gint    x;
+          guchar *src = src_data + src_rowstride * y * 2;
+          guchar *dst = dst_data + dst_rowstride * y;
+
+          for (x = 0; x < src_width / 2; x++)
+            {
+              gint i;
+              DOWNSCALE_TYPE * aa = ((DOWNSCALE_TYPE *)(src));
+              DOWNSCALE_TYPE * ab = ((DOWNSCALE_TYPE *)(src + bpp));
+              DOWNSCALE_TYPE * ba = ((DOWNSCALE_TYPE *)(src + src_rowstride));
+              DOWNSCALE_TYPE * bb = ((DOWNSCALE_TYPE *)(src + diag));
+
+              for (i = 0; i < components; i++)
+                ((DOWNSCALE_TYPE *)dst)[i] = (aa[i] + ab[i] + ba[i] + bb[i]) / DOWNSCALE_DIVISOR;
 
-      for (x = 0; x < src_width / 2; x++)
+              dst += bpp;
+              src += bpp * 2;
+            }
+        }
+    break;
+  }
+  else /* serial path; kept inside the guard so non-OpenMP builds never see a dangling else */
+#endif /* HAVE_OPENMP */
+  switch (components)
+  {
+    case 1:
+      for (y = 0; y < src_height / 2; y++)
         {
-          gint i;
+          gint    x;
+          guchar *src = src_data + src_rowstride * y * 2;
+          guchar *dst = dst_data + dst_rowstride * y;
 
-          for (i = 0; i < components; i++)
+          for (x = 0; x < src_width / 2; x++)
             {
-              DOWNSCALE_SUM aa = ((DOWNSCALE_TYPE *)(src))[i];
-              DOWNSCALE_SUM ab = ((DOWNSCALE_TYPE *)(src + bpp))[i];
-              DOWNSCALE_SUM ba = ((DOWNSCALE_TYPE *)(src + src_rowstride))[i];
-              DOWNSCALE_SUM bb = ((DOWNSCALE_TYPE *)(src + src_rowstride + bpp))[i];
+              DOWNSCALE_TYPE * aa = ((DOWNSCALE_TYPE *)(src));
+              DOWNSCALE_TYPE * ab = ((DOWNSCALE_TYPE *)(src + bpp));
+              DOWNSCALE_TYPE * ba = ((DOWNSCALE_TYPE *)(src + src_rowstride));
+              DOWNSCALE_TYPE * bb = ((DOWNSCALE_TYPE *)(src + diag));
 
-              ((DOWNSCALE_TYPE *)dst)[i] = (aa + ab + ba + bb) / DOWNSCALE_DIVISOR;
+              ((DOWNSCALE_TYPE *)dst)[0] = (aa[0] + ab[0] + ba[0] + bb[0]) / DOWNSCALE_DIVISOR;
+
+              dst += bpp;
+              src += bpp * 2;
             }
+        }
+    break;
+    case 2:
+      for (y = 0; y < src_height / 2; y++)
+        {
+          gint    x;
+          guchar *src = src_data + src_rowstride * y * 2;
+          guchar *dst = dst_data + dst_rowstride * y;
+
+          for (x = 0; x < src_width / 2; x++)
+            {
+              DOWNSCALE_TYPE * aa = ((DOWNSCALE_TYPE *)(src));
+              DOWNSCALE_TYPE * ab = ((DOWNSCALE_TYPE *)(src + bpp));
+              DOWNSCALE_TYPE * ba = ((DOWNSCALE_TYPE *)(src + src_rowstride));
+              DOWNSCALE_TYPE * bb = ((DOWNSCALE_TYPE *)(src + diag));
+
+              ((DOWNSCALE_TYPE *)dst)[0] = (aa[0] + ab[0] + ba[0] + bb[0]) / DOWNSCALE_DIVISOR;
+              ((DOWNSCALE_TYPE *)dst)[1] = (aa[1] + ab[1] + ba[1] + bb[1]) / DOWNSCALE_DIVISOR;
 
-          dst += bpp;
-          src += bpp * 2;
+              dst += bpp;
+              src += bpp * 2;
+            }
         }
+    break;
+    case 3:
+      for (y = 0; y < src_height / 2; y++)
+        {
+          gint    x;
+          guchar *src = src_data + src_rowstride * y * 2;
+          guchar *dst = dst_data + dst_rowstride * y;
 
-      dst_data += dst_rowstride;
-      src_data += 2 * src_rowstride;
-    }
-}
\ No newline at end of file
+          for (x = 0; x < src_width / 2; x++)
+            {
+              DOWNSCALE_TYPE * aa = ((DOWNSCALE_TYPE *)(src));
+              DOWNSCALE_TYPE * ab = ((DOWNSCALE_TYPE *)(src + bpp));
+              DOWNSCALE_TYPE * ba = ((DOWNSCALE_TYPE *)(src + src_rowstride));
+              DOWNSCALE_TYPE * bb = ((DOWNSCALE_TYPE *)(src + diag));
+
+              ((DOWNSCALE_TYPE *)dst)[0] = (aa[0] + ab[0] + ba[0] + bb[0]) / DOWNSCALE_DIVISOR;
+              ((DOWNSCALE_TYPE *)dst)[1] = (aa[1] + ab[1] + ba[1] + bb[1]) / DOWNSCALE_DIVISOR;
+              ((DOWNSCALE_TYPE *)dst)[2] = (aa[2] + ab[2] + ba[2] + bb[2]) / DOWNSCALE_DIVISOR;
+
+              dst += bpp;
+              src += bpp * 2;
+            }
+        }
+    break;
+    case 4:
+      for (y = 0; y < src_height / 2; y++)
+        {
+          gint    x;
+          guchar *src = src_data + src_rowstride * y * 2;
+          guchar *dst = dst_data + dst_rowstride * y;
+
+          for (x = 0; x < src_width / 2; x++)
+            {
+              DOWNSCALE_TYPE * aa = ((DOWNSCALE_TYPE *)(src));
+              DOWNSCALE_TYPE * ab = ((DOWNSCALE_TYPE *)(src + bpp));
+              DOWNSCALE_TYPE * ba = ((DOWNSCALE_TYPE *)(src + src_rowstride));
+              DOWNSCALE_TYPE * bb = ((DOWNSCALE_TYPE *)(src + diag));
+
+              ((DOWNSCALE_TYPE *)dst)[0] = (aa[0] + ab[0] + ba[0] + bb[0]) / DOWNSCALE_DIVISOR;
+              ((DOWNSCALE_TYPE *)dst)[1] = (aa[1] + ab[1] + ba[1] + bb[1]) / DOWNSCALE_DIVISOR;
+              ((DOWNSCALE_TYPE *)dst)[2] = (aa[2] + ab[2] + ba[2] + bb[2]) / DOWNSCALE_DIVISOR;
+              ((DOWNSCALE_TYPE *)dst)[3] = (aa[3] + ab[3] + ba[3] + bb[3]) / DOWNSCALE_DIVISOR;
+
+              dst += bpp;
+              src += bpp * 2;
+            }
+        }
+    break;
+    default:
+      for (y = 0; y < src_height / 2; y++)
+        {
+          gint    x;
+          guchar *src = src_data + src_rowstride * y * 2;
+          guchar *dst = dst_data + dst_rowstride * y;
+
+          for (x = 0; x < src_width / 2; x++)
+            {
+              gint i;
+              DOWNSCALE_TYPE * aa = ((DOWNSCALE_TYPE *)(src));
+              DOWNSCALE_TYPE * ab = ((DOWNSCALE_TYPE *)(src + bpp));
+              DOWNSCALE_TYPE * ba = ((DOWNSCALE_TYPE *)(src + src_rowstride));
+              DOWNSCALE_TYPE * bb = ((DOWNSCALE_TYPE *)(src + diag));
+
+              for (i = 0; i < components; i++)
+                ((DOWNSCALE_TYPE *)dst)[i] = (aa[i] + ab[i] + ba[i] + bb[i]) / DOWNSCALE_DIVISOR;
+
+              dst += bpp;
+              src += bpp * 2;
+            }
+        }
+    break;
+  }
+}


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]