[gimp] Bug 785890 - libappgegl doesn't use SSE2 compiler flags ...



commit 7ecd3f27836263768a89d82b36bf68b209d2cc55
Author: Ell <ell_se yahoo com>
Date:   Sun Aug 6 13:59:41 2017 -0400

    Bug 785890 - libappgegl doesn't use SSE2 compiler flags ...
    
    ... causing compilation to fail on 32-bit targets
    
    Use SSE2 compiler flags when building libappgegl, since it's used by
    the new smudge tool code.
    
    Avoid using SSE for the smudge tool if SSE acceleration is disabled
    at runtime, or if the buffers are not properly aligned.

 app/gegl/Makefile.am       |    1 +
 app/gegl/gimp-gegl-loops.c |   98 +++++++++++++++++++++++++++-----------------
 2 files changed, 61 insertions(+), 38 deletions(-)
---
diff --git a/app/gegl/Makefile.am b/app/gegl/Makefile.am
index 28db805..f825326 100644
--- a/app/gegl/Makefile.am
+++ b/app/gegl/Makefile.am
@@ -9,6 +9,7 @@ AM_CPPFLAGS = \
        $(CAIRO_CFLAGS)                 \
        $(GEGL_CFLAGS)                  \
        $(GDK_PIXBUF_CFLAGS)            \
+       $(SSE2_EXTRA_CFLAGS)            \
        -I$(includedir)
 
 noinst_LIBRARIES = libappgegl.a
diff --git a/app/gegl/gimp-gegl-loops.c b/app/gegl/gimp-gegl-loops.c
index 21508c8..9fe1d91 100644
--- a/app/gegl/gimp-gegl-loops.c
+++ b/app/gegl/gimp-gegl-loops.c
@@ -30,8 +30,9 @@
 #include <gdk-pixbuf/gdk-pixbuf.h>
 #include <gegl.h>
 
-#include "libgimpmath/gimpmath.h"
+#include "libgimpbase/gimpbase.h"
 #include "libgimpcolor/gimpcolor.h"
+#include "libgimpmath/gimpmath.h"
 
 #include "gimp-gegl-types.h"
 
@@ -362,53 +363,60 @@ gimp_gegl_smudge_with_paint_blend (const gfloat *src1,
                                    const gfloat *src2,
                                    gfloat        src2_rate,
                                    gfloat       *dest,
-                                   gboolean      no_erasing_src2)
+                                   gboolean      no_erasing_src2,
+                                   gboolean      sse)
 {
+  gfloat orginal_src2_alpha;
+  gfloat src1_alpha;
+  gfloat src2_alpha;
+  gfloat result_alpha;
 
 /* 2017/4/13 shark0r : According to my test, SSE decreases about 25%
  * execution time
  */
 
 #if defined COMPILE_SSE2_INTRINISICS
+  if (sse)
+    {
+      __m128  v_src1 = _mm_loadu_ps (src1);
+      __m128  v_src2 = _mm_loadu_ps (src2);
+      __m128 *v_dest = (__v4sf *) dest;
 
-  __m128 v_src1  = _mm_loadu_ps (src1);
-  __m128 v_src2  = _mm_loadu_ps (src2);
-  __m128 *v_dest = (__v4sf *) dest;
+      orginal_src2_alpha = v_src2[3];
+      src1_alpha         = src1_rate * v_src1[3];
+      src2_alpha         = src2_rate * orginal_src2_alpha;
+      result_alpha       = src1_alpha + src2_alpha;
 
-  gfloat orginal_src2_alpha = v_src2[3];
-  gfloat src1_alpha         = src1_rate * v_src1[3];
-  gfloat src2_alpha         = src2_rate * orginal_src2_alpha;
-  gfloat result_alpha       = src1_alpha + src2_alpha;
+      if (result_alpha == 0)
+        {
+          *v_dest = _mm_set1_ps (0);
+          return;
+        }
 
-  if (result_alpha == 0)
-    {
-      *v_dest = _mm_set1_ps (0);
-      return;
+      *v_dest = (v_src1 * _mm_set1_ps (src1_alpha) +
+                 v_src2 * _mm_set1_ps (src2_alpha)) /
+                _mm_set1_ps (result_alpha);
     }
+  else
+#endif
+    {
+      gint b;
 
-  *v_dest = (v_src1 * _mm_set1_ps (src1_alpha) +
-             v_src2 * _mm_set1_ps (src2_alpha)) /
-            _mm_set1_ps (result_alpha);
-
-#else
+      orginal_src2_alpha = src2[3];
+      src1_alpha         = src1_rate * src1[3];
+      src2_alpha         = src2_rate * orginal_src2_alpha;
+      result_alpha       = src1_alpha + src2_alpha;
 
-  gfloat orginal_src2_alpha = src2[3];
-  gfloat src1_alpha         = src1_rate * src1[3];
-  gfloat src2_alpha         = src2_rate * orginal_src2_alpha;
-  gfloat result_alpha       = src1_alpha + src2_alpha;
-  gint   b;
+      if (result_alpha == 0)
+        {
+          memset (dest, 0, sizeof (gfloat) * 4);
+          return;
+        }
 
-  if (result_alpha == 0)
-    {
-      memset (dest, 0, sizeof (gfloat) * 4);
-      return;
+      for (b = 0; b < 3; b++)
+        dest[b] = (src1[b] * src1_alpha + src2[b] * src2_alpha) / result_alpha;
     }
 
-  for (b = 0; b < 3; b++)
-    dest[b] = (src1[b] * src1_alpha + src2[b] * src2_alpha) / result_alpha;
-
-#endif
-
   if (no_erasing_src2)
     {
       result_alpha = MAX (result_alpha, orginal_src2_alpha);
@@ -468,16 +476,30 @@ gimp_gegl_smudge_with_paint (GeglBuffer          *accum_buffer,
 
   while (gegl_buffer_iterator_next (iter))
     {
-      gfloat       *accum  = iter->data[0];
-      const gfloat *canvas = iter->data[1];
-      gfloat       *paint  = iter->data[2];
-      gint          count  = iter->length;
+      gfloat       *accum      = iter->data[0];
+      const gfloat *canvas     = iter->data[1];
+      gfloat       *paint      = iter->data[2];
+      gint          count      = iter->length;
+      gboolean      sse_canvas = FALSE;
+      gboolean      sse_brush  = FALSE;
+
+#if defined COMPILE_SSE2_INTRINISICS
+      if (gimp_cpu_accel_get_support () & GIMP_CPU_ACCEL_X86_SSE2)
+        {
+          sse_canvas = ((guintptr) accum |
+                        (guintptr) canvas) % 16 == 0;
+
+          sse_brush  = ((guintptr) (brush_color ? brush_color_float : paint) |
+                        (guintptr) accum                                     |
+                        (guintptr) paint) % 16 == 0;
+        }
+#endif
 
       while (count--)
         {
           /* blend accum_buffer and canvas_buffer to accum_buffer */
           gimp_gegl_smudge_with_paint_blend (accum, rate, canvas, 1 - rate,
-                                             accum, no_erasing);
+                                             accum, no_erasing, sse_canvas);
 
           /* blend accum_buffer and brush color/pixmap to paint_buffer */
           if (brush_a == 0) /* pure smudge */
@@ -489,7 +511,7 @@ gimp_gegl_smudge_with_paint (GeglBuffer          *accum_buffer,
               gfloat *src1 = brush_color ? brush_color_float : paint;
 
               gimp_gegl_smudge_with_paint_blend (src1, flow, accum, 1 - flow,
-                                                 paint, no_erasing);
+                                                 paint, no_erasing, sse_brush);
             }
 
           accum  += 4;


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]