[gimp] app: move libappgegl's SSE2 bits to a separate library



commit 64ade97702bb1ad64f0a1c5cdf035ee08ad0e7a0
Author: Ell <ell_se yahoo com>
Date:   Thu Aug 17 12:42:34 2017 -0400

    app: move libappgegl's SSE2 bits to a separate library
    
    Split libappgegl into libappgegl-generic and libappgegl-sse2, and
    move the SSE2 code (part of the newly added smudge code) to the
    latter, so that the rest of the code can be compiled without SSE2
    compiler flags.  This allows building GIMP with SSE acceleration
    enabled, while running the resulting binary on a target with no
    SSE acceleration.

 app/gegl/Makefile.am            |   31 +++++++--
 app/gegl/gimp-gegl-loops-sse2.c |  127 +++++++++++++++++++++++++++++++++
 app/gegl/gimp-gegl-loops-sse2.h |   40 +++++++++++
 app/gegl/gimp-gegl-loops.c      |  148 ++++++++++++++++++---------------------
 4 files changed, 261 insertions(+), 85 deletions(-)
---
diff --git a/app/gegl/Makefile.am b/app/gegl/Makefile.am
index f825326..1643c8c 100644
--- a/app/gegl/Makefile.am
+++ b/app/gegl/Makefile.am
@@ -9,12 +9,14 @@ AM_CPPFLAGS = \
        $(CAIRO_CFLAGS)                 \
        $(GEGL_CFLAGS)                  \
        $(GDK_PIXBUF_CFLAGS)            \
-       $(SSE2_EXTRA_CFLAGS)            \
        -I$(includedir)
 
-noinst_LIBRARIES = libappgegl.a
+noinst_LIBRARIES = \
+       libappgegl-generic.a    \
+       libappgegl-sse2.a       \
+       libappgegl.a
 
-libappgegl_a_sources = \
+libappgegl_generic_a_sources = \
        gimp-gegl-enums.h               \
        gimp-gegl-types.h               \
        gimp-babl.c                     \
@@ -42,9 +44,28 @@ libappgegl_a_sources = \
        gimptilehandlervalidate.c       \
        gimptilehandlervalidate.h
 
-libappgegl_a_built_sources = gimp-gegl-enums.c
+libappgegl_generic_a_built_sources = gimp-gegl-enums.c
+
+libappgegl_sse2_a_sources = \
+       gimp-gegl-loops-sse2.c          \
+       gimp-gegl-loops-sse2.h
+
+libappgegl_generic_a_SOURCES = $(libappgegl_generic_a_built_sources) $(libappgegl_generic_a_sources)
+
+libappgegl_sse2_a_SOURCES = $(libappgegl_sse2_a_sources)
+
+libappgegl_sse2_a_CFLAGS = $(SSE2_EXTRA_CFLAGS)
+
+libappgegl_a_SOURCES =
+
+
+libappgegl.a: libappgegl-generic.a \
+             libappgegl-sse2.a
+       $(AR) $(ARFLAGS) libappgegl.a \
+         $(libappgegl_generic_a_OBJECTS) \
+         $(libappgegl_sse2_a_OBJECTS)
+       $(RANLIB) libappgegl.a
 
-libappgegl_a_SOURCES = $(libappgegl_a_built_sources) $(libappgegl_a_sources)
 
 #
 # rules to generate built sources
diff --git a/app/gegl/gimp-gegl-loops-sse2.c b/app/gegl/gimp-gegl-loops-sse2.c
new file mode 100644
index 0000000..4a930e9
--- /dev/null
+++ b/app/gegl/gimp-gegl-loops-sse2.c
@@ -0,0 +1,127 @@
+/* GIMP - The GNU Image Manipulation Program
+ * Copyright (C) 1995 Spencer Kimball and Peter Mattis
+ *
+ * gimp-gegl-loops-sse2.c
+ * Copyright (C) 2012 Michael Natterer <mitch gimp org>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+
+#include <string.h>
+
+#include <cairo.h>
+#include <gdk-pixbuf/gdk-pixbuf.h>
+#include <gegl.h>
+
+#include "gimp-gegl-types.h"
+
+#include "gimp-gegl-loops-sse2.h"
+
+
+#if COMPILE_SSE2_INTRINISICS
+
+#include <emmintrin.h>
+
+
+/* helper function of gimp_gegl_smudge_with_paint_process_sse2()
+ * src and dest can be the same address
+ */
+static inline void
+gimp_gegl_smudge_with_paint_blend_sse2 (const gfloat *src1,
+                                        gfloat        src1_rate,
+                                        const gfloat *src2,
+                                        gfloat        src2_rate,
+                                        gfloat       *dest,
+                                        gboolean      no_erasing_src2)
+{
+  /* 2017/4/13 shark0r : According to my test, SSE decreases about 25%
+   * execution time
+   */
+
+  __m128  v_src1 = _mm_loadu_ps (src1);
+  __m128  v_src2 = _mm_loadu_ps (src2);
+  __m128 *v_dest = (__v4sf *) dest;
+
+  gfloat  orginal_src2_alpha;
+  gfloat  src1_alpha;
+  gfloat  src2_alpha;
+  gfloat  result_alpha;
+
+  orginal_src2_alpha = v_src2[3];
+  src1_alpha         = src1_rate * v_src1[3];
+  src2_alpha         = src2_rate * orginal_src2_alpha;
+  result_alpha       = src1_alpha + src2_alpha;
+
+  if (result_alpha == 0)
+    {
+      *v_dest = _mm_set1_ps (0);
+      return;
+    }
+
+  *v_dest = (v_src1 * _mm_set1_ps (src1_alpha) +
+             v_src2 * _mm_set1_ps (src2_alpha)) /
+            _mm_set1_ps (result_alpha);
+
+  if (no_erasing_src2)
+    {
+      result_alpha = MAX (result_alpha, orginal_src2_alpha);
+    }
+
+  dest[3] = result_alpha;
+}
+
+/* helper function of gimp_gegl_smudge_with_paint()
+ *
+ * note that it's the caller's responsibility to verify that the buffers are
+ * properly aligned
+ */
+void
+gimp_gegl_smudge_with_paint_process_sse2 (gfloat       *accum,
+                                          const gfloat *canvas,
+                                          gfloat       *paint,
+                                          gint          count,
+                                          const gfloat *brush_color,
+                                          gfloat        brush_a,
+                                          gboolean      no_erasing,
+                                          gfloat        flow,
+                                          gfloat        rate)
+{
+  while (count--)
+    {
+      /* blend accum_buffer and canvas_buffer to accum_buffer */
+      gimp_gegl_smudge_with_paint_blend_sse2 (accum, rate, canvas, 1 - rate,
+                                              accum, no_erasing);
+
+      /* blend accum_buffer and brush color/pixmap to paint_buffer */
+      if (brush_a == 0) /* pure smudge */
+        {
+          memcpy (paint, accum, sizeof (gfloat) * 4);
+        }
+      else
+        {
+          const gfloat *src1 = brush_color ? brush_color : paint;
+
+          gimp_gegl_smudge_with_paint_blend_sse2 (src1, flow, accum, 1 - flow,
+                                                  paint, no_erasing);
+        }
+
+      accum  += 4;
+      canvas += 4;
+      paint  += 4;
+    }
+}
+
+#endif /* COMPILE_SSE2_INTRINISICS */
diff --git a/app/gegl/gimp-gegl-loops-sse2.h b/app/gegl/gimp-gegl-loops-sse2.h
new file mode 100644
index 0000000..6254bf5
--- /dev/null
+++ b/app/gegl/gimp-gegl-loops-sse2.h
@@ -0,0 +1,40 @@
+/* GIMP - The GNU Image Manipulation Program
+ * Copyright (C) 1995 Spencer Kimball and Peter Mattis
+ *
+ * gimp-gegl-loops-sse2.h
+ * Copyright (C) 2012 Michael Natterer <mitch gimp org>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __GIMP_GEGL_LOOPS_SSE2_H__
+#define __GIMP_GEGL_LOOPS_SSE2_H__
+
+
+#if COMPILE_SSE2_INTRINISICS
+
+void   gimp_gegl_smudge_with_paint_process_sse2 (gfloat       *accum,
+                                                 const gfloat *canvas,
+                                                 gfloat       *paint,
+                                                 gint          count,
+                                                 const gfloat *brush_color,
+                                                 gfloat        brush_a,
+                                                 gboolean      no_erasing,
+                                                 gfloat        flow,
+                                                 gfloat        rate);
+
+#endif /* COMPILE_SSE2_INTRINISICS */
+
+
+#endif /* __GIMP_GEGL_LOOPS_SSE2_H__ */
diff --git a/app/gegl/gimp-gegl-loops.c b/app/gegl/gimp-gegl-loops.c
index 9fe1d91..1b20654 100644
--- a/app/gegl/gimp-gegl-loops.c
+++ b/app/gegl/gimp-gegl-loops.c
@@ -22,10 +22,6 @@
 
 #include <string.h>
 
-#if COMPILE_SSE2_INTRINISICS
-#include <emmintrin.h>
-#endif
-
 #include <cairo.h>
 #include <gdk-pixbuf/gdk-pixbuf.h>
 #include <gegl.h>
@@ -38,6 +34,7 @@
 
 #include "gimp-babl.h"
 #include "gimp-gegl-loops.h"
+#include "gimp-gegl-loops-sse2.h"
 
 #include "core/gimpprogress.h"
 
@@ -354,75 +351,80 @@ gimp_gegl_dodgeburn (GeglBuffer          *src_buffer,
     }
 }
 
-/* helper function of gimp_gegl_smudge_with_paint()
+/* helper function of gimp_gegl_smudge_with_paint_process()
    src and dest can be the same address
  */
-static void
+static inline void
 gimp_gegl_smudge_with_paint_blend (const gfloat *src1,
                                    gfloat        src1_rate,
                                    const gfloat *src2,
                                    gfloat        src2_rate,
                                    gfloat       *dest,
-                                   gboolean      no_erasing_src2,
-                                   gboolean      sse)
+                                   gboolean      no_erasing_src2)
 {
   gfloat orginal_src2_alpha;
   gfloat src1_alpha;
   gfloat src2_alpha;
   gfloat result_alpha;
+  gint   b;
 
-/* 2017/4/13 shark0r : According to my test, SSE decreases about 25%
- * execution time
- */
+  orginal_src2_alpha = src2[3];
+  src1_alpha         = src1_rate * src1[3];
+  src2_alpha         = src2_rate * orginal_src2_alpha;
+  result_alpha       = src1_alpha + src2_alpha;
 
-#if defined COMPILE_SSE2_INTRINISICS
-  if (sse)
+  if (result_alpha == 0)
     {
-      __m128  v_src1 = _mm_loadu_ps (src1);
-      __m128  v_src2 = _mm_loadu_ps (src2);
-      __m128 *v_dest = (__v4sf *) dest;
-
-      orginal_src2_alpha = v_src2[3];
-      src1_alpha         = src1_rate * v_src1[3];
-      src2_alpha         = src2_rate * orginal_src2_alpha;
-      result_alpha       = src1_alpha + src2_alpha;
+      memset (dest, 0, sizeof (gfloat) * 4);
+      return;
+    }
 
-      if (result_alpha == 0)
-        {
-          *v_dest = _mm_set1_ps (0);
-          return;
-        }
+  for (b = 0; b < 3; b++)
+    dest[b] = (src1[b] * src1_alpha + src2[b] * src2_alpha) / result_alpha;
 
-      *v_dest = (v_src1 * _mm_set1_ps (src1_alpha) +
-                 v_src2 * _mm_set1_ps (src2_alpha)) /
-                _mm_set1_ps (result_alpha);
-    }
-  else
-#endif
+  if (no_erasing_src2)
     {
-      gint b;
+      result_alpha = MAX (result_alpha, orginal_src2_alpha);
+    }
 
-      orginal_src2_alpha = src2[3];
-      src1_alpha         = src1_rate * src1[3];
-      src2_alpha         = src2_rate * orginal_src2_alpha;
-      result_alpha       = src1_alpha + src2_alpha;
+  dest[3] = result_alpha;
+}
 
-      if (result_alpha == 0)
+/* helper function of gimp_gegl_smudge_with_paint() */
+static void
+gimp_gegl_smudge_with_paint_process (gfloat       *accum,
+                                     const gfloat *canvas,
+                                     gfloat       *paint,
+                                     gint          count,
+                                     const gfloat *brush_color,
+                                     gfloat        brush_a,
+                                     gboolean      no_erasing,
+                                     gfloat        flow,
+                                     gfloat        rate)
+{
+  while (count--)
+    {
+      /* blend accum_buffer and canvas_buffer to accum_buffer */
+      gimp_gegl_smudge_with_paint_blend (accum, rate, canvas, 1 - rate,
+                                         accum, no_erasing);
+
+      /* blend accum_buffer and brush color/pixmap to paint_buffer */
+      if (brush_a == 0) /* pure smudge */
         {
-          memset (dest, 0, sizeof (gfloat) * 4);
-          return;
+          memcpy (paint, accum, sizeof (gfloat) * 4);
         }
+      else
+        {
+          const gfloat *src1 = brush_color ? brush_color : paint;
 
-      for (b = 0; b < 3; b++)
-        dest[b] = (src1[b] * src1_alpha + src2[b] * src2_alpha) / result_alpha;
-    }
+          gimp_gegl_smudge_with_paint_blend (src1, flow, accum, 1 - flow,
+                                             paint, no_erasing);
+        }
 
-  if (no_erasing_src2)
-    {
-      result_alpha = MAX (result_alpha, orginal_src2_alpha);
+      accum  += 4;
+      canvas += 4;
+      paint  += 4;
     }
-
-  dest[3] = result_alpha;
 }
 
 /*  smudge painting calculation. Currently only smudge tool uses this function
@@ -449,6 +451,8 @@ gimp_gegl_smudge_with_paint (GeglBuffer          *accum_buffer,
   GeglAccessMode      paint_buffer_access_mode = (brush_color ?
                                                   GEGL_ACCESS_WRITE :
                                                   GEGL_ACCESS_READWRITE);
+  gboolean            sse2 = (gimp_cpu_accel_get_support () &
+                              GIMP_CPU_ACCEL_X86_SSE2);
 
   iter = gegl_buffer_iterator_new (accum_buffer, accum_rect, 0,
                                    babl_format ("RGBA float"),
@@ -480,43 +484,27 @@ gimp_gegl_smudge_with_paint (GeglBuffer          *accum_buffer,
       const gfloat *canvas     = iter->data[1];
       gfloat       *paint      = iter->data[2];
       gint          count      = iter->length;
-      gboolean      sse_canvas = FALSE;
-      gboolean      sse_brush  = FALSE;
 
-#if defined COMPILE_SSE2_INTRINISICS
-      if (gimp_cpu_accel_get_support () & GIMP_CPU_ACCEL_X86_SSE2)
+#if COMPILE_SSE2_INTRINISICS
+      if (sse2 && ((guintptr) accum                                     |
+                   (guintptr) canvas                                    |
+                   (guintptr) (brush_color ? brush_color_float : paint) |
+                   (guintptr) paint) % 16 == 0)
         {
-          sse_canvas = ((guintptr) accum |
-                        (guintptr) canvas) % 16 == 0;
-
-          sse_brush  = ((guintptr) (brush_color ? brush_color_float : paint) |
-                        (guintptr) accum                                     |
-                        (guintptr) paint) % 16 == 0;
+          gimp_gegl_smudge_with_paint_process_sse2 (accum, canvas, paint, count,
+                                                    brush_color ? brush_color_float :
+                                                                  NULL,
+                                                    brush_a,
+                                                    no_erasing, flow, rate);
         }
+      else
 #endif
-
-      while (count--)
         {
-          /* blend accum_buffer and canvas_buffer to accum_buffer */
-          gimp_gegl_smudge_with_paint_blend (accum, rate, canvas, 1 - rate,
-                                             accum, no_erasing, sse_canvas);
-
-          /* blend accum_buffer and brush color/pixmap to paint_buffer */
-          if (brush_a == 0) /* pure smudge */
-            {
-              memcpy (paint, accum, sizeof (gfloat) * 4);
-            }
-          else
-            {
-              gfloat *src1 = brush_color ? brush_color_float : paint;
-
-              gimp_gegl_smudge_with_paint_blend (src1, flow, accum, 1 - flow,
-                                                 paint, no_erasing, sse_brush);
-            }
-
-          accum  += 4;
-          canvas += 4;
-          paint  += 4;
+          gimp_gegl_smudge_with_paint_process (accum, canvas, paint, count,
+                                               brush_color ? brush_color_float :
+                                                             NULL,
+                                               brush_a,
+                                               no_erasing, flow, rate);
         }
     }
 }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]