[gimp] Bug 785890 - libappgegl doesn't use SSE2 compiler flags ...
- From: N/A <ell src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gimp] Bug 785890 - libappgegl doesn't use SSE2 compiler flags ...
- Date: Sun, 6 Aug 2017 18:20:46 +0000 (UTC)
commit 7ecd3f27836263768a89d82b36bf68b209d2cc55
Author: Ell <ell_se yahoo com>
Date: Sun Aug 6 13:59:41 2017 -0400
Bug 785890 - libappgegl doesn't use SSE2 compiler flags ...
... causing compilation to fail on 32 bit targets
Use SSE2 compiler flags when building libappgegl, since the new
smudge tool code in it uses SSE2 intrinsics.
Avoid using SSE for the smudge tool if SSE acceleration is disabled
at runtime, or if the buffers are not properly aligned.
app/gegl/Makefile.am | 1 +
app/gegl/gimp-gegl-loops.c | 98 +++++++++++++++++++++++++++-----------------
2 files changed, 61 insertions(+), 38 deletions(-)
---
diff --git a/app/gegl/Makefile.am b/app/gegl/Makefile.am
index 28db805..f825326 100644
--- a/app/gegl/Makefile.am
+++ b/app/gegl/Makefile.am
@@ -9,6 +9,7 @@ AM_CPPFLAGS = \
$(CAIRO_CFLAGS) \
$(GEGL_CFLAGS) \
$(GDK_PIXBUF_CFLAGS) \
+ $(SSE2_EXTRA_CFLAGS) \
-I$(includedir)
noinst_LIBRARIES = libappgegl.a
diff --git a/app/gegl/gimp-gegl-loops.c b/app/gegl/gimp-gegl-loops.c
index 21508c8..9fe1d91 100644
--- a/app/gegl/gimp-gegl-loops.c
+++ b/app/gegl/gimp-gegl-loops.c
@@ -30,8 +30,9 @@
#include <gdk-pixbuf/gdk-pixbuf.h>
#include <gegl.h>
-#include "libgimpmath/gimpmath.h"
+#include "libgimpbase/gimpbase.h"
#include "libgimpcolor/gimpcolor.h"
+#include "libgimpmath/gimpmath.h"
#include "gimp-gegl-types.h"
@@ -362,53 +363,60 @@ gimp_gegl_smudge_with_paint_blend (const gfloat *src1,
const gfloat *src2,
gfloat src2_rate,
gfloat *dest,
- gboolean no_erasing_src2)
+ gboolean no_erasing_src2,
+ gboolean sse)
{
+ gfloat orginal_src2_alpha;
+ gfloat src1_alpha;
+ gfloat src2_alpha;
+ gfloat result_alpha;
/* 2017/4/13 shark0r : According to my test, SSE decreases about 25%
* execution time
*/
#if defined COMPILE_SSE2_INTRINISICS
+ if (sse)
+ {
+ __m128 v_src1 = _mm_loadu_ps (src1);
+ __m128 v_src2 = _mm_loadu_ps (src2);
+ __m128 *v_dest = (__v4sf *) dest;
- __m128 v_src1 = _mm_loadu_ps (src1);
- __m128 v_src2 = _mm_loadu_ps (src2);
- __m128 *v_dest = (__v4sf *) dest;
+ orginal_src2_alpha = v_src2[3];
+ src1_alpha = src1_rate * v_src1[3];
+ src2_alpha = src2_rate * orginal_src2_alpha;
+ result_alpha = src1_alpha + src2_alpha;
- gfloat orginal_src2_alpha = v_src2[3];
- gfloat src1_alpha = src1_rate * v_src1[3];
- gfloat src2_alpha = src2_rate * orginal_src2_alpha;
- gfloat result_alpha = src1_alpha + src2_alpha;
+ if (result_alpha == 0)
+ {
+ *v_dest = _mm_set1_ps (0);
+ return;
+ }
- if (result_alpha == 0)
- {
- *v_dest = _mm_set1_ps (0);
- return;
+ *v_dest = (v_src1 * _mm_set1_ps (src1_alpha) +
+ v_src2 * _mm_set1_ps (src2_alpha)) /
+ _mm_set1_ps (result_alpha);
}
+ else
+#endif
+ {
+ gint b;
- *v_dest = (v_src1 * _mm_set1_ps (src1_alpha) +
- v_src2 * _mm_set1_ps (src2_alpha)) /
- _mm_set1_ps (result_alpha);
-
-#else
+ orginal_src2_alpha = src2[3];
+ src1_alpha = src1_rate * src1[3];
+ src2_alpha = src2_rate * orginal_src2_alpha;
+ result_alpha = src1_alpha + src2_alpha;
- gfloat orginal_src2_alpha = src2[3];
- gfloat src1_alpha = src1_rate * src1[3];
- gfloat src2_alpha = src2_rate * orginal_src2_alpha;
- gfloat result_alpha = src1_alpha + src2_alpha;
- gint b;
+ if (result_alpha == 0)
+ {
+ memset (dest, 0, sizeof (gfloat) * 4);
+ return;
+ }
- if (result_alpha == 0)
- {
- memset (dest, 0, sizeof (gfloat) * 4);
- return;
+ for (b = 0; b < 3; b++)
+ dest[b] = (src1[b] * src1_alpha + src2[b] * src2_alpha) / result_alpha;
}
- for (b = 0; b < 3; b++)
- dest[b] = (src1[b] * src1_alpha + src2[b] * src2_alpha) / result_alpha;
-
-#endif
-
if (no_erasing_src2)
{
result_alpha = MAX (result_alpha, orginal_src2_alpha);
@@ -468,16 +476,30 @@ gimp_gegl_smudge_with_paint (GeglBuffer *accum_buffer,
while (gegl_buffer_iterator_next (iter))
{
- gfloat *accum = iter->data[0];
- const gfloat *canvas = iter->data[1];
- gfloat *paint = iter->data[2];
- gint count = iter->length;
+ gfloat *accum = iter->data[0];
+ const gfloat *canvas = iter->data[1];
+ gfloat *paint = iter->data[2];
+ gint count = iter->length;
+ gboolean sse_canvas = FALSE;
+ gboolean sse_brush = FALSE;
+
+#if defined COMPILE_SSE2_INTRINISICS
+ if (gimp_cpu_accel_get_support () & GIMP_CPU_ACCEL_X86_SSE2)
+ {
+ sse_canvas = ((guintptr) accum |
+ (guintptr) canvas) % 16 == 0;
+
+ sse_brush = ((guintptr) (brush_color ? brush_color_float : paint) |
+ (guintptr) accum |
+ (guintptr) paint) % 16 == 0;
+ }
+#endif
while (count--)
{
/* blend accum_buffer and canvas_buffer to accum_buffer */
gimp_gegl_smudge_with_paint_blend (accum, rate, canvas, 1 - rate,
- accum, no_erasing);
+ accum, no_erasing, sse_canvas);
/* blend accum_buffer and brush color/pixmap to paint_buffer */
if (brush_a == 0) /* pure smudge */
@@ -489,7 +511,7 @@ gimp_gegl_smudge_with_paint (GeglBuffer *accum_buffer,
gfloat *src1 = brush_color ? brush_color_float : paint;
gimp_gegl_smudge_with_paint_blend (src1, flow, accum, 1 - flow,
- paint, no_erasing);
+ paint, no_erasing, sse_brush);
}
accum += 4;
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]