[gegl] Remove gegl-simd.h
- From: Øyvind Kolås <ok src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gegl] Remove gegl-simd.h
- Date: Fri, 17 Dec 2010 15:40:34 +0000 (UTC)
commit 8b010612a3fb5dc7ad7590fa5eb2c66000dcbf16
Author: Øyvind Kolås <pippin gimp org>
Date: Tue Dec 14 03:05:11 2010 +0000
Remove gegl-simd.h
Replace the GCC vector extensions code with code that is friendlier to the
auto-vectorizer in GCC. This optimizes some areas that didn't have
optimizations before and also simplifies code elsewhere. The core ops that had
SIMD optimizations now trigger the auto-vectorizer as well; the performance
difference seems hard to measure.
configure.ac | 2 +-
gegl/Makefile.am | 1 -
gegl/gegl-plugin.h | 1 -
gegl/gegl-simd.h | 54 ------------
gegl/gegl.h | 1 +
operations/affine/affine.c | 2 +-
operations/common/brightness-contrast.c | 13 +--
operations/common/invert.c | 29 ------
operations/common/opacity.c | 51 -----------
operations/common/over.c | 62 ++------------
operations/generated/math.rb | 10 +-
operations/generated/other-blend.rb | 53 ++----------
operations/generated/svg-12-blend.rb | 6 +-
operations/generated/svg-12-porter-duff.rb | 128 +++++----------------------
perf/Makefile | 2 +-
perf/tests/bcontrast-minichunk.c | 2 +-
perf/tests/bcontrast.c | 2 +-
perf/tests/comp.c | 2 +-
perf/tests/passthrough.c | 2 +-
19 files changed, 60 insertions(+), 363 deletions(-)
---
diff --git a/configure.ac b/configure.ac
index 87f87e8..5233c35 100644
--- a/configure.ac
+++ b/configure.ac
@@ -357,7 +357,7 @@ if test "x$enable_mmx" = "xyes"; then
AC_MSG_RESULT(yes)
if test "x$enable_sse" = "xyes"; then
- AS_COMPILER_FLAG([-msse], [SSE_EXTRA_CFLAGS="-msse"])
+ AS_COMPILER_FLAG([-msse], [SSE_EXTRA_CFLAGS="-msse -msse2 -ftree-vectorize -ffast-math -ftree-vectorizer-verbose=3"])
AC_MSG_CHECKING(whether we can compile SSE code)
diff --git a/gegl/Makefile.am b/gegl/Makefile.am
index b4b2c43..bb161df 100644
--- a/gegl/Makefile.am
+++ b/gegl/Makefile.am
@@ -32,7 +32,6 @@ GEGL_public_HEADERS = \
gegl-matrix.h \
gegl-chant.h \
gegl-lookup.h \
- gegl-simd.h \
gegl-plugin.h \
gegl-version.h \
buffer/gegl-buffer.h \
diff --git a/gegl/gegl-plugin.h b/gegl/gegl-plugin.h
index 8529696..e9929fe 100644
--- a/gegl/gegl-plugin.h
+++ b/gegl/gegl-plugin.h
@@ -105,7 +105,6 @@ const gchar * gegl_extension_handler_get_saver (const gchar *extensi
#include <operation/gegl-operation-source.h>
#include <operation/gegl-operation-sink.h>
#include <operation/gegl-operation-meta.h>
-#include <gegl-simd.h>
#include <gegl-lookup.h>
diff --git a/gegl/gegl.h b/gegl/gegl.h
index bc9fa49..10d7d92 100644
--- a/gegl/gegl.h
+++ b/gegl/gegl.h
@@ -891,6 +891,7 @@ GeglNode *gegl_node (const gchar *op_type,
*/
GeglNode *gegl_graph (GeglNode *node);
+#define GEGL_ALIGNED __restrict__ __attribute__((__aligned__ (16)))
G_END_DECLS
#endif /* __GEGL_H__ */
diff --git a/operations/affine/affine.c b/operations/affine/affine.c
index 42ca40e..3d5167d 100644
--- a/operations/affine/affine.c
+++ b/operations/affine/affine.c
@@ -678,7 +678,7 @@ affine_generic (GeglBuffer *dest,
GeglBufferIterator *i;
const GeglRectangle *dest_extent;
gint x, y;
- gfloat *dest_buf,
+ gfloat * restrict dest_buf,
*dest_ptr;
GeglMatrix3 inverse;
GeglMatrix2 inverse_jacobian;
diff --git a/operations/common/brightness-contrast.c b/operations/common/brightness-contrast.c
index ade7864..c70d147 100644
--- a/operations/common/brightness-contrast.c
+++ b/operations/common/brightness-contrast.c
@@ -82,8 +82,8 @@ process (GeglOperation *op,
* chanted properties
*/
GeglChantO *o = GEGL_CHANT_PROPERTIES (op);
- gfloat *in_pixel;
- gfloat *out_pixel;
+ gfloat * GEGL_ALIGNED in_pixel;
+ gfloat * GEGL_ALIGNED out_pixel;
gfloat brightness, contrast;
glong i;
@@ -95,12 +95,9 @@ process (GeglOperation *op,
for (i=0; i<n_pixels; i++)
{
- gint component;
- for (component=0; component <3 ; component++)
- {
- out_pixel[component] =
- (in_pixel[component] - 0.5) * contrast + brightness + 0.5;
- }
+ out_pixel[0] = (in_pixel[0] - 0.5f) * contrast + brightness + 0.5;
+ out_pixel[1] = (in_pixel[1] - 0.5f) * contrast + brightness + 0.5;
+ out_pixel[2] = (in_pixel[2] - 0.5f) * contrast + brightness + 0.5;
out_pixel[3] = in_pixel[3]; /* copy the alpha */
in_pixel += 4;
out_pixel += 4;
diff --git a/operations/common/invert.c b/operations/common/invert.c
index 02c4937..613cea0 100644
--- a/operations/common/invert.c
+++ b/operations/common/invert.c
@@ -60,30 +60,6 @@ process (GeglOperation *op,
return TRUE;
}
-#ifdef HAS_G4FLOAT
-static gboolean
-process_simd (GeglOperation *op,
- void *in_buf,
- void *out_buf,
- glong samples,
- const GeglRectangle *roi)
-{
- g4float *in = in_buf;
- g4float *out = out_buf;
- g4float one = g4float_one;
-
- while (samples--)
- {
- gfloat a= g4float_a(*in)[3];
- *out = one - *in;
- g4float_a(*out)[3]=a;
- in ++;
- out ++;
- }
- return TRUE;
-}
-#endif
-
static void
gegl_chant_class_init (GeglChantClass *klass)
{
@@ -100,11 +76,6 @@ gegl_chant_class_init (GeglChantClass *klass)
operation_class->description =
_("Inverts the components (except alpha), the result is the "
"corresponding \"negative\" image.");
-
-#ifdef HAS_G4FLOAT
- gegl_operation_class_add_processor (operation_class,
- G_CALLBACK (process_simd), "simd");
-#endif
}
#endif
diff --git a/operations/common/opacity.c b/operations/common/opacity.c
index 325be96..a63d22f 100644
--- a/operations/common/opacity.c
+++ b/operations/common/opacity.c
@@ -88,47 +88,6 @@ process (GeglOperation *op,
return TRUE;
}
-
-#ifdef HAS_G4FLOAT
-static gboolean
-process_simd (GeglOperation *op,
- void *in_buf,
- void *aux_buf,
- void *out_buf,
- glong samples,
- const GeglRectangle *roi)
-{
- GeglChantO *o = GEGL_CHANT_PROPERTIES (op);
- g4float *in = in_buf;
- gfloat *aux = aux_buf;
- g4float *out = out_buf;
-
- /* add 0.5 to brightness here to make the logic in the innerloop tighter
- */
- g4float value = g4float_all(o->value);
-
- if (aux == NULL)
- {
- g_assert (o->value != 1.0); /* should haven been passed through */
- while (samples--)
- *(out ++) = *(in ++) * value;
- }
- else if (o->value == 1.0)
- while (samples--)
- {
- *(out++) = *(in++) * g4float_all (*(aux));
- aux++;
- }
- else
- while (samples--)
- {
- *(out++) = *(in++) * g4float_all ((*(aux))) * value;
- aux++;
- }
- return TRUE;
-}
-#endif
-
/* Fast path when opacity is a no-op
*/
static gboolean operation_process (GeglOperation *operation,
@@ -170,16 +129,6 @@ gegl_chant_class_init (GeglChantClass *klass)
operation_class->process = operation_process;
point_composer_class->process = process;
-#ifdef HAS_G4FLOAT
- /* add conditionally compiled variation of process(), gegl should be able
- * to determine which is fastest and hopefully if any implementation is
- * broken and not conforming to the reference implementation.
- */
- gegl_operation_class_add_processor (operation_class,
- G_CALLBACK (process_simd), "simd");
-#endif
-
-
operation_class->name = "gegl:opacity";
operation_class->categories = "transparency";
operation_class->description =
diff --git a/operations/common/over.c b/operations/common/over.c
index d026dac..b2a4290 100644
--- a/operations/common/over.c
+++ b/operations/common/over.c
@@ -49,31 +49,20 @@ process (GeglOperation *op,
const GeglRectangle *roi)
{
gint i;
- gfloat *in = in_buf;
- gfloat *aux = aux_buf;
- gfloat *out = out_buf;
+ gfloat * GEGL_ALIGNED in = in_buf;
+ gfloat * GEGL_ALIGNED aux = aux_buf;
+ gfloat * GEGL_ALIGNED out = out_buf;
if (aux==NULL)
return TRUE;
for (i = 0; i < n_pixels; i++)
{
- gint j;
- gfloat aA, aB, aD;
-
- aB = in[3];
- aA = aux[3];
- aD = aA + aB - aA * aB;
-
- for (j = 0; j < 3; j++)
- {
- gfloat cA, cB;
-
- cB = in[j];
- cA = aux[j];
- out[j] = cA + cB * (1 - aA);
- }
- out[3] = aD;
+ out[0] = aux[0] + in[0] * (1.0f - aux[3]);
+ out[1] = aux[1] + in[1] * (1.0f - aux[3]);
+ out[2] = aux[2] + in[2] * (1.0f - aux[3]);
+ out[3] = aux[3] + in[3] - aux[3] * in[3];
+
in += 4;
aux += 4;
out += 4;
@@ -81,35 +70,6 @@ process (GeglOperation *op,
return TRUE;
}
-#ifdef HAS_G4FLOAT
-
-static gboolean
-process_gegl4float (GeglOperation *op,
- void *in_buf,
- void *aux_buf,
- void *out_buf,
- glong n_pixels,
- const GeglRectangle *roi)
-{
- g4float *A = aux_buf;
- g4float *B = in_buf;
- g4float *D = out_buf;
-
- if (B==NULL || n_pixels == 0)
- return TRUE;
-
- while (n_pixels--)
- {
- *D = *A + *B * (g4float_one - g4float_all(g4float_a(*A)[3]));
-
- A++; B++; D++;
- }
-
- return TRUE;
-}
-
-#endif
-
/* Fast paths */
static gboolean operation_process (GeglOperation *operation,
GeglOperationContext *context,
@@ -169,12 +129,6 @@ gegl_chant_class_init (GeglChantClass *klass)
point_composer_class->process = process;
-#ifdef HAS_G4FLOAT
- gegl_operation_class_add_processor (operation_class,
- G_CALLBACK (process_gegl4float), "simd");
-#endif
-
-
operation_class->name = "gegl:over";
operation_class->description =
_("Porter Duff operation over (d = cA + cB * (1 - aA))");
diff --git a/operations/generated/math.rb b/operations/generated/math.rb
index f8c558d..8871843 100755
--- a/operations/generated/math.rb
+++ b/operations/generated/math.rb
@@ -27,9 +27,9 @@ a = [
['add', 'c = c + value', 0.0],
['subtract', 'c = c - value', 0.0],
['multiply', 'c = c * value', 1.0],
- ['divide', 'c = value==0.0?0.0:c/value', 1.0],
+ ['divide', 'c = value==0.0f?0.0f:c/value', 1.0],
['gamma', 'c = powf (c, value)', 1.0],
-# ['threshold', 'c = c>=value?1.0:0.0', 0.5],
+# ['threshold', 'c = c>=value?1.0f:0.0f', 0.5],
# ['invert', 'c = 1.0-c']
]
@@ -87,9 +87,9 @@ process (GeglOperation *op,
glong n_pixels,
const GeglRectangle *roi)
{
- gfloat *in = in_buf;
- gfloat *out = out_buf;
- gfloat *aux = aux_buf;
+ gfloat * GEGL_ALIGNED in = in_buf;
+ gfloat * GEGL_ALIGNED out = out_buf;
+ gfloat * GEGL_ALIGNED aux = aux_buf;
gint i;
if (aux == NULL)
diff --git a/operations/generated/other-blend.rb b/operations/generated/other-blend.rb
index a486585..5c482af 100755
--- a/operations/generated/other-blend.rb
+++ b/operations/generated/other-blend.rb
@@ -33,13 +33,11 @@ copyright = '
a = [
# Alias for porter-duff src-over
-# ['normal', 'cA + cB * (1 - aA)',
-# 'aA + aB - aA * aB',
-# '*D = *A + *B * (g4float_one - g4float_all(g4float_a(*A)[3]))'],
+# ['normal', 'cA + cB * (1.0f - aA)',
+# 'aA + aB - aA * aB'],
# Alias for porter-duff src-over
-# ['over', 'cA + cB * (1 - aA)',
-# 'aA + aB - aA * aB',
-# '*D = *A + *B * (g4float_one - g4float_all(g4float_a(*A)[3]))'],
+# ['over', 'cA + cB * (1.0f - aA)',
+# 'aA + aB - aA * aB'],
]
file_head1 = '
@@ -73,9 +71,9 @@ process (GeglOperation *op,
const GeglRectangle *roi)
{
gint i;
- gfloat *in = in_buf;
- gfloat *aux = aux_buf;
- gfloat *out = out_buf;
+ gfloat * GEGL_ALIGNED in = in_buf;
+ gfloat * GEGL_ALIGNED aux = aux_buf;
+ gfloat * GEGL_ALIGNED out = out_buf;
if (aux==NULL)
return TRUE;
@@ -94,12 +92,6 @@ gegl_chant_class_init (GeglChantClass *klass)
point_composer_class->process = process;
operation_class->prepare = prepare;
-
-#ifdef HAS_G4FLOAT
- gegl_operation_class_add_processor (operation_class,
- G_CALLBACK (process_gegl4float), "simd");
-#endif
-
'
file_tail2 = ' operation_class->categories = "compositors:porter-duff";
@@ -122,7 +114,6 @@ a.each do
swapcased = name.swapcase
c_formula = item[1]
a_formula = item[2]
- sse_formula = item[3]
file.write copyright
file.write file_head1
@@ -159,36 +150,6 @@ a.each do
return TRUE;
}
-#ifdef HAS_G4FLOAT
-
-static gboolean
-process_gegl4float (GeglOperation *op,
- void *in_buf,
- void *aux_buf,
- void *out_buf,
- glong n_pixels,
- const GeglRectangle *roi)
-{
- g4float *A = aux_buf;
- g4float *B = in_buf;
- g4float *D = out_buf;
-
- if (B==NULL || n_pixels == 0)
- return TRUE;
-
- while (n_pixels--)
- {
- #{sse_formula};
-
- A++; B++; D++;
- }
-
- return TRUE;
-}
-
-#endif
-
-
"
file.write file_tail1
file.write "
diff --git a/operations/generated/svg-12-blend.rb b/operations/generated/svg-12-blend.rb
index 4db325b..b304d3e 100755
--- a/operations/generated/svg-12-blend.rb
+++ b/operations/generated/svg-12-blend.rb
@@ -99,9 +99,9 @@ process (GeglOperation *op,
glong n_pixels,
const GeglRectangle *roi)
{
- gfloat *in = in_buf;
- gfloat *aux = aux_buf;
- gfloat *out = out_buf;
+ gfloat * GEGL_ALIGNED in = in_buf;
+ gfloat * GEGL_ALIGNED aux = aux_buf;
+ gfloat * GEGL_ALIGNED out = out_buf;
gint i;
if (aux==NULL)
diff --git a/operations/generated/svg-12-porter-duff.rb b/operations/generated/svg-12-porter-duff.rb
index f905f75..21af131 100755
--- a/operations/generated/svg-12-porter-duff.rb
+++ b/operations/generated/svg-12-porter-duff.rb
@@ -32,45 +32,33 @@ copyright = '
*/'
a = [
- ['clear', '0.0',
- '0.0',
- '*D = g4float_zero'],
+ ['clear', '0.0f',
+ '0.0f'],
['src', 'cA',
- 'aA',
- '*D = *A'],
+ 'aA'],
['dst', 'cB',
- 'aB',
- '*D = *B'],
- ['src_over', 'cA + cB * (1 - aA)',
- 'aA + aB - aA * aB',
- '*D = *A + g4float_mul (*B, 1.0 - g4floatA(*A))'],
- ['dst_over', 'cB + cA * (1 - aB)',
- 'aA + aB - aA * aB',
- '*D = *B + g4float_mul (*A, 1.0 - g4floatA(*B))'],
+ 'aB'],
+ ['src_over', 'cA + cB * (1.0f - aA)',
+ 'aA + aB - aA * aB'],
+ ['dst_over', 'cB + cA * (1.0f - aB)',
+ 'aA + aB - aA * aB'],
['dst_in', 'cB * aA', # <- XXX: typo?
- 'aA * aB',
- '*D = g4float_mul (*B, g4floatA(*A))'],
- ['src_out', 'cA * (1 - aB)',
- 'aA * (1 - aB)',
- '*D = g4float_mul (*A, 1.0 - g4floatA(*B))'],
- ['dst_out', 'cB * (1 - aA)',
- 'aB * (1 - aA)',
- '*D = g4float_mul (*B, 1.0 - g4floatA(*A))'],
- ['src_atop', 'cA * aB + cB * (1 - aA)',
- 'aB',
- '*D = g4float_mul (*A, g4floatA(*B)) + g4float_mul (*B, 1.0 - g4floatA(*A));g4floatA(*D)=g4floatA(*B)'],
-
- ['dst_atop', 'cB * aA + cA * (1 - aB)',
- 'aA',
- '*D = g4float_mul (*B, g4floatA(*A)) + g4float_mul (*A, 1.0 - g4floatA(*B));g4floatA(*D)=g4floatA(*A)'],
- ['xor', 'cA * (1 - aB)+ cB * (1 - aA)',
- 'aA + aB - 2 * aA * aB',
- '*D = *A * *B'] # FIXME this is wrong
+ 'aA * aB'],
+ ['src_out', 'cA * (1.0f - aB)',
+ 'aA * (1.0f - aB)'],
+ ['dst_out', 'cB * (1.0f - aA)',
+ 'aB * (1.0f - aA)'],
+ ['src_atop', 'cA * aB + cB * (1.0f - aA)',
+ 'aB'],
+
+ ['dst_atop', 'cB * aA + cA * (1.0f - aB)',
+ 'aA'],
+ ['xor', 'cA * (1.0f - aB)+ cB * (1.0f - aA)',
+ 'aA + aB - 2.0f * aA * aB'],
]
b = [ ['src_in', 'cA * aB', # the bounding box of this mode is the
- 'aA * aB', # bounding box of the input only.
- '*D = g4float_mul(*A, g4floatA(*B))']]
+ 'aA * aB']] # bounding box of the input only.
file_head1 = '
#include "config.h"
@@ -103,9 +91,9 @@ process (GeglOperation *op,
const GeglRectangle *roi)
{
gint i;
- gfloat *in = in_buf;
- gfloat *aux = aux_buf;
- gfloat *out = out_buf;
+ gfloat * GEGL_ALIGNED in = in_buf;
+ gfloat * GEGL_ALIGNED aux = aux_buf;
+ gfloat * GEGL_ALIGNED out = out_buf;
if (aux==NULL)
return TRUE;
@@ -125,11 +113,6 @@ gegl_chant_class_init (GeglChantClass *klass)
point_composer_class->process = process;
operation_class->prepare = prepare;
-#ifdef HAS_G4FLOAT
- gegl_operation_class_add_processor (operation_class,
- G_CALLBACK (process_simd), "simd");
-#endif
-
'
file_tail2 = ' operation_class->categories = "compositors:porter-duff";
@@ -152,7 +135,6 @@ a.each do
swapcased = name.swapcase
c_formula = item[1]
a_formula = item[2]
- sse_formula = item[3]
file.write copyright
file.write file_head1
@@ -188,37 +170,6 @@ a.each do
}
return TRUE;
}
-
-#ifdef HAS_G4FLOAT
-
-static gboolean
-process_simd (GeglOperation *op,
- void *in_buf,
- void *aux_buf,
- void *out_buf,
- glong n_pixels,
- const GeglRectangle *roi)
-{
- g4float *A = aux_buf;
- g4float *B = in_buf;
- g4float *D = out_buf;
-
- if (B==NULL || n_pixels == 0)
- return TRUE;
-
- while (n_pixels--)
- {
- #{sse_formula};
-
- A++; B++; D++;
- }
-
- return TRUE;
-}
-
-#endif
-
-
"
file.write file_tail1
file.write "
@@ -248,7 +199,6 @@ b.each do
swapcased = name.swapcase
c_formula = item[1]
a_formula = item[2]
- sse_formula = item[3]
file.write copyright
file.write file_head1
@@ -285,36 +235,6 @@ b.each do
return TRUE;
}
-#ifdef HAS_G4FLOAT
-
-static gboolean
-process_simd (GeglOperation *op,
- void *in_buf,
- void *aux_buf,
- void *out_buf,
- glong n_pixels,
- const GeglRectangle *roi)
-{
- g4float *A = aux_buf;
- g4float *B = in_buf;
- g4float *D = out_buf;
-
- if (B==NULL || n_pixels == 0)
- return TRUE;
-
- while (n_pixels--)
- {
- #{sse_formula};
-
- A++; B++; D++;
- }
-
- return TRUE;
-}
-
-#endif
-
-
static GeglRectangle get_bounding_box (GeglOperation *self)
{
GeglRectangle *in_rect = gegl_operation_source_get_bounding_box (self, \"input\");
diff --git a/perf/Makefile b/perf/Makefile
index c77f053..320f1f4 100644
--- a/perf/Makefile
+++ b/perf/Makefile
@@ -2,7 +2,7 @@ PROJECT_PATH = ../
# number of revisions to create
START_REV = master
-REVISIONS = 20
+REVISIONS = 4
#comment the following line out to see what the makefile is doing
.SILENT:
diff --git a/perf/tests/bcontrast-minichunk.c b/perf/tests/bcontrast-minichunk.c
index 19afd13..48989f8 100644
--- a/perf/tests/bcontrast-minichunk.c
+++ b/perf/tests/bcontrast-minichunk.c
@@ -15,7 +15,7 @@ main (gint argc,
buffer = test_buffer (2048, 2048, babl_format ("RGBA float"));
-#define ITERATIONS 1
+#define ITERATIONS 2
test_start ();
for (i=0;i< ITERATIONS;i++)
{
diff --git a/perf/tests/bcontrast.c b/perf/tests/bcontrast.c
index 070b186..c6a1891 100644
--- a/perf/tests/bcontrast.c
+++ b/perf/tests/bcontrast.c
@@ -13,7 +13,7 @@ main (gint argc,
buffer = test_buffer (2048, 2048, babl_format ("RGBA float"));
-#define ITERATIONS 3
+#define ITERATIONS 4
test_start ();
for (i=0;i< ITERATIONS;i++)
{
diff --git a/perf/tests/comp.c b/perf/tests/comp.c
index 4402dfb..5a62c04 100644
--- a/perf/tests/comp.c
+++ b/perf/tests/comp.c
@@ -15,7 +15,7 @@ main (gint argc,
bufferB = test_buffer (2048, 2048, babl_format ("RGBA float"));
buffer = test_buffer (2048, 2048, babl_format ("RGBA float"));
-#define ITERATIONS 5
+#define ITERATIONS 6
test_start ();
for (i=0;i< ITERATIONS;i++)
{
diff --git a/perf/tests/passthrough.c b/perf/tests/passthrough.c
index eefeeaf..fded1ba 100644
--- a/perf/tests/passthrough.c
+++ b/perf/tests/passthrough.c
@@ -13,7 +13,7 @@ main (gint argc,
buffer = test_buffer (2048, 2048, babl_format ("RGBA float"));
-#define ITERATIONS 3
+#define ITERATIONS 4
test_start ();
for (i=0;i< ITERATIONS;i++)
gegl = gegl_graph (sink = gegl_node ("gegl:buffer-sink", "buffer", &buffer2, NULL,
[Date Prev][Date Next] [Thread Prev][Thread Next]
[Thread Index] [Date Index] [Author Index]