[gthumb] cairo scale: speed optimizations



commit f41d6816805d92c2d71a4251cac101d7ff1756d2
Author: Paolo Bacchilega <paobac src gnome org>
Date:   Fri Jun 28 19:34:39 2013 +0200

    cairo scale: speed optimizations
    
    Use floating-point operations instead of fixed-point operations;
    optionally use vector operations if the compiler supports them.

 configure.ac         |   19 +++
 gthumb/cairo-scale.c |  301 ++++++++++++++++++++++++++++----------------------
 2 files changed, 187 insertions(+), 133 deletions(-)
---
diff --git a/configure.ac b/configure.ac
index 12b7f74..cda526f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -596,6 +596,25 @@ AM_CONDITIONAL(ENABLE_WEB_ALBUMS, test "x$enable_web_albums" = xyes)
 
 dnl ===========================================================================
 
+AC_MSG_CHECKING([[if gcc supports vector operations]])
+AC_COMPILE_IFELSE([ AC_LANG_SOURCE(
+                         [[
+#include <ansidecl.h>
+#if GCC_VERSION < 4007
+#error "wrong version"
+#endif
+int main(int c, char**v) { return 0; }
+                         ]]) ],
+                         [AC_MSG_RESULT(yes)
+                          have_vector_operations=yes],
+                         [AC_MSG_RESULT(no)
+                          have_vector_operations=no])
+if test "x$have_vector_operations" = "xyes"; then
+       AC_DEFINE(HAVE_VECTOR_OPERATIONS, 1, [Define to 1 if the compiler supports vector operations])
+fi
+
+dnl ===========================================================================
+
 GDK_TARGET="$($PKG_CONFIG --variable targets gdk-3.0)"
 
 AC_MSG_CHECKING([which smclient backend to use])
diff --git a/gthumb/cairo-scale.c b/gthumb/cairo-scale.c
index 5a72048..6782376 100644
--- a/gthumb/cairo-scale.c
+++ b/gthumb/cairo-scale.c
@@ -28,7 +28,25 @@
 #include "gfixed.h"
 
 
-#if 1
+#define CLAMP_PIXEL(v) (((v) <= 0) ? 0  : ((v) >= 255) ? 255 : (v));
+#define EPSILON ((ScaleReal) 1.0e-16)
+
+
+typedef float ScaleReal;
+typedef ScaleReal (*weight_func_t) (ScaleReal distance);
+
+
+#ifdef HAVE_VECTOR_OPERATIONS
+
+
+typedef ScaleReal v4r __attribute__ ((vector_size(sizeof(ScaleReal)*4)));
+typedef union {
+       v4r       v;
+       ScaleReal r[4];
+} r4vector;
+
+
+#endif /* HAVE_VECTOR_OPERATIONS */
 
 
 /* -- _cairo_image_surface_scale_nearest -- */
@@ -134,31 +152,25 @@ _cairo_image_surface_scale_nearest (cairo_surface_t *image,
  * */
 
 
-#define EPSILON ((double) 1.0e-8)
-
-
-typedef double (*weight_func_t) (double distance);
-
-
-static double
-box (double x)
+static ScaleReal inline
+box (ScaleReal x)
 {
        return 1.0;
 }
 
 
-static double
-triangle (double x)
+static ScaleReal inline
+triangle (ScaleReal x)
 {
        return (x < 1.0) ? 1.0 - x : 0.0;
 }
 
 
-static double
-sinc_fast (double x)
+static ScaleReal inline
+sinc_fast (ScaleReal x)
 {
        if (x > 4.0) {
-               const double alpha = G_PI * x;
+               const ScaleReal alpha = G_PI * x;
                return sin (alpha) / alpha;
        }
 
@@ -167,20 +179,20 @@ sinc_fast (double x)
                 * The approximations only depend on x^2 (sinc is an even function).
                 */
 
-               const double xx = x*x;
+               const ScaleReal xx = x*x;
 
                /*
                 * Maximum absolute relative error 6.3e-6 < 1/2^17.
                 */
-               const double c0 = 0.173610016489197553621906385078711564924e-2L;
-               const double c1 = -0.384186115075660162081071290162149315834e-3L;
-               const double c2 = 0.393684603287860108352720146121813443561e-4L;
-               const double c3 = -0.248947210682259168029030370205389323899e-5L;
-               const double c4 = 0.107791837839662283066379987646635416692e-6L;
-               const double c5 = -0.324874073895735800961260474028013982211e-8L;
-               const double c6 = 0.628155216606695311524920882748052490116e-10L;
-               const double c7 = -0.586110644039348333520104379959307242711e-12L;
-               const double p = c0+xx*(c1+xx*(c2+xx*(c3+xx*(c4+xx*(c5+xx*(c6+xx*c7))))));
+               const ScaleReal c0 = 0.173610016489197553621906385078711564924e-2L;
+               const ScaleReal c1 = -0.384186115075660162081071290162149315834e-3L;
+               const ScaleReal c2 = 0.393684603287860108352720146121813443561e-4L;
+               const ScaleReal c3 = -0.248947210682259168029030370205389323899e-5L;
+               const ScaleReal c4 = 0.107791837839662283066379987646635416692e-6L;
+               const ScaleReal c5 = -0.324874073895735800961260474028013982211e-8L;
+               const ScaleReal c6 = 0.628155216606695311524920882748052490116e-10L;
+               const ScaleReal c7 = -0.586110644039348333520104379959307242711e-12L;
+               const ScaleReal p = c0+xx*(c1+xx*(c2+xx*(c3+xx*(c4+xx*(c5+xx*(c6+xx*c7))))));
 
                return (xx-1.0)*(xx-4.0)*(xx-9.0)*(xx-16.0)*p;
        }
@@ -189,7 +201,7 @@ sinc_fast (double x)
 
 static struct {
        weight_func_t weight_func;
-       double        support;
+       ScaleReal     support;
 }
 const filters[N_SCALE_FILTERS] = {
        { box,           .0 },
@@ -204,7 +216,7 @@ const filters[N_SCALE_FILTERS] = {
 
 typedef struct {
        weight_func_t  weight_func;
-       double         support;
+       ScaleReal      support;
        GthAsyncTask  *task;
        gulong         total_lines;
        gulong         processed_lines;
@@ -218,7 +230,7 @@ resize_filter_create (scale_filter_t  filter_type,
 {
        resize_filter_t *resize_filter;
 
-       resize_filter = g_slice_new (resize_filter_t);
+       resize_filter = g_new (resize_filter_t, 1);
        resize_filter->weight_func = filters[filter_type].weight_func;
        resize_filter->support = filters[filter_type].support;
        resize_filter->task = task;
@@ -230,18 +242,18 @@ resize_filter_create (scale_filter_t  filter_type,
 }
 
 
-static double inline
+static ScaleReal inline
 resize_filter_get_support (resize_filter_t *resize_filter)
 {
        return resize_filter->support;
 }
 
 
-static double inline
+static ScaleReal inline
 resize_filter_get_weight (resize_filter_t *resize_filter,
-                         double           distance)
+                         ScaleReal        distance)
 {
-       double scale = 1.0;
+       ScaleReal scale = 1.0;
 
        if (resize_filter->weight_func == sinc_fast)
                scale = resize_filter->weight_func (fabs (distance));
@@ -253,171 +265,209 @@ resize_filter_get_weight (resize_filter_t *resize_filter,
 static void
 resize_filter_destroy (resize_filter_t *resize_filter)
 {
-       g_slice_free (resize_filter_t, resize_filter);
+       g_free (resize_filter);
 }
 
 
-static inline double
-reciprocal (double x)
+static inline ScaleReal
+reciprocal (ScaleReal x)
 {
-       double sign = x < 0.0 ? -1.0 : 1.0;
+       ScaleReal sign = x < 0.0 ? -1.0 : 1.0;
        return (sign * x) >= EPSILON ? 1.0 / x : sign * (1.0 / EPSILON);
 }
 
 
-#define CLAMP_PIXEL(v) (((v) <= 0) ? 0  : ((v) >= 255) ? 255 : (v));
-
-
 static void
 horizontal_scale_transpose (cairo_surface_t *image,
                            cairo_surface_t *scaled,
-                           double           scale_factor,
+                           ScaleReal        scale_factor,
                            resize_filter_t *resize_filter)
 {
-       double    scale;
-       double    support;
-       int       y;
-       guchar   *p_src;
-        guchar   *p_dest;
-        int       src_rowstride;
-        int       dest_rowstride;
-        double   *weights;
-        gfixed   *fixed_weights;
+       ScaleReal  scale;
+       ScaleReal  support;
+       int        y;
+       int        image_width;
+       int        scaled_width;
+       int        scaled_height;
+       guchar    *p_src;
+        guchar    *p_dest;
+        int        src_rowstride;
+        int        dest_rowstride;
+        ScaleReal *weights;
 
        if (resize_filter->cancelled)
                return;
 
-       scale = MAX (1.0 / scale_factor + EPSILON, 1.0);
+       scale = MAX ((ScaleReal) 1.0 / scale_factor + EPSILON, 1.0);
        support = scale * resize_filter_get_support (resize_filter);
        if (support < 0.5) {
                support = 0.5;
                scale = 1.0;
        }
 
+       image_width = cairo_image_surface_get_width (image);
+       scaled_width = cairo_image_surface_get_width (scaled);
+       scaled_height = cairo_image_surface_get_height (scaled);
        p_src = _cairo_image_surface_flush_and_get_data (image);
        p_dest = _cairo_image_surface_flush_and_get_data (scaled);
        src_rowstride = cairo_image_surface_get_stride (image);
        dest_rowstride = cairo_image_surface_get_stride (scaled);
-       weights = g_new (double, 2.0 * support + 3.0);
-       fixed_weights = g_new (gfixed, 2.0 * support + 3.0);
+       weights = g_new (ScaleReal, 2.0 * support + 3.0);
 
        scale = reciprocal (scale);
-       for (y = 0; ! resize_filter->cancelled && (y < cairo_image_surface_get_height (scaled)); y++) {
-               guchar *p_src_row;
-               guchar *p_dest_pixel;
-               double  bisect;
-               int     start;
-               int     stop;
-               double  density;
-               int     n;
-               int     x;
-               int     i;
+       for (y = 0; y < scaled_height; y++) {
+               guchar    *p_src_row;
+               guchar    *p_dest_pixel;
+               ScaleReal  bisect;
+               int        start;
+               int        stop;
+               ScaleReal  density;
+               int        n;
+               int        x;
+               int        i;
+#ifdef HAVE_VECTOR_OPERATIONS
+               r4vector   v_pixel, v_weight, v_rgba;
+#endif /* HAVE_VECTOR_OPERATIONS */
 
                if (resize_filter->task != NULL) {
                        double progress = (double) resize_filter->processed_lines++ / resize_filter->total_lines;
                        gth_async_task_set_data (resize_filter->task, NULL, NULL, &progress);
                }
 
-               bisect = (y + 0.5) / scale_factor + EPSILON;
-               start = MAX (bisect - support + 0.5, 0.0);
-               stop = MIN (bisect + support + 0.5, cairo_image_surface_get_width (image));
+               bisect = ((ScaleReal) y + 0.5) / scale + EPSILON;
+               start = bisect - support + 0.5;
+               start = CLAMP (start, 0, image_width - 1);
+               stop = bisect + support + 0.5;
+               stop = CLAMP (stop, 0, image_width - 1);
 
                density = 0.0;
                for (n = 0; n < stop - start; n++) {
-                       weights[n] = resize_filter_get_weight (resize_filter, scale * ((double) (start + n) - bisect + 0.5));
+                       weights[n] = resize_filter_get_weight (resize_filter, scale * ((ScaleReal) (start + n) - bisect + 0.5));
                        density += weights[n];
                }
 
-               density = reciprocal (density);
-               for (i = 0; i < n; i++) {
-                       double w = weights[i] * density;
-                       fixed_weights[i] = GDOUBLE_TO_FIXED (w);
+               /*
+               g_assert (n == stop - start);
+               g_assert (stop - start <= (2.0 * support) + 3);
+               */
+
+               if ((density != 0.0) && (density != 1.0)) {
+                       density = reciprocal (density);
+                       for (i = 0; i < n; i++)
+                               weights[i] *= density;
                }
 
-               p_src_row = p_src;
-               p_dest_pixel = p_dest + (y * dest_rowstride);
-               for (x = 0; x < cairo_image_surface_get_width (scaled); x++) {
+               p_src_row = p_src + (start * 4);
+               p_dest_pixel = p_dest;
+               for (x = 0; x < scaled_width; x++) {
                        guchar *p_src_pixel;
-                       gfixed  r, g, b, a;
-                       gfixed  w;
 
                        if (resize_filter->task != NULL) {
                                gth_async_task_get_data (resize_filter->task, NULL, &resize_filter->cancelled, NULL);
                                if (resize_filter->cancelled)
-                                       break;
+                                       goto out;
                        }
 
-                       p_src_pixel = p_src_row + (start * 4);
-                       r = g = b = a = GFIXED_0;
+                       p_src_pixel = p_src_row;
+
+#ifdef HAVE_VECTOR_OPERATIONS
 
+                       v_rgba.v = (v4r) { 0.0, 0.0, 0.0, 0.0 };
                        for (i = 0; i < n; i++) {
-                               w = fixed_weights[i];
-                               r += gfixed_mul (w, GINT_TO_FIXED (p_src_pixel[CAIRO_RED]));
-                               g += gfixed_mul (w, GINT_TO_FIXED (p_src_pixel[CAIRO_GREEN]));
-                               b += gfixed_mul (w, GINT_TO_FIXED (p_src_pixel[CAIRO_BLUE]));
-                               a += gfixed_mul (w, GINT_TO_FIXED (p_src_pixel[CAIRO_ALPHA]));
+                               v_pixel.v = (v4r) { p_src_pixel[CAIRO_RED],
+                                                   p_src_pixel[CAIRO_GREEN],
+                                                   p_src_pixel[CAIRO_BLUE],
+                                                   p_src_pixel[CAIRO_ALPHA] };
+                               v_weight.v = (v4r) { weights[i],
+                                                    weights[i],
+                                                    weights[i],
+                                                    weights[i]};
+                               v_rgba.v = v_rgba.v + (v_pixel.v * v_weight.v);
+
+                               p_src_pixel += 4;
+                       }
+                       v_rgba.v = v_rgba.v + 0.5;
+
+                       p_dest_pixel[CAIRO_RED] = CLAMP_PIXEL (v_rgba.r[0]);
+                       p_dest_pixel[CAIRO_GREEN] = CLAMP_PIXEL (v_rgba.r[1]);
+                       p_dest_pixel[CAIRO_BLUE] = CLAMP_PIXEL (v_rgba.r[2]);
+                       p_dest_pixel[CAIRO_ALPHA] = CLAMP_PIXEL (v_rgba.r[3]);
+
+#else /* ! HAVE_VECTOR_OPERATIONS */
+
+                       ScaleReal r, g, b, a, w;
+
+                       r = g = b = a = 0.0;
+                       for (i = 0; i < n; i++) {
+                               w = weights[i];
+
+                               r += w * p_src_pixel[CAIRO_RED];
+                               g += w * p_src_pixel[CAIRO_GREEN];
+                               b += w * p_src_pixel[CAIRO_BLUE];
+                               a += w * p_src_pixel[CAIRO_ALPHA];
 
                                p_src_pixel += 4;
                        }
 
-                       r = GFIXED_TO_INT (r);
-                       g = GFIXED_TO_INT (g);
-                       b = GFIXED_TO_INT (b);
-                       a = GFIXED_TO_INT (a);
+                       p_dest_pixel[CAIRO_RED] = CLAMP_PIXEL (r+0.5);
+                       p_dest_pixel[CAIRO_GREEN] = CLAMP_PIXEL (g+0.5);
+                       p_dest_pixel[CAIRO_BLUE] = CLAMP_PIXEL (b+0.5);
+                       p_dest_pixel[CAIRO_ALPHA] = CLAMP_PIXEL (a+0.5);
 
-                       p_dest_pixel[CAIRO_RED] = CLAMP_PIXEL (r);
-                       p_dest_pixel[CAIRO_GREEN] = CLAMP_PIXEL (g);
-                       p_dest_pixel[CAIRO_BLUE] = CLAMP_PIXEL (b);
-                       p_dest_pixel[CAIRO_ALPHA] = CLAMP_PIXEL (a);
+#endif /* HAVE_VECTOR_OPERATIONS */
 
                        p_dest_pixel += 4;
                        p_src_row += src_rowstride;
                }
+
+               p_dest += dest_rowstride;
        }
 
+       out:
+
        cairo_surface_mark_dirty (scaled);
 
        g_free (weights);
-       g_free (fixed_weights);
 }
 
 
-static cairo_surface_t *
-_cairo_image_surface_scale_filter (cairo_surface_t *image,
-                                  int              new_width,
-                                  int              new_height,
-                                  scale_filter_t   filter,
-                                  GthAsyncTask    *task)
+cairo_surface_t *
+_cairo_image_surface_scale (cairo_surface_t  *image,
+                           int               scaled_width,
+                           int               scaled_height,
+                           scale_filter_t    filter,
+                           GthAsyncTask     *task)
 {
        int              src_width;
        int              src_height;
        cairo_surface_t *scaled;
        resize_filter_t *resize_filter;
-       double           x_factor;
-       double           y_factor;
+       ScaleReal        x_factor;
+       ScaleReal        y_factor;
        cairo_surface_t *tmp;
 
-       src_width = cairo_image_surface_get_width  (image);
+       src_width = cairo_image_surface_get_width (image);
        src_height = cairo_image_surface_get_height (image);
 
-       if ((src_width == new_width) && (src_height == new_height))
+       if ((src_width == scaled_width) && (src_height == scaled_height))
                return _cairo_image_surface_copy (image);
 
-       scaled = cairo_image_surface_create (CAIRO_FORMAT_ARGB32,
-                                            new_width,
-                                            new_height);
+       scaled = _cairo_image_surface_create (CAIRO_FORMAT_ARGB32,
+                                             scaled_width,
+                                             scaled_height);
        if (scaled == NULL)
                return NULL;
 
        resize_filter = resize_filter_create (filter, task);
-       resize_filter->total_lines = new_width + new_height;
+       resize_filter->total_lines = scaled_width + scaled_height;
        resize_filter->processed_lines = 0;
 
-       x_factor = (double) new_width / src_width;
-       y_factor = (double) new_height / src_height;
-       tmp = cairo_image_surface_create (CAIRO_FORMAT_ARGB32,
-                                         src_height,
-                                         new_width);
+       x_factor = (ScaleReal) scaled_width / src_width;
+       y_factor = (ScaleReal) scaled_height / src_height;
+       tmp = _cairo_image_surface_create (CAIRO_FORMAT_ARGB32,
+                                          src_height,
+                                          scaled_width);
+
        horizontal_scale_transpose (image, tmp, x_factor, resize_filter);
        horizontal_scale_transpose (tmp, scaled, y_factor, resize_filter);
 
@@ -429,21 +479,6 @@ _cairo_image_surface_scale_filter (cairo_surface_t *image,
 
 
 cairo_surface_t *
-_cairo_image_surface_scale (cairo_surface_t  *image,
-                           int               scaled_width,
-                           int               scaled_height,
-                           scale_filter_t    filter,
-                           GthAsyncTask     *task)
-{
-       return _cairo_image_surface_scale_filter (image,
-                                                 scaled_width,
-                                                 scaled_height,
-                                                 filter,
-                                                 task);
-}
-
-
-cairo_surface_t *
 _cairo_image_surface_scale_squared (cairo_surface_t *image,
                                    int              size,
                                    scale_filter_t   quality,
@@ -515,12 +550,12 @@ _cairo_image_surface_scale_bilinear_2x2 (cairo_surface_t *image,
        guchar          *p_dest;
        int              src_rowstride;
        int              dest_rowstride;
-       double           step_x, step_y;
+       ScaleReal           step_x, step_y;
        guchar          *p_dest_row;
        guchar          *p_src_row;
        guchar          *p_src_col;
        guchar          *p_dest_col;
-       double           x_src, y_src;
+       ScaleReal           x_src, y_src;
        int              x, y;
        guchar           r00, r01, r10, r11;
        guchar           g00, g01, g10, g11;
@@ -528,8 +563,8 @@ _cairo_image_surface_scale_bilinear_2x2 (cairo_surface_t *image,
        guchar           a00, a01, a10, a11;
        guchar           r, g, b, a;
        guint32          pixel;
-       double           tmp;
-       double           x_fract, y_fract;
+       ScaleReal           tmp;
+       ScaleReal           x_fract, y_fract;
        int              col, row;
 
        scaled = cairo_image_surface_create (CAIRO_FORMAT_ARGB32,
@@ -545,8 +580,8 @@ _cairo_image_surface_scale_bilinear_2x2 (cairo_surface_t *image,
 
        cairo_surface_flush (scaled);
 
-       step_x = (double) src_width / new_width;
-       step_y = (double) src_height / new_height;
+       step_x = (ScaleReal) src_width / new_width;
+       step_y = (ScaleReal) src_height / new_height;
        p_dest_row = p_dest;
        p_src_row = p_src;
        y_src = 0;
@@ -636,7 +671,7 @@ _cairo_surface_reduce_row (guchar *dest_data,
                           int     src_width)
 {
        int     x, b;
-       double  sum;
+       ScaleReal  sum;
        int     col0, col1, col2;
        guchar  c[4];
        guint32 pixel;
@@ -732,7 +767,7 @@ _cairo_surface_reduce_by_half (cairo_surface_t *src)
 #endif
 
 
-cairo_surface_t *
+static cairo_surface_t *
 _cairo_image_surface_scale_bilinear_2x2 (cairo_surface_t *image,
                                         int              new_width,
                                         int              new_height)


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]