[gdk-pixbuf] Drop the MMX assembly optimizations

From: Emmanuele Bassi <ebassi src gnome org>
To: commits-list gnome org
Cc:
Subject: [gdk-pixbuf] Drop the MMX assembly optimizations
Date: Mon, 30 Apr 2018 17:18:07 +0000 (UTC)
commit 4b3d5be2c50d270326fad68f3a7a1c1b3a9dfd3f
Author: Emmanuele Bassi <ebassi gnome org>
Date:   Sat Apr 28 11:36:19 2018 +0100

    Drop the MMX assembly optimizations
    
    We have only ever built them on 32-bit IA platforms, and
    we could probably get better mileage out of the currently implemented
    pixops just by rearranging the C code and letting compilers do the
    optimizations for us. We should definitely consider either using pixman
    directly, or replacing slow pixops with SSE builtins, instead.

 gdk-pixbuf/pixops/DETAILS                          |   73 ------
 gdk-pixbuf/pixops/README                           |   17 --
 gdk-pixbuf/pixops/composite_line_22_4a4_mmx.S      |  239 -------------------
 .../pixops/composite_line_color_22_4a4_mmx.S       |  251 --------------------
 gdk-pixbuf/pixops/have_mmx.S                       |   74 ------
 gdk-pixbuf/pixops/pixops-internal.h                |   23 --
 gdk-pixbuf/pixops/pixops.c                         |  136 +-----------
 gdk-pixbuf/pixops/scale_line_22_33_mmx.S           |  204 ----------------
 8 files changed, 3 insertions(+), 1014 deletions(-)
---
diff --git a/gdk-pixbuf/pixops/DETAILS b/gdk-pixbuf/pixops/DETAILS
index acf16f5..08597f5 100644
--- a/gdk-pixbuf/pixops/DETAILS
+++ b/gdk-pixbuf/pixops/DETAILS
@@ -280,76 +280,3 @@ Integer tricks for compositing
 
 
 
-MMX Code
-========
-
-Line functions are provided in MMX functionsfor a few special 
-cases:
-
- n_x = n_y = 2
-
-   src_channels = 3 dest_channels = 3    op = scale
-   src_channels = 4 with alpha dest_channels = 4 no alpha  op = composite
-   src_channels = 4 with alpha dest_channels = 4 no alpha  op = composite_color
-
-For the case n_x = n_y = 2 - primarily hit when scaling up with bilinear
-scaling, we can take advantage of the fact that multiple destination
-pixels will be composed from the same source pixels.
-
-That is a destination pixel is a linear combination of the source
-pixels around it:
-
-
-  S0                     S1
-
-
-
-
-
-       D  D' D'' ...
-
-
-
-
-  S2                     S3
-
-Each mmx register is 64 bits wide, so we can unpack a source pixel
-into the low 8 bits of 4 16 bit words, and store it into a mmx 
-register.
-
-For each destination pixel, we first make sure that we have pixels S0
-... S3 loaded into registers mm0 ...mm3. (This will often involve not
-doing anything or moving mm1 and mm3 into mm0 and mm1 then reloading
-mm1 and mm3 with new values).
-
-Then we load up the appropriate weights for the 4 corner pixels
-based on the offsets of the destination pixel within the source
-pixels.
-
-We have preexpanded the weights to 64 bits wide and truncated the
-range to 8 bits, so an original filter value of 
-
- 0x5321 would be expanded to
-
- 0x0053005300530053
-
-For source buffers without alpha, we simply do a multiply-add
-of the weights, giving us a 16 bit quantity for the result
-that we shift left by 8 and store in the destination buffer.
-
-When the source buffer has alpha, then things become more
-complicated - when we load up mm0 and mm3, we premultiply
-the alpha, so they contain:
-
- (a*ff >> 8) (r*a >> 8) (g*a >> 8) (b*a >> a)
-
-Then when we multiply by the weights, and add we end up
-with premultiplied r,g,b,a in the range of 0 .. 0xff * 0ff,
-call them A,R,G,B
-
-We then need to composite with the dest pixels - which 
-we do by:
-
- r_dest = (R + ((0xff * 0xff - A) >> 8) * r_dest) >> 8
-
-(0xff * 0xff) 
diff --git a/gdk-pixbuf/pixops/README b/gdk-pixbuf/pixops/README
index 354c3a1..382fca1 100644
--- a/gdk-pixbuf/pixops/README
+++ b/gdk-pixbuf/pixops/README
@@ -138,23 +138,6 @@ TODO
   switching around conditionals and inner loops in various
   places.
 
-* Right now, in several of the most common cases, there are
-  optimized mmx routines, but no optimized C routines.
-
-  For instance, there is a 
-
-    pixops_composite_line_22_4a4_mmx()
-
-  But no 
-  
-    pixops_composite_line_22_4a4()
-
-  Also, it may be desirable to include a few more special cases - in particular:
-
-    pixops_composite_line_22_4a3()
-
-  May be desirable.
-
 * Scaling down images by large scale factors is _slow_ since huge filter
   matrixes are computed. (e.g., to scale down by a factor of 100, we compute
   101x101 filter matrixes. At some point, it would be more efficent to
diff --git a/gdk-pixbuf/pixops/pixops.c b/gdk-pixbuf/pixops/pixops.c
index f6535f1..f1b75f8 100644
--- a/gdk-pixbuf/pixops/pixops.c
+++ b/gdk-pixbuf/pixops/pixops.c
@@ -23,7 +23,6 @@
 
 #include "../fallback-c89.c"
 #include "pixops.h"
-#include "pixops-internal.h"
 
 #define SUBSAMPLE_BITS 4
 #define SUBSAMPLE (1 << SUBSAMPLE_BITS)
@@ -837,36 +836,6 @@ composite_line_22_4a4 (int *weights, int n_x, int n_y,
   return dest;
 }
 
-#ifdef USE_MMX
-static guchar *
-composite_line_22_4a4_mmx_stub (int *weights, int n_x, int n_y, guchar *dest,
-                               int dest_x, guchar *dest_end,
-                               int dest_channels, int dest_has_alpha,
-                               guchar **src, int src_channels,
-                               gboolean src_has_alpha, int x_init,
-                               int x_step, int src_width, int check_size,
-                               guint32 color1, guint32 color2)
-{
-  guint32 mmx_weights[16][8];
-  int j;
-
-  for (j=0; j<16; j++)
-    {
-      mmx_weights[j][0] = 0x00010001 * (weights[4*j] >> 8);
-      mmx_weights[j][1] = 0x00010001 * (weights[4*j] >> 8);
-      mmx_weights[j][2] = 0x00010001 * (weights[4*j + 1] >> 8);
-      mmx_weights[j][3] = 0x00010001 * (weights[4*j + 1] >> 8);
-      mmx_weights[j][4] = 0x00010001 * (weights[4*j + 2] >> 8);
-      mmx_weights[j][5] = 0x00010001 * (weights[4*j + 2] >> 8);
-      mmx_weights[j][6] = 0x00010001 * (weights[4*j + 3] >> 8);
-      mmx_weights[j][7] = 0x00010001 * (weights[4*j + 3] >> 8);
-    }
-
-  return _pixops_composite_line_22_4a4_mmx (mmx_weights, dest, src[0], src[1],
-                                           x_step, dest_end, x_init);
-}
-#endif /* USE_MMX */
-
 static void
 composite_pixel_color (guchar *dest, int dest_x, int dest_channels,
                       int dest_has_alpha, int src_has_alpha, int check_size,
@@ -980,44 +949,6 @@ composite_line_color (int *weights, int n_x, int n_y, guchar *dest,
   return dest;
 }
 
-#ifdef USE_MMX
-static guchar *
-composite_line_color_22_4a4_mmx_stub (int *weights, int n_x, int n_y,
-                                     guchar *dest, int dest_x,
-                                     guchar *dest_end, int dest_channels,
-                                     int dest_has_alpha, guchar **src,
-                                     int src_channels, gboolean src_has_alpha,
-                                     int x_init, int x_step, int src_width,
-                                     int check_size, guint32 color1,
-                                     guint32 color2)
-{
-  guint32 mmx_weights[16][8];
-  int check_shift = get_check_shift (check_size);
-  int colors[4];
-  int j;
-
-  for (j=0; j<16; j++)
-    {
-      mmx_weights[j][0] = 0x00010001 * (weights[4*j] >> 8);
-      mmx_weights[j][1] = 0x00010001 * (weights[4*j] >> 8);
-      mmx_weights[j][2] = 0x00010001 * (weights[4*j + 1] >> 8);
-      mmx_weights[j][3] = 0x00010001 * (weights[4*j + 1] >> 8);
-      mmx_weights[j][4] = 0x00010001 * (weights[4*j + 2] >> 8);
-      mmx_weights[j][5] = 0x00010001 * (weights[4*j + 2] >> 8);
-      mmx_weights[j][6] = 0x00010001 * (weights[4*j + 3] >> 8);
-      mmx_weights[j][7] = 0x00010001 * (weights[4*j + 3] >> 8);
-    }
-
-  colors[0] = (color1 & 0xff00) << 8 | (color1 & 0xff);
-  colors[1] = (color1 & 0xff0000) >> 16;
-  colors[2] = (color2 & 0xff00) << 8 | (color2 & 0xff);
-  colors[3] = (color2 & 0xff0000) >> 16;
-
-  return _pixops_composite_line_color_22_4a4_mmx (mmx_weights, dest, src[0],
-    src[1], x_step, dest_end, x_init, dest_x, check_shift, colors);
-}
-#endif /* USE_MMX */
-
 static void
 scale_pixel (guchar *dest, int dest_x, int dest_channels, int dest_has_alpha,
             int src_has_alpha, int check_size, guint32 color1, guint32 color2,
@@ -1142,34 +1073,6 @@ scale_line (int *weights, int n_x, int n_y, guchar *dest, int dest_x,
   return dest;
 }
 
-#ifdef USE_MMX 
-static guchar *
-scale_line_22_33_mmx_stub (int *weights, int n_x, int n_y, guchar *dest,
-                          int dest_x, guchar *dest_end, int dest_channels,
-                          int dest_has_alpha, guchar **src, int src_channels,
-                          gboolean src_has_alpha, int x_init, int x_step,
-                          int src_width, int check_size, guint32 color1,
-                          guint32 color2)
-{
-  guint32 mmx_weights[16][8];
-  int j;
-
-  for (j=0; j<16; j++)
-    {
-      mmx_weights[j][0] = 0x00010001 * (weights[4*j] >> 8);
-      mmx_weights[j][1] = 0x00010001 * (weights[4*j] >> 8);
-      mmx_weights[j][2] = 0x00010001 * (weights[4*j + 1] >> 8);
-      mmx_weights[j][3] = 0x00010001 * (weights[4*j + 1] >> 8);
-      mmx_weights[j][4] = 0x00010001 * (weights[4*j + 2] >> 8);
-      mmx_weights[j][5] = 0x00010001 * (weights[4*j + 2] >> 8);
-      mmx_weights[j][6] = 0x00010001 * (weights[4*j + 3] >> 8);
-      mmx_weights[j][7] = 0x00010001 * (weights[4*j + 3] >> 8);
-    }
-
-  return _pixops_scale_line_22_33_mmx (mmx_weights, dest, src[0], src[1],
-                                      x_step, dest_end, x_init);
-}
-#endif /* USE_MMX */
 
 static guchar *
 scale_line_22_33 (int *weights, int n_x, int n_y, guchar *dest, int dest_x,
@@ -1910,10 +1813,6 @@ _pixops_composite_color_real (guchar          *dest_buf,
   PixopsLineFunc line_func;
   guchar *tmp_buf = NULL;
   
-#ifdef USE_MMX
-  gboolean found_mmx = _pixops_have_mmx ();
-#endif
-
   g_return_if_fail (!(dest_channels == 3 && dest_has_alpha));
   g_return_if_fail (!(src_channels == 3 && src_has_alpha));
 
@@ -1941,14 +1840,7 @@ _pixops_composite_color_real (guchar          *dest_buf,
   if (!make_weights (&filter, interp_type, scale_x, scale_y))
     return;
 
-#ifdef USE_MMX
-  if (filter.x.n == 2 && filter.y.n == 2 &&
-      dest_channels == 4 && src_channels == 4 &&
-      src_has_alpha && !dest_has_alpha && found_mmx)
-    line_func = composite_line_color_22_4a4_mmx_stub;
-  else
-#endif
-    line_func = composite_line_color;
+  line_func = composite_line_color;
   
   pixops_process (dest_buf, render_x0, render_y0, render_x1, render_y1,
                  dest_rowstride, dest_channels, dest_has_alpha,
@@ -2071,10 +1963,6 @@ _pixops_composite_real (guchar          *dest_buf,
   PixopsLineFunc line_func;
   guchar *tmp_buf = NULL;
   
-#ifdef USE_MMX
-  gboolean found_mmx = _pixops_have_mmx ();
-#endif
-
   g_return_if_fail (!(dest_channels == 3 && dest_has_alpha));
   g_return_if_fail (!(src_channels == 3 && src_has_alpha));
 
@@ -2107,14 +1995,7 @@ _pixops_composite_real (guchar          *dest_buf,
 
   if (filter.x.n == 2 && filter.y.n == 2 && dest_channels == 4 &&
       src_channels == 4 && src_has_alpha && !dest_has_alpha)
-    {
-#ifdef USE_MMX
-      if (found_mmx)
-       line_func = composite_line_22_4a4_mmx_stub;
-      else
-#endif 
-       line_func = composite_line_22_4a4;
-    }
+    line_func = composite_line_22_4a4;
   else
     line_func = composite_line;
   
@@ -2491,10 +2372,6 @@ _pixops_scale_real (guchar        *dest_buf,
   PixopsLineFunc line_func;
   guchar *tmp_buf = NULL;      /* Temporary image for two-step scaling */
 
-#ifdef USE_MMX
-  gboolean found_mmx = _pixops_have_mmx ();
-#endif
-
   g_return_if_fail (!(dest_channels == 3 && dest_has_alpha));
   g_return_if_fail (!(src_channels == 3 && src_has_alpha));
   g_return_if_fail (!(src_has_alpha && !dest_has_alpha));
@@ -2522,14 +2399,7 @@ _pixops_scale_real (guchar        *dest_buf,
     return;
 
   if (filter.x.n == 2 && filter.y.n == 2 && dest_channels == 3 && src_channels == 3)
-    {
-#ifdef USE_MMX
-      if (found_mmx)
-       line_func = scale_line_22_33_mmx_stub;
-      else
-#endif
-       line_func = scale_line_22_33;
-    }
+    line_func = scale_line_22_33;
   else
     line_func = scale_line;
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]