[gdk-pixbuf] Drop the MMX assembly optimizations
- From: Emmanuele Bassi <ebassi src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gdk-pixbuf] Drop the MMX assembly optimizations
- Date: Mon, 30 Apr 2018 17:18:07 +0000 (UTC)
commit 4b3d5be2c50d270326fad68f3a7a1c1b3a9dfd3f
Author: Emmanuele Bassi <ebassi gnome org>
Date: Sat Apr 28 11:36:19 2018 +0100
Drop the MMX assembly optimizations
We haven't built them on anything that isn't a 32bit IA platform, and
we could probably get better mileage out of the currently implemented
pixops just by rearranging the C code and letting compilers do the
optimizations for us. We should definitely consider either using pixman
directly, or replacing slow pixops with SSE builtins, instead.
gdk-pixbuf/pixops/DETAILS | 73 ------
gdk-pixbuf/pixops/README | 17 --
gdk-pixbuf/pixops/composite_line_22_4a4_mmx.S | 239 -------------------
.../pixops/composite_line_color_22_4a4_mmx.S | 251 --------------------
gdk-pixbuf/pixops/have_mmx.S | 74 ------
gdk-pixbuf/pixops/pixops-internal.h | 23 --
gdk-pixbuf/pixops/pixops.c | 136 +-----------
gdk-pixbuf/pixops/scale_line_22_33_mmx.S | 204 ----------------
8 files changed, 3 insertions(+), 1014 deletions(-)
---
diff --git a/gdk-pixbuf/pixops/DETAILS b/gdk-pixbuf/pixops/DETAILS
index acf16f5..08597f5 100644
--- a/gdk-pixbuf/pixops/DETAILS
+++ b/gdk-pixbuf/pixops/DETAILS
@@ -280,76 +280,3 @@ Integer tricks for compositing
-MMX Code
-========
-
-Line functions are provided in MMX functionsfor a few special
-cases:
-
- n_x = n_y = 2
-
- src_channels = 3 dest_channels = 3 op = scale
- src_channels = 4 with alpha dest_channels = 4 no alpha op = composite
- src_channels = 4 with alpha dest_channels = 4 no alpha op = composite_color
-
-For the case n_x = n_y = 2 - primarily hit when scaling up with bilinear
-scaling, we can take advantage of the fact that multiple destination
-pixels will be composed from the same source pixels.
-
-That is a destination pixel is a linear combination of the source
-pixels around it:
-
-
- S0 S1
-
-
-
-
-
- D D' D'' ...
-
-
-
-
- S2 S3
-
-Each mmx register is 64 bits wide, so we can unpack a source pixel
-into the low 8 bits of 4 16 bit words, and store it into a mmx
-register.
-
-For each destination pixel, we first make sure that we have pixels S0
-... S3 loaded into registers mm0 ...mm3. (This will often involve not
-doing anything or moving mm1 and mm3 into mm0 and mm1 then reloading
-mm1 and mm3 with new values).
-
-Then we load up the appropriate weights for the 4 corner pixels
-based on the offsets of the destination pixel within the source
-pixels.
-
-We have preexpanded the weights to 64 bits wide and truncated the
-range to 8 bits, so an original filter value of
-
- 0x5321 would be expanded to
-
- 0x0053005300530053
-
-For source buffers without alpha, we simply do a multiply-add
-of the weights, giving us a 16 bit quantity for the result
-that we shift left by 8 and store in the destination buffer.
-
-When the source buffer has alpha, then things become more
-complicated - when we load up mm0 and mm3, we premultiply
-the alpha, so they contain:
-
- (a*ff >> 8) (r*a >> 8) (g*a >> 8) (b*a >> a)
-
-Then when we multiply by the weights, and add we end up
-with premultiplied r,g,b,a in the range of 0 .. 0xff * 0ff,
-call them A,R,G,B
-
-We then need to composite with the dest pixels - which
-we do by:
-
- r_dest = (R + ((0xff * 0xff - A) >> 8) * r_dest) >> 8
-
-(0xff * 0xff)
diff --git a/gdk-pixbuf/pixops/README b/gdk-pixbuf/pixops/README
index 354c3a1..382fca1 100644
--- a/gdk-pixbuf/pixops/README
+++ b/gdk-pixbuf/pixops/README
@@ -138,23 +138,6 @@ TODO
switching around conditionals and inner loops in various
places.
-* Right now, in several of the most common cases, there are
- optimized mmx routines, but no optimized C routines.
-
- For instance, there is a
-
- pixops_composite_line_22_4a4_mmx()
-
- But no
-
- pixops_composite_line_22_4a4()
-
- Also, it may be desirable to include a few more special cases - in particular:
-
- pixops_composite_line_22_4a3()
-
- May be desirable.
-
* Scaling down images by large scale factors is _slow_ since huge filter
matrixes are computed. (e.g., to scale down by a factor of 100, we compute
101x101 filter matrixes. At some point, it would be more efficent to
diff --git a/gdk-pixbuf/pixops/pixops.c b/gdk-pixbuf/pixops/pixops.c
index f6535f1..f1b75f8 100644
--- a/gdk-pixbuf/pixops/pixops.c
+++ b/gdk-pixbuf/pixops/pixops.c
@@ -23,7 +23,6 @@
#include "../fallback-c89.c"
#include "pixops.h"
-#include "pixops-internal.h"
#define SUBSAMPLE_BITS 4
#define SUBSAMPLE (1 << SUBSAMPLE_BITS)
@@ -837,36 +836,6 @@ composite_line_22_4a4 (int *weights, int n_x, int n_y,
return dest;
}
-#ifdef USE_MMX
-static guchar *
-composite_line_22_4a4_mmx_stub (int *weights, int n_x, int n_y, guchar *dest,
- int dest_x, guchar *dest_end,
- int dest_channels, int dest_has_alpha,
- guchar **src, int src_channels,
- gboolean src_has_alpha, int x_init,
- int x_step, int src_width, int check_size,
- guint32 color1, guint32 color2)
-{
- guint32 mmx_weights[16][8];
- int j;
-
- for (j=0; j<16; j++)
- {
- mmx_weights[j][0] = 0x00010001 * (weights[4*j] >> 8);
- mmx_weights[j][1] = 0x00010001 * (weights[4*j] >> 8);
- mmx_weights[j][2] = 0x00010001 * (weights[4*j + 1] >> 8);
- mmx_weights[j][3] = 0x00010001 * (weights[4*j + 1] >> 8);
- mmx_weights[j][4] = 0x00010001 * (weights[4*j + 2] >> 8);
- mmx_weights[j][5] = 0x00010001 * (weights[4*j + 2] >> 8);
- mmx_weights[j][6] = 0x00010001 * (weights[4*j + 3] >> 8);
- mmx_weights[j][7] = 0x00010001 * (weights[4*j + 3] >> 8);
- }
-
- return _pixops_composite_line_22_4a4_mmx (mmx_weights, dest, src[0], src[1],
- x_step, dest_end, x_init);
-}
-#endif /* USE_MMX */
-
static void
composite_pixel_color (guchar *dest, int dest_x, int dest_channels,
int dest_has_alpha, int src_has_alpha, int check_size,
@@ -980,44 +949,6 @@ composite_line_color (int *weights, int n_x, int n_y, guchar *dest,
return dest;
}
-#ifdef USE_MMX
-static guchar *
-composite_line_color_22_4a4_mmx_stub (int *weights, int n_x, int n_y,
- guchar *dest, int dest_x,
- guchar *dest_end, int dest_channels,
- int dest_has_alpha, guchar **src,
- int src_channels, gboolean src_has_alpha,
- int x_init, int x_step, int src_width,
- int check_size, guint32 color1,
- guint32 color2)
-{
- guint32 mmx_weights[16][8];
- int check_shift = get_check_shift (check_size);
- int colors[4];
- int j;
-
- for (j=0; j<16; j++)
- {
- mmx_weights[j][0] = 0x00010001 * (weights[4*j] >> 8);
- mmx_weights[j][1] = 0x00010001 * (weights[4*j] >> 8);
- mmx_weights[j][2] = 0x00010001 * (weights[4*j + 1] >> 8);
- mmx_weights[j][3] = 0x00010001 * (weights[4*j + 1] >> 8);
- mmx_weights[j][4] = 0x00010001 * (weights[4*j + 2] >> 8);
- mmx_weights[j][5] = 0x00010001 * (weights[4*j + 2] >> 8);
- mmx_weights[j][6] = 0x00010001 * (weights[4*j + 3] >> 8);
- mmx_weights[j][7] = 0x00010001 * (weights[4*j + 3] >> 8);
- }
-
- colors[0] = (color1 & 0xff00) << 8 | (color1 & 0xff);
- colors[1] = (color1 & 0xff0000) >> 16;
- colors[2] = (color2 & 0xff00) << 8 | (color2 & 0xff);
- colors[3] = (color2 & 0xff0000) >> 16;
-
- return _pixops_composite_line_color_22_4a4_mmx (mmx_weights, dest, src[0],
- src[1], x_step, dest_end, x_init, dest_x, check_shift, colors);
-}
-#endif /* USE_MMX */
-
static void
scale_pixel (guchar *dest, int dest_x, int dest_channels, int dest_has_alpha,
int src_has_alpha, int check_size, guint32 color1, guint32 color2,
@@ -1142,34 +1073,6 @@ scale_line (int *weights, int n_x, int n_y, guchar *dest, int dest_x,
return dest;
}
-#ifdef USE_MMX
-static guchar *
-scale_line_22_33_mmx_stub (int *weights, int n_x, int n_y, guchar *dest,
- int dest_x, guchar *dest_end, int dest_channels,
- int dest_has_alpha, guchar **src, int src_channels,
- gboolean src_has_alpha, int x_init, int x_step,
- int src_width, int check_size, guint32 color1,
- guint32 color2)
-{
- guint32 mmx_weights[16][8];
- int j;
-
- for (j=0; j<16; j++)
- {
- mmx_weights[j][0] = 0x00010001 * (weights[4*j] >> 8);
- mmx_weights[j][1] = 0x00010001 * (weights[4*j] >> 8);
- mmx_weights[j][2] = 0x00010001 * (weights[4*j + 1] >> 8);
- mmx_weights[j][3] = 0x00010001 * (weights[4*j + 1] >> 8);
- mmx_weights[j][4] = 0x00010001 * (weights[4*j + 2] >> 8);
- mmx_weights[j][5] = 0x00010001 * (weights[4*j + 2] >> 8);
- mmx_weights[j][6] = 0x00010001 * (weights[4*j + 3] >> 8);
- mmx_weights[j][7] = 0x00010001 * (weights[4*j + 3] >> 8);
- }
-
- return _pixops_scale_line_22_33_mmx (mmx_weights, dest, src[0], src[1],
- x_step, dest_end, x_init);
-}
-#endif /* USE_MMX */
static guchar *
scale_line_22_33 (int *weights, int n_x, int n_y, guchar *dest, int dest_x,
@@ -1910,10 +1813,6 @@ _pixops_composite_color_real (guchar *dest_buf,
PixopsLineFunc line_func;
guchar *tmp_buf = NULL;
-#ifdef USE_MMX
- gboolean found_mmx = _pixops_have_mmx ();
-#endif
-
g_return_if_fail (!(dest_channels == 3 && dest_has_alpha));
g_return_if_fail (!(src_channels == 3 && src_has_alpha));
@@ -1941,14 +1840,7 @@ _pixops_composite_color_real (guchar *dest_buf,
if (!make_weights (&filter, interp_type, scale_x, scale_y))
return;
-#ifdef USE_MMX
- if (filter.x.n == 2 && filter.y.n == 2 &&
- dest_channels == 4 && src_channels == 4 &&
- src_has_alpha && !dest_has_alpha && found_mmx)
- line_func = composite_line_color_22_4a4_mmx_stub;
- else
-#endif
- line_func = composite_line_color;
+ line_func = composite_line_color;
pixops_process (dest_buf, render_x0, render_y0, render_x1, render_y1,
dest_rowstride, dest_channels, dest_has_alpha,
@@ -2071,10 +1963,6 @@ _pixops_composite_real (guchar *dest_buf,
PixopsLineFunc line_func;
guchar *tmp_buf = NULL;
-#ifdef USE_MMX
- gboolean found_mmx = _pixops_have_mmx ();
-#endif
-
g_return_if_fail (!(dest_channels == 3 && dest_has_alpha));
g_return_if_fail (!(src_channels == 3 && src_has_alpha));
@@ -2107,14 +1995,7 @@ _pixops_composite_real (guchar *dest_buf,
if (filter.x.n == 2 && filter.y.n == 2 && dest_channels == 4 &&
src_channels == 4 && src_has_alpha && !dest_has_alpha)
- {
-#ifdef USE_MMX
- if (found_mmx)
- line_func = composite_line_22_4a4_mmx_stub;
- else
-#endif
- line_func = composite_line_22_4a4;
- }
+ line_func = composite_line_22_4a4;
else
line_func = composite_line;
@@ -2491,10 +2372,6 @@ _pixops_scale_real (guchar *dest_buf,
PixopsLineFunc line_func;
guchar *tmp_buf = NULL; /* Temporary image for two-step scaling */
-#ifdef USE_MMX
- gboolean found_mmx = _pixops_have_mmx ();
-#endif
-
g_return_if_fail (!(dest_channels == 3 && dest_has_alpha));
g_return_if_fail (!(src_channels == 3 && src_has_alpha));
g_return_if_fail (!(src_has_alpha && !dest_has_alpha));
@@ -2522,14 +2399,7 @@ _pixops_scale_real (guchar *dest_buf,
return;
if (filter.x.n == 2 && filter.y.n == 2 && dest_channels == 3 && src_channels == 3)
- {
-#ifdef USE_MMX
- if (found_mmx)
- line_func = scale_line_22_33_mmx_stub;
- else
-#endif
- line_func = scale_line_22_33;
- }
+ line_func = scale_line_22_33;
else
line_func = scale_line;
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]