babl r324 - in trunk: . extensions
- From: jheller svn gnome org
- To: svn-commits-list gnome org
- Subject: babl r324 - in trunk: . extensions
- Date: Thu, 12 Jun 2008 17:10:55 +0000 (UTC)
Author: jheller
Date: Thu Jun 12 17:10:55 2008
New Revision: 324
URL: http://svn.gnome.org/viewvc/babl?rev=324&view=rev
Log:
2008-06-12 Jan Heller <jheller svn gnome org>
* extensions/sse-fixups.c (conv_rgbaF_linear_rgb8_linear),
(conv_rgbaF_linear_rgba8_linear): Added a fallback non-SSE routine
that is used when the input buffer is not aligned to 16 bytes.
Modified:
trunk/ChangeLog
trunk/extensions/sse-fixups.c
Modified: trunk/extensions/sse-fixups.c
==============================================================================
--- trunk/extensions/sse-fixups.c (original)
+++ trunk/extensions/sse-fixups.c Thu Jun 12 17:10:55 2008
@@ -25,6 +25,7 @@
#include "babl.h"
#include "babl-cpuaccel.h"
+#include "extensions/util.h"
#define INLINE inline
@@ -49,28 +50,53 @@
long samples)
{
long n = samples;
- g4float *g4src = (g4float *) src;
- g4float v;
- union {
- g2int si;
- unsigned char c[8];
- } u;
-
- while (n--)
+ if ((int) src & 0xF)
{
- v = *g4src++ * g4float_ff;
- v = g4float_min(v, g4float_ff);
- v = g4float_max(v, g4float_zero);
- u.si = g4float_cvt2pi (v);
- *dst++ = u.c[0];
- *dst++ = u.c[4];
- v = g4float_movhl (v, v);
- u.si = g4float_cvt2pi (v);
- *dst++ = u.c[0];
+ // nonaligned buffers, we have to use fallback x87 code
+ float *fsrc = (float *) src;
+ int v;
+
+ while (n--)
+ {
+ v = rint (*fsrc++ * 255.0);
+ *dst++ = (v < 0) ? 0 : ((v > 255) ? 255 : v);
+
+ v = rint (*fsrc++ * 255.0);
+ *dst++ = (v < 0) ? 0 : ((v > 255) ? 255 : v);
+
+ v = rint (*fsrc++ * 255.0);
+ *dst++ = (v < 0) ? 0 : ((v > 255) ? 255 : v);
+
+ fsrc++;
+ }
}
+ else
+ {
+ // all is well, buffers are SSE compatible
+ g4float *g4src = (g4float *) src;
+ g4float v;
+
+ union {
+ g2int si;
+ unsigned char c[8];
+ } u;
+
+ while (n--)
+ {
+ v = *g4src++ * g4float_ff;
+ v = g4float_min(v, g4float_ff);
+ v = g4float_max(v, g4float_zero);
+ u.si = g4float_cvt2pi (v);
+ *dst++ = u.c[0];
+ *dst++ = u.c[4];
+ v = g4float_movhl (v, v);
+ u.si = g4float_cvt2pi (v);
+ *dst++ = u.c[0];
+ }
- g4float_emms ();
+ g4float_emms ();
+ }
return samples;
}
@@ -82,29 +108,54 @@
long samples)
{
long n = samples;
- g4float *g4src = (g4float *) src;
- g4float v;
-
- union {
- g2int si;
- unsigned char c[8];
- } u;
-
- while (n--)
+ if ((int) src & 0xF)
{
- v = *g4src++ * g4float_ff;
- v = g4float_min(v, g4float_ff);
- v = g4float_max(v, g4float_zero);
- u.si = g4float_cvt2pi (v);
- *dst++ = u.c[0];
- *dst++ = u.c[4];
- v = g4float_movhl (v, v);
- u.si = g4float_cvt2pi (v);
- *dst++ = u.c[0];
- *dst++ = u.c[4];
+ // nonaligned buffers, we have to use fallback x87 code
+ float *fsrc = (float *) src;
+ int v;
+
+ while (n--)
+ {
+ v = rint (*fsrc++ * 255.0);
+ *dst++ = (v < 0) ? 0 : ((v > 255) ? 255 : v);
+
+ v = rint (*fsrc++ * 255.0);
+ *dst++ = (v < 0) ? 0 : ((v > 255) ? 255 : v);
+
+ v = rint (*fsrc++ * 255.0);
+ *dst++ = (v < 0) ? 0 : ((v > 255) ? 255 : v);
+
+ v = rint (*fsrc++ * 255.0);
+ *dst++ = (v < 0) ? 0 : ((v > 255) ? 255 : v);
+ }
}
+ else
+ {
+ // all is well, buffers are SSE compatible
+ g4float *g4src = (g4float *) src;
+ g4float v;
+
+ union {
+ g2int si;
+ unsigned char c[8];
+ } u;
+
+ while (n--)
+ {
+ v = *g4src++ * g4float_ff;
+ v = g4float_min(v, g4float_ff);
+ v = g4float_max(v, g4float_zero);
+ u.si = g4float_cvt2pi (v);
+ *dst++ = u.c[0];
+ *dst++ = u.c[4];
+ v = g4float_movhl (v, v);
+ u.si = g4float_cvt2pi (v);
+ *dst++ = u.c[0];
+ *dst++ = u.c[4];
+ }
- g4float_emms ();
+ g4float_emms ();
+ }
return samples;
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]