[babl/sse-conversions-2013] SSE Float: draft 4
- From: Daniel Sabo <daniels src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [babl/sse-conversions-2013] SSE Float: draft 4
- Date: Tue, 2 Apr 2013 05:01:13 +0000 (UTC)
commit f222c8699d057c154cb8da001f9fa521280e58a7
Author: Daniel Sabo <DanielSabo gmail com>
Date: Mon Apr 1 20:15:56 2013 -0700
SSE Float: draft 4
Don't divide by zero; more babl-like handling of negative alpha (alpha <= 0 now yields zero RGB with the original alpha preserved).
extensions/sse-float.c | 69 ++++++++++++++++++++++++++++-------------------
1 files changed, 41 insertions(+), 28 deletions(-)
---
diff --git a/extensions/sse-float.c b/extensions/sse-float.c
index bc3c42e..96695d4 100644
--- a/extensions/sse-float.c
+++ b/extensions/sse-float.c
@@ -29,10 +29,13 @@
#include "babl.h"
#include "babl-cpuaccel.h"
+#include "base/util.h"
#include "extensions/util.h"
#define Q(a) { a, a, a, a }
+static const float BABL_ALPHA_THRESHOLD_FLOAT = (float)BABL_ALPHA_THRESHOLD;
+
static long
conv_rgbaF_linear_rgbAF_linear (const float *src, float *dst, long samples)
{
@@ -99,33 +102,39 @@ conv_rgbAF_linear_rgbaF_linear (const float *src, float *dst, long samples)
if (((uintptr_t)src % 16) + ((uintptr_t)dst % 16) == 0)
{
- const long n = (samples / 2) * 2;
+ const long n = samples;
const __v4sf *s = (const __v4sf*) src;
__v4sf *d = (__v4sf*)dst;
- for ( ; i < n; i += 2)
+ for ( ; i < n; i += 1)
{
- __v4sf rbaa0, rbaa1;
+ __v4sf pre_rgba0, rgba0, rbaa0, raaaa0;
- __v4sf rgba0 = *s++;
- __v4sf rgba1 = *s++;
-
- /* Expand alpha */
- __v4sf aaaa0 = (__v4sf)_mm_shuffle_epi32((__m128i)rgba0, _MM_SHUFFLE(3, 3, 3, 3));
- __v4sf aaaa1 = (__v4sf)_mm_shuffle_epi32((__m128i)rgba1, _MM_SHUFFLE(3, 3, 3, 3));
-
- /* Premultiply */
- rgba0 = rgba0 / aaaa0;
- rgba1 = rgba1 / aaaa1;
+ float alpha0 = ((float *)s)[3];
+ pre_rgba0 = *s;
+ if (alpha0 <= 0.0f)
+ {
+ /* Zero RGB */
+ rgba0 = _mm_setzero_ps();
+ }
+ else
+ {
+ float recip0 = 1.0f/alpha0;
+
+ /* Expand reciprocal */
+ raaaa0 = _mm_load1_ps(&recip0);
+
+ /* Un-Premultiply */
+ rgba0 = pre_rgba0 * raaaa0;
+ }
+
/* Shuffle the original alpha value back in */
- rbaa0 = _mm_shuffle_ps(rgba0, aaaa0, _MM_SHUFFLE(0, 0, 2, 0));
- rbaa1 = _mm_shuffle_ps(rgba1, aaaa1, _MM_SHUFFLE(0, 0, 2, 0));
+ rbaa0 = _mm_shuffle_ps(rgba0, pre_rgba0, _MM_SHUFFLE(3, 3, 2, 0));
rgba0 = _mm_shuffle_ps(rgba0, rbaa0, _MM_SHUFFLE(2, 1, 1, 0));
- rgba1 = _mm_shuffle_ps(rgba1, rbaa1, _MM_SHUFFLE(2, 1, 1, 0));
+ s++;
*d++ = rgba0;
- *d++ = rgba1;
}
_mm_empty ();
}
@@ -134,17 +143,21 @@ conv_rgbAF_linear_rgbaF_linear (const float *src, float *dst, long samples)
src += i * 4;
remainder = samples - i;
while (remainder--)
- {
- const float a = src[3];
- const float a_term = 1.0f / a;
- dst[0] = src[0] * a_term;
- dst[1] = src[1] * a_term;
- dst[2] = src[2] * a_term;
- dst[3] = a;
-
- src += 4;
- dst += 4;
- }
+ {
+ float alpha = src[3];
+ float recip;
+ if (alpha <= 0.0f)
+ recip = 0.0f;
+ else
+ recip = 1.0f/alpha;
+ dst[0] = src[0] * recip;
+ dst[1] = src[1] * recip;
+ dst[2] = src[2] * recip;
+ dst[3] = alpha;
+
+ src += 4;
+ dst += 4;
+ }
return samples;
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]