[babl] babl: do one SSE2 memory load per pixel for color matrix transform
- From: Øyvind Kolås <ok src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [babl] babl: do one SSE2 memory load per pixel for color matrix transform
- Date: Fri, 1 Sep 2017 17:20:29 +0000 (UTC)
commit fb772a8cfc52cd4ab67d8d2fd660d493491d6e27
Author: Øyvind Kolås <pippin gimp org>
Date: Fri Sep 1 19:19:39 2017 +0200
babl: do one SSE2 memory load per pixel for color matrix transform
babl/babl-fish-path.c | 13 +++++--------
1 files changed, 5 insertions(+), 8 deletions(-)
---
diff --git a/babl/babl-fish-path.c b/babl/babl-fish-path.c
index 7f69d5e..9236119 100644
--- a/babl/babl-fish-path.c
+++ b/babl/babl-fish-path.c
@@ -607,14 +607,11 @@ static inline void babl_matrix_mul_vectorff_buf4_sse2 (const float *mat,
int i;
for (i = 0; i < samples; i ++)
{
- const __v4sf a = _mm_load1_ps(&v_in[0]);
- const __v4sf b = _mm_load1_ps(&v_in[1]);
- const __v4sf c = _mm_load1_ps(&v_in[2]);
- __v4sf out; // = m___0 * a + m___1 * b + m___2 * c;
- out = _mm_mul_ps (m___0, a);
- out = _mm_add_ps (out, _mm_mul_ps (m___1, b));
- out = _mm_add_ps (out, _mm_mul_ps (m___2, c));
- _mm_store_ps (v_out, out);
+ __v4sf a, b, c = _mm_load_ps(&v_in[0]);
+ a = (__v4sf) _mm_shuffle_epi32((__m128i)c, _MM_SHUFFLE(0,0,0,0));
+ b = (__v4sf) _mm_shuffle_epi32((__m128i)c, _MM_SHUFFLE(1,1,1,1));
+ c = (__v4sf) _mm_shuffle_epi32((__m128i)c, _MM_SHUFFLE(2,2,2,2));
+ _mm_store_ps (v_out, m___0 * a + m___1 * b + m___2 * c);
v_out[3] = v_in[3];
v_out += 4;
v_in += 4;
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]