[babl] babl: refactor matrix_mul_vector further towards SIMD
- From: Øyvind Kolås <ok src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [babl] babl: refactor matrix_mul_vector further towards SIMD
- Date: Thu, 31 Aug 2017 21:07:51 +0000 (UTC)
commit f315c78d2c0a32f333790f7ac689ee7b56f74c65
Author: Øyvind Kolås <pippin gimp org>
Date: Thu Aug 31 23:07:34 2017 +0200
babl: refactor matrix_mul_vector further towards SIMD
babl/babl-fish-path.c | 63 ++++++---------------
babl/babl-fish-reference.c | 4 +-
babl/babl-matrix.h | 132 +++++++++++++++++++++++++++++++++++++++-----
3 files changed, 137 insertions(+), 62 deletions(-)
---
diff --git a/babl/babl-fish-path.c b/babl/babl-fish-path.c
index f8b5d8d..1dd8516 100644
--- a/babl/babl-fish-path.c
+++ b/babl/babl-fish-path.c
@@ -454,9 +454,6 @@ universal_nonlinear_rgb_converter (const Babl *conversion,unsigned char *src_cha
float (*from_linear_blue) (void *trc, float value);
float * matrixf = conversion->conversion.data;
- const float mat[9] = {matrixf[0], matrixf[1],matrixf[2],
- matrixf[3], matrixf[4],matrixf[5],
- matrixf[6], matrixf[7],matrixf[8]};
int i;
float *rgba_in = (void*)src_char;
float *rgba_out = (void*)dst_char;
@@ -478,13 +475,16 @@ universal_nonlinear_rgb_converter (const Babl *conversion,unsigned char *src_cha
for (i = 0; i < samples; i++)
{
- rgba_out[0]=to_linear_red(to_trc_red, rgba_in[0]);
- rgba_out[1]=to_linear_green(to_trc_green, rgba_in[1]);
- rgba_out[2]=to_linear_blue(to_trc_blue, rgba_in[2]);
- rgba_out[3]=rgba_in[3];
+ rgba_out[i*4] =to_linear_red(to_trc_red, rgba_in[i*4]);
+ rgba_out[i*4+1]=to_linear_green(to_trc_green, rgba_in[i*4+1]);
+ rgba_out[i*4+2]=to_linear_blue(to_trc_blue, rgba_in[i*4+1]);
+ rgba_out[i*4+3]=rgba_in[3];
+ }
- babl_matrix_mul_vectorff (mat, rgba_out, rgba_out);
+ babl_matrix_mul_vectorff_buf4 (matrixf, rgba_out, rgba_out, samples);
+ for (i = 0; i < samples; i++)
+ {
rgba_out[0] = from_linear_red(from_trc_red, rgba_out[0]);
rgba_out[1] = from_linear_green(from_trc_green, rgba_out[1]);
rgba_out[2] = from_linear_blue(from_trc_blue, rgba_out[2]);
@@ -508,9 +508,6 @@ universal_nonlinear_rgb_linear_converter (const Babl *conversion,unsigned char *
float (*to_linear_blue) (void *trc, float value);
float * matrixf = conversion->conversion.data;
- const float mat[9] = {matrixf[0], matrixf[1],matrixf[2],
- matrixf[3], matrixf[4],matrixf[5],
- matrixf[6], matrixf[7],matrixf[8]};
int i;
float *rgba_in = (void*)src_char;
float *rgba_out = (void*)dst_char;
@@ -524,17 +521,15 @@ universal_nonlinear_rgb_linear_converter (const Babl *conversion,unsigned char *
for (i = 0; i < samples; i++)
{
- rgba_out[0]=to_linear_red(to_trc_red, rgba_in[0]);
- rgba_out[1]=to_linear_green(to_trc_green, rgba_in[1]);
- rgba_out[2]=to_linear_blue(to_trc_blue, rgba_in[2]);
- rgba_out[3]=rgba_in[3];
-
- babl_matrix_mul_vectorff (mat, rgba_out, rgba_out);
-
+ rgba_out[i*4]=to_linear_red(to_trc_red, rgba_in[0]);
+ rgba_out[i*4+1]=to_linear_green(to_trc_green, rgba_in[1]);
+ rgba_out[i*4+2]=to_linear_blue(to_trc_blue, rgba_in[2]);
+ rgba_out[i*4+3]=rgba_in[3];
rgba_in += 4;
- rgba_out += 4;
}
+ babl_matrix_mul_vectorff_buf4 (matrixf, rgba_out, rgba_out, samples);
+
return samples;
}
@@ -559,10 +554,7 @@ universal_nonlinear_rgba_u8_converter (const Babl *conversion,unsigned char *src
rgb[i*3+2]=in_trc_lut[rgba_in_u8[i*4+2]];
}
- for (i = 0; i < samples; i++)
- {
- babl_matrix_mul_vectorff (matrixf, &rgb[i*3], &rgb[i*3]);
- }
+ babl_matrix_mul_vectorff_buf3 (matrixf, rgb, rgb, samples);
{
const Babl *from_trc_red = (void*)destination_space->space.trc[0];
@@ -601,8 +593,8 @@ universal_nonlinear_rgba_u8_float_converter (const Babl *conversion,unsigned cha
rgba_out[i*3+1]=in_trc_lut[rgba_in_u8[i*4+1]];
rgba_out[i*3+2]=in_trc_lut[rgba_in_u8[i*4+2]];
rgba_out[i*3+2]=rgba_in_u8[i*4+3] / 255.0;
- babl_matrix_mul_vectorff (matrixf, &rgba_out[i*4], &rgba_out[i*4]);
}
+ babl_matrix_mul_vectorff_buf4 (matrixf, rgba_out, rgba_out, samples);
return samples;
}
@@ -611,20 +603,10 @@ static inline long
universal_rgba_converter (const Babl *conversion,unsigned char *src_char, unsigned char *dst_char, long
samples)
{
float *matrixf = conversion->conversion.data;
- float mat[9] = {matrixf[0], matrixf[1],matrixf[2],
- matrixf[3], matrixf[4],matrixf[5],
- matrixf[6], matrixf[7],matrixf[8]};
- int i;
float *rgba_in = (void*)src_char;
float *rgba_out = (void*)dst_char;
- for (i = 0; i < samples; i++)
- {
- babl_matrix_mul_vectorff (mat, rgba_in, rgba_out);
- rgba_out[3] = rgba_in[3];
- rgba_in += 4;
- rgba_out += 4;
- }
+ babl_matrix_mul_vectorff_buf4 (matrixf, rgba_in, rgba_out, samples);
return samples;
}
@@ -634,19 +616,10 @@ static inline long
universal_rgb_converter (const Babl *conversion,unsigned char *src_char, unsigned char *dst_char, long
samples)
{
float *matrixf = conversion->conversion.data;
- float mat[9] = {matrixf[0], matrixf[1],matrixf[2],
- matrixf[3], matrixf[4],matrixf[5],
- matrixf[6], matrixf[7],matrixf[8]};
- int i;
float *rgb_in = (void*)src_char;
float *rgb_out = (void*)dst_char;
- for (i = 0; i < samples; i++)
- {
- babl_matrix_mul_vectorff (mat, rgb_in, rgb_out);
- rgb_in += 3;
- rgb_out += 3;
- }
+ babl_matrix_mul_vectorff_buf3 (matrixf, rgb_in, rgb_out, samples);
return samples;
}
diff --git a/babl/babl-fish-reference.c b/babl/babl-fish-reference.c
index 8273da5..d97cca3 100644
--- a/babl/babl-fish-reference.c
+++ b/babl/babl-fish-reference.c
@@ -496,15 +496,13 @@ babl_fish_reference_process (const Babl *babl,
((babl->fish.destination)->format.space)))
{
double matrix[9];
- int i;
double *rgba = rgba_double_buf;
babl_matrix_mul_matrix (
(babl->fish.destination)->format.space->space.XYZtoRGB,
(babl->fish.source)->format.space->space.RGBtoXYZ,
matrix);
- for (i = 0; i < n; i++)
- babl_matrix_mul_vector (matrix, &rgba[i * 4], &rgba[i * 4]);
+ babl_matrix_mul_vector_buf4 (matrix, rgba, rgba, n);
}
{
diff --git a/babl/babl-matrix.h b/babl/babl-matrix.h
index 19707cb..d4e29d9 100644
--- a/babl/babl-matrix.h
+++ b/babl/babl-matrix.h
@@ -61,29 +61,133 @@ static inline void babl_matrix_invert (const double *in, double *out)
static inline void babl_matrix_mul_vector (const double *mat, const double *v_in, double *v_out)
{
- double val[3]={v_in[0], v_in[1], v_in[2]};
-
- v_out[0] = m(mat, 0, 0) * val[0] + m(mat, 0, 1) * val[1] + m(mat, 0, 2) * val[2];
- v_out[1] = m(mat, 1, 0) * val[0] + m(mat, 1, 1) * val[1] + m(mat, 1, 2) * val[2];
- v_out[2] = m(mat, 2, 0) * val[0] + m(mat, 2, 1) * val[1] + m(mat, 2, 2) * val[2];
+ double a = v_in[0], b = v_in[1], c = v_in[2];
+ double m_0_0 = m(mat, 0, 0);
+ double m_0_1 = m(mat, 0, 1);
+ double m_0_2 = m(mat, 0, 2);
+ double m_1_0 = m(mat, 1, 0);
+ double m_1_1 = m(mat, 1, 1);
+ double m_1_2 = m(mat, 1, 2);
+ double m_2_0 = m(mat, 2, 0);
+ double m_2_1 = m(mat, 2, 1);
+ double m_2_2 = m(mat, 2, 2);
+
+ v_out[0] = m_0_0 * a + m_0_1 * b + m_0_2 * c;
+ v_out[1] = m_1_0 * a + m_1_1 * b + m_1_2 * c;
+ v_out[2] = m_2_0 * a + m_2_1 * b + m_2_2 * c;
}
static inline void babl_matrix_mul_vectorf (const double *mat, const float *v_in, float *v_out)
{
- float val[3]={v_in[0], v_in[1], v_in[2]};
-
- v_out[0] = m(mat, 0, 0) * val[0] + m(mat, 0, 1) * val[1] + m(mat, 0, 2) * val[2];
- v_out[1] = m(mat, 1, 0) * val[0] + m(mat, 1, 1) * val[1] + m(mat, 1, 2) * val[2];
- v_out[2] = m(mat, 2, 0) * val[0] + m(mat, 2, 1) * val[1] + m(mat, 2, 2) * val[2];
+ float a = v_in[0], b = v_in[1], c = v_in[2];
+ float m_0_0 = m(mat, 0, 0);
+ float m_0_1 = m(mat, 0, 1);
+ float m_0_2 = m(mat, 0, 2);
+ float m_1_0 = m(mat, 1, 0);
+ float m_1_1 = m(mat, 1, 1);
+ float m_1_2 = m(mat, 1, 2);
+ float m_2_0 = m(mat, 2, 0);
+ float m_2_1 = m(mat, 2, 1);
+ float m_2_2 = m(mat, 2, 2);
+
+ v_out[0] = m_0_0 * a + m_0_1 * b + m_0_2 * c;
+ v_out[1] = m_1_0 * a + m_1_1 * b + m_1_2 * c;
+ v_out[2] = m_2_0 * a + m_2_1 * b + m_2_2 * c;
}
static inline void babl_matrix_mul_vectorff (const float *mat, const float *v_in, float *v_out)
{
- float val[3]={v_in[0], v_in[1], v_in[2]};
+ float a = v_in[0], b = v_in[1], c = v_in[2];
+ float m_0_0 = m(mat, 0, 0);
+ float m_0_1 = m(mat, 0, 1);
+ float m_0_2 = m(mat, 0, 2);
+ float m_1_0 = m(mat, 1, 0);
+ float m_1_1 = m(mat, 1, 1);
+ float m_1_2 = m(mat, 1, 2);
+ float m_2_0 = m(mat, 2, 0);
+ float m_2_1 = m(mat, 2, 1);
+ float m_2_2 = m(mat, 2, 2);
+
+ v_out[0] = m_0_0 * a + m_0_1 * b + m_0_2 * c;
+ v_out[1] = m_1_0 * a + m_1_1 * b + m_1_2 * c;
+ v_out[2] = m_2_0 * a + m_2_1 * b + m_2_2 * c;
+}
+
+static inline void babl_matrix_mul_vectorff_buf3 (const float *mat, const float *v_in, float *v_out,
+ int samples)
+{
+ int i;
+ float m_0_0 = m(mat, 0, 0);
+ float m_0_1 = m(mat, 0, 1);
+ float m_0_2 = m(mat, 0, 2);
+ float m_1_0 = m(mat, 1, 0);
+ float m_1_1 = m(mat, 1, 1);
+ float m_1_2 = m(mat, 1, 2);
+ float m_2_0 = m(mat, 2, 0);
+ float m_2_1 = m(mat, 2, 1);
+ float m_2_2 = m(mat, 2, 2);
+ for (i = 0; i < samples; i ++)
+ {
+ float a = v_in[0], b = v_in[1], c = v_in[2];
+
+ v_out[0] = m_0_0 * a + m_0_1 * b + m_0_2 * c;
+ v_out[1] = m_1_0 * a + m_1_1 * b + m_1_2 * c;
+ v_out[2] = m_2_0 * a + m_2_1 * b + m_2_2 * c;
+ v_in += 3;
+ v_out += 3;
+ }
+}
+
+static inline void babl_matrix_mul_vectorff_buf4 (const float *mat, const float *v_in, float *v_out,
+ int samples)
+{
+ int i;
+ float m_0_0 = m(mat, 0, 0);
+ float m_0_1 = m(mat, 0, 1);
+ float m_0_2 = m(mat, 0, 2);
+ float m_1_0 = m(mat, 1, 0);
+ float m_1_1 = m(mat, 1, 1);
+ float m_1_2 = m(mat, 1, 2);
+ float m_2_0 = m(mat, 2, 0);
+ float m_2_1 = m(mat, 2, 1);
+ float m_2_2 = m(mat, 2, 2);
+ for (i = 0; i < samples; i ++)
+ {
+ float a = v_in[0], b = v_in[1], c = v_in[2];
+
+ v_out[0] = m_0_0 * a + m_0_1 * b + m_0_2 * c;
+ v_out[1] = m_1_0 * a + m_1_1 * b + m_1_2 * c;
+ v_out[2] = m_2_0 * a + m_2_1 * b + m_2_2 * c;
+ v_out[3] = v_in[3];
+ v_in += 4;
+ v_out += 4;
+ }
+}
- v_out[0] = m(mat, 0, 0) * val[0] + m(mat, 0, 1) * val[1] + m(mat, 0, 2) * val[2];
- v_out[1] = m(mat, 1, 0) * val[0] + m(mat, 1, 1) * val[1] + m(mat, 1, 2) * val[2];
- v_out[2] = m(mat, 2, 0) * val[0] + m(mat, 2, 1) * val[1] + m(mat, 2, 2) * val[2];
+static inline void babl_matrix_mul_vector_buf4 (const double *mat, const double *v_in, double *v_out,
+ int samples)
+{
+ int i;
+ double m_0_0 = m(mat, 0, 0);
+ double m_0_1 = m(mat, 0, 1);
+ double m_0_2 = m(mat, 0, 2);
+ double m_1_0 = m(mat, 1, 0);
+ double m_1_1 = m(mat, 1, 1);
+ double m_1_2 = m(mat, 1, 2);
+ double m_2_0 = m(mat, 2, 0);
+ double m_2_1 = m(mat, 2, 1);
+ double m_2_2 = m(mat, 2, 2);
+ for (i = 0; i < samples; i ++)
+ {
+ double a = v_in[0], b = v_in[1], c = v_in[2];
+
+ v_out[0] = m_0_0 * a + m_0_1 * b + m_0_2 * c;
+ v_out[1] = m_1_0 * a + m_1_1 * b + m_1_2 * c;
+ v_out[2] = m_2_0 * a + m_2_1 * b + m_2_2 * c;
+ v_out[3] = v_in[3];
+ v_in += 4;
+ v_out += 4;
+ }
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]