[babl] Revert "make trampoline for lut processing"
- From: Øyvind "pippin" Kolås <ok src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [babl] Revert "make trampoline for lut processing"
- Date: Mon, 24 Jan 2022 06:39:55 +0000 (UTC)
commit 22bd31c3666bfc9a07033a62ff7e94fae9b7af6d
Author: Øyvind Kolås <pippin gimp org>
Date: Mon Jan 24 07:39:06 2022 +0100
Revert "make trampoline for lut processing"
This reverts commit b3e884edf3b5c58fb4c2cede1346bd8a9d9c4a1e.
benchmarking on x86_64 saw no effect, and as a surprise on arm
it pessimises performance.
babl/babl-fish-path.c | 137 ++++++++++++++++++++++++++++++++++-------
babl/babl-internal.h | 13 ----
babl/babl.c | 41 ++----------
babl/base/babl-rgb-converter.c | 122 ------------------------------------
4 files changed, 120 insertions(+), 193 deletions(-)
---
diff --git a/babl/babl-fish-path.c b/babl/babl-fish-path.c
index 7278ec2a1..f709c3fd1 100644
--- a/babl/babl-fish-path.c
+++ b/babl/babl-fish-path.c
@@ -76,7 +76,7 @@ get_path_instrumentation (FishPathInstrumentation *fpi,
static inline void
-_babl_process_conversion_path (BablList *path,
+process_conversion_path (BablList *path,
const void *source_buffer,
int source_bpp,
void *destination_buffer,
@@ -756,6 +756,118 @@ babl_gc_fishes (void)
// is responsibility of higher layers
}
+static int babl_fish_lut_process_maybe (const Babl *babl,
+ const char *source,
+ const char *destination,
+ long n,
+ void *data)
+{
+ uint32_t *lut = (uint32_t*)babl->fish_path.u8_lut;
+ ((Babl*)babl)->fish.pixels += n;
+
+
+ if (!lut && babl->fish.pixels > 256 * 128)
+ {
+#if 0
+ fprintf (stderr, "building LUT for %s to %s\n",
+ babl_get_name (babl->conversion.source),
+ babl_get_name (babl->conversion.destination));
+#endif
+ lut = malloc (256 * 256 * 256 * 4);
+ if (babl->fish_path.source_bpp == 8)
+ {
+ uint64_t *lut_in = malloc (256 * 256 * 256 * 8);
+ for (int o = 0; o < 256 * 256 * 256; o++)
+ {
+ uint64_t v = o;
+ uint64_t v0 = v & 0xff;
+ uint64_t v1 = (v & 0xff00) >> 8;
+ uint64_t v2 = (v & 0xff0000) >> 16;
+
+#if 1
+ // gives same results... but purer white is better?
+ v0 = (v0 << 8) | (((v0&1)?0xff:0)<<0);
+ v1 = (v1 << 24) | (((v1&1)?(uint64_t)0xff:0)<<16);
+ v2 = (v2 << 40) | (((v2&1)?(uint64_t)0xff:0)<<32);
+#else
+ v0 = (v0 << 8);
+ v1 = (v1 << 24);
+ v2 = (v2 << 40);
+#endif
+ lut_in[o] = v;
+ }
+
+ process_conversion_path (babl->fish_path.conversion_list,
+ lut_in,
+ babl->fish_path.source_bpp,
+ lut,
+ babl->fish_path.dest_bpp,
+ 256*256*256);
+ free (lut_in);
+ }
+ else
+ {
+ for (int o = 0; o < 256 * 256 * 256; o++)
+ lut[o] = o;
+ process_conversion_path (babl->fish_path.conversion_list,
+ lut,
+ babl->fish_path.source_bpp,
+ lut,
+ babl->fish_path.dest_bpp,
+ 256*256*256);
+ }
+ // XXX : there is still a micro race, if lost we should only
+ // leak a LUT not produce wrong results.
+ if (babl->fish_path.u8_lut == NULL)
+ {
+ (((Babl*)babl)->fish_path.u8_lut) = (uint8_t*)lut;
+
+ }
+ else
+ {
+ free (lut);
+ lut = (uint32_t*)babl->fish_path.u8_lut;
+ }
+ }
+ if (lut)
+ {
+ if (babl->fish_path.source_bpp == 8) // 16 bit, not working yet
+ { // half and u16 need their
+ // own separate handling
+ uint32_t *src = (uint32_t*)source;
+ uint32_t *dst = (uint32_t*)destination;
+ lut = (uint32_t*)babl->fish_path.u8_lut;
+ while (n--)
+ {
+ uint32_t col_a = *src++;
+ uint32_t col_b = *src++;
+ uint32_t col;
+
+ uint32_t c_ar = ((col_a & 0xff000000)|
+ ((col_a & 0x0000ff00) << 8));
+ uint32_t c_gb = ((col_b & 0xff000000)|
+ ((col_b & 0x0000ff00) << 8))>>16;
+ col = c_ar|c_gb;
+
+ *dst++ = lut[col & 0xffffff] | (col & 0xff000000);
+ }
+ }
+ else
+ {
+ uint32_t *src = (uint32_t*)source;
+ uint32_t *dst = (uint32_t*)destination;
+ lut = (uint32_t*)babl->fish_path.u8_lut;
+ while (n--)
+ {
+ uint32_t col = *src++;
+ *dst++ = lut[col & 0xffffff] | (col & 0xff000000);
+ }
+ }
+ BABL(babl)->fish_path.last_lut_use = babl_ticks ();
+ return 1;
+ }
+ return 0;
+}
static void
babl_fish_path_process (const Babl *babl,
@@ -783,7 +895,7 @@ babl_fish_path_process (const Babl *babl,
conv_counter = 0;
}
}
- _babl_process_conversion_path (babl->fish_path.conversion_list,
+ process_conversion_path (babl->fish_path.conversion_list,
source,
babl->fish_path.source_bpp,
destination,
@@ -925,7 +1037,7 @@ static void inline *align_16 (unsigned char *ret)
}
static inline void
-_babl_process_conversion_path (BablList *path,
+process_conversion_path (BablList *path,
const void *source_buffer,
int source_bpp,
void *destination_buffer,
@@ -997,23 +1109,6 @@ _babl_process_conversion_path (BablList *path,
}
}
-void
-babl_process_conversion_path (BablList *path,
- const void *source_buffer,
- int source_bpp,
- void *destination_buffer,
- int dest_bpp,
- long n)
-{
- _babl_process_conversion_path (path,
- source_buffer,
- source_bpp,
- destination_buffer,
- dest_bpp,
- n);
-}
-
-
static void
init_path_instrumentation (FishPathInstrumentation *fpi,
Babl *fmt_source,
@@ -1149,7 +1244,7 @@ get_path_instrumentation (FishPathInstrumentation *fpi,
/* calculate this path's view of what the result should be */
ticks_start = babl_ticks ();
for (int i = 0; i < BABL_TEST_ITER; i ++)
- _babl_process_conversion_path (path, fpi->source, source_bpp, fpi->destination,
+ process_conversion_path (path, fpi->source, source_bpp, fpi->destination,
dest_bpp, fpi->num_test_pixels);
ticks_end = babl_ticks ();
*path_cost = (ticks_end - ticks_start);
diff --git a/babl/babl-internal.h b/babl/babl-internal.h
index 4377ec379..ec6008b6d 100644
--- a/babl/babl-internal.h
+++ b/babl/babl-internal.h
@@ -373,12 +373,6 @@ extern const Babl *
extern const Babl *
(*babl_trc_lookup_by_name) (const char *name);
-extern int (*babl_fish_lut_process_maybe) (const Babl *babl,
- const char *source,
- const char *destination,
- long n,
- void *data);
-
void babl_space_to_xyz (const Babl *space, const double *rgb, double *xyz);
void babl_space_from_xyz (const Babl *space, const double *xyz, double *rgb);
@@ -479,12 +473,5 @@ _babl_space_for_lcms (const char *icc_data, int icc_length); // XXX pass profile
void
babl_trc_class_init (void);
-void
-babl_process_conversion_path (BablList *path,
- const void *source_buffer,
- int source_bpp,
- void *destination_buffer,
- int dest_bpp,
- long n);
#endif
diff --git a/babl/babl.c b/babl/babl.c
index 7bfe60f6a..515fa09b0 100644
--- a/babl/babl.c
+++ b/babl/babl.c
@@ -200,19 +200,6 @@ void (*babl_base_init) (void) = babl_base_init_generic;
const Babl * babl_trc_lookup_by_name_generic (const char *name);
-int babl_fish_lut_process_maybe_generic (const Babl *babl,
- const char *source,
- const char *destination,
- long n,
- void *data);
-
-int (*babl_fish_lut_process_maybe) (const Babl *babl,
- const char *source,
- const char *destination,
- long n,
- void *data) =
- babl_fish_lut_process_maybe_generic;
-
const Babl *
babl_trc_new_generic (const char *name,
@@ -235,25 +222,15 @@ const Babl *
float *lut) = babl_trc_new_generic;
#ifdef ARCH_X86_64
-
-int babl_fish_lut_process_maybe_x86_64_v2 (const Babl *babl,
- const char *source,
- const char *destination,
- long n,
- void *data);
-int babl_fish_lut_process_maybe_x86_64_v3 (const Babl *babl,
- const char *source,
- const char *destination,
- long n,
- void *data);
-
void babl_base_init_x86_64_v2 (void);
void babl_base_init_x86_64_v3 (void);
void _babl_space_add_universal_rgb_x86_64_v2 (const Babl *space);
void _babl_space_add_universal_rgb_x86_64_v3 (const Babl *space);
-const Babl * babl_trc_lookup_by_name_x86_64_v2 (const char *name);
-const Babl * babl_trc_lookup_by_name_x86_64_v3 (const char *name);
+const Babl *
+babl_trc_lookup_by_name_x86_64_v2 (const char *name);
+const Babl *
+babl_trc_lookup_by_name_x86_64_v3 (const char *name);
const Babl *
babl_trc_new_x86_64_v2 (const char *name,
@@ -270,13 +247,6 @@ babl_trc_new_x86_64_v3 (const char *name,
#endif
#ifdef ARCH_ARM
-
-int babl_fish_lut_process_maybe_arm_neon (const Babl *babl,
- const char *source,
- const char *destination,
- long n,
- void *data);
-
void babl_base_init_arm_neon (void);
void _babl_space_add_universal_rgb_arm_neon (const Babl *space);
@@ -298,7 +268,6 @@ static void simd_init (void)
BablCpuAccelFlags accel = babl_cpu_accel_get_support ();
if ((accel & BABL_CPU_ACCEL_X86_64_V3) == BABL_CPU_ACCEL_X86_64_V3)
{
- babl_fish_lut_process_maybe = babl_fish_lut_process_maybe_x86_64_v3;
babl_base_init = babl_base_init_x86_64_v2; /// !!
// this is correct,
// it performs better
@@ -309,7 +278,6 @@ static void simd_init (void)
}
else if ((accel & BABL_CPU_ACCEL_X86_64_V2) == BABL_CPU_ACCEL_X86_64_V2)
{
- babl_fish_lut_process_maybe = babl_fish_lut_process_maybe_x86_64_v2;
babl_base_init = babl_base_init_x86_64_v2;
babl_trc_new = babl_trc_new_x86_64_v2;
babl_trc_lookup_by_name = babl_trc_lookup_by_name_x86_64_v2;
@@ -320,7 +288,6 @@ static void simd_init (void)
BablCpuAccelFlags accel = babl_cpu_accel_get_support ();
if ((accel & BABL_CPU_ACCEL_ARM_NEON) == BABL_CPU_ACCEL_ARM_NEON)
{
- babl_fish_lut_process_maybe = babl_fish_lut_process_maybe_arm_neon;
babl_base_init = babl_base_init_arm_neon;
babl_trc_new = babl_trc_new_arm_neon;
babl_trc_lookup_by_name = babl_trc_lookup_by_name_arm_neon;
diff --git a/babl/base/babl-rgb-converter.c b/babl/base/babl-rgb-converter.c
index 5c3d2ca08..3f4da04d3 100644
--- a/babl/base/babl-rgb-converter.c
+++ b/babl/base/babl-rgb-converter.c
@@ -533,125 +533,3 @@ BABL_SIMD_SUFFIX(_babl_space_add_universal_rgb) (const Babl *space)
{
babl_space_class_for_each (add_rgb_adapter, (void*)space);
}
-
-void
-babl_process_conversion_path (BablList *path,
- const void *source_buffer,
- int source_bpp,
- void *destination_buffer,
- int dest_bpp,
- long n);
-
-int BABL_SIMD_SUFFIX(babl_fish_lut_process_maybe) (const Babl *babl,
- const char *source,
- const char *destination,
- long n,
- void *data)
-{
- uint32_t *lut = (uint32_t*)babl->fish_path.u8_lut;
- ((Babl*)babl)->fish.pixels += n;
-
-
- if (!lut && babl->fish.pixels > 256 * 128)
- {
-#if 0
- fprintf (stderr, "building LUT for %s to %s\n",
- babl_get_name (babl->conversion.source),
- babl_get_name (babl->conversion.destination));
-#endif
- lut = malloc (256 * 256 * 256 * 4);
- if (babl->fish_path.source_bpp == 8)
- {
- uint64_t *lut_in = malloc (256 * 256 * 256 * 8);
- for (int o = 0; o < 256 * 256 * 256; o++)
- {
- uint64_t v = o;
- uint64_t v0 = v & 0xff;
- uint64_t v1 = (v & 0xff00) >> 8;
- uint64_t v2 = (v & 0xff0000) >> 16;
-
-#if 1
- // gives same results... but purer white is better?
- v0 = (v0 << 8) | (((v0&1)?0xff:0)<<0);
- v1 = (v1 << 24) | (((v1&1)?(uint64_t)0xff:0)<<16);
- v2 = (v2 << 40) | (((v2&1)?(uint64_t)0xff:0)<<32);
-#else
- v0 = (v0 << 8);
- v1 = (v1 << 24);
- v2 = (v2 << 40);
-#endif
- lut_in[o] = v;
- }
-
- babl_process_conversion_path (babl->fish_path.conversion_list,
- lut_in,
- babl->fish_path.source_bpp,
- lut,
- babl->fish_path.dest_bpp,
- 256*256*256);
- free (lut_in);
- }
- else
- {
- for (int o = 0; o < 256 * 256 * 256; o++)
- lut[o] = o;
- babl_process_conversion_path (babl->fish_path.conversion_list,
- lut,
- babl->fish_path.source_bpp,
- lut,
- babl->fish_path.dest_bpp,
- 256*256*256);
- }
- // XXX : there is still a micro race, if lost we should only
- // leak a LUT not produce wrong results.
- if (babl->fish_path.u8_lut == NULL)
- {
- (((Babl*)babl)->fish_path.u8_lut) = (uint8_t*)lut;
-
- }
- else
- {
- free (lut);
- lut = (uint32_t*)babl->fish_path.u8_lut;
- }
- }
- if (lut)
- {
- if (babl->fish_path.source_bpp == 8) // 16 bit, not working yet
- { // half and u16 need their
- // own separate handling
- uint32_t *src = (uint32_t*)source;
- uint32_t *dst = (uint32_t*)destination;
- lut = (uint32_t*)babl->fish_path.u8_lut;
- while (n--)
- {
- uint32_t col_a = *src++;
- uint32_t col_b = *src++;
- uint32_t col;
-
- uint32_t c_ar = ((col_a & 0xff000000)|
- ((col_a & 0x0000ff00) << 8));
- uint32_t c_gb = ((col_b & 0xff000000)|
- ((col_b & 0x0000ff00) << 8))>>16;
- col = c_ar|c_gb;
-
- *dst++ = lut[col & 0xffffff] | (col & 0xff000000);
- }
- }
- else
- {
- uint32_t *src = (uint32_t*)source;
- uint32_t *dst = (uint32_t*)destination;
- lut = (uint32_t*)babl->fish_path.u8_lut;
- while (n--)
- {
- uint32_t col = *src++;
- *dst++ = lut[col & 0xffffff] | (col & 0xff000000);
- }
- }
- BABL(babl)->fish_path.last_lut_use = babl_ticks ();
- return 1;
- }
- return 0;
-}
-
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]