[gegl] utils: improve performance of gegl_memset_pattern()
- From: N/A <ell src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gegl] utils: improve performance of gegl_memset_pattern()
- Date: Mon, 6 Feb 2017 13:29:27 +0000 (UTC)
commit d195c300f164921d74f24ee06eb99d8d22e8ba2f
Author: Ell <ell_se yahoo com>
Date: Mon Feb 6 08:00:32 2017 -0500
utils: improve performance of gegl_memset_pattern()
Copy the pattern using exponentially increasing block sizes, using
the already-copied data as the source, yielding a logarithmic number
of memcpy()s instead of a linear one. The block size is not increased
indefinitely, so as not to saturate the cache; for large copies, the
number of memcpy()s is therefore still linear, but with a block size
big enough to make that irrelevant.
I actually had a benchmark for this, but it died with the old hdd :P
IIRC, in some cases we care about, this roughly doubles performance.
In other cases, it's far less significant, but it never (in realistic
terms) seems to be a pessimization.
gegl/gegl-utils.c | 81 ++++++++++++++++++++++------------------------------
1 files changed, 34 insertions(+), 47 deletions(-)
---
diff --git a/gegl/gegl-utils.c b/gegl/gegl-utils.c
index ffe0650..4c3e491 100644
--- a/gegl/gegl-utils.c
+++ b/gegl/gegl-utils.c
@@ -276,64 +276,51 @@ gegl_free (gpointer buf)
g_free (*((gpointer*)buf -1));
}
-#define MAKE_COPY_CASE(typesize)\
-case typesize: \
- while (count--) \
- { \
- memcpy (dst, src, typesize); \
- dst += typesize; \
- } \
- return;
-
void
gegl_memset_pattern (void * restrict dst_ptr,
const void * restrict src_ptr,
gint pattern_size,
gint count)
{
- guchar *dst = dst_ptr;
+ guchar *dst = dst_ptr;
const guchar *src = src_ptr;
- switch (pattern_size)
- {
- case 1: /* Y u8 */
+ /* g_assert (pattern_size > 0 && count >= 0); */
+
+ if (pattern_size == 1 || count == 0)
+ {
memset (dst, *src, count);
- return;
-MAKE_COPY_CASE(2) /* YA u8 */
-MAKE_COPY_CASE(3) /* RGB u8 */
-#ifdef ARCH_X86_64
- case 4: /* RGBA u8 */
- if (count >= 2)
- {
- guint64 pat2 = *(guint32 *)src_ptr;
- pat2 = pat2 | pat2 << 32;
- do {
- memcpy (dst, &pat2, 8);
- dst += 8;
- count -= 2;
- } while (count >= 2);
- }
- if (count)
- {
- memcpy (dst, src, 4);
- dst += 4;
- }
- return;
-#else
-MAKE_COPY_CASE(4) /* RGBA u8 */
-#endif /* ARCH_X86_64 */
-MAKE_COPY_CASE(6) /* RGB u16 */
-MAKE_COPY_CASE(8) /* RGBA u16 */
-MAKE_COPY_CASE(12) /* RGB float */
-MAKE_COPY_CASE(16) /* RGBA float */
- default:
- while (count--)
+ }
+ else
+ {
+ gsize block_size;
+ gsize remaining_size;
+
+ block_size = pattern_size,
+
+ memcpy (dst, src, block_size);
+ src = dst;
+ dst += block_size;
+
+ remaining_size = (count - 1) * block_size;
+
+ while (block_size < remaining_size)
{
- memcpy (dst, src, pattern_size);
- dst += pattern_size;
+ memcpy (dst, src, block_size);
+ dst += block_size;
+
+ remaining_size -= block_size;
+
+ /* limit the block size, so that we don't saturate the cache.
+ *
+ * FIXME: optimal limit could use more benchmarking.
+ */
+ if (block_size <= 2048)
+ block_size *= 2;
}
- return;
- }
+
+ memcpy (dst, src, remaining_size);
+ }
}
#undef MAKE_COPY_CASE
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]