[gegl] gegl/buffer: some more micro-optimizations for column get/set
- From: Øyvind Kolås <ok src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gegl] gegl/buffer: some more micro-optimizations for column get/set
- Date: Thu, 28 Dec 2017 15:41:52 +0000 (UTC)
commit bacba9796ec0e914d790c3aa9aafa815c17b978e
Author: Øyvind Kolås <pippin gimp org>
Date: Thu Dec 28 16:18:40 2017 +0100
gegl/buffer: some more micro-optimizations for column get/set
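Factor the loop-invariant lskip * px_size multiplication out of the copy loops into lskip_offset, and add unrolled copy cases for more sizes (40, 48, 56 and 64 bytes when writing; 3, 6, 40, 48, 56 and 64 bytes when reading).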
gegl/buffer/gegl-buffer-access.c | 258 ++++++++++++++++++++++++++++++++------
1 files changed, 222 insertions(+), 36 deletions(-)
---
diff --git a/gegl/buffer/gegl-buffer-access.c b/gegl/buffer/gegl-buffer-access.c
index 40b751c..be484f0 100644
--- a/gegl/buffer/gegl-buffer-access.c
+++ b/gegl/buffer/gegl-buffer-access.c
@@ -478,6 +478,7 @@ gegl_buffer_iterate_write (GeglBuffer *buffer,
}
else
{
+ int lskip_offset = lskip * px_size;
switch (pixels * px_size)
{
case 1:
@@ -488,7 +489,7 @@ gegl_buffer_iterate_write (GeglBuffer *buffer,
if (buffer_y + y >= buffer_abyss_y &&
buffer_y + y < abyss_y_total)
{
- tp[lskip * px_size] = bp[lskip * px_size];
+ tp[lskip_offset] = bp[lskip_offset];
}
tp += tile_stride;
bp += buf_stride;
@@ -502,8 +503,8 @@ gegl_buffer_iterate_write (GeglBuffer *buffer,
if (buffer_y + y >= buffer_abyss_y &&
buffer_y + y < abyss_y_total)
{
- ((uint16_t*)(&tp[lskip * px_size]))[0] =
- ((uint16_t*)(&bp[lskip * px_size]))[0];
+ ((uint16_t*)(&tp[lskip_offset]))[0] =
+ ((uint16_t*)(&bp[lskip_offset]))[0];
}
tp += tile_stride;
bp += buf_stride;
@@ -517,9 +518,9 @@ gegl_buffer_iterate_write (GeglBuffer *buffer,
if (buffer_y + y >= buffer_abyss_y &&
buffer_y + y < abyss_y_total)
{
- tp[lskip * px_size] = bp[lskip * px_size];
- tp[lskip * px_size+1] = bp[lskip * px_size+1];
- tp[lskip * px_size+2] = bp[lskip * px_size+2];
+ tp[lskip_offset] = bp[lskip_offset];
+ tp[lskip_offset+1] = bp[lskip_offset+1];
+ tp[lskip_offset+2] = bp[lskip_offset+2];
}
tp += tile_stride;
bp += buf_stride;
@@ -533,8 +534,8 @@ gegl_buffer_iterate_write (GeglBuffer *buffer,
if (buffer_y + y >= buffer_abyss_y &&
buffer_y + y < abyss_y_total)
{
- ((uint32_t*)(&tp[lskip * px_size]))[0] =
- ((uint32_t*)(&bp[lskip * px_size]))[0];
+ ((uint32_t*)(&tp[lskip_offset]))[0] =
+ ((uint32_t*)(&bp[lskip_offset]))[0];
}
tp += tile_stride;
bp += buf_stride;
@@ -548,8 +549,8 @@ gegl_buffer_iterate_write (GeglBuffer *buffer,
if (buffer_y + y >= buffer_abyss_y &&
buffer_y + y < abyss_y_total)
{
- ((uint64_t*)(&tp[lskip * px_size]))[0] =
- ((uint64_t*)(&bp[lskip * px_size]))[0];
+ ((uint64_t*)(&tp[lskip_offset]))[0] =
+ ((uint64_t*)(&bp[lskip_offset]))[0];
}
tp += tile_stride;
bp += buf_stride;
@@ -563,12 +564,12 @@ gegl_buffer_iterate_write (GeglBuffer *buffer,
if (buffer_y + y >= buffer_abyss_y &&
buffer_y + y < abyss_y_total)
{
- ((uint32_t*)(&tp[lskip * px_size]))[0] =
- ((uint32_t*)(&bp[lskip * px_size]))[0];
- ((uint32_t*)(&tp[lskip * px_size]))[1] =
- ((uint32_t*)(&bp[lskip * px_size]))[1];
- ((uint32_t*)(&tp[lskip * px_size]))[2] =
- ((uint32_t*)(&bp[lskip * px_size]))[2];
+ ((uint32_t*)(&tp[lskip_offset]))[0] =
+ ((uint32_t*)(&bp[lskip_offset]))[0];
+ ((uint32_t*)(&tp[lskip_offset]))[1] =
+ ((uint32_t*)(&bp[lskip_offset]))[1];
+ ((uint32_t*)(&tp[lskip_offset]))[2] =
+ ((uint32_t*)(&bp[lskip_offset]))[2];
}
tp += tile_stride;
bp += buf_stride;
@@ -582,10 +583,10 @@ gegl_buffer_iterate_write (GeglBuffer *buffer,
if (buffer_y + y >= buffer_abyss_y &&
buffer_y + y < abyss_y_total)
{
- ((uint64_t*)(&tp[lskip * px_size]))[0] =
- ((uint64_t*)(&bp[lskip * px_size]))[0];
- ((uint64_t*)(&tp[lskip * px_size]))[1] =
- ((uint64_t*)(&bp[lskip * px_size]))[1];
+ ((uint64_t*)(&tp[lskip_offset]))[0] =
+ ((uint64_t*)(&bp[lskip_offset]))[0];
+ ((uint64_t*)(&tp[lskip_offset]))[1] =
+ ((uint64_t*)(&bp[lskip_offset]))[1];
}
tp += tile_stride;
bp += buf_stride;
@@ -599,12 +600,12 @@ gegl_buffer_iterate_write (GeglBuffer *buffer,
if (buffer_y + y >= buffer_abyss_y &&
buffer_y + y < abyss_y_total)
{
- ((uint64_t*)(&tp[lskip * px_size]))[0] =
- ((uint64_t*)(&bp[lskip * px_size]))[0];
- ((uint64_t*)(&tp[lskip * px_size]))[1] =
- ((uint64_t*)(&bp[lskip * px_size]))[1];
- ((uint64_t*)(&tp[lskip * px_size]))[2] =
- ((uint64_t*)(&bp[lskip * px_size]))[2];
+ ((uint64_t*)(&tp[lskip_offset]))[0] =
+ ((uint64_t*)(&bp[lskip_offset]))[0];
+ ((uint64_t*)(&tp[lskip_offset]))[1] =
+ ((uint64_t*)(&bp[lskip_offset]))[1];
+ ((uint64_t*)(&tp[lskip_offset]))[2] =
+ ((uint64_t*)(&bp[lskip_offset]))[2];
}
tp += tile_stride;
bp += buf_stride;
@@ -618,14 +619,118 @@ gegl_buffer_iterate_write (GeglBuffer *buffer,
if (buffer_y + y >= buffer_abyss_y &&
buffer_y + y < abyss_y_total)
{
- ((uint64_t*)(&tp[lskip * px_size]))[0] =
- ((uint64_t*)(&bp[lskip * px_size]))[0];
- ((uint64_t*)(&tp[lskip * px_size]))[1] =
- ((uint64_t*)(&bp[lskip * px_size]))[1];
- ((uint64_t*)(&tp[lskip * px_size]))[2] =
- ((uint64_t*)(&bp[lskip * px_size]))[2];
- ((uint64_t*)(&tp[lskip * px_size]))[3] =
- ((uint64_t*)(&bp[lskip * px_size]))[3];
+ ((uint64_t*)(&tp[lskip_offset]))[0] =
+ ((uint64_t*)(&bp[lskip_offset]))[0];
+ ((uint64_t*)(&tp[lskip_offset]))[1] =
+ ((uint64_t*)(&bp[lskip_offset]))[1];
+ ((uint64_t*)(&tp[lskip_offset]))[2] =
+ ((uint64_t*)(&bp[lskip_offset]))[2];
+ ((uint64_t*)(&tp[lskip_offset]))[3] =
+ ((uint64_t*)(&bp[lskip_offset]))[3];
+ }
+ tp += tile_stride;
+ bp += buf_stride;
+ }
+ break;
+ case 40:
+ for (row = offsety;
+ row < tile_height && y < height;
+ row++, y++)
+ {
+ if (buffer_y + y >= buffer_abyss_y &&
+ buffer_y + y < abyss_y_total)
+ {
+ ((uint64_t*)(&tp[lskip_offset]))[0] =
+ ((uint64_t*)(&bp[lskip_offset]))[0];
+ ((uint64_t*)(&tp[lskip_offset]))[1] =
+ ((uint64_t*)(&bp[lskip_offset]))[1];
+ ((uint64_t*)(&tp[lskip_offset]))[2] =
+ ((uint64_t*)(&bp[lskip_offset]))[2];
+ ((uint64_t*)(&tp[lskip_offset]))[3] =
+ ((uint64_t*)(&bp[lskip_offset]))[3];
+ ((uint64_t*)(&tp[lskip_offset]))[4] =
+ ((uint64_t*)(&bp[lskip_offset]))[4];
+ }
+ tp += tile_stride;
+ bp += buf_stride;
+ }
+ break;
+ case 48:
+ for (row = offsety;
+ row < tile_height && y < height;
+ row++, y++)
+ {
+ if (buffer_y + y >= buffer_abyss_y &&
+ buffer_y + y < abyss_y_total)
+ {
+ ((uint64_t*)(&tp[lskip_offset]))[0] =
+ ((uint64_t*)(&bp[lskip_offset]))[0];
+ ((uint64_t*)(&tp[lskip_offset]))[1] =
+ ((uint64_t*)(&bp[lskip_offset]))[1];
+ ((uint64_t*)(&tp[lskip_offset]))[2] =
+ ((uint64_t*)(&bp[lskip_offset]))[2];
+ ((uint64_t*)(&tp[lskip_offset]))[3] =
+ ((uint64_t*)(&bp[lskip_offset]))[3];
+ ((uint64_t*)(&tp[lskip_offset]))[4] =
+ ((uint64_t*)(&bp[lskip_offset]))[4];
+ ((uint64_t*)(&tp[lskip_offset]))[5] =
+ ((uint64_t*)(&bp[lskip_offset]))[5];
+ }
+ tp += tile_stride;
+ bp += buf_stride;
+ }
+ break;
+ case 56:
+ for (row = offsety;
+ row < tile_height && y < height;
+ row++, y++)
+ {
+ if (buffer_y + y >= buffer_abyss_y &&
+ buffer_y + y < abyss_y_total)
+ {
+ ((uint64_t*)(&tp[lskip_offset]))[0] =
+ ((uint64_t*)(&bp[lskip_offset]))[0];
+ ((uint64_t*)(&tp[lskip_offset]))[1] =
+ ((uint64_t*)(&bp[lskip_offset]))[1];
+ ((uint64_t*)(&tp[lskip_offset]))[2] =
+ ((uint64_t*)(&bp[lskip_offset]))[2];
+ ((uint64_t*)(&tp[lskip_offset]))[3] =
+ ((uint64_t*)(&bp[lskip_offset]))[3];
+ ((uint64_t*)(&tp[lskip_offset]))[4] =
+ ((uint64_t*)(&bp[lskip_offset]))[4];
+ ((uint64_t*)(&tp[lskip_offset]))[5] =
+ ((uint64_t*)(&bp[lskip_offset]))[5];
+ ((uint64_t*)(&tp[lskip_offset]))[6] =
+ ((uint64_t*)(&bp[lskip_offset]))[6];
+ }
+ tp += tile_stride;
+ bp += buf_stride;
+ }
+ break;
+ case 64:
+ for (row = offsety;
+ row < tile_height && y < height;
+ row++, y++)
+ {
+ if (buffer_y + y >= buffer_abyss_y &&
+ buffer_y + y < abyss_y_total)
+ {
+ ((uint64_t*)(&tp[lskip_offset]))[0] =
+ ((uint64_t*)(&bp[lskip_offset]))[0];
+ ((uint64_t*)(&tp[lskip_offset]))[1] =
+ ((uint64_t*)(&bp[lskip_offset]))[1];
+ ((uint64_t*)(&tp[lskip_offset]))[2] =
+ ((uint64_t*)(&bp[lskip_offset]))[2];
+ ((uint64_t*)(&tp[lskip_offset]))[3] =
+ ((uint64_t*)(&bp[lskip_offset]))[3];
+ ((uint64_t*)(&tp[lskip_offset]))[4] =
+ ((uint64_t*)(&bp[lskip_offset]))[4];
+ ((uint64_t*)(&tp[lskip_offset]))[5] =
+ ((uint64_t*)(&bp[lskip_offset]))[5];
+ ((uint64_t*)(&tp[lskip_offset]))[6] =
+ ((uint64_t*)(&bp[lskip_offset]))[6];
+ ((uint64_t*)(&tp[lskip_offset]))[7] =
+ ((uint64_t*)(&bp[lskip_offset]))[7];
}
tp += tile_stride;
bp += buf_stride;
@@ -639,8 +744,8 @@ gegl_buffer_iterate_write (GeglBuffer *buffer,
if (buffer_y + y >= buffer_abyss_y &&
buffer_y + y < abyss_y_total)
{
- memcpy (tp + lskip * px_size,
- bp + lskip * px_size,
+ memcpy (tp + lskip_offset,
+ bp + lskip_offset,
pixels * px_size);
}
tp += tile_stride;
@@ -830,6 +935,17 @@ gegl_buffer_iterate_read_simple (GeglBuffer *buffer,
bp += buf_stride;
}
break;
+ case 3:
+ for (row = offsety; row < tile_height && y < height;
+ row++, y++)
+ {
+ bp[0] = tp[0];
+ bp[1] = tp[1];
+ bp[2] = tp[2];
+ tp += tile_stride;
+ bp += buf_stride;
+ }
+ break;
case 4:
for (row = offsety; row < tile_height && y < height;
row++, y++)
@@ -839,6 +955,17 @@ gegl_buffer_iterate_read_simple (GeglBuffer *buffer,
bp += buf_stride;
}
break;
+ case 6:
+ for (row = offsety; row < tile_height && y < height;
+ row++, y++)
+ {
+ ((uint16_t*)bp)[0] = ((uint16_t*)tp)[0];
+ ((uint16_t*)bp)[1] = ((uint16_t*)tp)[1];
+ ((uint16_t*)bp)[2] = ((uint16_t*)tp)[2];
+ tp += tile_stride;
+ bp += buf_stride;
+ }
+ break;
case 8:
for (row = offsety; row < tile_height && y < height;
row++, y++)
@@ -892,6 +1019,65 @@ gegl_buffer_iterate_read_simple (GeglBuffer *buffer,
bp += buf_stride;
}
break;
+ case 40:
+ for (row = offsety; row < tile_height && y < height;
+ row++, y++)
+ {
+ ((uint64_t*)bp)[0] = ((uint64_t*)tp)[0];
+ ((uint64_t*)bp)[1] = ((uint64_t*)tp)[1];
+ ((uint64_t*)bp)[2] = ((uint64_t*)tp)[2];
+ ((uint64_t*)bp)[3] = ((uint64_t*)tp)[3];
+ ((uint64_t*)bp)[4] = ((uint64_t*)tp)[4];
+ tp += tile_stride;
+ bp += buf_stride;
+ }
+ break;
+ case 48:
+ for (row = offsety; row < tile_height && y < height;
+ row++, y++)
+ {
+ ((uint64_t*)bp)[0] = ((uint64_t*)tp)[0];
+ ((uint64_t*)bp)[1] = ((uint64_t*)tp)[1];
+ ((uint64_t*)bp)[2] = ((uint64_t*)tp)[2];
+ ((uint64_t*)bp)[3] = ((uint64_t*)tp)[3];
+ ((uint64_t*)bp)[4] = ((uint64_t*)tp)[4];
+ ((uint64_t*)bp)[5] = ((uint64_t*)tp)[5];
+ tp += tile_stride;
+ bp += buf_stride;
+ }
+ break;
+ case 56:
+ for (row = offsety; row < tile_height && y < height;
+ row++, y++)
+ {
+ ((uint64_t*)bp)[0] = ((uint64_t*)tp)[0];
+ ((uint64_t*)bp)[1] = ((uint64_t*)tp)[1];
+ ((uint64_t*)bp)[2] = ((uint64_t*)tp)[2];
+ ((uint64_t*)bp)[3] = ((uint64_t*)tp)[3];
+ ((uint64_t*)bp)[4] = ((uint64_t*)tp)[4];
+ ((uint64_t*)bp)[5] = ((uint64_t*)tp)[5];
+ ((uint64_t*)bp)[6] = ((uint64_t*)tp)[6];
+ tp += tile_stride;
+ bp += buf_stride;
+ }
+ break;
+ case 64:
+ for (row = offsety; row < tile_height && y < height;
+ row++, y++)
+ {
+ ((uint64_t*)bp)[0] = ((uint64_t*)tp)[0];
+ ((uint64_t*)bp)[1] = ((uint64_t*)tp)[1];
+ ((uint64_t*)bp)[2] = ((uint64_t*)tp)[2];
+ ((uint64_t*)bp)[3] = ((uint64_t*)tp)[3];
+ ((uint64_t*)bp)[4] = ((uint64_t*)tp)[4];
+ ((uint64_t*)bp)[5] = ((uint64_t*)tp)[5];
+ ((uint64_t*)bp)[6] = ((uint64_t*)tp)[6];
+ ((uint64_t*)bp)[7] = ((uint64_t*)tp)[7];
+ tp += tile_stride;
+ bp += buf_stride;
+ }
+ break;
+
default:
for (row = offsety;
row < tile_height && y < height;
[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Thread Index] [Date Index] [Author Index]