[gegl] gegl/buffer: optimize column setting



commit 543a46eace5842f18c8bd9d706cf9fca81e2a3f4
Author: Øyvind Kolås <pippin gimp org>
Date:   Wed Dec 27 12:44:06 2017 +0100

    gegl/buffer: optimize column setting

 gegl/buffer/gegl-buffer-access.c |  174 ++++++++++++++++++++++++++++++++++++--
 1 files changed, 165 insertions(+), 9 deletions(-)
---
diff --git a/gegl/buffer/gegl-buffer-access.c b/gegl/buffer/gegl-buffer-access.c
index 2a49027..21b488c 100644
--- a/gegl/buffer/gegl-buffer-access.c
+++ b/gegl/buffer/gegl-buffer-access.c
@@ -478,18 +478,174 @@ gegl_buffer_iterate_write (GeglBuffer          *buffer,
             }
           else
             {
-              for (row = offsety;
-                   row < tile_height && y < height;
-                   row++, y++)
+              switch (pixels * px_size)
                 {
-                  if (buffer_y + y >= buffer_abyss_y &&
-                      buffer_y + y < abyss_y_total)
+                  case 1:
+                    for (row = offsety;
+                         row < tile_height && y < height;
+                         row++, y++)
                     {
-                      memcpy (tp + lskip * px_size, bp + lskip * px_size,
-                              pixels * px_size);
+                      if (buffer_y + y >= buffer_abyss_y &&
+                          buffer_y + y < abyss_y_total)
+                        {
+                          tp[lskip * px_size] = bp[lskip * px_size];
+                        }
+                      tp += tile_stride;
+                      bp += buf_stride;
+                    }
+                    break;
+                  case 2:
+                    for (row = offsety;
+                         row < tile_height && y < height;
+                         row++, y++)
+                    {
+                      if (buffer_y + y >= buffer_abyss_y &&
+                          buffer_y + y < abyss_y_total)
+                        {
+                          ((uint16_t*)(&tp[lskip * px_size]))[0] =
+                          ((uint16_t*)(&bp[lskip * px_size]))[0];
+                        }
+                      tp += tile_stride;
+                      bp += buf_stride;
+                    }
+                    break;
+                  case 3:
+                    for (row = offsety;
+                         row < tile_height && y < height;
+                         row++, y++)
+                    {
+                      if (buffer_y + y >= buffer_abyss_y &&
+                          buffer_y + y < abyss_y_total)
+                        {
+                          tp[lskip * px_size] = bp[lskip * px_size];
+                          tp[lskip * px_size+1] = bp[lskip * px_size+1];
+                          tp[lskip * px_size+2] = bp[lskip * px_size+2];
+                        }
+                      tp += tile_stride;
+                      bp += buf_stride;
+                    }
+                    break;
+                  case 4:
+                    for (row = offsety;
+                         row < tile_height && y < height;
+                         row++, y++)
+                    {
+                      if (buffer_y + y >= buffer_abyss_y &&
+                          buffer_y + y < abyss_y_total)
+                        {
+                          ((uint32_t*)(&tp[lskip * px_size]))[0] =
+                          ((uint32_t*)(&bp[lskip * px_size]))[0];
+                        }
+                      tp += tile_stride;
+                      bp += buf_stride;
+                    }
+                    break;
+                  case 8:
+                    for (row = offsety;
+                         row < tile_height && y < height;
+                         row++, y++)
+                    {
+                      if (buffer_y + y >= buffer_abyss_y &&
+                          buffer_y + y < abyss_y_total)
+                        {
+                          ((uint64_t*)(&tp[lskip * px_size]))[0] =
+                          ((uint64_t*)(&bp[lskip * px_size]))[0];
+                        }
+                      tp += tile_stride;
+                      bp += buf_stride;
+                    }
+                    break;
+                  case 12:
+                    for (row = offsety;
+                         row < tile_height && y < height;
+                         row++, y++)
+                    {
+                      if (buffer_y + y >= buffer_abyss_y &&
+                          buffer_y + y < abyss_y_total)
+                        {
+                          ((uint32_t*)(&tp[lskip * px_size]))[0] =
+                          ((uint32_t*)(&bp[lskip * px_size]))[0];
+                          ((uint32_t*)(&tp[lskip * px_size]))[1] =
+                          ((uint32_t*)(&bp[lskip * px_size]))[1];
+                          ((uint32_t*)(&tp[lskip * px_size]))[2] =
+                          ((uint32_t*)(&bp[lskip * px_size]))[2];
+                        }
+                      tp += tile_stride;
+                      bp += buf_stride;
+                    }
+                    break;
+                  case 16:
+                    for (row = offsety;
+                         row < tile_height && y < height;
+                         row++, y++)
+                    {
+                      if (buffer_y + y >= buffer_abyss_y &&
+                          buffer_y + y < abyss_y_total)
+                        {
+                          ((uint64_t*)(&tp[lskip * px_size]))[0] =
+                          ((uint64_t*)(&bp[lskip * px_size]))[0];
+                          ((uint64_t*)(&tp[lskip * px_size]))[1] =
+                          ((uint64_t*)(&bp[lskip * px_size]))[1];
+                        }
+                      tp += tile_stride;
+                      bp += buf_stride;
+                    }
+                    break;
+                  case 24:
+                    for (row = offsety;
+                         row < tile_height && y < height;
+                         row++, y++)
+                    {
+                      if (buffer_y + y >= buffer_abyss_y &&
+                          buffer_y + y < abyss_y_total)
+                        {
+                          ((uint64_t*)(&tp[lskip * px_size]))[0] =
+                          ((uint64_t*)(&bp[lskip * px_size]))[0];
+                          ((uint64_t*)(&tp[lskip * px_size]))[1] =
+                          ((uint64_t*)(&bp[lskip * px_size]))[1];
+                          ((uint64_t*)(&tp[lskip * px_size]))[2] =
+                          ((uint64_t*)(&bp[lskip * px_size]))[2];
+                        }
+                      tp += tile_stride;
+                      bp += buf_stride;
+                    }
+                    break;
+                  case 32:
+                    for (row = offsety;
+                         row < tile_height && y < height;
+                         row++, y++)
+                    {
+                      if (buffer_y + y >= buffer_abyss_y &&
+                          buffer_y + y < abyss_y_total)
+                        {
+                          ((uint64_t*)(&tp[lskip * px_size]))[0] =
+                          ((uint64_t*)(&bp[lskip * px_size]))[0];
+                          ((uint64_t*)(&tp[lskip * px_size]))[1] =
+                          ((uint64_t*)(&bp[lskip * px_size]))[1];
+                          ((uint64_t*)(&tp[lskip * px_size]))[2] =
+                          ((uint64_t*)(&bp[lskip * px_size]))[2];
+                          ((uint64_t*)(&tp[lskip * px_size]))[3] =
+                          ((uint64_t*)(&bp[lskip * px_size]))[3];
+                        }
+                      tp += tile_stride;
+                      bp += buf_stride;
+                    }
+                    break;
+                  default:
+                    for (row = offsety;
+                         row < tile_height && y < height;
+                         row++, y++)
+                    {
+                      if (buffer_y + y >= buffer_abyss_y &&
+                          buffer_y + y < abyss_y_total)
+                        {
+                          memcpy (tp + lskip * px_size,
+                                  bp + lskip * px_size,
+                                  pixels * px_size);
+                        }
+                      tp += tile_stride;
+                      bp += buf_stride;
                     }
-                  tp += tile_stride;
-                  bp += buf_stride;
                 }
             }
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]