[gegl] gegl: implement u8 fast paths for boxfilter/2x2 downscale
- From: Øyvind Kolås <ok src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gegl] gegl: implement u8 fast paths for boxfilter/2x2 downscale
- Date: Sat, 20 Jan 2018 02:57:40 +0000 (UTC)
commit 5f40ca579be3569f08be53ffbb365f233af8dae8
Author: Øyvind Kolås <pippin gimp org>
Date: Sat Jan 20 02:52:30 2018 +0100
gegl: implement u8 fast paths for boxfilter/2x2 downscale
This brings back most of the performance lost when we first moved to always
rendering in linear: for u8 buffers, scaled fetches now run at about 80% of the
speed they had before the linear correctness fix.
gegl/gegl-algorithms.c | 622 ++++++++++++++++++++++++++++++++++++++++----
gegl/gegl-init.c | 4 +
gegl/gegl-types-internal.h | 3 +
3 files changed, 572 insertions(+), 57 deletions(-)
---
diff --git a/gegl/gegl-algorithms.c b/gegl/gegl-algorithms.c
index 5bb9d4e..66201ce 100644
--- a/gegl/gegl-algorithms.c
+++ b/gegl/gegl-algorithms.c
@@ -13,14 +13,14 @@
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*
- * Copyright 2006,2007,2015 Øyvind Kolås <pippin gimp org>
+ * Copyright 2006,2007,2015,2018 Øyvind Kolås <pippin gimp org>
* 2013 Daniel Sabo
*/
#include "config.h"
#include <string.h>
-
+#include <stdint.h>
#include <glib-object.h>
#include <babl/babl.h>
@@ -32,47 +32,6 @@
#include <math.h>
-static void
-gegl_downscale_2x2_generic (const Babl *format,
- gint src_width,
- gint src_height,
- guchar *src_data,
- gint src_rowstride,
- guchar *dst_data,
- gint dst_rowstride);
-
-
-GeglDownscale2x2Fun gegl_downscale_2x2_get_fun (const Babl *format)
-{
- const Babl *comp_type = babl_format_get_type (format, 0);
- const Babl *model = babl_format_get_model (format);
-
- if (gegl_babl_model_is_linear (model))
- {
- if (comp_type == gegl_babl_float())
- {
- return gegl_downscale_2x2_float;
- }
- else if (comp_type == gegl_babl_u8())
- {
- return gegl_downscale_2x2_u8;
- }
- else if (comp_type == gegl_babl_u16())
- {
- return gegl_downscale_2x2_u16;
- }
- else if (comp_type == gegl_babl_u32())
- {
- return gegl_downscale_2x2_u32;
- }
- else if (comp_type == gegl_babl_double())
- {
- return gegl_downscale_2x2_double;
- }
- }
- return gegl_downscale_2x2_generic;
-}
-
void gegl_downscale_2x2 (const Babl *format,
gint src_width,
gint src_height,
@@ -87,7 +46,6 @@ void gegl_downscale_2x2 (const Babl *format,
}
#include <stdio.h>
-#define ALLOCA_THRESHOLD 8192 * 4 /* maybe this needs to be reduced for win32? */
static void
gegl_downscale_2x2_generic (const Babl *format,
@@ -111,7 +69,7 @@ gegl_downscale_2x2_generic (const Babl *format,
void *in_tmp;
void *out_tmp;
- if (src_height * in_tmp_rowstride + dst_height * out_tmp_rowstride < ALLOCA_THRESHOLD)
+ if (src_height * in_tmp_rowstride + dst_height * out_tmp_rowstride < GEGL_ALLOCA_THRESHOLD)
{
in_tmp = alloca (src_height * in_tmp_rowstride);
out_tmp = alloca (dst_height * out_tmp_rowstride);
@@ -142,6 +100,503 @@ gegl_downscale_2x2_generic (const Babl *format,
}
}
+static uint16_t lut_u8_to_u16[256];
+static uint8_t lut_u16_to_u8[65537];
+
+void _gegl_init_u8_lut (void);
+void _gegl_init_u8_lut (void)
+{
+ static int lut_inited = 0;
+ uint8_t u8_ramp[256];
+ uint16_t u16_ramp[65537];
+ int i;
+
+ if (lut_inited)
+ return;
+ for (i = 0; i < 256; i++) u8_ramp[i]=i;
+ for (i = 0; i < 65536; i++) u16_ramp[i]=i;
+ babl_process (babl_fish (babl_format ("Y' u8"), babl_format("Y u16")),
+ &u8_ramp[0], &lut_u8_to_u16[0],
+ 256);
+
+ /* workaround for bug, doing this conversion sample by sample */
+ for (i = 0; i < 65536; i++)
+ babl_process (babl_fish (babl_format ("Y u16"), babl_format("Y' u8")),
+ &u16_ramp[i], &lut_u16_to_u8[i],
+ 1);
+
+ lut_inited = 1;
+}
+
+static inline void
+u8_to_u16_rows (int components,
+ const uint8_t *source_buf,
+ int source_stride,
+ uint16_t *dest_buf,
+ int dest_stride,
+ int n,
+ int rows)
+{
+ n *= components;
+
+ while (rows--)
+ {
+ const uint8_t *src = source_buf;
+ uint16_t *dest = dest_buf;
+ int i = n;
+ while (i--)
+ *(dest++) = lut_u8_to_u16[*(src++)];
+ source_buf += source_stride;
+ dest_buf += (dest_stride / 2);
+ }
+}
+
+static inline void
+u16_to_u8_rows (int components,
+ const uint16_t *source_buf,
+ int source_stride,
+ uint8_t *dest_buf,
+ int dest_stride,
+ int n,
+ int rows)
+{
+ n *= components;
+ while (rows--)
+ {
+ int i = n;
+ const uint16_t *src = source_buf;
+ uint8_t *dest = dest_buf;
+ while (i--)
+ *(dest++) = lut_u16_to_u8[*(src++)];
+
+ source_buf += (source_stride / 2);
+ dest_buf += dest_stride;
+ }
+}
+
+static inline int int_floorf (float x)
+{
+ int i = (int)x; /* truncate */
+ return i - ( i > x ); /* convert trunc to floor */
+}
+
+
+static void
+gegl_boxfilter_u8_nl (guchar *dest_buf,
+ const guchar *source_buf,
+ const GeglRectangle *dst_rect,
+ const GeglRectangle *src_rect,
+ const gint s_rowstride,
+ const gdouble scale,
+ const gint bpp,
+ const gint d_rowstride)
+{
+ const uint8_t *src[9];
+ gint components = bpp / sizeof(uint8_t);
+
+ gfloat left_weight[dst_rect->width];
+ gfloat center_weight[dst_rect->width];
+ gfloat right_weight[dst_rect->width];
+
+ gint jj[dst_rect->width];
+
+ for (gint x = 0; x < dst_rect->width; x++)
+ {
+ gfloat sx = (dst_rect->x + x + .5) / scale - src_rect->x;
+ jj[x] = int_floorf (sx);
+
+ left_weight[x] = .5 - scale * (sx - jj[x]);
+ left_weight[x] = MAX (0.0, left_weight[x]);
+ right_weight[x] = .5 - scale * ((jj[x] + 1) - sx);
+ right_weight[x] = MAX (0.0, right_weight[x]);
+ center_weight[x] = 1. - left_weight[x] - right_weight[x];
+
+ jj[x] *= components;
+ }
+
+ for (gint y = 0; y < dst_rect->height; y++)
+ {
+ gfloat top_weight, middle_weight, bottom_weight;
+ const gfloat sy = (dst_rect->y + y + .5) / scale - src_rect->y;
+ const gint ii = int_floorf (sy);
+ uint8_t *dst = (uint8_t*)(dest_buf + y * d_rowstride);
+ const guchar *src_base = source_buf + ii * s_rowstride;
+
+ top_weight = .5 - scale * (sy - ii);
+ top_weight = MAX (0., top_weight);
+ bottom_weight = .5 - scale * ((ii + 1 ) - sy);
+ bottom_weight = MAX (0., bottom_weight);
+ middle_weight = 1. - top_weight - bottom_weight;
+
+ switch (components)
+ {
+ case 4:
+ for (gint x = 0; x < dst_rect->width; x++)
+ {
+ src[4] = (const uint8_t*)src_base + jj[x];
+ src[1] = (const uint8_t*)(src_base - s_rowstride) + jj[x];
+ src[7] = (const uint8_t*)(src_base + s_rowstride) + jj[x];
+ src[2] = src[1] + 4;
+ src[5] = src[4] + 4;
+ src[8] = src[7] + 4;
+ src[0] = src[1] - 4;
+ src[3] = src[4] - 4;
+ src[6] = src[7] - 4;
+
+ if (src[0][3] == 0 && /* XXX: it would be even better to not call this at all for the abyss... */
+ src[1][3] == 0 &&
+ src[2][3] == 0 &&
+ src[3][3] == 0 &&
+ src[4][3] == 0 &&
+ src[5][3] == 0 &&
+ src[6][3] == 0 &&
+ src[7][3] == 0)
+ {
+ dst[0] = dst[1] = dst[2] = dst[3] = 0;
+ }
+ else
+ {
+ const gfloat l = left_weight[x];
+ const gfloat c = center_weight[x];
+ const gfloat r = right_weight[x];
+
+ const gfloat t = top_weight;
+ const gfloat m = middle_weight;
+ const gfloat b = bottom_weight;
+
+#define BOXFILTER_ROUND(val) lut_u16_to_u8[((int)((val)+0.5))]
+#define C(val) lut_u8_to_u16[(val)]
+ dst[0] = BOXFILTER_ROUND(
+ (C(src[0][0]) * t + C(src[3][0]) * m + C(src[6][0]) * b) * l +
+ (C(src[1][0]) * t + C(src[4][0]) * m + C(src[7][0]) * b) * c +
+ (C(src[2][0]) * t + C(src[5][0]) * m + C(src[8][0]) * b) * r);
+ dst[1] = BOXFILTER_ROUND(
+ (C(src[0][1]) * t + C(src[3][1]) * m + C(src[6][1]) * b) * l +
+ (C(src[1][1]) * t + C(src[4][1]) * m + C(src[7][1]) * b) * c +
+ (C(src[2][1]) * t + C(src[5][1]) * m + C(src[8][1]) * b) * r);
+ dst[2] = BOXFILTER_ROUND(
+ (C(src[0][2]) * t + C(src[3][2]) * m + C(src[6][2]) * b) * l +
+ (C(src[1][2]) * t + C(src[4][2]) * m + C(src[7][2]) * b) * c +
+ (C(src[2][2]) * t + C(src[5][2]) * m + C(src[8][2]) * b) * r);
+ dst[3] = BOXFILTER_ROUND(
+ (C(src[0][3]) * t + C(src[3][3]) * m + C(src[6][3]) * b) * l +
+ (C(src[1][3]) * t + C(src[4][3]) * m + C(src[7][3]) * b) * c +
+ (C(src[2][3]) * t + C(src[5][3]) * m + C(src[8][3]) * b) * r);
+ }
+ dst += 4;
+ }
+ break;
+ case 3:
+ for (gint x = 0; x < dst_rect->width; x++)
+ {
+ src[4] = (const uint8_t*)src_base + jj[x];
+ src[1] = (const uint8_t*)(src_base - s_rowstride) + jj[x];
+ src[7] = (const uint8_t*)(src_base + s_rowstride) + jj[x];
+ src[2] = src[1] + 3;
+ src[5] = src[4] + 3;
+ src[8] = src[7] + 3;
+ src[0] = src[1] - 3;
+ src[3] = src[4] - 3;
+ src[6] = src[7] - 3;
+ {
+ const gfloat l = left_weight[x];
+ const gfloat c = center_weight[x];
+ const gfloat r = right_weight[x];
+
+ const gfloat t = top_weight;
+ const gfloat m = middle_weight;
+ const gfloat b = bottom_weight;
+
+ dst[0] = BOXFILTER_ROUND(
+ (C(src[0][0]) * t + C(src[3][0]) * m + C(src[6][0]) * b) * l +
+ (C(src[1][0]) * t + C(src[4][0]) * m + C(src[7][0]) * b) * c +
+ (C(src[2][0]) * t + C(src[5][0]) * m + C(src[8][0]) * b) * r);
+ dst[1] = BOXFILTER_ROUND(
+ (C(src[0][1]) * t + C(src[3][1]) * m + C(src[6][1]) * b) * l +
+ (C(src[1][1]) * t + C(src[4][1]) * m + C(src[7][1]) * b) * c +
+ (C(src[2][1]) * t + C(src[5][1]) * m + C(src[8][1]) * b) * r);
+ dst[2] = BOXFILTER_ROUND(
+ (C(src[0][2]) * t + C(src[3][2]) * m + C(src[6][2]) * b) * l +
+ (C(src[1][2]) * t + C(src[4][2]) * m + C(src[7][2]) * b) * c +
+ (C(src[2][2]) * t + C(src[5][2]) * m + C(src[8][2]) * b) * r);
+ }
+ dst += 3;
+ }
+ break;
+ case 2:
+ for (gint x = 0; x < dst_rect->width; x++)
+ {
+ src[4] = (const uint8_t*)src_base + jj[x];
+ src[1] = (const uint8_t*)(src_base - s_rowstride) + jj[x];
+ src[7] = (const uint8_t*)(src_base + s_rowstride) + jj[x];
+ src[2] = src[1] + 2;
+ src[5] = src[4] + 2;
+ src[8] = src[7] + 2;
+ src[0] = src[1] - 2;
+ src[3] = src[4] - 2;
+ src[6] = src[7] - 2;
+ {
+ const gfloat l = left_weight[x];
+ const gfloat c = center_weight[x];
+ const gfloat r = right_weight[x];
+
+ const gfloat t = top_weight;
+ const gfloat m = middle_weight;
+ const gfloat b = bottom_weight;
+
+ dst[0] = BOXFILTER_ROUND(
+ (C(src[0][0]) * t + C(src[3][0]) * m + C(src[6][0]) * b) * l +
+ (C(src[1][0]) * t + C(src[4][0]) * m + C(src[7][0]) * b) * c +
+ (C(src[2][0]) * t + C(src[5][0]) * m + C(src[8][0]) * b) * r);
+ dst[1] = BOXFILTER_ROUND(
+ (C(src[0][1]) * t + C(src[3][1]) * m + C(src[6][1]) * b) * l +
+ (C(src[1][1]) * t + C(src[4][1]) * m + C(src[7][1]) * b) * c +
+ (C(src[2][1]) * t + C(src[5][1]) * m + C(src[8][1]) * b) * r);
+ }
+ dst += 2;
+ }
+ break;
+ case 1:
+ for (gint x = 0; x < dst_rect->width; x++)
+ {
+ src[4] = (const uint8_t*)src_base + jj[x];
+ src[1] = (const uint8_t*)(src_base - s_rowstride) + jj[x];
+ src[7] = (const uint8_t*)(src_base + s_rowstride) + jj[x];
+ src[2] = src[1] + 1;
+ src[5] = src[4] + 1;
+ src[8] = src[7] + 1;
+ src[0] = src[1] - 1;
+ src[3] = src[4] - 1;
+ src[6] = src[7] - 1;
+ {
+ const gfloat l = left_weight[x];
+ const gfloat c = center_weight[x];
+ const gfloat r = right_weight[x];
+
+ const gfloat t = top_weight;
+ const gfloat m = middle_weight;
+ const gfloat b = bottom_weight;
+
+ dst[0] = BOXFILTER_ROUND(
+ (C(src[0][0]) * t + C(src[3][0]) * m + C(src[6][0]) * b) * l +
+ (C(src[1][0]) * t + C(src[4][0]) * m + C(src[7][0]) * b) * c +
+ (C(src[2][0]) * t + C(src[5][0]) * m + C(src[8][0]) * b) * r);
+ }
+ dst += 1;
+ }
+ break;
+ default:
+ for (gint x = 0; x < dst_rect->width; x++)
+ {
+ src[4] = (const uint8_t*)src_base + jj[x];
+ src[1] = (const uint8_t*)(src_base - s_rowstride) + jj[x];
+ src[7] = (const uint8_t*)(src_base + s_rowstride) + jj[x];
+ src[2] = src[1] + components;
+ src[5] = src[4] + components;
+ src[8] = src[7] + components;
+ src[0] = src[1] - components;
+ src[3] = src[4] - components;
+ src[6] = src[7] - components;
+ {
+ const gfloat l = left_weight[x];
+ const gfloat c = center_weight[x];
+ const gfloat r = right_weight[x];
+
+ const gfloat t = top_weight;
+ const gfloat m = middle_weight;
+ const gfloat b = bottom_weight;
+
+ for (gint i = 0; i < components; ++i)
+ {
+ dst[i] = BOXFILTER_ROUND(
+ (C(src[0][i]) * t + C(src[3][i]) * m + C(src[6][i]) * b) * l +
+ (C(src[1][i]) * t + C(src[4][i]) * m + C(src[7][i]) * b) * c +
+ (C(src[2][i]) * t + C(src[5][i]) * m + C(src[8][i]) * b) * r);
+ }
+ }
+ dst += components;
+ }
+ break;
+ }
+ }
+}
+#undef BOXFILTER_ROUND
+#undef C
+
+static void
+gegl_downscale_2x2_u8_nl (const Babl *format,
+ gint src_width,
+ gint src_height,
+ guchar *src_data,
+ gint src_rowstride,
+ guchar *dst_data,
+ gint dst_rowstride)
+{
+ gint y;
+ gint bpp = babl_format_get_bytes_per_pixel (format);
+ gint diag = src_rowstride + bpp;
+ const gint components = bpp / sizeof(uint8_t);
+
+ if (!src_data || !dst_data)
+ return;
+
+ for (y = 0; y < src_height / 2; y++)
+ {
+ gint x;
+ guchar *src = src_data + src_rowstride * y * 2;
+ guchar *dst = dst_data + dst_rowstride * y;
+
+ switch (components)
+ {
+ case 1:
+ for (x = 0; x < src_width / 2; x++)
+ {
+ uint8_t * aa = ((uint8_t *)(src));
+ uint8_t * ab = ((uint8_t *)(src + bpp));
+ uint8_t * ba = ((uint8_t *)(src + src_rowstride));
+ uint8_t * bb = ((uint8_t *)(src + diag));
+
+ ((uint8_t *)dst)[0] = lut_u16_to_u8[ (lut_u8_to_u16[aa[0]] +
+ lut_u8_to_u16[ab[0]] +
+ lut_u8_to_u16[ba[0]] +
+ lut_u8_to_u16[bb[0]])>>2 ];
+
+ dst += bpp;
+ src += bpp * 2;
+ }
+ break;
+ case 2:
+ for (x = 0; x < src_width / 2; x++)
+ {
+ uint8_t * aa = ((uint8_t *)(src));
+ uint8_t * ab = ((uint8_t *)(src + bpp));
+ uint8_t * ba = ((uint8_t *)(src + src_rowstride));
+ uint8_t * bb = ((uint8_t *)(src + diag));
+
+ ((uint8_t *)dst)[0] = lut_u16_to_u8[ (lut_u8_to_u16[aa[0]] +
+ lut_u8_to_u16[ab[0]] +
+ lut_u8_to_u16[ba[0]] +
+ lut_u8_to_u16[bb[0]])>>2 ];
+ ((uint8_t *)dst)[1] = lut_u16_to_u8[ (lut_u8_to_u16[aa[1]] +
+ lut_u8_to_u16[ab[1]] +
+ lut_u8_to_u16[ba[1]] +
+ lut_u8_to_u16[bb[1]])>>2 ];
+
+ dst += bpp;
+ src += bpp * 2;
+ }
+ break;
+ case 3:
+ for (x = 0; x < src_width / 2; x++)
+ {
+ uint8_t * aa = ((uint8_t *)(src));
+ uint8_t * ab = ((uint8_t *)(src + bpp));
+ uint8_t * ba = ((uint8_t *)(src + src_rowstride));
+ uint8_t * bb = ((uint8_t *)(src + diag));
+
+ ((uint8_t *)dst)[0] = lut_u16_to_u8[ (lut_u8_to_u16[aa[0]] +
+ lut_u8_to_u16[ab[0]] +
+ lut_u8_to_u16[ba[0]] +
+ lut_u8_to_u16[bb[0]])>>2 ];
+ ((uint8_t *)dst)[1] = lut_u16_to_u8[ (lut_u8_to_u16[aa[1]] +
+ lut_u8_to_u16[ab[1]] +
+ lut_u8_to_u16[ba[1]] +
+ lut_u8_to_u16[bb[1]])>>2 ];
+ ((uint8_t *)dst)[2] = lut_u16_to_u8[ (lut_u8_to_u16[aa[2]] +
+ lut_u8_to_u16[ab[2]] +
+ lut_u8_to_u16[ba[2]] +
+ lut_u8_to_u16[bb[2]])>>2 ];
+
+ dst += bpp;
+ src += bpp * 2;
+ }
+ break;
+ case 4:
+ for (x = 0; x < src_width / 2; x++)
+ {
+ uint8_t * aa = ((uint8_t *)(src));
+ uint8_t * ab = ((uint8_t *)(src + bpp));
+ uint8_t * ba = ((uint8_t *)(src + src_rowstride));
+ uint8_t * bb = ((uint8_t *)(src + diag));
+
+ ((uint8_t *)dst)[0] = lut_u16_to_u8[ (lut_u8_to_u16[aa[0]] +
+ lut_u8_to_u16[ab[0]] +
+ lut_u8_to_u16[ba[0]] +
+ lut_u8_to_u16[bb[0]])>>2 ];
+ ((uint8_t *)dst)[1] = lut_u16_to_u8[ (lut_u8_to_u16[aa[1]] +
+ lut_u8_to_u16[ab[1]] +
+ lut_u8_to_u16[ba[1]] +
+ lut_u8_to_u16[bb[1]])>>2 ];
+ ((uint8_t *)dst)[2] = lut_u16_to_u8[ (lut_u8_to_u16[aa[2]] +
+ lut_u8_to_u16[ab[2]] +
+ lut_u8_to_u16[ba[2]] +
+ lut_u8_to_u16[bb[2]])>>2 ];
+ ((uint8_t *)dst)[3] = lut_u16_to_u8[ (lut_u8_to_u16[aa[3]] +
+ lut_u8_to_u16[ab[3]] +
+ lut_u8_to_u16[ba[3]] +
+ lut_u8_to_u16[bb[3]])>>2 ];
+
+ dst += bpp;
+ src += bpp * 2;
+ }
+ break;
+ default:
+ for (x = 0; x < src_width / 2; x++)
+ {
+ gint i;
+ uint8_t * aa = ((uint8_t *)(src));
+ uint8_t * ab = ((uint8_t *)(src + bpp));
+ uint8_t * ba = ((uint8_t *)(src + src_rowstride));
+ uint8_t * bb = ((uint8_t *)(src + diag));
+
+ for (i = 0; i < components; i++)
+ ((uint8_t *)dst)[i] =
+ lut_u16_to_u8[ (lut_u8_to_u16[aa[i]] +
+ lut_u8_to_u16[ab[i]] +
+ lut_u8_to_u16[ba[i]] +
+ lut_u8_to_u16[bb[i]])>>2 ];
+ dst += bpp;
+ src += bpp * 2;
+ }
+ }
+ }
+}
+
+
+
+GeglDownscale2x2Fun gegl_downscale_2x2_get_fun (const Babl *format)
+{
+ const Babl *comp_type = babl_format_get_type (format, 0);
+ const Babl *model = babl_format_get_model (format);
+
+ if (gegl_babl_model_is_linear (model))
+ {
+ if (comp_type == gegl_babl_float())
+ {
+ return gegl_downscale_2x2_float;
+ }
+ else if (comp_type == gegl_babl_u8())
+ {
+ return gegl_downscale_2x2_u8;
+ }
+ else if (comp_type == gegl_babl_u16())
+ {
+ return gegl_downscale_2x2_u16;
+ }
+ else if (comp_type == gegl_babl_u32())
+ {
+ return gegl_downscale_2x2_u32;
+ }
+ else if (comp_type == gegl_babl_double())
+ {
+ return gegl_downscale_2x2_double;
+ }
+ }
+ if (comp_type == gegl_babl_u8())
+ return gegl_downscale_2x2_u8_nl;
+ return gegl_downscale_2x2_generic;
+}
+
void
gegl_downscale_2x2_nearest (const Babl *format,
gint src_width,
@@ -173,6 +628,57 @@ gegl_downscale_2x2_nearest (const Babl *format,
}
static void
+gegl_resample_boxfilter_generic_u16 (guchar *dest_buf,
+ const guchar *source_buf,
+ const GeglRectangle *dst_rect,
+ const GeglRectangle *src_rect,
+ gint s_rowstride,
+ gdouble scale,
+ const Babl *format,
+ gint d_rowstride)
+{
+ gint components = babl_format_get_n_components (format);
+ const gint tmp_bpp = 4 * 2;
+ gint in_tmp_rowstride = src_rect->width * tmp_bpp;
+ gint out_tmp_rowstride = dst_rect->width * tmp_bpp;
+ gint do_free = 0;
+
+ guchar *in_tmp, *out_tmp;
+
+ if (src_rect->height * in_tmp_rowstride + dst_rect->height * out_tmp_rowstride < GEGL_ALLOCA_THRESHOLD)
+ {
+ in_tmp = alloca (src_rect->height * in_tmp_rowstride);
+ out_tmp = alloca (dst_rect->height * out_tmp_rowstride);
+ }
+ else
+ {
+ in_tmp = gegl_malloc (src_rect->height * in_tmp_rowstride);
+ out_tmp = gegl_malloc (dst_rect->height * out_tmp_rowstride);
+ do_free = 1;
+ }
+
+ u8_to_u16_rows (components,
+ source_buf, s_rowstride,
+ (void*)in_tmp, in_tmp_rowstride,
+ src_rect->width, src_rect->height);
+
+ gegl_resample_boxfilter_u16 (out_tmp, in_tmp, dst_rect, src_rect,
+ in_tmp_rowstride, scale, tmp_bpp, out_tmp_rowstride);
+
+ u16_to_u8_rows (components,
+ (void*)out_tmp, out_tmp_rowstride,
+ dest_buf, d_rowstride,
+ dst_rect->width, dst_rect->height);
+
+ if (do_free)
+ {
+ gegl_free (in_tmp);
+ gegl_free (out_tmp);
+ }
+}
+
+
+static void
gegl_resample_boxfilter_generic (guchar *dest_buf,
const guchar *source_buf,
const GeglRectangle *dst_rect,
@@ -193,7 +699,7 @@ gegl_resample_boxfilter_generic (guchar *dest_buf,
guchar *in_tmp, *out_tmp;
- if (src_rect->height * in_tmp_rowstride + dst_rect->height * out_tmp_rowstride < ALLOCA_THRESHOLD)
+ if (src_rect->height * in_tmp_rowstride + dst_rect->height * out_tmp_rowstride < GEGL_ALLOCA_THRESHOLD)
{
in_tmp = alloca (src_rect->height * in_tmp_rowstride);
out_tmp = alloca (dst_rect->height * out_tmp_rowstride);
@@ -235,11 +741,11 @@ void gegl_resample_boxfilter (guchar *dest_buf,
gint d_rowstride)
{
const Babl *model = babl_format_get_model (format);
+ const Babl *comp_type = babl_format_get_type (format, 0);
+ const gint bpp = babl_format_get_bytes_per_pixel (format);
if (gegl_babl_model_is_linear (model))
{
- const Babl *comp_type = babl_format_get_type (format, 0);
- const gint bpp = babl_format_get_bytes_per_pixel (format);
if (comp_type == gegl_babl_float())
gegl_resample_boxfilter_float (dest_buf, source_buf, dst_rect, src_rect,
@@ -262,8 +768,16 @@ void gegl_resample_boxfilter (guchar *dest_buf,
}
else
{
- gegl_resample_boxfilter_generic (dest_buf, source_buf, dst_rect, src_rect,
- s_rowstride, scale, format, d_rowstride);
+ if (comp_type == gegl_babl_u8())
+ gegl_boxfilter_u8_nl (dest_buf, source_buf, dst_rect, src_rect,
+ s_rowstride, scale, bpp, d_rowstride);
+#if 0
+ gegl_resample_boxfilter_generic_u16 (dest_buf, source_buf, dst_rect, src_rect,
+ s_rowstride, scale, format, d_rowstride);
+#endif
+ else
+ gegl_resample_boxfilter_generic (dest_buf, source_buf, dst_rect, src_rect,
+ s_rowstride, scale, format, d_rowstride);
}
}
@@ -289,7 +803,7 @@ gegl_resample_bilinear_generic (guchar *dest_buf,
guchar *in_tmp, *out_tmp;
- if (src_rect->height * in_tmp_rowstride + dst_rect->height * out_tmp_rowstride < ALLOCA_THRESHOLD)
+ if (src_rect->height * in_tmp_rowstride + dst_rect->height * out_tmp_rowstride < GEGL_ALLOCA_THRESHOLD)
{
in_tmp = alloca (src_rect->height * in_tmp_rowstride);
out_tmp = alloca (dst_rect->height * out_tmp_rowstride);
@@ -363,12 +877,6 @@ void gegl_resample_bilinear (guchar *dest_buf,
}
}
-static inline int int_floorf (float x)
-{
- int i = (int)x; /* truncate */
- return i - ( i > x ); /* convert trunc to floor */
-}
-
void
gegl_resample_nearest (guchar *dst,
const guchar *src,
diff --git a/gegl/gegl-init.c b/gegl/gegl-init.c
index f45d0d0..b85d3f7 100644
--- a/gegl/gegl-init.c
+++ b/gegl/gegl-init.c
@@ -238,6 +238,8 @@ gboolean gegl_is_main_thread (void)
return g_thread_self () == main_thread;
}
+void _gegl_init_u8_lut (void);
+
void
gegl_init (gint *argc,
gchar ***argv)
@@ -250,6 +252,7 @@ gegl_init (gint *argc,
return;
+
initialized = TRUE;
context = g_option_context_new (NULL);
@@ -650,6 +653,7 @@ gegl_post_parse_hook (GOptionContext *context,
gegl_config_parse_env (config);
babl_init ();
+ _gegl_init_u8_lut ();
#ifdef GEGL_ENABLE_DEBUG
{
diff --git a/gegl/gegl-types-internal.h b/gegl/gegl-types-internal.h
index 992ea1f..299fed3 100644
--- a/gegl/gegl-types-internal.h
+++ b/gegl/gegl-types-internal.h
@@ -98,6 +98,7 @@ static inline gboolean gegl_babl_model_is_linear (const Babl *babl)
GEGL_CACHED_BABL(format, rgba_float, "R'G'B'A float")
GEGL_CACHED_BABL(format, rgbA_float, "R'aG'aB'aA float")
GEGL_CACHED_BABL(format, rgba_linear_float, "RGBA float")
+GEGL_CACHED_BABL(format, rgba_linear_u16, "RGBA u16")
GEGL_CACHED_BABL(format, rgbA_linear_float, "RaGaBaA float")
GEGL_CACHED_BABL(format, ya_float, "Y'A float")
GEGL_CACHED_BABL(format, yA_float, "Y'aA float")
@@ -106,4 +107,6 @@ GEGL_CACHED_BABL(format, yA_linear_float, "YaA float")
G_END_DECLS
+#define GEGL_ALLOCA_THRESHOLD 8192 * 8 /* maybe this needs to be reduced for win32? */
+
#endif /* __GEGL_TYPES_INTERNAL_H__ */
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]