[cogl/wip/neil/pipeline-uniforms: 9/17] Move POPCOUNTL to cogl-util



commit 928957a476ef61bfa425023186baa799421dbba2
Author: Neil Roberts <neil linux intel com>
Date:   Tue Nov 1 13:10:59 2011 +0000

    Move POPCOUNTL to cogl-util
    
    This moves the POPCOUNTL macro from cogl-winsys-glx to cogl-util and
    renames it to _cogl_util_popcountl so that it can be used in more
    places. The fallback function for when the GCC builtin is not
    available has been replaced with a lookup table indexed by each
    8-bit byte of the value. The previous HAKMEM implementation uses
    32-bit masks, so it doesn't look like it would work when longs are
    64-bit, which makes it unsuitable as a general purpose function on
    64-bit architectures. Some of the pages regarding population counts
    seem to suggest that using a lookup table is the fastest method
    anyway.

 cogl/cogl-util.c              |   20 ++++++++++++++++++++
 cogl/cogl-util.h              |   34 +++++++++++++++++++++++++++++++---
 cogl/winsys/cogl-winsys-glx.c |   21 ++++-----------------
 3 files changed, 55 insertions(+), 20 deletions(-)
---
diff --git a/cogl/cogl-util.c b/cogl/cogl-util.c
index 1cb3814..487a762 100644
--- a/cogl/cogl-util.c
+++ b/cogl/cogl-util.c
@@ -100,3 +100,23 @@ _cogl_util_ffsl_wrapper (long int num)
 }
 
 #endif /* COGL_UTIL_HAVE_BUILTIN_FFSL */
+
+#ifndef COGL_UTIL_HAVE_BUILTIN_POPCOUNTL
+
+const unsigned char
+_cogl_util_popcount_table[256] = /* [b] = number of set bits in byte b */
+  {
+    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
+    2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4,
+    2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
+    4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5,
+    3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
+    4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
+  };
+
+#endif /* COGL_UTIL_HAVE_BUILTIN_POPCOUNTL */
diff --git a/cogl/cogl-util.h b/cogl/cogl-util.h
index b3267f9..5f544aa 100644
--- a/cogl/cogl-util.h
+++ b/cogl/cogl-util.h
@@ -99,6 +99,12 @@ _cogl_util_one_at_a_time_hash (unsigned int hash,
 unsigned int
 _cogl_util_one_at_a_time_mix (unsigned int hash);
 
+/* These two builtins are available since GCC 3.4 */
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+#define COGL_UTIL_HAVE_BUILTIN_FFSL
+#define COGL_UTIL_HAVE_BUILTIN_POPCOUNTL
+#endif
+
 /* The 'ffs' function is part of C99 so it isn't always available */
 #ifdef HAVE_FFS
 #define _cogl_util_ffs ffs
@@ -109,9 +115,8 @@ _cogl_util_ffs (int num);
 
 /* The 'ffsl' function is non-standard but GCC has a builtin for it
    since 3.4 which we can use */
-#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+#ifdef COGL_UTIL_HAVE_BUILTIN_FFSL
 #define _cogl_util_ffsl __builtin_ffsl
-#define COGL_UTIL_HAVE_BUILTIN_FFSL
 #else
 /* If ints and longs are the same size we can just use ffs. Hopefully
    the compiler will optimise away this conditional */
@@ -120,7 +125,30 @@ _cogl_util_ffs (int num);
    _cogl_util_ffsl_wrapper (x))
 int
 _cogl_util_ffsl_wrapper (long int num);
-#endif
+#endif /* COGL_UTIL_HAVE_BUILTIN_FFSL */
+
+#ifdef COGL_UTIL_HAVE_BUILTIN_POPCOUNTL
+#define _cogl_util_popcountl __builtin_popcountl
+#else
+extern const unsigned char _cogl_util_popcount_table[256];
+
+/* Fallback when __builtin_popcountl is unavailable: look up the bit
+   count of each byte of num in a 256-entry table and sum them, which
+   is robust against different sizes for long and reputedly fast. */
+static inline int
+_cogl_util_popcountl (unsigned long num)
+{
+  int i;
+  int sum = 0;
+
+  /* One table lookup per byte; hopefully the compiler unrolls this */
+  for (i = 0; i < sizeof (num); i++)
+    sum += _cogl_util_popcount_table[(num >> (i * 8)) & 0xff];
+
+  return sum;
+}
+
+#endif /* COGL_UTIL_HAVE_BUILTIN_POPCOUNTL */
 
 #ifdef COGL_HAS_GLIB_SUPPORT
 #define _COGL_RETURN_IF_FAIL(EXPR) g_return_if_fail(EXPR)
diff --git a/cogl/winsys/cogl-winsys-glx.c b/cogl/winsys/cogl-winsys-glx.c
index f1a2f68..fb11c6f 100644
--- a/cogl/winsys/cogl-winsys-glx.c
+++ b/cogl/winsys/cogl-winsys-glx.c
@@ -49,6 +49,7 @@
 #include "cogl-onscreen-private.h"
 #include "cogl-swap-chain-private.h"
 #include "cogl-xlib-renderer.h"
+#include "cogl-util.h"
 
 #include <stdlib.h>
 #include <sys/types.h>
@@ -1699,22 +1700,6 @@ should_use_rectangle (CoglContext *context)
   return context->rectangle_state == COGL_WINSYS_RECTANGLE_STATE_ENABLE;
 }
 
-/* GCC's population count builtin is available since version 3.4 */
-#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
-#define POPCOUNTL(n) __builtin_popcountl(n)
-#else
-/* HAKMEM 169 */
-static int
-hakmem_popcountl (unsigned long n)
-{
-  unsigned long tmp;
-
-  tmp = n - ((n >> 1) & 033333333333) - ((n >> 2) & 011111111111);
-  return ((tmp + (tmp >> 3)) & 030707070707) % 63;
-}
-#define POPCOUNTL(n) hakmem_popcountl(n)
-#endif
-
 static gboolean
 try_create_glx_pixmap (CoglContext *context,
                        CoglTexturePixmapX11 *tex_pixmap,
@@ -1767,7 +1752,9 @@ try_create_glx_pixmap (CoglContext *context,
    * number of 1-bits in color masks against the color depth requested
    * by the client.
    */
-  if (POPCOUNTL(visual->red_mask|visual->green_mask|visual->blue_mask) == depth)
+  if (_cogl_util_popcountl (visual->red_mask |
+                            visual->green_mask |
+                            visual->blue_mask) == depth)
     attribs[i++] = GLX_TEXTURE_FORMAT_RGB_EXT;
   else
     attribs[i++] = GLX_TEXTURE_FORMAT_RGBA_EXT;



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]