[glib] bitlock: hand-code assembly version for x86



commit 1a80405a36eae6777cf8dc809da5883ba666f41c
Author: Ryan Lortie <desrt desrt ca>
Date:   Fri Jun 3 20:29:41 2011 +0200

    bitlock: hand-code assembly version for x86
    
    The __sync_fetch_and_or() operation on x86 is a bit suboptimal when the
    result isn't ignored.  Normally we could use the 'lock or' assembly
    instruction to accomplish this, but this instruction discards the
    previous value.
    
    Since the previous value has to be returned in that case, GCC is forced
    to fall back to emitting a compare-and-exchange loop.
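    
    For illustration (this sketch is not part of the commit), the portable
    pattern looks roughly like this; because the old value returned by
    __sync_fetch_and_or() is inspected, GCC cannot use a plain 'lock or':
    
        /* Generic test-and-set of one bit: the old value decides whether
         * the bit was already set, so a cmpxchg loop is emitted on x86. */
        static int
        bit_trylock_generic (volatile int *address, int lock_bit)
        {
          unsigned int mask = 1u << lock_bit;
          unsigned int old = __sync_fetch_and_or (address, mask);
    
          return (old & mask) == 0;   /* nonzero: we set the bit */
        }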
    
    We can easily use the 'lock bts' instruction, though.  It can't be used
    in the general case for __sync_fetch_and_or() but it works great for our
    case (test and set a single bit).
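    
    As a rough standalone sketch of that test-and-set (not the committed
    code, which addresses the word through a register operand instead),
    the instruction can be wrapped like this:
    
        /* 'lock bts' atomically copies bit 'lock_bit' of *address into the
         * carry flag and then sets it; setnc yields 1 when the carry is
         * clear, i.e. the bit was previously 0 and we acquired the lock. */
        static int
        bit_test_and_set (volatile int *address, int lock_bit)
        {
          unsigned char acquired;
    
          __asm__ __volatile__ ("lock bts %2, %1\n\t"
                                "setnc %0"
                                : "=q" (acquired), "+m" (*address)
                                : "r" (lock_bit)
                                : "cc", "memory");
    
          return acquired;
        }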
    
    I filed a bug against GCC[1] to get this exposed as a new intrinsic (or
    have the optimiser detect the case) but until then we'll hand-code it on
    x86 and amd64.
    
    The uncontended case sees a 31% improvement on my test machine.
    
     [1] http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49244
    
    https://bugzilla.gnome.org/show_bug.cgi?id=651467
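
The test program behind the 31% figure is not included in the commit; an
uncontended-case micro-benchmark along those lines might look roughly like
this (iteration count and output format are made up for the sketch):

    #include <glib.h>
    #include <stdio.h>

    int
    main (void)
    {
      volatile gint word = 0;
      GTimer *timer = g_timer_new ();
      gint i;

      /* Lock and unlock bit 0 repeatedly with no other thread contending. */
      for (i = 0; i < 10000000; i++)
        {
          g_bit_lock (&word, 0);
          g_bit_unlock (&word, 0);
        }

      printf ("%.3f seconds for 10M lock/unlock pairs\n",
              g_timer_elapsed (timer, NULL));
      g_timer_destroy (timer);

      return 0;
    }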

 glib/gbitlock.c |   57 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 54 insertions(+), 3 deletions(-)
---
diff --git a/glib/gbitlock.c b/glib/gbitlock.c
index 4e91e9a..75e045c 100644
--- a/glib/gbitlock.c
+++ b/glib/gbitlock.c
@@ -205,6 +205,33 @@ void
 g_bit_lock (volatile gint *address,
             gint           lock_bit)
 {
+#if defined (__GNUC__) && (defined (i386) || defined (__amd64__))
+ retry:
+  asm volatile goto ("lock bts %1, (%0)\n"
+                     "jc %l[contended]"
+                     : /* no output */
+                     : "r" (address), "r" (lock_bit)
+                     : "cc", "memory"
+                     : contended);
+  return;
+
+ contended:
+  {
+    guint mask = 1u << lock_bit;
+    guint v;
+
+    v = g_atomic_int_get (address);
+    if (v & mask)
+      {
+        guint class = ((gsize) address) % G_N_ELEMENTS (g_bit_lock_contended);
+
+        g_atomic_int_add (&g_bit_lock_contended[class], +1);
+        g_futex_wait (address, v);
+        g_atomic_int_add (&g_bit_lock_contended[class], -1);
+      }
+  }
+  goto retry;
+#else
   guint mask = 1u << lock_bit;
   guint v;
 
@@ -221,6 +248,7 @@ g_bit_lock (volatile gint *address,
 
       goto retry;
     }
+#endif
 }
 
 /**
@@ -248,12 +276,25 @@ gboolean
 g_bit_trylock (volatile gint *address,
                gint           lock_bit)
 {
+#if defined (__GNUC__) && (defined (i386) || defined (__amd64__))
+  gboolean result;
+
+  asm volatile ("lock bts %2, (%1)\n"
+                "setnc %%al\n"
+                "movzx %%al, %0"
+                : "=r" (result)
+                : "r" (address), "r" (lock_bit)
+                : "cc", "memory");
+
+  return result;
+#else
   guint mask = 1u << lock_bit;
   guint v;
 
   v = g_atomic_int_or (address, mask);
 
   return ~v & mask;
+#endif
 }
 
 /**
@@ -275,11 +316,21 @@ void
 g_bit_unlock (volatile gint *address,
               gint           lock_bit)
 {
-  guint class = ((gsize) address) % G_N_ELEMENTS (g_bit_lock_contended);
+#if defined (__GNUC__) && (defined (i386) || defined (__amd64__))
+  asm volatile ("lock btr %1, (%0)"
+                : /* no output */
+                : "r" (address), "r" (lock_bit)
+                : "cc", "memory");
+#else
   guint mask = 1u << lock_bit;
 
   g_atomic_int_and (address, ~mask);
+#endif
+
+  {
+    guint class = ((gsize) address) % G_N_ELEMENTS (g_bit_lock_contended);
 
-  if (g_atomic_int_get (&g_bit_lock_contended[class]))
-    g_futex_wake (address);
+    if (g_atomic_int_get (&g_bit_lock_contended[class]))
+      g_futex_wake (address);
+  }
 }
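
A typical caller of this API packs the lock into one bit of an integer field
so the remaining bits stay usable for other data; for instance (structure and
field names here are purely illustrative):

    #include <glib.h>

    #define NODE_LOCK_BIT 0          /* bit 0 of 'flags' acts as the lock */

    typedef struct {
      volatile gint flags;           /* bit 0: lock; other bits: free */
      gint          payload;
    } Node;

    static void
    node_set_payload (Node *node, gint value)
    {
      g_bit_lock (&node->flags, NODE_LOCK_BIT);
      node->payload = value;         /* guarded by the bit lock */
      g_bit_unlock (&node->flags, NODE_LOCK_BIT);
    }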


