Optimizations



   Hi!

So here is a set of patches I've made (during a profiling session), to make
procedure invocations (which go through the SFI glue layer) faster. My test
environment was a fresh build of beast CVS and glib-2.2.3 (Athlon-XP 1800+,
Linux-2.6.0-k7, Debian unstable).

Since I used --disable-debug for building glib and beast, no checks are made,
and the initial performance is better than the one I mentioned in the
profiling mails, although the code is the same.

* No patches: performance of the unmodified build.

stefan@luna1:~/src/beast-performance-eval/beast/tests$ perftest
0.980416 seconds for 10000 invocations => 10199.751787 invocations/second, 0.000098 sec per invocation

* Patch #1: removes redundant value copy, by appending an empty value and
filling it directly.

Index: sfiglue.c
===================================================================
RCS file: /cvs/gnome/beast/sfi/sfiglue.c,v
retrieving revision 1.9
diff -u -p -r1.9 sfiglue.c
--- sfiglue.c	23 Dec 2003 04:50:55 -0000	1.9
+++ sfiglue.c	2 Feb 2004 20:57:02 -0000
@@ -339,14 +339,8 @@ sfi_glue_call_valist (const gchar *proc_
 	error = g_strdup_printf ("%s: invalid category_type (%u)", G_STRLOC, arg_type);
       else
 	{
-	  GValue value = { 0, };
-	  g_value_init (&value, collect_type);
-	  G_VALUE_COLLECT (&value, var_args, 0, &error);
-	  if (!error)
-	    {
-	      sfi_seq_append (seq, &value);
-	      g_value_unset (&value);
-	    }
+	  GValue *value = sfi_seq_append_empty (seq, collect_type);
+	  G_VALUE_COLLECT (value, var_args, 0, &error);
 	}
       if (error)
 	{

stefan@luna1:~/src/beast-performance-eval/beast/tests$ perftest
0.965613 seconds for 10000 invocations => 10356.114726 invocations/second, 0.000097 sec per invocation

* Patch #2: It's unnecessary to relookup the GQuark every time we do some
logging output.

Index: sfilog.c
===================================================================
RCS file: /cvs/gnome/beast/sfi/sfilog.c,v
retrieving revision 1.5
diff -u -p -r1.5 sfilog.c
--- sfilog.c	7 Mar 2003 14:01:38 -0000	1.5
+++ sfilog.c	2 Feb 2004 20:57:04 -0000
@@ -26,19 +26,22 @@
 
 
 /* --- functions --- */
+
+static GQuark sfi_log_key = 0;
+
 static inline const gchar*
 sfi_log_pop_key (const gchar *fallback)
 {
-  const gchar *key = sfi_thread_get_data ("SFI-log-key");
+  const gchar *key = sfi_thread_get_qdata (sfi_log_key);
   if (key)
-    sfi_thread_set_data ("SFI-log-key", NULL);
+    sfi_thread_set_qdata (sfi_log_key, NULL);
   return key ? key : fallback;
 }
 
 void
 sfi_log_push_key (const gchar *static_key)
 {
-  sfi_thread_set_data ("SFI-log-key", (gchar*) static_key);
+  sfi_thread_set_qdata (sfi_log_key, (gchar*) static_key);
 }
 
 static void
@@ -206,6 +209,7 @@ _sfi_init_log (void)
   sfi_mutex_init (&key_mutex);
   sfi_log_reset_info ();
   sfi_log_reset_debug ();
+  sfi_log_key = g_quark_from_string ("SFI-log-key");
 }
 
 void

stefan@luna1:~/src/beast-performance-eval/beast/tests$ perftest
0.915921 seconds for 10000 invocations => 10917.970867 invocations/second, 0.000092 sec per invocation

* Patch #3: Removes redundant value copies (like #1). I could imagine that
  calling g_value_set_boxed instead of the variants (sfi_value_set_bblock)
  could be a small speedup, but I am not sure if thats worth it, because it
  does move knowledge about the fact that bblocks for instance can be set
  using sfi_value_set_boxed into the sequence code.

Index: sfiprimitives.c
===================================================================
RCS file: /cvs/gnome/beast/sfi/sfiprimitives.c,v
retrieving revision 1.16
diff -u -p -r1.16 sfiprimitives.c
--- sfiprimitives.c	25 Jan 2004 03:46:36 -0000	1.16
+++ sfiprimitives.c	2 Feb 2004 20:57:08 -0000
@@ -417,122 +417,96 @@ void
 sfi_seq_append_bool (SfiSeq      *seq,
 		     SfiBool      v_bool)
 {
-  GValue *value = sfi_value_bool (v_bool);
-  sfi_seq_append (seq, value);
-  sfi_value_free (value);
+  GValue *value = sfi_seq_append_empty (seq, SFI_TYPE_BOOL);
+  sfi_value_set_bool (value, v_bool);
 }
 
 void
 sfi_seq_append_int (SfiSeq      *seq,
 		    SfiInt       v_int)
 {
-  GValue *value = sfi_value_int (v_int);
-  sfi_seq_append (seq, value);
-  sfi_value_free (value);
+  GValue *value = sfi_seq_append_empty (seq, SFI_TYPE_INT);
+  sfi_value_set_int (value, v_int);
 }
 
 void
 sfi_seq_append_num (SfiSeq      *seq,
 		    SfiNum       v_num)
 {
-  GValue *value = sfi_value_num (v_num);
-  sfi_seq_append (seq, value);
-  sfi_value_free (value);
+  GValue *value = sfi_seq_append_empty (seq, SFI_TYPE_NUM);
+  sfi_value_set_num (value, v_num);
 }
 
 void
 sfi_seq_append_real (SfiSeq          *seq,
 		     SfiReal         v_real)
 {
-  GValue *value = sfi_value_real (v_real);
-  sfi_seq_append (seq, value);
-  sfi_value_free (value);
+  GValue *value = sfi_seq_append_empty (seq, SFI_TYPE_REAL);
+  sfi_value_set_real (value, v_real);
 }
 
 void
 sfi_seq_append_string (SfiSeq      *seq,
 		       const gchar *string)
 {
-  GValue value = { 0, };
-  g_value_init (&value, SFI_TYPE_STRING);
-  g_value_set_static_string (&value, string);
-  sfi_seq_append (seq, &value);
-  g_value_unset (&value);
+  GValue *value = sfi_seq_append_empty (seq, SFI_TYPE_STRING);
+  sfi_value_set_string (value, string);
 }
 
 void
 sfi_seq_append_choice (SfiSeq      *seq,
 		       const gchar *choice)
 {
-  GValue value = { 0, };
-  g_value_init (&value, SFI_TYPE_CHOICE);
-  g_value_set_static_string (&value, choice);
-  sfi_seq_append (seq, &value);
-  g_value_unset (&value);
+  GValue *value = sfi_seq_append_empty (seq, SFI_TYPE_CHOICE);
+  sfi_value_set_choice (value, choice);
 }
 
 void
 sfi_seq_append_bblock (SfiSeq      *seq,
 		       SfiBBlock   *bblock)
 {
-  GValue value = { 0, };
-  g_value_init (&value, SFI_TYPE_BBLOCK);
-  g_value_set_static_boxed (&value, bblock);
-  sfi_seq_append (seq, &value);
-  g_value_unset (&value);
+  GValue *value = sfi_seq_append_empty (seq, SFI_TYPE_BBLOCK);
+  sfi_value_set_bblock (value, bblock);
 }
 
 void
 sfi_seq_append_fblock (SfiSeq      *seq,
 		       SfiFBlock   *fblock)
 {
-  GValue value = { 0, };
-  g_value_init (&value, SFI_TYPE_FBLOCK);
-  g_value_set_static_boxed (&value, fblock);
-  sfi_seq_append (seq, &value);
-  g_value_unset (&value);
+  GValue *value = sfi_seq_append_empty (seq, SFI_TYPE_FBLOCK);
+  sfi_value_set_fblock (value, fblock);
 }
 
 void
 sfi_seq_append_pspec (SfiSeq      *seq,
 		      GParamSpec  *pspec)
 {
-  GValue *value = sfi_value_pspec (pspec);
-  sfi_seq_append (seq, value);
-  sfi_value_free (value);
+  GValue *value = sfi_seq_append_empty (seq, SFI_TYPE_PSPEC);
+  sfi_value_set_pspec (value, pspec);
 }
 
 void
 sfi_seq_append_seq (SfiSeq      *seq,
 		    SfiSeq      *v_seq)
 {
-  GValue value = { 0, };
-  g_value_init (&value, SFI_TYPE_SEQ);
-  g_value_set_static_boxed (&value, v_seq);
-  sfi_seq_append (seq, &value);
-  g_value_unset (&value);
+  GValue *value = sfi_seq_append_empty (seq, SFI_TYPE_SEQ);
+  sfi_value_set_seq (value, v_seq);
 }
 
 void
 sfi_seq_append_rec (SfiSeq      *seq,
 		    SfiRec      *rec)
 {
-  GValue value = { 0, };
-  g_value_init (&value, SFI_TYPE_REC);
-  g_value_set_static_boxed (&value, rec);
-  sfi_seq_append (seq, &value);
-  g_value_unset (&value);
+  GValue *value = sfi_seq_append_empty (seq, SFI_TYPE_REC);
+  sfi_value_set_rec (value, rec);
 }
 
 void
 sfi_seq_append_proxy (SfiSeq      *seq,
 		      SfiProxy     proxy)
 {
-  GValue value = { 0, };
-  g_value_init (&value, SFI_TYPE_PROXY);
-  sfi_value_set_proxy (&value, proxy);
-  sfi_seq_append (seq, &value);
-  g_value_unset (&value);
+  GValue *value = sfi_seq_append_empty (seq, SFI_TYPE_PROXY);
+  sfi_value_set_proxy (value, proxy);
 }
 
 static inline SfiNum

stefan@luna1:~/src/beast-performance-eval/beast/tests$ perftest
0.901161 seconds for 10000 invocations => 11096.796453 invocations/second, 0.000090 sec per invocation

* Patch #5: Categorizing and using switch is more efficient than looking up
  each value type seperately. There are one or two more cases in the code
  where this could be done (value_as_num and value_as_real in sfiprimitives.c).

  Should I also remove the g_return_if_fails, since the values are already
  categorized?

Index: sfivalues.c
===================================================================
RCS file: /cvs/gnome/beast/sfi/sfivalues.c,v
retrieving revision 1.14
diff -u -p -r1.14 sfivalues.c
--- sfivalues.c	27 Jan 2004 15:17:09 -0000	1.14
+++ sfivalues.c	2 Feb 2004 20:57:09 -0000
@@ -362,39 +362,49 @@ void
 sfi_value_copy_deep (const GValue *src_value,
 		     GValue       *dest_value)
 {
+  SfiSCategory scat;
+
   g_return_if_fail (G_IS_VALUE (src_value));
   g_return_if_fail (G_IS_VALUE (dest_value));
-
-  if (SFI_VALUE_HOLDS_SEQ (src_value))
+  
+  scat = sfi_categorize_type (G_VALUE_TYPE (src_value)) & SFI_SCAT_TYPE_MASK;
+  switch (scat)
     {
-      SfiSeq *seq;
-      g_return_if_fail (SFI_VALUE_HOLDS_SEQ (dest_value));
-      seq = sfi_value_get_seq (src_value);
-      sfi_value_take_seq (dest_value, seq ? sfi_seq_copy_deep (seq) : NULL);
-    }
-  else if (SFI_VALUE_HOLDS_REC (src_value))
-    {
-      SfiRec *rec;
-      g_return_if_fail (SFI_VALUE_HOLDS_REC (dest_value));
-      rec = sfi_value_get_rec (src_value);
-      sfi_value_take_rec (dest_value, rec ? sfi_rec_copy_deep (rec) : NULL);
-    }
-  else if (SFI_VALUE_HOLDS_BBLOCK (src_value))
-    {
-      SfiBBlock *bblock;
-      g_return_if_fail (SFI_VALUE_HOLDS_BBLOCK (dest_value));
-      bblock = sfi_value_get_bblock (src_value);
-      sfi_value_take_bblock (dest_value, bblock ? sfi_bblock_copy_deep (bblock) : NULL);
-    }
-  else if (SFI_VALUE_HOLDS_FBLOCK (src_value))
-    {
-      SfiFBlock *fblock;
-      g_return_if_fail (SFI_VALUE_HOLDS_FBLOCK (dest_value));
-      fblock = sfi_value_get_fblock (src_value);
-      sfi_value_take_fblock (dest_value, fblock ? sfi_fblock_copy_deep (fblock) : NULL);
+      case SFI_SCAT_SEQ:
+	{
+	  SfiSeq *seq;
+	  g_return_if_fail (SFI_VALUE_HOLDS_SEQ (dest_value));
+	  seq = sfi_value_get_seq (src_value);
+	  sfi_value_take_seq (dest_value, seq ? sfi_seq_copy_deep (seq) : NULL);
+	}
+	break;
+      case SFI_SCAT_REC:
+	{
+	  SfiRec *rec;
+	  g_return_if_fail (SFI_VALUE_HOLDS_REC (dest_value));
+	  rec = sfi_value_get_rec (src_value);
+	  sfi_value_take_rec (dest_value, rec ? sfi_rec_copy_deep (rec) : NULL);
+	}
+	break;
+      case SFI_SCAT_BBLOCK:
+	{
+	  SfiBBlock *bblock;
+	  g_return_if_fail (SFI_VALUE_HOLDS_BBLOCK (dest_value));
+	  bblock = sfi_value_get_bblock (src_value);
+	  sfi_value_take_bblock (dest_value, bblock ? sfi_bblock_copy_deep (bblock) : NULL);
+	}
+	break;
+      case SFI_SCAT_FBLOCK:
+	{
+	  SfiFBlock *fblock;
+	  g_return_if_fail (SFI_VALUE_HOLDS_FBLOCK (dest_value));
+	  fblock = sfi_value_get_fblock (src_value);
+	  sfi_value_take_fblock (dest_value, fblock ? sfi_fblock_copy_deep (fblock) : NULL);
+	}
+	break;
+      default:
+	g_value_copy (src_value, dest_value);
     }
-  else
-    g_value_copy (src_value, dest_value);
 }
 
stefan@luna1:~/src/beast-performance-eval/beast/tests$ perftest
0.887342 seconds for 10000 invocations => 11269.611276 invocations/second, 0.000089 sec per invocation

* Patch #6: Remove the need for bse_procedure_execvl, by calling
  bse_procedure_marshal directly. It is possible to remove bse_procedure_execvl
  entierly now.

Index: bseglue.c
===================================================================
RCS file: /cvs/gnome/beast/bse/bseglue.c,v
retrieving revision 1.22
diff -u -p -r1.22 bseglue.c
--- bseglue.c	27 Jan 2004 15:17:32 -0000	1.22
+++ bseglue.c	3 Feb 2004 00:11:16 -0000
@@ -682,8 +682,9 @@ bglue_exec_proc (SfiGlueContext *context
   if (BSE_TYPE_IS_PROCEDURE (ptype) && G_TYPE_IS_DERIVED (ptype))
     {
       BseProcedureClass *proc = g_type_class_ref (ptype);
-      GValue *ovalues = g_new0 (GValue, proc->n_out_pspecs);
-      GSList *ilist = NULL, *olist = NULL, *clearlist = NULL;
+      GValue *ivalues = g_newa (GValue, proc->n_in_pspecs + proc->n_out_pspecs);
+      GValue *ovalues = ivalues + proc->n_in_pspecs;
+      GSList *element = NULL, *clearlist = NULL;
       guint i, sl = sfi_seq_length (params);
       BseErrorType error;
       
@@ -694,7 +695,7 @@ bglue_exec_proc (SfiGlueContext *context
 	    {
 	      GValue *sfivalue = sfi_seq_get (params, i);
 	      GValue *bsevalue = bglue_value_from_serializable (sfivalue, pspec);
-	      ilist = g_slist_prepend (ilist, bsevalue ? bsevalue : sfivalue);
+	      memcpy (ivalues + i, bsevalue ? bsevalue : sfivalue, sizeof (GValue));
 	      if (bsevalue)
 		clearlist = g_slist_prepend (clearlist, bsevalue);
 	    }
@@ -703,23 +704,21 @@ bglue_exec_proc (SfiGlueContext *context
 	      GValue *value = sfi_value_empty ();
 	      g_value_init (value, G_PARAM_SPEC_VALUE_TYPE (pspec));
 	      g_param_value_set_default (pspec, value);
-	      ilist = g_slist_prepend (ilist, value);
+	      memcpy (ivalues + i, value, sizeof (GValue));
 	      clearlist = g_slist_prepend (clearlist, value);
 	    }
 	}
       for (i = 0; i < proc->n_out_pspecs; i++)
 	{
+	  ovalues[i].g_type = 0;
 	  g_value_init (ovalues + i, G_PARAM_SPEC_VALUE_TYPE (proc->out_pspecs[i]));
-	  olist = g_slist_prepend (olist, ovalues + i);
 	}
       
-      ilist = g_slist_reverse (ilist);
-      olist = g_slist_reverse (olist);
-      error = bse_procedure_execvl (proc, ilist, olist, bglue_marshal_proc, NULL);
-      g_slist_free (ilist);
-      g_slist_free (olist);
-      for (ilist = clearlist; ilist; ilist = ilist->next)
-	sfi_value_free (ilist->data);
+      error = bse_procedure_marshal (BSE_PROCEDURE_TYPE (proc), ivalues, ovalues,
+	                             bglue_marshal_proc, NULL);
+
+      for (element = clearlist; element; element = element->next)
+	sfi_value_free (element->data);
       g_slist_free (clearlist);
       
       if (error)
@@ -733,7 +732,6 @@ bglue_exec_proc (SfiGlueContext *context
 	retval = bglue_value_to_serializable (ovalues + 0);
       for (i = 0; i < proc->n_out_pspecs; i++)
         g_value_unset (ovalues + i);
-      g_free (ovalues);
       g_type_class_unref (proc);
     }
   else

stefan@luna1:~/src/beast-performance-eval/beast/tests$ perftest
0.875381 seconds for 10000 invocations => 11423.596657 invocations/second, 0.000088 sec per invocation

* Patch #7: Class caching: its not a good idea to reinitialize the procedure
  classes each time. I don't want to apply this patch, because I know that Tim
  is working on an improved version, but I thought it would be nice to measure
  the impact of class caching anyway.

Index: bsemain.c
===================================================================
RCS file: /cvs/gnome/beast/bse/bsemain.c,v
retrieving revision 1.34
diff -u -p -r1.34 bsemain.c
--- bsemain.c	27 Jan 2004 15:17:33 -0000	1.34
+++ bsemain.c	3 Feb 2004 00:11:17 -0000
@@ -170,6 +170,65 @@ bse_init_glue_context (const gchar *clie
   return adata.context;
 }
 
+typedef struct _BseClassCache {
+  GSList *classes;
+  guint	  timeout_source;
+} BseClassCache;
+
+static gboolean
+bse_class_cache_func (gpointer cache_data, GTypeClass *g_class)
+{
+  BseClassCache *ccache = (BseClassCache *)cache_data;
+
+  GType type = G_TYPE_FROM_CLASS (g_class);
+  g_type_class_ref (type);
+  g_printerr ("class %s added to cache.\n", g_type_name (type));
+  ccache->classes = g_slist_prepend (ccache->classes, g_class);
+
+  return TRUE; /* stop calling cache functions: we have referenced the class now */
+}
+
+static gboolean
+bse_class_cache_cleanup (gpointer cache_data)
+{
+  BseClassCache *ccache = (BseClassCache *)cache_data;
+
+  GSList *classes = ccache->classes, *element;
+
+  g_printerr ("cache cleanup.\n");
+  /* set list to NULL first, as g_type_class_unref_uncached could produce more cache entries */
+  ccache->classes = NULL;
+  for (element = classes; element; element = element->next)
+    {
+      GTypeClass *g_class = element->data;
+      GType type = G_TYPE_FROM_CLASS (g_class);
+      g_printerr ("removing class %s from cache.\n", g_type_name (type));
+      g_type_class_unref_uncached (g_class);
+      g_printerr ("done.\n");
+    }
+  g_slist_free (classes);
+
+  return TRUE; /* call me again */
+}
+
+static BseClassCache*
+bse_class_cache_init (void)
+{
+  BseClassCache *ccache = g_new0 (BseClassCache, 1);
+  g_type_add_class_cache_func (ccache, bse_class_cache_func);
+  ccache->timeout_source = g_timeout_add (5000 /* 5 seconds */, bse_class_cache_cleanup, ccache);
+
+  return ccache;
+}
+
+static void
+bse_class_cache_free (BseClassCache *ccache)
+{
+  bse_class_cache_cleanup (ccache);
+  g_source_remove (ccache->timeout_source);
+  g_free (ccache);
+}
+
 static void
 bse_init_core (void)
 {
@@ -192,6 +251,7 @@ bse_init_core (void)
   bse_type_init ();
   bse_cxx_init ();
   bse_cxx_checks ();
+  bse_class_cache_init ();
   
   /* FIXME: global spawn dir is evil */
   {

stefan@luna1:~/src/beast-performance-eval/beast/tests$ ./perftest
0.618372 seconds for 10000 invocations => 16171.496128 invocations/second, 0.000062 sec per invocation

* Patch #8 (against glib-2.2.3, from Tim): I just included that for
  completeness.

--- ./gtype.h	2002-12-04 00:54:54.000000000 +0100
+++ /home/stefan/src/glibprof/glib-2.2.3/gobject/gtype.h	2004-01-29 21:53:42.000000000 +0100
@@ -349,7 +349,18 @@
 #  define _G_TYPE_CCC(cp, gt, ct)       ((ct*) cp)
 #endif /* G_DISABLE_CAST_CHECKS */
 #define _G_TYPE_CHI(ip)			(g_type_check_instance ((GTypeInstance*) ip))
-#define _G_TYPE_CVH(vl, gt)             (g_type_check_value_holds ((GValue*) vl, gt))
+#ifdef  __GNUC__
+#  define _G_TYPE_CVH(vl, gt)             (G_GNUC_EXTENSION ({ \
+  GValue *__val = (GValue*) vl; GType __t = gt; gboolean __r; \
+  if (__val && __val->g_type == __t) \
+    __r = TRUE; \
+  else \
+    __r = g_type_check_value_holds (__val, __t); \
+  __r; \
+}))
+#else  /* !__GNUC__ */
+#  define _G_TYPE_CVH(vl, gt)           (g_type_check_value_holds ((GValue*) vl, gt))
+#endif /* !__GNUC__ */
 #define _G_TYPE_CHV(vl)			(g_type_check_value ((GValue*) vl))
 #define _G_TYPE_IGC(ip, gt, ct)         ((ct*) (((GTypeInstance*) ip)->g_class))
 #define _G_TYPE_IGI(ip, gt, ct)         ((ct*) g_type_interface_peek (((GTypeInstance*) ip)->g_class, gt))

stefan@luna1:~/src/beast-performance-eval/beast/tests$ ./perftest
0.613326 seconds for 10000 invocations => 16304.543860 invocations/second, 0.000061 sec per invocation

* Patch #9: That was just a try - I think we can trivially say that g_type_is_a
  is TRUE if the arguments match. I am not sure if its worth the extra code,
  though.

--- gtype.orig.c        2004-02-03 01:42:50.000000000 +0100
+++ gtype.c     2004-02-03 01:40:39.000000000 +0100
@@ -2341,6 +2341,9 @@ g_type_is_a (GType type,
   TypeNode *node, *iface_node;
   gboolean is_a;

+  if (type == iface_type)
+    return TRUE;
+
   node = lookup_type_node_I (type);
   iface_node = lookup_type_node_I (iface_type);
   is_a = node && iface_node && type_node_conforms_to_U (node, iface_node, TRUE, TRUE);

stefan@luna1:~/src/beast-performance-eval/beast/tests$ ./perftest
0.608987 seconds for 10000 invocations => 16420.712819 invocations/second, 0.000061 sec per invocation

Any comments? I'd like to put the changes #1 - #6 into the CVS.

   Cu... Stefan
-- 
  -* Stefan Westerfeld, stefan@space.twc.de (PGP!), Hamburg/Germany
     KDE Developer, project infos at http://space.twc.de/~stefan/kde *-         



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]