Optimizations
- From: Stefan Westerfeld <stefan space twc de>
- To: beast gnome org
- Subject: Optimizations
- Date: Mon Feb 2 20:27:14 2004
Hi!
So here is a set of patches I've made (during a profiling session), to make
procedure invocations (which go through the SFI glue layer) faster. My test
environment was a fresh build of beast CVS and glib-2.2.3 (Athlon-XP 1800+,
Linux-2.6.0-k7, Debian unstable).
Since I used --disable-debug for building glib and beast, no checks are made,
and the initial performance is better than the one I mentioned in the
profiling mails, although the code is the same.
* No patches: performance of the unmodified build.
stefan@luna1:~/src/beast-performance-eval/beast/tests$ perftest
0.980416 seconds for 10000 invocations => 10199.751787 invocations/second, 0.000098 sec per invocation
* Patch #1: removes redundant value copy, by appending an empty value and
filling it directly.
Index: sfiglue.c
===================================================================
RCS file: /cvs/gnome/beast/sfi/sfiglue.c,v
retrieving revision 1.9
diff -u -p -r1.9 sfiglue.c
--- sfiglue.c 23 Dec 2003 04:50:55 -0000 1.9
+++ sfiglue.c 2 Feb 2004 20:57:02 -0000
@@ -339,14 +339,8 @@ sfi_glue_call_valist (const gchar *proc_
error = g_strdup_printf ("%s: invalid category_type (%u)", G_STRLOC, arg_type);
else
{
- GValue value = { 0, };
- g_value_init (&value, collect_type);
- G_VALUE_COLLECT (&value, var_args, 0, &error);
- if (!error)
- {
- sfi_seq_append (seq, &value);
- g_value_unset (&value);
- }
+ GValue *value = sfi_seq_append_empty (seq, collect_type);
+ G_VALUE_COLLECT (value, var_args, 0, &error);
}
if (error)
{
stefan@luna1:~/src/beast-performance-eval/beast/tests$ perftest
0.965613 seconds for 10000 invocations => 10356.114726 invocations/second, 0.000097 sec per invocation
* Patch #2: It's unnecessary to relookup the GQuark every time we do some
logging output.
Index: sfilog.c
===================================================================
RCS file: /cvs/gnome/beast/sfi/sfilog.c,v
retrieving revision 1.5
diff -u -p -r1.5 sfilog.c
--- sfilog.c 7 Mar 2003 14:01:38 -0000 1.5
+++ sfilog.c 2 Feb 2004 20:57:04 -0000
@@ -26,19 +26,22 @@
/* --- functions --- */
+
+static GQuark sfi_log_key = 0;
+
static inline const gchar*
sfi_log_pop_key (const gchar *fallback)
{
- const gchar *key = sfi_thread_get_data ("SFI-log-key");
+ const gchar *key = sfi_thread_get_qdata (sfi_log_key);
if (key)
- sfi_thread_set_data ("SFI-log-key", NULL);
+ sfi_thread_set_qdata (sfi_log_key, NULL);
return key ? key : fallback;
}
void
sfi_log_push_key (const gchar *static_key)
{
- sfi_thread_set_data ("SFI-log-key", (gchar*) static_key);
+ sfi_thread_set_qdata (sfi_log_key, (gchar*) static_key);
}
static void
@@ -206,6 +209,7 @@ _sfi_init_log (void)
sfi_mutex_init (&key_mutex);
sfi_log_reset_info ();
sfi_log_reset_debug ();
+ sfi_log_key = g_quark_from_string ("SFI-log-key");
}
void
stefan@luna1:~/src/beast-performance-eval/beast/tests$ perftest
0.915921 seconds for 10000 invocations => 10917.970867 invocations/second, 0.000092 sec per invocation
* Patch #3: Removes redundant value copies (like #1). I could imagine that
calling g_value_set_boxed instead of the variants (sfi_value_set_bblock)
could be a small speedup, but I am not sure if thats worth it, because it
does move knowledge about the fact that bblocks for instance can be set
using sfi_value_set_boxed into the sequence code.
Index: sfiprimitives.c
===================================================================
RCS file: /cvs/gnome/beast/sfi/sfiprimitives.c,v
retrieving revision 1.16
diff -u -p -r1.16 sfiprimitives.c
--- sfiprimitives.c 25 Jan 2004 03:46:36 -0000 1.16
+++ sfiprimitives.c 2 Feb 2004 20:57:08 -0000
@@ -417,122 +417,96 @@ void
sfi_seq_append_bool (SfiSeq *seq,
SfiBool v_bool)
{
- GValue *value = sfi_value_bool (v_bool);
- sfi_seq_append (seq, value);
- sfi_value_free (value);
+ GValue *value = sfi_seq_append_empty (seq, SFI_TYPE_BOOL);
+ sfi_value_set_bool (value, v_bool);
}
void
sfi_seq_append_int (SfiSeq *seq,
SfiInt v_int)
{
- GValue *value = sfi_value_int (v_int);
- sfi_seq_append (seq, value);
- sfi_value_free (value);
+ GValue *value = sfi_seq_append_empty (seq, SFI_TYPE_INT);
+ sfi_value_set_int (value, v_int);
}
void
sfi_seq_append_num (SfiSeq *seq,
SfiNum v_num)
{
- GValue *value = sfi_value_num (v_num);
- sfi_seq_append (seq, value);
- sfi_value_free (value);
+ GValue *value = sfi_seq_append_empty (seq, SFI_TYPE_NUM);
+ sfi_value_set_num (value, v_num);
}
void
sfi_seq_append_real (SfiSeq *seq,
SfiReal v_real)
{
- GValue *value = sfi_value_real (v_real);
- sfi_seq_append (seq, value);
- sfi_value_free (value);
+ GValue *value = sfi_seq_append_empty (seq, SFI_TYPE_REAL);
+ sfi_value_set_real (value, v_real);
}
void
sfi_seq_append_string (SfiSeq *seq,
const gchar *string)
{
- GValue value = { 0, };
- g_value_init (&value, SFI_TYPE_STRING);
- g_value_set_static_string (&value, string);
- sfi_seq_append (seq, &value);
- g_value_unset (&value);
+ GValue *value = sfi_seq_append_empty (seq, SFI_TYPE_STRING);
+ sfi_value_set_string (value, string);
}
void
sfi_seq_append_choice (SfiSeq *seq,
const gchar *choice)
{
- GValue value = { 0, };
- g_value_init (&value, SFI_TYPE_CHOICE);
- g_value_set_static_string (&value, choice);
- sfi_seq_append (seq, &value);
- g_value_unset (&value);
+ GValue *value = sfi_seq_append_empty (seq, SFI_TYPE_CHOICE);
+ sfi_value_set_choice (value, choice);
}
void
sfi_seq_append_bblock (SfiSeq *seq,
SfiBBlock *bblock)
{
- GValue value = { 0, };
- g_value_init (&value, SFI_TYPE_BBLOCK);
- g_value_set_static_boxed (&value, bblock);
- sfi_seq_append (seq, &value);
- g_value_unset (&value);
+ GValue *value = sfi_seq_append_empty (seq, SFI_TYPE_BBLOCK);
+ sfi_value_set_bblock (value, bblock);
}
void
sfi_seq_append_fblock (SfiSeq *seq,
SfiFBlock *fblock)
{
- GValue value = { 0, };
- g_value_init (&value, SFI_TYPE_FBLOCK);
- g_value_set_static_boxed (&value, fblock);
- sfi_seq_append (seq, &value);
- g_value_unset (&value);
+ GValue *value = sfi_seq_append_empty (seq, SFI_TYPE_FBLOCK);
+ sfi_value_set_fblock (value, fblock);
}
void
sfi_seq_append_pspec (SfiSeq *seq,
GParamSpec *pspec)
{
- GValue *value = sfi_value_pspec (pspec);
- sfi_seq_append (seq, value);
- sfi_value_free (value);
+ GValue *value = sfi_seq_append_empty (seq, SFI_TYPE_PSPEC);
+ sfi_value_set_pspec (value, pspec);
}
void
sfi_seq_append_seq (SfiSeq *seq,
SfiSeq *v_seq)
{
- GValue value = { 0, };
- g_value_init (&value, SFI_TYPE_SEQ);
- g_value_set_static_boxed (&value, v_seq);
- sfi_seq_append (seq, &value);
- g_value_unset (&value);
+ GValue *value = sfi_seq_append_empty (seq, SFI_TYPE_SEQ);
+ sfi_value_set_seq (value, v_seq);
}
void
sfi_seq_append_rec (SfiSeq *seq,
SfiRec *rec)
{
- GValue value = { 0, };
- g_value_init (&value, SFI_TYPE_REC);
- g_value_set_static_boxed (&value, rec);
- sfi_seq_append (seq, &value);
- g_value_unset (&value);
+ GValue *value = sfi_seq_append_empty (seq, SFI_TYPE_REC);
+ sfi_value_set_rec (value, rec);
}
void
sfi_seq_append_proxy (SfiSeq *seq,
SfiProxy proxy)
{
- GValue value = { 0, };
- g_value_init (&value, SFI_TYPE_PROXY);
- sfi_value_set_proxy (&value, proxy);
- sfi_seq_append (seq, &value);
- g_value_unset (&value);
+ GValue *value = sfi_seq_append_empty (seq, SFI_TYPE_PROXY);
+ sfi_value_set_proxy (value, proxy);
}
static inline SfiNum
stefan@luna1:~/src/beast-performance-eval/beast/tests$ perftest
0.901161 seconds for 10000 invocations => 11096.796453 invocations/second, 0.000090 sec per invocation
* Patch #5: Categorizing and using switch is more efficient than looking up
each value type seperately. There are one or two more cases in the code
where this could be done (value_as_num and value_as_real in sfiprimitives.c).
Should I also remove the g_return_if_fails, since the values are already
categorized?
Index: sfivalues.c
===================================================================
RCS file: /cvs/gnome/beast/sfi/sfivalues.c,v
retrieving revision 1.14
diff -u -p -r1.14 sfivalues.c
--- sfivalues.c 27 Jan 2004 15:17:09 -0000 1.14
+++ sfivalues.c 2 Feb 2004 20:57:09 -0000
@@ -362,39 +362,49 @@ void
sfi_value_copy_deep (const GValue *src_value,
GValue *dest_value)
{
+ SfiSCategory scat;
+
g_return_if_fail (G_IS_VALUE (src_value));
g_return_if_fail (G_IS_VALUE (dest_value));
-
- if (SFI_VALUE_HOLDS_SEQ (src_value))
+
+ scat = sfi_categorize_type (G_VALUE_TYPE (src_value)) & SFI_SCAT_TYPE_MASK;
+ switch (scat)
{
- SfiSeq *seq;
- g_return_if_fail (SFI_VALUE_HOLDS_SEQ (dest_value));
- seq = sfi_value_get_seq (src_value);
- sfi_value_take_seq (dest_value, seq ? sfi_seq_copy_deep (seq) : NULL);
- }
- else if (SFI_VALUE_HOLDS_REC (src_value))
- {
- SfiRec *rec;
- g_return_if_fail (SFI_VALUE_HOLDS_REC (dest_value));
- rec = sfi_value_get_rec (src_value);
- sfi_value_take_rec (dest_value, rec ? sfi_rec_copy_deep (rec) : NULL);
- }
- else if (SFI_VALUE_HOLDS_BBLOCK (src_value))
- {
- SfiBBlock *bblock;
- g_return_if_fail (SFI_VALUE_HOLDS_BBLOCK (dest_value));
- bblock = sfi_value_get_bblock (src_value);
- sfi_value_take_bblock (dest_value, bblock ? sfi_bblock_copy_deep (bblock) : NULL);
- }
- else if (SFI_VALUE_HOLDS_FBLOCK (src_value))
- {
- SfiFBlock *fblock;
- g_return_if_fail (SFI_VALUE_HOLDS_FBLOCK (dest_value));
- fblock = sfi_value_get_fblock (src_value);
- sfi_value_take_fblock (dest_value, fblock ? sfi_fblock_copy_deep (fblock) : NULL);
+ case SFI_SCAT_SEQ:
+ {
+ SfiSeq *seq;
+ g_return_if_fail (SFI_VALUE_HOLDS_SEQ (dest_value));
+ seq = sfi_value_get_seq (src_value);
+ sfi_value_take_seq (dest_value, seq ? sfi_seq_copy_deep (seq) : NULL);
+ }
+ break;
+ case SFI_SCAT_REC:
+ {
+ SfiRec *rec;
+ g_return_if_fail (SFI_VALUE_HOLDS_REC (dest_value));
+ rec = sfi_value_get_rec (src_value);
+ sfi_value_take_rec (dest_value, rec ? sfi_rec_copy_deep (rec) : NULL);
+ }
+ break;
+ case SFI_SCAT_BBLOCK:
+ {
+ SfiBBlock *bblock;
+ g_return_if_fail (SFI_VALUE_HOLDS_BBLOCK (dest_value));
+ bblock = sfi_value_get_bblock (src_value);
+ sfi_value_take_bblock (dest_value, bblock ? sfi_bblock_copy_deep (bblock) : NULL);
+ }
+ break;
+ case SFI_SCAT_FBLOCK:
+ {
+ SfiFBlock *fblock;
+ g_return_if_fail (SFI_VALUE_HOLDS_FBLOCK (dest_value));
+ fblock = sfi_value_get_fblock (src_value);
+ sfi_value_take_fblock (dest_value, fblock ? sfi_fblock_copy_deep (fblock) : NULL);
+ }
+ break;
+ default:
+ g_value_copy (src_value, dest_value);
}
- else
- g_value_copy (src_value, dest_value);
}
stefan@luna1:~/src/beast-performance-eval/beast/tests$ perftest
0.887342 seconds for 10000 invocations => 11269.611276 invocations/second, 0.000089 sec per invocation
* Patch #6: Remove the need for bse_procedure_execvl, by calling
bse_procedure_marshal directly. It is possible to remove bse_procedure_execvl
entierly now.
Index: bseglue.c
===================================================================
RCS file: /cvs/gnome/beast/bse/bseglue.c,v
retrieving revision 1.22
diff -u -p -r1.22 bseglue.c
--- bseglue.c 27 Jan 2004 15:17:32 -0000 1.22
+++ bseglue.c 3 Feb 2004 00:11:16 -0000
@@ -682,8 +682,9 @@ bglue_exec_proc (SfiGlueContext *context
if (BSE_TYPE_IS_PROCEDURE (ptype) && G_TYPE_IS_DERIVED (ptype))
{
BseProcedureClass *proc = g_type_class_ref (ptype);
- GValue *ovalues = g_new0 (GValue, proc->n_out_pspecs);
- GSList *ilist = NULL, *olist = NULL, *clearlist = NULL;
+ GValue *ivalues = g_newa (GValue, proc->n_in_pspecs + proc->n_out_pspecs);
+ GValue *ovalues = ivalues + proc->n_in_pspecs;
+ GSList *element = NULL, *clearlist = NULL;
guint i, sl = sfi_seq_length (params);
BseErrorType error;
@@ -694,7 +695,7 @@ bglue_exec_proc (SfiGlueContext *context
{
GValue *sfivalue = sfi_seq_get (params, i);
GValue *bsevalue = bglue_value_from_serializable (sfivalue, pspec);
- ilist = g_slist_prepend (ilist, bsevalue ? bsevalue : sfivalue);
+ memcpy (ivalues + i, bsevalue ? bsevalue : sfivalue, sizeof (GValue));
if (bsevalue)
clearlist = g_slist_prepend (clearlist, bsevalue);
}
@@ -703,23 +704,21 @@ bglue_exec_proc (SfiGlueContext *context
GValue *value = sfi_value_empty ();
g_value_init (value, G_PARAM_SPEC_VALUE_TYPE (pspec));
g_param_value_set_default (pspec, value);
- ilist = g_slist_prepend (ilist, value);
+ memcpy (ivalues + i, value, sizeof (GValue));
clearlist = g_slist_prepend (clearlist, value);
}
}
for (i = 0; i < proc->n_out_pspecs; i++)
{
+ ovalues[i].g_type = 0;
g_value_init (ovalues + i, G_PARAM_SPEC_VALUE_TYPE (proc->out_pspecs[i]));
- olist = g_slist_prepend (olist, ovalues + i);
}
- ilist = g_slist_reverse (ilist);
- olist = g_slist_reverse (olist);
- error = bse_procedure_execvl (proc, ilist, olist, bglue_marshal_proc, NULL);
- g_slist_free (ilist);
- g_slist_free (olist);
- for (ilist = clearlist; ilist; ilist = ilist->next)
- sfi_value_free (ilist->data);
+ error = bse_procedure_marshal (BSE_PROCEDURE_TYPE (proc), ivalues, ovalues,
+ bglue_marshal_proc, NULL);
+
+ for (element = clearlist; element; element = element->next)
+ sfi_value_free (element->data);
g_slist_free (clearlist);
if (error)
@@ -733,7 +732,6 @@ bglue_exec_proc (SfiGlueContext *context
retval = bglue_value_to_serializable (ovalues + 0);
for (i = 0; i < proc->n_out_pspecs; i++)
g_value_unset (ovalues + i);
- g_free (ovalues);
g_type_class_unref (proc);
}
else
stefan@luna1:~/src/beast-performance-eval/beast/tests$ perftest
0.875381 seconds for 10000 invocations => 11423.596657 invocations/second, 0.000088 sec per invocation
* Patch #7: Class caching: its not a good idea to reinitialize the procedure
classes each time. I don't want to apply this patch, because I know that Tim
is working on an improved version, but I thought it would be nice to measure
the impact of class caching anyway.
Index: bsemain.c
===================================================================
RCS file: /cvs/gnome/beast/bse/bsemain.c,v
retrieving revision 1.34
diff -u -p -r1.34 bsemain.c
--- bsemain.c 27 Jan 2004 15:17:33 -0000 1.34
+++ bsemain.c 3 Feb 2004 00:11:17 -0000
@@ -170,6 +170,65 @@ bse_init_glue_context (const gchar *clie
return adata.context;
}
+typedef struct _BseClassCache {
+ GSList *classes;
+ guint timeout_source;
+} BseClassCache;
+
+static gboolean
+bse_class_cache_func (gpointer cache_data, GTypeClass *g_class)
+{
+ BseClassCache *ccache = (BseClassCache *)cache_data;
+
+ GType type = G_TYPE_FROM_CLASS (g_class);
+ g_type_class_ref (type);
+ g_printerr ("class %s added to cache.\n", g_type_name (type));
+ ccache->classes = g_slist_prepend (ccache->classes, g_class);
+
+ return TRUE; /* stop calling cache functions: we have referenced the class now */
+}
+
+static gboolean
+bse_class_cache_cleanup (gpointer cache_data)
+{
+ BseClassCache *ccache = (BseClassCache *)cache_data;
+
+ GSList *classes = ccache->classes, *element;
+
+ g_printerr ("cache cleanup.\n");
+ /* set list to NULL first, as g_type_class_unref_uncached could produce more cache entries */
+ ccache->classes = NULL;
+ for (element = classes; element; element = element->next)
+ {
+ GTypeClass *g_class = element->data;
+ GType type = G_TYPE_FROM_CLASS (g_class);
+ g_printerr ("removing class %s from cache.\n", g_type_name (type));
+ g_type_class_unref_uncached (g_class);
+ g_printerr ("done.\n");
+ }
+ g_slist_free (classes);
+
+ return TRUE; /* call me again */
+}
+
+static BseClassCache*
+bse_class_cache_init (void)
+{
+ BseClassCache *ccache = g_new0 (BseClassCache, 1);
+ g_type_add_class_cache_func (ccache, bse_class_cache_func);
+ ccache->timeout_source = g_timeout_add (5000 /* 5 seconds */, bse_class_cache_cleanup, ccache);
+
+ return ccache;
+}
+
+static void
+bse_class_cache_free (BseClassCache *ccache)
+{
+ bse_class_cache_cleanup (ccache);
+ g_source_remove (ccache->timeout_source);
+ g_free (ccache);
+}
+
static void
bse_init_core (void)
{
@@ -192,6 +251,7 @@ bse_init_core (void)
bse_type_init ();
bse_cxx_init ();
bse_cxx_checks ();
+ bse_class_cache_init ();
/* FIXME: global spawn dir is evil */
{
stefan@luna1:~/src/beast-performance-eval/beast/tests$ ./perftest
0.618372 seconds for 10000 invocations => 16171.496128 invocations/second, 0.000062 sec per invocation
* Patch #8 (against glib-2.2.3, from Tim): I just included that for
completeness.
--- ./gtype.h 2002-12-04 00:54:54.000000000 +0100
+++ /home/stefan/src/glibprof/glib-2.2.3/gobject/gtype.h 2004-01-29 21:53:42.000000000 +0100
@@ -349,7 +349,18 @@
# define _G_TYPE_CCC(cp, gt, ct) ((ct*) cp)
#endif /* G_DISABLE_CAST_CHECKS */
#define _G_TYPE_CHI(ip) (g_type_check_instance ((GTypeInstance*) ip))
-#define _G_TYPE_CVH(vl, gt) (g_type_check_value_holds ((GValue*) vl, gt))
+#ifdef __GNUC__
+# define _G_TYPE_CVH(vl, gt) (G_GNUC_EXTENSION ({ \
+ GValue *__val = (GValue*) vl; GType __t = gt; gboolean __r; \
+ if (__val && __val->g_type == __t) \
+ __r = TRUE; \
+ else \
+ __r = g_type_check_value_holds (__val, __t); \
+ __r; \
+}))
+#else /* !__GNUC__ */
+# define _G_TYPE_CVH(vl, gt) (g_type_check_value_holds ((GValue*) vl, gt))
+#endif /* !__GNUC__ */
#define _G_TYPE_CHV(vl) (g_type_check_value ((GValue*) vl))
#define _G_TYPE_IGC(ip, gt, ct) ((ct*) (((GTypeInstance*) ip)->g_class))
#define _G_TYPE_IGI(ip, gt, ct) ((ct*) g_type_interface_peek (((GTypeInstance*) ip)->g_class, gt))
stefan@luna1:~/src/beast-performance-eval/beast/tests$ ./perftest
0.613326 seconds for 10000 invocations => 16304.543860 invocations/second, 0.000061 sec per invocation
* Patch #9: That was just a try - I think we can trivially say that g_type_is_a
is TRUE if the arguments match. I am not sure if its worth the extra code,
though.
--- gtype.orig.c 2004-02-03 01:42:50.000000000 +0100
+++ gtype.c 2004-02-03 01:40:39.000000000 +0100
@@ -2341,6 +2341,9 @@ g_type_is_a (GType type,
TypeNode *node, *iface_node;
gboolean is_a;
+ if (type == iface_type)
+ return TRUE;
+
node = lookup_type_node_I (type);
iface_node = lookup_type_node_I (iface_type);
is_a = node && iface_node && type_node_conforms_to_U (node, iface_node, TRUE, TRUE);
stefan@luna1:~/src/beast-performance-eval/beast/tests$ ./perftest
0.608987 seconds for 10000 invocations => 16420.712819 invocations/second, 0.000061 sec per invocation
Any comments? I'd like to put the changes #1 - #6 into the CVS.
Cu... Stefan
--
-* Stefan Westerfeld, stefan@space.twc.de (PGP!), Hamburg/Germany
KDE Developer, project infos at http://space.twc.de/~stefan/kde *-
[Date Prev][
Date Next] [Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]