[anjuta] dir-project: Load big project faster
- From: Sebastien Granjoux <sgranjoux src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [anjuta] dir-project: Load big project faster
- Date: Wed, 3 Jul 2013 19:34:44 +0000 (UTC)
commit 49bddd2d32951bd0019cffb5a8ff9a7cc66b21eb
Author: Sébastien Granjoux <seb sfo free fr>
Date: Wed Jul 3 21:10:17 2013 +0200
dir-project: Load big project faster
Use a hash table instead of a regular expression when a pattern only checks
the file extension.
plugins/dir-project/dir-project.c | 114 +++++++++++++++++++++++++++++++------
1 files changed, 96 insertions(+), 18 deletions(-)
---
diff --git a/plugins/dir-project/dir-project.c b/plugins/dir-project/dir-project.c
index a33d01e..924075e 100644
--- a/plugins/dir-project/dir-project.c
+++ b/plugins/dir-project/dir-project.c
@@ -83,8 +83,8 @@ struct _DirPattern
{
gboolean match;
gboolean directory;
- GRegex *source;
gchar *object;
+ GRegex *regex;
};
/* A list of pattern found in one file */
@@ -92,8 +92,10 @@ typedef struct _DirPatternList DirPatternList;
struct _DirPatternList
{
- GList *pattern;
+ GList *sources;
+ GList *objects;
GFile *directory;
+ GHashTable *extensions;
};
/* ----- Standard GObject types and variables ----- */
@@ -203,10 +205,10 @@ project_node_new (DirProject *project, AnjutaProjectNode *parent, AnjutaProjectN
static void
dir_pattern_free (DirPattern *pat)
{
- if (pat->source != NULL) g_regex_unref (pat->source);
g_free (pat->object);
+ if (pat->regex != NULL) g_regex_unref (pat->regex);
- g_slice_free (DirPattern, pat);
+ g_slice_free (DirPattern, pat);
}
/* Create a new pattern matching a directory of a file name in a path */
@@ -214,7 +216,7 @@ dir_pattern_free (DirPattern *pat)
static DirPattern*
dir_pattern_new (const gchar *pattern, gboolean reverse)
{
- DirPattern *pat = NULL;
+ DirPattern *pat = NULL;
GString *regex = g_string_new (NULL);
const char *ptr = pattern;
@@ -298,8 +300,8 @@ dir_pattern_new (const gchar *pattern, gboolean reverse)
g_string_truncate (regex, regex->len - 1);
}
g_string_append_c (regex, '$');
- pat->source = g_regex_new (regex->str, 0, 0, NULL);
- if (pat->source == NULL)
+ pat->regex = g_regex_new (regex->str, G_REGEX_OPTIMIZE, 0, NULL);
+ if (pat->regex == NULL)
{
dir_pattern_free (pat);
pat = NULL;
@@ -352,6 +354,38 @@ dir_pattern_new (const gchar *pattern, gboolean reverse)
return pat;
}
+/* Replace regular expression by a lookup in a hash table if we look only for
+ * a file with a particular extension as it's much faster.
+ * Return TRUE if it is possible */
+
+static gboolean
+dir_pattern_optimize (DirPattern *pat, DirPattern *last, GHashTable *extensions)
+{
+ const gchar *pattern = g_regex_get_pattern (pat->regex);
+ const gchar *ext;
+
+ ext = strrchr (pattern, '.');
+ if ((ext != NULL) &&
+ (strncmp (pattern, "(?:^|\\/)(.+)\\", ext - pattern) == 0))
+ {
+ const gchar *ptr;
+
+ for (ptr = ext + 1; isalnum(*ptr) || (*ptr == '_') || ((ptr[0] == '\\') && (ptr[1] == '+'));
*ptr == '\\' ? ptr += 2 : ptr++);
+ if ((ptr[0] == '$') && (ptr[1] == '\0'))
+ {
+ gchar *key = g_strndup (ext + 1, strlen(ext) - 2);
+ if (g_hash_table_lookup (extensions, key) == NULL)
+ {
+ g_hash_table_insert (extensions, key, last == NULL ? pat : last);
+
+ return TRUE;
+ }
+ }
+ }
+
+ return FALSE;
+}
+
/* Read a file containing pattern, the syntax is similar to .gitignore file.
*
* It is not a regular expression, only * and ? are used as joker.
@@ -376,6 +410,7 @@ dir_push_pattern_list (GList *stack, GFile *dir, GFile *file, gboolean ignore, G
char *ptr;
DirPatternList *list = NULL;
guint line;
+ DirPattern *last = NULL;
if (!g_file_load_contents (file, NULL, &content, NULL, NULL, error))
{
@@ -383,8 +418,10 @@ dir_push_pattern_list (GList *stack, GFile *dir, GFile *file, gboolean ignore, G
}
list = g_slice_new0(DirPatternList);
- list->pattern = NULL;
+ list->sources = NULL;
+ list->objects = NULL;
list->directory = dir;
+ list->extensions = g_hash_table_new_full (g_str_hash, g_str_equal, (GDestroyNotify)g_free, NULL);
line = 1;
for (ptr = content; *ptr != '\0';)
@@ -402,12 +439,40 @@ dir_push_pattern_list (GList *stack, GFile *dir, GFile *file, gboolean ignore, G
{
/* Create pattern */
DirPattern *pat = NULL;
+ gboolean used = FALSE;
if (next != NULL) *next = '\0';
pat = dir_pattern_new (ptr, ignore);
if (pat != NULL)
{
- list->pattern = g_list_prepend (list->pattern, pat);
+ if ((last != NULL) && (last->match != pat->match)) last = NULL;
+ if (dir_pattern_optimize (pat, last, list->extensions))
+ {
+ if (last == NULL)
+ {
+ last = pat;
+ g_regex_unref (pat->regex);
+ pat->regex = NULL;
+ list->sources = g_list_prepend (list->sources, pat);
+ used = TRUE;
+ }
+ }
+ else
+ {
+ list->sources = g_list_prepend (list->sources, pat);
+ last = NULL;
+ used = TRUE;
+ }
+
+ if (pat->object != NULL)
+ {
+ if (used) pat = dir_pattern_new (ptr, ignore);
+ list->objects = g_list_prepend (list->objects, pat);
+ }
+ else if (!used)
+ {
+ dir_pattern_free (pat);
+ }
}
else
{
@@ -422,7 +487,8 @@ dir_push_pattern_list (GList *stack, GFile *dir, GFile *file, gboolean ignore, G
}
g_free (content);
- list->pattern = g_list_reverse (list->pattern);
+ list->sources = g_list_reverse (list->sources);
+ list->objects = g_list_reverse (list->objects);
return g_list_prepend (stack, list);
}
@@ -434,9 +500,12 @@ dir_pop_pattern_list (GList *stack)
stack = g_list_remove_link (stack, stack);
- g_list_foreach (top->pattern, (GFunc)dir_pattern_free, NULL);
- g_list_free (top->pattern);
+ g_list_foreach (top->sources, (GFunc)dir_pattern_free, NULL);
+ g_list_free (top->sources);
+ g_list_foreach (top->objects, (GFunc)dir_pattern_free, NULL);
+ g_list_free (top->objects);
g_object_unref (top->directory);
+ g_hash_table_destroy (top->extensions);
g_slice_free (DirPatternList, top);
return stack;
@@ -462,15 +531,24 @@ dir_pattern_stack_is_match (GFile *root, GList *stack, GFile *file)
{
DirPatternList *pat_list = (DirPatternList *)list->data;
GList *node;
+ DirPattern *pat_ext = NULL;
+ const gchar *ext;
+
+ /* Check only the extension to be faster on the common case */
+ ext = strrchr (filename, '.');
+ if (ext != NULL)
+ {
+ pat_ext = g_hash_table_lookup (pat_list->extensions, ext + 1);
+ }
- for (node = g_list_first (pat_list->pattern); node != NULL; node = g_list_next (node))
+ for (node = g_list_first (pat_list->sources); node != NULL; node = g_list_next (node))
{
DirPattern *pat = (DirPattern *)node->data;
- if (pat->directory && !directory)
+ if ((pat->directory && !directory) || (!pat->directory && directory))
continue;
- if (g_regex_match (pat->source, filename, 0, NULL))
+ if ((pat == pat_ext) || ((pat->regex != NULL) && g_regex_match (pat->regex, filename,
0, NULL)))
{
match = pat->match;
}
@@ -500,18 +578,18 @@ dir_pattern_find_file_object (GFile *root, GList *stack, GFile *file)
DirPatternList *pat_list = (DirPatternList *)list->data;
GList *node;
- for (node = g_list_first (pat_list->pattern); node != NULL; node = g_list_next (node))
+ for (node = g_list_first (pat_list->objects); node != NULL; node = g_list_next (node))
{
DirPattern *pat = (DirPattern *)node->data;
if (pat->directory || !pat->match || (pat->object == NULL) )
continue;
- if (g_regex_match (pat->source, filename, 0, NULL))
+ if (g_regex_match (pat->regex, filename, 0, NULL))
{
gchar *objname;
- objname = g_regex_replace (pat->source, filename, -1, 0, pat->object,
0, NULL);
+ objname = g_regex_replace (pat->regex, filename, -1, 0, pat->object,
0, NULL);
object = g_file_get_child (root, objname);
g_free (objname);
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]