[glib/wip/pcre-mark: 1/2] regex: Add g_match_info_get_mark
- From: Christian Persch <chpe src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [glib/wip/pcre-mark: 1/2] regex: Add g_match_info_get_mark
- Date: Mon, 2 Jul 2012 14:17:14 +0000 (UTC)
commit d18c3010156bdccebdf18f09aaaac334c0b5d391
Author: Christian Persch <chpe gnome org>
Date: Fri Jun 8 00:49:00 2012 +0200
regex: Add g_match_info_get_mark
Since PCRE 8.03, PCRE supports backtracking control verbs with a name argument.
g_match_info_get_mark() will return the argument of the last encountered verb
in the whole matching process for failed or partial matches, and in the matching
path only for matches.
docs/reference/glib/glib-sections.txt | 1 +
glib/gregex.c | 40 +++++++++++++++++++++++++-
glib/gregex.h | 2 +
glib/tests/regex.c | 50 +++++++++++++++++++++++++++++++++
4 files changed, 91 insertions(+), 2 deletions(-)
---
diff --git a/docs/reference/glib/glib-sections.txt b/docs/reference/glib/glib-sections.txt
index d2e985e..02652a7 100644
--- a/docs/reference/glib/glib-sections.txt
+++ b/docs/reference/glib/glib-sections.txt
@@ -1026,6 +1026,7 @@ g_regex_check_replacement
GMatchInfo
g_match_info_get_regex
g_match_info_get_string
+g_match_info_get_mark
g_match_info_ref
g_match_info_unref
g_match_info_free
diff --git a/glib/gregex.c b/glib/gregex.c
index 839b1e6..e69b2c0 100644
--- a/glib/gregex.c
+++ b/glib/gregex.c
@@ -214,6 +214,8 @@ struct _GMatchInfo
gint n_workspace; /* number of workspace elements */
const gchar *string; /* string passed to the match function */
gssize string_len; /* length of string */
+ /* const */ guchar *mark; /* MARK when using backtracking control verbs */
+ pcre_extra extra; /* pcre_extra data */
};
struct _GRegex
@@ -592,6 +594,20 @@ match_info_new (const GRegex *regex,
match_info->offsets[0] = -1;
match_info->offsets[1] = -1;
+ if (!is_dfa)
+ {
+ /* We need a pcre_extra to store a pointer to GMatchInfo::mark
+ * where pcre_exec will store the MARK.
+ * Since pcre_exec does not modify the extra data otherwise,
+ * it should be safe to do a shallow copy here.
+ */
+ if (regex->extra)
+ match_info->extra = *regex->extra;
+
+ match_info->extra.flags |= PCRE_EXTRA_MARK;
+ match_info->extra.mark = &match_info->mark;
+ }
+
return match_info;
}
@@ -634,6 +650,27 @@ g_match_info_get_string (const GMatchInfo *match_info)
}
/**
+ * g_match_info_get_mark:
+ * @match_info: a #GMatchInfo structure
+ *
+ * When the pattern contains backtracking control verbs, and there is
+ * a match, returns the argument of the verb last encountered on the
+ * matching path. If there is a partial match, or no match, returns
+ * the argument of the last verb encountered in the whole matching
+ * process. Otherwise, %NULL is returned.
+ *
+ * Returns: (transfer none): the mark, or %NULL
+ *
+ * Since: 2.34
+ */
+const gchar *
+g_match_info_get_mark (const GMatchInfo *match_info)
+{
+ g_return_val_if_fail (match_info != NULL, NULL);
+ return (const gchar *) match_info->mark;
+}
+
+/**
* g_match_info_ref:
* @match_info: a #GMatchInfo
*
@@ -729,7 +766,7 @@ g_match_info_next (GMatchInfo *match_info,
}
match_info->matches = pcre_exec (match_info->regex->pcre_re,
- match_info->regex->extra,
+ &match_info->extra,
match_info->string,
match_info->string_len,
match_info->pos,
@@ -1223,7 +1260,6 @@ g_match_info_fetch_all (const GMatchInfo *match_info)
return result;
}
-
/* GRegex */
GQuark
diff --git a/glib/gregex.h b/glib/gregex.h
index 29e5c6a..6944406 100644
--- a/glib/gregex.h
+++ b/glib/gregex.h
@@ -527,6 +527,8 @@ gboolean g_regex_check_replacement (const gchar *replacement,
/* Match info */
GRegex *g_match_info_get_regex (const GMatchInfo *match_info);
const gchar *g_match_info_get_string (const GMatchInfo *match_info);
+const gchar *g_match_info_get_mark (const GMatchInfo *match_info);
+
GMatchInfo *g_match_info_ref (GMatchInfo *match_info);
void g_match_info_unref (GMatchInfo *match_info);
diff --git a/glib/tests/regex.c b/glib/tests/regex.c
index 72a0155..ed5ab80 100644
--- a/glib/tests/regex.c
+++ b/glib/tests/regex.c
@@ -2048,6 +2048,48 @@ test_explicit_crlf (void)
g_regex_unref (regex);
}
+
+typedef struct {
+ const gchar *pattern;
+ const gchar *string;
+ const gchar *mark;
+ gboolean expected;
+} TestMarkData;
+
+static void
+test_mark (gconstpointer d)
+{
+ const TestMarkData *data = d;
+ GRegex *regex;
+ GMatchInfo *info;
+ gboolean match;
+ GError *error = NULL;
+
+ regex = g_regex_new (data->pattern, 0, 0, &error);
+ g_assert_no_error (error);
+
+ match = g_regex_match_full (regex, data->string, -1, 0, 0, &info, NULL);
+ g_assert_cmpint (match, ==, data->expected);
+ g_assert_cmpstr (g_match_info_get_mark (info), ==, data->mark);
+
+ g_match_info_free (info);
+ g_regex_unref (regex);
+}
+
+#define TEST_MARK(_pattern, _string, _expected, _mark) \
+{ \
+ TestMarkData *data; \
+ gchar *path; \
+ data = g_new0 (TestMarkData, 1); \
+ data->pattern = _pattern; \
+ data->string = _string; \
+ data->mark = _mark; \
+ data->expected = _expected; \
+ path = g_strdup_printf ("/regex/mark/%d", ++total); \
+ g_test_add_data_func (path, data, test_mark); \
+ g_free (path); \
+}
+
int
main (int argc, char *argv[])
{
@@ -2703,5 +2745,13 @@ main (int argc, char *argv[])
TEST_MATCH_NOTEMPTY("a?b?", "xyz", FALSE);
TEST_MATCH_NOTEMPTY_ATSTART("a?b?", "xyz", TRUE);
+ /* MARK */
+ TEST_MARK("^(A(*PRUNE:A)B|C(*PRUNE:B)D)", "AC", FALSE, "A");
+ TEST_MARK("^(A(*PRUNE:A)B|C(*PRUNE:B)D)", "CB", FALSE, "B");
+ TEST_MARK("(*MARK:A)(*SKIP:B)(C|X)", "C", TRUE, "A");
+ TEST_MARK("(*MARK:A)(*SKIP:B)(C|X)", "D", FALSE, "A");
+ TEST_MARK("X(*MARK:A)Y|X(*MARK:B)Z", "XY", TRUE, "A");
+ TEST_MARK("X(*MARK:A)Y|X(*MARK:B)Z", "XZ", TRUE, "B");
+
return g_test_run ();
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]