[glib] regex: Add PARTIAL_HARD match option
- From: Christian Persch <chpe src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [glib] regex: Add PARTIAL_HARD match option
- Date: Mon, 2 Jul 2012 14:08:01 +0000 (UTC)
commit 1171215014bb9406ff8ae1ea91b1c251b4e7d71b
Author: Christian Persch <chpe gnome org>
Date: Thu Jun 7 22:50:52 2012 +0200
regex: Add PARTIAL_HARD match option
PCRE has supported a new partial matching method, PCRE_PARTIAL_HARD, since version 8.00.
glib/gregex.c | 21 +++++++++++++++++----
glib/gregex.h | 9 ++++++++-
glib/tests/regex.c | 11 +++++++++--
3 files changed, 34 insertions(+), 7 deletions(-)
---
diff --git a/glib/gregex.c b/glib/gregex.c
index cadcc50..184fc20 100644
--- a/glib/gregex.c
+++ b/glib/gregex.c
@@ -146,7 +146,9 @@
G_REGEX_MATCH_NEWLINE_ANY | \
G_REGEX_MATCH_NEWLINE_ANYCRLF | \
G_REGEX_MATCH_BSR_ANYCRLF | \
- G_REGEX_MATCH_BSR_ANY)
+ G_REGEX_MATCH_BSR_ANY | \
+ G_REGEX_MATCH_PARTIAL_SOFT | \
+ G_REGEX_MATCH_PARTIAL_HARD)
/* we rely on these flags having the same values */
G_STATIC_ASSERT (G_REGEX_CASELESS == PCRE_CASELESS);
@@ -177,6 +179,8 @@ G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY == PCRE_NEWLINE_ANY);
G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY == PCRE_BSR_UNICODE);
+G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT == PCRE_PARTIAL_SOFT);
+G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD == PCRE_PARTIAL_HARD);
/* These PCRE flags are unused or not exposed publically in GRegexFlags, so
* it should be ok to reuse them for different things.
@@ -849,13 +853,21 @@ g_match_info_get_match_count (const GMatchInfo *match_info)
* able to raise an error as soon as a mistake is made.
*
* GRegex supports the concept of partial matching by means of the
- * #G_REGEX_MATCH_PARTIAL flag. When this is set the return code for
+ * #G_REGEX_MATCH_PARTIAL_SOFT and #G_REGEX_MATCH_PARTIAL_HARD flags.
+ * When they are used, the return code for
* g_regex_match() or g_regex_match_full() is, as usual, %TRUE
* for a complete match, %FALSE otherwise. But, when these functions
* return %FALSE, you can check if the match was partial calling
* g_match_info_is_partial_match().
*
- * When using partial matching you cannot use g_match_info_fetch*().
+ * The difference between #G_REGEX_MATCH_PARTIAL_SOFT and
+ * #G_REGEX_MATCH_PARTIAL_HARD is that when a partial match is encountered
+ * with #G_REGEX_MATCH_PARTIAL_SOFT, matching continues to search for a
+ * possible complete match, while with #G_REGEX_MATCH_PARTIAL_HARD matching
+ * stops at the partial match.
+ * When both #G_REGEX_MATCH_PARTIAL_SOFT and #G_REGEX_MATCH_PARTIAL_HARD
+ * are set, the latter takes precedence.
+ * See <ulink url="man:pcrepartial">man:pcrepartial</ulink> for more information on partial matching.
*
* Because of the way certain internal optimizations are implemented
* the partial matching algorithm cannot be used with all patterns.
@@ -864,7 +876,8 @@ g_match_info_get_match_count (const GMatchInfo *match_info)
* of occurrences is greater than one. Optional items such as "\d?"
* (where the maximum is one) are permitted. Quantifiers with any values
* are permitted after parentheses, so the invalid examples above can be
- * coded thus "(a){2,4}" and "(\d)+". If #G_REGEX_MATCH_PARTIAL is set
+ * coded thus "(a){2,4}" and "(\d)+". If #G_REGEX_MATCH_PARTIAL or
+ * #G_REGEX_MATCH_PARTIAL_HARD is set
* for a pattern that does not conform to the restrictions, matching
* functions return an error.
*
diff --git a/glib/gregex.h b/glib/gregex.h
index 91852bf..6550fb6 100644
--- a/glib/gregex.h
+++ b/glib/gregex.h
@@ -361,6 +361,11 @@ typedef enum
* U+2029 PARAGRAPH SEPARATOR. Since: 2.34
* @G_REGEX_JAVASCRIPT_COMPAT: Changes behaviour so that it is compatible with
* JavaScript rather than PCRE. Since: 2.34
+ * @G_REGEX_MATCH_PARTIAL_SOFT: An alias for #G_REGEX_MATCH_PARTIAL. Since: 2.34
+ * @G_REGEX_MATCH_PARTIAL_HARD: Turns on the partial matching feature. In contrast
+ * to #G_REGEX_MATCH_PARTIAL_SOFT, this stops matching as soon as a partial match
+ * is found, without continuing to search for a possible complete match. See
+ * g_match_info_is_partial_match() for more information. Since: 2.34
*
* Flags specifying match-time options.
*
@@ -381,7 +386,9 @@ typedef enum
G_REGEX_MATCH_NEWLINE_ANY = 1 << 22,
G_REGEX_MATCH_NEWLINE_ANYCRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY,
G_REGEX_MATCH_BSR_ANYCRLF = 1 << 23,
- G_REGEX_MATCH_BSR_ANY = 1 << 24
+ G_REGEX_MATCH_BSR_ANY = 1 << 24,
+ G_REGEX_MATCH_PARTIAL_SOFT = G_REGEX_MATCH_PARTIAL,
+ G_REGEX_MATCH_PARTIAL_HARD = 1 << 27
} GRegexMatchFlags;
/**
diff --git a/glib/tests/regex.c b/glib/tests/regex.c
index f08db88..005f48f 100644
--- a/glib/tests/regex.c
+++ b/glib/tests/regex.c
@@ -474,7 +474,7 @@ test_partial (gconstpointer d)
g_assert (regex != NULL);
- g_regex_match (regex, data->string, G_REGEX_MATCH_PARTIAL, &match_info);
+ g_regex_match (regex, data->string, data->match_opts, &match_info);
g_assert_cmpint (data->expected, ==, g_match_info_is_partial_match (match_info));
@@ -488,18 +488,21 @@ test_partial (gconstpointer d)
g_regex_unref (regex);
}
-#define TEST_PARTIAL(_pattern, _string, _expected) { \
+#define TEST_PARTIAL_FULL(_pattern, _string, _match_opts, _expected) { \
TestMatchData *data; \
gchar *path; \
data = g_new0 (TestMatchData, 1); \
data->pattern = _pattern; \
data->string = _string; \
+ data->match_opts = _match_opts; \
data->expected = _expected; \
path = g_strdup_printf ("/regex/match/partial/%d", ++total); \
g_test_add_data_func (path, data, test_partial); \
g_free (path); \
}
+#define TEST_PARTIAL(_pattern, _string, _expected) TEST_PARTIAL_FULL(_pattern, _string, G_REGEX_MATCH_PARTIAL, _expected)
+
typedef struct {
const gchar *pattern;
const gchar *string;
@@ -2364,6 +2367,10 @@ main (int argc, char *argv[])
TEST_PARTIAL("(a)+b", "aa", TRUE);
TEST_PARTIAL("a?b", "a", TRUE);
+ /* Test soft vs. hard partial matching */
+ TEST_PARTIAL_FULL("cat(fish)?", "cat", G_REGEX_MATCH_PARTIAL_SOFT, FALSE);
+ TEST_PARTIAL_FULL("cat(fish)?", "cat", G_REGEX_MATCH_PARTIAL_HARD, TRUE);
+
/* TEST_SUB_PATTERN(pattern, string, start_position, sub_n, expected_sub,
* expected_start, expected_end) */
TEST_SUB_PATTERN("a", "a", 0, 0, "a", 0, 1);
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]