[glib] regex: Add PARTIAL_HARD match option



commit 1171215014bb9406ff8ae1ea91b1c251b4e7d71b
Author: Christian Persch <chpe gnome org>
Date:   Thu Jun 7 22:50:52 2012 +0200

    regex: Add PARTIAL_HARD match option
    
    Since PCRE 8.00 it supports a new partial matching method PCRE_PARTIAL_HARD.

 glib/gregex.c      |   21 +++++++++++++++++----
 glib/gregex.h      |    9 ++++++++-
 glib/tests/regex.c |   11 +++++++++--
 3 files changed, 34 insertions(+), 7 deletions(-)
---
diff --git a/glib/gregex.c b/glib/gregex.c
index cadcc50..184fc20 100644
--- a/glib/gregex.c
+++ b/glib/gregex.c
@@ -146,7 +146,9 @@
                             G_REGEX_MATCH_NEWLINE_ANY      | \
                             G_REGEX_MATCH_NEWLINE_ANYCRLF  | \
                             G_REGEX_MATCH_BSR_ANYCRLF      | \
-                            G_REGEX_MATCH_BSR_ANY)
+                            G_REGEX_MATCH_BSR_ANY          | \
+                            G_REGEX_MATCH_PARTIAL_SOFT     | \
+                            G_REGEX_MATCH_PARTIAL_HARD)
 
 /* we rely on these flags having the same values */
 G_STATIC_ASSERT (G_REGEX_CASELESS          == PCRE_CASELESS);
@@ -177,6 +179,8 @@ G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY     == PCRE_NEWLINE_ANY);
 G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
 G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF     == PCRE_BSR_ANYCRLF);
 G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY         == PCRE_BSR_UNICODE);
+G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT    == PCRE_PARTIAL_SOFT);
+G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD    == PCRE_PARTIAL_HARD);
 
 /* These PCRE flags are unused or not exposed publically in GRegexFlags, so
  * it should be ok to reuse them for different things.
@@ -849,13 +853,21 @@ g_match_info_get_match_count (const GMatchInfo *match_info)
  * able to raise an error as soon as a mistake is made.
  *
  * GRegex supports the concept of partial matching by means of the
- * #G_REGEX_MATCH_PARTIAL flag. When this is set the return code for
+ * #G_REGEX_MATCH_PARTIAL_SOFT and #G_REGEX_MATCH_PARTIAL_HARD flags.
+ * When they are used, the return code for
  * g_regex_match() or g_regex_match_full() is, as usual, %TRUE
  * for a complete match, %FALSE otherwise. But, when these functions
  * return %FALSE, you can check if the match was partial calling
  * g_match_info_is_partial_match().
  *
- * When using partial matching you cannot use g_match_info_fetch*().
+ * The difference between #G_REGEX_MATCH_PARTIAL_SOFT and 
+ * #G_REGEX_MATCH_PARTIAL_HARD is that when a partial match is encountered
+ * with #G_REGEX_MATCH_PARTIAL_SOFT, matching continues to search for a
+ * possible complete match, while with #G_REGEX_MATCH_PARTIAL_HARD matching
+ * stops at the partial match.
+ * When both #G_REGEX_MATCH_PARTIAL_SOFT and #G_REGEX_MATCH_PARTIAL_HARD
+ * are set, the latter takes precedence.
+ * See <ulink url="man:pcrepartial">pcrepartial(3)</ulink> for more information on partial matching.
  *
  * Because of the way certain internal optimizations are implemented
  * the partial matching algorithm cannot be used with all patterns.
@@ -864,7 +876,8 @@ g_match_info_get_match_count (const GMatchInfo *match_info)
  * of occurrences is greater than one. Optional items such as "\d?"
  * (where the maximum is one) are permitted. Quantifiers with any values
  * are permitted after parentheses, so the invalid examples above can be
- * coded thus "(a){2,4}" and "(\d)+". If #G_REGEX_MATCH_PARTIAL is set
+ * coded thus "(a){2,4}" and "(\d)+". If #G_REGEX_MATCH_PARTIAL_SOFT or
+ * #G_REGEX_MATCH_PARTIAL_HARD is set
  * for a pattern that does not conform to the restrictions, matching
  * functions return an error.
  *
diff --git a/glib/gregex.h b/glib/gregex.h
index 91852bf..6550fb6 100644
--- a/glib/gregex.h
+++ b/glib/gregex.h
@@ -361,6 +361,11 @@ typedef enum
  *     U+2029 PARAGRAPH SEPARATOR. Since: 2.34
  * @G_REGEX_JAVASCRIPT_COMPAT: Changes behaviour so that it is compatible with
  *     JavaScript rather than PCRE. Since: 2.34
+ * @G_REGEX_MATCH_PARTIAL_SOFT: An alias for #G_REGEX_MATCH_PARTIAL. Since: 2.34
+ * @G_REGEX_MATCH_PARTIAL_HARD: Turns on the partial matching feature. In contrast
+ *     to #G_REGEX_MATCH_PARTIAL_SOFT, this stops matching as soon as a partial match
+ *     is found, without continuing to search for a possible complete match. See
+ *     g_match_info_is_partial_match() for more information. Since: 2.34
  *
  * Flags specifying match-time options.
  *
@@ -381,7 +386,9 @@ typedef enum
   G_REGEX_MATCH_NEWLINE_ANY     = 1 << 22,
   G_REGEX_MATCH_NEWLINE_ANYCRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY,
   G_REGEX_MATCH_BSR_ANYCRLF     = 1 << 23,
-  G_REGEX_MATCH_BSR_ANY         = 1 << 24
+  G_REGEX_MATCH_BSR_ANY         = 1 << 24,
+  G_REGEX_MATCH_PARTIAL_SOFT    = G_REGEX_MATCH_PARTIAL,
+  G_REGEX_MATCH_PARTIAL_HARD    = 1 << 27
 } GRegexMatchFlags;
 
 /**
diff --git a/glib/tests/regex.c b/glib/tests/regex.c
index f08db88..005f48f 100644
--- a/glib/tests/regex.c
+++ b/glib/tests/regex.c
@@ -474,7 +474,7 @@ test_partial (gconstpointer d)
 
   g_assert (regex != NULL);
 
-  g_regex_match (regex, data->string, G_REGEX_MATCH_PARTIAL, &match_info);
+  g_regex_match (regex, data->string, data->match_opts, &match_info);
 
   g_assert_cmpint (data->expected, ==, g_match_info_is_partial_match (match_info));
 
@@ -488,18 +488,21 @@ test_partial (gconstpointer d)
   g_regex_unref (regex);
 }
 
-#define TEST_PARTIAL(_pattern, _string, _expected) {               \
+#define TEST_PARTIAL_FULL(_pattern, _string, _match_opts, _expected) { \
   TestMatchData *data;                                          \
   gchar *path;                                                  \
   data = g_new0 (TestMatchData, 1);                             \
   data->pattern = _pattern;                                      \
   data->string = _string;                                        \
+  data->match_opts = _match_opts;                                \
   data->expected = _expected;                                    \
   path = g_strdup_printf ("/regex/match/partial/%d", ++total);  \
   g_test_add_data_func (path, data, test_partial);              \
   g_free (path);                                                \
 }
 
+#define TEST_PARTIAL(_pattern, _string, _expected) TEST_PARTIAL_FULL(_pattern, _string, G_REGEX_MATCH_PARTIAL, _expected)
+
 typedef struct {
   const gchar *pattern;
   const gchar *string;
@@ -2364,6 +2367,10 @@ main (int argc, char *argv[])
   TEST_PARTIAL("(a)+b", "aa", TRUE);
   TEST_PARTIAL("a?b", "a", TRUE);
 
+  /* Test soft vs. hard partial matching */
+  TEST_PARTIAL_FULL("cat(fish)?", "cat", G_REGEX_MATCH_PARTIAL_SOFT, FALSE);
+  TEST_PARTIAL_FULL("cat(fish)?", "cat", G_REGEX_MATCH_PARTIAL_HARD, TRUE);
+
   /* TEST_SUB_PATTERN(pattern, string, start_position, sub_n, expected_sub,
    * 		      expected_start, expected_end) */
   TEST_SUB_PATTERN("a", "a", 0, 0, "a", 0, 1);



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]