[glib/wip/pcre-mark: 2/2] regex: Add NO_START_OPTIMIZE compile and match flags



commit a6e3eb6eced6b4f1d816a0ea73adb5081145730a
Author: Christian Persch <chpe gnome org>
Date:   Fri Jun 8 00:56:44 2012 +0200

    regex: Add NO_START_OPTIMIZE compile and match flags
    
    PCRE_NO_START_OPTIMIZE has existed since PCRE 7.9, but was not useful before,
    since it only affects callouts (which GRegex doesn't support) and backtracking
    control verbs, which the last commit makes use of.

 glib/gregex.c      |   69 ++++++++++++++++++++++++++++-----------------------
 glib/gregex.h      |   39 ++++++++++++++++-------------
 glib/tests/regex.c |    2 +-
 3 files changed, 61 insertions(+), 49 deletions(-)
---
diff --git a/glib/gregex.c b/glib/gregex.c
index e69b2c0..282700a 100644
--- a/glib/gregex.c
+++ b/glib/gregex.c
@@ -128,7 +128,8 @@
                               G_REGEX_NEWLINE_CRLF      | \
                               G_REGEX_NEWLINE_ANYCRLF   | \
                               G_REGEX_BSR_ANYCRLF       | \
-                              G_REGEX_JAVASCRIPT_COMPAT)
+                              G_REGEX_JAVASCRIPT_COMPAT | \
+                              G_REGEX_NO_START_OPTIMIZE)
 
 /* Mask of all GRegexCompileFlags values that are (not) passed trough to PCRE */
 #define G_REGEX_COMPILE_PCRE_MASK (G_REGEX_COMPILE_MASK & ~G_REGEX_COMPILE_NONPCRE_MASK)
@@ -136,20 +137,21 @@
                                       G_REGEX_OPTIMIZE)
 
 /* Mask of all the possible values for GRegexMatchFlags. */
-#define G_REGEX_MATCH_MASK (G_REGEX_MATCH_ANCHORED         | \
-                            G_REGEX_MATCH_NOTBOL           | \
-                            G_REGEX_MATCH_NOTEOL           | \
-                            G_REGEX_MATCH_NOTEMPTY         | \
-                            G_REGEX_MATCH_PARTIAL          | \
-                            G_REGEX_MATCH_NEWLINE_CR       | \
-                            G_REGEX_MATCH_NEWLINE_LF       | \
-                            G_REGEX_MATCH_NEWLINE_CRLF     | \
-                            G_REGEX_MATCH_NEWLINE_ANY      | \
-                            G_REGEX_MATCH_NEWLINE_ANYCRLF  | \
-                            G_REGEX_MATCH_BSR_ANYCRLF      | \
-                            G_REGEX_MATCH_BSR_ANY          | \
-                            G_REGEX_MATCH_PARTIAL_SOFT     | \
-                            G_REGEX_MATCH_PARTIAL_HARD     | \
+#define G_REGEX_MATCH_MASK (G_REGEX_MATCH_ANCHORED          | \
+                            G_REGEX_MATCH_NOTBOL            | \
+                            G_REGEX_MATCH_NOTEOL            | \
+                            G_REGEX_MATCH_NOTEMPTY          | \
+                            G_REGEX_MATCH_PARTIAL           | \
+                            G_REGEX_MATCH_NEWLINE_CR        | \
+                            G_REGEX_MATCH_NEWLINE_LF        | \
+                            G_REGEX_MATCH_NEWLINE_CRLF      | \
+                            G_REGEX_MATCH_NEWLINE_ANY       | \
+                            G_REGEX_MATCH_NEWLINE_ANYCRLF   | \
+                            G_REGEX_MATCH_BSR_ANYCRLF       | \
+                            G_REGEX_MATCH_BSR_ANY           | \
+                            G_REGEX_MATCH_NO_START_OPTIMIZE | \
+                            G_REGEX_MATCH_PARTIAL_SOFT      | \
+                            G_REGEX_MATCH_PARTIAL_HARD      | \
                             G_REGEX_MATCH_NOTEMPTY_ATSTART)
 
 /* we rely on these flags having the same values */
@@ -169,22 +171,24 @@ G_STATIC_ASSERT (G_REGEX_NEWLINE_CRLF      == PCRE_NEWLINE_CRLF);
 G_STATIC_ASSERT (G_REGEX_NEWLINE_ANYCRLF   == PCRE_NEWLINE_ANYCRLF);
 G_STATIC_ASSERT (G_REGEX_BSR_ANYCRLF       == PCRE_BSR_ANYCRLF);
 G_STATIC_ASSERT (G_REGEX_JAVASCRIPT_COMPAT == PCRE_JAVASCRIPT_COMPAT);
-
-G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED         == PCRE_ANCHORED);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL           == PCRE_NOTBOL);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL           == PCRE_NOTEOL);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY         == PCRE_NOTEMPTY);
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL          == PCRE_PARTIAL);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR       == PCRE_NEWLINE_CR);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF       == PCRE_NEWLINE_LF);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF     == PCRE_NEWLINE_CRLF);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY      == PCRE_NEWLINE_ANY);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF  == PCRE_NEWLINE_ANYCRLF);
-G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF      == PCRE_BSR_ANYCRLF);
-G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY          == PCRE_BSR_UNICODE);
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT     == PCRE_PARTIAL_SOFT);
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD     == PCRE_PARTIAL_HARD);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY_ATSTART == PCRE_NOTEMPTY_ATSTART);
+G_STATIC_ASSERT (G_REGEX_NO_START_OPTIMIZE == PCRE_NO_START_OPTIMIZE);
+
+G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED          == PCRE_ANCHORED);
+G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL            == PCRE_NOTBOL);
+G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL            == PCRE_NOTEOL);
+G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY          == PCRE_NOTEMPTY);
+G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL           == PCRE_PARTIAL);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR        == PCRE_NEWLINE_CR);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF        == PCRE_NEWLINE_LF);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF      == PCRE_NEWLINE_CRLF);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY       == PCRE_NEWLINE_ANY);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF   == PCRE_NEWLINE_ANYCRLF);
+G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF       == PCRE_BSR_ANYCRLF);
+G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY           == PCRE_BSR_UNICODE);
+G_STATIC_ASSERT (G_REGEX_MATCH_NO_START_OPTIMIZE == PCRE_NO_START_OPTIMIZE);
+G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT      == PCRE_PARTIAL_SOFT);
+G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD      == PCRE_PARTIAL_HARD);
+G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY_ATSTART  == PCRE_NOTEMPTY_ATSTART);
 
 /* These PCRE flags are unused or not exposed publically in GRegexFlags, so
  * it should be ok to reuse them for different things.
@@ -659,6 +663,9 @@ g_match_info_get_string (const GMatchInfo *match_info)
  * the argument of the last verb encountered in the whole matching
  * process. Otherwise, $NULL is returned.
  *
+ * See <ulink>man:pcrepattern</ulink> for more information on
+ * backtracking control verbs.
+ *
  * Returns: (transfer none): the mark, or %NULL
  *
  * Since: 2.34
diff --git a/glib/gregex.h b/glib/gregex.h
index 6944406..0640aac 100644
--- a/glib/gregex.h
+++ b/glib/gregex.h
@@ -279,7 +279,8 @@ GQuark g_regex_error_quark (void);
  * G_REGEX_BSR_ANYCRLF: Usually any newline character or character sequence
  *     is recognised. If this option is set, then "\R" only recognizes the newline
  *    characters '\r', '\n' and '\r\n'. Since: 2.34
- * 
+ * @G_REGEX_NO_START_OPTIMIZE: Disable some optimizations that will cause incorrect
+ *     results for g_match_info_get_mark() when using backtracking control verbs. Since: 2.34
  *
  * Flags specifying compile-time options.
  *
@@ -306,7 +307,8 @@ typedef enum
   G_REGEX_NEWLINE_CRLF      = G_REGEX_NEWLINE_CR | G_REGEX_NEWLINE_LF,
   G_REGEX_NEWLINE_ANYCRLF   = G_REGEX_NEWLINE_CR | 1 << 22,
   G_REGEX_BSR_ANYCRLF       = 1 << 23,
-  G_REGEX_JAVASCRIPT_COMPAT = 1 << 25
+  G_REGEX_JAVASCRIPT_COMPAT = 1 << 25,
+  G_REGEX_NO_START_OPTIMIZE = 1 << 26
 } GRegexCompileFlags;
 
 /**
@@ -372,6 +374,8 @@ typedef enum
  * @G_REGEX_MATCH_NOTEMPTY_ATSTART: Like #G_REGEX_MATCH_NOTEMPTY, but only applied to
  *     the start of the matched string. For anchored
  *     patterns this can only happen for pattern containing "\K". Since: 2.34
+ * @G_REGEX_MATCH_NO_START_OPTIMIZE: Disable some optimizations that will cause incorrect
+ *     results for g_match_info_get_mark() when using backtracking control verbs. Since: 2.34
  *
  * Flags specifying match-time options.
  *
@@ -381,21 +385,22 @@ typedef enum
  * adding a new flag. */
 typedef enum
 {
-  G_REGEX_MATCH_ANCHORED         = 1 << 4,
-  G_REGEX_MATCH_NOTBOL           = 1 << 7,
-  G_REGEX_MATCH_NOTEOL           = 1 << 8,
-  G_REGEX_MATCH_NOTEMPTY         = 1 << 10,
-  G_REGEX_MATCH_PARTIAL          = 1 << 15,
-  G_REGEX_MATCH_NEWLINE_CR       = 1 << 20,
-  G_REGEX_MATCH_NEWLINE_LF       = 1 << 21,
-  G_REGEX_MATCH_NEWLINE_CRLF     = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF,
-  G_REGEX_MATCH_NEWLINE_ANY      = 1 << 22,
-  G_REGEX_MATCH_NEWLINE_ANYCRLF  = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY,
-  G_REGEX_MATCH_BSR_ANYCRLF      = 1 << 23,
-  G_REGEX_MATCH_BSR_ANY          = 1 << 24,
-  G_REGEX_MATCH_PARTIAL_SOFT     = G_REGEX_MATCH_PARTIAL,
-  G_REGEX_MATCH_PARTIAL_HARD     = 1 << 27,
-  G_REGEX_MATCH_NOTEMPTY_ATSTART = 1 << 28
+  G_REGEX_MATCH_ANCHORED          = 1 << 4,
+  G_REGEX_MATCH_NOTBOL            = 1 << 7,
+  G_REGEX_MATCH_NOTEOL            = 1 << 8,
+  G_REGEX_MATCH_NOTEMPTY          = 1 << 10,
+  G_REGEX_MATCH_PARTIAL           = 1 << 15,
+  G_REGEX_MATCH_NEWLINE_CR        = 1 << 20,
+  G_REGEX_MATCH_NEWLINE_LF        = 1 << 21,
+  G_REGEX_MATCH_NEWLINE_CRLF      = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF,
+  G_REGEX_MATCH_NEWLINE_ANY       = 1 << 22,
+  G_REGEX_MATCH_NEWLINE_ANYCRLF   = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY,
+  G_REGEX_MATCH_BSR_ANYCRLF       = 1 << 23,
+  G_REGEX_MATCH_BSR_ANY           = 1 << 24,
+  G_REGEX_MATCH_NO_START_OPTIMIZE = 1 << 26,
+  G_REGEX_MATCH_PARTIAL_SOFT      = G_REGEX_MATCH_PARTIAL,
+  G_REGEX_MATCH_PARTIAL_HARD      = 1 << 27,
+  G_REGEX_MATCH_NOTEMPTY_ATSTART  = 1 << 28
 } GRegexMatchFlags;
 
 /**
diff --git a/glib/tests/regex.c b/glib/tests/regex.c
index ed5ab80..54cc50f 100644
--- a/glib/tests/regex.c
+++ b/glib/tests/regex.c
@@ -2146,7 +2146,7 @@ main (int argc, char *argv[])
   TEST_NEW_CHECK_FLAGS ("(*ANYCRLF)a", 0, 0, G_REGEX_NEWLINE_ANYCRLF, 0);
   TEST_NEW_CHECK_FLAGS ("(*BSR_ANYCRLF)a", 0, 0, G_REGEX_BSR_ANYCRLF, 0);
   TEST_NEW_CHECK_FLAGS ("(*BSR_UNICODE)a", 0, 0, 0 /* this is the default in GRegex */, 0);
-  TEST_NEW_CHECK_FLAGS ("(*NO_START_OPT)a", 0, 0, 0 /* not exposed in GRegex */, 0);
+  TEST_NEW_CHECK_FLAGS ("(*NO_START_OPT)", 0, 0, G_REGEX_NO_START_OPTIMIZE, 0);
 
   /* TEST_NEW_FAIL(pattern, compile_opts, expected_error) */
   TEST_NEW_FAIL("(", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS);



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]