[glib/wip/pcre-update: 15/17] regex: Add NO_START_OPTIMIZE compile and match flags



commit d9936f9dbd46ec8f934b2e13a2c1039a3b4e09d4
Author: Christian Persch <chpe gnome org>
Date:   Fri Jun 8 00:56:44 2012 +0200

    regex: Add NO_START_OPTIMIZE compile and match flags
    
    PCRE_NO_START_OPTIMIZE has existed since PCRE 7.9, but was not useful before
    since it only affects callouts (which GRegex doesn't support) and backtracking
    control verbs, which the previous commit makes use of.

 glib/gregex.c |   69 +++++++++++++++++++++++++++++++-------------------------
 glib/gregex.h |   39 ++++++++++++++++++--------------
 2 files changed, 60 insertions(+), 48 deletions(-)
---
diff --git a/glib/gregex.c b/glib/gregex.c
index 683e0ed..ed7c5b6 100644
--- a/glib/gregex.c
+++ b/glib/gregex.c
@@ -115,7 +115,8 @@
                               G_REGEX_NEWLINE_CRLF      | \
                               G_REGEX_NEWLINE_ANYCRLF   | \
                               G_REGEX_BSR_ANYCRLF       | \
-                              G_REGEX_JAVASCRIPT_COMPAT)
+                              G_REGEX_JAVASCRIPT_COMPAT | \
+                              G_REGEX_NO_START_OPTIMIZE)
 
 /* Mask of all GRegexCompileFlags values that are (not) passed trough to PCRE */
 #define G_REGEX_COMPILE_PCRE_MASK (G_REGEX_COMPILE_MASK & ~G_REGEX_COMPILE_NONPCRE_MASK)
@@ -123,20 +124,21 @@
                                       G_REGEX_OPTIMIZE)
 
 /* Mask of all the possible values for GRegexMatchFlags. */
-#define G_REGEX_MATCH_MASK (G_REGEX_MATCH_ANCHORED         | \
-                            G_REGEX_MATCH_NOTBOL           | \
-                            G_REGEX_MATCH_NOTEOL           | \
-                            G_REGEX_MATCH_NOTEMPTY         | \
-                            G_REGEX_MATCH_PARTIAL          | \
-                            G_REGEX_MATCH_NEWLINE_CR       | \
-                            G_REGEX_MATCH_NEWLINE_LF       | \
-                            G_REGEX_MATCH_NEWLINE_CRLF     | \
-                            G_REGEX_MATCH_NEWLINE_ANY      | \
-                            G_REGEX_MATCH_NEWLINE_ANYCRLF  | \
-                            G_REGEX_MATCH_BSR_ANYCRLF      | \
-                            G_REGEX_MATCH_BSR_ANY          | \
-                            G_REGEX_MATCH_PARTIAL_SOFT     | \
-                            G_REGEX_MATCH_PARTIAL_HARD     | \
+#define G_REGEX_MATCH_MASK (G_REGEX_MATCH_ANCHORED          | \
+                            G_REGEX_MATCH_NOTBOL            | \
+                            G_REGEX_MATCH_NOTEOL            | \
+                            G_REGEX_MATCH_NOTEMPTY          | \
+                            G_REGEX_MATCH_PARTIAL           | \
+                            G_REGEX_MATCH_NEWLINE_CR        | \
+                            G_REGEX_MATCH_NEWLINE_LF        | \
+                            G_REGEX_MATCH_NEWLINE_CRLF      | \
+                            G_REGEX_MATCH_NEWLINE_ANY       | \
+                            G_REGEX_MATCH_NEWLINE_ANYCRLF   | \
+                            G_REGEX_MATCH_BSR_ANYCRLF       | \
+                            G_REGEX_MATCH_BSR_ANY           | \
+                            G_REGEX_MATCH_NO_START_OPTIMIZE | \
+                            G_REGEX_MATCH_PARTIAL_SOFT      | \
+                            G_REGEX_MATCH_PARTIAL_HARD      | \
                             G_REGEX_MATCH_NOTEMPTY_ATSTART)
 
 /* we rely on these flags having the same values */
@@ -155,22 +157,24 @@ G_STATIC_ASSERT (G_REGEX_NEWLINE_CRLF      == PCRE_NEWLINE_CRLF);
 G_STATIC_ASSERT (G_REGEX_NEWLINE_ANYCRLF   == PCRE_NEWLINE_ANYCRLF);
 G_STATIC_ASSERT (G_REGEX_BSR_ANYCRLF       == PCRE_BSR_ANYCRLF);
 G_STATIC_ASSERT (G_REGEX_JAVASCRIPT_COMPAT == PCRE_JAVASCRIPT_COMPAT);
-
-G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED         == PCRE_ANCHORED);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL           == PCRE_NOTBOL);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL           == PCRE_NOTEOL);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY         == PCRE_NOTEMPTY);
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL          == PCRE_PARTIAL);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR       == PCRE_NEWLINE_CR);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF       == PCRE_NEWLINE_LF);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF     == PCRE_NEWLINE_CRLF);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY      == PCRE_NEWLINE_ANY);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF  == PCRE_NEWLINE_ANYCRLF);
-G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF      == PCRE_BSR_ANYCRLF);
-G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY          == PCRE_BSR_UNICODE);
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT     == PCRE_PARTIAL_SOFT);
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD     == PCRE_PARTIAL_HARD);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY_ATSTART == PCRE_NOTEMPTY_ATSTART);
+G_STATIC_ASSERT (G_REGEX_NO_START_OPTIMIZE == PCRE_NO_START_OPTIMIZE);
+
+G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED          == PCRE_ANCHORED);
+G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL            == PCRE_NOTBOL);
+G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL            == PCRE_NOTEOL);
+G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY          == PCRE_NOTEMPTY);
+G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL           == PCRE_PARTIAL);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR        == PCRE_NEWLINE_CR);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF        == PCRE_NEWLINE_LF);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF      == PCRE_NEWLINE_CRLF);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY       == PCRE_NEWLINE_ANY);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF   == PCRE_NEWLINE_ANYCRLF);
+G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF       == PCRE_BSR_ANYCRLF);
+G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY           == PCRE_BSR_UNICODE);
+G_STATIC_ASSERT (G_REGEX_MATCH_NO_START_OPTIMIZE == PCRE_NO_START_OPTIMIZE);
+G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT      == PCRE_PARTIAL_SOFT);
+G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD      == PCRE_PARTIAL_HARD);
+G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY_ATSTART  == PCRE_NOTEMPTY_ATSTART);
 
 /* These PCRE flags are unused or not exposed publically in GRegexFlags, so
  * it should be ok to reuse them for different things.
@@ -645,6 +649,9 @@ g_match_info_get_string (const GMatchInfo *match_info)
  * the argument of the last verb encountered in the whole matching
  * process. Otherwise, $NULL is returned.
  *
+ * See <ulink>man:pcrepattern</ulink> for more information on
+ * backtracking control verbs.
+ *
  * Returns: (transfer none): the mark, or %NULL
  *
  * Since: 2.34
diff --git a/glib/gregex.h b/glib/gregex.h
index 9e9501e..6417748 100644
--- a/glib/gregex.h
+++ b/glib/gregex.h
@@ -277,7 +277,8 @@ GQuark g_regex_error_quark (void);
  * G_REGEX_BSR_ANYCRLF: Usually any newline character or character sequence
  *     is recognised. If this option is set, then "\R" only recognizes the newline
  *    characters '\r', '\n' and '\r\n'. Since: 2.34
- * 
+ * @G_REGEX_NO_START_OPTIMIZE: Disable some optimizations that will cause incorrect
+ *     results for g_match_info_get_mark() when using backtracking control verbs. Since: 2.34
  *
  * Flags specifying compile-time options.
  *
@@ -303,7 +304,8 @@ typedef enum
   G_REGEX_NEWLINE_CRLF      = G_REGEX_NEWLINE_CR | G_REGEX_NEWLINE_LF,
   G_REGEX_NEWLINE_ANYCRLF   = G_REGEX_NEWLINE_CR | 1 << 22,
   G_REGEX_BSR_ANYCRLF       = 1 << 23,
-  G_REGEX_JAVASCRIPT_COMPAT = 1 << 25
+  G_REGEX_JAVASCRIPT_COMPAT = 1 << 25,
+  G_REGEX_NO_START_OPTIMIZE = 1 << 26
 } GRegexCompileFlags;
 
 /**
@@ -369,6 +371,8 @@ typedef enum
  * @G_REGEX_MATCH_NOTEMPTY_ATSTART: Like #G_REGEX_MATCH_NOTEMPTY, but only applied to
  *     the start of the matched string. For anchored
  *     patterns this can only happen for pattern containing "\K". Since: 2.34
+ * @G_REGEX_MATCH_NO_START_OPTIMIZE: Disable some optimizations that will cause incorrect
+ *     results for g_match_info_get_mark() when using backtracking control verbs. Since: 2.34
  *
  * Flags specifying match-time options.
  *
@@ -378,21 +382,22 @@ typedef enum
  * adding a new flag. */
 typedef enum
 {
-  G_REGEX_MATCH_ANCHORED         = 1 << 4,
-  G_REGEX_MATCH_NOTBOL           = 1 << 7,
-  G_REGEX_MATCH_NOTEOL           = 1 << 8,
-  G_REGEX_MATCH_NOTEMPTY         = 1 << 10,
-  G_REGEX_MATCH_PARTIAL          = 1 << 15,
-  G_REGEX_MATCH_NEWLINE_CR       = 1 << 20,
-  G_REGEX_MATCH_NEWLINE_LF       = 1 << 21,
-  G_REGEX_MATCH_NEWLINE_CRLF     = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF,
-  G_REGEX_MATCH_NEWLINE_ANY      = 1 << 22,
-  G_REGEX_MATCH_NEWLINE_ANYCRLF  = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY,
-  G_REGEX_MATCH_BSR_ANYCRLF      = 1 << 23,
-  G_REGEX_MATCH_BSR_ANY          = 1 << 24,
-  G_REGEX_MATCH_PARTIAL_SOFT     = G_REGEX_MATCH_PARTIAL,
-  G_REGEX_MATCH_PARTIAL_HARD     = 1 << 27,
-  G_REGEX_MATCH_NOTEMPTY_ATSTART = 1 << 28
+  G_REGEX_MATCH_ANCHORED          = 1 << 4,
+  G_REGEX_MATCH_NOTBOL            = 1 << 7,
+  G_REGEX_MATCH_NOTEOL            = 1 << 8,
+  G_REGEX_MATCH_NOTEMPTY          = 1 << 10,
+  G_REGEX_MATCH_PARTIAL           = 1 << 15,
+  G_REGEX_MATCH_NEWLINE_CR        = 1 << 20,
+  G_REGEX_MATCH_NEWLINE_LF        = 1 << 21,
+  G_REGEX_MATCH_NEWLINE_CRLF      = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF,
+  G_REGEX_MATCH_NEWLINE_ANY       = 1 << 22,
+  G_REGEX_MATCH_NEWLINE_ANYCRLF   = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY,
+  G_REGEX_MATCH_BSR_ANYCRLF       = 1 << 23,
+  G_REGEX_MATCH_BSR_ANY           = 1 << 24,
+  G_REGEX_MATCH_NO_START_OPTIMIZE = 1 << 26,
+  G_REGEX_MATCH_PARTIAL_SOFT      = G_REGEX_MATCH_PARTIAL,
+  G_REGEX_MATCH_PARTIAL_HARD      = 1 << 27,
+  G_REGEX_MATCH_NOTEMPTY_ATSTART  = 1 << 28
 } GRegexMatchFlags;
 
 /**



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]