[glib/wip/pcre-update: 20/20] regex: Add NO_START_OPTIMIZE compile and match flags



commit 27f1d2bf84d229227f04f8fd9f3abbcb184b340d
Author: Christian Persch <chpe gnome org>
Date:   Fri Jun 8 00:56:44 2012 +0200

    regex: Add NO_START_OPTIMIZE compile and match flags
    
    PCRE_NO_START_OPTIMIZE has existed since PCRE 7.9, but was not useful before
    since it only affects callouts (which GRegex doesn't support) and backtracking
    control verbs, which the last commit makes use of.

 glib/gregex.c |   69 +++++++++++++++++++++++++++++++-------------------------
 glib/gregex.h |   39 ++++++++++++++++++--------------
 2 files changed, 60 insertions(+), 48 deletions(-)
---
diff --git a/glib/gregex.c b/glib/gregex.c
index 9413f2a..eb2b6be 100644
--- a/glib/gregex.c
+++ b/glib/gregex.c
@@ -115,23 +115,25 @@
                               G_REGEX_NEWLINE_CRLF      | \
                               G_REGEX_NEWLINE_ANYCRLF   | \
                               G_REGEX_BSR_ANYCRLF       | \
-                              G_REGEX_JAVASCRIPT_COMPAT)
+                              G_REGEX_JAVASCRIPT_COMPAT | \
+                              G_REGEX_NO_START_OPTIMIZE)
 
 /* Mask of all the possible values for GRegexMatchFlags. */
-#define G_REGEX_MATCH_MASK (G_REGEX_MATCH_ANCHORED         | \
-                            G_REGEX_MATCH_NOTBOL           | \
-                            G_REGEX_MATCH_NOTEOL           | \
-                            G_REGEX_MATCH_NOTEMPTY         | \
-                            G_REGEX_MATCH_PARTIAL          | \
-                            G_REGEX_MATCH_NEWLINE_CR       | \
-                            G_REGEX_MATCH_NEWLINE_LF       | \
-                            G_REGEX_MATCH_NEWLINE_CRLF     | \
-                            G_REGEX_MATCH_NEWLINE_ANY      | \
-                            G_REGEX_MATCH_NEWLINE_ANYCRLF  | \
-                            G_REGEX_MATCH_BSR_ANYCRLF      | \
-                            G_REGEX_MATCH_BSR_ANY          | \
-                            G_REGEX_MATCH_PARTIAL_SOFT     | \
-                            G_REGEX_MATCH_PARTIAL_HARD     | \
+#define G_REGEX_MATCH_MASK (G_REGEX_MATCH_ANCHORED          | \
+                            G_REGEX_MATCH_NOTBOL            | \
+                            G_REGEX_MATCH_NOTEOL            | \
+                            G_REGEX_MATCH_NOTEMPTY          | \
+                            G_REGEX_MATCH_PARTIAL           | \
+                            G_REGEX_MATCH_NEWLINE_CR        | \
+                            G_REGEX_MATCH_NEWLINE_LF        | \
+                            G_REGEX_MATCH_NEWLINE_CRLF      | \
+                            G_REGEX_MATCH_NEWLINE_ANY       | \
+                            G_REGEX_MATCH_NEWLINE_ANYCRLF   | \
+                            G_REGEX_MATCH_BSR_ANYCRLF       | \
+                            G_REGEX_MATCH_BSR_ANY           | \
+                            G_REGEX_MATCH_NO_START_OPTIMIZE | \
+                            G_REGEX_MATCH_PARTIAL_SOFT      | \
+                            G_REGEX_MATCH_PARTIAL_HARD      | \
                             G_REGEX_MATCH_NOTEMPTY_ATSTART)
 
 /* we rely on these flags having the same values */
@@ -150,22 +152,24 @@ G_STATIC_ASSERT (G_REGEX_NEWLINE_CRLF      == PCRE_NEWLINE_CRLF);
 G_STATIC_ASSERT (G_REGEX_NEWLINE_ANYCRLF   == PCRE_NEWLINE_ANYCRLF);
 G_STATIC_ASSERT (G_REGEX_BSR_ANYCRLF       == PCRE_BSR_ANYCRLF);
 G_STATIC_ASSERT (G_REGEX_JAVASCRIPT_COMPAT == PCRE_JAVASCRIPT_COMPAT);
-
-G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED         == PCRE_ANCHORED);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL           == PCRE_NOTBOL);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL           == PCRE_NOTEOL);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY         == PCRE_NOTEMPTY);
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL          == PCRE_PARTIAL);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR       == PCRE_NEWLINE_CR);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF       == PCRE_NEWLINE_LF);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF     == PCRE_NEWLINE_CRLF);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY      == PCRE_NEWLINE_ANY);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF  == PCRE_NEWLINE_ANYCRLF);
-G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF      == PCRE_BSR_ANYCRLF);
-G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY          == PCRE_BSR_UNICODE);
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT     == PCRE_PARTIAL_SOFT);
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD     == PCRE_PARTIAL_HARD);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY_ATSTART == PCRE_NOTEMPTY_ATSTART);
+G_STATIC_ASSERT (G_REGEX_NO_START_OPTIMIZE == PCRE_NO_START_OPTIMIZE);
+
+G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED          == PCRE_ANCHORED);
+G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL            == PCRE_NOTBOL);
+G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL            == PCRE_NOTEOL);
+G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY          == PCRE_NOTEMPTY);
+G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL           == PCRE_PARTIAL);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR        == PCRE_NEWLINE_CR);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF        == PCRE_NEWLINE_LF);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF      == PCRE_NEWLINE_CRLF);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY       == PCRE_NEWLINE_ANY);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF   == PCRE_NEWLINE_ANYCRLF);
+G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF       == PCRE_BSR_ANYCRLF);
+G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY           == PCRE_BSR_UNICODE);
+G_STATIC_ASSERT (G_REGEX_MATCH_NO_START_OPTIMIZE == PCRE_NO_START_OPTIMIZE);
+G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT      == PCRE_PARTIAL_SOFT);
+G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD      == PCRE_PARTIAL_HARD);
+G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY_ATSTART  == PCRE_NOTEMPTY_ATSTART);
 
 /* if the string is in UTF-8 use g_utf8_ functions, else use
  * use just +/- 1. */
@@ -634,6 +638,9 @@ g_match_info_get_string (const GMatchInfo *match_info)
  * the argument of the last verb encountered in the whole matching
  * process. Otherwise, $NULL is returned.
  *
+ * See <ulink>man:pcrepattern</ulink> for more information on
+ * backtracking control verbs.
+ *
  * Returns: (transfer none): the mark, or %NULL
  *
  * Since: 2.34
diff --git a/glib/gregex.h b/glib/gregex.h
index 9e9501e..6417748 100644
--- a/glib/gregex.h
+++ b/glib/gregex.h
@@ -277,7 +277,8 @@ GQuark g_regex_error_quark (void);
  * G_REGEX_BSR_ANYCRLF: Usually any newline character or character sequence
  *     is recognised. If this option is set, then "\R" only recognizes the newline
  *    characters '\r', '\n' and '\r\n'. Since: 2.34
- * 
+ * @G_REGEX_NO_START_OPTIMIZE: Disable some optimizations that would otherwise cause incorrect
+ *     results for g_match_info_get_mark() when using backtracking control verbs. Since: 2.34
  *
  * Flags specifying compile-time options.
  *
@@ -303,7 +304,8 @@ typedef enum
   G_REGEX_NEWLINE_CRLF      = G_REGEX_NEWLINE_CR | G_REGEX_NEWLINE_LF,
   G_REGEX_NEWLINE_ANYCRLF   = G_REGEX_NEWLINE_CR | 1 << 22,
   G_REGEX_BSR_ANYCRLF       = 1 << 23,
-  G_REGEX_JAVASCRIPT_COMPAT = 1 << 25
+  G_REGEX_JAVASCRIPT_COMPAT = 1 << 25,
+  G_REGEX_NO_START_OPTIMIZE = 1 << 26
 } GRegexCompileFlags;
 
 /**
@@ -369,6 +371,8 @@ typedef enum
  * @G_REGEX_MATCH_NOTEMPTY_ATSTART: Like #G_REGEX_MATCH_NOTEMPTY, but only applied to
  *     the start of the matched string. For anchored
  *     patterns this can only happen for pattern containing "\K". Since: 2.34
+ * @G_REGEX_MATCH_NO_START_OPTIMIZE: Disable some optimizations that would otherwise cause incorrect
+ *     results for g_match_info_get_mark() when using backtracking control verbs. Since: 2.34
  *
  * Flags specifying match-time options.
  *
@@ -378,21 +382,22 @@ typedef enum
  * adding a new flag. */
 typedef enum
 {
-  G_REGEX_MATCH_ANCHORED         = 1 << 4,
-  G_REGEX_MATCH_NOTBOL           = 1 << 7,
-  G_REGEX_MATCH_NOTEOL           = 1 << 8,
-  G_REGEX_MATCH_NOTEMPTY         = 1 << 10,
-  G_REGEX_MATCH_PARTIAL          = 1 << 15,
-  G_REGEX_MATCH_NEWLINE_CR       = 1 << 20,
-  G_REGEX_MATCH_NEWLINE_LF       = 1 << 21,
-  G_REGEX_MATCH_NEWLINE_CRLF     = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF,
-  G_REGEX_MATCH_NEWLINE_ANY      = 1 << 22,
-  G_REGEX_MATCH_NEWLINE_ANYCRLF  = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY,
-  G_REGEX_MATCH_BSR_ANYCRLF      = 1 << 23,
-  G_REGEX_MATCH_BSR_ANY          = 1 << 24,
-  G_REGEX_MATCH_PARTIAL_SOFT     = G_REGEX_MATCH_PARTIAL,
-  G_REGEX_MATCH_PARTIAL_HARD     = 1 << 27,
-  G_REGEX_MATCH_NOTEMPTY_ATSTART = 1 << 28
+  G_REGEX_MATCH_ANCHORED          = 1 << 4,
+  G_REGEX_MATCH_NOTBOL            = 1 << 7,
+  G_REGEX_MATCH_NOTEOL            = 1 << 8,
+  G_REGEX_MATCH_NOTEMPTY          = 1 << 10,
+  G_REGEX_MATCH_PARTIAL           = 1 << 15,
+  G_REGEX_MATCH_NEWLINE_CR        = 1 << 20,
+  G_REGEX_MATCH_NEWLINE_LF        = 1 << 21,
+  G_REGEX_MATCH_NEWLINE_CRLF      = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF,
+  G_REGEX_MATCH_NEWLINE_ANY       = 1 << 22,
+  G_REGEX_MATCH_NEWLINE_ANYCRLF   = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY,
+  G_REGEX_MATCH_BSR_ANYCRLF       = 1 << 23,
+  G_REGEX_MATCH_BSR_ANY           = 1 << 24,
+  G_REGEX_MATCH_NO_START_OPTIMIZE = 1 << 26,
+  G_REGEX_MATCH_PARTIAL_SOFT      = G_REGEX_MATCH_PARTIAL,
+  G_REGEX_MATCH_PARTIAL_HARD      = 1 << 27,
+  G_REGEX_MATCH_NOTEMPTY_ATSTART  = 1 << 28
 } GRegexMatchFlags;
 
 /**



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]