[glib/wip/pcre-update: 5/8] regex: Add G_REGEX_RAW_LOCK

From: Christian Persch <chpe src gnome org>
To: commits-list gnome org
Cc:
Subject: [glib/wip/pcre-update: 5/8] regex: Add G_REGEX_RAW_LOCK
Date: Sun, 23 Nov 2014 18:48:11 +0000 (UTC)
commit 1e9deb5be2ac4133b3b6c0b18795e110fe93b47e
Author: Christian Persch <chpe gnome org>
Date:   Sat Nov 22 21:42:35 2014 +0100

    regex: Add G_REGEX_RAW_LOCK
    
    When using G_REGEX_RAW, the pattern could still switch to UTF mode by using
    "(*UTF)", which the application would not expect. PCRE 8.33 adds a new flag
    (PCRE_NEVER_UTF) to prevent this.

 glib/gregex.c      |    8 +++++++-
 glib/gregex.h      |   11 +++++++++--
 glib/tests/regex.c |    1 +
 3 files changed, 17 insertions(+), 3 deletions(-)
---
diff --git a/glib/gregex.c b/glib/gregex.c
index 0658041..ad75bd8 100644
--- a/glib/gregex.c
+++ b/glib/gregex.c
@@ -125,7 +125,8 @@
                               G_REGEX_NEWLINE_CRLF      | \
                               G_REGEX_NEWLINE_ANYCRLF   | \
                               G_REGEX_BSR_ANYCRLF       | \
-                              G_REGEX_JAVASCRIPT_COMPAT)
+                              G_REGEX_JAVASCRIPT_COMPAT | \
+                              G_REGEX_RAW_LOCK)
 
 /* Mask of all GRegexCompileFlags values that are (not) passed trough to PCRE */
 #define G_REGEX_COMPILE_PCRE_MASK (G_REGEX_COMPILE_MASK & ~G_REGEX_COMPILE_NONPCRE_MASK)
@@ -166,6 +167,7 @@ G_STATIC_ASSERT (G_REGEX_NEWLINE_CRLF      == PCRE_NEWLINE_CRLF);
 G_STATIC_ASSERT (G_REGEX_NEWLINE_ANYCRLF   == PCRE_NEWLINE_ANYCRLF);
 G_STATIC_ASSERT (G_REGEX_BSR_ANYCRLF       == PCRE_BSR_ANYCRLF);
 G_STATIC_ASSERT (G_REGEX_JAVASCRIPT_COMPAT == PCRE_JAVASCRIPT_COMPAT);
+G_STATIC_ASSERT (G_REGEX_RAW_LOCK          == PCRE_NEVER_UTF);
 
 G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED         == PCRE_ANCHORED);
 G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL           == PCRE_NOTBOL);
@@ -518,6 +520,9 @@ translate_compile_error (gint *errcode, const gchar **errmsg)
     case G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE:
       *errmsg = _("character value in \\u.... sequence is too large");
       break;
+    case G_REGEX_ERROR_RAW_LOCK:
+      *errmsg = _("switching to UTF-8 mode is disallowed");
+      break;
 
     case 116: /* erroffset passed as NULL */
       /* This should not happen as we never pass a NULL erroffset */
@@ -1302,6 +1307,7 @@ g_regex_new (const gchar         *pattern,
   g_return_val_if_fail (pattern != NULL, NULL);
   g_return_val_if_fail (error == NULL || *error == NULL, NULL);
   g_return_val_if_fail ((compile_options & ~G_REGEX_COMPILE_MASK) == 0, NULL);
+  g_return_val_if_fail ((compile_options & G_REGEX_RAW) || (compile_options & G_REGEX_RAW_LOCK) == 0, NULL);
   g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, NULL);
 
   if (g_once_init_enter (&initialised))
diff --git a/glib/gregex.h b/glib/gregex.h
index eff6d60..b22fcf5 100644
--- a/glib/gregex.h
+++ b/glib/gregex.h
@@ -133,6 +133,9 @@ G_BEGIN_DECLS
  *     "(*SKIP)", or "(*THEN)". Since: 2.34
  * @G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE: the character value in the \\u sequence is
  *     too large. Since: 2.34
+ * @G_REGEX_ERROR_RAW_LOCK: switching to UTF-8 mode from the pattern is disallowed.
+ *     This happens when using %G_REGEX_RAW_LOCK and the pattern contains "(*UTF)"
+ *     or "(*UTF8)". Since: 2.44
  *
  * Error codes returned by regular expressions functions.
  *
@@ -198,7 +201,8 @@ typedef enum
   G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS = 171,
   G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES = 172,
   G_REGEX_ERROR_NAME_TOO_LONG = 175,
-  G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE = 176
+  G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE = 176,
+  G_REGEX_ERROR_RAW_LOCK = 178
 } GRegexError;
 
 /**
@@ -286,6 +290,8 @@ GQuark g_regex_error_quark (void);
  *    characters '\r', '\n' and '\r\n'. Since: 2.34
  * @G_REGEX_JAVASCRIPT_COMPAT: Changes behaviour so that it is compatible with
  *     JavaScript rather than PCRE. Since: 2.34
+ * @G_REGEX_RAW_LOCK: Disallow switching to UTF-8 mode via "(*UTF)" in the pattern
+ *     being compiled. %G_REGEX_RAW must also be set when using this flag. Since: 2.44
  *
  * Flags specifying compile-time options.
  *
@@ -313,7 +319,8 @@ typedef enum
   G_REGEX_NEWLINE_CRLF      = G_REGEX_NEWLINE_CR | G_REGEX_NEWLINE_LF,
   G_REGEX_NEWLINE_ANYCRLF   = G_REGEX_NEWLINE_CR | 1 << 22,
   G_REGEX_BSR_ANYCRLF       = 1 << 23,
-  G_REGEX_JAVASCRIPT_COMPAT = 1 << 25
+  G_REGEX_JAVASCRIPT_COMPAT = 1 << 25,
+  G_REGEX_RAW_LOCK          = 1 << 16
 } GRegexCompileFlags;
 
 /**
diff --git a/glib/tests/regex.c b/glib/tests/regex.c
index 92679b9..b3f2ecd 100644
--- a/glib/tests/regex.c
+++ b/glib/tests/regex.c
@@ -2279,6 +2279,7 @@ main (int argc, char *argv[])
   TEST_NEW_FAIL ("a[\\NB]c", 0, G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS);
   TEST_NEW_FAIL 
("(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEFG)XX",
 0, G_REGEX_ERROR_NAME_TOO_LONG);
   TEST_NEW_FAIL ("\\u0100", G_REGEX_RAW | G_REGEX_JAVASCRIPT_COMPAT, 
G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE);
+  TEST_NEW_FAIL ("(*UTF)", G_REGEX_RAW | G_REGEX_RAW_LOCK, G_REGEX_ERROR_RAW_LOCK);
 
   /* These errors can't really be tested sanely:
    * G_REGEX_ERROR_EXPRESSION_TOO_LARGE
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]