[glib/wip/pcre-update: 10/17] regex: Don't leak internal PCRE options



commit 5ebea3c4674847946d299d7d8b81a8c1ad986e54
Author: Christian Persch <chpe gnome org>
Date:   Thu Jun 7 18:11:49 2012 +0200

    regex: Don't leak internal PCRE options
    
    g_regex_get_compile_flags() and g_regex_get_match_flags()
    were leaking PCRE flags that don't exist in the corresponding
    public GRegexCompileFlags and GRegexMatchFlags; this change masks
    these internal flags.

 glib/gregex.c      |   36 +++++++++++++++++++++++++-------
 glib/tests/regex.c |   58 +++++++++++++++++++++++++++++++--------------------
 2 files changed, 63 insertions(+), 31 deletions(-)
---
diff --git a/glib/gregex.c b/glib/gregex.c
index a0aee80..d6ccc88 100644
--- a/glib/gregex.c
+++ b/glib/gregex.c
@@ -116,6 +116,11 @@
                               G_REGEX_NEWLINE_ANYCRLF   | \
                               G_REGEX_BSR_ANYCRLF)
 
+/* Mask of all GRegexCompileFlags values that are (not) passed through to PCRE */
+#define G_REGEX_COMPILE_PCRE_MASK (G_REGEX_COMPILE_MASK & ~G_REGEX_COMPILE_NONPCRE_MASK)
+#define G_REGEX_COMPILE_NONPCRE_MASK (G_REGEX_RAW              | \
+                                      G_REGEX_OPTIMIZE)
+
 /* Mask of all the possible values for GRegexMatchFlags. */
 #define G_REGEX_MATCH_MASK (G_REGEX_MATCH_ANCHORED         | \
                             G_REGEX_MATCH_NOTBOL           | \
@@ -159,14 +164,20 @@ G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
 G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF     == PCRE_BSR_ANYCRLF);
 G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY         == PCRE_BSR_UNICODE);
 
+/* These PCRE flags are unused or not exposed publicly in GRegexFlags, so
+ * it should be ok to reuse them for different things.
+ */
+G_STATIC_ASSERT (G_REGEX_OPTIMIZE          == PCRE_NO_UTF8_CHECK);
+G_STATIC_ASSERT (G_REGEX_RAW               == PCRE_UTF8);
+
 /* if the string is in UTF-8 use g_utf8_ functions, else use
  * use just +/- 1. */
-#define NEXT_CHAR(re, s) (((re)->compile_opts & PCRE_UTF8) ? \
-                                g_utf8_next_char (s) : \
-                                ((s) + 1))
-#define PREV_CHAR(re, s) (((re)->compile_opts & PCRE_UTF8) ? \
-                                g_utf8_prev_char (s) : \
-                                ((s) - 1))
+#define NEXT_CHAR(re, s) (((re)->compile_opts & G_REGEX_RAW) ? \
+                                ((s) + 1) : \
+                                g_utf8_next_char (s))
+#define PREV_CHAR(re, s) (((re)->compile_opts & G_REGEX_RAW) ? \
+                                ((s) - 1) : \
+                                g_utf8_prev_char (s))
 
 struct _GMatchInfo
 {
@@ -1269,6 +1280,7 @@ g_regex_new (const gchar         *pattern,
   gboolean optimize = FALSE;
   static volatile gsize initialised = 0;
   unsigned long int pcre_compile_options;
+  GRegexCompileFlags nonpcre_compile_options;
 
   g_return_val_if_fail (pattern != NULL, NULL);
   g_return_val_if_fail (error == NULL || *error == NULL, NULL);
@@ -1297,6 +1309,8 @@ g_regex_new (const gchar         *pattern,
       return NULL;
     }
 
+  nonpcre_compile_options = compile_options & G_REGEX_COMPILE_NONPCRE_MASK;
+
   /* G_REGEX_OPTIMIZE has the same numeric value of PCRE_NO_UTF8_CHECK,
    * as we do not need to wrap PCRE_NO_UTF8_CHECK. */
   if (compile_options & G_REGEX_OPTIMIZE)
@@ -1362,7 +1376,13 @@ g_regex_new (const gchar         *pattern,
    * compile options, e.g. "(?i)foo" will make the pcre structure store
    * PCRE_CASELESS even though it wasn't explicitly given for compilation. */
   pcre_fullinfo (re, NULL, PCRE_INFO_OPTIONS, &pcre_compile_options);
-  compile_options = pcre_compile_options;
+  compile_options = pcre_compile_options & G_REGEX_COMPILE_PCRE_MASK;
+
+  /* Don't leak PCRE_NEWLINE_ANY, which is part of PCRE_NEWLINE_ANYCRLF */
+  if ((pcre_compile_options & PCRE_NEWLINE_ANYCRLF) != PCRE_NEWLINE_ANYCRLF)
+    compile_options &= ~PCRE_NEWLINE_ANY;
+
+  compile_options |= nonpcre_compile_options;
 
   if (!(compile_options & G_REGEX_DUPNAMES))
     {
@@ -1517,7 +1537,7 @@ g_regex_get_match_flags (const GRegex *regex)
 {
   g_return_val_if_fail (regex != NULL, 0);
 
-  return regex->match_opts;
+  return regex->match_opts & G_REGEX_MATCH_MASK;
 }
 
 /**
diff --git a/glib/tests/regex.c b/glib/tests/regex.c
index b0362cd..a21464f 100644
--- a/glib/tests/regex.c
+++ b/glib/tests/regex.c
@@ -56,6 +56,8 @@ typedef struct {
   GRegexCompileFlags compile_opts;
   GRegexMatchFlags   match_opts;
   gint expected_error;
+  GRegexCompileFlags real_compile_opts;
+  GRegexMatchFlags real_match_opts;
 } TestNewData;
 
 static void
@@ -70,6 +72,9 @@ test_new (gconstpointer d)
   g_assert_no_error (error);
   g_assert_cmpstr (data->pattern, ==, g_regex_get_pattern (regex));
 
+  g_assert_cmphex (g_regex_get_compile_flags (regex) & data->real_compile_opts, ==, data->real_compile_opts);
+  g_assert_cmphex (g_regex_get_match_flags (regex) & data->real_match_opts, ==, data->real_match_opts);
+
   g_regex_unref (regex);
 }
 
@@ -81,11 +86,28 @@ test_new (gconstpointer d)
   data->compile_opts = _compile_opts;                    \
   data->match_opts = _match_opts;                        \
   data->expected_error = 0;                             \
+  data->real_compile_opts = _compile_opts;              \
+  data->real_match_opts = _match_opts;                  \
   path = g_strdup_printf ("/regex/new/%d", ++total);    \
   g_test_add_data_func (path, data, test_new);          \
   g_free (path);                                        \
 }
 
+#define TEST_NEW_CHECK_FLAGS(_pattern, _compile_opts, _match_opts, _real_compile_opts, _real_match_opts) { \
+  TestNewData *data; \
+  gchar *path; \
+  data = g_new0 (TestNewData, 1); \
+  data->pattern = _pattern; \
+  data->compile_opts = _compile_opts; \
+  data->match_opts = _match_opts; /* pass the caller's flags through instead of forcing 0 */ \
+  data->expected_error = 0; \
+  data->real_compile_opts = _real_compile_opts; /* flags test_new expects the getter to report */ \
+  data->real_match_opts = _real_match_opts; \
+  path = g_strdup_printf ("/regex/new-check-flags/%d", ++total); \
+  g_test_add_data_func (path, data, test_new); \
+  g_free (path); \
+}
+
 static void
 test_new_fail (gconstpointer d)
 {
@@ -1362,28 +1384,6 @@ test_match_all (gconstpointer d)
   }                                                                     \
 }
 
-#define PCRE_UTF8               0x00000800
-#define PCRE_NO_UTF8_CHECK      0x00002000
-#define PCRE_NEWLINE_ANY        0x00400000
-#define PCRE_UCP                0x20000000
-#define PCRE_BSR_UNICODE        0x01000000
-
-static void
-test_basic (void)
-{
-  GRegexCompileFlags cflags = G_REGEX_CASELESS | G_REGEX_EXTENDED | G_REGEX_OPTIMIZE;
-  GRegexMatchFlags mflags = G_REGEX_MATCH_NOTBOL | G_REGEX_MATCH_PARTIAL;
-  GRegex *regex;
-
-  regex = g_regex_new ("[A-Z]+", cflags, mflags, NULL);
-
-  g_assert (regex != NULL);
-  g_assert_cmpint (g_regex_get_compile_flags (regex), ==, cflags|PCRE_UTF8|PCRE_NO_UTF8_CHECK|PCRE_NEWLINE_ANY|PCRE_UCP|PCRE_BSR_UNICODE);
-  g_assert_cmpint (g_regex_get_match_flags (regex), ==, mflags|PCRE_NO_UTF8_CHECK);
-
-  g_regex_unref (regex);
-}
-
 static void
 test_properties (void)
 {
@@ -2043,7 +2043,6 @@ main (int argc, char *argv[])
 
   g_test_bug_base ("http://bugzilla.gnome.org/");
 
-  g_test_add_func ("/regex/basic", test_basic);
   g_test_add_func ("/regex/properties", test_properties);
   g_test_add_func ("/regex/class", test_class);
   g_test_add_func ("/regex/lookahead", test_lookahead);
@@ -2055,6 +2054,7 @@ main (int argc, char *argv[])
   g_test_add_func ("/regex/explicit-crlf", test_explicit_crlf);
 
   /* TEST_NEW(pattern, compile_opts, match_opts) */
+  TEST_NEW("[A-Z]+", G_REGEX_CASELESS | G_REGEX_EXTENDED | G_REGEX_OPTIMIZE, G_REGEX_MATCH_NOTBOL | G_REGEX_MATCH_PARTIAL);
   TEST_NEW("", 0, 0);
   TEST_NEW(".*", 0, 0);
   TEST_NEW(".*", G_REGEX_OPTIMIZE, 0);
@@ -2069,6 +2069,18 @@ main (int argc, char *argv[])
   /* This gives "internal error: code overflow" with pcre 6.0 */
   TEST_NEW("(?i)(?-i)", 0, 0);
 
+  /* Check that flags are correct if the pattern modifies them */
+  /* TEST_NEW_CHECK_FLAGS(pattern, compile_opts, match_opts, real_compile_opts, real_match_opts) */
+  TEST_NEW_CHECK_FLAGS ("foo", G_REGEX_OPTIMIZE, 0, G_REGEX_OPTIMIZE, 0);
+  TEST_NEW_CHECK_FLAGS ("foo", G_REGEX_RAW, 0, G_REGEX_RAW, 0);
+  TEST_NEW_CHECK_FLAGS ("(?i)foo", 0, 0, G_REGEX_CASELESS, 0);
+  TEST_NEW_CHECK_FLAGS ("(?i)foo", 0, 0, G_REGEX_CASELESS, 0);
+  TEST_NEW_CHECK_FLAGS ("(?x)foo bar", 0, 0, G_REGEX_EXTENDED, 0);
+  TEST_NEW_CHECK_FLAGS ("^.*", 0, 0, G_REGEX_ANCHORED, 0);
+  TEST_NEW_CHECK_FLAGS ("^.*", 0, 0, G_REGEX_ANCHORED, 0);
+  TEST_NEW_CHECK_FLAGS ("(?U)[a-z]+", 0, 0, G_REGEX_UNGREEDY, 0);
+  TEST_NEW_CHECK_FLAGS ("(?J)foo", 0, 0, G_REGEX_DUPNAMES, 0);
+
   /* TEST_NEW_FAIL(pattern, compile_opts, expected_error) */
   TEST_NEW_FAIL("(", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS);
   TEST_NEW_FAIL(")", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS);



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]