[epiphany/gnome-3-18] uri-tester: Ensure regexps are properly constructed



commit d1d3db843c2c73e5fef732839c9a3c25147a294d
Author: Adrian Perez de Castro <aperez igalia com>
Date:   Fri Feb 3 00:54:51 2017 +0200

    uri-tester: Ensure regexps are properly constructed
    
    This adds a few more cases to the escaping done when converting an AdBlock
    non-regexp "simple pattern" from a rule into a GRegex. This patch does the
    following:
    
    - Adds escaping to some of the regexp metacharacters which were not being
      handled: (){}+.|\
    - Adds support for using a vertical bar at the end of a pattern to anchor the
      match at the end.
    - Adds support for using ^ to match a "separator character" (a non-letter,
      non-number, or one of _-.%).
    
    This also adds as many comment lines as code, which in this particular case
    is probably a good thing, so reading the code in the future does not require
    checking each case against the GRegex documentation.
    
    https://bugzilla.gnome.org/show_bug.cgi?id=777714

 embed/web-extension/uri-tester.c |   32 ++++++++++++++++----------------
 1 files changed, 16 insertions(+), 16 deletions(-)
---
diff --git a/embed/web-extension/uri-tester.c b/embed/web-extension/uri-tester.c
index 271ae80..aa3a639 100644
--- a/embed/web-extension/uri-tester.c
+++ b/embed/web-extension/uri-tester.c
@@ -421,7 +421,6 @@ static GString *
 uri_tester_fixup_regexp (const char *prefix, char *src)
 {
   GString *str;
-  int len = 0;
 
   if (!src)
     return NULL;
@@ -441,21 +440,27 @@ uri_tester_fixup_regexp (const char *prefix, char *src)
         case '*':
           g_string_append (str, ".*");
           break;
-          /*case '.':
-            g_string_append (str, "\\.");
-            break;*/
+        case '^':
+          g_string_append (str, "([^a-zA-Z\\d]|[_\\-\\.%])");
+          break;
+        case '|':
+          if (src[1] == '\0')
+            g_string_append (str, "$");
+          else
+            g_string_append (str, "\\|");
+          break;
+        case '.':
+        case '+':
         case '?':
         case '[':
         case ']':
+        case '{':
+        case '}':
+        case '(':
+        case ')':
+        case '\\':
           g_string_append_printf (str, "\\%c", *src);
           break;
-        case '|':
-          /* FIXME: We actually need to match :[0-9]+ or '/'. Sign means
-             "here could be port number or nothing". So bla.com^ will match
-             bla.com/ or bla.com:8080/ but not bla.com.au/ */
-        case '^':
-        case '+':
-          break;
         default:
           g_string_append_printf (str,"%c", *src);
           break;
@@ -464,11 +469,6 @@ uri_tester_fixup_regexp (const char *prefix, char *src)
     }
   while (*src);
 
-  len = str->len;
-  /* We dont need .* in the end of url. Thats stupid */
-  if (str->str && str->str[len-1] == '*' && str->str[len-2] == '.')
-    g_string_erase (str, len-2, 2);
-
   return str;
 }
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]