[gnome-shell/wip/fmuellner/fix-regex-lockup: 46/46] utils: Simplify URL regex to only support one layer of parentheses



commit 5cc42b18b0e6ced675af446d0d38fd3a354a25f9
Author: Florian Müllner <fmuellner gnome org>
Date:   Tue Feb 27 13:20:02 2018 +0100

    utils: Simplify URL regex to only support one layer of parentheses
    
    The author of the original URL-matching regex warns[0] that the pattern may
    cause certain regex engines to lock up on certain input, namely text that
    contains parentheses. It turns out SpiderMonkey is affected, but rather
    than switching to the author's improved version (which is still crazy),
    simplify the pattern a bit by removing support for nested parentheses in
    URLs. Even a single pair of parentheses is extremely rare, so this is
    unlikely to make a noticeable difference (other than not locking up
    SpiderMonkey, of course) ...
    
    [0] http://daringfireball.net/2010/07/improved_regex_for_matching_urls

 js/misc/util.js   | 2 +-
 tests/unit/url.js | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)
---
diff --git a/js/misc/util.js b/js/misc/util.js
index 1d73d2076..38b67e891 100644
--- a/js/misc/util.js
+++ b/js/misc/util.js
@@ -17,7 +17,7 @@ const Params = imports.misc.params;
 var SCROLL_TIME = 0.1;
 
 // http://daringfireball.net/2010/07/improved_regex_for_matching_urls
-const _balancedParens = '\\((?:[^\\s()<>]+|(?:\\(?:[^\\s()<>]+\\)))*\\)';
+const _balancedParens = '\\([^\\s()<>]+\\)';
 const _leadingJunk = '[\\s`(\\[{\'\\"<\u00AB\u201C\u2018]';
 const _notTrailingJunk = '[^\\s`!()\\[\\]{};:\'\\".,<>?\u00AB\u00BB\u201C\u201D\u2018\u2019]';
 
diff --git a/tests/unit/url.js b/tests/unit/url.js
index 251725ca6..158316bb4 100644
--- a/tests/unit/url.js
+++ b/tests/unit/url.js
@@ -38,6 +38,10 @@ const tests = [
       output: [ { url: 'http://www.gnome.org:99/port', pos: 10 } ] },
     { input: 'This is an ftp://www.gnome.org/ test.',
       output: [ { url: 'ftp://www.gnome.org/', pos: 11 } ] },
+    { input: 'https://www.gnome.org/(some_url,_with_very_unusual_characters)',
+      output: [ { url: 'https://www.gnome.org/(some_url,_with_very_unusual_characters)', pos: 0 } ] },
+    { input: 'https://www.gnome.org/(some_url_with_unbalanced_parenthesis',
+      output: [ { url: 'https://www.gnome.org/', pos: 0 } ] },
 
     { input: 'Visit http://www.gnome.org/ and http://developer.gnome.org',
       output: [ { url: 'http://www.gnome.org/', pos: 6 },
@@ -68,4 +72,4 @@ for (let i = 0; i < tests.length; i++) {
        JsUnit.assertEquals('Test ' + i + ', match ' + j + ' position',
                            match[j].pos, tests[i].output[j].pos);
     }
-}
\ No newline at end of file
+}


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]