[gnome-shell] findUrl: document the URL-matching regex



commit 92a85071bc910f9082b76b0345975b2773b4267e
Author: Dan Winship <danw gnome org>
Date:   Wed Apr 13 09:18:00 2011 -0400

    findUrl: document the URL-matching regex
    
    Explode the regex onto multiple lines, and add comments explaining the
    pieces. Also, change ()s to (?:)s (non-capturing groups) where
    appropriate, and replace the UTF-8 characters with \u escapes so that
    they actually work.
    
    https://bugzilla.gnome.org/show_bug.cgi?id=636252

 js/misc/util.js |   26 ++++++++++++++++++++++++--
 1 files changed, 24 insertions(+), 2 deletions(-)
---
diff --git a/js/misc/util.js b/js/misc/util.js
index fc0b54a..7c14858 100644
--- a/js/misc/util.js
+++ b/js/misc/util.js
@@ -7,8 +7,30 @@ const Shell = imports.gi.Shell;
 
 const Main = imports.ui.main;
 
-/* http://daringfireball.net/2010/07/improved_regex_for_matching_urls */
-const _urlRegexp = new RegExp('\\b(([a-z][\\w-]+:(/{1,3}|[a-z0-9%])|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,4}/)([^\\s()<>]+|\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\))+(\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\)|[^\\s`!()\\[\\]{};:\'\\".,<>?«»“”‘’]))', 'gi');
+// http://daringfireball.net/2010/07/improved_regex_for_matching_urls
+const _balancedParens = '\\((?:[^\\s()<>]+|(?:\\([^\\s()<>]+\\)))*\\)'; // "(...)" that may contain one nested level of "(...)"
+const _notTrailingJunk = '[^\\s`!()\\[\\]{};:\'\\".,<>?\u00AB\u00BB\u201C\u201D\u2018\u2019]'; // one char that is not whitespace, listed punctuation/brackets, or a quote mark (U+00AB, U+00BB, U+201C, U+201D, U+2018, U+2019)
+
+const _urlRegexp = new RegExp(                     // pattern used to locate URLs inside a string
+    '\\b(' +                                       // word boundary, then capture the whole URL
+        '(?:' +                                   // start with one of:
+            '[a-z][\\w-]+:(?:/{1,3}|[a-z0-9%])' + // scheme:data (scheme, colon, then 1-3 slashes or an alphanumeric/% char)
+            '|' +                                 // or
+            'www\\d{0,3}[.]' +                    // www. (optionally www followed by up to three digits)
+            '|' +                                 // or
+            '[a-z0-9.\\-]+[.][a-z]{2,4}/' +       // foo.xx/ (host, dot, 2-4 letter suffix, slash)
+        ')' +
+        '(?:' +                                   // one or more:
+            '[^\\s()<>]+' +                       // run of non-space non-()<> characters
+            '|' +                                 // or
+            _balancedParens +                     // balanced parens
+        ')+' +
+        '(?:' +                                   // end with:
+            _balancedParens +                     // balanced parens
+            '|' +                                 // or
+            _notTrailingJunk +                    // last non-junk char
+        ')' +
+    ')', 'gi');                                   // flags: g = find every match, i = ignore case
 
 // findUrls:
 // @str: string to find URLs in



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]