[smuxi: 53/111] Engine: specify common TLDs for heuristic URL matching



commit be5b05a1f3bab190d0efd2243c2a92eb2e986f1f
Author: Mirco Bauer <meebey meebey net>
Date:   Sun Jan 5 21:09:09 2014 +0100

    Engine: specify common TLDs for heuristic URL matching
    
    autogen.sh, configure.ac, Gst.Buffer.Unref() are unlikely URLs

 src/Engine/Config/MessageBuilderSettings.cs |   13 +++++++++----
 1 files changed, 9 insertions(+), 4 deletions(-)
---
diff --git a/src/Engine/Config/MessageBuilderSettings.cs b/src/Engine/Config/MessageBuilderSettings.cs
index db36845..408fdfc 100644
--- a/src/Engine/Config/MessageBuilderSettings.cs
+++ b/src/Engine/Config/MessageBuilderSettings.cs
@@ -54,11 +54,12 @@ namespace Smuxi.Engine
 
         void InitDefaultLinks()
         {
-            string path_last_chars = @"a-z0-9#/%&=\-_+";
+            string path_last_chars = @"a-zA-Z0-9#/%&=\-_+";
             string path_chars = path_last_chars + @")(?.,";
             string domainchars = @"[a-z0-9\-]+";
             string subdomain = domainchars + @"\.";
-            string tld = @"com|net|org|info|biz|gov|name|edu|museum|[a-z][a-z]";
+            string common_tld = @"de|es|im|us|com|net|org|info|biz|gov|name|edu|onion|museum";
+            string tld = common_tld + @"|[a-z][a-z]";
             string domain = @"(?:(?:" + subdomain + ")+(?:" + tld + ")|localhost)";
             string port = ":[1-9][0-9]{1,4}";
             string user = "[a-z0-9._%+-]+@";
@@ -87,8 +88,12 @@ namespace Smuxi.Engine
                 LinkFormat = "mailto:{1}";
             });
 
-            // addresses without protocol
-            regex = new Regex(address, RegexOptions.IgnoreCase);
+            // addresses without protocol (heuristic)
+            // only accept well-known TLDs so that autogen.sh, configure.ac or
+            // Gst.Buffer.Unref() are not mistaken for URLs
+            string heuristic_domain = @"(?:(?:" + subdomain + ")+(?:" + common_tld + ")|localhost)";
+            string heuristic_address = heuristic_domain + "(?:" + path + ")?";
+            regex = new Regex(heuristic_address, RegexOptions.IgnoreCase);
             SmartLinks.Add(new SmartLink(regex) {
                 LinkFormat = "http://{0}";
             });


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]