[kupfer] plugin.text: Improve fuzzy URL parsing
- From: Ulrik Sverdrup <usverdrup src gnome org>
- To: svn-commits-list gnome org
- Cc:
- Subject: [kupfer] plugin.text: Improve fuzzy URL parsing
- Date: Mon, 7 Sep 2009 21:49:17 +0000 (UTC)
commit d49ced9c5feffda38954290479f6cd4957aeef7e
Author: Ulrik Sverdrup <ulrik sverdrup gmail com>
Date: Wed Sep 2 15:25:12 2009 +0200
plugin.text: Improve fuzzy URL parsing
Make sure that we don't parse numbers (3.14 etc.) as URLs: check
for at least some letters, and require all parts to start with
alphanumerics (so that =sin(2.2) is not parsed as a URL).
kupfer/plugin/text.py | 8 +++++---
1 files changed, 5 insertions(+), 3 deletions(-)
---
diff --git a/kupfer/plugin/text.py b/kupfer/plugin/text.py
index 2f2ab34..309e65b 100644
--- a/kupfer/plugin/text.py
+++ b/kupfer/plugin/text.py
@@ -51,15 +51,17 @@ class URLTextSource (TextSource):
def get_rank(self):
return 75
def get_items(self, text):
- """ A bit of hackery to recognize URLs and web addresses
- alike"""
text = text.strip()
components = list(urlparse(text))
domain = "".join(components[1:])
dotparts = domain.rsplit(".")
+ # 1. Domain name part is one word (without spaces)
+ # 2. Urlparse parses a scheme (http://), else we apply heuristics
if len(domain.split()) == 1 and (components[0] or ("." in domain and
- len(dotparts) >= 2 and len(dotparts[-1]) >= 2)):
+ len(dotparts) >= 2 and len(dotparts[-1]) >= 2 and
+ any(char.isalpha() for char in domain) and
+ all(part[:1].isalnum() for part in dotparts))):
if not components[0]:
url = "http://" + "".join(components[1:])
else:
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]