[pan2: 3/23] another url update.
- From: Petr Kovář <pmkovar src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [pan2: 3/23] another url update.
- Date: Sun, 29 May 2011 13:03:48 +0000 (UTC)
commit 4512976efbca96a6127b8c88991dd39b5f10f1f7
Author: K. Haley <haleykd users sf net>
Date: Sat Feb 12 01:15:52 2011 -0700
another url update.
pan/usenet-utils/url-find-test.cc | 3 +++
pan/usenet-utils/url-find.cc | 23 ++++++++++++++++-------
2 files changed, 19 insertions(+), 7 deletions(-)
---
diff --git a/pan/usenet-utils/url-find-test.cc b/pan/usenet-utils/url-find-test.cc
index 076213a..62ac3bf 100644
--- a/pan/usenet-utils/url-find-test.cc
+++ b/pan/usenet-utils/url-find-test.cc
@@ -74,6 +74,9 @@ main (void)
check (url_find (in, out))
check (out == "lost foo_bar rebelbase com")
+ in = "A URl 'http://www.www.com'?";
+ check (url_find (in, out))
+ check (out == "http://www.www.com")
// success
return 0;
diff --git a/pan/usenet-utils/url-find.cc b/pan/usenet-utils/url-find.cc
index 2dcabde..8034839 100644
--- a/pan/usenet-utils/url-find.cc
+++ b/pan/usenet-utils/url-find.cc
@@ -28,12 +28,19 @@ namespace {
public:
fooregex(): regex(NULL)
{
- regex = g_regex_new("(?:https?://|"
- "ftps?(?:://|\\.)|" //ftp:// ftp.
- "news:|"
- "www\\.|"
- "[[:alnum:]][[:alnum:]_\\.]*@)" //email
- "[[:alnum:]\\.:/?#\\[\\] !$&'()*+,;=\\-_%~]+" /* uri */,
+ // RFC1738
+ // unsafe in URL (always encoded): {}|\^~[]`<>"
+ // reserved for schemas: ;/?:@=&
+ // % (hex encoding) # (fragment)
+ // allowed: a-z A-Z 0-9 $-_.+!*'(),
+ regex = g_regex_new("(?:"
+ "https?://|"
+ "ftps?(?:://|\\.)|" //ftp:// ftp.
+ "news:|nntp:|"
+ "www\\.|"
+ "[[:alnum:]][[:alnum:]_\\.]*@" //email
+ ")"
+ "[" "[:alnum:]$_\\-\\.!+*()',%#" ";:/?&=@" "]+" /* uri */,
G_REGEX_OPTIMIZE, (GRegexMatchFlags)0, NULL);
}
~fooregex()
@@ -68,7 +75,9 @@ pan :: url_find (const StringView& text, StringView& setme_url)
// for urls at the end of a sentence.
if (!setme_url.empty() && strchr("?!.,", setme_url.back()))
--setme_url.len;
-
+ const char c = text.str[ start - 1 ];
+ if (c == '\'' && c == setme_url.back() )
+ --setme_url.len;
return true;
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]