[pan2: 1/23] change URI detection
- From: Petr Kovář <pmkovar src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [pan2: 1/23] change URI detection
- Date: Sun, 29 May 2011 13:03:38 +0000 (UTC)
commit 4bf51cfd08140377868bbcbb0ef8ae6d61f6cfc7
Author: K. Haley <haleykd users sf net>
Date: Wed Feb 9 00:04:41 2011 -0700
change URI detection
pan/usenet-utils/url-find-test.cc | 16 ++++++----
pan/usenet-utils/url-find.cc | 56 +++++++++++++++++++++++++++++++++++-
2 files changed, 64 insertions(+), 8 deletions(-)
---
diff --git a/pan/usenet-utils/url-find-test.cc b/pan/usenet-utils/url-find-test.cc
index a8db50c..076213a 100644
--- a/pan/usenet-utils/url-find-test.cc
+++ b/pan/usenet-utils/url-find-test.cc
@@ -23,9 +23,9 @@ main (void)
check (url_find (in, out))
check (out == "http://www.gtk.org/")
- in = "Have you ever visited http://www.google.com/?";
+ in = "Have you ever visited https://www.google.com/?";
check (url_find (in, out))
- check (out == "http://www.google.com/")
+ check (out == "https://www.google.com/")
in = "la la lawww.google.com sfadf";
check (url_find (in, out))
@@ -35,9 +35,9 @@ main (void)
check (url_find (in, out))
check (out == "charles rebelbase com")
- in = "Go visit ftp://ftp.gnome.org and get cool software!";
+ in = "Go visit ftps://ftp.gnome.org and get cool software!";
check (url_find (in, out))
- check (out == "ftp://ftp.gnome.org")
+ check (out == "ftps://ftp.gnome.org")
in = "Go visit ftp.gnome.org and get cool software!";
check (url_find (in, out))
@@ -66,9 +66,13 @@ main (void)
check (url_find (in, out))
check (out == "charles rebelbase com")
- in = "blah blah <http://www.dobreprogramy.pl/pirat/dobreprogramy_pl(piratXXX).jpg>, only the";
+ in = "blah blah <http://www.dobreprogramy.pl/pirat/dobreprogramy_pl(piratXXX).jpg#frag>, only the";
check (url_find (in, out))
- check (out == "http://www.dobreprogramy.pl/pirat/dobreprogramy_pl(piratXXX).jpg")
+ check (out == "http://www.dobreprogramy.pl/pirat/dobreprogramy_pl(piratXXX).jpg#frag")
+
+ in = "Here is my email address: lost foo_bar rebelbase com Did you get it?";
+ check (url_find (in, out))
+ check (out == "lost foo_bar rebelbase com")
// success
diff --git a/pan/usenet-utils/url-find.cc b/pan/usenet-utils/url-find.cc
index b33d6c7..2dcabde 100644
--- a/pan/usenet-utils/url-find.cc
+++ b/pan/usenet-utils/url-find.cc
@@ -23,11 +23,63 @@
#include "url-find.h"
using namespace pan;
+namespace {
+  /**
+   * Owns the compiled GRegex that url_find() uses to spot URLs and
+   * e-mail addresses.
+   *
+   * The pattern is an alternation of accepted prefixes (http/https,
+   * ftp/ftps written as a scheme or as a host name, "news:", "www.",
+   * or an e-mail local part ending in '@') followed by a run of
+   * characters permitted in a URI: alphanumerics plus the RFC 3986
+   * gen-delims, sub-delims, and the remaining unreserved/percent
+   * characters.
+   */
+  class fooregex {
+    public:
+      fooregex(): regex(NULL)
+      {
+        regex = g_regex_new("(?:https?://|"
+                            "ftps?(?:://|\\.)|" //ftp:// ftp.
+                            "news:|"
+                            "www\\.|"
+                            "[[:alnum:]][[:alnum:]_\\.]*@)" //email
+                            // The tail must not contain a blank, otherwise a match
+                            // would run on into the words that follow the URL.
+                            // The character after "[]" is '@', completing the
+                            // gen-delims set ":/?#[]@".
+                            "[[:alnum:]\\.:/?#\\[\\]@!$&'()*+,;=\\-_%~]+" /* uri */,
+                            G_REGEX_OPTIMIZE, (GRegexMatchFlags)0, NULL);
+      }
+      ~fooregex()
+      {
+        // g_regex_new() returns NULL if the pattern fails to compile,
+        // and g_regex_unref() rejects a NULL argument.
+        if (regex != NULL)
+          g_regex_unref(regex);
+      }
+      /** Lets an instance be passed directly to the g_regex_*() functions. */
+      operator GRegex*() {return regex;}
+
+      GRegex *regex;
+
+    private:
+      // Not copyable: a copy would drop the single reference twice.
+      fooregex(const fooregex&);
+      fooregex& operator=(const fooregex&);
+  };
+
+  // Compiled once during static initialisation and shared by every
+  // url_find() call in this translation unit.
+  fooregex regex;
+}
+
+/**
+ * Finds the first URL or e-mail address in `text`.
+ *
+ * @param text       text to scan; only the bytes [str, str+len) are examined
+ * @param setme_url  on success, set to the sub-range of `text` that matched,
+ *                   minus one trailing '?', '!', '.' or ',' if present
+ * @return true when a match was found; false otherwise, in which case
+ *         setme_url is left untouched
+ */
+bool
+pan :: url_find (const StringView& text, StringView& setme_url)
+{
+  if (text.empty())
+    return false;
+
+  // Start from NULL so the unconditional free below is safe even if
+  // GLib bails out before creating the match info.
+  GMatchInfo *match = NULL;
+
+  // Pass the explicit length: a StringView is a (pointer, length) pair
+  // and need not be NUL-terminated at str+len.
+  const bool matched = g_regex_match_full(regex, text.str, (gssize)text.len, 0,
+                                          (GRegexMatchFlags)0, &match, NULL);
+
+  int start = 0;
+  int end = 0;
+  const bool have_pos = matched
+                     && g_match_info_fetch_pos(match, 0, &start, &end);
+
+  // GLib allocates the match info even when nothing matched, so it has
+  // to be released on every path, not only on success.
+  g_match_info_free(match);
+
+  if (!have_pos)
+    return false;
+
+  setme_url.assign(text.str+start, end - start);
+
+  // for urls at the end of a sentence.
+  if (!setme_url.empty() && strchr("?!.,", setme_url.back()))
+    --setme_url.len;
+
+  return true;
+}
// This is a cheap little hack that should eventually be replaced
// with something more robust.
+namespace pan { // declared here so the qualified definition of url_findx below has a prior declaration in pan
+bool url_findx (const StringView& text, StringView& setme_url); // the hand-written finder that this commit renames from url_find
+}
+
bool
-pan :: url_find (const StringView& text, StringView& setme_url)
+pan :: url_findx (const StringView& text, StringView& setme_url)
{
if (text.empty())
return false;
@@ -80,7 +132,7 @@ pan :: url_find (const StringView& text, StringView& setme_url)
char ch (start[-1]);
if (ch == '[') bracket = ']';
else if (ch == '<') bracket = '>';
- }
+ }
const char * pch;
for (pch=start; pch!=text.end(); ++pch) {
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]