[california] Improved URI parsing
- From: Jim Nelson <jnelson src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [california] Improved URI parsing
- Date: Fri, 14 Nov 2014 01:27:41 +0000 (UTC)
commit 7ee08ee545768b5d74100113670266b271431694
Author: Jim Nelson <jim yorba org>
Date: Thu Nov 13 17:26:01 2014 -0800
Improved URI parsing
John Gruber updated the URI-parsing regex we were using in February
2014, so we moved to it. This new regex also allows us to catch mailto:
URIs. The linkify code now allows transformation of the URI itself
when it's linked, so (for example) we could in the future ellipsize
long URIs and use a tooltip to give the user the full string.
src/host/host-show-event.vala | 6 ++++--
src/util/util-markup.vala | 37 +++++++++++++++++++++----------------
2 files changed, 25 insertions(+), 18 deletions(-)
---
diff --git a/src/host/host-show-event.vala b/src/host/host-show-event.vala
index 721156d..f5cd1d0 100644
--- a/src/host/host-show-event.vala
+++ b/src/host/host-show-event.vala
@@ -211,17 +211,19 @@ public class ShowEvent : Gtk.Grid, Toolkit.Card {
}
private bool linkify_delegate(string uri, bool known_protocol, out string? pre_markup,
- out string? post_markup) {
+ out string? markup, out string? post_markup) {
// preserve but don't linkify if unknown protocol
if (!known_protocol) {
pre_markup = null;
+ markup = null;
post_markup = null;
return true;
}
- // anchor it
+ // anchor it and preserve uri (i.e. markup = null)
pre_markup = "<a href=\"%s\">".printf(uri);
+ markup = null;
post_markup = "</a>";
return true;
diff --git a/src/util/util-markup.vala b/src/util/util-markup.vala
index e633cf1..b5b0c69 100644
--- a/src/util/util-markup.vala
+++ b/src/util/util-markup.vala
@@ -11,27 +11,32 @@ namespace California.Markup {
*
* known_protocol indicates the URI has a well-known protocol (i.e. http:// or ftp://, etc.)
*
+ * markup can hold a new string that is placed in between the pre- and post-markup strings. If
+ * null or an empty string is returned, uri will be used.
+ *
* Returns false if the uri should not be included in the string returned by {@link linkify}. To
- * leave a URI bare, return null for both strings and return true.
+ * leave a URI bare, return null for pre_markup, markup, and post_markup and return true.
*/
public delegate bool LinkifyDelegate(string uri, bool known_protocol, out string? pre_markup,
- out string? post_markup);
+ out string? markup, out string? post_markup);
-// Regex to detect URLs.
-// Originally from http://daringfireball.net/2010/07/improved_regex_for_matching_urls
-private const string URL_REGEX =
"(?i)\\b((?:[a-z][\\w-]+:(?:/{1,3}|[a-z0-9%])|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,4}/)(?:[^\\s()<>]+|\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\))+(?:\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\)|[^\\s`!()\\[\\]{};:'\".,<>?«»“”‘’]))";
+// Regex to detect URIs.
+// Originally from https://gist.github.com/gruber/249502
+// See http://daringfireball.net/2010/07/improved_regex_for_matching_urls for note on earlier version
+// of this regex.
+private const string URI_REGEX =
"""(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))""";
-// Regex to determine if a URL has a known protocol.
-private const string PROTOCOL_REGEX =
"^(aim|apt|bitcoin|cvs|ed2k|ftp|file|finger|git|gtalk|http|https|irc|ircs|irc6|lastfm|ldap|ldaps|magnet|news|nntp|rsync|sftp|skype|smb|sms|svn|telnet|tftp|ssh|webcal|xmpp):";
+// Regex to determine if a URI has a well-known protocol.
+private const string PROTOCOL_REGEX =
"^(aim|apt|bitcoin|cvs|ed2k|ftp|file|finger|git|gtalk|http|https|irc|ircs|irc6|lastfm|ldap|ldaps|magnet|mailto|news|nntp|rsync|sftp|skype|smb|sms|svn|telnet|tftp|ssh|webcal|xmpp):";
-private Regex url_regex;
+private Regex uri_regex;
private Regex protocol_regex;
/**
* Called by Util.init()
*/
internal void init() throws Error {
- url_regex = new Regex(URL_REGEX, RegexCompileFlags.CASELESS | RegexCompileFlags.OPTIMIZE);
+ uri_regex = new Regex(URI_REGEX, RegexCompileFlags.CASELESS | RegexCompileFlags.OPTIMIZE);
protocol_regex = new Regex(PROTOCOL_REGEX, RegexCompileFlags.CASELESS | RegexCompileFlags.OPTIMIZE);
}
@@ -39,7 +44,7 @@ internal void init() throws Error {
* Called by Util.terminate()
*/
internal void terminate() {
- url_regex = null;
+ uri_regex = null;
protocol_regex = null;
}
@@ -53,21 +58,21 @@ public string? linkify(string? unlinked, LinkifyDelegate linkify_cb) {
return unlinked;
try {
- return url_regex.replace_eval(unlinked, -1, 0, 0, (match_info, result) => {
+ return uri_regex.replace_eval(unlinked, -1, 0, 0, (match_info, result) => {
// match zero is the only match we're interested in
- string? url = match_info.fetch(0);
- if (String.is_empty(url))
+ string? uri = match_info.fetch(0);
+ if (String.is_empty(uri))
return false;
// have original caller provide markup (or drop the URL)
- string? pre_markup, post_markup;
- if (!linkify_cb(url, protocol_regex.match(url), out pre_markup, out post_markup))
+ string? pre_markup, markup, post_markup;
+ if (!linkify_cb(uri, protocol_regex.match(uri), out pre_markup, out markup, out post_markup))
return false;
// put it all together
result.append_printf("%s%s%s",
(pre_markup != null) ? pre_markup : "",
- url,
+ String.is_empty(markup) ? uri : markup,
(post_markup != null) ? post_markup : ""
);
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]