[gnome-terminal] regex: Allow apostrophes in URLs, except when enclosed between them
- From: Egmont Koblinger <egmontkob src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gnome-terminal] regex: Allow apostrophes in URLs, except when enclosed between them
- Date: Mon, 1 Jan 2018 14:59:22 +0000 (UTC)
commit 51cb07f3244f8999dece9332d928b7529d9a63f9
Author: Egmont Koblinger <egmont gmail com>
Date: Mon Jan 1 15:57:10 2018 +0100
regex: Allow apostrophes in URLs, except when enclosed between them
https://bugzilla.gnome.org/show_bug.cgi?id=448044
src/terminal-regex.c | 7 +++++++
src/terminal-regex.h | 14 +++++++++-----
2 files changed, 16 insertions(+), 5 deletions(-)
---
diff --git a/src/terminal-regex.c b/src/terminal-regex.c
index cf06b76..cf51453 100644
--- a/src/terminal-regex.c
+++ b/src/terminal-regex.c
@@ -288,6 +288,13 @@ main (int argc, char **argv)
assert_match (REGEX_URL_AS_IS, "[markdown](http://foo.bar/(a(b)c)d)e)f", "http://foo.bar/(a(b)c)d");
assert_match (REGEX_URL_AS_IS, "[markdown](http://foo.bar/a)b(c", "http://foo.bar/a");
+ /* Apostrophes are allowed, except at trailing position if the URL is preceded by an apostrophe, see bug 448044. */
+ assert_match (REGEX_URL_AS_IS, "https://en.wikipedia.org/wiki/Moore's_law", ENTIRE);
+ assert_match (REGEX_URL_AS_IS, "<a href=\"https://en.wikipedia.org/wiki/Moore's_law\">",
"https://en.wikipedia.org/wiki/Moore's_law");
+ assert_match (REGEX_URL_AS_IS, "https://en.wikipedia.org/wiki/Cryin'", ENTIRE);
+ assert_match (REGEX_URL_AS_IS, "<a href=\"https://en.wikipedia.org/wiki/Cryin'\">",
"https://en.wikipedia.org/wiki/Cryin'");
+ assert_match (REGEX_URL_AS_IS, "<a href='https://en.wikipedia.org/wiki/Aerosmith'>",
"https://en.wikipedia.org/wiki/Aerosmith");
+
/* No scheme */
assert_match (REGEX_URL_HTTP, "www.foo.bar/baz", ENTIRE);
assert_match (REGEX_URL_HTTP, "WWW3.foo.bar/baz", ENTIRE);
diff --git a/src/terminal-regex.h b/src/terminal-regex.h
index 7136f95..3a3e89a 100644
--- a/src/terminal-regex.h
+++ b/src/terminal-regex.h
@@ -43,6 +43,9 @@
#ifndef TERMINAL_REGEX_H
#define TERMINAL_REGEX_H
+/* Lookbehind to see if there's a preceding apostrophe */
+#define APOS_START_DEF "(?<APOS_START>(?<='))?"
+
#define SCHEME "(?ix: news | telnet | nntp | https? | ftps? | sftp | webcal )"
#define USERCHARS "-+.[:alnum:]"
@@ -122,21 +125,22 @@
#define PORT "(?x: \\:" N_1_65535 " )?"
/* Omit the parentheses, see below */
-#define PATHCHARS_CLASS "[-[:alnum:]\\Q_$.+!*,:;@&=?/~#|%\\E]"
-/* Chars to end a URL */
-#define PATHTERM_CLASS "[-[:alnum:]\\Q_$+*:@&=/~#|%\\E]"
+#define PATHCHARS_CLASS "[-[:alnum:]\\Q_$.+!*,:;@&=?/~#|%'\\E]"
+/* Chars to end a URL. Apostrophe only allowed if there wasn't one in front of the URL, see bug 448044 */
+#define PATHTERM_CLASS "[-[:alnum:]\\Q_$+*:@&=/~#|%'\\E]"
+#define PATHTERM_NOAPOS_CLASS "[-[:alnum:]\\Q_$+*:@&=/~#|%\\E]"
/* Recursive definition of PATH that allows parentheses and square brackets only if balanced, see bug 763980. */
#define PATH_INNER_DEF "(?(DEFINE)(?<PATH_INNER>(?x: (?: " PATHCHARS_CLASS "* (?: \\( (?&PATH_INNER) \\) |
\\[ (?&PATH_INNER) \\] ) )* " PATHCHARS_CLASS "* )))"
/* Same as above, but the last character (if exists and is not a parenthesis) must be from PATHTERM_CLASS. */
-#define PATH_DEF "(?(DEFINE)(?<PATH>(?x: (?: " PATHCHARS_CLASS "* (?: \\( (?&PATH_INNER) \\) | \\[
(?&PATH_INNER) \\] ) )* (?: " PATHCHARS_CLASS "* " PATHTERM_CLASS " )? )))"
+#define PATH_DEF "(?(DEFINE)(?<PATH>(?x: (?: " PATHCHARS_CLASS "* (?: \\( (?&PATH_INNER) \\) | \\[
(?&PATH_INNER) \\] ) )* (?: " PATHCHARS_CLASS "* (?(<APOS_START>)" PATHTERM_NOAPOS_CLASS "|" PATHTERM_CLASS
") )? )))"
#define URLPATH "(?x: /(?&PATH) )?"
#define VOIP_PATH "(?x: [;?](?&PATH) )?"
/* Now let's put these fragments together */
-#define DEFS IP_DEF PATH_INNER_DEF PATH_DEF
+#define DEFS APOS_START_DEF IP_DEF PATH_INNER_DEF PATH_DEF
#define REGEX_URL_AS_IS DEFS SCHEME "://" USERPASS URL_HOST PORT URLPATH
/* TODO: also support file:/etc/passwd */
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]