[gnome-terminal] regex: Fix path-less URL recognition
- From: Christian Persch <chpe src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gnome-terminal] regex: Fix path-less URL recognition
- Date: Thu, 3 Feb 2022 21:20:20 +0000 (UTC)
commit 17ee0e43be8bb41e429d5d1d9d4a21fad57c3222
Author: Egmont Koblinger <egmont gmail com>
Date: Thu Feb 3 22:19:57 2022 +0100
regex: Fix path-less URL recognition
URLs are allowed to contain the query ('?') or fragment ('#') directly
after the hostname without a path or even a path separator ('/'), so fix
the regexes to recognise these URLs.
Fixes: https://gitlab.gnome.org/GNOME/gnome-terminal/-/issues/7888
src/terminal-regex.cc | 16 ++++++++++++++++
src/terminal-regex.hh | 2 +-
2 files changed, 17 insertions(+), 1 deletion(-)
---
diff --git a/src/terminal-regex.cc b/src/terminal-regex.cc
index 9b1d2529..3856c646 100644
--- a/src/terminal-regex.cc
+++ b/src/terminal-regex.cc
@@ -220,6 +220,10 @@ main (int argc, char **argv)
assert_match_anchored (DEFS URLPATH, "/().", "/()");
assert_match_anchored (DEFS URLPATH, "/", ENTIRE);
assert_match_anchored (DEFS URLPATH, "", ENTIRE);
+ assert_match_anchored (DEFS URLPATH, "?", ENTIRE);
+ assert_match_anchored (DEFS URLPATH, "?param=value", ENTIRE);
+ assert_match_anchored (DEFS URLPATH, "#", ENTIRE);
+ assert_match_anchored (DEFS URLPATH, "#anchor", ENTIRE);
assert_match_anchored (DEFS URLPATH, "/php?param[]=value1&param[]=value2", ENTIRE);
assert_match_anchored (DEFS URLPATH, "/foo?param1[index1]=value1&param2[index2]=value2", ENTIRE);
assert_match_anchored (DEFS URLPATH, "/[[[]][]]", ENTIRE);
@@ -270,6 +274,18 @@ main (int argc, char **argv)
assert_match (REGEX_URL_AS_IS, "http://1.2.3.4:70000", "http://1.2.3.4"); /* TODO: can/should we totally abort here? */
assert_match (REGEX_URL_AS_IS, "http://[dead::beef:111.222.333.444]", nullptr);
+ /* '?' or '#' without '/', #7888 */
+ assert_match (REGEX_URL_AS_IS, "http://foo.bar?", ENTIRE);
+ assert_match (REGEX_URL_AS_IS, "http://foo.bar?param=value", ENTIRE);
+ assert_match (REGEX_URL_AS_IS, "http://foo.bar:12345?param=value", ENTIRE);
+ assert_match (REGEX_URL_AS_IS, "http://1.2.3.4?param=value", ENTIRE);
+ assert_match (REGEX_URL_AS_IS, "http://[dead::beef]?param=value", ENTIRE);
+ assert_match (REGEX_URL_AS_IS, "http://foo.bar#", ENTIRE);
+ assert_match (REGEX_URL_AS_IS, "http://foo.bar#anchor", ENTIRE);
+ assert_match (REGEX_URL_AS_IS, "http://foo.bar:12345#anchor", ENTIRE);
+ assert_match (REGEX_URL_AS_IS, "http://1.2.3.4#anchor", ENTIRE);
+ assert_match (REGEX_URL_AS_IS, "http://[dead::beef]#anchor", ENTIRE);
+
/* Username, password */
assert_match (REGEX_URL_AS_IS, "http://joe@example.com", ENTIRE);
assert_match (REGEX_URL_AS_IS, "http://user.name:sec.ret@host.name", ENTIRE);
diff --git a/src/terminal-regex.hh b/src/terminal-regex.hh
index 8a1eb0be..465df772 100644
--- a/src/terminal-regex.hh
+++ b/src/terminal-regex.hh
@@ -139,7 +139,7 @@
/* Same as above, but the last character (if exists and is not a parenthesis) must be from PATHTERM_CLASS. */
#define PATH_DEF "(?(DEFINE)(?<PATH>(?x: (?: " PATHCHARS_CLASS "* (?: \\( (?&PATH_INNER) \\) | \\[ (?&PATH_INNER) \\] ) )* (?: " PATHCHARS_CLASS "* (?(<APOS_START>)" PATHTERM_NOAPOS_CLASS "|" PATHTERM_CLASS ") )? )))"
-#define URLPATH "(?x: /(?&PATH) )?"
+#define URLPATH "(?x: [/?#](?&PATH) )?"
#define VOIP_PATH "(?x: [;?](?&PATH) )?"
/* Now let's put these fragments together */
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]