[libsoup] SoupTLD: fix a regression in soup_tld_is_public_suffix()



commit 82084a0c02c958a8d5bff4f847fc1507b10db9ed
Author: Sergio Villar Senin <svillar igalia com>
Date:   Thu Aug 2 16:50:54 2012 +0200

    SoupTLD: fix a regression in soup_tld_is_public_suffix()
    
    soup_tld_is_public_suffix() was not returning TRUE for well known TLDs as
    ".com" after cbae89f4. Also added some extra documentation to
    soup_tld_get_base_domain() in order to make explicit that it returns NULL
    for private URLs.
    
    Reworked unit tests in order to allow them detect these regressions.
    
    https://bugzilla.gnome.org/show_bug.cgi?id=681085

 libsoup/soup-tld.c |   16 ++++-----
 tests/tld-test.c   |   85 ++++++++++++++++++++++++++++++++-------------------
 2 files changed, 60 insertions(+), 41 deletions(-)
---
diff --git a/libsoup/soup-tld.c b/libsoup/soup-tld.c
index 0c40b67..fc7c992 100644
--- a/libsoup/soup-tld.c
+++ b/libsoup/soup-tld.c
@@ -59,6 +59,10 @@ soup_tld_ensure_rules_hash_table (void)
  * plus the second level domain, for example for myhost.mydomain.com
  * it will return mydomain.com.
  *
+ * Note that %NULL will be returned for private URLs (those not ending
+ * with any well known TLD) because choosing a base domain for them
+ * would be totally arbitrary.
+ *
  * This method only works for valid UTF-8 hostnames in their canonical
  * representation form, so you should use g_hostname_to_unicode() to
  * get the canonical representation if that is not the case.
@@ -106,7 +110,7 @@ soup_tld_domain_is_public_suffix (const char *domain)
 		g_return_val_if_reached (FALSE);
 
 	base_domain = soup_tld_get_base_domain_internal (domain, 0, &error);
-	if (base_domain)
+	if (g_strcmp0 (domain, base_domain))
 		return FALSE;
 
 	if (g_error_matches (error, SOUP_TLD_ERROR, SOUP_TLD_ERROR_NO_BASE_DOMAIN)) {
@@ -178,15 +182,9 @@ soup_tld_get_base_domain_internal (const char *hostname, guint additional_domain
 				/* If we match a *. rule and there were no previous exceptions
 				 * nor previous domains then treat it as an exact match.
 				 */
-				if (!prev_domain) {
-					g_set_error_literal (error, SOUP_TLD_ERROR,
-							     SOUP_TLD_ERROR_NOT_ENOUGH_DOMAINS,
-							     _("Not enough domains"));
-					return NULL;
-				}
-				tld = prev_domain;
+				tld = prev_domain ? prev_domain : cur_domain;
 				break;
-			} else if (*flags == SOUP_TLD_RULE_NORMAL || !next_dot) {
+			} else if (*flags == SOUP_TLD_RULE_NORMAL) {
 				tld = cur_domain;
 				break;
 			} else if (*flags & SOUP_TLD_RULE_EXCEPTION) {
diff --git a/tests/tld-test.c b/tests/tld-test.c
index 25d2da2..d0c73a8 100644
--- a/tests/tld-test.c
+++ b/tests/tld-test.c
@@ -21,16 +21,6 @@ static struct {
   { ".example", NULL },
   { ".example.com", NULL },
   { ".example.example", NULL },
-  /* Unlisted TLD.*/
-  { "example", NULL },
-  { "example.example", NULL },
-  { "b.example.example", NULL },
-  { "a.b.example.example", NULL },
-  /* Listed, but non-Internet, TLD. */
-  { "local", NULL },
-  { "example.local", NULL },
-  { "b.example.local", NULL },
-  { "a.b.example.local", NULL },
   /* TLD with only 1 rule. */
   { "biz", NULL },
   { "domain.biz", "domain.biz" },
@@ -88,6 +78,20 @@ static struct {
   /* The original list does not include non-ASCII tests. Let's add a couple. */
   { "åå.cn", NULL },
   { "a.b.Ãfjord.no", "b.Ãfjord.no" }
+},
+/* Non Internet TLDs have NULL as expected result
+ */
+non_inet_tld_tests[] = {
+  /* Unlisted TLD.*/
+  { "example", NULL },
+  { "example.example", NULL },
+  { "b.example.example", NULL },
+  { "a.b.example.example", NULL },
+  /* Listed, but non-Internet, TLD. */
+  { "local", NULL },
+  { "example.local", NULL },
+  { "b.example.local", NULL },
+  { "a.b.example.local", NULL }
 };
 
 int
@@ -99,29 +103,46 @@ main (int argc, char **argv)
 
 	errors = 0;
 	for (i = 0; i < G_N_ELEMENTS (tld_tests); ++i) {
-               gboolean is_public = soup_tld_domain_is_public_suffix (tld_tests[i].hostname);
-               const char *base_domain = soup_tld_get_base_domain (tld_tests[i].hostname, NULL);
+		GError *error = NULL;
+		gboolean is_public = soup_tld_domain_is_public_suffix (tld_tests[i].hostname);
+		const char *base_domain = soup_tld_get_base_domain (tld_tests[i].hostname, &error);
+
+		debug_printf (1, "Testing %s: ", tld_tests[i].hostname);
+
+		if (is_public && tld_tests[i].result) {
+			debug_printf (1, "ERROR: domain is public but base_domain is not NULL (%s)\n",
+				      base_domain);
+			++errors;
+		} else if (g_strcmp0 (tld_tests[i].result, base_domain)) {
+			debug_printf (1, "ERROR: %s expected as base domain but got %s\n",
+				      tld_tests[i].result, base_domain);
+			++errors;
+
+		} else if (!tld_tests[i].result && !is_public &&
+			   !g_error_matches (error, SOUP_TLD_ERROR, SOUP_TLD_ERROR_INVALID_HOSTNAME)) {
+			debug_printf (1, "ERROR: not public domain with NULL expected result\n");
+			++errors;
+		} else
+			debug_printf (1, "OK\n");
+
+		g_clear_error(&error);
+	}
+
+	for (i = 0; i < G_N_ELEMENTS (non_inet_tld_tests); ++i) {
+		gboolean is_public = soup_tld_domain_is_public_suffix (non_inet_tld_tests[i].hostname);
+		const char *base_domain = soup_tld_get_base_domain (non_inet_tld_tests[i].hostname, NULL);
+
+		debug_printf (1, "Testing %s: ", non_inet_tld_tests[i].hostname);
 
-	       debug_printf (1, "Testing %s: ", tld_tests[i].hostname);
-               if (tld_tests[i].result) {
-                       /* Public domains have NULL expected results. */
-                       if (is_public || g_strcmp0 (tld_tests[i].result, base_domain)) {
-                               debug_printf (1, "ERROR: %s got %s (%s expected)\n",
-                                             tld_tests[i].hostname, base_domain, tld_tests[i].result);
-                               ++errors;
-                       } else
-			       debug_printf (1, "OK\n");
-               } else {
-                       /* If there is no expected result then either the domain is public or
-                        * the hostname invalid (for example starts with a leading dot).
-                        */
-                       if (!is_public && base_domain) {
-                               debug_printf (1, "ERROR: public domain %s got %s (none expected)\n",
-                                             tld_tests[i].hostname, base_domain);
-                               ++errors;
-                       } else
-			       debug_printf (1, "OK\n");
-	       }
+		if (is_public) {
+			debug_printf (1, "ERROR: domain incorrectly clasified as public\n");
+			++errors;
+		} else if (base_domain) {
+			debug_printf (1, "ERROR: non NULL base domain (%s) for local url\n",
+				      base_domain);
+			++errors;
+		} else
+			debug_printf (1, "OK\n");
 	}
 
 	test_cleanup ();



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]