[libsoup] SoupContentSniffer: don't use gio anymore



commit ecec6eeba21beeab1ef33473dd82d490176e15d9
Author: arno <arno renevier net>
Date:   Tue May 3 07:46:07 2011 +0200

    SoupContentSniffer: don't use gio anymore
    
    This brings the content sniffing algorithm closer to the HTML5
    specification.
    
    https://bugzilla.gnome.org/show_bug.cgi?id=648846

 libsoup/soup-content-sniffer.c   |   51 +++++--------------------------------
 tests/resources/html_binary.html |    1 +
 tests/resources/ps_binary.ps     |    1 +
 tests/resources/text_binary.txt  |    1 +
 tests/sniffing-test.c            |   35 ++++++++++---------------
 5 files changed, 24 insertions(+), 65 deletions(-)
---
diff --git a/libsoup/soup-content-sniffer.c b/libsoup/soup-content-sniffer.c
index 8d54771..4b96735 100644
--- a/libsoup/soup-content-sniffer.c
+++ b/libsoup/soup-content-sniffer.c
@@ -10,7 +10,6 @@
 #endif
 
 #include <string.h>
-#include <gio/gio.h>
 
 #include "soup-content-sniffer.h"
 #include "soup-enum-types.h"
@@ -250,27 +249,6 @@ static char byte_looks_binary[] = {
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0xF0 - 0xFF */
 };
 
-static char *
-sniff_gio (SoupContentSniffer *sniffer, SoupMessage *msg, SoupBuffer *buffer)
-{
-	SoupURI *uri;
-	char *uri_path;
-	char *content_type;
-	char *mime_type;
-	gboolean uncertain;
-
-	uri = soup_message_get_uri (msg);
-	uri_path = soup_uri_to_string (uri, TRUE);
-
-	content_type= g_content_type_guess (uri_path, (const guchar*)buffer->data, buffer->length, &uncertain);
-	mime_type = g_content_type_get_mime_type (content_type);
-
-	g_free (uri_path);
-	g_free (content_type);
-
-	return mime_type;
-}
-
 /* HTML5: 2.7.4 Content-Type sniffing: unknown type */
 static char*
 sniff_unknown (SoupContentSniffer *sniffer, SoupMessage *msg,
@@ -278,7 +256,6 @@ sniff_unknown (SoupContentSniffer *sniffer, SoupMessage *msg,
 {
 	const guchar *resource = (const guchar *)buffer->data;
 	int resource_length = MIN (512, buffer->length);
-	char *gio_guess;
 	int i;
 
 	for (i = 0; i < G_N_ELEMENTS (types_table); i++) {
@@ -338,29 +315,15 @@ sniff_unknown (SoupContentSniffer *sniffer, SoupMessage *msg,
 		}
 	}
 
-	/* The spec allows us to use platform sniffing to find out
-	 * about other types that are not covered, but we need to be
-	 * careful to not escalate privileges, if on text or binary.
-	 */
-	gio_guess = sniff_gio (sniffer, msg, buffer);
-
-	if (for_text_or_binary) {
-		for (i = 0; i < G_N_ELEMENTS (types_table); i++) {
-			SoupContentSnifferPattern *type_row = &(types_table[i]);
+	if (for_text_or_binary)
+		return g_strdup ("application/octet-stream");
 
-			if (!g_ascii_strcasecmp (type_row->sniffed_type, gio_guess) &&
-			    type_row->scriptable) {
-				g_free (gio_guess);
-				gio_guess = NULL;
-				break;
-			}
-		}
+	for (i = 0; i < resource_length; i++) {
+		if (byte_looks_binary[resource[i]])
+			return g_strdup ("application/octet-stream");
 	}
 
-	if (gio_guess)
-		return gio_guess;
-
-	return g_strdup ("application/octet-stream");
+	return g_strdup ("text/plain");
 }
 
 /* HTML5: 2.7.3 Content-Type sniffing: text or binary */
diff --git a/tests/resources/html_binary.html b/tests/resources/html_binary.html
new file mode 100644
index 0000000..9200dd4
--- /dev/null
+++ b/tests/resources/html_binary.html
@@ -0,0 +1 @@
+<HTML 
diff --git a/tests/resources/ps_binary.ps b/tests/resources/ps_binary.ps
new file mode 100644
index 0000000..3d210ed
--- /dev/null
+++ b/tests/resources/ps_binary.ps
@@ -0,0 +1 @@
+%!PS-Adobe-" 
diff --git a/tests/resources/text_binary.txt b/tests/resources/text_binary.txt
new file mode 100644
index 0000000..113bfdd
--- /dev/null
+++ b/tests/resources/text_binary.txt
@@ -0,0 +1 @@
+abc
\ No newline at end of file
diff --git a/tests/sniffing-test.c b/tests/sniffing-test.c
index 60ca389..828f1d5 100644
--- a/tests/sniffing-test.c
+++ b/tests/sniffing-test.c
@@ -445,24 +445,6 @@ test_disabled (const char *path)
 	g_main_loop_unref (loop);
 }
 
-/* Fix up XDG_DATA_DIRS for jhbuild runs so that it still works even
- * if you didn't install shared-mime-info.
- */
-static void
-fixup_xdg_dirs (void)
-{
-	const char *xdg_data_dirs = g_getenv ("XDG_DATA_DIRS");
-	char *new_data_dirs;
-
-	if (xdg_data_dirs &&
-	    !g_str_has_prefix (xdg_data_dirs, "/usr/share") &&
-	    !strstr (xdg_data_dirs, ":/usr/share")) {
-		new_data_dirs = g_strdup_printf ("%s:/usr/share", xdg_data_dirs);
-		g_setenv ("XDG_DATA_DIRS", new_data_dirs, TRUE);
-		g_free (new_data_dirs);
-	}
-}
-
 int
 main (int argc, char **argv)
 {
@@ -470,8 +452,6 @@ main (int argc, char **argv)
 
 	test_init (argc, argv, NULL);
 
-	fixup_xdg_dirs ();
-
 	server = soup_test_server_new (TRUE);
 	soup_server_add_handler (server, NULL, server_callback, NULL, NULL);
 	base_uri = soup_uri_new ("http://127.0.0.1/");
@@ -533,11 +513,24 @@ main (int argc, char **argv)
 	 */
 	test_sniffing ("/text_or_binary/test.html", "text/plain");
 
+	/* text/plain with binary content and unknown pattern should be
+	 * application/octet-stream */
+	test_sniffing ("/text_or_binary/text_binary.txt", "application/octet-stream");
+
+	/* text/plain with binary content and scriptable pattern should be
+	 * application/octet-stream to avoid 'privilege escalation' */
+	test_sniffing ("/text_or_binary/html_binary.html", "application/octet-stream");
+
+	/* text/plain with binary content and non scriptable known pattern should
+	 * be the given type */
+	test_sniffing ("/text_or_binary/ps_binary.ps", "application/postscript");
+
 	/* Test the unknown sniffing path */
 
 	test_sniffing ("/unknown/test.html", "text/html");
 	test_sniffing ("/unknown/home.gif", "image/gif");
-	test_sniffing ("/unknown/mbox", "application/mbox");
+	test_sniffing ("/unknown/mbox", "text/plain");
+	test_sniffing ("/unknown/text_binary.txt", "application/octet-stream");
 
 	/* Test the XML sniffing path */
 



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]