[libsoup/content-sniffing-update: 2/9] Implement the check-apache-bug flag
- From: Gustavo Noronha Silva <gns src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [libsoup/content-sniffing-update: 2/9] Implement the check-apache-bug flag
- Date: Wed, 11 Dec 2013 13:10:38 +0000 (UTC)
commit 6d1819d625552498af2a8696fa46e35bf2c6c7f3
Author: Gustavo Noronha Silva <gns gnome org>
Date: Sun Dec 8 19:11:21 2013 +0100
Implement the check-apache-bug flag
Run the text or binary algorithm when some specific text/plain Content-Types
are provided, since older versions of Apache would send that type for binary
files. http://mimesniff.spec.whatwg.org/#dfnReturnLink-0
libsoup/soup-content-sniffer.c | 37 ++++++++++++++++++++++++++++---------
tests/resources/text.txt | 1 +
tests/sniffing-test.c | 7 +++++--
3 files changed, 34 insertions(+), 11 deletions(-)
---
diff --git a/libsoup/soup-content-sniffer.c b/libsoup/soup-content-sniffer.c
index 7f79610..54885f0 100644
--- a/libsoup/soup-content-sniffer.c
+++ b/libsoup/soup-content-sniffer.c
@@ -2,7 +2,7 @@
/*
* soup-content-sniffer.c
*
- * Copyright (C) 2009 Gustavo Noronha Silva.
+ * Copyright (C) 2009, 2013 Gustavo Noronha Silva.
*
* This code implements the following specification:
*
@@ -297,7 +297,7 @@ sniff_unknown (SoupContentSniffer *sniffer, SoupBuffer *buffer,
return g_strdup ("text/plain");
}
-/* HTML5: 2.7.3 Content-Type sniffing: text or binary */
+/* MIMESNIFF: 7.2 Sniffing a mislabeled binary resource */
static char*
sniff_text_or_binary (SoupContentSniffer *sniffer, SoupBuffer *buffer)
{
@@ -306,15 +306,20 @@ sniff_text_or_binary (SoupContentSniffer *sniffer, SoupBuffer *buffer)
gboolean looks_binary = FALSE;
int i;
- /* Detecting UTF-16BE, UTF-16LE, or UTF-8 BOMs means it's text/plain */
- if (resource_length >= 4) {
+ /* 2. Detecting UTF-16BE, UTF-16LE BOMs means it's text/plain */
+ if (resource_length >= 2) {
if ((resource[0] == 0xFE && resource[1] == 0xFF) ||
- (resource[0] == 0xFF && resource[1] == 0xFE) ||
- (resource[0] == 0xEF && resource[1] == 0xBB && resource[2] == 0xBF))
+ (resource[0] == 0xFF && resource[1] == 0xFE))
return g_strdup ("text/plain");
}
- /* Look to see if any of the first n bytes looks binary */
+ /* 3. UTF-8 BOM. */
+ if (resource_length >= 3) {
+ if (resource[0] == 0xEF && resource[1] == 0xBB && resource[2] == 0xBF)
+ return g_strdup ("text/plain");
+ }
+
+ /* 4. Look to see if any of the first n bytes looks binary */
for (i = 0; i < resource_length; i++) {
if (byte_looks_binary[resource[i]]) {
looks_binary = TRUE;
@@ -325,6 +330,9 @@ sniff_text_or_binary (SoupContentSniffer *sniffer, SoupBuffer *buffer)
if (!looks_binary)
return g_strdup ("text/plain");
+ /* 5. Execute 7.1 Identifying a resource with an unknown MIME type.
+ * TODO: sniff-scriptable needs to be unset.
+ */
return sniff_unknown (sniffer, buffer, TRUE);
}
@@ -472,14 +480,25 @@ soup_content_sniffer_real_sniff (SoupContentSniffer *sniffer, SoupMessage *msg,
content_type = soup_message_headers_get_content_type (msg->response_headers, params);
- /* These comparisons are done in an ASCII-case-insensitive
- * manner because the spec requires it */
+ /* MIMESNIFF: 7 Determining the sniffed MIME type of a resource. */
+
+ /* 1. Unknown/undefined supplied type respecting sniff-scriptable. */
if ((content_type == NULL) ||
!g_ascii_strcasecmp (content_type, "unknown/unknown") ||
!g_ascii_strcasecmp (content_type, "application/unknown") ||
!g_ascii_strcasecmp (content_type, "*/*"))
return sniff_unknown (sniffer, buffer, FALSE);
+ /* TODO: 2. no-sniff flag handling. */
+
+ /* 3. check-for-apache-bug */
+ if ((content_type != NULL) &&
+ (g_str_equal (content_type, "text/plain") ||
+ g_str_equal (content_type, "text/plain; charset=ISO-8859-1") ||
+ g_str_equal (content_type, "text/plain; charset=iso-8859-1") ||
+ g_str_equal (content_type, "text/plain; charset=UTF-8")))
+ return sniff_text_or_binary (sniffer, buffer);
+
if (g_str_has_suffix (content_type, "+xml") ||
!g_ascii_strcasecmp (content_type, "text/xml") ||
!g_ascii_strcasecmp (content_type, "application/xml"))
diff --git a/tests/resources/text.txt b/tests/resources/text.txt
new file mode 100644
index 0000000..ff7066f
--- /dev/null
+++ b/tests/resources/text.txt
@@ -0,0 +1 @@
+This is just text.
diff --git a/tests/sniffing-test.c b/tests/sniffing-test.c
index cbebaba..584fa13 100644
--- a/tests/sniffing-test.c
+++ b/tests/sniffing-test.c
@@ -59,7 +59,7 @@ server_callback (SoupServer *server, SoupMessage *msg,
"Content-Type", "text/plain");
}
- if (g_str_has_prefix (path, "/text_or_binary/")) {
+ if (g_str_has_prefix (path, "/text_or_binary/") || g_str_has_prefix (path, "/apache_bug/")) {
char *base_name = g_path_get_basename (path);
char *file_name = g_strdup_printf (SRCDIR "/resources/%s", base_name);
@@ -514,7 +514,10 @@ main (int argc, char **argv)
do_signals_test (TRUE, TRUE, TRUE, FALSE, TRUE);
do_signals_test (TRUE, TRUE, TRUE, TRUE, TRUE);
- /* Test the text_or_binary sniffing path */
+ /* Test the apache bug sniffing path */
+
+ test_sniffing ("/apache_bug/text_binary.txt", "application/octet-stream");
+ test_sniffing ("/apache_bug/text.txt", "text/plain");
/* GIF is a 'safe' type */
test_sniffing ("/text_or_binary/home.gif", "image/gif");
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]