[libsoup/content-sniffing] Refactor the handling of the unknown path, to handle more cases
- From: Gustavo Noronha Silva <gns src gnome org>
- To: svn-commits-list gnome org
- Subject: [libsoup/content-sniffing] Refactor the handling of the unknown path, to handle more cases
- Date: Thu, 18 Jun 2009 21:57:12 -0400 (EDT)
commit c406891948e46be6b7cdec262989f6c2c375959a
Author: Gustavo Noronha Silva <gns gnome org>
Date: Thu Jun 18 22:16:21 2009 -0300
Refactor the handling of the unknown path, to handle more cases
The unknown handling was first written for the text or binary
algorithm, but it is also used stand-alone, when handling empty,
unknown/unknown, or application/unknown content types.
libsoup/soup-content-sniffer.c | 92 +++++++++++++++++++++++----------------
tests/sniffing-test.c | 29 +++++++++++++
2 files changed, 83 insertions(+), 38 deletions(-)
---
diff --git a/libsoup/soup-content-sniffer.c b/libsoup/soup-content-sniffer.c
index 74884a4..a7a51f9 100644
--- a/libsoup/soup-content-sniffer.c
+++ b/libsoup/soup-content-sniffer.c
@@ -231,44 +231,20 @@ static char kByteLooksBinary[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0 - 0xFF
};
-
-/* HTML5: 2.7.3 Content-Type sniffing: text or binary */
+/* HTML5: 2.7.4 Content-Type sniffing: unknown type */
static char*
-sniff_text_or_binary (SoupContentSniffer *sniffer, SoupMessage *msg, SoupBuffer *buffer)
+sniff_unknown (SoupContentSniffer *sniffer, SoupMessage *msg, SoupBuffer *buffer, gboolean for_text_or_binary)
{
const char *resource = buffer->data;
int resource_length = MIN(512, buffer->length);
- gboolean looks_binary = FALSE;
int i;
- /* Detecting UTF-16BE, UTF-16LE, and UTF-8 BOMs means it's text/plain */
- if (resource_length >= 4) {
- if ((resource[0] == 0xfe && resource[1] == 0xff) ||
- (resource[0] == 0xff && resource[1] == 0xfe) ||
- (resource[0] == 0xef && resource[1] == 0xbb && resource[2] == 0xbf))
- return g_strdup ("text/plain");
- }
-
- /* Look to see if any of the first n bytes looks binary */
- for (i = 0; i < resource_length; i++) {
- if (kByteLooksBinary[(unsigned char)resource[i]]) {
- looks_binary = TRUE;
- break;
- }
- }
-
- if (!looks_binary)
- return g_strdup ("text/plain");
-
- /* HTML5: 2.7.4 Content-Type sniffing: unknown type
- *
- * This will probably live in its own function, since it is
- * used by other parts of the algorithm
- */
for (i = 0; types_table[i].pattern != NULL ; i++) {
struct _type_info *type_row = &(types_table[i]);
- if (type_row->scriptable)
+ /* The scriptable types should be skipped for the text
+ * or binary path, but considered for other paths */
+ if (for_text_or_binary && type_row->scriptable)
continue;
if (type_row->has_ws) {
@@ -322,6 +298,37 @@ sniff_text_or_binary (SoupContentSniffer *sniffer, SoupMessage *msg, SoupBuffer
return g_strdup ("application/octet-stream");
}
+/* HTML5: 2.7.3 Content-Type sniffing: text or binary */
+static char*
+sniff_text_or_binary (SoupContentSniffer *sniffer, SoupMessage *msg, SoupBuffer *buffer)
+{
+ const char *resource = buffer->data;
+ int resource_length = MIN(512, buffer->length);
+ gboolean looks_binary = FALSE;
+ int i;
+
+ /* Detecting UTF-16BE, UTF-16LE, and UTF-8 BOMs means it's text/plain */
+ if (resource_length >= 4) {
+ if ((resource[0] == 0xfe && resource[1] == 0xff) ||
+ (resource[0] == 0xff && resource[1] == 0xfe) ||
+ (resource[0] == 0xef && resource[1] == 0xbb && resource[2] == 0xbf))
+ return g_strdup ("text/plain");
+ }
+
+ /* Look to see if any of the first n bytes looks binary */
+ for (i = 0; i < resource_length; i++) {
+ if (kByteLooksBinary[(unsigned char)resource[i]]) {
+ looks_binary = TRUE;
+ break;
+ }
+ }
+
+ if (!looks_binary)
+ return g_strdup ("text/plain");
+
+ return sniff_unknown (sniffer, msg, buffer, TRUE);
+}
+
static char*
sniff_gio (SoupContentSniffer *sniffer, SoupMessage *msg, SoupBuffer *buffer)
{
@@ -346,18 +353,25 @@ sniff_gio (SoupContentSniffer *sniffer, SoupMessage *msg, SoupBuffer *buffer)
static char*
sniff (SoupContentSniffer *sniffer, SoupMessage *msg, SoupBuffer *buffer)
{
+ const char *content_type_with_params;
const char *content_type;
- content_type = soup_message_headers_get_one (msg->response_headers, "Content-Type");
+ content_type = soup_message_headers_get_content_type (msg->response_headers, NULL);
+
+ /* These comparisons are done in an ASCII-case-insensitive
+ * manner because the spec requires it */
+ if ((content_type == NULL) ||
+ !g_ascii_strcasecmp (content_type, "unknown/unknown") ||
+ !g_ascii_strcasecmp (content_type, "application/unknown"))
+ return sniff_unknown (sniffer, msg, buffer, FALSE);
- if (content_type == NULL)
- return sniff_gio (sniffer, msg, buffer);
+ content_type_with_params = soup_message_headers_get_one (msg->response_headers, "Content-Type");
/* If we got text/plain, use text_or_binary */
- if (g_str_equal (content_type, "text/plain") ||
- g_str_equal (content_type, "text/plain; charset=ISO-8859-1") ||
- g_str_equal (content_type, "text/plain; charset=iso-8859-1") ||
- g_str_equal (content_type, "text/plain; charset=UTF-8")) {
+ if (g_str_equal (content_type_with_params, "text/plain") ||
+ g_str_equal (content_type_with_params, "text/plain; charset=ISO-8859-1") ||
+ g_str_equal (content_type_with_params, "text/plain; charset=iso-8859-1") ||
+ g_str_equal (content_type_with_params, "text/plain; charset=UTF-8")) {
return sniff_text_or_binary (sniffer, msg, buffer);
}
@@ -378,8 +392,10 @@ soup_content_sniffer_got_headers_cb (SoupMessage *msg, SoupContentSniffer *sniff
const char *content_type = soup_message_headers_get_content_type (msg->response_headers, NULL);
if ((content_type == NULL)
- || (strcmp (content_type, "application/octet-stream") == 0)
- || (strcmp (content_type, "text/plain") == 0)) {
+ || (g_ascii_strcasecmp (content_type, "application/octet-stream") == 0)
+ || (g_ascii_strcasecmp (content_type, "text/plain") == 0)
+ || (g_ascii_strcasecmp (content_type, "unknown/unknown") == 0)
+ || (g_ascii_strcasecmp (content_type, "application/unknown") == 0)) {
priv->should_sniff_content = TRUE;
priv->bytes_for_sniffing = content_sniffer_class->get_buffer_size (sniffer);
}
diff --git a/tests/sniffing-test.c b/tests/sniffing-test.c
index 040ac89..a23ab3a 100644
--- a/tests/sniffing-test.c
+++ b/tests/sniffing-test.c
@@ -70,6 +70,29 @@ server_callback (SoupServer *server, SoupMessage *msg,
contents,
length);
}
+
+ if (g_str_has_prefix (path, "/unknown/")) {
+ char *base_name = g_path_get_basename (path);
+ char *file_name = g_strdup_printf ("resources/%s", base_name);
+
+ g_file_get_contents (file_name,
+ &contents, &length,
+ &error);
+
+ g_free (base_name);
+ g_free (file_name);
+
+ if (error) {
+ g_error ("%s", error->message);
+ g_error_free (error);
+ exit (1);
+ }
+
+ soup_message_set_response (msg, "UNKNOWN/unknown",
+ SOUP_MEMORY_TAKE,
+ contents,
+ length);
+ }
}
static gboolean
@@ -294,6 +317,12 @@ main (int argc, char **argv)
*/
test_sniffing ("/text_or_binary/test.html", "text/plain");
+ /* Test the unknown sniffing path */
+
+ test_sniffing ("/unknown/test.html", "text/html");
+ test_sniffing ("/unknown/home.gif", "image/gif");
+ test_sniffing ("/unknown/mbox", "application/octet-stream");
+
soup_uri_free (base_uri);
test_cleanup ();
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]