patch for gnome_vfs_get_mime_type_internal



Hi,

Here is a patch which cleans up gnome_vfs_get_mime_type_internal a
little.
Gzip files are now detected using a magic number (instead of using a
specific looks_like_gzip function), and I rewrote looks_like_text to use
glib functions to deal with UTF-8. While I was rewriting looks_like_text,
I thought it might be an idea to also test whether we might have a text
file encoded using the current locale, if possible. But that might hurt
performance, so I haven't done it.
IMO the looks_like_mp3 function should be removed entirely and mp3
detection should be done only by looking at the extension (there is no
magic number for mp3s), but yakk is strongly against that, so that
probably won't happen; I just thought I'd mention it :p
sniff.diff is a patch against gnome-vfs HEAD, and mime.diff is a patch
against gnome-mime-data HEAD.

While debugging this patch, I noticed that when reading a directory,
Nautilus sniffs the mime type for a given file 2 or 3 times (once
when it uses read_directory, and a second time when using
get_file_info). Maybe that could be improved so it is done only once;
I'll look into that.

Christophe

Christophe

Index: libgnomevfs/gnome-vfs-mime-magic.c
===================================================================
RCS file: /cvs/gnome/gnome-vfs/libgnomevfs/gnome-vfs-mime-magic.c,v
retrieving revision 1.41
diff -u -r1.41 gnome-vfs-mime-magic.c
--- libgnomevfs/gnome-vfs-mime-magic.c	16 Jun 2002 07:33:26 -0000	1.41
+++ libgnomevfs/gnome-vfs-mime-magic.c	5 Nov 2002 14:04:44 -0000
@@ -34,7 +34,7 @@
 #include <glib/gstrfuncs.h>
 #include <glib/gthread.h>
 #include <glib/gutils.h>
-
+#include <glib/gunicode.h>
 static gboolean
 is_octal_digit (char ch)
 {
@@ -732,8 +732,7 @@
 gboolean
 gnome_vfs_sniff_buffer_looks_like_text (GnomeVFSMimeSniffBuffer *sniff_buffer)
 {
-	int index;
-	guchar ch;
+	gchar *end;
 	
 	gnome_vfs_mime_sniff_buffer_get (sniff_buffer, GNOME_VFS_TEXT_SNIFF_LENGTH);
 
@@ -741,46 +740,21 @@
 		return FALSE;
 	}
 	
-	for (index = 0; index < sniff_buffer->buffer_length - 3; index++) {
-		ch = sniff_buffer->buffer[index];
-		if (!g_ascii_isprint (ch) && !g_ascii_isspace (ch)) {
-			/* check if we are dealing with UTF-8 text
-			 * 
-			 *	 bytes | bits | representation
-			 *	     1 |    7 | 0vvvvvvv
-			 *	     2 |   11 | 110vvvvv 10vvvvvv
-			 *	     3 |   16 | 1110vvvv 10vvvvvv 10vvvvvv
-			 *	     4 |   21 | 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv
-     			 */
-			if ((ch & 0xc0) != 0xc0) {
-				/* not a UTF-8 text */
-				return FALSE;
-			}
+	if (g_utf8_validate (sniff_buffer->buffer, 
+			     sniff_buffer->buffer_length, (const gchar**)&end))
+	{
+		return TRUE;
+	} else {
+		/* Check whether the string was truncated in the middle of
+		 * a valid UTF8 char, or if we really have an invalid
+		 * UTF8 string
+     		 */
+		gint remaining_bytes = sniff_buffer->buffer_length;
 
-			if ((ch & 0x20) == 0) {
-				/* check if this is a 2-byte UTF-8 letter */
-				++index;
-				if ((sniff_buffer->buffer[index] & 0xc0) != 0x80) {
-					return FALSE;
-				}
-			} else if ((ch & 0x30) == 0x20) {
-				/* check if this is a 3-byte UTF-8 letter */
-				if ((sniff_buffer->buffer[++index] & 0xc0) != 0x80
-				    || (sniff_buffer->buffer[++index] & 0xc0) != 0x80) {
-					return FALSE;
-				}
-			} else if ((ch & 0x38) == 0x30) {
-				/* check if this is a 4-byte UTF-8 letter */
-				if ((sniff_buffer->buffer[++index] & 0xc0) != 0x80
-				    || (sniff_buffer->buffer[++index] & 0xc0) != 0x80
-				    || (sniff_buffer->buffer[++index] & 0xc0) != 0x80) {
-					return FALSE;
-				}
-			}
-		}
-	}
+		remaining_bytes -= (end-((gchar*)sniff_buffer->buffer));
 	
-	return TRUE;
+ 		return (g_utf8_get_char_validated(end, remaining_bytes) == -2);
+	} 
 }
 
 static int bitrates[2][15] = {
@@ -921,35 +895,3 @@
 
 	return FALSE;
 }
-
-/**
- * gnome_vfs_sniff_buffer_looks_like_gzip:
- * @sniff_buffer: buffer to examine
- *
- * Return value: returns %TRUE if the contents of @sniff_buffer appear to
- * be in the GZip format.
- **/
-gboolean
-gnome_vfs_sniff_buffer_looks_like_gzip (GnomeVFSMimeSniffBuffer *sniff_buffer,
-	const char *file_name)
-{
-	if (sniff_buffer == NULL) {
-		return FALSE;
-	}
-	
-	if (gnome_vfs_mime_sniff_buffer_get (sniff_buffer, 2) != GNOME_VFS_OK) {
-		return FALSE;
-	}
-	
-	if (sniff_buffer->buffer[0] != 0x1F || sniff_buffer->buffer[1] != 0x8B) {
-		/* not a gzipped file */
-		return FALSE;
-	}
-	
-	if (file_name == NULL) {
-		return TRUE;
-	}
-	
-	return TRUE;
-}
-
Index: libgnomevfs/gnome-vfs-mime-sniff-buffer.h
===================================================================
RCS file: /cvs/gnome/gnome-vfs/libgnomevfs/gnome-vfs-mime-sniff-buffer.h,v
retrieving revision 1.10
diff -u -r1.10 gnome-vfs-mime-sniff-buffer.h
--- libgnomevfs/gnome-vfs-mime-sniff-buffer.h	3 Aug 2001 00:39:58 -0000	1.10
+++ libgnomevfs/gnome-vfs-mime-sniff-buffer.h	5 Nov 2002 14:04:44 -0000
@@ -68,10 +68,6 @@
 					 (GnomeVFSMimeSniffBuffer	*buffer);
 gboolean		 gnome_vfs_sniff_buffer_looks_like_mp3
 					 (GnomeVFSMimeSniffBuffer	*buffer);
-gboolean		 gnome_vfs_sniff_buffer_looks_like_gzip
-					 (GnomeVFSMimeSniffBuffer 	*sniff_buffer,
-					  const char 			*file_name);
-
 G_END_DECLS
 
 #endif
Index: libgnomevfs/gnome-vfs-mime.c
===================================================================
RCS file: /cvs/gnome/gnome-vfs/libgnomevfs/gnome-vfs-mime.c,v
retrieving revision 1.35
diff -u -r1.35 gnome-vfs-mime.c
--- libgnomevfs/gnome-vfs-mime.c	26 Jun 2002 19:42:28 -0000	1.35
+++ libgnomevfs/gnome-vfs-mime.c	5 Nov 2002 14:04:44 -0000
@@ -519,21 +519,17 @@
 		result = gnome_vfs_mime_get_type_from_magic_table (buffer);
 		
 		if (result != NULL) {
-			return result;
-		}
+			if (strcmp (result, "application/x-gzip") == 0) {
 		
-		/* So many file types come compressed by gzip that extensions are
-		 * more reliable than magic typing. If the file has a suffix, then
-		 * use the type from the suffix.
-		 *
-		 * FIXME bugzilla.eazel.com 6867:
-		 * Allow specific mime types to override magic detection
-		 */
-		if (gnome_vfs_sniff_buffer_looks_like_gzip (buffer, file_name)) {
-			/* gzip -- treat extensions as a more accurate source
-			 * of type information.
+				/* So many file types come compressed by gzip 
+				 * that extensions are more reliable than magic
+				 * typing. If the file has a suffix, then use 
+				 * the type from the suffix.
+		 		 *
+				 * FIXME bugzilla.gnome.org 46867:
+				 * Allow specific mime types to override 
+				 * magic detection
 			 */
-			
 			if (file_name != NULL) {
 				result = gnome_vfs_mime_type_from_name_or_default (file_name, NULL);
 			}
@@ -541,17 +537,18 @@
 			if (result != NULL) {
 				return result;
 			}
-			
-			/* Didn't find an extension match, assume gzip. */
+				/* Didn't find an extension match,
+				 * assume gzip. */
 			return "application/x-gzip";
 		}
+			return result;
+		}
 		
 		if (result == NULL) {
 			if (gnome_vfs_sniff_buffer_looks_like_text (buffer)) {
-				/* Text file -- treat extensions as a more accurate source
-				 * of type information.
+				/* Text file -- treat extensions as a more 
+				 * accurate source of type information.
 				 */
-				
 				if (file_name != NULL) {
 					result = gnome_vfs_mime_type_from_name_or_default (file_name, NULL);
 				}
@@ -621,6 +618,7 @@
 	buffer = gnome_vfs_mime_sniff_buffer_new_from_handle (handle);
 
 	base_name = gnome_vfs_uri_extract_short_path_name (uri);
+
 	result = gnome_vfs_get_mime_type_internal (buffer, base_name);
 	g_free (base_name);
 
@@ -798,7 +796,6 @@
 
 	buffer = gnome_vfs_mime_sniff_buffer_new_from_existing_data
 		(data, data_size);
-
 	result = gnome_vfs_get_mime_type_internal (buffer, NULL);	
 
 	gnome_vfs_mime_sniff_buffer_free (buffer);
Index: gnome-vfs-mime-magic
===================================================================
RCS file: /cvs/gnome/gnome-mime-data/gnome-vfs-mime-magic,v
retrieving revision 1.55
diff -u -r1.55 gnome-vfs-mime-magic
--- gnome-vfs-mime-magic	20 Aug 2002 08:39:37 -0000	1.55
+++ gnome-vfs-mime-magic	5 Nov 2002 14:05:56 -0000
@@ -186,6 +186,7 @@
 0	string		MZ					application/x-ms-dos-executable
 0	string		%!					application/postscript
 0       string          BZh                                     application/x-bzip
+0	string		\x1f\x8b				application/x-gzip
 0	string		\037\235				application/x-compress
 0	string		\367\002				application/x-dvi
 0	string		\367\203				application/x-font-tex


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]