beagle r4819 - trunk/beagle/Util



Author: dbera
Date: Wed Jul  9 12:37:13 2008
New Revision: 4819
URL: http://svn.gnome.org/viewvc/beagle?rev=4819&view=rev

Log:
Use a custom marshaller (based on Mono's FileNameMarshaler) to convert filenames returned by libc to UTF-8 strings. If MONO_EXTERNAL_ENCODINGS (M_E_E) is set to something other than "utf8", the platform default encoding is used instead of UTF-8. (This is a bit odd: when M_E_E is set, its value is not itself used; Encoding.Default is used instead.) Non-UTF-8 filenames should now be handled correctly.


Added:
   trunk/beagle/Util/FileNameMarshaler.cs   (contents, props changed)
Modified:
   trunk/beagle/Util/DirectoryWalker.cs
   trunk/beagle/Util/Inotify.cs
   trunk/beagle/Util/Makefile.am

Modified: trunk/beagle/Util/DirectoryWalker.cs
==============================================================================
--- trunk/beagle/Util/DirectoryWalker.cs	(original)
+++ trunk/beagle/Util/DirectoryWalker.cs	Wed Jul  9 12:37:13 2008
@@ -30,6 +30,8 @@
 using System.Runtime.InteropServices;
 using System.Text;
 
+using Mono.Unix.Native;
+
 namespace Beagle.Util {
 
 	public class DirectoryWalker {
@@ -38,26 +40,31 @@
 		private delegate object FileObjectifier (string path, string name);
 
 		[DllImport ("libc", SetLastError = true)]
-		private static extern IntPtr opendir (string name);
-		
+		private static extern IntPtr opendir ([MarshalAs (UnmanagedType.CustomMarshaler, MarshalTypeRef=typeof(Mono.Unix.Native.FileNameMarshaler))] string name);
+
 		[DllImport ("libc", SetLastError = true)]
 		private static extern int closedir (IntPtr dir);
 
 		[DllImport ("libbeagleglue", EntryPoint = "beagled_utils_readdir", SetLastError = true)]
-		private static extern int sys_readdir (IntPtr dir, [Out] StringBuilder name, int max_len);
+		private static extern int sys_readdir (IntPtr dir, [Out] byte[] buf, int max_len);
 		
-		private static string readdir (IntPtr dir, StringBuilder buffer)
+		private static Encoding filename_encoding = Encoding.Default;
+
+		private static string readdir (IntPtr dir, ref byte[] buffer)
 		{
 			int r = 0;
-			buffer.Length = 0;
-			while (r == 0 && buffer.Length == 0) {
-			       r = sys_readdir (dir, buffer, buffer.Capacity); 
-			}
 
+			// We can reuse the same buffer since sys_readdir
+			// will fill up the rest of the space with null characters
+			r = sys_readdir (dir, buffer, buffer.Length); 
 			if (r == -1)
 				return null;
 
-			return buffer.ToString ();
+			int n_chars = 0;
+			while (n_chars < buffer.Length && buffer [n_chars] != 0)
+				++n_chars;
+
+			return FileNameMarshaler.LocalToUTF8 (buffer, 0, n_chars);
 		}
 
 		private class FileEnumerator : IEnumerator {
@@ -67,7 +74,7 @@
 			FileObjectifier file_objectifier;
 			IntPtr dir_handle = IntPtr.Zero;
 			string current;
-			StringBuilder name_buffer = new StringBuilder (256);
+			byte[] buffer = new byte [256];
 
 			public bool NamesOnly = false;
 			
@@ -108,15 +115,7 @@
 				bool skip_file = false;
 
 				do {
-					// FIXME?  I think this might be a bug in mono, but the
-					// capacity of the StringBuilder can apparently shrink
-					// from underneath us.  This leads to truncated filenames,
-					// and the DirectoryWalker drops them because the file
-					// doesn't exist.  Adding an EnsureCapacity() here fixes
-					// this.
-					name_buffer.EnsureCapacity (256);
-
-					current = readdir (dir_handle, name_buffer);
+					current = readdir (dir_handle, ref buffer);
 					if (current == null)
 						break;
 

Added: trunk/beagle/Util/FileNameMarshaler.cs
==============================================================================
--- (empty file)
+++ trunk/beagle/Util/FileNameMarshaler.cs	Wed Jul  9 12:37:13 2008
@@ -0,0 +1,141 @@
+//
+// Mono.Unix/FileNameMarshaler.cs
+//
+// Authors:
+//   Jonathan Pryor (jonpryor vt edu)
+//
+// (C) 2005 Jonathan Pryor
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the
+// "Software"), to deal in the Software without restriction, including
+// without limitation the rights to use, copy, modify, merge, publish,
+// distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to
+// the following conditions:
+// 
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+
+using System;
+using System.Text;
+using System.Runtime.InteropServices;
+using Mono.Unix;
+
+using Beagle.Util;
+
+namespace Mono.Unix.Native {
+
+	class FileNameMarshaler : ICustomMarshaler {
+
+		private static bool local_is_utf8;
+		private static Encoding platform_encoding;
+
+		static FileNameMarshaler ()
+		{
+			if (Encoding.Default != Encoding.UTF8)
+				local_is_utf8 = false;
+			else
+				local_is_utf8 = true;
+
+			if (! local_is_utf8) {
+				// We require that MONO_EXTERNAL_ENCODINGS be set; if it is unset or "utf8", UTF-8 is assumed.
+				// UnixEncoding is nice but does not work for us since I see no way to
+				// convert UTF-8 to UnixEncoding and vice versa.
+				string mono_ext_encoding = Environment.GetEnvironmentVariable ("MONO_EXTERNAL_ENCODINGS");
+				local_is_utf8 = (mono_ext_encoding == null || String.Compare (mono_ext_encoding, "utf8", true) == 0);
+			}
+
+			if (local_is_utf8)
+				platform_encoding = Encoding.UTF8;
+			else
+				platform_encoding = Encoding.Default;
+
+			Log.Debug ("Using {0}utf8 encoding for filenames", local_is_utf8 ? "" : "non-");
+		}
+
+		public static bool LocalIsUTF8 {
+			get { return local_is_utf8; }
+		}
+
+		private static FileNameMarshaler Instance = new FileNameMarshaler ();
+
+		public static ICustomMarshaler GetInstance (string s)
+		{
+			return Instance;
+		}
+
+		public void CleanUpManagedData (object o)
+		{
+		}
+
+		public void CleanUpNativeData (IntPtr pNativeData)
+		{
+			// Console.WriteLine ("# FileNameMarshaler.CleanUpNativeData ({0:x})", pNativeData);
+			UnixMarshal.FreeHeap (pNativeData);
+		}
+
+		public int GetNativeDataSize ()
+		{
+			return IntPtr.Size;
+		}
+
+		public IntPtr MarshalManagedToNative (object obj)
+		{
+			string s = obj as string;
+			if (s == null)
+				return IntPtr.Zero;
+			IntPtr p = UnixMarshal.StringToHeap (s, platform_encoding);
+			// Console.WriteLine ("# FileNameMarshaler.MarshalManagedToNative for `{0}'={1:x}", s, p);
+			return p;
+		}
+
+		public object MarshalNativeToManaged (IntPtr pNativeData)
+		{
+			string s = UnixMarshal.PtrToString (pNativeData, platform_encoding);
+			// Console.WriteLine ("# FileNameMarshaler.MarshalNativeToManaged ({0:x})=`{1}'",
+			// 		pNativeData, s);
+			return s;
+		}
+
+		public static string LocalToUTF8 (string local_filename)
+		{
+			if (LocalIsUTF8)
+				return local_filename;
+
+			byte[] bytes = Encoding.Default.GetBytes (local_filename);
+			bytes = Encoding.Convert (Encoding.Default, Encoding.UTF8, bytes);
+			return Encoding.UTF8.GetString (bytes);
+		}
+
+		public static string LocalToUTF8 (byte[] filename_bytes, int begin, int count)
+		{
+			if (LocalIsUTF8)
+				return Encoding.UTF8.GetString (filename_bytes, begin, count);
+
+			filename_bytes = Encoding.Convert (Encoding.Default, Encoding.UTF8, filename_bytes, begin, count);
+			return Encoding.UTF8.GetString (filename_bytes);
+		}
+
+		public static string UTF8ToLocal (string utf8_filename)
+		{
+			if (LocalIsUTF8)
+				return utf8_filename;
+
+			byte[] bytes = Encoding.UTF8.GetBytes (utf8_filename);
+			bytes = Encoding.Convert (Encoding.UTF8, Encoding.Default, bytes);
+			return Encoding.Default.GetString (bytes);
+		}
+	}
+}
+
+// vim: noexpandtab

Modified: trunk/beagle/Util/Inotify.cs
==============================================================================
--- trunk/beagle/Util/Inotify.cs	(original)
+++ trunk/beagle/Util/Inotify.cs	Wed Jul  9 12:37:13 2008
@@ -34,6 +34,8 @@
 using System.Text.RegularExpressions;
 using System.Threading;
 
+using Mono.Unix.Native;
+
 namespace Beagle.Util {
 
 	public class Inotify {
@@ -94,7 +96,7 @@
 		static extern int inotify_glue_init ();
 
 		[DllImport ("libbeagleglue")]
-		static extern int inotify_glue_watch (int fd, string filename, EventType mask);
+		static extern int inotify_glue_watch (int fd, [MarshalAs (UnmanagedType.CustomMarshaler, MarshalTypeRef=typeof(Mono.Unix.Native.FileNameMarshaler))] string filename, EventType mask);
 
 		[DllImport ("libbeagleglue")]
 		static extern int inotify_glue_ignore (int fd, int wd);
@@ -536,7 +538,6 @@
 
 		private static unsafe void SnarfWorker ()
 		{
-			Encoding filename_encoding = Encoding.UTF8;
 			int event_size = Marshal.SizeOf (typeof (inotify_event));
 			
 			while (running) {
@@ -588,7 +589,7 @@
 						++n_chars;
 					qe.Filename = "";
 					if (n_chars > 0)
-						qe.Filename = filename_encoding.GetString (filename_bytes, 0, n_chars);
+						qe.Filename = FileNameMarshaler.LocalToUTF8 (filename_bytes, 0, n_chars);
 
 					new_events.Add (qe);
 					nr -= event_size + (int) raw_event.len;

Modified: trunk/beagle/Util/Makefile.am
==============================================================================
--- trunk/beagle/Util/Makefile.am	(original)
+++ trunk/beagle/Util/Makefile.am	Wed Jul  9 12:37:13 2008
@@ -70,6 +70,7 @@
 	$(srcdir)/ExceptionHandlingThread.cs	\
 	$(srcdir)/ExtendedAttribute.cs  	\
 	$(srcdir)/FileAdvise.cs			\
+	$(srcdir)/FileNameMarshaler.cs		\
 	$(srcdir)/FileSystem.cs			\
 	$(srcdir)/FrequencyStatistics.cs	\
 	$(srcdir)/FSpotTools.cs         	\
@@ -240,6 +241,7 @@
 	$(srcdir)/TeeTextWriter.cs		\
 	$(srcdir)/DirectoryWalker.cs		\
 	$(srcdir)/FileSystem.cs			\
+	$(srcdir)/FileNameMarshaler.cs		\
 	$(srcdir)/ExceptionHandlingThread.cs
 
 Inotify.exe: $(INOTIFY_TEST_CSFILES)



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]