Re: GSoC Weekly Report



On 02/10/2007, Kevin Kubasik <kevin kubasik net> wrote:
> Very cool, and good to hear. If Arun could share a patch for his
> implementation, that would be awesome in terms of preventing wheel
> reinvention ;) If Arun is unable, or doesn't have the time to look
> into a hybrid solution, I wouldn't mind doing some investigative work,

I've been completely swamped with work here in the first half of the
semester, and I spent a little time getting the xesam-adaptor updated
to the latest spec. Do let me know if you're taking this up, so
there's no duplication of effort. The patch against r4013 is attached.

> I think the biggest decision comes when it's time to determine what
> our cutoff is (size-wise). While there is a little extra complication
> introduced by a hybrid system, I don't see it being a major issue to
> implement. My thought would just be to have a table in the
> TextCache.db which denotes if a uri is stored in db or on disk. The
> major concern is the cost of 2 sqlite queries per cache item.

Might it not be easier to have a boolean field denoting whether the
stored value is an on-disk URI or the blob itself? Or better, if this
is possible, just examine the first few bytes to see whether they are
some ASCII text (or are not the Zip magic bytes)?

Best,
-- 
Arun Raghavan
(http://nemesis.accosted.net)
v2sw5Chw4+5ln4pr6$OFck2ma4+9u8w3+1!m?l7+9GSCKi056
e6+9i4b8/9HTAen4+5g4/8APa2Xs8r1/2p5-8 hackerkey.com
Index: beagled/FileSystemQueryable/FileSystemQueryable.cs
===================================================================
--- beagled/FileSystemQueryable/FileSystemQueryable.cs	(revision 4013)
+++ beagled/FileSystemQueryable/FileSystemQueryable.cs	(working copy)
@@ -1810,17 +1810,12 @@
 			// is stored in a property.
 			Uri uri = UriFu.EscapedStringToUri (hit ["beagle:InternalUri"]);
 
-			string path = TextCache.UserCache.LookupPathRaw (uri);
+			Stream text = TextCache.UserCache.LookupText(uri, hit.Uri.LocalPath);
 
-			if (path == null)
+			if (text == null)
 				return null;
 
-			// If this is self-cached, use the remapped Uri
-			if (path == TextCache.SELF_CACHE_TAG)
-				return SnippetFu.GetSnippetFromFile (query_terms, hit.Uri.LocalPath, full_text);
-
-			path = Path.Combine (TextCache.UserCache.TextCacheDir, path);
-			return SnippetFu.GetSnippetFromTextCache (query_terms, path, full_text);
+			return SnippetFu.GetSnippet(query_terms, new StreamReader(text), full_text);
 		}
 
 		override public void Start ()
Index: beagled/TextCache.cs
===================================================================
--- beagled/TextCache.cs	(revision 4013)
+++ beagled/TextCache.cs	(working copy)
@@ -37,6 +37,53 @@
 
 namespace Beagle.Daemon {
 
+	// We only have this class because GZipOutputStream doesn't let us
+	// retrieve the baseStream
+	public class TextCacheStream : GZipOutputStream {
+		private Stream stream;
+
+		public Stream BaseStream {
+			get { return stream; }
+		}
+
+		public TextCacheStream() : this(new MemoryStream())
+		{
+		}
+
+		public TextCacheStream(Stream stream) : base(stream)
+		{
+			this.stream = stream;
+			this.IsStreamOwner = false;
+		}
+	}
+
+	public class TextCacheWriter : StreamWriter {
+		private Uri uri;
+		private TextCache parent_cache;
+		private TextCacheStream tcStream;
+
+		public TextCacheWriter(TextCache cache, Uri uri, TextCacheStream tcStream) : base(tcStream)
+		{
+			parent_cache = cache;
+			this.uri = uri;
+			this.tcStream = tcStream;
+		}
+
+		override public void Close()
+		{
+			base.Close();
+
+			Stream stream = tcStream.BaseStream;
+
+			byte[] text = new byte[stream.Length];
+			stream.Seek(0, SeekOrigin.Begin);
+			stream.Read(text, 0, (int)stream.Length);
+
+			parent_cache.Insert(uri, text);
+			tcStream.BaseStream.Close();
+		}
+	}
+
 	// FIXME: This class isn't multithread safe!  This class does not
 	// ensure that different threads don't utilize a transaction started
 	// in a certain thread at the same time.  However, since all the
@@ -50,7 +97,7 @@
 
 		static public bool Debug = false;
 
-		public const string SELF_CACHE_TAG = "*self*";
+		private const string SELF_CACHE_TAG = "*self*";
 
 		private string text_cache_dir;
 		internal string TextCacheDir {
@@ -81,22 +128,8 @@
 
 		public TextCache (string storage_dir, bool read_only)
 		{
-			text_cache_dir = Path.Combine (storage_dir, "TextCache");
-			if (! Directory.Exists (text_cache_dir)) {
-				Directory.CreateDirectory (text_cache_dir);
-				
-				// Create our cache subdirectories.
-				for (int i = 0; i < 256; ++i) {
-					string subdir = i.ToString ("x");
-					if (i < 16)
-						subdir = "0" + subdir;
-					subdir = Path.Combine (text_cache_dir, subdir);
-					Directory.CreateDirectory (subdir);
-				}
-			}
-			
 			// Create our Sqlite database
-			string db_filename = Path.Combine (text_cache_dir, "TextCache.db");
+			string db_filename = Path.Combine (storage_dir, "TextCache.db");
 			bool create_new_db = false;
 			if (! File.Exists (db_filename))
 				create_new_db = true;
@@ -126,7 +159,7 @@
 				command = new SqliteCommand ();
 				command.Connection = connection;
 				command.CommandText =
-					"SELECT filename FROM uri_index WHERE uri='blah'";
+					"SELECT text FROM uri_text WHERE uri='blah'";
 
 				try {
 					reader = SqliteUtils.ExecuteReaderOrWait (command);
@@ -156,9 +189,9 @@
 				}
 
 				SqliteUtils.DoNonQuery (connection,
-							"CREATE TABLE uri_index (            " +
+							"CREATE TABLE uri_text (             " +
 							"  uri      STRING UNIQUE NOT NULL,  " +
-							"  filename STRING NOT NULL          " +
+							"  text     BLOB NOT NULL            " +
 							")");
 			}
 		}
@@ -186,74 +219,92 @@
 			return command;
 		}
 
-		private void Insert (Uri uri, string filename)
+		internal void Insert (Uri uri, string text)
 		{
 			lock (connection) {
 				MaybeStartTransaction_Unlocked ();
 				SqliteUtils.DoNonQuery (connection,
-							"INSERT OR REPLACE INTO uri_index (uri, filename) VALUES ('{0}', '{1}')",
-							UriToString (uri), filename);
+							"INSERT OR REPLACE INTO uri_text (uri, text) VALUES ('{0}', '{1}')",
+							UriToString (uri), text);
 			}
 		}
 
-		// Returns raw path as stored in the db i.e. relative path wrt the text_cache_dir
-		private string LookupPathRawUnlocked (Uri uri, bool create_if_not_found)
+		internal void Insert (Uri uri, byte[] text)
 		{
+			lock (connection) {
+				MaybeStartTransaction_Unlocked ();
+
+				int ret = 0;
+				SqliteCommand cmd = new SqliteCommand("INSERT OR REPLACE INTO uri_text (uri, text) VALUES (@uri,@text)");
+
+				cmd.Connection = connection;
+				cmd.Parameters.Add("@uri", UriToString(uri));
+				cmd.Parameters.Add("@text", text);
+
+				while (true) {
+					try {
+						ret = cmd.ExecuteNonQuery ();
+						if (ret != 1) {
+							// Insertion should add/modify exactly one row
+							throw new SqliteExecutionException();
+						}
+						break;
+					} catch (SqliteBusyException ex) {
+						Thread.Sleep (50);
+					} catch (Exception e) {
+						Log.Error (e, "SQL that caused the exception: {0} (uri = {1}, changed = {2})",
+								cmd.CommandText, UriToString(uri), ret);
+						throw;
+					}
+				}
+			}
+		}
+
+		public Stream LookupText(Uri uri)
+		{
+			return LookupText(uri, uri.LocalPath);
+		}
+
+		// Returns raw text as stored in the db, or from the file if self-cached
+		public Stream LookupText(Uri uri, string localPath)
+		{
 			SqliteCommand command;
 			SqliteDataReader reader = null;
-			string path = null;
+			object blob = null;
+			string text = null;
+			Stream stream = null;
 
-			command = NewCommand ("SELECT filename FROM uri_index WHERE uri='{0}'", 
+			command = NewCommand ("SELECT text FROM uri_text WHERE uri='{0}'", 
 			                      UriToString (uri));
 			reader = SqliteUtils.ExecuteReaderOrWait (command);
 			if (SqliteUtils.ReadOrWait (reader))
-				path = reader.GetString (0);
+				blob = reader.GetValue(0);
 			reader.Close ();
 			command.Dispose ();
 
-			if (path == null && create_if_not_found) {
-				string guid = Guid.NewGuid ().ToString ();
-				path = Path.Combine (guid.Substring (0, 2), guid.Substring (2));
-				Insert (uri, path);
+			if (blob == null) {
+				return null;
 			}
 
-			if (path == SELF_CACHE_TAG)
-				return SELF_CACHE_TAG;
-
-			return path;
-		}
-
-		// Don't do this unless you know what you are doing!  If you
-		// do anything to the path you get back other than open and
-		// read the file, you will almost certainly break something.
-		// And it will be evidence that you are a bad person and that
-		// you deserve whatever horrible fate befalls you.
-		public string LookupPathRaw (Uri uri)
-		{
-			lock (connection)
-				return LookupPathRawUnlocked (uri, false);
-		}
-
-		private string LookupPath (Uri uri, bool create_if_not_found)
-		{
-			lock (connection) {
-				string path = LookupPathRawUnlocked (uri, create_if_not_found);
-				if (path == SELF_CACHE_TAG) {
-					// FIXME: How do we handle URI remapping for self-cached items?
-#if false
-					if (uri_remapper != null)
-						uri = uri_remapper (uri);
-#endif
-					if (! uri.IsFile) {
-						string msg = String.Format ("Non-file uri {0} flagged as self-cached", uri);
-						throw new Exception (msg);
-					}
-					return uri.LocalPath;
+			if (blob.GetType().Equals(typeof(string)) && (string)blob == SELF_CACHE_TAG) {
+				// Directories are also marked as self-cached too, it would seem
+				try {
+					stream = new FileStream(localPath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
 				}
-				return path != null ? Path.Combine (text_cache_dir, path) : null;
+				catch (Exception e) {
+					Log.Debug(e, "Couldn't open self-cached file {0} (uri: {1})",
+							uri.LocalPath, UriToString(uri));
+					stream = null;
+				}
+			} else if ( blob.GetType().Equals(typeof(byte[])) ) {
+				stream = new GZipInputStream(new MemoryStream((byte[])blob));
+			} else {
+				throw new Exception("Invalid type while doing TextCache lookup");
 			}
+
+			return stream;
 		}
-		
+
 		public void MarkAsSelfCached (Uri uri)
 		{
 			lock (connection)
@@ -268,26 +319,8 @@
 
 		public TextWriter GetWriter (Uri uri)
 		{
-			// FIXME: Uri remapping?
-			string path = LookupPath (uri, true);
-
-			FileStream fs;
-			fs = new FileStream (path, FileMode.Create, FileAccess.Write, FileShare.ReadWrite);
-
-			// We don't expect to need this again in the near future.
-			FileAdvise.FlushCache (fs);
-			
-			GZipOutputStream stream;
-			stream = new GZipOutputStream (fs);
-
-			if (! world_readable) {
-				// Make files only readable by the owner.
-				Mono.Unix.Native.Syscall.chmod (path, (Mono.Unix.Native.FilePermissions) 384);
-			}
-
-			StreamWriter writer;
-			writer = new StreamWriter (new BufferedStream (stream));
-			return writer;
+			// XXX: Ugly!
+			return new TextCacheWriter(this, uri, new TextCacheStream());
 		}
 
 		public void WriteFromReader (Uri uri, TextReader reader)
@@ -313,58 +346,21 @@
 			writer.Close ();
 		}
 
-		// FIXME: Uri remapping?
 		public TextReader GetReader (Uri uri)
 		{
-			string path = LookupPath (uri, false);
-			if (path == null)
-				return null;
-
-			return GetReader (path);
+			Stream text = LookupText(uri);
+			return (text == null ? null : new StreamReader(text));
 		}
 
-		public TextReader GetReader (string path)
-		{
-			FileStream file_stream;
-			try {
-				file_stream = new FileStream (path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
-			} catch (FileNotFoundException ex) {
-				return null;
-			}
-
-			StreamReader reader = null;
-			try {
-				Stream stream = new GZipInputStream (file_stream);
-				reader = new StreamReader (new BufferedStream (stream));
-
-				// This throws an exception if the file isn't compressed as follows:
-				// 1.) IOException on older versions of SharpZipLib
-				// 2.) GZipException on newer versions of SharpZipLib
-				// FIXME: Change this to GZipException when we depend
-				// on a higer version of SharpZipLib
-				reader.Peek ();
-			} catch (Exception ex) {
-				// FIXME: WTF? The Peek () above advances one character. I'm not sure
-				// why though, maybe because an exception is thrown, anyways seek to
-				// the beginning of the file.
-				file_stream.Seek (0, SeekOrigin.Begin);
-				reader = new StreamReader (file_stream);
-			}
-
-			return reader;
-		}
-
 		public void Delete (Uri uri)
 		{
 			lock (connection) {
-				string path = LookupPathRawUnlocked (uri, false);
-				if (path != null) {
+				Stream text = LookupText(uri, null);
+				if (text != null) {
 					MaybeStartTransaction_Unlocked ();
 					SqliteUtils.DoNonQuery (connection,
-								"DELETE FROM uri_index WHERE uri='{0}' AND filename='{1}'", 
-								UriToString (uri), path);
-					if (path != SELF_CACHE_TAG)
-						File.Delete (Path.Combine (text_cache_dir, path));
+								"DELETE FROM uri_text WHERE uri='{0}'", 
+								UriToString (uri));
 				}
 			}
 		}
Index: beagled/SnippetFu.cs
===================================================================
--- beagled/SnippetFu.cs	(revision 4013)
+++ beagled/SnippetFu.cs	(working copy)
@@ -59,7 +59,7 @@
 
 		static public SnippetReader GetSnippetFromTextCache (string[] query_terms, string filename, bool full_text)
 		{
-			TextReader reader = TextCache.UserCache.GetReader (filename);
+			TextReader reader = TextCache.UserCache.GetReader (new Uri(filename));
 			if (reader == null)
 				return null;
 			try {
Index: configure.in
===================================================================
--- configure.in	(revision 4013)
+++ configure.in	(working copy)
@@ -596,6 +596,14 @@
 
 dnl ----------------------------------------------
 
+AC_ARG_ENABLE([tests],
+        AC_HELP_STRING([--enable-tests], [Build some testing programs (default no)]),
+        enable_tests=$enableval,
+        enable_tests=no)
+AM_CONDITIONAL(ENABLE_TESTS, test "x$enable_tests" = "xyes")
+
+dnl ----------------------------------------------
+
 AC_OUTPUT([
 Makefile
 Util/Makefile
@@ -606,6 +614,7 @@
 beagled/Makefile
 Filters/Makefile
 tools/Makefile
+tests/Makefile
 tools/beagle-settings.desktop.in
 search/Makefile
 search/beagle-search.desktop.in
Index: Makefile.am
===================================================================
--- Makefile.am	(revision 4013)
+++ Makefile.am	(working copy)
@@ -49,6 +49,10 @@
 SUBDIRS += thunderbird-extension
 endif
 
+if ENABLE_TESTS
+SUBDIRS += tests
+endif
+
 # When the chooser work has been fixed up to use libbeagle, we can
 # reenable this directory.
 # SUBDIRS += chooser-fu


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]