Re: GSoC Weekly Report



Updated patch attached -- some of the older code was not building.

Cheers,
Arun

On 02/10/2007, Arun Raghavan <arunissatan gmail com> wrote:
> On 02/10/2007, Kevin Kubasik <kevin kubasik net> wrote:
> > Very cool, and good to hear. If Arun could share a patch for his
> > implementation, that would be awesome in terms of preventing wheel
> > reinvention ;) If Arun is unable, or doesn't have the time to look
> > into a hybrid solution, I wouldn't mind doing some investigative work,
>
> I've been completely swamped with work here in the first half of the
> semester, and I spent a little time getting the xesam-adaptor updated
> to the latest spec. Do let me know if you're taking this up, so
> there's no duplication of effort. The patch against r4013 is attached.
>
> > I think the biggest decision comes when it's time to determine what
> > our cutoff is (size-wise). While there is a little extra complication
> > introduced by a hybrid system, I don't see it being a major issue to
> > implement. My thought would just be to have a table in the
> > TextCache.db which denotes if a uri is stored in db or on disk. The
> > major concern is the cost of 2 sqlite queries per cache item.
>
> Might it not be easier to have a boolean field denoting whether the
> field is an on-disk URI or the blob itself? Or better, if this is
> possible, to just examine the first few bytes to see if they are some
> ASCII text (or !(the Zip magic bytes))?
>
> Best,
> --
> Arun Raghavan
> (http://nemesis.accosted.net)
> v2sw5Chw4+5ln4pr6$OFck2ma4+9u8w3+1!m?l7+9GSCKi056
> e6+9i4b8/9HTAen4+5g4/8APa2Xs8r1/2p5-8 hackerkey.com
Index: beagled/FileSystemQueryable/FileSystemQueryable.cs
===================================================================
--- beagled/FileSystemQueryable/FileSystemQueryable.cs	(revision 4016)
+++ beagled/FileSystemQueryable/FileSystemQueryable.cs	(working copy)
@@ -1810,17 +1810,12 @@
 			// is stored in a property.
 			Uri uri = UriFu.EscapedStringToUri (hit ["beagle:InternalUri"]);
 
-			string path = TextCache.UserCache.LookupPathRaw (uri);
+			Stream text = TextCache.UserCache.LookupText(uri, hit.Uri.LocalPath);
 
-			if (path == null)
+			if (text == null)
 				return null;
 
-			// If this is self-cached, use the remapped Uri
-			if (path == TextCache.SELF_CACHE_TAG)
-				return SnippetFu.GetSnippetFromFile (query_terms, hit.Uri.LocalPath, full_text);
-
-			path = Path.Combine (TextCache.UserCache.TextCacheDir, path);
-			return SnippetFu.GetSnippetFromTextCache (query_terms, path, full_text);
+			return SnippetFu.GetSnippet(query_terms, new StreamReader(text), full_text);
 		}
 
 		override public void Start ()
Index: beagled/TextCache.cs
===================================================================
--- beagled/TextCache.cs	(revision 4016)
+++ beagled/TextCache.cs	(working copy)
@@ -37,6 +37,53 @@
 
 namespace Beagle.Daemon {
 
+	// We only have this class because GZipOutputStream doesn't let us
+	// retrieve the baseStream
+	public class TextCacheStream : GZipOutputStream {
+		private Stream stream;
+
+		public Stream BaseStream {
+			get { return stream; }
+		}
+
+		public TextCacheStream() : this(new MemoryStream())
+		{
+		}
+
+		public TextCacheStream(Stream stream) : base(stream)
+		{
+			this.stream = stream;
+			this.IsStreamOwner = false;
+		}
+	}
+
+	public class TextCacheWriter : StreamWriter {
+		private Uri uri;
+		private TextCache parent_cache;
+		private TextCacheStream tcStream;
+
+		public TextCacheWriter(TextCache cache, Uri uri, TextCacheStream tcStream) : base(tcStream)
+		{
+			parent_cache = cache;
+			this.uri = uri;
+			this.tcStream = tcStream;
+		}
+
+		override public void Close()
+		{
+			base.Close();
+
+			Stream stream = tcStream.BaseStream;
+
+			byte[] text = new byte[stream.Length];
+			stream.Seek(0, SeekOrigin.Begin);
+			stream.Read(text, 0, (int)stream.Length);
+
+			parent_cache.Insert(uri, text);
+			tcStream.BaseStream.Close();
+		}
+	}
+
 	// FIXME: This class isn't multithread safe!  This class does not
 	// ensure that different threads don't utilize a transaction started
 	// in a certain thread at the same time.  However, since all the
@@ -50,7 +97,7 @@
 
 		static public bool Debug = false;
 
-		public const string SELF_CACHE_TAG = "*self*";
+		private const string SELF_CACHE_TAG = "*self*";
 
 		private string text_cache_dir;
 		internal string TextCacheDir {
@@ -81,22 +128,8 @@
 
 		public TextCache (string storage_dir, bool read_only)
 		{
-			text_cache_dir = Path.Combine (storage_dir, "TextCache");
-			if (! Directory.Exists (text_cache_dir)) {
-				Directory.CreateDirectory (text_cache_dir);
-				
-				// Create our cache subdirectories.
-				for (int i = 0; i < 256; ++i) {
-					string subdir = i.ToString ("x");
-					if (i < 16)
-						subdir = "0" + subdir;
-					subdir = Path.Combine (text_cache_dir, subdir);
-					Directory.CreateDirectory (subdir);
-				}
-			}
-			
 			// Create our Sqlite database
-			string db_filename = Path.Combine (text_cache_dir, "TextCache.db");
+			string db_filename = Path.Combine (storage_dir, "TextCache.db");
 			bool create_new_db = false;
 			if (! File.Exists (db_filename))
 				create_new_db = true;
@@ -126,7 +159,7 @@
 				command = new SqliteCommand ();
 				command.Connection = connection;
 				command.CommandText =
-					"SELECT filename FROM uri_index WHERE uri='blah'";
+					"SELECT text FROM uri_text WHERE uri='blah'";
 
 				try {
 					reader = SqliteUtils.ExecuteReaderOrWait (command);
@@ -156,9 +189,9 @@
 				}
 
 				SqliteUtils.DoNonQuery (connection,
-							"CREATE TABLE uri_index (            " +
+							"CREATE TABLE uri_text (             " +
 							"  uri      STRING UNIQUE NOT NULL,  " +
-							"  filename STRING NOT NULL          " +
+							"  text     BLOB NOT NULL            " +
 							")");
 			}
 		}
@@ -186,74 +219,92 @@
 			return command;
 		}
 
-		private void Insert (Uri uri, string filename)
+		internal void Insert (Uri uri, string text)
 		{
 			lock (connection) {
 				MaybeStartTransaction_Unlocked ();
 				SqliteUtils.DoNonQuery (connection,
-							"INSERT OR REPLACE INTO uri_index (uri, filename) VALUES ('{0}', '{1}')",
-							UriToString (uri), filename);
+							"INSERT OR REPLACE INTO uri_text (uri, text) VALUES ('{0}', '{1}')",
+							UriToString (uri), text);
 			}
 		}
 
-		// Returns raw path as stored in the db i.e. relative path wrt the text_cache_dir
-		private string LookupPathRawUnlocked (Uri uri, bool create_if_not_found)
+		internal void Insert (Uri uri, byte[] text)
 		{
+			lock (connection) {
+				MaybeStartTransaction_Unlocked ();
+
+				int ret = 0;
+				SqliteCommand cmd = new SqliteCommand("INSERT OR REPLACE INTO uri_text (uri, text) VALUES (@uri,@text)");
+
+				cmd.Connection = connection;
+				cmd.Parameters.Add("@uri", UriToString(uri));
+				cmd.Parameters.Add("@text", text);
+
+				while (true) {
+					try {
+						ret = cmd.ExecuteNonQuery ();
+						if (ret != 1) {
+							// Insertion should add/modify exactly one row
+							throw new SqliteExecutionException();
+						}
+						break;
+					} catch (SqliteBusyException ex) {
+						Thread.Sleep (50);
+					} catch (Exception e) {
+						Log.Error (e, "SQL that caused the exception: {0} (uri = {1}, changed = {2})",
+								cmd.CommandText, UriToString(uri), ret);
+						throw;
+					}
+				}
+			}
+		}
+
+		public Stream LookupText(Uri uri)
+		{
+			return LookupText(uri, uri.LocalPath);
+		}
+
+		// Returns raw text as stored in the db, or from the file if self-cached
+		public Stream LookupText(Uri uri, string localPath)
+		{
 			SqliteCommand command;
 			SqliteDataReader reader = null;
-			string path = null;
+			object blob = null;
+			string text = null;
+			Stream stream = null;
 
-			command = NewCommand ("SELECT filename FROM uri_index WHERE uri='{0}'", 
+			command = NewCommand ("SELECT text FROM uri_text WHERE uri='{0}'", 
 			                      UriToString (uri));
 			reader = SqliteUtils.ExecuteReaderOrWait (command);
 			if (SqliteUtils.ReadOrWait (reader))
-				path = reader.GetString (0);
+				blob = reader.GetValue(0);
 			reader.Close ();
 			command.Dispose ();
 
-			if (path == null && create_if_not_found) {
-				string guid = Guid.NewGuid ().ToString ();
-				path = Path.Combine (guid.Substring (0, 2), guid.Substring (2));
-				Insert (uri, path);
+			if (blob == null) {
+				return null;
 			}
 
-			if (path == SELF_CACHE_TAG)
-				return SELF_CACHE_TAG;
-
-			return path;
-		}
-
-		// Don't do this unless you know what you are doing!  If you
-		// do anything to the path you get back other than open and
-		// read the file, you will almost certainly break something.
-		// And it will be evidence that you are a bad person and that
-		// you deserve whatever horrible fate befalls you.
-		public string LookupPathRaw (Uri uri)
-		{
-			lock (connection)
-				return LookupPathRawUnlocked (uri, false);
-		}
-
-		private string LookupPath (Uri uri, bool create_if_not_found)
-		{
-			lock (connection) {
-				string path = LookupPathRawUnlocked (uri, create_if_not_found);
-				if (path == SELF_CACHE_TAG) {
-					// FIXME: How do we handle URI remapping for self-cached items?
-#if false
-					if (uri_remapper != null)
-						uri = uri_remapper (uri);
-#endif
-					if (! uri.IsFile) {
-						string msg = String.Format ("Non-file uri {0} flagged as self-cached", uri);
-						throw new Exception (msg);
-					}
-					return uri.LocalPath;
+			if (blob.GetType().Equals(typeof(string)) && (string)blob == SELF_CACHE_TAG) {
+				// Directories are also marked as self-cached too, it would seem
+				try {
+					stream = new FileStream(localPath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
 				}
-				return path != null ? Path.Combine (text_cache_dir, path) : null;
+				catch (Exception e) {
+					Log.Debug(e, "Couldn't open self-cached file {0} (uri: {1})",
+							uri.LocalPath, UriToString(uri));
+					stream = null;
+				}
+			} else if ( blob.GetType().Equals(typeof(byte[])) ) {
+				stream = new GZipInputStream(new MemoryStream((byte[])blob));
+			} else {
+				throw new Exception("Invalid type while doing TextCache lookup");
 			}
+
+			return stream;
 		}
-		
+
 		public void MarkAsSelfCached (Uri uri)
 		{
 			lock (connection)
@@ -268,26 +319,8 @@
 
 		public TextWriter GetWriter (Uri uri)
 		{
-			// FIXME: Uri remapping?
-			string path = LookupPath (uri, true);
-
-			FileStream fs;
-			fs = new FileStream (path, FileMode.Create, FileAccess.Write, FileShare.ReadWrite);
-
-			// We don't expect to need this again in the near future.
-			FileAdvise.FlushCache (fs);
-			
-			GZipOutputStream stream;
-			stream = new GZipOutputStream (fs);
-
-			if (! world_readable) {
-				// Make files only readable by the owner.
-				Mono.Unix.Native.Syscall.chmod (path, (Mono.Unix.Native.FilePermissions) 384);
-			}
-
-			StreamWriter writer;
-			writer = new StreamWriter (new BufferedStream (stream));
-			return writer;
+			// XXX: Ugly!
+			return new TextCacheWriter(this, uri, new TextCacheStream());
 		}
 
 		public void WriteFromReader (Uri uri, TextReader reader)
@@ -313,58 +346,21 @@
 			writer.Close ();
 		}
 
-		// FIXME: Uri remapping?
 		public TextReader GetReader (Uri uri)
 		{
-			string path = LookupPath (uri, false);
-			if (path == null)
-				return null;
-
-			return GetReader (path);
+			Stream text = LookupText(uri);
+			return (text == null ? null : new StreamReader(text));
 		}
 
-		public TextReader GetReader (string path)
-		{
-			FileStream file_stream;
-			try {
-				file_stream = new FileStream (path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
-			} catch (FileNotFoundException ex) {
-				return null;
-			}
-
-			StreamReader reader = null;
-			try {
-				Stream stream = new GZipInputStream (file_stream);
-				reader = new StreamReader (new BufferedStream (stream));
-
-				// This throws an exception if the file isn't compressed as follows:
-				// 1.) IOException on older versions of SharpZipLib
-				// 2.) GZipException on newer versions of SharpZipLib
-				// FIXME: Change this to GZipException when we depend
-				// on a higer version of SharpZipLib
-				reader.Peek ();
-			} catch (Exception ex) {
-				// FIXME: WTF? The Peek () above advances one character. I'm not sure
-				// why though, maybe because an exception is thrown, anyways seek to
-				// the beginning of the file.
-				file_stream.Seek (0, SeekOrigin.Begin);
-				reader = new StreamReader (file_stream);
-			}
-
-			return reader;
-		}
-
 		public void Delete (Uri uri)
 		{
 			lock (connection) {
-				string path = LookupPathRawUnlocked (uri, false);
-				if (path != null) {
+				Stream text = LookupText(uri, null);
+				if (text != null) {
 					MaybeStartTransaction_Unlocked ();
 					SqliteUtils.DoNonQuery (connection,
-								"DELETE FROM uri_index WHERE uri='{0}' AND filename='{1}'", 
-								UriToString (uri), path);
-					if (path != SELF_CACHE_TAG)
-						File.Delete (Path.Combine (text_cache_dir, path));
+								"DELETE FROM uri_text WHERE uri='{0}'", 
+								UriToString (uri));
 				}
 			}
 		}
Index: beagled/SnippetFu.cs
===================================================================
--- beagled/SnippetFu.cs	(revision 4016)
+++ beagled/SnippetFu.cs	(working copy)
@@ -59,7 +59,7 @@
 
 		static public SnippetReader GetSnippetFromTextCache (string[] query_terms, string filename, bool full_text)
 		{
-			TextReader reader = TextCache.UserCache.GetReader (filename);
+			TextReader reader = TextCache.UserCache.GetReader (new Uri(filename));
 			if (reader == null)
 				return null;
 			try {
Index: configure.in
===================================================================
--- configure.in	(revision 4016)
+++ configure.in	(working copy)
@@ -596,6 +596,14 @@
 
 dnl ----------------------------------------------
 
+AC_ARG_ENABLE([tests],
+        AC_HELP_STRING([--enable-tests], [Build some testing programs (default no)]),
+        enable_tests=$enableval,
+        enable_tests=no)
+AM_CONDITIONAL(ENABLE_TESTS, test "x$enable_tests" = "xyes")
+
+dnl ----------------------------------------------
+
 AC_OUTPUT([
 Makefile
 Util/Makefile
@@ -606,6 +614,7 @@
 beagled/Makefile
 Filters/Makefile
 tools/Makefile
+tests/Makefile
 tools/beagle-settings.desktop.in
 search/Makefile
 search/beagle-search.desktop.in
Index: Makefile.am
===================================================================
--- Makefile.am	(revision 4016)
+++ Makefile.am	(working copy)
@@ -46,6 +46,10 @@
 SUBDIRS += thunderbird-extension
 endif
 
+if ENABLE_TESTS
+SUBDIRS += tests
+endif
+
 # When the chooser work has been fixed up to use libbeagle, we can
 # reenable this directory.
 # SUBDIRS += chooser-fu


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]