beagle r4628 - in trunk/beagle: BeagleClient beagled beagled/FileSystemQueryable beagled/KonversationQueryable beagled/KopeteQueryable beagled/NetworkServicesQueryable beagled/PidginQueryable tools



Author: dbera
Date: Fri Mar 21 04:36:02 2008
New Revision: 4628
URL: http://svn.gnome.org/viewvc/beagle?rev=4628&view=rev

Log:
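Add API for controlling the maximum snippet length and the number of context words shown before and after a match (new SnippetRequest.SnippetLength and SnippetRequest.ContextLength fields; leaving them at -1 keeps the defaults of 200 and 6).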


Modified:
   trunk/beagle/BeagleClient/Snippet.cs
   trunk/beagle/beagled/ExternalMetadataQueryable.cs
   trunk/beagle/beagled/FileSystemQueryable/FileSystemQueryable.cs
   trunk/beagle/beagled/IQueryable.cs
   trunk/beagle/beagled/KonversationQueryable/KonversationQueryable.cs
   trunk/beagle/beagled/KopeteQueryable/KopeteQueryable.cs
   trunk/beagle/beagled/LuceneQueryable.cs
   trunk/beagle/beagled/NetworkServicesQueryable/NetworkServicesQueryable.cs
   trunk/beagle/beagled/PidginQueryable/PidginQueryable.cs
   trunk/beagle/beagled/Queryable.cs
   trunk/beagle/beagled/SnippetExecutor.cs
   trunk/beagle/beagled/SnippetFu.cs
   trunk/beagle/beagled/SnippetTest.cs
   trunk/beagle/beagled/StaticQueryable.cs
   trunk/beagle/tools/StaticQuery.cs

Modified: trunk/beagle/BeagleClient/Snippet.cs
==============================================================================
--- trunk/beagle/BeagleClient/Snippet.cs	(original)
+++ trunk/beagle/BeagleClient/Snippet.cs	Fri Mar 21 04:36:02 2008
@@ -47,6 +47,10 @@
 		// should know that.
 		public bool FullText = false; // default, not fulltext
 
+		public int ContextLength = -1; // Use system default = 6 if not specified
+
+		public int SnippetLength = -1; // 200, if not specified
+
 		public SnippetRequest () : base (false) { }
 
 		public SnippetRequest (Query query, Hit hit) : base (false)

Modified: trunk/beagle/beagled/ExternalMetadataQueryable.cs
==============================================================================
--- trunk/beagle/beagled/ExternalMetadataQueryable.cs	(original)
+++ trunk/beagle/beagled/ExternalMetadataQueryable.cs	Fri Mar 21 04:36:02 2008
@@ -69,7 +69,7 @@
 			return 0;
 		}
 
-		public ISnippetReader GetSnippet (string[] query_terms, Hit hit, bool full_text)
+		public ISnippetReader GetSnippet (string[] query_terms, Hit hit, bool full_text, int ctx_length, int snp_length)
 		{
 			return null;
 		}

Modified: trunk/beagle/beagled/FileSystemQueryable/FileSystemQueryable.cs
==============================================================================
--- trunk/beagle/beagled/FileSystemQueryable/FileSystemQueryable.cs	(original)
+++ trunk/beagle/beagled/FileSystemQueryable/FileSystemQueryable.cs	Fri Mar 21 04:36:02 2008
@@ -1853,7 +1853,7 @@
 			return parent_dirs;
 		}
 
-		override public ISnippetReader GetSnippet (string [] query_terms, Hit hit, bool full_text)
+		override public ISnippetReader GetSnippet (string [] query_terms, Hit hit, bool full_text, int ctx_length, int snp_length)
 		{
 			// Uri remapping from a hit is easy: the internal uri
 			// is stored in a property.
@@ -1863,12 +1863,12 @@
 			TextReader reader = TextCache.UserCache.GetReader (uri, ref self_cache);
 
 			if (self_cache)
-				return SnippetFu.GetSnippetFromFile (query_terms, hit.Uri.LocalPath, full_text);
+				return SnippetFu.GetSnippetFromFile (query_terms, hit.Uri.LocalPath, full_text, ctx_length, snp_length);
 
 			if (reader == null)
 				return null;
 
-			return SnippetFu.GetSnippet (query_terms, reader, full_text);
+			return SnippetFu.GetSnippet (query_terms, reader, full_text, ctx_length, snp_length);
 		}
 
 		override public void Start ()

Modified: trunk/beagle/beagled/IQueryable.cs
==============================================================================
--- trunk/beagle/beagled/IQueryable.cs	(original)
+++ trunk/beagle/beagled/IQueryable.cs	Fri Mar 21 04:36:02 2008
@@ -46,7 +46,7 @@
 		// Just return the number of matches
 		int DoCountMatchQuery (Query query);
 
-		ISnippetReader GetSnippet (string[] query_terms, Hit hit, bool full_text);
+		ISnippetReader GetSnippet (string[] query_terms, Hit hit, bool full_text, int context_length, int snippet_length);
 
 		QueryableStatus GetQueryableStatus ();
 	}

Modified: trunk/beagle/beagled/KonversationQueryable/KonversationQueryable.cs
==============================================================================
--- trunk/beagle/beagled/KonversationQueryable/KonversationQueryable.cs	(original)
+++ trunk/beagle/beagled/KonversationQueryable/KonversationQueryable.cs	Fri Mar 21 04:36:02 2008
@@ -513,7 +513,7 @@
 			return false;
 		}
 
-		override public ISnippetReader GetSnippet (string [] query_terms, Hit hit, bool full_text)
+		override public ISnippetReader GetSnippet (string [] query_terms, Hit hit, bool full_text, int ctx_length, int snp_length)
 		{
 			if (hit.ParentUri == null)
 				return null;
@@ -528,7 +528,7 @@
 				LineTextReader reader;
 				reader = new LineTextReader (path, begin_offset, end_offset);
 
-				return SnippetFu.GetSnippet (query_terms, reader, full_text);
+				return SnippetFu.GetSnippet (query_terms, reader, full_text, ctx_length, snp_length);
 			} catch {
 				return null;
 			}

Modified: trunk/beagle/beagled/KopeteQueryable/KopeteQueryable.cs
==============================================================================
--- trunk/beagle/beagled/KopeteQueryable/KopeteQueryable.cs	(original)
+++ trunk/beagle/beagled/KopeteQueryable/KopeteQueryable.cs	Fri Mar 21 04:36:02 2008
@@ -219,7 +219,7 @@
 			return true;
 		}
 
-		public override ISnippetReader GetSnippet (string [] query_terms, Hit hit, bool full_text)
+		public override ISnippetReader GetSnippet (string [] query_terms, Hit hit, bool full_text, int ctx_length, int snp_length)
 		{
 			TextReader reader = TextCache.UserCache.GetReader (hit.Uri);
 
@@ -228,7 +228,7 @@
 
 			KopeteSnippetReader snippet_reader = new KopeteSnippetReader (reader);
 
-			return SnippetFu.GetSnippet (query_terms, snippet_reader, full_text);
+			return SnippetFu.GetSnippet (query_terms, snippet_reader, full_text, ctx_length, snp_length);
 		}
 
 		private class KopeteSnippetReader : TextReader {

Modified: trunk/beagle/beagled/LuceneQueryable.cs
==============================================================================
--- trunk/beagle/beagled/LuceneQueryable.cs	(original)
+++ trunk/beagle/beagled/LuceneQueryable.cs	Fri Mar 21 04:36:02 2008
@@ -363,7 +363,7 @@
 
 		/////////////////////////////////////////
 
-		protected SnippetReader GetSnippetFromTextCache (string [] query_terms, Uri uri, bool full_text)
+		protected SnippetReader GetSnippetFromTextCache (string [] query_terms, Uri uri, bool full_text, int ctx_length, int snp_length)
 		{
 			// Look up the hit in our text cache.  If it is there,
 			// use the cached version to generate a snippet.
@@ -373,12 +373,12 @@
 			if (reader == null)
 				return null;
 
-			return SnippetFu.GetSnippet (query_terms, reader, full_text);
+			return SnippetFu.GetSnippet (query_terms, reader, full_text, ctx_length, snp_length);
 		}
 
-		virtual public ISnippetReader GetSnippet (string [] query_terms, Hit hit, bool full_text)
+		virtual public ISnippetReader GetSnippet (string [] query_terms, Hit hit, bool full_text, int ctx_length, int snp_length)
 		{
-			return GetSnippetFromTextCache (query_terms, hit.Uri, full_text);
+			return GetSnippetFromTextCache (query_terms, hit.Uri, full_text, ctx_length, snp_length);
 		}
 
 		/////////////////////////////////////////

Modified: trunk/beagle/beagled/NetworkServicesQueryable/NetworkServicesQueryable.cs
==============================================================================
--- trunk/beagle/beagled/NetworkServicesQueryable/NetworkServicesQueryable.cs	(original)
+++ trunk/beagle/beagled/NetworkServicesQueryable/NetworkServicesQueryable.cs	Fri Mar 21 04:36:02 2008
@@ -130,7 +130,7 @@
 			return 0;
 		}
 
-		public ISnippetReader GetSnippet (string[] query_terms, Hit hit, bool full_text)
+		public ISnippetReader GetSnippet (string[] query_terms, Hit hit, bool full_text, int ctx_length, int snp_length)
 		{
 			return null;
 		}

Modified: trunk/beagle/beagled/PidginQueryable/PidginQueryable.cs
==============================================================================
--- trunk/beagle/beagled/PidginQueryable/PidginQueryable.cs	(original)
+++ trunk/beagle/beagled/PidginQueryable/PidginQueryable.cs	Fri Mar 21 04:36:02 2008
@@ -129,7 +129,7 @@
 			return true;
 		}
 
-		public override ISnippetReader GetSnippet (string [] query_terms, Hit hit, bool full_text)
+		public override ISnippetReader GetSnippet (string [] query_terms, Hit hit, bool full_text, int ctx_length, int snp_length)
 		{
 			TextReader reader = TextCache.UserCache.GetReader (hit.Uri);
 
@@ -141,7 +141,7 @@
 			if (line[0] == '<')
 				reader = new HtmlRemovingReader (reader);
 
-			return SnippetFu.GetSnippet (query_terms, reader, full_text);
+			return SnippetFu.GetSnippet (query_terms, reader, full_text, ctx_length, snp_length);
 		}
 
 		public ImBuddyListReader ImBuddyListReader {

Modified: trunk/beagle/beagled/Queryable.cs
==============================================================================
--- trunk/beagle/beagled/Queryable.cs	(original)
+++ trunk/beagle/beagled/Queryable.cs	Fri Mar 21 04:36:02 2008
@@ -93,7 +93,7 @@
 			}
 		}
 
-		public ISnippetReader GetSnippet (string[] query_terms, Hit hit, bool full_text)
+		public ISnippetReader GetSnippet (string[] query_terms, Hit hit, bool full_text, int ctx_length, int snp_length)
 		{
 			if (hit == null)
 				return null;
@@ -105,7 +105,7 @@
 			}
 
 			try {
-				return iqueryable.GetSnippet (query_terms, hit, full_text);
+				return iqueryable.GetSnippet (query_terms, hit, full_text, ctx_length, snp_length);
 			} catch (Exception ex) {
 				Logger.Log.Warn (ex, "Caught exception calling DoQuery on '{0}'", Name);
 			}

Modified: trunk/beagle/beagled/SnippetExecutor.cs
==============================================================================
--- trunk/beagle/beagled/SnippetExecutor.cs	(original)
+++ trunk/beagle/beagled/SnippetExecutor.cs	Fri Mar 21 04:36:02 2008
@@ -42,13 +42,15 @@
 			Queryable queryable = QueryDriver.GetQueryable (request.Hit.Source);
 			ISnippetReader snippet_reader;
 			bool full_text = request.FullText;
+			int ctx_length = request.ContextLength;
+			int snp_length = request.SnippetLength;
 
 			if (queryable == null) {
 				Log.Error ("SnippetExecutor: No queryable object matches '{0}'", request.Hit.Source);
-				snippet_reader = new SnippetReader (null, null, false);
+				snippet_reader = new SnippetReader (null, null, false, -1, -1);
 				full_text = false;
 			} else
-				snippet_reader = queryable.GetSnippet (request.QueryTerms, request.Hit, full_text);
+				snippet_reader = queryable.GetSnippet (request.QueryTerms, request.Hit, full_text, ctx_length, snp_length);
 
 			return new SnippetResponse (new SnippetList (full_text, snippet_reader));
 		}

Modified: trunk/beagle/beagled/SnippetFu.cs
==============================================================================
--- trunk/beagle/beagled/SnippetFu.cs	(original)
+++ trunk/beagle/beagled/SnippetFu.cs	Fri Mar 21 04:36:02 2008
@@ -38,7 +38,7 @@
 	
 	public class SnippetFu {
 
-		static public SnippetReader GetSnippet (string[] query_terms, TextReader line_reader, bool full_text)
+		static public SnippetReader GetSnippet (string[] query_terms, TextReader line_reader, bool full_text, int ctx_length, int snp_length)
 		{
 			// FIXME: If the query doesn't have search text (or is null), we should
 			// generate a 'summary snippet'.
@@ -46,24 +46,24 @@
 			if (line_reader == null)
 				return null;
 
-			SnippetReader snippet_reader = new SnippetReader (line_reader, query_terms, full_text);
+			SnippetReader snippet_reader = new SnippetReader (line_reader, query_terms, full_text, ctx_length, snp_length);
 			return snippet_reader;
 		}
 		
-		static public SnippetReader GetSnippetFromFile (string[] query_terms, string filename, bool full_text)
+		static public SnippetReader GetSnippetFromFile (string[] query_terms, string filename, bool full_text, int ctx_length, int snp_length)
 		{
 			FileStream stream = new FileStream (filename, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
 
-			return GetSnippet (query_terms, new StreamReader (stream), full_text);
+			return GetSnippet (query_terms, new StreamReader (stream), full_text, ctx_length, snp_length);
 		}
 
-		static public SnippetReader GetSnippetFromTextCache (string[] query_terms, string filename, bool full_text)
+		static public SnippetReader GetSnippetFromTextCache (string[] query_terms, string filename, bool full_text, int ctx_length, int snp_length)
 		{
 			TextReader reader = TextCache.UserCache.GetReader (new Uri (filename));
 			if (reader == null)
 				return null;
 			try {
-				return GetSnippet (query_terms, reader, full_text);
+				return GetSnippet (query_terms, reader, full_text, ctx_length, snp_length);
 			} catch (ICSharpCode.SharpZipLib.SharpZipBaseException ex) {
 				Log.Debug ("Unexpected exception '{0}' while extracting snippet for {1}", ex.Message, filename);
 				return null;
@@ -127,19 +127,24 @@
 		// Keep a sliding window of the starting positions of words
 		SlidingWindow sliding_window;
 
-		const int between_snippet_words = 6;
-		const int soft_snippet_limit = 200;
+		public const int context_length_default = 6;
+		public const int snippet_length_default = 200;
 
-		public SnippetReader (TextReader line_reader, string[] query_terms, bool full_text)
+		private int context_length;
+		private int snippet_length;
+
+		public SnippetReader (TextReader line_reader, string[] query_terms, bool full_text, int context_length, int snippet_length)
 		{
 			this.line_reader = line_reader;
 			this.found_snippet_length = 0;
 			this.full_text = full_text;
+			this.context_length = (context_length >= 0 ? context_length : context_length_default);
+			this.snippet_length = (snippet_length > 0 ? snippet_length : snippet_length_default);
 
 			if (query_terms == null)
 				return;
 
-			this.sliding_window = new SlidingWindow (between_snippet_words);
+			this.sliding_window = new SlidingWindow (context_length);
 
 			// remove stop words from query_terms
 			query_terms_list = new ArrayList (query_terms.Length);
@@ -182,7 +187,7 @@
 			SnippetLine snippet_line;
 			ulong line = 0;
 
-			while (found_snippet_length < soft_snippet_limit) {
+			while (found_snippet_length < snippet_length) {
 				//Console.WriteLine ("Continue with last line ? {0}", continue_line);
 				if (! continue_line) {
 					try {
@@ -307,7 +312,7 @@
 					// Add the start pos of the token to the window
 					sliding_window.Add (pos);
 					// If we found a match previously and saw enough following words, stop
-					if (snippet_line != null && snippet_line.Count > 0 && sliding_window.Count == between_snippet_words) {
+					if (snippet_line != null && snippet_line.Count > 0 && sliding_window.Count == context_length) {
 						sliding_window.Reset ();
 						string after_match = text.Substring (prev_match_end_pos, end_pos - prev_match_end_pos);
 						snippet_line.AddNonMatchFragment (after_match);

Modified: trunk/beagle/beagled/SnippetTest.cs
==============================================================================
--- trunk/beagle/beagled/SnippetTest.cs	(original)
+++ trunk/beagle/beagled/SnippetTest.cs	Fri Mar 21 04:36:02 2008
@@ -45,7 +45,7 @@
 
 		SnippetReader snippet_reader;
 		// FIXME: Oops ... does not quit by passing empty line
-		snippet_reader = SnippetFu.GetSnippet (args, Console.In, false); //delegate {
+		snippet_reader = SnippetFu.GetSnippet (args, Console.In, false, -1, -1); //delegate {
 
 		bool first = true;
 		foreach (SnippetLine snippet_line in snippet_reader.GetSnippet ()) {

Modified: trunk/beagle/beagled/StaticQueryable.cs
==============================================================================
--- trunk/beagle/beagled/StaticQueryable.cs	(original)
+++ trunk/beagle/beagled/StaticQueryable.cs	Fri Mar 21 04:36:02 2008
@@ -53,7 +53,7 @@
 			}
 		}
 
-		override public ISnippetReader GetSnippet (string[] query_terms, Hit hit, bool full_text) 
+		override public ISnippetReader GetSnippet (string[] query_terms, Hit hit, bool full_text, int ctx_length, int snp_length) 
 		{
 			if (text_cache == null)
 				return null;
@@ -63,7 +63,7 @@
 			if (reader == null)
 				return null;
 			
-			return SnippetFu.GetSnippet (query_terms, reader, full_text);
+			return SnippetFu.GetSnippet (query_terms, reader, full_text, ctx_length, snp_length);
 		}
 
 		override protected bool HitIsValid (Uri uri)

Modified: trunk/beagle/tools/StaticQuery.cs
==============================================================================
--- trunk/beagle/tools/StaticQuery.cs	(original)
+++ trunk/beagle/tools/StaticQuery.cs	Fri Mar 21 04:36:02 2008
@@ -147,13 +147,15 @@
 		Queryable queryable = QueryDriver.GetQueryable (request.Hit.Source);
 		ISnippetReader snippet_reader;
 		bool full_text = request.FullText;
+		int ctx_length = request.ContextLength;
+		int snp_length = request.SnippetLength;
 
 		if (queryable == null) {
 			Console.WriteLine ("SnippetExecutor: No queryable object matches '{0}'", request.Hit.Source);
-			snippet_reader = new SnippetReader (null, null, false);
+			snippet_reader = new SnippetReader (null, null, false, -1, -1);
 			full_text = false;
 		} else
-			snippet_reader = queryable.GetSnippet (request.QueryTerms, request.Hit, full_text);
+			snippet_reader = queryable.GetSnippet (request.QueryTerms, request.Hit, full_text, ctx_length, snp_length);
 
 		List<SnippetLine> snippetlines = new List<SnippetLine> ();
 		if (snippet_reader == null)



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]