beagle r4743 - in trunk/beagle: BeagleClient Util beagled beagled/NetworkServicesQueryable



Author: dbera
Date: Sat May  3 00:02:00 2008
New Revision: 4743
URL: http://svn.gnome.org/viewvc/beagle?rev=4743&view=rev

Log:
RDF Adapter merge #3: Merge the final changes from the RDF branch (except the extract-links patch).


Modified:
   trunk/beagle/BeagleClient/AssemblyInfo.cs
   trunk/beagle/BeagleClient/Property.cs
   trunk/beagle/BeagleClient/Query.cs
   trunk/beagle/Util/Makefile.am
   trunk/beagle/beagled/AssemblyInfo.cs
   trunk/beagle/beagled/DumpIndex.cs
   trunk/beagle/beagled/ExternalMetadataQueryable.cs
   trunk/beagle/beagled/IQueryable.cs
   trunk/beagle/beagled/LuceneCommon.cs
   trunk/beagle/beagled/LuceneQueryable.cs
   trunk/beagle/beagled/LuceneQueryingDriver.cs
   trunk/beagle/beagled/NetworkServicesQueryable/NetworkServicesQueryable.cs
   trunk/beagle/beagled/QueryDriver.cs
   trunk/beagle/beagled/QueryExecutor.cs
   trunk/beagle/beagled/Queryable.cs

Modified: trunk/beagle/BeagleClient/AssemblyInfo.cs
==============================================================================
--- trunk/beagle/BeagleClient/AssemblyInfo.cs	(original)
+++ trunk/beagle/BeagleClient/AssemblyInfo.cs	Sat May  3 00:02:00 2008
@@ -38,6 +38,9 @@
 	 typeof (Query),
 	 typeof (ReloadConfigRequest),
 	 typeof (ShutdownRequest),
+#if ENABLE_RDF_ADAPTER
+	 typeof (RDFQuery),
+#endif
 	 typeof (SnippetRequest)
 )]
 	 
@@ -51,5 +54,8 @@
 	 typeof (SearchTermResponse),
 	 typeof (DaemonInformationResponse),
 	 typeof (SnippetResponse),
+#if ENABLE_RDF_ADAPTER
+	 typeof (RDFQueryResult),
+#endif
 	 typeof (CountMatchQueryResponse)
 )]

Modified: trunk/beagle/BeagleClient/Property.cs
==============================================================================
--- trunk/beagle/BeagleClient/Property.cs	(original)
+++ trunk/beagle/BeagleClient/Property.cs	Sat May  3 00:02:00 2008
@@ -35,6 +35,7 @@
 namespace Beagle {
 
 	public enum PropertyType {
+		Internal = 0,
 		Text     = 1,
 		Keyword  = 2,
 		Date     = 3

Modified: trunk/beagle/BeagleClient/Query.cs
==============================================================================
--- trunk/beagle/BeagleClient/Query.cs	(original)
+++ trunk/beagle/BeagleClient/Query.cs	Sat May  3 00:02:00 2008
@@ -251,6 +251,85 @@
 		}
 	}
 
+#if ENABLE_RDF_ADAPTER
+	public class RDFQuery : Query { // Query for a (subject, predicate, object) pattern; the three-argument constructor stores empty strings for unspecified parts
+
+		[XmlIgnore]
+		public Uri Subject { // Uri view of SubjectString; null when SubjectString is empty
+			get {
+				if (SubjectString == String.Empty) // NOTE(review): a null SubjectString is not treated as empty here — confirm it can never be null
+					return null;
+				return UriFu.EscapedStringToUri (SubjectString);
+			}
+
+			set {
+				if (value == null)
+					SubjectString = String.Empty;
+				else
+					SubjectString =  UriFu.UriToEscapedString (value);
+			}
+		}
+
+		[XmlElement ("Subject")]
+		public string SubjectString = String.Empty; // escaped-string form of Subject; this is what gets serialized as the "Subject" element
+
+		public string Predicate; // property key; the three-argument constructor strips any "prop:?:" prefix
+		public PropertyType PredicateType; // set by the three-argument constructor from the prefix's type code (Internal when absent or unrecognized)
+
+		public string Object; // value to match; the three-argument constructor stores String.Empty for null
+
+		public RDFQuery ()
+		{
+			// RDFQuery is a sync message, so the asynchronous response handlers are unregistered (presumably they were registered by the Query base class)
+			this.UnregisterAsyncResponseHandler (typeof (HitsAddedResponse));
+			this.UnregisterAsyncResponseHandler (typeof (HitsSubtractedResponse));
+			this.UnregisterAsyncResponseHandler (typeof (FinishedResponse));
+			this.UnregisterAsyncResponseHandler (typeof (ErrorResponse));
+			this.UnregisterAsyncResponseHandler (typeof (SearchTermResponse));
+
+			Keepalive = false;
+		}
+
+		public RDFQuery (Uri subject, string predicate, string _object) : this ()
+		{
+			// Extract the property type from the predicate and strip the "prop:?:" prefix,
+			// e.g. "prop:k:beagle:MimeType" yields PropertyType.Keyword and "beagle:MimeType"
+			PropertyType ptype = PropertyType.Internal;
+			
+			if (predicate != null) {
+				if ((predicate.Length > 7) && predicate.StartsWith ("prop:")) { // NOTE(review): character 6 is not checked to be ':', and an unknown type code still strips the prefix while ptype stays Internal
+					switch (predicate [5]) {
+						case 't': ptype = PropertyType.Text; break;
+						case 'k': ptype = PropertyType.Keyword; break;
+						case 'd': ptype = PropertyType.Date; break;
+					}
+					// remove the prop:?:, which will be added by beagle later
+					predicate = predicate.Substring (7);
+				}
+			}
+
+			this.Subject = subject;
+			this.Predicate = (predicate == null ? String.Empty : predicate);
+			this.PredicateType = ptype;
+			this.Object = (_object == null ? String.Empty : _object);
+						
+			// FIXME: the query contains a dummy part that will make the query
+			// pass even if it is empty. Empty queries are not handled by default.
+			// The dummy is a prohibited text part, so it only excludes documents matching the placeholder text.
+			QueryPart_Text dummy = new QueryPart_Text ();
+			dummy.Logic = QueryPartLogic.Prohibited;
+			dummy.Text = "XXXXXXXXXXXXXXXXXXXXXXXXX";
+			AddPart (dummy);
+		}
+	}
+
+	public class RDFQueryResult : ResponseMessage { // Response to an RDFQuery; RDFQueryExecutor assigns Hits from QueryDriver.DoRDFQuery
+		[XmlArray (ElementName="Hits")]
+		[XmlArrayItem (ElementName="Hit", Type=typeof (Hit))]
+		public ArrayList Hits = new ArrayList (); // serialized as <Hits><Hit>...</Hit></Hits>
+	}
+#endif
+
 	// Synchronous query to return the number of matches
 	public class CountMatchQuery : Query {
 

Modified: trunk/beagle/Util/Makefile.am
==============================================================================
--- trunk/beagle/Util/Makefile.am	(original)
+++ trunk/beagle/Util/Makefile.am	Sat May  3 00:02:00 2008
@@ -160,9 +160,13 @@
 	$(srcdir)/SemWeb/Algos.cs               \
 	$(srcdir)/SemWeb/SparqlClient.cs	\
 	$(srcdir)/SemWeb/XPathSemWebNavigator.cs\
-	$(srcdir)/SemWeb/Euler.cs		\
 	$(srcdir)/QueryKeywordMapping.cs
 
+if ENABLE_RDF_ADAPTER
+UTIL_CSFILES +=				\
+	$(srcdir)/SemWeb/Euler.cs
+endif
+
 # To reduce dependencies, we don't build this
 IGNORED_SEMWEB_CSFILES =			\
 	$(srcdir)/SemWeb/SQLStore.cs

Modified: trunk/beagle/beagled/AssemblyInfo.cs
==============================================================================
--- trunk/beagle/beagled/AssemblyInfo.cs	(original)
+++ trunk/beagle/beagled/AssemblyInfo.cs	Sat May  3 00:02:00 2008
@@ -52,6 +52,9 @@
 	typeof (ReloadConfigExecutor),
 	typeof (ShutdownExecutor),
 	typeof (SnippetExecutor),
+#if ENABLE_RDF_ADAPTER
+	typeof (RDFQueryExecutor),
+#endif
 	typeof (CountMatchQueryExecutor)
 )]
 

Modified: trunk/beagle/beagled/DumpIndex.cs
==============================================================================
--- trunk/beagle/beagled/DumpIndex.cs	(original)
+++ trunk/beagle/beagled/DumpIndex.cs	Sat May  3 00:02:00 2008
@@ -205,7 +205,7 @@
 				int freq;
 				freq = term_enum.DocFreq ();
 
-				Console.WriteLine ("{0} {1} {2}", index_name, term_enum.Term ().Text (), freq);
+				Console.WriteLine ("{0} '{1}' {2}", index_name, term_enum.Term ().Text (), freq);
 
 				// FIXME: spew these as a count
 				++distinct_term_count;

Modified: trunk/beagle/beagled/ExternalMetadataQueryable.cs
==============================================================================
--- trunk/beagle/beagled/ExternalMetadataQueryable.cs	(original)
+++ trunk/beagle/beagled/ExternalMetadataQueryable.cs	Sat May  3 00:02:00 2008
@@ -64,6 +64,13 @@
 		{
 		}
 
+#if ENABLE_RDF_ADAPTER
+		public ICollection DoRDFQuery (Query query)
+		{
+			return null; // this backend returns no RDF results; QueryDriver.DoRDFQuery skips null collections
+		}
+#endif
+
 		public int DoCountMatchQuery (Query query)
 		{
 			return 0;

Modified: trunk/beagle/beagled/IQueryable.cs
==============================================================================
--- trunk/beagle/beagled/IQueryable.cs	(original)
+++ trunk/beagle/beagled/IQueryable.cs	Sat May  3 00:02:00 2008
@@ -43,6 +43,9 @@
 			      IQueryResult result,
 			      IQueryableChangeData data);
 
+#if ENABLE_RDF_ADAPTER
+		ICollection DoRDFQuery (Query query);
+#endif
 		// Just return the number of matches
 		int DoCountMatchQuery (Query query);
 

Modified: trunk/beagle/beagled/LuceneCommon.cs
==============================================================================
--- trunk/beagle/beagled/LuceneCommon.cs	(original)
+++ trunk/beagle/beagled/LuceneCommon.cs	Sat May  3 00:02:00 2008
@@ -538,6 +538,9 @@
 					}
 				} else if (fieldName == "PropertyKeyword")
 					return new LowerCaseFilter (new SingletonTokenStream (reader.ReadToEnd ()));
+				else if (fieldName == "Properties")
+					return new WhitespaceTokenizer (new StringReader (reader.ReadToEnd ()));
+
 
 				TokenStream outstream;
 				outstream = base.TokenStream (fieldName, reader);
@@ -615,6 +618,8 @@
 		// Maps a property (type, key) to its Lucene field name. Exposing this is a little bit suspicious.
 		static protected string PropertyToFieldName (PropertyType type, string key)
 		{
+			if (type == PropertyType.Internal)
+				return key; // Internal keys are used verbatim as the field name, with no "prop:<code>:" prefix
 			return String.Format ("prop:{0}:{1}", TypeToCode (type), key);
 
 		}
@@ -871,6 +876,13 @@
 					
 				AddPropertyToDocument (prop, target_doc);
 			}
+#if ENABLE_RDF_ADAPTER
+
+			// Now add a field containing a whitespace separated list of other fields in the document
+			AddFieldProperies (primary_doc);
+			if (secondary_doc != null)
+				AddFieldProperies (secondary_doc);
+#endif
 		}
 
 		static private Document CreateSecondaryDocument (Uri uri, Uri parent_uri)
@@ -943,6 +955,9 @@
 				}
 			}
 
+#if ENABLE_RDF_ADAPTER
+			AddFieldProperies (new_doc);
+#endif
 			return new_doc;
 		}
 
@@ -964,9 +979,40 @@
 				}
 			}
 
+#if ENABLE_RDF_ADAPTER
+			AddFieldProperies (doc);
+#endif
 			return doc;
 		}
 
+		// Add (or rebuild) the "Properties" field: an unstored, tokenized, space-separated list of the names of the document's other fields
+		static protected void AddFieldProperies (Document doc) // NOTE(review): "Properies" is a typo for "Properties"; every caller in this change uses the same spelling
+		{
+			const string Separator = " ";
+
+			StringBuilder sb = new StringBuilder ();
+			bool seen_properties = false;
+
+			foreach (Field f in doc.Fields ()) {
+				if (f.Name () == "Properties") { // never list the summary field inside itself
+					seen_properties = true;
+					continue;
+				}
+
+				sb.Append (f.Name ());
+				sb.Append (Separator);
+			}
+
+			if (sb.Length > 0)
+				sb.Length -= Separator.Length; // drop the trailing separator
+
+			if (seen_properties)
+				doc.RemoveFields ("Properties"); // replace the old list instead of adding a second "Properties" field
+
+			Field field = new Field ("Properties", sb.ToString (), Field.Store.NO, Field.Index.TOKENIZED);
+			doc.Add (field);
+		}
+
 		static protected Uri GetUriFromDocument (Document doc)
 		{
 			string uri;
@@ -997,8 +1043,8 @@
 
 		static protected void AddPropertiesToHit (Hit hit, Document doc, bool from_primary_index)
 		{
+			Property prop;
 			foreach (Field f in doc.Fields ()) {
-				Property prop;
 				prop = GetPropertyFromDocument (f, doc, from_primary_index);
 				if (prop != null)
 					hit.AddProperty (prop);
@@ -1455,7 +1501,9 @@
 			// This gives a chance to modify create new queries based on
 			// backend specific properties
 
-			abstract_part = query_part_hook (abstract_part);
+			if (query_part_hook != null)
+				abstract_part = query_part_hook (abstract_part);
+
 			if (abstract_part == null)
 				return;
 
@@ -1671,11 +1719,13 @@
 				else
 					field_name = PropertyToFieldName (part.Type, part.Key);
 
+				// Details of the conversion here depend on BeagleAnalyzer::TokenStream
 				if (part.Type == PropertyType.Text)
 					primary_query = StringToQuery (field_name, part.Value, term_list);
 				else {
 					Term term;
-					if (field_name.StartsWith ("prop:k:" + Property.PrivateNamespace))
+					// FIXME: Handle date queries for other date fields
+					if (part.Type == PropertyType.Internal || field_name.StartsWith ("prop:k:" + Property.PrivateNamespace))
 						term = new Term (field_name, part.Value);
 					else
 						term = new Term (field_name, part.Value.ToLower ());
@@ -2042,6 +2092,11 @@
 		// a lot of memory.  Don't call it without a good reason!
 		public ICollection GetHitsForUris (ICollection uris)
 		{
+			return GetHitsForUris (uris, null); // null selector: the overload fetches documents without a FieldSelector
+		}
+
+		public ICollection GetHitsForUris (ICollection uris, FieldSelector fields)
+		{
 			Hashtable hits_by_uri = UriFu.NewHashtable ();
 
 			LNS.IndexSearcher primary_searcher = GetSearcher (PrimaryStore);
@@ -2052,7 +2107,9 @@
 			LNS.Hits primary_hits = primary_searcher.Search (uri_query);
 
 			for (int i = 0; i < primary_hits.Length (); i++) {
-				Document doc = primary_hits.Doc (i);
+				Document doc = ((fields == null) ?
+					primary_hits.Doc (i) :
+					primary_hits.Doc (i, fields));
 
 				Uri u = GetUriFromDocument (doc);
 
@@ -2064,7 +2121,9 @@
 				LNS.Hits secondary_hits = secondary_searcher.Search (uri_query);
 
 				for (int i = 0; i < secondary_hits.Length (); i++) {
-					Document doc = secondary_hits.Doc (i);
+					Document doc = ((fields == null) ?
+						secondary_hits.Doc (i) :
+						secondary_hits.Doc (i, fields));
 
 					Uri uri = GetUriFromDocument (doc);
 					Hit hit = (Hit) hits_by_uri [uri];

Modified: trunk/beagle/beagled/LuceneQueryable.cs
==============================================================================
--- trunk/beagle/beagled/LuceneQueryable.cs	(original)
+++ trunk/beagle/beagled/LuceneQueryable.cs	Sat May  3 00:02:00 2008
@@ -281,6 +281,13 @@
 			public ICollection RemovedUris;
 		}
 
+#if ENABLE_RDF_ADAPTER
+		public ICollection DoRDFQuery (Query query)
+		{
+			return Driver.DoRDFQuery (query); // delegate to this backend's Driver; the result is returned unchanged
+		}
+#endif
+
 		public void DoQuery (Query                query,
 				     IQueryResult         query_result,
 				     IQueryableChangeData i_change_data)

Modified: trunk/beagle/beagled/LuceneQueryingDriver.cs
==============================================================================
--- trunk/beagle/beagled/LuceneQueryingDriver.cs	(original)
+++ trunk/beagle/beagled/LuceneQueryingDriver.cs	Sat May  3 00:02:00 2008
@@ -313,6 +313,416 @@
 			}
 		}
 
+#if ENABLE_RDF_ADAPTER
+		///////// RDF fu ///////////////////////////////////////////////
+
+		// Returns a collection of hits. NOTE(review): this comment used to say "Uris", but DoLowLevelRDFQuery builds Hit objects
+		// HitFilter and UriFilter are ignored for now
+		// They will come into play in the final FetchDocument part
+		// FIXME: Should RDFQuery do any query mapping using backend_query_part_hook ?
+		// I think it should not. QueryPart hooks are for human beings, RDF is for software.
+		public ICollection DoRDFQuery (Query _query)
+		{
+			RDFQuery query = (RDFQuery) _query; // direct cast: throws InvalidCastException for any other Query type
+
+			string subject, predicate, _object;
+			PropertyType pred_type;
+
+			subject = query.SubjectString;
+			predicate = query.Predicate;
+			pred_type = query.PredicateType;
+			_object = query.Object;
+
+			if (Debug)
+				Logger.Log.Debug ("###### {0}: Starting low-level queries '{1}' : '{4}:{2}' = '{3}'", IndexName, subject, predicate, _object, pred_type);
+
+			// ******** 8 cases: each of subject, predicate and object is either empty or given **********
+
+			// Return all uris
+			if (subject == String.Empty && predicate == String.Empty && _object == String.Empty)
+				return GetAllHitsByUri ().Values;
+
+			// Normal query
+			if (subject == String.Empty && predicate == String.Empty && _object != String.Empty) {
+				QueryPart_Text part = new QueryPart_Text ();
+				part.Text = _object;
+				part.SearchFullText = false; // We only search properties in RDF query
+				query.AddPart (part); // NOTE(review): this mutates the caller's query, and QueryDriver.DoRDFQuery hands the same object to every backend — confirm parts do not accumulate
+				return DoLowLevelRDFQuery (query, pred_type, predicate, _object); // NOTE(review): predicate is String.Empty here, not null, so the callee's "predicate == null" branch is not taken — confirm
+			}
+
+			// Return uris for all documents with this property
+			if (subject == String.Empty && predicate != String.Empty && _object == String.Empty) {
+				string field_name = PropertyToFieldName (pred_type, predicate);
+
+				QueryPart_Property part = new QueryPart_Property ();
+				part.Type = PropertyType.Internal;
+				part.Key = "Properties"; // the field written by AddFieldProperies, listing each document's field names
+				part.Value = field_name;
+				query.AddPart (part);
+
+				return DoLowLevelRDFQuery (query, pred_type, predicate, null);
+			}
+
+			// Property query
+			if (subject == String.Empty && predicate != String.Empty && _object != String.Empty) {
+				QueryPart_Property part = new QueryPart_Property ();
+				part.Type = pred_type;
+				part.Key = predicate;
+				part.Value = _object;
+				query.AddPart (part);
+				return DoLowLevelRDFQuery (query, pred_type, predicate, _object);
+			}
+
+			// Return if the URI exists
+			if (subject != String.Empty && predicate == String.Empty && _object == String.Empty) {
+				QueryPart_Uri part = new QueryPart_Uri ();
+				part.Uri = UriFu.UserUritoEscapedUri (subject); // better be URI!
+				query.AddPart (part);
+				// FIXME: Which properties to return in the hit? All or none ?
+				return DoLowLevelRDFQuery (query, pred_type, predicate, null);
+			}
+
+			// Normal query in the document with this URI
+			if (subject != String.Empty && predicate == String.Empty && _object != String.Empty) {
+				QueryPart_Uri uri_part = new QueryPart_Uri ();
+				uri_part.Uri = UriFu.UserUritoEscapedUri (subject); // better be URI!
+				query.AddPart (uri_part);
+
+				QueryPart_Text part = new QueryPart_Text ();
+				part.Text = _object;
+				part.SearchFullText = false; // We only search properties in RDF query
+				query.AddPart (part);
+
+				return DoLowLevelRDFQuery (query, pred_type, predicate, _object);
+			}
+
+			// Return URI if the document with this URI contains this property
+			if (subject != String.Empty && predicate != String.Empty && _object == String.Empty) {
+				ArrayList returned_uris = new ArrayList (1); // NOTE(review): returned_uris is never used
+
+				ArrayList uri_list = new ArrayList (1);
+				uri_list.Add (UriFu.UserUritoEscapedUri (subject));
+
+				string field_name = PropertyToFieldName (pred_type, predicate);
+				FieldSelector fields = new MapFieldSelector (new string[] { "Uri", "Timestamp", field_name });
+				ICollection hits = GetHitsForUris (uri_list, fields);
+
+				return hits; // NOTE(review): nothing here checks that the document actually has the property — confirm this is handled elsewhere
+			}
+
+			// Property query in the document with this URI
+			if (subject != String.Empty && predicate != String.Empty && _object != String.Empty) {
+				QueryPart_Uri uri_part = new QueryPart_Uri ();
+				uri_part.Uri = UriFu.UserUritoEscapedUri (subject); // better be URI!
+				query.AddPart (uri_part);
+
+				QueryPart_Property part = new QueryPart_Property ();
+				part.Type = pred_type;
+				part.Key = predicate;
+				part.Value = _object;
+				query.AddPart (part);
+
+				return DoLowLevelRDFQuery (query, pred_type, predicate, _object);
+			}
+
+			throw new Exception ("Never reaches"); // unreachable: the eight cases above cover every empty/non-empty combination
+		}
+#endif
+
+		private ICollection DoLowLevelRDFQuery (Query query,
+							PropertyType pred_type,
+							string predicate,
+							string field_value)
+		{
+			// Runs the assembled query against the index(es); returns an ArrayList of Hit, or null when the query has no required parts.
+			Stopwatch total, a, b, c, d, e, f;
+
+			total = new Stopwatch ();
+			a = new Stopwatch ();
+			b = new Stopwatch ();
+			c = new Stopwatch ();
+			d = new Stopwatch ();
+			e = new Stopwatch ();
+			f = new Stopwatch ();
+
+			total.Start ();
+			a.Start ();
+
+			// Assemble all of the parts into a bunch of Lucene queries
+
+			ArrayList primary_required_part_queries;
+			ArrayList secondary_required_part_queries;
+
+			LNS.BooleanQuery primary_prohibited_part_query;
+			LNS.BooleanQuery secondary_prohibited_part_query;
+
+			AndHitFilter all_hit_filters;
+
+			ArrayList term_list;
+
+			// The two null arguments are presumably the search subset and the query-part hook (TODO confirm against AssembleQuery)
+
+			term_list = AssembleQuery (query,
+				null,
+				null,
+				out primary_required_part_queries,
+				out secondary_required_part_queries,
+				out primary_prohibited_part_query,
+				out secondary_prohibited_part_query,
+				out all_hit_filters);
+
+			a.Stop ();
+			if (Debug)
+				Log.Debug ("###### {0}: Building queries took {1}", IndexName, a);
+
+			// If we have no required parts, give up.
+			if (primary_required_part_queries == null)
+				return null;
+
+			b.Start ();
+
+			//
+			// Now that we have all of these nice queries, let's execute them!
+			//
+
+			// Create the searchers that we will need.
+
+			IndexReader primary_reader;
+			LNS.IndexSearcher primary_searcher;
+			IndexReader secondary_reader;
+			LNS.IndexSearcher secondary_searcher;
+
+			// They are released again by CloseSearchers at the end of this method.
+
+			BuildSearchers (out primary_reader, out primary_searcher, out secondary_reader, out secondary_searcher);
+			b.Stop ();
+			if (Debug)
+				Log.Debug ("###### {0}: Readers/searchers built in {1}", IndexName, b);
+
+			// Build whitelists and blacklists for search subsets.
+			c.Start ();
+
+			// Possibly create our whitelists from the search subset.
+			LuceneBitArray primary_whitelist, secondary_whitelist;
+			CreateQueryWhitelists (null,
+				primary_searcher,
+				secondary_searcher,
+				primary_prohibited_part_query,
+				secondary_prohibited_part_query,
+				out primary_whitelist,
+				out secondary_whitelist);
+
+			c.Stop ();
+			if (Debug)
+				Log.Debug ("###### {0}: Whitelists and blacklists built in {1}", IndexName, c);
+
+			// Now run the low level queries against our indexes.
+			d.Start ();
+
+			BetterBitArray primary_matches = null;
+
+			if (primary_required_part_queries != null) { // always true here: the null case returned above
+
+				if (secondary_searcher != null)
+					primary_matches = DoRequiredQueries_TwoIndex (primary_searcher,
+										      secondary_searcher,
+										      primary_required_part_queries,
+										      secondary_required_part_queries,
+										      primary_whitelist,
+										      secondary_whitelist);
+				else
+					primary_matches = DoRequiredQueries (primary_searcher,
+									     primary_required_part_queries,
+									     primary_whitelist);
+
+			}
+
+			d.Stop ();
+			if (Debug)
+				Logger.Log.Debug ("###### {0}: Low-level queries finished in {1}", IndexName, d);
+
+			e.Start ();
+
+			int count = 0;
+			Document doc;
+			ArrayList hits = new ArrayList (primary_matches.TrueCount);
+
+			TermDocs secondary_term_docs = null;
+			if (secondary_searcher != null)
+				secondary_term_docs = secondary_searcher.Reader.TermDocs ();
+
+			FieldSelector fields = null; // when a predicate is given, only Uri, Timestamp and that property's field are loaded
+			if (predicate != null)
+				fields = new MapFieldSelector (new string[] { "Uri", "Timestamp", PropertyToFieldName (pred_type, predicate)});
+
+			for (int match_index = primary_matches.GetNextTrueIndex (0);
+			     match_index < primary_matches.Count; 
+			     match_index = primary_matches.GetNextTrueIndex (++ match_index)) {
+
+				count++;
+
+				// If we have a UriFilter, apply it.
+				// RDF FIXME: Ignore Uri Filter for now
+				//if (uri_filter != null) {
+				//	Uri uri;
+				//	uri = GetUriFromDocument (doc);
+				//	if (! uri_filter (uri))
+				//		continue;
+				//}
+
+				// If predicate was not specified but object was specified,
+				// then figure out the right predicate
+				if (predicate == null && field_value != null) {
+					Hit hit = new Hit ();
+					doc = primary_searcher.Doc (match_index);
+					hit.Uri = GetUriFromDocument (doc);
+					hit.Timestamp = StringFu.StringToDateTime (doc.Get ("Timestamp"));
+
+					bool found_matching_predicate = false;
+
+					foreach (Field field in doc.Fields ()) {
+						if (! FieldIsPredicate (field, field_value))
+							continue;
+
+						Property prop = new Property ();
+						prop.Type = pred_type;
+						prop.Key = predicate; // NOTE(review): predicate is null in this branch, so the matching field's name is not recorded — confirm the intended key
+						prop.Value = field_value;
+						hit.AddProperty (prop);
+
+						found_matching_predicate = true;
+					}
+
+					// Now get the matching predicate from the secondary index
+					if (secondary_searcher == null) {
+						doc = null;
+					} else {
+						Term term = new Term ("Uri", doc.Get ("Uri"));
+						secondary_term_docs.Seek (term);
+						doc = secondary_term_docs.Next () ? // no secondary document: clear doc so the primary fields are not scanned twice
+							secondary_searcher.Doc (secondary_term_docs.Doc ()) : null;
+					}
+
+					if (doc != null) {
+						foreach (Field field in doc.Fields ()) {
+							if (! FieldIsPredicate (field, field_value))
+								continue;
+
+							Property prop = new Property ();
+							prop.Type = pred_type;
+							prop.Key = predicate; // NOTE(review): null here as well, see the primary-document loop
+							prop.Value = field_value;
+							hit.AddProperty (prop);
+
+							found_matching_predicate = true;
+						}
+					}
+
+					if (! found_matching_predicate) {
+						// No matching predicate found
+						// This means some unstored field matched the query
+						// FIXME: Add a synthetic property #text
+						hit.AddProperty (Property.New ("#text", field_value));
+					}
+
+					hits.Add (hit);
+				} else {
+					doc = primary_searcher.Doc (match_index, fields);
+					Hit hit = CreateHit (doc, secondary_reader, secondary_term_docs, fields);
+					foreach (Property prop in hit.Properties) {
+						if (prop.Key == predicate)
+							prop.Value = field_value; // NOTE(review): field_value may be null here, which overwrites the indexed value with null — confirm intended
+					}
+
+					hits.Add (hit);
+				}
+			}
+
+			e.Stop ();
+
+			if (Debug)
+				Log.Debug ("###### {0}: Query results generated in {1}", IndexName, e);
+
+			// Finally, we clean up after ourselves: release the TermDocs
+			// enumerator first, then the readers/searchers.
+			f.Start ();
+			if (secondary_term_docs != null)
+				secondary_term_docs.Close ();
+			CloseSearchers (primary_reader, primary_searcher, secondary_reader, secondary_searcher);
+			f.Stop ();
+
+			if (Debug)
+				Log.Debug ("###### {0}: Readers/searchers released in {1}", IndexName, f);
+
+			total.Stop ();
+			if (Debug) {
+				Log.Debug ("###### {0}: Query time breakdown:", IndexName);
+				Log.Debug ("###### {0}:    Build queries {1,6} ({2:0.0}%)", IndexName, a, 100 * a.ElapsedTime / total.ElapsedTime);
+				Log.Debug ("###### {0}:      Got readers {1,6} ({2:0.0}%)", IndexName, b, 100 * b.ElapsedTime / total.ElapsedTime);
+				Log.Debug ("###### {0}:       Whitelists {1,6} ({2:0.0}%)", IndexName, c, 100 * c.ElapsedTime / total.ElapsedTime);
+				Log.Debug ("###### {0}:          Queries {1,6} ({2:0.0}%)", IndexName, d, 100 * d.ElapsedTime / total.ElapsedTime);
+				Log.Debug ("###### {0}:    Gen'd Results {1,6} ({2:0.0}%)", IndexName, e, 100 * e.ElapsedTime / total.ElapsedTime);
+				Log.Debug ("###### {0}:   Reader cleanup {1,6} ({2:0.0}%)", IndexName, f, 100 * f.ElapsedTime / total.ElapsedTime);
+				Log.Debug ("###### {0}:            TOTAL {1,6}", IndexName, total);
+
+				Logger.Log.Debug ("###### {0}: Total query run in {1}", IndexName, total);
+			}
+
+			return hits;
+		}
+
+		// Returns true when the analyzed form of the field's value contains the analyzed form of `value`.
+		// FIXME: This basically queries the value against the field and is really really slow!
+		private bool FieldIsPredicate (Field field, string value)
+		{
+			string field_name = field.Name ();
+			string field_value = field.StringValue ();
+			if (Debug) Log.Debug ("Reverse searching for '{0}' value in {1}='{2}'", value, field_name, field_value);
+			// Simply run the value of the property against the right analyzer
+			// and check if there is any match
+			TokenStream source = IndexingAnalyzer.TokenStream (field_name, new StringReader (field_value));
+			StringBuilder sb = new StringBuilder ();
+			try {
+				Lucene.Net.Analysis.Token token;
+				while (true) {
+					token = source.Next ();
+					if (token == null)
+						break;
+					sb.Append (token.TermText ());
+					sb.Append (" ");
+					break; // NOTE(review): this stops after the first token, so only the first field token is compared — confirm intended
+				}
+			} finally {
+				try {
+					source.Close ();
+				} catch { } // best effort: a failure to close the token stream must not mask the result
+			}
+
+			string field_analyzed = sb.ToString ();
+			sb.Length = 0;
+
+			source = QueryAnalyzer.TokenStream (field_name, new StringReader (value));
+			try {
+				Lucene.Net.Analysis.Token token;
+				while (true) {
+					token = source.Next ();
+					if (token == null)
+						break;
+					sb.Append (token.TermText ());
+					sb.Append (" ");
+					break; // NOTE(review): same as above, only the first query token is used
+				}
+			} finally {
+				try {
+					source.Close ();
+				} catch { } // best effort, as above
+			}
+
+			string value_analyzed = sb.ToString ();
+			return value_analyzed.Length > 0 && field_analyzed.Contains (value_analyzed); // an empty analyzed value would otherwise "match" every field
+		}
+
 		////////////////////////////////////////////////////////////////
 
 		public int DoCountMatchQuery (Query query, QueryPartHook query_part_hook)
@@ -1060,6 +1470,17 @@
 					IndexReader secondary_reader,
 					TermDocs term_docs)
 		{
+			return CreateHit ( primary_doc,
+					secondary_reader,
+					term_docs,
+					null);
+		}
+
+		private static Hit CreateHit ( Document primary_doc,
+					IndexReader secondary_reader,
+					TermDocs term_docs,
+					FieldSelector fields)
+		{
 			Hit hit = DocumentToHit (primary_doc);
 
 			if (secondary_reader == null)
@@ -1072,7 +1493,10 @@
 
 			// Move to the first (and only) matching term doc
 			term_docs.Next ();
-			Document secondary_doc = secondary_reader.Document (term_docs.Doc ());
+			Document secondary_doc =
+				(fields == null) ?
+				secondary_reader.Document (term_docs.Doc ()) :
+				secondary_reader.Document (term_docs.Doc (), fields);
 
 			// If we are using the secondary index, now we need to
 			// merge the properties from the secondary index

Modified: trunk/beagle/beagled/NetworkServicesQueryable/NetworkServicesQueryable.cs
==============================================================================
--- trunk/beagle/beagled/NetworkServicesQueryable/NetworkServicesQueryable.cs	(original)
+++ trunk/beagle/beagled/NetworkServicesQueryable/NetworkServicesQueryable.cs	Sat May  3 00:02:00 2008
@@ -31,6 +31,13 @@
 			return (services != null && services.Count > 0);
 		}
 
+#if ENABLE_RDF_ADAPTER
+		public ICollection DoRDFQuery (Query query)
+		{
+			return null; // this backend returns no RDF results; QueryDriver.DoRDFQuery skips null collections
+		}
+#endif
+
 		public void DoQuery (Query query, IQueryResult result, IQueryableChangeData data)
 		{
 			// Get rid of the standard UnixTransport so that we can

Modified: trunk/beagle/beagled/QueryDriver.cs
==============================================================================
--- trunk/beagle/beagled/QueryDriver.cs	(original)
+++ trunk/beagle/beagled/QueryDriver.cs	Sat May  3 00:02:00 2008
@@ -634,6 +634,25 @@
 			QueryEachQueryable (query, result);
 		}
 
+#if ENABLE_RDF_ADAPTER
+		static public ArrayList DoRDFQuery (RDFQuery query) // runs the query on every accepting Queryable and concatenates the results; never returns null
+		{
+			ArrayList all_results = new ArrayList ();
+
+			foreach (Queryable q in Queryables) {
+				if (! q.AcceptQuery (query))
+					continue;
+
+				ICollection results = q.DoRDFQuery (query); // NOTE(review): the same query object goes to every backend, and LuceneQueryingDriver.DoRDFQuery adds parts to it — confirm parts do not accumulate
+				if (results == null || results.Count == 0) // backends may return null for "no results"
+					continue;
+				all_results.AddRange (results);
+			}
+
+			return all_results;
+		}
+#endif
+
 		////////////////////////////////////////////////////////
 
 		static public int DoCountMatchQuery (CountMatchQuery query)

Modified: trunk/beagle/beagled/QueryExecutor.cs
==============================================================================
--- trunk/beagle/beagled/QueryExecutor.cs	(original)
+++ trunk/beagle/beagled/QueryExecutor.cs	Sat May  3 00:02:00 2008
@@ -110,6 +110,23 @@
 		}
 	}
 
+#if ENABLE_RDF_ADAPTER
+	[RequestMessage (typeof (RDFQuery))]
+	public class RDFQueryExecutor : RequestMessageExecutor { // answers an RDFQuery request synchronously with an RDFQueryResult
+
+		public override ResponseMessage Execute (RequestMessage request)
+		{
+			RDFQueryResult result = new RDFQueryResult ();
+			RDFQuery query = request as RDFQuery;
+			if (query == null) // request was null or not an RDFQuery
+				return new ErrorResponse ("Only RDF query please!");
+
+			result.Hits = QueryDriver.DoRDFQuery (query); // results merged from all accepting backends
+			return result;
+		}
+	}
+#endif
+
 	[RequestMessage (typeof (CountMatchQuery))]
 	public class CountMatchQueryExecutor : RequestMessageExecutor {
 

Modified: trunk/beagle/beagled/Queryable.cs
==============================================================================
--- trunk/beagle/beagled/Queryable.cs	(original)
+++ trunk/beagle/beagled/Queryable.cs	Sat May  3 00:02:00 2008
@@ -83,6 +83,13 @@
 			}
 		}
 
+#if ENABLE_RDF_ADAPTER
+		public ICollection DoRDFQuery (Query query)
+		{
+			return iqueryable.DoRDFQuery (query); // NOTE(review): unlike DoCountMatchQuery, this call is not wrapped in try/catch, so backend exceptions propagate to QueryDriver — confirm intended
+		}
+#endif
+
 		public int DoCountMatchQuery (Query query)
 		{
 			try {



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]