beagle r4489 - branches/beagle-rdf/beagled



Author: dbera
Date: Fri Feb 15 04:19:41 2008
New Revision: 4489
URL: http://svn.gnome.org/viewvc/beagle?rev=4489&view=rev

Log:
Return only the specified properties if a predicate is specified. If the predicate is not specified but the object is, try to guess which predicate matched the query and return that predicate (the current implementation is really expensive). If both the predicate and the object are unspecified, return all properties.


Modified:
   branches/beagle-rdf/beagled/LuceneCommon.cs
   branches/beagle-rdf/beagled/LuceneQueryingDriver.cs

Modified: branches/beagle-rdf/beagled/LuceneCommon.cs
==============================================================================
--- branches/beagle-rdf/beagled/LuceneCommon.cs	(original)
+++ branches/beagle-rdf/beagled/LuceneCommon.cs	Fri Feb 15 04:19:41 2008
@@ -2002,6 +2002,11 @@
 		// a lot of memory.  Don't call it without a good reason!
 		public ICollection GetHitsForUris (ICollection uris)
 		{
+			return GetHitsForUris (uris, null);
+		}
+
+		public ICollection GetHitsForUris (ICollection uris, string[] fields)
+		{
 			Hashtable hits_by_uri = UriFu.NewHashtable ();
 
 			LNS.IndexSearcher primary_searcher = GetSearcher (PrimaryStore);
@@ -2012,7 +2017,9 @@
 			LNS.Hits primary_hits = primary_searcher.Search (uri_query);
 
 			for (int i = 0; i < primary_hits.Length (); i++) {
-				Document doc = primary_hits.Doc (i);
+				Document doc = ((fields == null) ?
+					primary_hits.Doc (i) :
+					primary_hits.Doc (i, fields));
 
 				Uri u = GetUriFromDocument (doc);
 
@@ -2024,7 +2031,9 @@
 				LNS.Hits secondary_hits = secondary_searcher.Search (uri_query);
 
 				for (int i = 0; i < secondary_hits.Length (); i++) {
-					Document doc = secondary_hits.Doc (i);
+					Document doc = ((fields == null) ?
+						secondary_hits.Doc (i) :
+						secondary_hits.Doc (i, fields));
 
 					Uri uri = GetUriFromDocument (doc);
 					Hit hit = (Hit) hits_by_uri [uri];

Modified: branches/beagle-rdf/beagled/LuceneQueryingDriver.cs
==============================================================================
--- branches/beagle-rdf/beagled/LuceneQueryingDriver.cs	(original)
+++ branches/beagle-rdf/beagled/LuceneQueryingDriver.cs	Fri Feb 15 04:19:41 2008
@@ -350,7 +350,7 @@
 				part.Text = _object;
 				part.SearchFullText = false; // We only search properties in RDF query
 				query.AddPart (part);
-				return DoLowLevelRDFQuery (query, null, query_part_hook);
+				return DoLowLevelRDFQuery (query, null, _object, query_part_hook);
 			}
 
 			// Return uris for all documents with this property
@@ -366,7 +366,7 @@
 				part.Value = _object;
 				query.AddPart (part);
 				string field_name = PropertyToFieldName (pred_type, predicate);
-				return DoLowLevelRDFQuery (query, field_name, query_part_hook);
+				return DoLowLevelRDFQuery (query, field_name, _object, query_part_hook);
 			}
 
 			// Return if the URI exists
@@ -374,7 +374,8 @@
 				QueryPart_Uri part = new QueryPart_Uri ();
 				part.Uri = new Uri (subject); // better be URI!
 				query.AddPart (part);
-				return DoLowLevelRDFQuery (query, null, query_part_hook);
+				// FIXME: Which properties to return in the hit? All or none ?
+				return DoLowLevelRDFQuery (query, null, null, query_part_hook);
 			}
 
 			// Normal query in the document with this URI
@@ -388,7 +389,7 @@
 				part.SearchFullText = false; // We only search properties in RDF query
 				query.AddPart (part);
 
-				return DoLowLevelRDFQuery (query, null, query_part_hook);
+				return DoLowLevelRDFQuery (query, null, _object, query_part_hook);
 			}
 
 			// Return URI if the document with this URI contains this property
@@ -397,15 +398,10 @@
 
 				ArrayList uri_list = new ArrayList (1);
 				uri_list.Add (new Uri (subject));
-				ICollection hits = GetHitsForUris (uri_list);
 
-				/*
-				foreach (Hit hit in hits)
-					if (hit.GetFirstProperty (predicate) != null)
-						returned_uris.Add (hit.Uri);
-				*/
-
-				// FIXME FIXME FIXME this one returns all predicates not just the specified ones.
+				string field_name = PropertyToFieldName (pred_type, predicate);
+				string[] fields = { "Uri", "Timestamp", field_name };
+				ICollection hits = GetHitsForUris (uri_list, fields);
 
 				return hits;
 			}
@@ -423,7 +419,7 @@
 				query.AddPart (part);
 
 				string field_name = PropertyToFieldName (pred_type, predicate);
-				return DoLowLevelRDFQuery (query, field_name, query_part_hook);
+				return DoLowLevelRDFQuery (query, field_name, _object, query_part_hook);
 			}
 
 			throw new Exception ("Never reaches");
@@ -506,7 +502,7 @@
 			if (secondary_searcher != null)
 				secondary_term_docs = secondary_searcher.Reader.TermDocs ();
 
-			string[] fields = { "Uri", field_name };
+			string[] fields = { "Uri", "Timestamp", field_name };
 
 			// Go through all Uris now
 			enumerator = primary_reader.Terms (new Term ("Uri", String.Empty));
@@ -542,6 +538,7 @@
 
 		private ICollection DoLowLevelRDFQuery (Query query,
 							string field_name,
+							string field_value,
 							QueryPartHook query_part_hook)
 		{
 
@@ -662,7 +659,7 @@
 				secondary_term_docs = secondary_searcher.Reader.TermDocs ();
 		
 			string[] fields = (field_name != null) ?
-					new string[] { "Uri", field_name } :
+					new string[] { "Uri", "Timestamp", field_name } :
 					null;
 
 			for (int match_index = primary_matches.GetNextTrueIndex (0);
@@ -671,8 +668,6 @@
 
 				count++;
 
-				doc = primary_searcher.Doc (match_index, fields);
-
 				// If we have a UriFilter, apply it.
 				// RDF FIXME: Ignore Uri Filter for now
 				//if (uri_filter != null) {
@@ -682,8 +677,52 @@
 				//		continue;
 				//}
 
-				Hit hit = CreateHit (doc, secondary_searcher, secondary_term_docs, fields);
-				hits.Add (hit); 
+				// If predicate was not specified but object was specified,
+				// then figure out the right predicate
+				if (field_name == null && field_value != null) {
+					Hit hit = new Hit ();
+					doc = primary_searcher.Doc (match_index);
+					hit.Uri = GetUriFromDocument (doc);
+					hit.Timestamp = StringFu.StringToDateTime (doc.Get ("Timestamp"));
+
+					bool found_matching_predicate = false;
+
+					foreach (Field field in doc.Fields ()) {
+						if (! FieldIsPredicate (field, field_value))
+							continue;
+
+						Property prop;
+						prop = GetPropertyFromDocument (field, doc, true);
+						if (prop != null)
+							hit.AddProperty (prop);
+						found_matching_predicate = true;
+					}
+
+					if (secondary_searcher != null) {
+						foreach (Field field in doc.Fields ()) {
+							if (! FieldIsPredicate (field, field_value))
+								continue;
+
+							Property prop;
+							prop = GetPropertyFromDocument (field, doc, false);
+							if (prop != null)
+								hit.AddProperty (prop);
+							found_matching_predicate = true;
+						}
+					}
+
+					if (! found_matching_predicate) {
+						// No matching predicate found
+						// This means some unstored field matched the query
+						// FIXME: Add a synthetic property #text
+						hit.AddProperty (Property.New ("#text", field_value));
+					}
+					
+					hits.Add (hit);
+				} else {
+					doc = primary_searcher.Doc (match_index, fields);
+					hits.Add (CreateHit (doc, secondary_searcher, secondary_term_docs, fields));
+				}
 			}
 
 			e.Stop ();
@@ -719,6 +758,57 @@
 			return hits;
 		}
 
+		// FIXME: This basically queries the value against the field
+		// and is really really slow!
+		private bool FieldIsPredicate (Field field, string value)
+		{
+			string field_name = field.Name ();
+			string field_value = field.StringValue ();
+			Console.WriteLine ("Reverse searching for '{0}' value in {1}='{2}'", value, field_name, field_value);
+			// Simply run the value of the property against the right analyzer
+			// and check if there is any match
+			TokenStream source = IndexingAnalyzer.TokenStream (field_name, new StringReader (field_value));
+			StringBuilder sb = new StringBuilder ();
+			try {
+				Lucene.Net.Analysis.Token token;
+				while (true) {
+					token = source.Next ();
+					if (token == null)
+						break;
+					sb.Append (token.TermText ());
+					sb.Append (" ");
+					break;
+				}
+			} finally {
+				try {
+					source.Close ();
+				} catch { }
+			}
+
+			string field_analyzed = sb.ToString ();
+			sb.Length = 0;
+
+			source = QueryAnalyzer.TokenStream (field_name, new StringReader (value));
+			try {
+				Lucene.Net.Analysis.Token token;
+				while (true) {
+					token = source.Next ();
+					if (token == null)
+						break;
+					sb.Append (token.TermText ());
+					sb.Append (" ");
+					break;
+				}
+			} finally {
+				try {
+					source.Close ();
+				} catch { }
+			}
+
+			string value_analyzed = sb.ToString ();
+			return field_analyzed.Contains (value_analyzed);
+		}
+
 		////////////////////////////////////////////////////////////////
 
 		public int DoCountMatchQuery (Query query, QueryPartHook query_part_hook)



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]