Re: ITagProvider



Hey, I did a little cleanup so no one's stuck reading impossibly bad code
;) This also has the super-sucky way of integrating with the querying
of the Lucene indexes. The biggest problem is that right now it will
only work on internally mapped URIs (the uid:xxxxxxx ones). So, in
addition to the real merging of queries, URI mapping/lookup should be
done too.

Since some people are far too lazy to use patches (or are just that
cool ;) ), there's a bzr branch here:
https://code.launchpad.net/~kkubasik/beagle/kkubasik-beagle



On 9/28/07, Kevin Kubasik <kevin kubasik net> wrote:
> Hey, I was chatting with DBera last night and we got off on a random
> little tangent. Anyway, I remembered that I still hadn't shared any
> of the code or my thoughts that had started to evolve as far as
> supporting the idea of 'desktop tagging'.
>
> I figured I would attach a copy of the patch that allows you to see
> the current ITagProvider (unfortunately this is the majorly dumbed-down
> interface, as I tried to get it integrated; once we have this worked
> into the query system, I'll flesh out the API, make my simple
> sample thread-safe, etc.). It's still a sketch-up: I need to abstract or
> make an interface for the Tag class, but I got far too tired last night
> after my battle with Lucene.
>
> DBera mentioned that the best place to implement this was probably
> inside LuceneQueryingDriver: since we are already merging 2 result sets
> (the primary and secondary indexes), adding a third data source
> shouldn't be too hard, should it?
>
> Either way, I tried a couple of things, and I've got a fair idea of
> how the process works; I'm just still getting hung up on the different
> BitArrays. It seems that, as they are the ones holding all the result
> sets, to merge results from the tagging backend at the lower level I
> need to figure those out. The other option is always to just build
> hits from the tagged URIs and drop any duplicates, but I'm not sure
> that's how the response works.
>
> Anyway, I'd love some feedback/help. This is just the core/super
> simple implementation; once I figure out the results merging I'll add
> back in the child tags, descriptions, etc.
> --
> Cheers,
> Kevin Kubasik
> http://kubasik.net/blog
>
>


-- 
Cheers,
Kevin Kubasik
http://kubasik.net/blog
=== added file 'Util/TagProvider.cs'
--- Util/TagProvider.cs	1970-01-01 00:00:00 +0000
+++ Util/TagProvider.cs	2007-09-28 09:06:42 +0000
@@ -0,0 +1,183 @@
+// TagProvider.cs - An interface used to pull tags from a variety of 
+//						sources.
+//
+// Copyright (C) 2007 Kevin Kubasik <kevin kubasik net>
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the
+// "Software"), to deal in the Software without restriction, including
+// without limitation the rights to use, copy, modify, merge, publish,
+// distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to
+// the following conditions:
+// 
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+
+using System;
+using System.IO;
+using System.Collections;
+using System.Collections.Generic;
+
+using Mono.Data.SqliteClient;
+//using ICSharpCode.SharpZipLib.GZip;
+
+
+namespace Beagle.Util
+{
+	
+	public interface ITagProvider{ // Entry point for creating, looking up and searching tags.
+		ITag MakeNewTag(string s); // Handle for a new tag named s.
+		ITag GetTag(string s); // Handle for the tag named s.
+		ITag[] SearchTags(string s); // Tags matching s; BeagleTagProvider matches on a substring of the tag name.
+		ITag[] GetTagsForUri(string s); // Tags attached to the uri s.
+	}
+	public interface ITag{ // One tag together with the uris it is attached to.
+		String GetFirstUri(); // One uri carrying this tag; BeagleTag returns null when there is none.
+		String[] GetAllUri(); // Every uri carrying this tag.
+		void AddUri(string s); // Attach this tag to the uri s.
+		void DeleteUri(string s); // Detach this tag from the uri s.
+		
+	}
+	// ITag stored in the sqlite "tags" table, which holds one (uri, tag) row per tagged uri.
+	public class BeagleTag: ITag {
+		string name;
+		SqliteConnection connection = null;
+		public BeagleTag () : this (null, "") {}
+		public BeagleTag (SqliteConnection conn) : this (conn, "") {}
+		public BeagleTag (SqliteConnection conn, string argname){ connection = conn; name = argname; }
+		// Builds a command with name bound to :tag; binding keeps quotes in values out of the SQL text.
+		private SqliteCommand TagCommand (string sql){
+			SqliteCommand scomm = new SqliteCommand (sql, connection);
+			scomm.Parameters.Add (":tag", name);
+			return scomm;
+		}
+		// Returns the first uri (in uri order) carrying this tag, or null when there is none.
+		public String GetFirstUri(){
+			using (SqliteCommand scomm = TagCommand ("select uri from tags where tag=:tag order by uri limit 1;"))
+			using (SqliteDataReader sdr = scomm.ExecuteReader ())
+				return sdr.Read () ? sdr.GetString (0) : null;
+		}
+		// Returns every uri carrying this tag in uri order; empty when there is none.
+		public String[] GetAllUri(){
+			List<string> l = new List<string>();
+			using (SqliteCommand scomm = TagCommand ("select uri from tags where tag=:tag order by uri;"))
+			using (SqliteDataReader sdr = scomm.ExecuteReader ())
+				while (sdr.Read ())
+					l.Add (sdr.GetString (0));
+			return l.ToArray();
+		}
+		// Attaches this tag to uri s by inserting a (uri, tag) row.
+		public void AddUri(string s){
+			using (SqliteCommand scomm = TagCommand ("insert into tags (uri, tag) values (:uri, :tag);")) {
+				scomm.Parameters.Add (":uri", s);
+				scomm.ExecuteNonQuery ();
+			}
+		}
+		// Detaches this tag from uri s by deleting the matching row(s).
+		public void DeleteUri(string s){
+			using (SqliteCommand scomm = TagCommand ("delete from tags where tag=:tag and uri=:uri;")) {
+				scomm.Parameters.Add (":uri", s);
+				scomm.ExecuteNonQuery ();
+			}
+		}
+	}
+	
+	// ITagProvider backed by the sqlite database "beagletags.db" in PathFinder.StorageDir.
+	public class BeagleTagProvider : ITagProvider
+	{
+		string tag_file = null;
+		SqliteConnection connection = null;
+		public BeagleTagProvider() {
+			Init();
+		}
+
+		// Returns a handle for tag s; the database is not touched until the handle is used.
+		public virtual Beagle.Util.ITag GetTag (string s)
+		{
+			return new BeagleTag(connection,s);
+		}
+
+		// Returns the tags attached to uri s, ordered by tag name.
+		public virtual Beagle.Util.ITag[] GetTagsForUri (string s)
+		{
+			return QueryTags ("select distinct tag from tags where uri=:arg order by tag;", s);
+		}
+
+		// Same as GetTag for now: a tag only exists in the database once AddUri is called on it.
+		public virtual Beagle.Util.ITag MakeNewTag (string s)
+		{
+			return new BeagleTag(connection,s);
+		}
+
+		// Returns the tags whose name contains s, ordered by tag name.
+		public virtual Beagle.Util.ITag[] SearchTags (string s)
+		{
+			return QueryTags ("select distinct tag from tags where tag LIKE '%' || :arg || '%' order by tag;", s);
+		}
+
+		// Runs a query returning one tag-name column, with arg bound to :arg, and wraps
+		// each row in a BeagleTag. "distinct" is needed because the table holds one row
+		// per (uri, tag) pair, so a plain select repeats a tag once per tagged uri.
+		private ITag[] QueryTags (string sql, string arg) {
+			List<ITag> l = new List<ITag>();
+			using (SqliteCommand scomm = new SqliteCommand (sql, connection)) {
+				scomm.Parameters.Add (":arg", arg);
+				using (SqliteDataReader sdr = scomm.ExecuteReader ())
+					while (sdr.Read ())
+						l.Add (new BeagleTag (connection, sdr.GetString (0)));
+			}
+			return l.ToArray();
+		}
+
+		// Opens the tag database, creating the tags table the first time. A failed
+		// open is only logged and leaves connection null.
+		private void Init(){
+			tag_file = Path.Combine(PathFinder.StorageDir, "beagletags.db");
+			// Checked before Open, because sqlite creates a missing database file on open.
+			bool is_new = !File.Exists(tag_file);
+			try {
+				connection = Open (tag_file);
+			} catch (Exception e) {
+				Log.Debug (e, "Exception opening tags {0}", tag_file);
+				return;
+			}
+			if (is_new)
+				MakeTables();
+		}
+
+		// Opens a connection to the sqlite database stored in db_filename.
+		private SqliteConnection Open (string db_filename)
+		{
+			SqliteConnection connection = new SqliteConnection ();
+			connection.ConnectionString = "version=" + ExternalStringsHack.SqliteVersion
+				+ ",encoding=UTF-8,URI=file:" + db_filename;
+			connection.Open ();
+			return connection;
+		}
+
+		// Creates the tags table on the already opened connection.
+		private void MakeTables(){
+			using (SqliteCommand scomm = new SqliteCommand("CREATE TABLE tags ( uri STRING NOT NULL, tag STRING NOT NULL);",connection))
+				scomm.ExecuteNonQuery();
+		}
+
+		// Manual smoke test: tags one uri and prints it back.
+		public static void Main(string[] args){
+			BeagleTagProvider btp = new BeagleTagProvider();
+			ITag t = btp.MakeNewTag("Tag");
+			t.AddUri("testmoreuri");
+			Console.WriteLine(t.GetFirstUri());
+			Console.WriteLine("Ran");
+		}
+	}
+}

=== modified file 'Util/Makefile.am'
--- Util/Makefile.am	2007-08-09 15:24:30 +0000
+++ Util/Makefile.am	2007-09-28 02:16:30 +0000
@@ -77,6 +77,7 @@
 	$(srcdir)/StringMatcher.cs		\
 	$(srcdir)/SystemInformation.cs		\
 	$(srcdir)/SystemPriorities.cs		\
+	$(srcdir)/TagProvider.cs	\
 	$(srcdir)/TeeTextWriter.cs		\
 	$(srcdir)/ThreadPond.cs			\
 	$(srcdir)/Timeline.cs			\

=== modified file 'beagled/LuceneQueryingDriver.cs'
--- beagled/LuceneQueryingDriver.cs	2007-08-05 16:10:39 +0000
+++ beagled/LuceneQueryingDriver.cs	2007-09-28 09:08:00 +0000
@@ -49,7 +49,7 @@
 
 	public class LuceneQueryingDriver : LuceneCommon {
 
-		static public bool Debug = false;
+		static public bool Debug = true;
 
 		public delegate bool UriFilter (Uri uri);
 		public delegate double RelevancyMultiplier (Hit hit);
@@ -143,9 +143,10 @@
 		{
 			if (Debug)
 				Logger.Log.Debug ("###### {0}: Starting low-level queries", IndexName);
-
+			
 			Stopwatch total, a, b, c, d, e, f;
-
+			//ITagProvider btp = new Beagle.Util.BeagleTagProvider();
+			List<Uri> taggeduris = new List<Uri>();
 			total = new Stopwatch ();
 			a = new Stopwatch ();
 			b = new Stopwatch ();
@@ -156,12 +157,14 @@
 
 			total.Start ();
 			a.Start ();
-
+			//Our tag provider, eventually will probably be dynamic in some sense.
+			ITagProvider tag_provider = new BeagleTagProvider();
 			// Assemble all of the parts into a bunch of Lucene queries
 
 			ArrayList primary_required_part_queries = null;
 			ArrayList secondary_required_part_queries = null;
-
+			List<string> tagprovider_part_queries = null;
+			
 			LNS.BooleanQuery primary_prohibited_part_query = null;
 			LNS.BooleanQuery secondary_prohibited_part_query = null;
 
@@ -185,17 +188,23 @@
 
 				if (primary_part_query == null)
 					continue;
-
+				
 				switch (part.Logic) {
 					
 				case QueryPartLogic.Required:
 					if (primary_required_part_queries == null) {
 						primary_required_part_queries = new ArrayList ();
 						secondary_required_part_queries = new ArrayList ();
+					    tagprovider_part_queries = new List<string>();
 					}
 					primary_required_part_queries.Add (primary_part_query);
 					secondary_required_part_queries.Add (secondary_part_query);
 					
+					if(part != null && part.GetType().Equals((new QueryPart_Property()).GetType()))
+						if(((Beagle.QueryPart_Property) part).Key =="beagle:tag")
+							tagprovider_part_queries.Add(((Beagle.QueryPart_Property) part).Value);
+			        
+
 					if (part_hit_filter != null)
 						all_hit_filters.Add (part_hit_filter);
 					
@@ -220,8 +229,26 @@
 
 					break;
 				}
+				//Well also query to see if any of the search terms are tags
+				foreach(Term tempterm in term_list){
+					tagprovider_part_queries.Add(tempterm.Text());
+					Log.Debug("Adding {0} to the tag queries",tempterm.Text());
+				}
+				//Actually build a list of tags, here we hit the tag provider
+				List<ITag> tags = new List<ITag>();
+				foreach(string temps in tagprovider_part_queries){
+					tags.AddRange(tag_provider.SearchTags(temps));
+					Log.Debug("Searching {0}",temps);
+					Log.Debug("First Found: {0}",tag_provider.GetTag(temps).GetFirstUri());
+				}
+				//Build a list of all the Uri's associated with any tags found in the 
+				//query.
+				foreach(ITag t in tags)
+					foreach(string temps in t.GetAllUri())
+						taggeduris.Add(UriFu.EscapedStringToUri(temps));
 			}
-
+			
+					
 			a.Stop ();
 			if (Debug)
 				Log.Debug ("###### {0}: Building queries took {1}", IndexName, a);
@@ -265,19 +292,23 @@
 			c.Start ();
 			
 			// Possibly create our whitelists from the search subset.
-
+			
+			
 			LuceneBitArray primary_whitelist = null;
 			LuceneBitArray secondary_whitelist = null;
 			
+			
+			
 			if (search_subset_uris != null && search_subset_uris.Count > 0) {
 				primary_whitelist = new LuceneBitArray (primary_searcher);
 				if (secondary_searcher != null)
 					secondary_whitelist = new LuceneBitArray (secondary_searcher);
-
+				
 				foreach (Uri uri in search_subset_uris) {
 					primary_whitelist.AddUri (uri);
 					if (secondary_whitelist != null)
 						secondary_whitelist.AddUri (uri);
+					
 				}
 				primary_whitelist.FlushUris ();
 				if (secondary_whitelist != null)
@@ -301,8 +332,10 @@
 					primary_blacklist.Join (secondary_blacklist);
 				}
 			}
-
+			Lucene.Net.Search.Query qer = (Lucene.Net.Search.Query) primary_required_part_queries[primary_required_part_queries.Count-1];
+			LuceneBitArray lba = new LuceneBitArray(primary_searcher,qer);
 			
+				Log.Debug("Adding {0} to required queries",qer);
 			// Combine our whitelist and blacklist into just a whitelist.
 			
 			if (primary_blacklist != null) {
@@ -353,6 +386,8 @@
 				Logger.Log.Debug ("###### {0}: Low-level queries finished in {1}", IndexName, d);
 
 			e.Start ();
+			
+			Log.Debug ("is primary matches null {0} does it contain a true {1}", primary_matches, primary_matches.ContainsTrue ());
 			// Only generate results if we got some matches
 			if (primary_matches != null && primary_matches.ContainsTrue ()) {
 				GenerateQueryResults (primary_reader,
@@ -368,10 +403,15 @@
 			}
 
 			e.Stop ();
-
+			//This sucks and is expensive, it only works on internal uri's (uid:xxxxxx)
+			ICollection collOfTagHits = GetHitsForUris(taggeduris);
+			foreach(Hit h in collOfTagHits){
+				//We should not be sending the same Uri twice.
+				result.Add(collOfTagHits);
+			}
 			if (Debug)
 				Log.Debug ("###### {0}: Query results generated in {1}", IndexName, e);
-
+			
 			//
 			// Finally, we clean up after ourselves.
 			//
@@ -489,22 +529,25 @@
 				p_matches = new LuceneBitArray (primary_searcher);
 				if (pq != null) {
 					p_matches.Or (pq);
-					if (primary_whitelist != null)
-						p_matches.And (primary_whitelist);
+//					if (primary_whitelist != null)
+//						p_matches.And (primary_whitelist);
 				}
-
+				Log.Debug("Primary Query {0}",pq);
+				
 				s_matches = new LuceneBitArray (secondary_searcher);
 				if (sq != null) {
 					s_matches.Or (sq);
 					if (secondary_whitelist != null)
 						s_matches.And (secondary_whitelist);
 				}
-
+				
 				MatchInfo info;
 				info = new MatchInfo ();
 				info.PrimaryMatches = p_matches;
 				info.SecondaryMatches = s_matches;
 				info.RestrictBy (null); // a hack to initialize the UpperBound
+				Log.Debug("MatchInfo {0}",info.PrimaryMatches);
+				Log.Debug("Count {0}",p_matches.Count);
 				match_info_list.Add (info);
 			}
 
@@ -628,7 +671,7 @@
 			e = new Stopwatch ();
 
 			total.Start ();
-
+			
 			ArrayList final_list_of_hits = null;
 
 			// This is used only for scoring
@@ -878,6 +921,8 @@
 					uri = GetUriFromDocument (doc);
 					if (! uri_filter (uri))
 						continue;
+					
+					Log.Debug("Heres a primary match Uri {0}",uri);
 				}
 
 				// Get the actual hit now

=== modified file 'beagled/PropertyKeywordFu.cs'
--- beagled/PropertyKeywordFu.cs	2007-02-18 22:23:47 +0000
+++ beagled/PropertyKeywordFu.cs	2007-09-28 07:21:19 +0000
@@ -124,6 +124,7 @@
 
 			property_table.Add ("filetype",
 					    new PropertyDetail (PropertyType.Keyword, "beagle:FileType", "Type of content for HitType File"));
+			property_table.Add("tag",new PropertyDetail(PropertyType.Text, "beagle:tag", "Tag of file"));
 		}
 
 		public static void RegisterMapping (PropertyKeywordMapping mapping)



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]