Re: ITagProvider
- From: "Kevin Kubasik" <kevin kubasik net>
- To: dashboard-hackers <dashboard-hackers gnome org>
- Subject: Re: ITagProvider
- Date: Fri, 28 Sep 2007 05:17:05 -0400
Hey, I did a little cleanup so no one's stuck reading impossibly bad code
;) This also has the super-sucky way of integrating with the querying
of the Lucene indexes. The biggest problem is that right now it will
only work on internally mapped URIs (the uid:xxxxxxx ones). So, in
addition to the real merging of queries, URI mapping/lookup should be
done too.
Since some people are far too lazy to use patches (or are just that
cool ;) ), there's a bzr branch here:
https://code.launchpad.net/~kkubasik/beagle/kkubasik-beagle
On 9/28/07, Kevin Kubasik <kevin kubasik net> wrote:
> Hey, I was chatting with DBera last night and we got off on a random
> little tangent. Anyway, I remembered that I still hadn't shared any
> of the code or my thoughts that had started to evolve as far as
> supporting the idea of 'desktop tagging'.
>
> I figured I would attach a copy of the patch that allows you to see
> the current ITagProvider (unfortunately this is the majorly dumbed-down
> interface, as I tried to get it integrated; once we have this worked
> into the query system, I'll flesh out the API, make my simple
> sample thread-safe, etc.). It's a sketch-up: I still need to abstract or
> make an interface for the Tag class, but I got far too tired last night
> after my battle with Lucene.
>
> DBera mentioned that the best place to implement this was probably
> inside LuceneQueryingDriver, since we are already merging 2 result sets
> (the primary and secondary indexes); adding a third data source
> shouldn't be too hard, should it?
>
> Either way, I tried a couple of things, and I've got a fair idea of
> how the process works; I'm just still getting hung up on the different
> BitArrays. It seems that, as they are the ones holding all the result
> sets, to merge results from the tagging backend at the lower level I
> need to figure those out. The other option is always to just build
> hits from the tagged URIs and drop any duplicates, but I'm not sure
> that's how the response works.
>
> Anyways, I'd love some feedback/help. This is just the core/super
> simple implementation, once I figure out the results merging I'll add
> back in the child tags, descriptions, etc.
> --
> Cheers,
> Kevin Kubasik
> http://kubasik.net/blog
>
>
--
Cheers,
Kevin Kubasik
http://kubasik.net/blog
=== added file 'Util/TagProvider.cs'
--- Util/TagProvider.cs 1970-01-01 00:00:00 +0000
+++ Util/TagProvider.cs 2007-09-28 09:06:42 +0000
@@ -0,0 +1,183 @@
+// TagProvider.cs - An interface used to pull tags from a variety of
+// sources.
+//
+// Copyright (C) 2007 Kevin Kubasik <kevin kubasik net>
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the
+// "Software"), to deal in the Software without restriction, including
+// without limitation the rights to use, copy, modify, merge, publish,
+// distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to
+// the following conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+
+using System;
+using System.IO;
+using System.Collections;
+using System.Collections.Generic;
+
+using Mono.Data.SqliteClient;
+//using ICSharpCode.SharpZipLib.GZip;
+
+
+namespace Beagle.Util
+{
+
+ public interface ITagProvider{ // A source of tags: hands out ITag handles by tag name, name fragment, or uri.
+ ITag MakeNewTag(string s); // s is the tag name; BeagleTagProvider returns a handle without writing anything to storage.
+ ITag GetTag(string s); // s is the tag name; BeagleTagProvider does not check that the tag already exists.
+ ITag[] SearchTags(string s); // s is a fragment of a tag name; BeagleTagProvider matches it with SQL LIKE '%s%'.
+ ITag[] GetTagsForUri(string s); // s is a uri; BeagleTagProvider returns the tags stored for exactly that uri.
+ }
+ public interface ITag{ // One tag together with the uris it is attached to.
+ String GetFirstUri(); // One uri carrying this tag; BeagleTag returns the first in uri order, or null when there is none.
+ String[] GetAllUri(); // Every uri carrying this tag; BeagleTag returns them ordered by uri.
+ void AddUri(string s); // Attaches this tag to the uri s.
+ void DeleteUri(string s); // Detaches this tag from the uri s.
+
+ }
+ /// <summary>A tag stored as (uri, tag) rows of the sqlite "tags" table reachable through the given connection.</summary>
+ public class BeagleTag: ITag {
+     string name;
+     SqliteConnection connection = null;
+     public BeagleTag () : this(null, "") {}
+     public BeagleTag(SqliteConnection conn) : this(conn, "") {}
+     public BeagleTag(SqliteConnection conn, string argname){
+         connection = conn;
+         name = argname;
+     }
+     // Doubles single quotes so a value cannot end the SQL string literal it is embedded in; null becomes "".
+     static string Quote(string value){
+         return value == null ? "" : value.Replace("'", "''");
+     }
+     /// <summary>Returns the first uri (in uri order) carrying this tag, or null when no uri carries it.</summary>
+     public String GetFirstUri(){
+         using (SqliteCommand scomm = new SqliteCommand(String.Format("select uri from tags where tag='{0}' order by uri limit 1;",Quote(name)),connection))
+         using (SqliteDataReader sdr = scomm.ExecuteReader())
+             return sdr.Read() ? sdr.GetString(0) : null;
+     }
+
+     /// <summary>Returns every uri carrying this tag, ordered by uri; an empty array when there is none.</summary>
+     public String[] GetAllUri(){
+         List<string> uris = new List<string>();
+         using (SqliteCommand scomm = new SqliteCommand(String.Format("select uri from tags where tag='{0}' order by uri;",Quote(name)),connection))
+         using (SqliteDataReader sdr = scomm.ExecuteReader())
+             while(sdr.Read())
+                 uris.Add(sdr.GetString(0));
+         return uris.ToArray();
+     }
+
+     /// <summary>Inserts a (s, tag name) row, attaching this tag to the uri <paramref name="s"/>.</summary>
+     public void AddUri(string s){
+         using (SqliteCommand scomm = new SqliteCommand(String.Format("insert into tags values ('{0}','{1}') ;",Quote(s),Quote(name)),connection))
+             scomm.ExecuteNonQuery();
+     }
+
+     /// <summary>Deletes every (s, tag name) row, detaching this tag from the uri <paramref name="s"/>.</summary>
+     public void DeleteUri(string s){
+         using (SqliteCommand scomm = new SqliteCommand(String.Format(" delete from tags where tag='{1}' and uri='{0}' ;",Quote(s),Quote(name)),connection))
+             scomm.ExecuteNonQuery();
+     }
+ }
+
+ /// <summary>ITagProvider that keeps (uri, tag) pairs in the sqlite file "beagletags.db" under PathFinder.StorageDir.</summary>
+ public class BeagleTagProvider : ITagProvider
+ {
+     string tag_file = null;
+     SqliteConnection connection = null;
+
+     /// <summary>Opens the tag database, creating its table when the database file did not exist yet.</summary>
+     public BeagleTagProvider(){
+         Init();
+     }
+
+     /// <summary>Returns a handle for the tag named s; the database is not touched until the handle is used.</summary>
+     public virtual Beagle.Util.ITag GetTag (string s){
+         return new BeagleTag(connection,s);
+     }
+
+     /// <summary>Returns the tags attached to the uri s, each tag once, ordered by tag name.</summary>
+     public virtual Beagle.Util.ITag[] GetTagsForUri (string s){
+         return QueryTags(String.Format("select distinct tag from tags where uri='{0}' order by tag;",Quote(s)));
+     }
+
+     /// <summary>Returns a handle for a tag named s; no row is written until a uri is added through the handle.</summary>
+     public virtual Beagle.Util.ITag MakeNewTag (string s){
+         return new BeagleTag(connection,s);
+     }
+
+     /// <summary>Returns the tags whose name contains s, each tag once, ordered by tag name.</summary>
+     public virtual Beagle.Util.ITag[] SearchTags (string s){
+         // Only quotes are escaped here: '%' and '_' inside s still act as LIKE wildcards, as they always did.
+         return QueryTags(String.Format("select distinct tag from tags where tag LIKE '%{0}%' order by tag;",Quote(s)));
+     }
+
+     // Doubles single quotes so a value cannot end the SQL string literal it is embedded in; null becomes "".
+     private static string Quote(string value){
+         return value == null ? "" : value.Replace("'", "''");
+     }
+
+     // Runs a query whose first column is a tag name and wraps every returned row in a BeagleTag.
+     private ITag[] QueryTags(string sql){
+         List<ITag> found = new List<ITag>();
+         using (SqliteCommand scomm = new SqliteCommand(sql,connection))
+         using (SqliteDataReader sdr = scomm.ExecuteReader())
+             while(sdr.Read())
+                 found.Add(new BeagleTag(connection,sdr.GetString(0)));
+         return found.ToArray();
+     }
+
+     // Opens the database and creates the table when the file is new. A failure to open is
+     // logged and leaves connection null (best effort, as before); no table creation is attempted then.
+     private void Init(){
+         tag_file = Path.Combine(PathFinder.StorageDir, "beagletags.db");
+         // Sampled before Open because sqlite itself creates a missing database file when it is opened.
+         bool create_new_db = !File.Exists(tag_file);
+         try {
+             connection = Open (tag_file);
+         } catch (Exception e) {
+             Log.Debug (e, "Exception opening tags {0}", tag_file);
+             return;
+         }
+         if (create_new_db)
+             MakeTables();
+     }
+
+     // Opens and returns a new connection to db_filename using the sqlite version named by ExternalStringsHack.
+     private SqliteConnection Open (string db_filename){
+         SqliteConnection opened = new SqliteConnection ();
+         opened.ConnectionString = "version=" + ExternalStringsHack.SqliteVersion
+             + ",encoding=UTF-8,URI=file:" + db_filename;
+         opened.Open ();
+         return opened;
+     }
+
+     // Creates the empty "tags" table on the already-open connection. The table has no
+     // uniqueness constraint, which is why the tag queries above ask for distinct names.
+     private void MakeTables(){
+         using (SqliteCommand scomm = new SqliteCommand("CREATE TABLE tags ( uri STRING NOT NULL, tag STRING NOT NULL);",connection))
+             scomm.ExecuteNonQuery();
+     }
+
+     // Manual smoke test: tags one uri and prints the first uri read back for that tag.
+     public static void Main(string[] args){
+         BeagleTagProvider btp = new BeagleTagProvider();
+         ITag t = btp.MakeNewTag("Tag");
+         t.AddUri("testmoreuri");
+         Console.WriteLine(t.GetFirstUri());
+         Console.WriteLine("Ran");
+     }
+ }
+}
=== modified file 'Util/Makefile.am'
--- Util/Makefile.am 2007-08-09 15:24:30 +0000
+++ Util/Makefile.am 2007-09-28 02:16:30 +0000
@@ -77,6 +77,7 @@
$(srcdir)/StringMatcher.cs \
$(srcdir)/SystemInformation.cs \
$(srcdir)/SystemPriorities.cs \
+ $(srcdir)/TagProvider.cs \
$(srcdir)/TeeTextWriter.cs \
$(srcdir)/ThreadPond.cs \
$(srcdir)/Timeline.cs \
=== modified file 'beagled/LuceneQueryingDriver.cs'
--- beagled/LuceneQueryingDriver.cs 2007-08-05 16:10:39 +0000
+++ beagled/LuceneQueryingDriver.cs 2007-09-28 09:08:00 +0000
@@ -49,7 +49,7 @@
public class LuceneQueryingDriver : LuceneCommon {
- static public bool Debug = false;
+ static public bool Debug = true;
public delegate bool UriFilter (Uri uri);
public delegate double RelevancyMultiplier (Hit hit);
@@ -143,9 +143,10 @@
{
if (Debug)
Logger.Log.Debug ("###### {0}: Starting low-level queries", IndexName);
-
+
Stopwatch total, a, b, c, d, e, f;
-
+ //ITagProvider btp = new Beagle.Util.BeagleTagProvider();
+ List<Uri> taggeduris = new List<Uri>();
total = new Stopwatch ();
a = new Stopwatch ();
b = new Stopwatch ();
@@ -156,12 +157,14 @@
total.Start ();
a.Start ();
-
+ //Our tag provider, eventually will probably be dynamic in some sense.
+ ITagProvider tag_provider = new BeagleTagProvider();
// Assemble all of the parts into a bunch of Lucene queries
ArrayList primary_required_part_queries = null;
ArrayList secondary_required_part_queries = null;
-
+ List<string> tagprovider_part_queries = null;
+
LNS.BooleanQuery primary_prohibited_part_query = null;
LNS.BooleanQuery secondary_prohibited_part_query = null;
@@ -185,17 +188,23 @@
if (primary_part_query == null)
continue;
-
+
switch (part.Logic) {
case QueryPartLogic.Required:
if (primary_required_part_queries == null) {
primary_required_part_queries = new ArrayList ();
secondary_required_part_queries = new ArrayList ();
+ tagprovider_part_queries = new List<string>();
}
primary_required_part_queries.Add (primary_part_query);
secondary_required_part_queries.Add (secondary_part_query);
+ if(part != null && part.GetType().Equals((new QueryPart_Property()).GetType()))
+ if(((Beagle.QueryPart_Property) part).Key =="beagle:tag")
+ tagprovider_part_queries.Add(((Beagle.QueryPart_Property) part).Value);
+
+
if (part_hit_filter != null)
all_hit_filters.Add (part_hit_filter);
@@ -220,8 +229,26 @@
break;
}
+ //Well also query to see if any of the search terms are tags
+ foreach(Term tempterm in term_list){
+ tagprovider_part_queries.Add(tempterm.Text());
+ Log.Debug("Adding {0} to the tag queries",tempterm.Text());
+ }
+ //Actually build a list of tags, here we hit the tag provider
+ List<ITag> tags = new List<ITag>();
+ foreach(string temps in tagprovider_part_queries){
+ tags.AddRange(tag_provider.SearchTags(temps));
+ Log.Debug("Searching {0}",temps);
+ Log.Debug("First Found: {0}",tag_provider.GetTag(temps).GetFirstUri());
+ }
+ //Build a list of all the Uri's associated with any tags found in the
+ //query.
+ foreach(ITag t in tags)
+ foreach(string temps in t.GetAllUri())
+ taggeduris.Add(UriFu.EscapedStringToUri(temps));
}
-
+
+
a.Stop ();
if (Debug)
Log.Debug ("###### {0}: Building queries took {1}", IndexName, a);
@@ -265,19 +292,23 @@
c.Start ();
// Possibly create our whitelists from the search subset.
-
+
+
LuceneBitArray primary_whitelist = null;
LuceneBitArray secondary_whitelist = null;
+
+
if (search_subset_uris != null && search_subset_uris.Count > 0) {
primary_whitelist = new LuceneBitArray (primary_searcher);
if (secondary_searcher != null)
secondary_whitelist = new LuceneBitArray (secondary_searcher);
-
+
foreach (Uri uri in search_subset_uris) {
primary_whitelist.AddUri (uri);
if (secondary_whitelist != null)
secondary_whitelist.AddUri (uri);
+
}
primary_whitelist.FlushUris ();
if (secondary_whitelist != null)
@@ -301,8 +332,10 @@
primary_blacklist.Join (secondary_blacklist);
}
}
-
+ Lucene.Net.Search.Query qer = (Lucene.Net.Search.Query) primary_required_part_queries[primary_required_part_queries.Count-1];
+ LuceneBitArray lba = new LuceneBitArray(primary_searcher,qer);
+ Log.Debug("Adding {0} to required queries",qer);
// Combine our whitelist and blacklist into just a whitelist.
if (primary_blacklist != null) {
@@ -353,6 +386,8 @@
Logger.Log.Debug ("###### {0}: Low-level queries finished in {1}", IndexName, d);
e.Start ();
+
+ Log.Debug ("is primary matches null {0} does it contain a true {1}", primary_matches, primary_matches.ContainsTrue ());
// Only generate results if we got some matches
if (primary_matches != null && primary_matches.ContainsTrue ()) {
GenerateQueryResults (primary_reader,
@@ -368,10 +403,15 @@
}
e.Stop ();
-
+ //This sucks and is expensive, it only works on internal uri's (uid:xxxxxx)
+ ICollection collOfTagHits = GetHitsForUris(taggeduris);
+ foreach(Hit h in collOfTagHits){
+ //We should not be sending the same Uri twice.
+ result.Add(collOfTagHits);
+ }
if (Debug)
Log.Debug ("###### {0}: Query results generated in {1}", IndexName, e);
-
+
//
// Finally, we clean up after ourselves.
//
@@ -489,22 +529,25 @@
p_matches = new LuceneBitArray (primary_searcher);
if (pq != null) {
p_matches.Or (pq);
- if (primary_whitelist != null)
- p_matches.And (primary_whitelist);
+// if (primary_whitelist != null)
+// p_matches.And (primary_whitelist);
}
-
+ Log.Debug("Primary Query {0}",pq);
+
s_matches = new LuceneBitArray (secondary_searcher);
if (sq != null) {
s_matches.Or (sq);
if (secondary_whitelist != null)
s_matches.And (secondary_whitelist);
}
-
+
MatchInfo info;
info = new MatchInfo ();
info.PrimaryMatches = p_matches;
info.SecondaryMatches = s_matches;
info.RestrictBy (null); // a hack to initialize the UpperBound
+ Log.Debug("MatchInfo {0}",info.PrimaryMatches);
+ Log.Debug("Count {0}",p_matches.Count);
match_info_list.Add (info);
}
@@ -628,7 +671,7 @@
e = new Stopwatch ();
total.Start ();
-
+
ArrayList final_list_of_hits = null;
// This is used only for scoring
@@ -878,6 +921,8 @@
uri = GetUriFromDocument (doc);
if (! uri_filter (uri))
continue;
+
+ Log.Debug("Heres a primary match Uri {0}",uri);
}
// Get the actual hit now
=== modified file 'beagled/PropertyKeywordFu.cs'
--- beagled/PropertyKeywordFu.cs 2007-02-18 22:23:47 +0000
+++ beagled/PropertyKeywordFu.cs 2007-09-28 07:21:19 +0000
@@ -124,6 +124,7 @@
property_table.Add ("filetype",
new PropertyDetail (PropertyType.Keyword, "beagle:FileType", "Type of content for HitType File"));
+ property_table.Add("tag",new PropertyDetail(PropertyType.Text, "beagle:tag", "Tag of file"));
}
public static void RegisterMapping (PropertyKeywordMapping mapping)
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]