Re: [Banshee-List] a little Patch for Wikipedia plugin, v0.0.2



Hello,

Here is a little patch for the Wikipedia plugin. I added a parser
for the Wikipedia pages to remove information that is not needed — for
now the left bar, header, and footer.
I also added a class to manage the queries and request the pages using
HttpWebRequest; this could also help to find pages that didn't match
the search. I used the HttpWebResponse's host to detect results not
from Wikipedia.
Furthermore, I started a cache for the downloaded pages; these are stored
in the plugin dir/wikipedia.

David

Am Sonntag, den 30.04.2006, 02:37 -0400 schrieb Trick van Staveren:
> Hey folks,
> 
> Been working on it a bit and just wanted to publish :)
> 
> http://www.trick.vanstaveren.us/banshee/banshee-wikipedia-plugin-0.0.2.tar.bz2
> 
> Changelog:
> 2006-04-30  Patrick van Staveren  <trick vanstaveren us>
>         * Google search mode - uses the "i'm feeling lucky" feature of Google to direct
> to the closes match using Google, adding the keyword "band".  I know, this isn't the
> best way, but it is much much more accurate than just searching for the artist
> keyword.
>         * Experimenting with stuff like a status bar and progress bar for loading.
>                 * Progress bar is way too buggy and causes segfaults - commented out :)
>         * Opens to about:blank, not google on init.  Faster.
>         * Made default size a bit bigger.  Maybe this should be a percentage someday. 
> Someone patch this for me!
>         * Bunch of attempts to create a HPaned between the track listing and this, but
> I can't find a widget.  Big chunk of commented code for now.
>         * Added Wikipedia.dll.config, a DLL mapper copied from gecko-sharp.  Should fix
> issues with not finding gtkmozembed.so on some platforms. (Send bug reports!)
>         * Debug info is copied in, so you can easily trace
> 
> Enjoy, and send feedback :)
> 
> Patrick
> 
> --
> Patrick "Trick" van Staveren
> Western Michigan University
> AIM: goofyassmoose
> Cell: 269.267.6008
> http://www.trick.vanstaveren.us/
> 
> 
> 
> 
> _______________________________________________
> Banshee-list mailing list
> Banshee-list gnome org
> http://mail.gnome.org/mailman/listinfo/banshee-list
> 
--- ./WikipediaPane.cs	2006-04-30 08:28:46.000000000 +0200
+++ ../../new/WikipediaPane.cs	2006-04-30 16:42:39.000000000 +0200
@@ -19,13 +19,14 @@ namespace Banshee.Plugins.Wikipedia
 		private VBox main;
 		private HBox bot;
 		private ProgressBar pb;
-
 		private int bot_queue_length;
 		public string current_artist;
 		
 		public WikipediaPane () {
 			Visible = false;
-
+			
+			// set url
+			
 			web = new WebControl("about:blank", "Gecko");
 			win = new Viewport();
 			sb = new Statusbar();
@@ -109,15 +110,27 @@ namespace Banshee.Plugins.Wikipedia
 					slower, as we're going to google and then wikipedia
 					Since it's google, it could return a page that's not the exact title, but is the most popular representation of this.  Might work worse.
 					(Why this method isn't being used right now): because I want to pass to wikipedia that this is a printable page
-			*/
-			web.LoadUrl(
-				"http://www.google.com/search?hl=en&q=site%3Aen.wikipedia.org+"; +
-				+ "%22" + current_artist + "%22"
-				+ "band" +
-				+ "&btnI=asdf"
-			);
-			
+			*/			
 			
+				string temp;		
+				WikipediaQuery w_query 	= new WikipediaQuery(current_artist);
+				WikipediaPage w_page	= w_query.PerformLookUp();
+				if ( w_page != null ) {
+					web.OpenStream(w_page.BaseUri,"text/html");
+					web.AppendData(w_page.Header);
+					web.AppendData("<h1>"+current_artist+"</h1>");
+					//Console.WriteLine(w_page.Header);
+					while ( (temp = w_page.ReadBodyLine())!= null ) {
+						web.AppendData(temp);
+						//Console.WriteLine(temp);
+					}
+					web.AppendData(w_page.Footer);
+					web.CloseStream();
+					web.Show();
+					Console.WriteLine("Wikipedia plugin debug: URL=" +w_page.BaseUri);
+				} else {
+					Visible = false;
+				}
 			/*
 			The plain 'ol, link to wikipedia.
 			tags on printable=yes.  nice, but hides links, and doesn't persist from page-to-page.
@@ -130,7 +143,7 @@ namespace Banshee.Plugins.Wikipedia
 			);
 			*/
 			
-			Console.WriteLine("Wikipedia plugin debug: URL=" + web.Location);
+			
 		}
 		
 		// --------------------------------------------------------------- //
@@ -149,7 +162,7 @@ namespace Banshee.Plugins.Wikipedia
 				Console.WriteLine("Apparently it hasn't been drawn yet.  crap.");
 			}*/
 			if (current_artist == artist) {
-				ShowArtist(null, null);
+				//ShowArtist(null, null);
 				Visible = true;
 				return;
 			}
using System;
using System.Text;
using Mono;
using Mono.Posix;
using Gtk;
using Gecko;
using GLib;
using Banshee;
using Banshee.Base;
using Gdk;

namespace Banshee.Plugins.Wikipedia
{
	// Sidebar pane that displays the Wikipedia article for the currently
	// playing artist inside an embedded Gecko (Mozilla) browser widget.
	public class WikipediaPane : Frame
	{
		private WebControl web;        // embedded Gecko browser
		private Viewport win;          // scrollable wrapper around the pane contents
		private Statusbar sb;          // shows hovered-link URLs
		private VBox main;             // browser on top, status strip at the bottom
		private HBox bot;              // bottom strip holding the status bar
		private ProgressBar pb;        // load-progress bar (wiring is commented out below)
		private int bot_queue_length;  // pending hide-timeouts for the status strip
		public string current_artist;  // artist whose page is currently shown
		
		// Builds the widget tree and hooks up the link-hover handler.
		// The pane starts hidden and the browser on about:blank.
		public WikipediaPane () {
			Visible = false;
			
			// set url
			
			web = new WebControl("about:blank", "Gecko");
			win = new Viewport();
			sb = new Statusbar();
			pb = new ProgressBar();
			main = new VBox();
			bot = new HBox();
			
			bot_queue_length = 0;

			sb.TextPushed += new Gtk.TextPushedHandler (StatusbarDisplay);
			sb.Visible = false;
			sb.HasResizeGrip = false;

			pb.WidthRequest = 300;


			//web.ProgressAll += new Gecko.ProgressAllHandler(updateProgress);
			web.LinkMsg += new EventHandler (LinkMessage);
			
			
			bot.PackStart(sb, true, true, 0);
			//bot.PackEnd(pb, false, false, 0);
			
			main.PackStart(web, true, true, 0);
			main.PackEnd(bot, false, false, 0);
			
			win.Add(main);
			Add(win);

			ShowAll();
		}
		
		// Reveals the status strip whenever text is pushed onto the status
		// bar, and schedules it to be hidden again 5 s later once no newer
		// messages are pending (tracked via bot_queue_length).
		public void StatusbarDisplay (object o, TextPushedArgs args) {
			bot.Visible = true;
			bot_queue_length++;
			GLib.Timeout.Add(5000, delegate {
				bot_queue_length--;
				sb.Pop(1);
				if(bot_queue_length < 1) {
					bot.Visible = false;
					pb.Visible = false;
				}
				return false;   // one-shot timeout: do not reschedule
			});
		}
		
		/*public void ProgressbarDisplay (object o, ProgressAllArgs args) {
			bot.Visible = true;
			bot.PackEnd(pb);
			pb.Visible = true;
			bot_queue_length++;
			pb.Fraction = (double) args.Curprogress / (double) args.Maxprogress;
			GLib.Timeout.Add(5000, delegate {
				bot_queue_length--;
				if(bot_queue_length < 1) {
					bot.Remove(pb);
					bot.Visible = false;
				}
				return false;
			});
		}*/
		
		// Pushes the hovered link's URL onto the status bar (this in turn
		// triggers StatusbarDisplay through the TextPushed event).
		public void LinkMessage (object o, EventArgs args) {
			sb.Push(1, web.LinkMessage);
		}
		
		/*public void updateProgress(object o, ProgressAllArgs args) {
			Console.WriteLine("on {0} of {1}", args.Curprogress, args.Maxprogress);
			if(args.Curprogress <= args.Maxprogress && args.Curprogress > 0 && args.Maxprogress > 1)
				ProgressbarDisplay(o, args);
		}*/
		
		// Looks up current_artist (via WikipediaQuery / WikipediaPage, which
		// are defined elsewhere in the plugin) and streams the pre-parsed
		// page into the embedded browser. Hides the pane when the lookup did
		// not land on a Wikipedia page (PerformLookUp returned null).
		private void ShowArtist(object o, EventArgs e){
			/*
				A way to find pages thru google's index of wikipedia.
				PROS:
					possibly more effective.  tests show better results than just querying an artist
					If the page doesn't exist, the nearest one will automatically be returned
					Since it's google, it could return a page that's not the exact title, but is the most popular representation of this.  Might work better.
				CONS:
					slower, as we're going to google and then wikipedia
					Since it's google, it could return a page that's not the exact title, but is the most popular representation of this.  Might work worse.
					(Why this method isn't being used right now): because I want to pass to wikipedia that this is a printable page
			*/			
			
				string temp;		
				WikipediaQuery w_query 	= new WikipediaQuery(current_artist);
				WikipediaPage w_page	= w_query.PerformLookUp();
				if ( w_page != null ) {
					// Stream the page into Gecko piece by piece: header,
					// artist heading, body lines, footer.
					web.OpenStream(w_page.BaseUri,"text/html");
					web.AppendData(w_page.Header);
					web.AppendData("<h1>"+current_artist+"</h1>");
					//Console.WriteLine(w_page.Header);
					while ( (temp = w_page.ReadBodyLine())!= null ) {
						web.AppendData(temp);
						//Console.WriteLine(temp);
					}
					web.AppendData(w_page.Footer);
					web.CloseStream();
					web.Show();
					Console.WriteLine("Wikipedia plugin debug: URL=" +w_page.BaseUri);
				} else {
					// No Wikipedia page found: keep the pane out of sight.
					Visible = false;
				}
			/*
			The plain 'ol, link to wikipedia.
			tags on printable=yes.  nice, but hides links, and doesn't persist from page-to-page.
			also, no help with searching for music only...
			
			web.LoadUrl(
				"http://en.wikipedia.org/wiki/"; +
				current_artist + " " + 
				"?printable=yes"
			);
			*/
			
			
		}
		
		// --------------------------------------------------------------- //

		public void HideWikipedia ()
		{
			Visible = false;
		}

		// Entry point called when the playing artist changes; skips the
		// lookup (and just re-shows the pane) when the same artist's page
		// is already loaded.
		public void ShowWikipedia (string artist)
		{
			/*if(web.Allocation.Width > 1) {
				web.WidthRequest = win.Allocation.Width + 200 - 4;
				win.Hadjustment = new Adjustment(200, 0, win.Allocation.Width, 1, 1, 1);
			}else{
				Console.WriteLine("Apparently it hasn't been drawn yet.  crap.");
			}*/
			if (current_artist == artist) {
				//ShowArtist(null, null);
				Visible = true;
				return;
			}
			current_artist = artist;
			ShowArtist(null, null);
			Visible = true;
			ShowAll ();
			this.HeightRequest = 450;
		}
	}
}

using System;
using System.IO;
using System.Net;
using System.Text;
using System.Xml;
using System.Xml.XPath;
using Mono.Unix;

namespace Banshee.Plugins.Wikipedia
{
	// Extracts the useful article body from a raw Wikipedia XHTML page,
	// stripping the sidebar, header and footer chrome around the
	// <div id="bodyContent"> element.
	public sealed class WikipediaParser
	{
		private string title;        // article title (never set by the current code path)
		private MemoryStream body;   // UTF-8 bytes of the extracted article body
		private Stream target;       // raw page stream supplied by the caller

		// Extracted body markup; null until Parse() has run.
		public MemoryStream Body {
			get {
				return body;
			}	
		}		

		// UTF-8 reader over the body. Lazily substitutes an empty stream so
		// callers never get a null reader before Parse() has run.
		public StreamReader BodyReader {
			get {
				if ( this.body == null ) {
					this.body = new MemoryStream();
				}
				return new StreamReader(this.body,Encoding.UTF8);
			}
		}
		
		public string Title {
			get {
				return this.title;
			}
		}

		// s: stream positioned at the start of the page markup. The stream is
		// consumed and closed by Parse().
		public WikipediaParser(Stream s) {
			this.target = s;
		}

		// Kept for interface compatibility: reports whether a query string is
		// usable. The actual network lookup lives in WikipediaQuery.
		public bool GetWikipediaInfo(string query) {
			return query != "";
		}
		
		// Loads the page into an XmlDocument and pulls out the contents of
		// the <div id="bodyContent"> element into `body`. On any failure a
		// localized error message is stored instead, so `body` is always
		// non-null after this method returns.
		public void Parse() {
			XmlDocument html = new XmlDocument();
			StreamReader sr = new StreamReader(target, Encoding.UTF8);
			// Skip the first two lines (doctype / <html> opener) and re-open
			// with a plain <html> tag so the rest parses as simple XML.
			// NOTE(review): assumes Wikipedia's markup keeps the prologue on
			// exactly two lines — confirm against a live page.
			sr.ReadLine();
			sr.ReadLine();
			StringBuilder sb = new StringBuilder();
			sb.Insert(0,"<html>");
			sb.Append(sr.ReadToEnd());
			sr.Close();
			html.LoadXml(sb.ToString());
			XPathNavigator nav = html.CreateNavigator();
			try {
				// BUGFIX: attribute tests need the '@' axis. The previous
				// expression "//div[ id='bodyContent']" compared a child
				// *element* named id, matched nothing, and made iter.Current
				// null — so the body was never extracted.
				XPathNodeIterator iter = nav.Select("//div[@id='bodyContent']");
				if ( iter.MoveNext() ) {
					XmlNode node = ((IHasXmlNode)iter.Current).GetNode();
					if ( node != null ) {
						this.body = new MemoryStream(Encoding.UTF8.GetBytes(node.InnerXml));
					}
				}
				if ( this.body == null ) {
					// Preserve the old guarantee: Parse() always produces a body.
					this.body = new MemoryStream(Encoding.UTF8.GetBytes(Catalog.GetString("An error ocurred while retrieving the artist information from wikipedia")));
				}
			} catch ( Exception e ) {
				Console.WriteLine("Error retrieving body "+e.Message);
				this.body = new MemoryStream(Encoding.UTF8.GetBytes(Catalog.GetString("An error ocurred while retrieving the artist information from wikipedia"))); 
			}		
		}
	}
}
using System;
using System.IO;
using System.Net;
using System.Text;
using Mono.Posix;

using Banshee.Base;
namespace Banshee.Plugins.Wikipedia
{
	
	// Looks up the Wikipedia page for an artist via Google's "I'm feeling
	// lucky" redirect, with a simple on-disk cache of downloaded pages.
	public class WikipediaQuery
	{
		private string query;   // artist name being looked up
		// Google "I'm feeling lucky" query restricted to en.wikipedia.org;
		// {0} is the quoted artist name.
		private const string url = "http://www.google.com/search?hl=en&q=site%3Aen.wikipedia.org+%22{0}%22%20band&btnI=asdf";
		private const string wiki_host = "en.wikipedia.org";
		private static string CACHE_PATH = System.IO.Path.Combine (Paths.UserPluginDirectory, "wikipedia");

		// Fully formatted lookup URL for the current query.
		public string QueryUrl {
			get {
				// Format once instead of twice (the old getter did both).
				string formatted = string.Format(url,this.query);
				Console.WriteLine(formatted);   // debug output, kept from original
				return formatted;
			}
		}
					
		public string Query {
			get {
				return this.query;
			} set {
				this.query = value;
			}
		}
		
		// query: artist name to look up. Ensures the cache directory exists.
		public WikipediaQuery(string query)
		{
			this.query = query;
			if (!Directory.Exists (CACHE_PATH))
				Directory.CreateDirectory (CACHE_PATH);
		}
		
		// Alias kept for interface compatibility.
		public WikipediaPage LoadInformation() {
			return PerformLookUp();
		}
		
		// Returns the artist's page — from the cache when it is considered
		// fresh, otherwise freshly downloaded. Null when the lookup did not
		// land on Wikipedia (no matching article).
		public WikipediaPage PerformLookUp() {
			string filename = this.GetCachedPath();
			HttpWebRequest c_req = GetRequest();
			WikipediaPage wp;
			if (File.Exists (filename)) { // check if we have a cached version
				DateTime last_updated_time = File.GetLastWriteTime (filename);
				// NOTE(review): IfModifiedSince is never assigned, so this
				// compares the file time against the property's default value —
				// confirm this implements the intended expiry policy.
				if (DateTime.Compare(last_updated_time, c_req.IfModifiedSince) < 0) { // the cached version is ok
					Console.WriteLine("Opening from cache"); 
					wp = LoadFromCache();					
				} else {
					Console.WriteLine("In cache but to old");
					wp = DownloadWikipedia(c_req);
				}
			} else {
				Console.WriteLine("Opening from wikipedia");
				wp = DownloadWikipedia(c_req);
			}
			if ( wp != null ) wp.BaseUri = "http://en.wikipedia.org/wiki/";
			return wp;
		}

		// Builds the HTTP request for the Google lookup.
		private HttpWebRequest GetRequest() {
			HttpWebRequest request 		= (HttpWebRequest)WebRequest.Create(this.QueryUrl);
			request.KeepAlive 			= false;
			request.AllowAutoRedirect 	= true;   // follow Google's redirect to Wikipedia
			request.UserAgent 			= "Mozilla (Banshee-wikipedia plugin)";
			return request;
		}

		// Downloads and parses the page; returns null when the redirect did
		// not end up on en.wikipedia.org (i.e. no matching article).
		private WikipediaPage DownloadWikipedia(HttpWebRequest req) {
			// BUGFIX: the response was previously never closed on the
			// non-Wikipedia path; `using` disposes it on every path.
			using (HttpWebResponse response = (HttpWebResponse) req.GetResponse()) {
				Console.WriteLine(response.ResponseUri);
				if ( !response.ResponseUri.Host.Equals(wiki_host) ) {
					return null;
				}
				Stream s = response.GetResponseStream ();
				WikipediaParser wparser = new WikipediaParser(s);
				wparser.Parse();
				MemoryStream body  = wparser.Body;
				try {
					SaveToCache(body);
				} catch(Exception e ) {
					// Best effort: a failed cache write must not break display.
					Console.WriteLine("Could not cache file because: {0}",e.Message);
				}
				return new WikipediaPage(body);
			}
		}

		// Opens the cached copy; the FileStream is handed to WikipediaPage,
		// which owns it from here on.
		private WikipediaPage LoadFromCache() {
			return new WikipediaPage(File.OpenRead(GetCachedPath()));
		}

		// Writes the parsed body to the cache file and rewinds the stream so
		// the caller can re-read it.
		private void SaveToCache(MemoryStream body) {
			// BUGFIX: File.Create truncates an existing file, unlike the old
			// File.OpenWrite — a shorter new page can no longer leave stale
			// trailing bytes from the previous cached version.
			using (FileStream fs = File.Create(GetCachedPath())) {
				byte [] buffer = new byte [8192];
				int read;
				while ((read = body.Read (buffer, 0, buffer.Length)) > 0) {
					fs.Write (buffer, 0, read);
				}
			}
			body.Position = 0;
		}
		
		// Cache filename derived from the lower-cased query's hash code.
		// NOTE(review): string.GetHashCode is not guaranteed stable across
		// runtimes/versions, so the cache may be orphaned by an upgrade.
		private string GetCachedPath() {
			return  System.IO.Path.Combine (CACHE_PATH,
			                                   Math.Abs(this.query.ToLower().GetHashCode()).ToString());
		}
		
	}
	
}


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]