Re: [Banshee-List] a litlle Patch for Wikipedia plugin, v0.0.2
- From: indecent <indecent picle org>
- To: banshee-list gnome org
- Subject: Re: [Banshee-List] a litlle Patch for Wikipedia plugin, v0.0.2
- Date: Sun, 30 Apr 2006 17:17:26 +0200
Hello,
here comes a little patch for the Wikipedia plugin. I added a parser
for the Wikipedia pages to remove information that is not needed: for
now the left sidebar, the header and the footer.
I also added a class to manage the queries and request the pages using
HttpWebRequest; this could also help to find pages that didn't
match the search. I used HttpWebResponse.ResponseUri.Host to detect results
that are not from Wikipedia.
Further, I started a cache for the downloaded pages; these are stored in
the plugindir/wikipedia directory.
David
Am Sonntag, den 30.04.2006, 02:37 -0400 schrieb Trick van Staveren:
> Hey folks,
>
> Been working on it a bit and just wanted to publish :)
>
> http://www.trick.vanstaveren.us/banshee/banshee-wikipedia-plugin-0.0.2.tar.bz2
>
> Changelog:
> 2006-04-30 Patrick van Staveren <trick vanstaveren us>
> * Google search mode - uses the "i'm feeling lucky" feature of Google to direct
> to the closes match using Google, adding the keyword "band". I know, this isn't the
> best way, but it is much much more accurate than just searching for the artist
> keyword.
> * Experimenting with stuff like a status bar and progress bar for loading.
> * Progress bar is way too buggy and causes segfaults - commented out :)
> * Opens to about:blank, not google on init. Faster.
> * Made default size a bit bigger. Maybe this should be a percentage someday.
> Someone patch this for me!
> * Bunch of attempts to create a HPaned between the track listing and this, but
> I can't find a widget. Big chunk of commented code for now.
> * Added Wikipedia.dll.config, a DLL mapper copied from gecko-sharp. Should fix
> issues with not finding gtkmozembed.so on some platforms. (Send bug reports!)
> * Debug info is copied in, so you can easily trace
>
> Enjoy, and send feedback :)
>
> Patrick
>
> --
> Patrick "Trick" van Staveren
> Western Michigan University
> AIM: goofyassmoose
> Cell: 269.267.6008
> http://www.trick.vanstaveren.us/
>
>
>
>
> _______________________________________________
> Banshee-list mailing list
> Banshee-list gnome org
> http://mail.gnome.org/mailman/listinfo/banshee-list
>
--- ./WikipediaPane.cs 2006-04-30 08:28:46.000000000 +0200
+++ ../../new/WikipediaPane.cs 2006-04-30 16:42:39.000000000 +0200
@@ -19,13 +19,14 @@ namespace Banshee.Plugins.Wikipedia
private VBox main;
private HBox bot;
private ProgressBar pb;
-
private int bot_queue_length;
public string current_artist;
public WikipediaPane () {
Visible = false;
-
+
+ // set url
+
web = new WebControl("about:blank", "Gecko");
win = new Viewport();
sb = new Statusbar();
@@ -109,15 +110,27 @@ namespace Banshee.Plugins.Wikipedia
slower, as we're going to google and then wikipedia
Since it's google, it could return a page that's not the exact title, but is the most popular representation of this. Might work worse.
(Why this method isn't being used right now): because I want to pass to wikipedia that this is a printable page
- */
- web.LoadUrl(
- "http://www.google.com/search?hl=en&q=site%3Aen.wikipedia.org+" +
- + "%22" + current_artist + "%22"
- + "band" +
- + "&btnI=asdf"
- );
-
+ */
+ string temp;
+ WikipediaQuery w_query = new WikipediaQuery(current_artist);
+ WikipediaPage w_page = w_query.PerformLookUp();
+ if ( w_page != null ) {
+ web.OpenStream(w_page.BaseUri,"text/html");
+ web.AppendData(w_page.Header);
+ web.AppendData("<h1>"+current_artist+"</h1>");
+ //Console.WriteLine(w_page.Header);
+ while ( (temp = w_page.ReadBodyLine())!= null ) {
+ web.AppendData(temp);
+ //Console.WriteLine(temp);
+ }
+ web.AppendData(w_page.Footer);
+ web.CloseStream();
+ web.Show();
+ Console.WriteLine("Wikipedia plugin debug: URL=" +w_page.BaseUri);
+ } else {
+ Visible = false;
+ }
/*
The plain 'ol, link to wikipedia.
tags on printable=yes. nice, but hides links, and doesn't persist from page-to-page.
@@ -130,7 +143,7 @@ namespace Banshee.Plugins.Wikipedia
);
*/
- Console.WriteLine("Wikipedia plugin debug: URL=" + web.Location);
+
}
// --------------------------------------------------------------- //
@@ -149,7 +162,7 @@ namespace Banshee.Plugins.Wikipedia
Console.WriteLine("Apparently it hasn't been drawn yet. crap.");
}*/
if (current_artist == artist) {
- ShowArtist(null, null);
+ //ShowArtist(null, null);
Visible = true;
return;
}
using System;
using System.Text;
using Mono;
using Mono.Posix;
using Gtk;
using Gecko;
using GLib;
using Banshee;
using Banshee.Base;
using Gdk;
namespace Banshee.Plugins.Wikipedia
{
/// <summary>
/// Frame that embeds a Gecko web control and fills it with the Wikipedia
/// page looked up for the current artist. A status bar packed underneath
/// the web control shows link messages for a few seconds.
/// </summary>
public class WikipediaPane : Frame
{
    private WebControl web;
    private Viewport win;
    private Statusbar sb;
    private VBox main;
    private HBox bot;
    // Created and sized, but currently never packed into the layout
    // (the PackEnd call in the constructor is commented out).
    private ProgressBar pb;
    // Number of status messages pushed whose 5 second timeout has not fired yet.
    private int bot_queue_length;
    // True when the last lookup for current_artist was streamed into the web control.
    private bool page_loaded;
    public string current_artist;

    /// <summary>
    /// Builds the widget tree: web control on top, status bar box at the
    /// bottom, both inside a viewport that is added to this frame.
    /// </summary>
    public WikipediaPane () {
        Visible = false;

        // set url
        web = new WebControl("about:blank", "Gecko");
        win = new Viewport();
        sb = new Statusbar();
        pb = new ProgressBar();
        main = new VBox();
        bot = new HBox();
        bot_queue_length = 0;
        page_loaded = false;

        sb.TextPushed += new Gtk.TextPushedHandler (StatusbarDisplay);
        sb.Visible = false;
        sb.HasResizeGrip = false;
        pb.WidthRequest = 300;
        //web.ProgressAll += new Gecko.ProgressAllHandler(updateProgress);
        web.LinkMsg += new EventHandler (LinkMessage);

        bot.PackStart(sb, true, true, 0);
        //bot.PackEnd(pb, false, false, 0);
        main.PackStart(web, true, true, 0);
        main.PackEnd(bot, false, false, 0);
        win.Add(main);
        Add(win);

        // NOTE(review): ShowAll() runs after the Visible = false assignments
        // above and presumably makes the frame and its children visible
        // again - confirm this is intended.
        ShowAll();
    }

    /// <summary>
    /// Handler for the status bar's TextPushed event: shows the bottom box
    /// and schedules a one-shot 5000 ms timeout that pops the message again
    /// and hides the box once no messages are pending.
    /// </summary>
    public void StatusbarDisplay (object o, TextPushedArgs args) {
        bot.Visible = true;
        bot_queue_length++;
        GLib.Timeout.Add(5000, delegate {
            bot_queue_length--;
            sb.Pop(1);
            if(bot_queue_length < 1) {
                bot.Visible = false;
                pb.Visible = false;
            }
            // false: do not run this timeout again
            return false;
        });
    }

    /*public void ProgressbarDisplay (object o, ProgressAllArgs args) {
        bot.Visible = true;
        bot.PackEnd(pb);
        pb.Visible = true;
        bot_queue_length++;
        pb.Fraction = (double) args.Curprogress / (double) args.Maxprogress;
        GLib.Timeout.Add(5000, delegate {
            bot_queue_length--;
            if(bot_queue_length < 1) {
                bot.Remove(pb);
                bot.Visible = false;
            }
            return false;
        });
    }*/

    /// <summary>
    /// Handler for the web control's LinkMsg event: pushes the control's
    /// current link message onto the status bar (context id 1).
    /// </summary>
    public void LinkMessage (object o, EventArgs args) {
        sb.Push(1, web.LinkMessage);
    }

    /*public void updateProgress(object o, ProgressAllArgs args) {
        Console.WriteLine("on {0} of {1}", args.Curprogress, args.Maxprogress);
        if(args.Curprogress <= args.Maxprogress && args.Curprogress > 0 && args.Maxprogress > 1)
            ProgressbarDisplay(o, args);
    }*/

    /// <summary>
    /// Looks up current_artist and renders the result into the web control.
    /// Hides the pane when no page could be loaded. The parameters are not
    /// used; the signature matches an EventHandler.
    /// </summary>
    private void ShowArtist(object o, EventArgs e){
        /*
        The page is found through Google's index of Wikipedia (see WikipediaQuery).
        PROS:
            possibly more effective. tests show better results than just querying an artist
            If the page doesn't exist, the nearest one will automatically be returned
            Since it's Google, it could return a page that's not the exact title, but is
            the most popular representation of this. Might work better.
        CONS:
            slower, as we're going to Google and then Wikipedia
            Since it's Google, it could return a page that's not the exact title, but is
            the most popular representation of this. Might work worse.

        Alternative (not used): the plain old link to Wikipedia with printable=yes.
        Nice, but hides links, doesn't persist from page to page, and gives no help
        with searching for music only:
            web.LoadUrl("http://en.wikipedia.org/wiki/" + current_artist + " " + "?printable=yes");
        */
        page_loaded = LoadArtistPage();
        if (!page_loaded) {
            Visible = false;
        }
    }

    // Performs the lookup for current_artist and streams header, title, body
    // and footer into the web control. Returns false when there is nothing
    // to show (no artist, no matching page, or the lookup failed).
    private bool LoadArtistPage() {
        if (current_artist == null || current_artist.Length == 0) {
            return false;
        }

        try {
            WikipediaQuery w_query = new WikipediaQuery(current_artist);
            WikipediaPage w_page = w_query.PerformLookUp();
            if (w_page == null) {
                return false;
            }

            web.OpenStream(w_page.BaseUri, "text/html");
            try {
                web.AppendData(w_page.Header);
                // The artist name comes from track metadata, so it must not
                // be interpreted as markup.
                web.AppendData("<h1>" + EscapeHtml(current_artist) + "</h1>");
                string line;
                while ((line = w_page.ReadBodyLine()) != null) {
                    web.AppendData(line);
                }
                web.AppendData(w_page.Footer);
            } finally {
                // Always terminate the stream, even if reading the page failed half way.
                web.CloseStream();
            }

            web.Show();
            Console.WriteLine("Wikipedia plugin debug: URL=" +w_page.BaseUri);
            return true;
        } catch (Exception ex) {
            // A failed download, parse or cache read must not take down the
            // UI; treat it like "no page found".
            Console.WriteLine("Wikipedia plugin debug: lookup failed: " + ex.Message);
            return false;
        }
    }

    // Replaces the characters that are significant in HTML text and
    // attribute values with their entities. Null yields an empty string.
    private static string EscapeHtml(string text) {
        if (text == null) {
            return "";
        }
        StringBuilder escaped = new StringBuilder(text.Length);
        foreach (char c in text) {
            switch (c) {
            case '&':
                escaped.Append("&amp;");
                break;
            case '<':
                escaped.Append("&lt;");
                break;
            case '>':
                escaped.Append("&gt;");
                break;
            case '"':
                escaped.Append("&quot;");
                break;
            default:
                escaped.Append(c);
                break;
            }
        }
        return escaped.ToString();
    }

    // --------------------------------------------------------------- //

    /// <summary>Hides the pane.</summary>
    public void HideWikipedia ()
    {
        Visible = false;
    }

    /// <summary>
    /// Shows the page for <paramref name="artist"/>. A new lookup is only
    /// performed when the artist differs from the one currently loaded; the
    /// pane stays hidden when the lookup for that artist produced no page.
    /// </summary>
    public void ShowWikipedia (string artist)
    {
        /*if(web.Allocation.Width > 1) {
            web.WidthRequest = win.Allocation.Width + 200 - 4;
            win.Hadjustment = new Adjustment(200, 0, win.Allocation.Width, 1, 1, 1);
        }else{
            Console.WriteLine("Apparently it hasn't been drawn yet. crap.");
        }*/
        if (current_artist == artist) {
            // Same artist as last time: reuse what is already loaded instead
            // of downloading again, but do not reveal an empty pane.
            //ShowArtist(null, null);
            Visible = page_loaded;
            return;
        }

        current_artist = artist;
        ShowArtist(null, null);
        if (!page_loaded) {
            // ShowArtist has already hidden the pane; keep it hidden.
            return;
        }

        Visible = true;
        ShowAll ();
        this.HeightRequest = 450;
    }
}
}
using System;
using System.IO;
using System.Net;
using System.Text;
using System.Xml;
using System.Xml.XPath;
using Mono.Unix;
namespace Banshee.Plugins.Wikipedia
{
/// <summary>
/// Extracts the article body (the div with id "bodyContent") from a
/// Wikipedia page read from a stream.
/// </summary>
public sealed class WikipediaParser
{
    // NOTE: never assigned at the moment; the title extraction is disabled
    // (see the TODO at the end of Parse), so Title always returns null.
    private string title;
    private MemoryStream body;
    private Stream target;

    /// <summary>
    /// The extracted body as UTF-8 bytes, or null when Parse has not been
    /// called or did not find the body element.
    /// </summary>
    public MemoryStream Body {
        get {
            return body;
        }
    }

    /// <summary>
    /// A new UTF-8 reader over the body stream. An empty body stream is
    /// created first when none exists yet.
    /// </summary>
    public StreamReader BodyReader {
        get {
            if ( this.body == null ) {
                this.body = new MemoryStream();
            }
            return new StreamReader(this.body,Encoding.UTF8);
        }
    }

    /// <summary>The page title; see the note on the backing field.</summary>
    public string Title {
        get {
            return this.title;
        }
    }

    /// <summary>Creates a parser that will read the page from <paramref name="s"/>.</summary>
    public WikipediaParser(Stream s) {
        this.target = s;
    }

    /// <summary>
    /// Currently only validates the query: the request/parse steps that used
    /// to live here are disabled. Returns false for a null or empty query,
    /// true otherwise.
    /// </summary>
    public bool GetWikipediaInfo(string query) {
        // Stream response_stream = this.SendRequest(query);
        // this.Parse(response_stream);
        // response_stream.Close();
        return !string.IsNullOrEmpty(query);
    }

    /// <summary>
    /// Reads the whole target stream, loads it as XML and stores the inner
    /// markup of the "bodyContent" div in Body. The target stream is closed
    /// afterwards. When no such div exists Body is left unchanged; when the
    /// extraction throws, Body is set to a localized error message.
    /// Exceptions raised while reading or loading the document (for example
    /// an XmlException for markup that is not well-formed) propagate to the
    /// caller.
    /// </summary>
    public void Parse() {
        string markup;
        // Disposing the reader also closes the underlying target stream,
        // including when reading fails.
        using (StreamReader reader = new StreamReader(target, Encoding.UTF8)) {
            // The first two lines are discarded and replaced by a bare <html>
            // tag before the rest is handed to the XML parser.
            // NOTE(review): presumably these are the DOCTYPE and the
            // namespaced <html ...> line of the page - confirm.
            reader.ReadLine();
            reader.ReadLine();
            markup = "<html>" + reader.ReadToEnd();
        }

        XmlDocument html = new XmlDocument();
        html.LoadXml(markup);
        XPathNavigator nav = html.CreateNavigator();

        try {
            // '@id' selects on the id attribute; without the '@' the
            // predicate would look for a child element named "id".
            XPathNodeIterator iter = nav.Select("//div[@id='bodyContent']");
            // Only touch Current when the iterator actually moved onto a match.
            if ( iter.MoveNext() ) {
                XmlNode node = ((IHasXmlNode)iter.Current ).GetNode();
                if ( node != null ) {
                    this.body = new MemoryStream(Encoding.UTF8.GetBytes(node.InnerXml));
                }
            }
        } catch ( Exception e ) {
            Console.WriteLine("Error retrieving body "+e.Message);
            this.body = new MemoryStream(Encoding.UTF8.GetBytes(Catalog.GetString("An error ocurred while retrieving the artist information from wikipedia")));
        }

        // TODO: title extraction is disabled. The previous attempt selected
        // "//h1[@class='firstHeading']/text()" and stored node.Value in
        // this.title, falling back to Catalog.GetString("Not found") when the
        // node was missing.
    }
}
}
using System;
using System.IO;
using System.Net;
using System.Text;
using Mono.Posix;
using Banshee.Base;
namespace Banshee.Plugins.Wikipedia
{
/// <summary>
/// Looks up the Wikipedia page for a search term through a Google
/// "I'm feeling lucky" query restricted to en.wikipedia.org, and keeps the
/// extracted page bodies in a cache directory below the user plugin directory.
/// </summary>
public class WikipediaQuery
{
    private string query;
    // {0} is replaced with the URL-escaped search term.
    private const string url = "http://www.google.com/search?hl=en&q=site%3Aen.wikipedia.org+%22{0}%22%20band&btnI=asdf";//
    // Only responses that end up on this host are accepted as results.
    private const string wiki_host = "en.wikipedia.org";//
    private static readonly string CACHE_PATH = System.IO.Path.Combine (Paths.UserPluginDirectory, "wikipedia");

    /// <summary>
    /// The request URL for the current query. The search term is escaped so
    /// that characters such as '&amp;', '#', '+' or '%' in an artist name
    /// cannot break the query string. The URL is also written to the console.
    /// </summary>
    public string QueryUrl {
        get {
            string term = this.query == null ? "" : this.query;
            string request_url = string.Format(url, Uri.EscapeDataString(term));
            Console.WriteLine(request_url);
            return request_url;
        }
    }

    /// <summary>The search term (typically an artist name).</summary>
    public string Query {
        get {
            return this.query;
        } set {
            this.query = value;
        }
    }

    /// <summary>
    /// Creates a query for <paramref name="query"/> and makes sure the cache
    /// directory exists.
    /// </summary>
    public WikipediaQuery(string query)
    {
        this.query = query;
        if (!Directory.Exists (CACHE_PATH)) {
            Directory.CreateDirectory (CACHE_PATH);
        }
    }

    /// <summary>Same as <see cref="PerformLookUp"/>.</summary>
    public WikipediaPage LoadInformation() {
        return PerformLookUp();
    }

    /// <summary>
    /// Returns the page for the current query, from the cache file when one
    /// exists and passes the age check, otherwise by downloading it.
    /// Returns null when the download did not produce a Wikipedia page.
    /// </summary>
    public WikipediaPage PerformLookUp() {
        string filename = this.GetCachedPath();
        HttpWebRequest c_req = GetRequest();
        WikipediaPage wp;

        if (File.Exists (filename)) { // check if we have a cached version
            DateTime last_updated_time = File.GetLastWriteTime (filename);
            // NOTE(review): IfModifiedSince is never set on the request, so
            // this compares against the property's default value (documented
            // as the current date and time). As written, an existing cache
            // file is therefore effectively always accepted - confirm the
            // intended expiry policy.
            if (DateTime.Compare(last_updated_time, c_req.IfModifiedSince) < 0) { // the cached version is ok
                Console.WriteLine("Opening from cache");
                wp = LoadFromCache();
            } else {
                Console.WriteLine("In cache but to old");
                wp = DownloadWikipedia(c_req);
            }
        } else {
            Console.WriteLine("Opening from wikipedia");
            wp = DownloadWikipedia(c_req);
        }

        if ( wp != null ) {
            wp.BaseUri = "http://en.wikipedia.org/wiki/";
        }
        return wp;
    }

    // Builds the HTTP request for QueryUrl; redirects are followed so that
    // the response URI tells where the search finally landed.
    private HttpWebRequest GetRequest() {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(this.QueryUrl);
        request.KeepAlive = false;
        request.AllowAutoRedirect = true;
        request.UserAgent = "Mozilla (Banshee-wikipedia plugin)";
        return request;
    }

    // Sends the request and, when the final response comes from wiki_host,
    // parses the page, stores the body in the cache (best effort) and wraps
    // it in a WikipediaPage. Returns null when the response is from another
    // host or no body could be extracted.
    private WikipediaPage DownloadWikipedia(HttpWebRequest req) {
        MemoryStream body;
        HttpWebResponse response = (HttpWebResponse) req.GetResponse();
        try {
            Console.WriteLine(response.ResponseUri);
            if ( !response.ResponseUri.Host.Equals(wiki_host) ) {
                return null;
            }
            WikipediaParser wparser = new WikipediaParser(response.GetResponseStream ());
            wparser.Parse();
            body = wparser.Body;
        } finally {
            // Release the connection on every path, including the
            // "not Wikipedia" result and parser failures.
            response.Close();
        }

        if ( body == null ) {
            // The parser found no article body: report "no page" instead of
            // caching and wrapping a null stream.
            return null;
        }

        try {
            SaveToCache(body);
        } catch(Exception e ) {
            // Caching is best effort; the page can still be shown.
            Console.WriteLine("Could not cache file because: {0}",e.Message);
            // Console.WriteLine(e.StackTrace);
        }
        return new WikipediaPage(body);
    }

    // Opens the cache file for the current query and hands the open stream
    // to a new WikipediaPage.
    // NOTE(review): the stream is not closed here; presumably WikipediaPage
    // takes ownership of it - confirm.
    private WikipediaPage LoadFromCache() {
        string f_name = GetCachedPath();
        FileStream fs = File.OpenRead(f_name);
        return new WikipediaPage(fs);
    }

    // Writes the complete body to the cache file for the current query and
    // leaves the body positioned at its start so it can be read afterwards.
    private void SaveToCache(MemoryStream body) {
        if (body == null) {
            // Fail before the file is touched so no empty cache entry is left behind.
            throw new ArgumentNullException("body");
        }
        string f_name = GetCachedPath();
        try {
            // File.Create truncates an existing file; File.OpenWrite would
            // keep the tail of a previously cached, longer page.
            using (FileStream fs = File.Create(f_name)) {
                body.WriteTo(fs);
            }
        } finally {
            // Rewind even when writing failed: the caller still renders the body.
            body.Position = 0;
        }
    }

    // Maps the query (case-insensitively) to a file name inside CACHE_PATH.
    // NOTE(review): the name is derived from string.GetHashCode(), whose
    // value is not guaranteed to be stable across runtimes or processes, and
    // different queries can collide - consider a stable digest.
    private string GetCachedPath() {
        string key = this.query == null ? "" : this.query.ToLowerInvariant();
        // Widen before Math.Abs: Math.Abs(int.MinValue) throws OverflowException.
        long hash = Math.Abs((long)key.GetHashCode());
        return System.IO.Path.Combine (CACHE_PATH, hash.ToString());
    }
}
}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]