Re: Thoughts about FilterChm
- From: "Miguel Cabrera" <mfcabrera gmail com>
- To: dashboard-hackers gnome org
- Subject: Re: Thoughts about FilterChm
- Date: Wed, 26 Apr 2006 16:52:33 -0500
>
> I dont have much idea about how the chm filter works. Why dont you
> give it a try, maybe duplicating code on the way ? We can then work on
> that and clean it up. Once I see the code it will be helpful to
> comment how the html filter can be exploited.
Hi!
Well, I've already started the work. I decided to modify the filter to
inherit behaviour from the HTML filter, and I overrode the DoOpen
method, but I'm getting a strange NullReferenceException from
HtmlAgilityPack in methods that previously worked flawlessly when the
CHM filter was working. I'm reading that code in order to trace the bug.
Did someone change/update the HtmlAgilityPack code?
I found these lines when reading the code:
// SLIM: _text = reader.ReadToEnd();
_text = new StreamAsArray (sr);
That makes me think that maybe someone changed/updated the code,
thereby breaking my code :P
-------------------------------------------------------------------------------------------------------
This is the output:
ceruno neguange:~/desarrollo/beagle_doctest$ beagle-extract-content
O\'Reilly_-_Regular.Expression.Pocket.Reference.eBook-LiB.chm
Filename: file:///home/ceruno/desarrollo/beagle_doctest/O'Reilly_-_Regular.Expression.Pocket.Reference.eBook-LiB.chm
Debug: Loaded 42 filters from
/home/ceruno/desarrollo/root/lib/beagle/Filters/Filters.dll
Object reference not set to an instance of an object
in [0x0004b] (at
/home/ceruno/desarrollo/beagle/Filters/HtmlAgilityPack/HtmlDocument.cs:205)
HtmlAgilityPack.StreamAsArray:Read (Boolean initial)
in [0x00051] (at
/home/ceruno/desarrollo/beagle/Filters/HtmlAgilityPack/HtmlDocument.cs:194)
HtmlAgilityPack.StreamAsArray:.ctor (System.IO.StreamReader r)
in [0x0009e] (at
/home/ceruno/desarrollo/beagle/Filters/HtmlAgilityPack/HtmlDocument.cs:890)
HtmlAgilityPack.HtmlDocument:Load (System.IO.TextReader reader)
in [0x000aa] (at
/home/ceruno/desarrollo/beagle/Filters/FilterChm.cs:188)
Beagle.Filters.FilterChm:DoOpen (System.IO.FileInfo info)
Filter: Beagle.Filters.FilterChm
MimeType: application/x-chm
Properties:
dc:title = O'Reilly : Regular Expression Pocket Reference
(no content)
(no hot content)
//
// FilterChm.cs : Trivial implementation of a CHM filter.
//
// Copyright (C) 2005,2006 Miguel Cabrera <mfcabrer unal edu co>
//
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
//
using System;
using System.Collections;
using System.IO;
using System.Text;
using HtmlAgilityPack;
using Beagle.Util;
using Beagle.Daemon;
namespace Beagle.Filters {
public class FilterChm : FilterHtml {

	// The CHM archive currently being filtered. Created in DoOpen and
	// released in DoClose; null outside of that window.
	ChmFile chmFile;

	public FilterChm () : base()
	{
		RegisterSupportedTypes ();
		SnippetMode = true;
	}

	//UNUSED
	// Walks the top level of a parsed table-of-contents document.
	// "html" and "head" elements are descended into with this same
	// method; every other element (including "body") is handed to
	// WalkToc.
	public void WalkTocFile (HtmlNode node)
	{
		foreach (HtmlNode subnode in node.ChildNodes) {
			if (subnode.NodeType != HtmlNodeType.Element)
				continue;

			switch (subnode.Name) {
			case "html":
			case "head":
				WalkTocFile (subnode);
				break;
			default:
				WalkToc (subnode);
				break;
			}
		}
	}

	//UNUSED
	// Recursively visits a document or element node. The children of
	// every "li" element are first offered to HandleTocEntry, then all
	// children are walked recursively. Other node types are ignored.
	public void WalkToc (HtmlNode node)
	{
		if (node.NodeType != HtmlNodeType.Document &&
		    node.NodeType != HtmlNodeType.Element)
			return;

		// Two separate passes on purpose: all entries of this "li" are
		// emitted before anything nested deeper is visited.
		if (node.Name == "li") {
			foreach (HtmlNode subnode in node.ChildNodes)
				HandleTocEntry (subnode);
		}

		foreach (HtmlNode subnode in node.ChildNodes)
			WalkToc (subnode);
	}

	//UNUSED
	// Emits, as hot text, the "value" attribute of every
	//   <param name="Name" value="...">
	// child of an <object type="text/sitemap"> node. Any other node is
	// ignored.
	public void HandleTocEntry (HtmlNode node)
	{
		if (node.Name != "object")
			return;

		string type = node.GetAttributeValue ("type", "");
		if (String.Compare (type, "text/sitemap", true) != 0)
			return;

		foreach (HtmlNode subnode in node.ChildNodes) {
			if (String.Compare (subnode.Name, "param", true) != 0)
				continue;
			if (subnode.GetAttributeValue ("name", "") != "Name")
				continue;

			HotUp ();
			AppendText (subnode.GetAttributeValue ("value", ""));
			HotDown ();
		}
	}

	//UNUSED
	// Parses HTML from the given reader in stream mode, reporting nodes
	// to HandleNodeEvent (not defined in this class; presumably
	// inherited from FilterHtml). Failures are logged and swallowed so
	// that one bad page does not abort the filter.
	void ReadHtml (TextReader reader)
	{
		if (reader == null) {
			Logger.Log.Warn ("FilterChm: no reader supplied, nothing to parse");
			return;
		}

		HtmlDocument doc = new HtmlDocument ();
		doc.ReportNode += HandleNodeEvent;
		doc.StreamMode = true;

		try {
			doc.Load (reader);
		} catch (Exception e) {
			// Pass the message as an argument, never as the format
			// string: it may legitimately contain '{' or '}'.
			Logger.Log.Warn ("FilterChm: error parsing HTML: {0}", e.Message);
		}
	}

	// Opens the CHM archive and feeds its default page through the HTML
	// parser.
	//
	// If the archive itself cannot be loaded the filter is marked as
	// finished. A missing or unparsable default page is only logged:
	// the archive is still open at that point, so DoPullProperties can
	// still report its title.
	override protected void DoOpen (FileInfo info)
	{
		chmFile = new ChmFile ();

		try {
			chmFile.Load (info.FullName);
		} catch (Exception e) {
			Logger.Log.Warn ("Could not load {0}: {1}", info.Name, e.Message);
			Finished ();
			return;
		}

		TextReader default_page = chmFile.GetDefaultFile ();
		if (default_page == null) {
			// Nothing to parse; handing null to HtmlDocument.Load
			// would only end in an exception.
			Logger.Log.Warn ("No default page found in {0}", info.Name);
			return;
		}

		HtmlDocument doc = new HtmlDocument ();
		doc.StreamMode = true;
		doc.ReportNode += HandleNodeEvent;

		try {
			doc.Load (default_page);
		} catch (Exception e) {
			// The exception itself is passed (not just its message)
			// so that the stack trace ends up in the log.
			Logger.Log.Warn ("Error parsing file contents {0}: {1}", info.Name, e);
		}
	}

	// Publishes the archive title as dc:title when there is one.
	override protected void DoPullProperties ()
	{
		string title = chmFile.Title;
		if (title != null && title != "")
			AddProperty (Beagle.Property.New ("dc:title", title));
	}

	// All content is produced while parsing in DoOpen, so pulling only
	// has to signal completion.
	override protected void DoPull ()
	{
		//Logger.Log.Debug("FilterCHM: Parsing:" + chmFile.Title);
		//chmFile.ParseContents(ReadHtml);
		/*
		  We only read the default file and the topic file
		**/
		/* ReadHtml(chmFile.GetDefaultFile());
		HtmlDocument doc = new HtmlDocument();
		doc.Load(chmFile.GetTopicsFile());
		WalkTocFile(doc.DocumentNode);*/
		Finished ();
	}

	// Releases the archive. Safe to call when DoOpen never ran or when
	// called more than once.
	override protected void DoClose ()
	{
		if (chmFile == null)
			return;

		chmFile.Dispose ();
		chmFile = null;
	}

	// Registers the MIME type handled by this filter.
	override protected void RegisterSupportedTypes ()
	{
		AddSupportedFlavor (FilterFlavor.NewFromMimeType ("application/x-chm"));
	}
}
}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]