Beagle man page filter
- From: Michael Levy <mlevy wardium homeip net>
- To: Dashboard <dashboard-hackers gnome org>
- Subject: Beagle man page filter
- Date: Sat, 10 Jul 2004 17:16:16 +0200
Hi all,
A while ago I submitted a simple man page filter as well as some code
destined to handle gzip-ed/zip-ed/bzip-ed/tar-ed files. As it turns out
I haven't had much time to work on these and fix the issues that were
rightly discussed on this very list, so I thought that instead of
holding off, I'd at least re-send the man page filter. It's been
generalized a bit to be a troff filter (though people more fluent in
troff than I will need to validate that statement). Attached is the new
file (FilterMan.cs) as well as a patch for Filters/Makefile.am to
include it in the build process.
Keep up the great work everyone!
Mike
//
// Beagle
//
// FilterMan.cs : Trivial implementation of a man-page filter.
//
// Author :
// Michael Levy <mlevy wardium homeip net>
//
// Copyright (C) 2004 Michael Levy
//
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
using System;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
namespace Beagle.Filters {
// Filter for man pages and, more generally, troff sources.
//
// The ".TH" header line supplies the "Title" property; troff comment lines
// are dropped; every other line is appended to the content verbatim.
public class FilterMan : Filter {

	// Matches the start of a ".TH" header line and captures its first
	// argument as "title".
	//
	// The regular expression for a complete man header line would capture
	// all five arguments (title, section, date, source, manual), each being
	// either a run of non-spaces or a double-quoted string:
	//
	//   ^\.TH\s+(?<title>...)\s+(?<section>\d+)\s+(?<date>...)\s+
	//           (?<source>...)\s+(?<manual>...)\s*$
	//
	// But there seem to be a number of broken man pages, and this filter is
	// also used for general troff pages, so only the title is matched.
	//
	// The quoted alternative is listed first on purpose: the pattern has no
	// trailing anchor, so if \S+ came first it would always win and a quoted
	// title such as "FOO BAR" would be cut at the first space.
	// The surrounding quotes are kept as part of the captured title.
	static readonly Regex headerRE = new Regex (@"^\.TH\s+" +
						    @"(?<title>(""[^""]*""|\S+))\s*");

	// Reader over the stream handed to DoOpen; consumed by DoPull.
	StreamReader reader;

	// Registers the MIME types handled by this filter.
	public FilterMan ()
	{
		// Make this a general troff filter.
		AddSupportedMimeType ("application/x-troff-man");
		AddSupportedMimeType ("text/x-troff-man");
		AddSupportedMimeType ("application/x-troff");
		AddSupportedMimeType ("text/x-troff");
	}

	/*
	   FIXME:
	   Right now we don't handle pages with just one line like:
	   .so man3/strcpy.3
	   Which is in strncpy.3.gz and points to strcpy.3.gz
	*/

	// Reads `reader` line by line until end of stream.
	//
	//  - Lines starting with .\" (troff comments) are skipped.
	//  - A line starting with ".TH " is parsed for the page title, which is
	//    stored under the "Title" key; a header that cannot be parsed is
	//    reported on stderr and otherwise ignored.
	//  - Any other line is passed to AppendContent unchanged.
	protected void ParseManFile (StreamReader reader)
	{
		String str;

		while ((str = reader.ReadLine ()) != null) {
			if (str.StartsWith (@".\""")) {
				/* Comment in man page */
				continue;
			}

			if (str.StartsWith (".TH ")) {
				// The pattern is anchored at the start of the line,
				// so there is either no match or exactly one.
				MatchCollection matches = headerRE.Matches (str);
				if (matches.Count != 1) {
					Console.Error.WriteLine ("In title Expected 1 match but found {0} matches in '{1}'",
								 matches.Count, str);
					continue;
				}

				this ["Title"] = matches [0].Groups ["title"].ToString ();
				continue;
			}

			// A "regular" string
			/* FIXME: We can probably do better by stripping other macros (.SH for example) */
			AppendContent (str);
		}
	}

	// Wraps the incoming stream in a StreamReader for later parsing.
	override protected void DoOpen (Stream stream)
	{
		reader = new StreamReader (stream);
	}

	// Parses everything that remains in the reader opened by DoOpen.
	override protected void DoPull ()
	{
		ParseManFile (reader);
	}
}
}
? Filters/FilterMan.cs
Index: Filters/Makefile.am
===================================================================
RCS file: /cvs/gnome/beagle/Filters/Makefile.am,v
retrieving revision 1.12
diff -u -r1.12 Makefile.am
--- Filters/Makefile.am 24 Jun 2004 18:45:20 -0000 1.12
+++ Filters/Makefile.am 10 Jul 2004 15:13:06 -0000
@@ -26,7 +26,8 @@
$(srcdir)/FilterMusic.cs \
$(srcdir)/FilterOpenOffice.cs \
$(srcdir)/FilterPng.cs \
- $(srcdir)/FilterText.cs
+ $(srcdir)/FilterText.cs \
+ $(srcdir)/FilterMan.cs
LOCAL_ASSEMBLIES = \
../Util/Util.dll
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]