Beagle man page filter



Hi all,
	A while ago I submitted a simple man page filter as well as some code
destined to handle gzip-ed/zip-ed/bzip-ed/tar-ed files. As it turns out
I haven't had much time to work on these and fix the issues that were
rightly discussed on this very list, so I thought that instead of
holding off, I'd at least re-send the man page filter. It's been
generalized a bit to be a troff filter (though people more fluent in
troff than I will need to validate that statement). Attached is the new
file (FilterMan.cs) as well as a patch for Filters/Makefile.am to
include it in the build process.

	Keep up the great work everyone!

Mike

//
// Beagle
//
// FilterMan.cs : Trivial implementation of a man-page filter.
//
// Author :
//      Michael Levy <mlevy wardium homeip net>
//
// Copyright (C) 2004 Michael Levy
//
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//

using System;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;

namespace Beagle.Filters {

	// Trivial filter for man pages and, more generally, troff documents.
	//
	// The text of the ".TH" header line's first argument is stored as the
	// "Title" property; troff comment lines are dropped; every other line is
	// passed on verbatim as content.  AddSupportedMimeType, AppendContent and
	// the string indexer are not defined in this class -- presumably they are
	// inherited from Filter (not visible here).
	public class FilterMan : Filter {
		/*
		   The regular expression for a complete man header line is built to allow a suite of
		   non-spaces, or words separated by spaces which are encompassed in quotes.
		   The regexp should be :

		Regex headerRE = new Regex (@"^\.TH\s+" +
					    @"(?<title>(\S+|(""(\S+\s*)+"")))\s+" +
					    @"(?<section>\d+)\s+" +
					    @"(?<date>(\S+|(""(\S+\s*)+"")))\s+" +
					    @"(?<source>(\S+|(""(\S+\s*)+"")))\s+" +
					    @"(?<manual>(\S+|(""(\S+\s*)+"")))\s*" +
					    "$");

		   But there seem to be a number of broken man pages, and the current filter can be
		   used for general troff pages, so only the title argument is matched.

		   The quoted alternative is tried first: without a trailing anchor a leading \S+
		   would stop at the first space inside a quoted title (yielding e.g. '"GIT' for
		   '.TH "GIT LOG" 1').  The captured title still includes the surrounding quotes.

		   Built once and shared, since the pattern never changes.
		*/
		static readonly Regex headerRE = new Regex (@"^\.TH\s+" +
							    @"(?<title>""[^""]*""|\S+)\s*");

		// Reader over the stream handed to DoOpen; consumed by DoPull.
		StreamReader reader;

		public FilterMan ()
		{
			// Make this a general troff filter.
			AddSupportedMimeType ("application/x-troff-man");
			AddSupportedMimeType ("text/x-troff-man");
			AddSupportedMimeType ("application/x-troff");
			AddSupportedMimeType ("text/x-troff");
		}

		/*
			FIXME:
			Right now we don't handle pages with just one line like:
				.so man3/strcpy.3
			Which is in strncpy.3.gz and points to strcpy.3.gz
		*/
		// Reads `reader` line by line until end of stream:
		//   - lines starting with .\" (troff comments) are skipped;
		//   - a line starting with ".TH " sets this ["Title"] from the header's
		//     first argument, or logs to stderr and is skipped if it cannot be parsed;
		//   - any other line is appended to the content unchanged.
		protected void ParseManFile (StreamReader reader)
		{
			String str;

			while ((str = reader.ReadLine ()) != null) {
				if (str.StartsWith (@".\""")) {
					/* Comment in man page */
					continue;
				}

				if (str.StartsWith (".TH ")) {
					// The pattern is anchored at the start of the line, so there
					// is at most one match; zero means a malformed header.
					MatchCollection matches = headerRE.Matches (str);
					if (matches.Count != 1) {
						Console.Error.WriteLine ("In title Expected 1 match but found {0} matches in '{1}'",
									  matches.Count, str);
						continue;
					}
					this ["Title"] = matches [0].Groups ["title"].ToString ();
					continue;
				}

				// A "regular" string
				/* FIXME: We can probably do better by stripping other macros (.SH for example) */
				AppendContent (str);
			}
		}

		// Wraps the incoming stream in a StreamReader for later parsing.
		override protected void DoOpen (Stream stream)
		{
			reader = new StreamReader (stream);
		}

		// Parses everything remaining in the reader opened by DoOpen.
		override protected void DoPull ()
		{
			ParseManFile (reader);
		}
	}
}
? Filters/FilterMan.cs
Index: Filters/Makefile.am
===================================================================
RCS file: /cvs/gnome/beagle/Filters/Makefile.am,v
retrieving revision 1.12
diff -u -r1.12 Makefile.am
--- Filters/Makefile.am	24 Jun 2004 18:45:20 -0000	1.12
+++ Filters/Makefile.am	10 Jul 2004 15:13:06 -0000
@@ -26,7 +26,8 @@
 	$(srcdir)/FilterMusic.cs	\
 	$(srcdir)/FilterOpenOffice.cs	\
 	$(srcdir)/FilterPng.cs		\
-	$(srcdir)/FilterText.cs
+	$(srcdir)/FilterText.cs		\
+	$(srcdir)/FilterMan.cs
 
 LOCAL_ASSEMBLIES =			\
 	../Util/Util.dll


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]