Beagle Patch for BibTex Filter



Hi
I have created a Beagle patch that adds a BibTeX filter, written as part of the FOSSKRITI event held at IIT Kanpur from February 14 to 17, 2008. Please have a look at it; I hope it can be added to Beagle.

Regards,
Paras Tikmani

Index: Filters/FilterBibTex.cs
==================================================================
--- Filters/FilterBibTex.cs	(revision 0)
+++ Filters/FilterBibTex.cs	(revision 0)
@@ -0,0 +1,281 @@
+//
+// FilterBibTex.cs : Basic BibTex filter, adapted from an old version of FilterTeX.cs.
+//
+// Copyright (C) 2004 Novell, Inc.
+// Copyright (C) 2007 auxsvr gmail com
+// Copyright (C) 2008 paras tikmani gmail com, arunpatala gmail com
+//
+
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS IN THE SOFTWARE.
+//
+
+using System;
+using System.Collections;
+using System.IO;
+using System.Text;
+
+using Beagle.Util;
+using Beagle.Daemon;
+using Beagle.Filters;
+
+namespace Beagle.Filters {
+
+	public class FilterBibTex : Beagle.Daemon.Filter {
+
+		// Table of the BibTex entry-type names and single-character symbols known to the parser.
+		private class BibTexControlWordType {
+			public enum Type {
+				None,
+				Comment,
+				EntryType,
+				Skip, // Unimportant symbols for the parser
+				BraceStart,
+				BraceEnd,
+				AtTheRate,
+				EqualTo,
+				DoubleQuote,
+				Comma
+			};
+
+			public Type Types;
+			public string ctrlWord;
+
+			BibTexControlWordType (Type types, string ctrlword)
+			{
+				this.Types = types;
+				this.ctrlWord = ctrlword;
+			}
+
+			static readonly BibTexControlWordType[] types = {
+				new BibTexControlWordType (Type.None, ""),
+				new BibTexControlWordType (Type.EntryType, "book"),
+				new BibTexControlWordType (Type.EntryType, "article"),
+				new BibTexControlWordType (Type.EntryType, "misc"),
+				new BibTexControlWordType (Type.EntryType, "techreport"),
+				new BibTexControlWordType (Type.EntryType, "booklet"),
+				new BibTexControlWordType (Type.EntryType, "conference"),
+				new BibTexControlWordType (Type.EntryType, "inbook"),
+				new BibTexControlWordType (Type.EntryType, "incollection"),
+				new BibTexControlWordType (Type.EntryType, "inproceedings"),
+				new BibTexControlWordType (Type.EntryType, "manual"),
+				new BibTexControlWordType (Type.EntryType, "mastersthesis"),
+				new BibTexControlWordType (Type.EntryType, "phdthesis"),
+				new BibTexControlWordType (Type.EntryType, "proceedings"),
+				new BibTexControlWordType (Type.EntryType, "unpublished"),
+				new BibTexControlWordType (Type.AtTheRate, "@"),
+				new BibTexControlWordType (Type.Comment, "%"),
+				new BibTexControlWordType (Type.Skip, "\\"),
+				new BibTexControlWordType (Type.BraceStart, "{"),
+				new BibTexControlWordType (Type.BraceEnd, "}"),
+				new BibTexControlWordType (Type.EqualTo, "="),
+				new BibTexControlWordType (Type.DoubleQuote, "\""),
+				new BibTexControlWordType (Type.Comma, ","),
+			};
+
+			// Returns the entry whose ctrlWord equals str_ctrl_word ignoring case (so "Book" matches), else the None entry.
+			public static BibTexControlWordType Find (string str_ctrl_word)
+			{
+				foreach (BibTexControlWordType entry in types)
+					if (String.Compare (entry.ctrlWord, str_ctrl_word, StringComparison.OrdinalIgnoreCase) == 0)
+						return entry;
+				return types [0];
+			}
+
+			// Returns the entry whose ctrlWord is exactly the single character c, else the None entry.
+			public static BibTexControlWordType Find (char c)
+			{
+				foreach (BibTexControlWordType entry in types)
+					if (entry.ctrlWord.Length == 1 && entry.ctrlWord [0] == c)
+						return entry;
+				return types [0];
+			}
+		}
+
+		public enum ErrorCodes {
+			ERROR_BIBTEX_OK, // returned by the parsing methods when a step completed without error
+			ERROR_BIBTEX_EOF, // returned by ProcessControlWords when the input ends right after an '@'
+			ERROR_BIBTEX_UNHANDLED_SYMBOL, // not returned by any code visible in this file
+			ERROR_BIBTEX_IMPOSSIBLE // unknown word after '@', or an unexpected character between entries
+		};
+
+		StringBuilder str_block_sb = new StringBuilder (); // scratch buffer: BibTexParse adds a space per line break, HandleControlWord clears it
+
+		public FilterBibTex ()
+		{
+			// The raw file, markup and all, shows better context than the
+			// extracted words would, so there is no need to store snippets
+			// and therefore no need to call AppendStructuralBreak either.
+			this.SnippetMode = true;
+			this.OriginalIsText = true;
+			this.SetFileType ("document");
+		}
+
+		override protected void RegisterSupportedTypes ()
+		{
+			foreach (string mime_type in new string[] { "text/x-bibtex", "application/x-bibtex" })
+				AddSupportedFlavor (FilterFlavor.NewFromMimeType (mime_type)); // one flavor per MIME type
+		}
+
+		// Reads the brace-delimited body of an entry from TextReader and returns its
+		// text with the BibTex markup stripped. The next character must be a
+		// BraceStart; otherwise, or at end of input, String.Empty is returned.
+		// Braces nest (tracked in depth); braces inside double quotes are dropped
+		// without being counted. Reading also stops at end of input.
+		private string ExtractArgument ()
+		{
+			int aByte = TextReader.Read ();
+			if (aByte == -1)
+				return String.Empty;
+			if (BibTexControlWordType.Find ((char) aByte).Types != BibTexControlWordType.Type.BraceStart)
+				return String.Empty;
+
+			StringBuilder text = new StringBuilder ();
+			bool in_quotes = false;
+			int depth = 1; // the BraceStart consumed above
+
+			// Test for end of input before casting, so (char) -1 is never appended.
+			while (depth != 0 && (aByte = TextReader.Read ()) != -1) {
+				char ch = (char) aByte;
+				BibTexControlWordType.Type kind = BibTexControlWordType.Find (ch).Types;
+
+				if (kind == BibTexControlWordType.Type.DoubleQuote) {
+					in_quotes = !in_quotes;
+				} else if (kind == BibTexControlWordType.Type.BraceStart) {
+					if (!in_quotes)
+						depth++;
+				} else if (kind == BibTexControlWordType.Type.BraceEnd) {
+					if (!in_quotes)
+						depth--; // reaching 0 closes the argument
+				} else if (kind == BibTexControlWordType.Type.Comment) {
+					TextReader.ReadLine (); // '%': skip the rest of the line
+				} else if (kind == BibTexControlWordType.Type.EqualTo
+					   || (kind == BibTexControlWordType.Type.Comma && !in_quotes)) {
+					// Field delimiters become a space so neighbouring words do not fuse.
+					text.Append (' ');
+				} else if (ch == '\r' || ch == '\n') {
+					text.Append ("  ");
+				} else {
+					text.Append (ch);
+				}
+			}
+			return text.ToString ();
+		}
+
+		// Handles the word that followed an '@'. For a known entry type the type
+		// name is appended between HotUp and HotDown and the entry's brace-delimited
+		// body is appended after it; any other word yields ERROR_BIBTEX_IMPOSSIBLE.
+		private ErrorCodes HandleControlWord (string str_ctrl_word)
+		{
+			BibTexControlWordType ctrl_word_type = BibTexControlWordType.Find (str_ctrl_word);
+
+			switch (ctrl_word_type.Types) {
+			case BibTexControlWordType.Type.EntryType:
+				HotUp ();
+				AppendWord (str_ctrl_word);
+				// Discard the spaces BibTexParse buffered before this entry.
+				str_block_sb.Length = 0;
+				HotDown ();
+				AppendWord (ExtractArgument ());
+				break;
+			default:
+				// Not one of the entry types listed in BibTexControlWordType.
+				return ErrorCodes.ERROR_BIBTEX_IMPOSSIBLE;
+			}
+
+			return ErrorCodes.ERROR_BIBTEX_OK;
+		}
+		
+		// Reads the word that follows an '@' and passes it to HandleControlWord.
+		//
+		// The first character is taken unconditionally; later characters are
+		// added until the next one would be whitespace, '{', '[' or '\\'. That
+		// delimiter is only peeked at, so it is still unread when
+		// HandleControlWord runs.
+		//
+		// Returns ERROR_BIBTEX_EOF if nothing is left to read, otherwise
+		// whatever HandleControlWord returns for the collected word.
+		private ErrorCodes ProcessControlWords ()
+		{
+			int aByte = TextReader.Read ();
+			if (aByte == -1)
+				return ErrorCodes.ERROR_BIBTEX_EOF;
+
+			StringBuilder word = new StringBuilder ();
+			char ch = (char) aByte;
+
+			// aByte is known not to be -1 on entry, so testing it after each
+			// pass is equivalent to testing it before.
+			do {
+				word.Append (ch);
+
+				char upcoming = (char) TextReader.Peek ();
+				bool is_delimiter = Char.IsWhiteSpace (upcoming)
+					|| upcoming == '{' || upcoming == '[' || upcoming == '\\';
+				if (is_delimiter)
+					break;
+
+				aByte = TextReader.Read ();
+				ch = (char) aByte;
+			} while (aByte != -1);
+
+			return HandleControlWord (word.ToString ());
+		}
+
+		// Scans TextReader to the end of input, passing each '@' to ProcessControlWords.
+		// Returns the first error encountered, or ERROR_BIBTEX_OK at end of input.
+		private ErrorCodes BibTexParse ()
+		{
+			int aByte;
+			while ((aByte = TextReader.Read ()) != -1) {
+				char ch = (char) aByte;
+				switch (ch) {
+				case '@': // Start of an entry
+					ErrorCodes ec = ProcessControlWords ();
+					if (ec != ErrorCodes.ERROR_BIBTEX_OK)
+						return ec;
+					break;
+				case '\r':
+				case '\n':
+					str_block_sb.Append (' ');
+					break;
+				case '%':
+					TextReader.ReadLine (); // Skip to the end of the line
+					break;
+				default:
+					// Blanks between entries are fine; other stray text is an error.
+					if (!Char.IsWhiteSpace (ch))
+						return ErrorCodes.ERROR_BIBTEX_IMPOSSIBLE;
+					break;
+				}
+			}
+			return ErrorCodes.ERROR_BIBTEX_OK;
+		}
+
+		// Parses the whole input, logs any result other than ERROR_BIBTEX_OK, then calls Finished.
+		override protected void DoPullProperties ()
+		{
+			ErrorCodes outcome = BibTexParse ();
+			if (outcome != ErrorCodes.ERROR_BIBTEX_OK)
+				Logger.Log.Error ("{0}", outcome);
+			Finished ();
+		}
+
+	}
+}
Index: Filters/AssemblyInfo.cs
==================================================================
--- Filters/AssemblyInfo.cs	(revision 4471)
+++ Filters/AssemblyInfo.cs	(working copy)
@@ -93,6 +93,7 @@
 	 typeof(FilterSvg),
 	 typeof(FilterTeX),
 	 typeof(FilterText),
+	 typeof(FilterBibTex),
 	 typeof(FilterTiff),
 	 typeof(FilterTotem),
 	 typeof(FilterVideo),
Index: Filters/Makefile.am
==================================================================
--- Filters/Makefile.am	(revision 4471)
+++ Filters/Makefile.am	(working copy)
@@ -111,7 +111,8 @@
 	$(srcdir)/FilterTiff.cs		\
 	$(srcdir)/FilterTotem.cs	\
 	$(srcdir)/FilterVideo.cs	\
-	$(srcdir)/FilterXslt.cs
+	$(srcdir)/FilterXslt.cs         \
+	$(srcdir)/FilterBibTex.cs			
 
 if ENABLE_GSF_SHARP
 CSFILES += 				\


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]