beagle r4752 - trunk/beagle/Filters
- From: dbera svn gnome org
- To: svn-commits-list gnome org
- Subject: beagle r4752 - trunk/beagle/Filters
- Date: Sun, 11 May 2008 02:50:48 +0100 (BST)
Author: dbera
Date: Sun May 11 01:50:48 2008
New Revision: 4752
URL: http://svn.gnome.org/viewvc/beagle?rev=4752&view=rev
Log:
Add a BibTex filter based on the command line program 'bibparse' from the btparse package. Most distributions I checked ship this program.
Added:
trunk/beagle/Filters/FilterBibTex.cs
Modified:
trunk/beagle/Filters/AssemblyInfo.cs
trunk/beagle/Filters/Makefile.am
Modified: trunk/beagle/Filters/AssemblyInfo.cs
==============================================================================
--- trunk/beagle/Filters/AssemblyInfo.cs (original)
+++ trunk/beagle/Filters/AssemblyInfo.cs Sun May 11 01:50:48 2008
@@ -34,6 +34,7 @@
typeof(FilterApplication),
typeof(FilterArchive),
typeof(FilterAudio),
+ typeof(FilterBibTex),
typeof(FilterBMP),
typeof(FilterBoo),
typeof(FilterC),
Added: trunk/beagle/Filters/FilterBibTex.cs
==============================================================================
--- (empty file)
+++ trunk/beagle/Filters/FilterBibTex.cs Sun May 11 01:50:48 2008
@@ -0,0 +1,217 @@
+//
+// FilterBibTex.cs : Simple BibTex .bib filter, based on the external tool bibparse.
+//
+// As it turns out, bibtex format is pretty tough to parse and most of the parsers
+// out there use some kind of lexical generator. I decided to use the tried and tested
+// bibparse to do the basic parsing.
+//
+// Copyright (C) 2008 D Bera <dbera web gmail com>
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS IN THE SOFTWARE.
+//
+
+using System;
+using System.IO;
+using System.Collections;
+using System.Diagnostics;
+
+using Beagle.Util;
+
+namespace Beagle.Filters {
+ // Filter for BibTeX (.bib) files. The file is handed to the external 'bibparse'
+ // program and every entry it prints becomes a child indexable whose properties
+ // come from the entry's key=value lines.
+ public class FilterBibTex : Beagle.Daemon.Filter {
+     public FilterBibTex ()
+     {
+         SnippetMode = false;
+         SetFileType ("document");
+     }
+
+     protected override void RegisterSupportedTypes ()
+     {
+         AddSupportedFlavor (Beagle.Daemon.FilterFlavor.NewFromMimeType ("text/x-bibtex"));
+     }
+
+     // Result of the probe in the static constructor; when false no file is parsed.
+     private static bool bibparse_installed = false;
+
+     // Check if bibparse is present by trying to launch it without arguments.
+     static FilterBibTex ()
+     {
+         SafeProcess pc = new SafeProcess ();
+         pc.Arguments = new string [] { "bibparse"};
+         pc.RedirectStandardOutput = false;
+         pc.RedirectStandardError = false;
+
+         try {
+             pc.Start ();
+             bibparse_installed = true;
+         } catch (SafeProcessException e) {
+             Log.Warn (e, "bibparse is not found; bibtex files will not be indexed");
+             bibparse_installed = false;
+         }
+
+         pc.Close ();
+     }
+
+     // No properties are pulled from the .bib file itself; this only reports an
+     // error when bibparse is unavailable.
+     protected override void DoPullProperties ()
+     {
+         if (! bibparse_installed)
+             Error ();
+     }
+
+     public override bool HasGeneratedIndexable {
+         get { return bibparse_installed; }
+     }
+
+     private SafeProcess bib_process = null;
+     private StreamReader reader; // stays null if bibparse could not be started
+
+     // Launches 'bibparse <file>' and attaches 'reader' to its standard output.
+     // Returns false (after logging) when the process cannot be started.
+     private bool InitBibparse ()
+     {
+         bib_process = new SafeProcess ();
+         bib_process.Arguments = new string[] { "bibparse", FileInfo.FullName};
+         bib_process.RedirectStandardError = false;
+         bib_process.RedirectStandardOutput = true;
+         bib_process.CpuLimit = 180; // 3 minutes
+         bib_process.MemLimit = 50*1024*1024; // 50 MB
+
+         try {
+             bib_process.Start ();
+         } catch (SafeProcessException e) {
+             Log.Error (e, "Error running 'bibparse {0}'", FileInfo.FullName);
+             return false;
+         }
+
+         reader = new StreamReader (bib_process.StandardOutput);
+         return true;
+     }
+
+     // Reads the next entry from bibparse's output into 'child'. As parsed here, an
+     // entry is a "@type name" header line followed by key=value lines up to the
+     // next empty line. Returns false once the output is exhausted or when
+     // bibparse could not be started.
+     public override bool GenerateNextIndexable (out Indexable child)
+     {
+         child = null;
+
+         if (bib_process == null && ! InitBibparse ())
+             return false;
+         // bib_process is non-null even after a failed start, so check the reader
+         // too; otherwise a repeated call would dereference a null reader.
+         if (reader == null)
+             return false;
+
+         string line = null;
+         string type = null, name = null;
+         // Skip ahead to the next "@type name" header line
+         while ((line = reader.ReadLine ()) != null) {
+             if (line == String.Empty || line [0] != '@')
+                 continue;
+
+             int i = line.IndexOf (' ');
+             if (i == -1 || line.Length == i + 1)
+                 continue;
+             // invariant casing: the type is an identifier, not user-visible text
+             type = line.Substring (1, i - 1).ToLowerInvariant ();
+             name = line.Substring (i + 1);
+             break;
+         }
+
+         if (line == null)
+             return false;
+
+         child = new Indexable (new Uri (Indexable.Uri.ToString() + "#" + name));
+         child.CacheContent = false;
+         child.MimeType = "text/x-bibtex";
+         child.DisplayUri = (new Uri (Indexable.Uri.ToString() + "#" + name));
+         child.NoContent = true;
+         child.AddProperty (Property.NewKeyword ("bibtex:type", type));
+
+         string key, value;
+         // Now fill in properties from the key=value lines
+         while ((line = reader.ReadLine ()) != null) {
+             // Entries are separated by empty lines
+             if (line == String.Empty)
+                 break;
+
+             int i = line.IndexOf ('=');
+             // skip lines with an empty key or an empty value
+             if (i < 1 || line.Length == i + 1)
+                 continue;
+             // invariant casing so keys compare reliably with the literals below
+             key = line.Substring (0, i).ToLowerInvariant ();
+             value = line.Substring (i + 1);
+             foreach (Property prop in EntryLineToProperty (key, value))
+                 child.AddProperty (prop);
+         }
+
+         child.SetChildOf (Indexable);
+         return true;
+     }
+
+     protected override void DoClose ()
+     {
+         if (reader != null)
+             reader.Close ();
+
+         if (bib_process != null)
+             bib_process.Close ();
+     }
+
+     // Maps one key=value line to properties. Every key yields a single property
+     // except "author", which yields one property per name. Names are separated
+     // by the whole word "and" (any case), so names that merely contain those
+     // letters, such as "Alexander" or "Sandra", stay intact. Whitespace inside a
+     // name is normalised to single spaces.
+     // NOTE: an "and" protected by braces is still treated as a separator.
+     private IEnumerable EntryLineToProperty (string key, string value)
+     {
+         if (key != "author") {
+             if (IsKeyword (key))
+                 yield return Property.NewKeyword (KeyToPropName (key), value);
+             else
+                 yield return Property.New (KeyToPropName (key), CleanupValue (value));
+             yield break;
+         }
+
+         string author = String.Empty;
+         // a null separator array makes Split break on any whitespace
+         foreach (string word in value.Split ((char[]) null, StringSplitOptions.RemoveEmptyEntries)) {
+             if (String.Compare (word, "and", StringComparison.OrdinalIgnoreCase) != 0) {
+                 author = (author.Length == 0) ? word : author + " " + word;
+                 continue;
+             }
+
+             // reached a separator: emit the name collected so far, if any
+             if (author.Length > 0)
+                 yield return Property.New (KeyToPropName (key), CleanupValue (author));
+             author = String.Empty;
+         }
+
+         if (author.Length > 0)
+             yield return Property.New (KeyToPropName (key), CleanupValue (author));
+     }
+
+     // Keys whose values are stored as keywords, without CleanupValue being applied
+     private static bool IsKeyword (string key)
+     {
+         return key == "pages" || key == "url" || key == "identifier";
+     }
+
+     // bibtex to rdf mapping; keys without a mapping are returned unchanged
+     private static string KeyToPropName (string key)
+     {
+         switch (key) {
+         case "title":
+             return "dc:title";
+         case "author":
+             return "dc:creator";
+         case "address":
+         case "location":
+             return "vcard:ADR";
+         case "publisher":
+             return "dc:publisher";
+         case "url":
+             return "dc:identifier";
+         default:
+             return key;
+         }
+     }
+
+     // Removes the LaTeX acute-accent escape (\') from a value.
+     private static string CleanupValue (string value)
+     {
+         return value.Replace ("\\'", String.Empty); // FIXME better processing of latex escape symbols
+     }
+ }
+}
Modified: trunk/beagle/Filters/Makefile.am
==============================================================================
--- trunk/beagle/Filters/Makefile.am (original)
+++ trunk/beagle/Filters/Makefile.am Sun May 11 01:50:48 2008
@@ -52,6 +52,7 @@
$(srcdir)/FilterAbiword.cs \
$(srcdir)/FilterArchive.cs \
$(srcdir)/FilterAudio.cs \
+ $(srcdir)/FilterBibTex.cs \
$(srcdir)/FilterBmp.cs \
$(srcdir)/FilterBoo.cs \
$(srcdir)/FilterC.cs \
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]