Indexing License Metadata



Hi all,

I'm interested in extending existing filters to extract license
metadata.  The attached patch demonstrates extracting Creative Commons
license metadata from SVGs, XMP embedded in images and PDFs, and OLE files.

However, I don't know what to do with the licenses extracted.  Does a
new property need to be added?  How do I get the license back out after
indexing a file with a license?

Thanks,
Jason
Index: Util/F-Spot/MetadataStore.cs
===================================================================
--- Util/F-Spot/MetadataStore.cs	(revision 3791)
+++ Util/F-Spot/MetadataStore.cs	(working copy)
@@ -198,6 +198,7 @@
 		public const string RdfsNS = "http://www.w3.org/2000/01/rdf-schema#";
 		public const string IViewNS = "http://ns.iview-multimedia.com/mediapro/1.0/";
 		public const string XmlNS = "http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/";
+		public const string CcNS = "http://creativecommons.org/ns#";
 
 		// FIXME this needs to be parsable by System.Uri
 		public const string FSpotXMPBase = "http://fakebase.f-spot.org/internal/";
@@ -219,6 +220,7 @@
 			Namespaces.AddNamespace (RdfNS, "rdf");
 			Namespaces.AddNamespace (RdfsNS, "rdfs");
 			Namespaces.AddNamespace (IViewNS, "mediapro");
+			Namespaces.AddNamespace (CcNS, "cc");
 		}
 
 		public static MetadataStore Descriptions {
Index: Filters/FilterPdf.cs
===================================================================
--- Filters/FilterPdf.cs	(revision 3791)
+++ Filters/FilterPdf.cs	(working copy)
@@ -13,7 +13,10 @@
 
 using Beagle.Util;
 using Beagle.Daemon;
+using Beagle.Util.Xmp;
 
+using SemWeb;
+
 namespace Beagle.Filters {
 
 	public class FilterPdf : Beagle.Daemon.Filter {
@@ -39,7 +42,7 @@
 		{
 			// create new external process
 			pc = new SafeProcess ();
-			pc.Arguments = new string [] { "pdfinfo", FileInfo.FullName };
+			pc.Arguments = new string [] { "pdfinfo", "-meta", FileInfo.FullName };
 			pc.RedirectStandardOutput = true;
 			// See FIXME below for why this is false.
 			pc.RedirectStandardError = false;
@@ -86,6 +89,11 @@
 					case "Producer":
 						strMetaTag = "dc:appname";
 						break;
+					case "Metadata":
+						string xmpString = pout.ReadToEnd();
+						XmpFile xmp = new XmpFile (new MemoryStream(System.Text.Encoding.ASCII.GetBytes(xmpString)));
+						AddXmpProperties(xmp);
+						break;
 					}
 					if (strMetaTag != null) {
 						if (bKeyword)
@@ -196,6 +204,49 @@
 #endif
 			pc.Close ();
 		}
+
+		internal void AddXmpProperties (XmpFile xmp)
+		{
+			Resource subject_anon = null;
+			Resource creator_anon = null;
+			Resource rights_anon = null;
+			Resource title_anon = null;
+
+			foreach (Statement stmt in xmp.Store) {
+				if (stmt.Predicate == MetadataStore.Namespaces.Resolve ("dc:subject")) {
+					//Console.WriteLine ("found subject");
+					subject_anon = stmt.Object;
+				} else if (stmt.Predicate == MetadataStore.Namespaces.Resolve ("dc:creator")) {
+					//Console.WriteLine ("found creator");
+					creator_anon = stmt.Object;
+				} else if (stmt.Predicate == MetadataStore.Namespaces.Resolve ("dc:rights")) {
+					rights_anon = stmt.Object;
+				} else if (stmt.Predicate == MetadataStore.Namespaces.Resolve ("dc:title")) {
+					if (stmt.Object is Literal)
+						AddProperty (Beagle.Property.New ("dc:title", ((Literal)stmt.Object).Value));
+					else if (stmt.Object is BNode)
+						title_anon = stmt.Object;
+				} else if (stmt.Predicate == MetadataStore.Namespaces.Resolve ("cc:license")) {
+					AddProperty (Beagle.Property.NewKeyword ("fixme:license", ((Literal)stmt.Object).Value));
+				}
+			}
+			
+			foreach (Statement stmt in xmp.Store) {
+				if (stmt.Subject == subject_anon && 
+				    stmt.Predicate != MetadataStore.Namespaces.Resolve ("rdf:type")) {
+					AddProperty (Beagle.Property.New ("dc:subject", ((Literal)stmt.Object).Value));
+				} else if (stmt.Subject == creator_anon &&  
+					   stmt.Predicate != MetadataStore.Namespaces.Resolve ("rdf:type")) {
+					AddProperty (Beagle.Property.New ("dc:creator", ((Literal)stmt.Object).Value));
+				} else if (stmt.Subject == rights_anon &&  
+					   stmt.Predicate != MetadataStore.Namespaces.Resolve ("rdf:type")) {
+					AddProperty (Beagle.Property.New ("dc:rights", ((Literal)stmt.Object).Value));
+				} else if (stmt.Subject == title_anon &&
+					   stmt.Predicate != MetadataStore.Namespaces.Resolve ("rdf:type")) {
+					AddProperty (Beagle.Property.New ("dc:title", ((Literal)stmt.Object).Value));
+				}
+			}
+		}
 	}
 }
 
Index: Filters/FilterOle.cs
===================================================================
--- Filters/FilterOle.cs	(revision 3791)
+++ Filters/FilterOle.cs	(working copy)
@@ -141,6 +141,10 @@
 				prop = docSumMeta.Lookup ("gsf:category");
 				if (prop != null)
 					AddProperty (Beagle.Property.New ("fixme:category", prop.Val as string));
+
+				prop = docSumMeta.Lookup ("CreativeCommons_LicenseURL");
+				if (prop != null)
+					AddProperty (Beagle.Property.New ("fixme:license", prop.Val as string));
 			}
 
 			ExtractMetaData (sum_stream, doc_stream);
Index: Filters/FilterImage.cs
===================================================================
--- Filters/FilterImage.cs	(revision 3791)
+++ Filters/FilterImage.cs	(working copy)
@@ -157,6 +157,8 @@
 				} else if (stmt.Predicate == MetadataStore.Namespaces.Resolve ("tiff:Model")) {
 					// NOTE: the namespaces for xmp and beagle don't always match up
 					AddProperty (Beagle.Property.New ("exif:Model", ((Literal)stmt.Object).Value));
+				} else if (stmt.Predicate == MetadataStore.Namespaces.Resolve ("cc:license")) {
+					AddProperty (Beagle.Property.NewKeyword ("fixme:license", ((Literal)stmt.Object).Value));
 				}
 			}
 			
Index: Filters/FilterSvg.cs
===================================================================
--- Filters/FilterSvg.cs	(revision 3791)
+++ Filters/FilterSvg.cs	(working copy)
@@ -41,6 +41,7 @@
 		static private string [] ignore_strings = { "format"};
 		
 		static private string dcnamespace = "http://purl.org/dc/elements/1.1/";
+		static private string ccnamespace = "http://creativecommons.org/ns#";
 		
 		public FilterSvg ()
 		{
@@ -154,8 +155,14 @@
 							if (grab_text)
 								break;
 							
-							if (reader.IsEmptyElement)
+							if (reader.IsEmptyElement) {
+ 								if (reader.NamespaceURI == ccnamespace) {
+									if ( reader.LocalName == "license" ) {
+										AddProperty (Property.NewKeyword ("fixme:license", reader.GetAttribute("resource","http://www.w3.org/1999/02/22-rdf-syntax-ns#")));
+									}
+								}
 								break;
+							}
 
 							if (ArrayFu.IndexOfString (ignore_strings, reader.LocalName) != -1)
 								break;


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]