[pdfmod] Patch for PageLabels support



This patch adds a class for accessing PageLabel data.  Currently, the
only change to the UI is in the tooltips.  If PageLabel data is present,
the tooltip will read "lbl (Page #)", where lbl is the page's label and
Page # is the physical page number.  Otherwise, it will simply read
"Page #".  Note that some documents have a PageLabels section without
any custom numbering; the logic does not detect this yet, so you may
see documents with tooltips like "5 (Page 5)".

If you find a PDF that causes the label reader to crash, please send
it my way.


For a demonstration, try this PDF:
http://itextdocs.lowagie.com/examples/com/lowagie/examples/objects/bookmarks/PageLabels.pdf
diff --git a/src/PdfMod/Makefile.am b/src/PdfMod/Makefile.am
index be15cb2..6361bb4 100644
--- a/src/PdfMod/Makefile.am
+++ b/src/PdfMod/Makefile.am
@@ -88,6 +88,7 @@ FILES =  \
 	PdfMod/CellRendererPage.cs \
 	PdfMod/Document.cs \
 	PdfMod/GlobalActions.cs \
+	PdfMod/LabelWrangler.cs \
 	PdfMod/MetadataEditorBox.cs \
 	PdfMod/Page.cs \
 	PdfMod/PageThumbnail.cs \
diff --git a/src/PdfMod/PdfMod.mdp b/src/PdfMod/PdfMod.mdp
index b13736d..ad40a59 100644
--- a/src/PdfMod/PdfMod.mdp
+++ b/src/PdfMod/PdfMod.mdp
@@ -37,6 +37,7 @@
     <File name="PdfMod.Actions/MoveAction.cs" subtype="Code" buildaction="Compile" />
     <File name="PdfMod/MetadataEditorBox.cs" subtype="Code" buildaction="Compile" />
     <File name="PdfMod/PageThumbnail.cs" subtype="Code" buildaction="Compile" />
+    <File name="PdfMod/LabelWrangler.cs" subtype="Code" buildaction="Compile" />
   </Contents>
   <References>
     <ProjectReference type="Gac" localcopy="True" refto="System, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089" />
diff --git a/src/PdfMod/PdfMod/Document.cs b/src/PdfMod/PdfMod/Document.cs
index 5cda2fd..27b1cc5 100644
--- a/src/PdfMod/PdfMod/Document.cs
+++ b/src/PdfMod/PdfMod/Document.cs
@@ -17,8 +17,10 @@ namespace PdfMod
         private string password;
         private string tmp_path;
         private string tmp_uri;
+		private LabelWrangler page_labels;
         internal string CurrentStateUri { get { return tmp_uri ?? Uri; } }
 
+		public LabelWrangler PageLabels { get { return page_labels; } }
         public string SuggestedSavePath { get; set; }
         public string Uri { get; private set; }
         public string Path { get; private set; }
@@ -131,6 +133,7 @@ namespace PdfMod
                 pages.Add (page);
             }
 
+			page_labels = new LabelWrangler(pdf_document);
             ExpireThumbnails (pages);
             OnChanged ();
         }
diff --git a/src/PdfMod/PdfMod/LabelWrangler.cs b/src/PdfMod/PdfMod/LabelWrangler.cs
new file mode 100644
index 0000000..bad1bec
--- /dev/null
+++ b/src/PdfMod/PdfMod/LabelWrangler.cs
@@ -0,0 +1,299 @@
+//Author: Michael McKinley (m mckinley gmail com)
+
+using System;
+using System.Linq;
+using System.Text;
+using System.Collections.Generic;
+
+using PdfSharp;
+using PdfSharp.Pdf;
+using PdfSharp.Pdf.Advanced;
+using System;
+
+namespace PdfMod
+{
+	
+	//Information about a page label range.  The start of the range is stored as the key to the dictionary
+	struct PageLabelFmt
+	{
+		public string fmt;	//Numbering style read from the range's /S entry (expected to be one of LabelWrangler's nstyle_* values); LabelWrangler's constructor stores "" when the entry is absent
+		public string pfx;	//Prefix of the label, read from the range's /P entry; LabelWrangler's constructor stores "" when the entry is absent
+		public int nstart;	//The first number in the sequence, read from the range's /St entry; LabelWrangler's constructor stores 1 when the entry is absent
+	}
+	
+	public class LabelWrangler
+	{
+		private const string name_labels = "/PageLabels";	//Catalog key under which the label dictionary is looked up
+		private const string name_numtree = "/Nums";		//Key, inside that dictionary, of the [start, dict, start, dict ...] array
+		
+		//Keys (PdfNames) for defining the label format
+		private const string name_fmt = "/S";			//Numbering style (one of the nstyle_* values below)
+		private const string name_start_at = "/St";		//Start at an index
+		private const string name_prefix = "/P";			//Prefix for numbers
+		
+		//Possible values for the numbering style (a = alpha, r = roman, u = upper, l = lower, de = decimal)
+		private const string nstyle_au = "/A";	//A B C...
+		private const string nstyle_al = "/a";	//a b c...
+		private const string nstyle_ru = "/R";	//I II III...
+		private const string nstyle_rl = "/r";	//i ii iii..
+		private const string nstyle_de = "/D";	//1 2 3...
+		
+		private SortedDictionary<int, PageLabelFmt> page_labels;	//Maps the 0-based index of the first page of a range to that range's format
+		private PdfDictionary.DictionaryElements pdf_elements;	//Elements of the document catalog (set from document.Internals.Catalog.Elements)
+		private PdfDocument pdf_document;	//The document passed to the constructor; used by WriteLabels when creating new PDF objects
+		private bool edited;	//WriteLabels does nothing while this is false; initialised to false by the constructor
+		
+		//Does this document have labelling information?  (True once at least one label range was read.)
+		public bool HasLabels { get { return page_labels.Count > 0; } }
+		
+		//Reads the /PageLabels number tree from document's catalog into page_labels.
+		//Documents without a usable /PageLabels -> /Nums array simply end up with no labels.
+		internal LabelWrangler(PdfDocument document)
+		{
+			page_labels = new SortedDictionary<int, PageLabelFmt>();
+			pdf_elements = document.Internals.Catalog.Elements;
+			pdf_document = document;
+			edited = false;
+
+			//Ignore documents that don't have labelling stuff defined
+			if (!pdf_elements.Contains(name_labels)) {
+				return;
+			}
+
+			//Ignore documents that don't have a properly-defined PageLabels section
+			//NOTE(review): the null checks in this method assume GetDictionary/GetArray return null
+			//for an entry of the wrong type -- confirm against the PDFsharp version in use
+			PdfDictionary my_labels = pdf_elements.GetDictionary(name_labels);
+			if (my_labels == null || !my_labels.Elements.Contains(name_numtree)) {
+				return;
+			}
+
+			/* The number tree (not my term) is a PdfArray arranged as follows: [##, dict, ##, dict, ##, dict ...]
+			 * ## represents the starting index of the page (0-based) and the following dict is a PdfDictionary
+			 * containing formatting information regarding the range
+			 */
+			PdfArray number_tree = my_labels.Elements.GetArray(name_numtree);
+			if (number_tree == null) {
+				return;
+			}
+
+			//Integer division ignores a trailing start index that has no dictionary after it
+			for (int i = 0; i < number_tree.Elements.Count / 2; ++i) {
+				int range_start = number_tree.Elements.GetInteger(i * 2);	//Contains the starting index for this pair
+				PdfDictionary label_data = number_tree.Elements.GetDictionary(i * 2 + 1);
+
+				//Skip a pair whose second item is not a dictionary rather than crash on it
+				if (label_data == null) {
+					continue;
+				}
+
+				PageLabelFmt temp_label = new PageLabelFmt();
+
+				//Set the prefix, default to ""
+				temp_label.pfx = label_data.Elements.Contains(name_prefix)
+					? label_data.Elements.GetString(name_prefix)
+					: "";
+
+				//Set the start number, default to 1
+				temp_label.nstart = label_data.Elements.Contains(name_start_at)
+					? label_data.Elements.GetInteger(name_start_at)
+					: 1;
+
+				//Set the format type; "" (no /S entry) makes RenderVal produce no numeric portion
+				temp_label.fmt = label_data.Elements.Contains(name_fmt)
+					? label_data.Elements.GetString(name_fmt)
+					: "";
+
+				//Assign through the indexer: Add() would throw ArgumentException on a repeated start index
+				page_labels[range_start] = temp_label;
+			}
+
+			//Great success!
+		}
+		
+		//Returns the label for the page at (0-based) index: the range's prefix followed by the page
+		//number rendered in the range's style.  When there is no labelling data, or no range covers
+		//the page, the 1-based physical page number is returned instead.
+		public string GetLabel(int index)
+		{
+			//GetFormat yields -1 when no range starts at or before index (always the case for an
+			//empty page_labels), so this single lookup also handles documents without labelling
+			//data and needs no exception handling
+			int range_base = GetFormat(index);
+			PageLabelFmt cur_format;
+			if (!page_labels.TryGetValue(range_base, out cur_format)) {
+				return (1 + index).ToString();
+			}
+
+			//Restart numbering for each range of pages
+			int vindex = index + cur_format.nstart - range_base;
+			string number = RenderVal(vindex, cur_format.fmt);
+
+			//Apply the case with the invariant culture: culture-sensitive ToUpper()/ToLower() map
+			//i/I to dotted/dotless forms under e.g. a Turkish locale, which corrupts Roman numerals
+			if (cur_format.fmt == nstyle_ru || cur_format.fmt == nstyle_au) {
+				number = number.ToUpperInvariant();
+			}
+			else {
+				number = number.ToLowerInvariant();
+			}
+
+			//Build the label: prefix first, then the rendered number
+			string label = cur_format.pfx;
+			label += number;
+			return label;
+		}
+		
+		//Determine which formatting rules apply to page index.  Returns the start of the formatting range,
+		//i.e. the last range start that does not exceed index, or -1 when there is none.
+		private int GetFormat(int index)
+		{
+			//Todo: binary search
+			//page_labels is a SortedDictionary, so its keys arrive in ascending order and the
+			//leading run of keys <= index ends with the range we are after; an empty run
+			//falls back to -1
+			return page_labels.Keys
+				.TakeWhile(range_start => range_start <= index)
+				.DefaultIfEmpty(-1)
+				.Last();
+		}
+		
+		//Render the value index in the style named by fmt (case-agnostic; the caller applies the case)
+		private string RenderVal(int index, string fmt)
+		{
+			switch (fmt) {
+			case nstyle_de:
+				return index.ToString();
+			case nstyle_ru:
+			case nstyle_rl:
+				return ToRoman(index);
+			case nstyle_al:
+			case nstyle_au:
+				return ToAlpha(index);
+			default:
+				//Any other style (including "") renders no number at all
+				return "";
+			}
+		}
+	
+		//Convert val into upper-case Roman numerals; values below 1 have no Roman form and yield ""
+		private string ToRoman(int val)
+		{
+			//Guard: a negative val would reach Append('I', val) below and throw ArgumentOutOfRangeException
+			if (val <= 0) {
+				return "";
+			}
+			StringBuilder roman_val = new StringBuilder();
+			if (val >= 1000) {
+				roman_val.Append('M', val / 1000);
+				val %= 1000;
+			}
+			if (val >= 900) {
+				roman_val.Append("CM");
+				val -= 900;
+			}
+			if (val >= 500) {
+				roman_val.Append('D', val / 500);
+				val %= 500;
+			}
+			if (val >= 400) {
+				roman_val.Append("CD");
+				val -= 400;
+			}
+			if (val >= 100) {
+				roman_val.Append('C', val / 100);
+				val %= 100;
+			}
+			if (val >= 90) {
+				roman_val.Append("XC");
+				val -= 90;
+			}
+			if (val >= 50) {
+				roman_val.Append('L', val / 50);
+				val %= 50;
+			}
+			if (val >= 40) {
+				roman_val.Append("XL");
+				val -= 40;
+			}
+			if (val >= 10) {
+				roman_val.Append('X', val / 10);
+				val %= 10;
+			}
+			if (val >= 9) {
+				roman_val.Append("IX");
+				val -= 9;
+			}
+			if (val >= 5) {
+				roman_val.Append('V', val / 5);
+				val %= 5;
+			}
+			if (val >= 4) {
+				roman_val.Append("IV");
+				val -= 4;
+			}
+			roman_val.Append('I', val);	//Whatever is left (0 to 3) becomes that many I's
+			return roman_val.ToString();
+		}
+		
+		//Convert val into the alpha representation. 1 -> a, 2 -> b, ... 26 -> z, 27 -> aa, 28 -> bb, etc.  Values below 1 have no alpha form and yield "".
+		private string ToAlpha(int val)
+		{
+			if (val <= 0) {
+				return "";
+			}
+			char letter = (char)((val - 1) % 26 + 'a');		//Determine what letter represents this index
+			return new string(letter, (val - 1) / 26 + 1);	//Repeat it once more for every 26 values already passed
+		}
+		
+		//Write labels to the PDF: rebuilds the catalog's /PageLabels dictionary from page_labels.
+		internal void WriteLabels()
+		{
+			//Only re-write label data if we've edited it.
+			if (!edited) {
+				return;
+			}
+
+			//Grab the labels element, creating (and registering) it if necessary
+			PdfDictionary labels_dict;
+			if (pdf_elements.Contains(name_labels)) {
+				labels_dict = pdf_elements.GetDictionary(name_labels);
+			}
+			else {
+				labels_dict = new PdfDictionary(pdf_document);
+				pdf_elements.Add(new PdfName(name_labels), labels_dict);
+			}
+			labels_dict.Elements.Clear();
+
+			//Create the number tree
+			PdfArray number_tree = new PdfArray(pdf_document);
+
+			//Add the range-start, attrib-dict pairs in ascending range order
+			foreach (KeyValuePair<int, PageLabelFmt> range in page_labels) {
+				number_tree.Elements.Add(new PdfInteger(range.Key));
+				PageLabelFmt cur_fmt = range.Value;
+				PdfDictionary r_attribs = new PdfDictionary(pdf_document);	//Create the dictionary for the attribs
+
+				//Add the numbering style, unless the range has none
+				if (cur_fmt.fmt.Length > 0) {
+					r_attribs.Elements.Add(new PdfName(name_fmt), new PdfName(cur_fmt.fmt));
+				}
+
+				//Add the starting number, but only when it is above the standard value of 1
+				if (cur_fmt.nstart > 1) {
+					r_attribs.Elements.Add(new PdfName(name_start_at), new PdfInteger(cur_fmt.nstart));
+				}
+
+				//Add the prefix, if applicable
+				if (cur_fmt.pfx.Length > 0) {
+					r_attribs.Elements.Add(new PdfName(name_prefix), new PdfString(cur_fmt.pfx));
+				}
+
+				//Add the attributes to the number_tree
+				number_tree.Elements.Add(r_attribs);
+			}
+
+			//Add the number tree to the elements
+			labels_dict.Elements.Add(new PdfName(name_numtree), number_tree);
+		}
+	}
+}
diff --git a/src/PdfMod/PdfMod/PdfListStore.cs b/src/PdfMod/PdfMod/PdfListStore.cs
index 145e7aa..3f80b91 100644
--- a/src/PdfMod/PdfMod/PdfListStore.cs
+++ b/src/PdfMod/PdfMod/PdfListStore.cs
@@ -52,15 +52,31 @@ namespace PdfMod
         public void UpdateForPage (TreeIter iter, Page page)
         {
             SetValue (iter, SortColumn, page.Index);
-            SetValue (iter, TooltipColumn, String.Format (Catalog.GetString ("Page {0}"), page.Index + 1));
+			if (page.Document.PageLabels.HasLabels) {
+				SetValue (iter, TooltipColumn, String.Format (Catalog.GetString ("{0} (Page {1})"), 
+				                                              page.Document.PageLabels.GetLabel (page.Index), 
+				                                              page.Index + 1));
+			}
+			else {
+            	SetValue (iter, TooltipColumn, String.Format (Catalog.GetString ("Page {0}"), page.Index + 1));
+			}
             SetValue (iter, PageColumn, page);
         }
 
         internal object [] GetValuesForPage (Page page)
         {
+			string lbl;
+			if (page.Document.PageLabels.HasLabels) {
+				lbl = String.Format (Catalog.GetString ("{0} (Page {1})"), 
+				                     page.Document.PageLabels.GetLabel (page.Index), 
+				                     page.Index + 1);
+			}
+			else {
+					lbl = String.Format (Catalog.GetString ("Page {0}"), page.Index + 1);
+			}
             return new object[] {
                 page.Index,
-                String.Format (Catalog.GetString ("Page {0}"), page.Index + 1),
+                lbl,
                 page
             };
         }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]