[pdfmod] Patch for PageLabels support
- From: Michael McKinley <m mckinley gmail com>
- To: pdfmod-list gnome org
- Subject: [pdfmod] Patch for PageLabels support
- Date: Wed, 5 Aug 2009 20:15:17 -0500
This patch adds a class (LabelWrangler) for accessing PageLabel data.
Currently, the only change to the UI is in the page tooltips. If
PageLabel data is present, the tooltip reads "lbl (Page #)", where lbl
is the page's label and # is the physical page number. Otherwise, it
simply reads "Page #". Note that some documents have a PageLabel section
without any custom numbering; the logic does not yet detect this case,
so you may see documents with tooltips like "5 (Page 5)".
If you find a PDF that causes the label reader to crash, please send
it my way.
For a demonstration, try this PDF:
http://itextdocs.lowagie.com/examples/com/lowagie/examples/objects/bookmarks/PageLabels.pdf
diff --git a/src/PdfMod/Makefile.am b/src/PdfMod/Makefile.am
index be15cb2..6361bb4 100644
--- a/src/PdfMod/Makefile.am
+++ b/src/PdfMod/Makefile.am
@@ -88,6 +88,7 @@ FILES = \
PdfMod/CellRendererPage.cs \
PdfMod/Document.cs \
PdfMod/GlobalActions.cs \
+ PdfMod/LabelWrangler.cs \
PdfMod/MetadataEditorBox.cs \
PdfMod/Page.cs \
PdfMod/PageThumbnail.cs \
diff --git a/src/PdfMod/PdfMod.mdp b/src/PdfMod/PdfMod.mdp
index b13736d..ad40a59 100644
--- a/src/PdfMod/PdfMod.mdp
+++ b/src/PdfMod/PdfMod.mdp
@@ -37,6 +37,7 @@
<File name="PdfMod.Actions/MoveAction.cs" subtype="Code" buildaction="Compile" />
<File name="PdfMod/MetadataEditorBox.cs" subtype="Code" buildaction="Compile" />
<File name="PdfMod/PageThumbnail.cs" subtype="Code" buildaction="Compile" />
+ <File name="PdfMod/LabelWrangler.cs" subtype="Code" buildaction="Compile" />
</Contents>
<References>
<ProjectReference type="Gac" localcopy="True" refto="System, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089" />
diff --git a/src/PdfMod/PdfMod/Document.cs b/src/PdfMod/PdfMod/Document.cs
index 5cda2fd..27b1cc5 100644
--- a/src/PdfMod/PdfMod/Document.cs
+++ b/src/PdfMod/PdfMod/Document.cs
@@ -17,8 +17,10 @@ namespace PdfMod
private string password;
private string tmp_path;
private string tmp_uri;
+ private LabelWrangler page_labels;
internal string CurrentStateUri { get { return tmp_uri ?? Uri; } }
+ public LabelWrangler PageLabels { get { return page_labels; } }
public string SuggestedSavePath { get; set; }
public string Uri { get; private set; }
public string Path { get; private set; }
@@ -131,6 +133,7 @@ namespace PdfMod
pages.Add (page);
}
+ page_labels = new LabelWrangler(pdf_document);
ExpireThumbnails (pages);
OnChanged ();
}
diff --git a/src/PdfMod/PdfMod/LabelWrangler.cs b/src/PdfMod/PdfMod/LabelWrangler.cs
new file mode 100644
index 0000000..bad1bec
--- /dev/null
+++ b/src/PdfMod/PdfMod/LabelWrangler.cs
@@ -0,0 +1,299 @@
+//Author: Michael McKinley (m mckinley gmail com)
+
+using System;
+using System.Linq;
+using System.Text;
+using System.Collections.Generic;
+
+using PdfSharp;
+using PdfSharp.Pdf;
+using PdfSharp.Pdf.Advanced;
+using System;
+
+namespace PdfMod
+{
+
+ //Formatting rules for one page label range. The range's first page index (0-based) is not stored here; it is the key of LabelWrangler.page_labels
+ struct PageLabelFmt
+ {
+ public string fmt; //Numbering style: one of the nstyle_?? names, or "" when the range has no /S entry
+ public string pfx; //Prefix prepended to every label in the range ("" when there is no /P entry)
+ public int nstart; //Number given to the first page of the range (1 when there is no /St entry)
+ }
+
+ public class LabelWrangler
+ {
+ private const string name_labels = "/PageLabels"; //Catalog entry holding the page label dictionary
+ private const string name_numtree = "/Nums"; //Entry of that dictionary holding the [index, dict, ...] array
+
+ //Keys (PdfNames) for defining the label format
+ private const string name_fmt = "/S"; //Numbering style (one of the nstyle_?? values below)
+ private const string name_start_at = "/St"; //Number assigned to the first page of the range
+ private const string name_prefix = "/P"; //Prefix prepended to the number
+
+ //Possible values for the numbering style (a = alpha, r = roman, u = upper, l = lower, de = decimal)
+ private const string nstyle_au = "/A"; //A B C...
+ private const string nstyle_al = "/a"; //a b c...
+ private const string nstyle_ru = "/R"; //I II III...
+ private const string nstyle_rl = "/r"; //i ii iii..
+ private const string nstyle_de = "/D"; //1 2 3...
+
+ private SortedDictionary<int, PageLabelFmt> page_labels; //Range start (0-based page index) -> format of that range
+ private PdfDictionary.DictionaryElements pdf_elements; //Elements of the document catalog
+ private PdfDocument pdf_document; //Owner passed to the PDF objects created in WriteLabels
+ private bool edited; //Gates WriteLabels; only ever assigned false in the code shown here
+
+        //Does this document have labelling information? True once at least one label range has been read.
+        public bool HasLabels { get { return page_labels.Count > 0; } }
+
+        //Reads the /PageLabels number tree from the document catalog into page_labels.
+        //Documents without a /PageLabels dictionary, or whose dictionary has no /Nums
+        //array, are left without labels (HasLabels stays false). Only a flat /Nums
+        //array is read; a tree split across /Kids nodes is not followed.
+        //document: the PDF whose catalog is read; also kept for later use by WriteLabels.
+        internal LabelWrangler(PdfDocument document)
+        {
+            page_labels = new SortedDictionary<int, PageLabelFmt>();
+            pdf_elements = document.Internals.Catalog.Elements;
+            pdf_document = document;
+            edited = false;
+
+            //Ignore documents that don't have labelling stuff defined
+            if (!pdf_elements.Contains(name_labels)) {
+                return;
+            }
+
+            //Ignore documents that don't have a properly-defined /PageLabels section
+            PdfDictionary my_labels = pdf_elements.GetDictionary(name_labels);
+            if (my_labels == null || !my_labels.Elements.Contains(name_numtree)) {
+                return;
+            }
+
+            /* The number tree (not my term) is a PdfArray arranged as follows: [##, dict, ##, dict, ##, dict ...]
+             * ## represents the starting index of the page (0-based) and the following dict is a PdfDictionary
+             * containing formatting information regarding the range
+             */
+            PdfArray number_tree = my_labels.Elements.GetArray(name_numtree);
+            if (number_tree == null) {
+                return;
+            }
+
+            //Walk the array pair-wise; a trailing unpaired element is ignored
+            for (int i = 0; i < number_tree.Elements.Count / 2; ++i) {
+                int range_start = number_tree.Elements.GetInteger(i * 2); //Starting index for this pair
+                PdfDictionary label_data = number_tree.Elements.GetDictionary(i * 2 + 1);
+
+                //Defensive: skip the pair if no dictionary could be read for it
+                if (label_data == null) {
+                    continue;
+                }
+
+                PageLabelFmt temp_label = new PageLabelFmt();
+
+                //Set the prefix, default to ""
+                temp_label.pfx = label_data.Elements.Contains(name_prefix)
+                    ? label_data.Elements.GetString(name_prefix)
+                    : "";
+
+                //Set the start number, default to 1
+                temp_label.nstart = label_data.Elements.Contains(name_start_at)
+                    ? label_data.Elements.GetInteger(name_start_at)
+                    : 1;
+
+                //Set the numbering style; "" (no numeric part, prefix only) when /S is absent
+                temp_label.fmt = label_data.Elements.Contains(name_fmt)
+                    ? label_data.Elements.GetString(name_fmt)
+                    : "";
+
+                //Use the indexer rather than Add so a malformed tree that repeats a
+                //range start keeps the last definition instead of throwing
+                page_labels[range_start] = temp_label;
+            }
+        }
+
+        //Returns the label for the page at 0-based position index: the range's prefix
+        //followed by the page's number within the range, rendered in the range's style.
+        //Falls back to the plain 1-based page number when the document has no labels
+        //or when no label range starts at or before index.
+        public string GetLabel(int index)
+        {
+            //No labelling if no labelling data
+            if (page_labels.Count == 0) {
+                return (1 + index).ToString();
+            }
+
+            //A page that precedes every range is an expected case, so probe with TryGetValue instead of catching
+            PageLabelFmt cur_format;
+            int range_base = GetFormat(index);
+            if (!page_labels.TryGetValue(range_base, out cur_format)) {
+                return (1 + index).ToString();
+            }
+
+            //Restart numbering for each range of pages
+            int vindex = index + cur_format.nstart - range_base;
+            string number = RenderVal(vindex, cur_format.fmt);
+
+            //Case is applied with the invariant culture: the current-culture overloads
+            //would map "I"/"i" to dotted/dotless variants under e.g. a Turkish locale
+            if (cur_format.fmt == nstyle_ru || cur_format.fmt == nstyle_au) {
+                number = number.ToUpperInvariant();
+            }
+            else {
+                number = number.ToLowerInvariant();
+            }
+            return cur_format.pfx + number;
+        }
+
+        //Returns the start of the label range covering page index (greatest range start <= index), or -1 if none
+        private int GetFormat(int index)
+        {
+            int covering_start = -1;
+            //Todo: binary search. Entries enumerate in ascending key order, so stop at the first start past index
+            foreach (KeyValuePair<int, PageLabelFmt> range in page_labels) {
+                if (range.Key > index) {
+                    return covering_start;
+                }
+                covering_start = range.Key;
+            }
+            return covering_start;
+        }
+
+        //Renders index in the numbering style named by fmt. Letter case is not applied
+        //here (Roman comes back upper-case, alpha lower-case); an unrecognised style gives ""
+        private string RenderVal(int index, string fmt)
+        {
+            switch (fmt) {
+                case nstyle_de:
+                    return index.ToString();
+                case nstyle_ru:
+                case nstyle_rl:
+                    return ToRoman(index);
+                case nstyle_al:
+                case nstyle_au:
+                    return ToAlpha(index);
+                default:
+                    return "";
+            }
+        }
+
+        //Convert val into upper-case Roman numerals, e.g. 4 -> IV, 1994 -> MCMXCIV.
+        //Values of 4000 and above get a run of leading M's (4000 -> MMMM).
+        //Zero and negative values have no Roman form and yield ""; a negative value
+        //must not reach StringBuilder.Append(char, int), which rejects negative counts.
+        //val: the number to render.
+        //Returns: the numeral in upper case, or "" for val < 1.
+        private string ToRoman(int val)
+        {
+            if (val <= 0) {
+                return "";
+            }
+
+            //Denominations in descending order, subtractive pairs included;
+            //weights[k] is the value of glyphs[k]
+            int[] weights = {
+                1000,
+                900,
+                500,
+                400,
+                100,
+                90,
+                50,
+                40,
+                10,
+                9,
+                5,
+                4,
+                1
+            };
+
+            string[] glyphs = {
+                "M",
+                "CM",
+                "D",
+                "CD",
+                "C",
+                "XC",
+                "L",
+                "XL",
+                "X",
+                "IX",
+                "V",
+                "IV",
+                "I"
+            };
+
+            //Greedy conversion: take each denomination, largest first, while it still fits
+            StringBuilder roman_val = new StringBuilder();
+            for (int k = 0; k < weights.Length; ++k) {
+                //Subtractive pairs and V, L, D fit at most once by the time they are
+                //reached; M, C, X and I may repeat
+                while (val >= weights[k]) {
+                    roman_val.Append(glyphs[k]);
+                    val -= weights[k];
+                }
+            }
+
+            return roman_val.ToString();
+        }
+
+        //Convert val into the alpha representation. 1 -> a, 2 -> b, ... 26 -> z, 27 -> aa, 28 -> bb, etc.
+        //Values below 1 have no representation and yield "" (unguarded, they gave a non-letter or threw).
+        private string ToAlpha(int val)
+        {
+            if (val < 1) return "";
+            char letter = (char)((val - 1) % 26 + 'a'); //Determine what letter represents this index
+            int rep_count = (val - 1) / 26 + 1; //Determine how many times we repeat the letter
+            return new string(letter, rep_count);
+        }
+
+        //Write labels to the PDF: rebuilds the /PageLabels dictionary in the catalog
+        //from page_labels. Does nothing unless the labels were flagged as edited.
+        internal void WriteLabels()
+        {
+            //Only re-write label data if we've edited it.
+            if (!edited) {
+                return;
+            }
+
+            //Reuse the existing labels element, or create and register a new one
+            PdfDictionary labels_dict;
+            if (pdf_elements.Contains(name_labels)) {
+                labels_dict = pdf_elements.GetDictionary(name_labels);
+            }
+            else {
+                labels_dict = new PdfDictionary(pdf_document);
+                pdf_elements.Add(new PdfName(name_labels), labels_dict);
+            }
+
+            //Drop whatever the dictionary held before; /Nums is rebuilt below
+            labels_dict.Elements.Clear();
+
+            //The number tree is written as a flat array of range-start, attrib-dict pairs
+            PdfArray number_tree = new PdfArray(pdf_document);
+            foreach (KeyValuePair<int, PageLabelFmt> range in page_labels) {
+                PageLabelFmt range_fmt = range.Value;
+                number_tree.Elements.Add(new PdfInteger(range.Key));
+                //Only non-default attributes go into the range's dictionary
+                PdfDictionary range_attribs = new PdfDictionary(pdf_document);
+                //Numbering style, when one is set
+                if (range_fmt.fmt.Length > 0) {
+                    range_attribs.Elements.Add(new PdfName(name_fmt), new PdfName(range_fmt.fmt));
+                }
+
+                //Starting number, when it is above the default of 1
+                if (range_fmt.nstart > 1) {
+                    range_attribs.Elements.Add(new PdfName(name_start_at), new PdfInteger(range_fmt.nstart));
+                }
+
+                //Prefix, when there is one
+                if (range_fmt.pfx.Length > 0) {
+                    range_attribs.Elements.Add(new PdfName(name_prefix), new PdfString(range_fmt.pfx));
+                }
+                number_tree.Elements.Add(range_attribs);
+            }
+
+            //Add the number tree to the elements
+            labels_dict.Elements.Add(new PdfName(name_numtree), number_tree);
+        }
+ }
+}
diff --git a/src/PdfMod/PdfMod/PdfListStore.cs b/src/PdfMod/PdfMod/PdfListStore.cs
index 145e7aa..3f80b91 100644
--- a/src/PdfMod/PdfMod/PdfListStore.cs
+++ b/src/PdfMod/PdfMod/PdfListStore.cs
@@ -52,15 +52,27 @@ namespace PdfMod
public void UpdateForPage (TreeIter iter, Page page)
{
SetValue (iter, SortColumn, page.Index);
- SetValue (iter, TooltipColumn, String.Format (Catalog.GetString ("Page {0}"), page.Index + 1));
+            SetValue (iter, TooltipColumn, GetPageTooltip (page));
SetValue (iter, PageColumn, page);
}
internal object [] GetValuesForPage (Page page)
{
return new object[] {
page.Index,
- String.Format (Catalog.GetString ("Page {0}"), page.Index + 1),
+                GetPageTooltip (page),
page
};
}
+
+        // Tooltip text for a page: "label (Page N)" when the document carries page labels,
+        // plain "Page N" otherwise. Shared by UpdateForPage and GetValuesForPage so the two cannot drift apart.
+        private string GetPageTooltip (Page page)
+        {
+            if (page.Document.PageLabels.HasLabels) {
+                return String.Format (Catalog.GetString ("{0} (Page {1})"),
+                                      page.Document.PageLabels.GetLabel (page.Index),
+                                      page.Index + 1);
+            }
+            return String.Format (Catalog.GetString ("Page {0}"), page.Index + 1);
+        }
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]