[tracker-miners: 3/4] tracker-extract: Fix handling of (atend) in ps files



commit 8ca94d715b860af4373168aea4f292f4aa7a24bd
Author: Andrea Azzarone <andrea azzarone canonical com>
Date:   Wed Apr 24 13:37:29 2019 +0100

    tracker-extract: Fix handling of (atend) in ps files
    
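    Properly handle (atend) when parsing a PostScript file: declare the
    pageno_atend and header_finished flags before the read loop instead of
    inside it, so that their state persists from one line to the next.
    Also add a test case.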

 src/tracker-extract/tracker-extract-ps.c                  |  5 ++---
 tests/functional-tests/meson.build                        |  3 ++-
 .../office/ps-doc-atend.expected.json                     | 15 +++++++++++++++
 .../test-extraction-data/office/ps-doc-atend.ps           | 13 +++++++++++++
 4 files changed, 32 insertions(+), 4 deletions(-)
---
diff --git a/src/tracker-extract/tracker-extract-ps.c b/src/tracker-extract/tracker-extract-ps.c
index 207a32a89..5df88a432 100644
--- a/src/tracker-extract/tracker-extract-ps.c
+++ b/src/tracker-extract/tracker-extract-ps.c
@@ -101,6 +101,8 @@ extract_ps_from_inputstream (GInputStream *stream)
        g_autoptr(GDataInputStream) data_stream = NULL;
        gchar *line;
        gsize length, accum, max_bytes;
+       gboolean pageno_atend = FALSE;
+       gboolean header_finished = FALSE;
        g_autoptr(GError) error = NULL;
 
        metadata = tracker_resource_new (NULL);
@@ -114,9 +116,6 @@ extract_ps_from_inputstream (GInputStream *stream)
 
        while ((accum < max_bytes) &&
               (line = g_data_input_stream_read_line (data_stream, &length, NULL, &error)) != NULL) {
-               gboolean pageno_atend = FALSE;
-               gboolean header_finished = FALSE;
-
                /* Update accumulated bytes read */
                accum += length;
 
diff --git a/tests/functional-tests/meson.build b/tests/functional-tests/meson.build
index c30b61218..355d4bd66 100644
--- a/tests/functional-tests/meson.build
+++ b/tests/functional-tests/meson.build
@@ -16,7 +16,8 @@ extractor_tests = [
   'office/office-doc',
   'office/powerpoint',
   'office/pdf-doc',
-  'office/ps-doc'
+  'office/ps-doc',
+  'office/ps-doc-atend'
 ]
 
 if get_option('unzip_ps_gz_files')
diff --git a/tests/functional-tests/test-extraction-data/office/ps-doc-atend.expected.json b/tests/functional-tests/test-extraction-data/office/ps-doc-atend.expected.json
new file mode 100644
index 000000000..20ce67b17
--- /dev/null
+++ b/tests/functional-tests/test-extraction-data/office/ps-doc-atend.expected.json
@@ -0,0 +1,15 @@
+{
+    "test": {
+        "Filename": "ps-doc-atend.ps",
+        "Comment": "PS document from the office tools"
+    },
+    "metadata": {
+        "nfo:pageCount": 1,
+        "nie:copyright": "copyleft",
+        "nco:creator": {
+            "nco:fullname": "vi",
+            "@type": "nco:Contact"
+        },
+        "@type": "nfo:PaginatedTextDocument"
+    }
+}
diff --git a/tests/functional-tests/test-extraction-data/office/ps-doc-atend.ps b/tests/functional-tests/test-extraction-data/office/ps-doc-atend.ps
new file mode 100644
index 000000000..402192e64
--- /dev/null
+++ b/tests/functional-tests/test-extraction-data/office/ps-doc-atend.ps
@@ -0,0 +1,13 @@
+%!PS
+%%Copyright: copyleft
+%%Title: Hello World!
+%%Creator: vi
+%%Pages: (atend)
+%%EndComments
+/Courier
+20 selectfont
+72 500 moveto
+(Hello world!) show
+showpage
+%%Trailer
+%%Pages: 1


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]