[tracker-miners/sam/extract-validation] functional-tests: Validate SPARQL as part of extractor tests

From: Sam Thursfield <sthursfield src gnome org>
To: commits-list gnome org
Cc:
Subject: [tracker-miners/sam/extract-validation] functional-tests: Validate SPARQL as part of extractor tests
Date: Fri, 9 Oct 2020 18:37:43 +0000 (UTC)

commit 127e64bec487afac1b17a973eb445ac59df0a087
Author: Sam Thursfield <sam afuera me uk>
Date:   Fri Oct 9 19:58:44 2020 +0200

    functional-tests: Validate SPARQL as part of extractor tests
    
    The existing tests checked that the correct metadata was
    extracted, but if an extract module produced invalid SPARQL, that
    went undetected until now.
    
    Fixes https://gitlab.gnome.org/GNOME/tracker/-/issues/196

 tests/functional-tests/configuration.py            |   9 ++++--
 tests/functional-tests/extractor-flac-cuesheet.py  |   4 +--
 tests/functional-tests/extractor-generic.py        |  31 +++++++++++++++++++--
 tests/functional-tests/fixtures.py                 |  30 ++++++++++----------
 .../test-extraction-data/office/pdf-doc.pdf        | Bin 236639 -> 240855 bytes
 tests/functional-tests/writeback-audio.py          |   2 +-
 tests/functional-tests/writeback-image-details.py  |   2 +-
 tests/functional-tests/writeback-images.py         |   4 +--
 8 files changed, 58 insertions(+), 24 deletions(-)
---
diff --git a/tests/functional-tests/configuration.py b/tests/functional-tests/configuration.py
index 8ec7e13b5..22ad30102 100644
--- a/tests/functional-tests/configuration.py
+++ b/tests/functional-tests/configuration.py
@@ -21,13 +21,12 @@
 
 from gi.repository import GLib
 
+import configparser
 import errno
 import json
-import logging
 import os
 import shutil
 import tempfile
-import sys
 
 
 if 'TRACKER_FUNCTIONAL_TEST_CONFIG' not in os.environ:
@@ -67,6 +66,12 @@ def cli_subcommands_dir():
     return config['TEST_CLI_SUBCOMMANDS_DIR']
 
 
+def nepomuk_path():
+    parser = configparser.ConfigParser()
+    parser.read(config['TEST_DOMAIN_ONTOLOGY_RULE'])
+    return parser.get('DomainOntology', 'OntologyLocation')
+
+
 # This path is used for test data for tests which expect filesystem monitoring
 # to work. For this reason we must avoid it being on a tmpfs filesystem. Note
 # that this MUST NOT be a hidden directory, as Tracker is hardcoded to ignore
diff --git a/tests/functional-tests/extractor-flac-cuesheet.py b/tests/functional-tests/extractor-flac-cuesheet.py
index 4662d765b..4a0624ecf 100755
--- a/tests/functional-tests/extractor-flac-cuesheet.py
+++ b/tests/functional-tests/extractor-flac-cuesheet.py
@@ -85,8 +85,8 @@ class FlacCuesheetTest(fixtures.TrackerExtractTestCase):
             audio_path = pathlib.Path(tmpdir).joinpath('cuesheet-test.flac')
             datagenerator.create_test_flac(audio_path, duration=6*60)
 
-            result = fixtures.get_tracker_extract_jsonld_output(
-                cfg.test_environment(tmpdir), audio_path)
+            result = fixtures.get_tracker_extract_output(
+                cfg.test_environment(tmpdir), audio_path, output_format='json-ld')
 
         self.assert_extract_result_matches_spec(
             self.spec(audio_path), result, audio_path, __file__)
diff --git a/tests/functional-tests/extractor-generic.py b/tests/functional-tests/extractor-generic.py
index 9098ca91f..59c964a79 100755
--- a/tests/functional-tests/extractor-generic.py
+++ b/tests/functional-tests/extractor-generic.py
@@ -23,6 +23,11 @@ metadata is extracted. Load dynamically the test information from a data
 directory (containing xxx.expected files)
 """
 
+import gi
+gi.require_version('Tracker', '3.0')
+from gi.repository import Gio
+from gi.repository import Tracker
+
 import json
 import os
 import shutil
@@ -64,6 +69,21 @@ class GenericExtractionTestCase(fixtures.TrackerExtractTestCase):
     def __get_bugnumber(self):
         return self.spec['test'].get('Bugzilla')
 
+    def validate_sparql_update(self, sparql):
+        """Create a temporary database and run the given SPARQL update.
+
+        This gives us a smoke test to detect any situation where the
+        extractor generates invalid SPARQL.
+
+        """
+        cancellable = None
+        ontology_path = Gio.File.new_for_uri(cfg.nepomuk_path())
+        db = Tracker.SparqlConnection.new(Tracker.SparqlConnectionFlags.NONE,
+                                          None, # create in-memory database,
+                                          ontology_path,
+                                          cancellable)
+        db.update(sparql, cancellable)
+
     def generic_test_extraction(self):
         abs_description = os.path.abspath(self.descfile)
 
@@ -76,8 +96,15 @@ class GenericExtractionTestCase(fixtures.TrackerExtractTestCase):
         tmpdir = tempfile.mkdtemp(prefix='tracker-extract-test-')
         try:
             extra_env = cfg.test_environment(tmpdir)
-            result = fixtures.get_tracker_extract_jsonld_output(extra_env, self.file_to_extract)
-            self.__assert_extraction_ok(result)
+            jsonld = fixtures.get_tracker_extract_output(extra_env,
+                                                         self.file_to_extract,
+                                                         output_format='json-ld')
+            self.__assert_extraction_ok(jsonld)
+
+            sparql = fixtures.get_tracker_extract_output(extra_env,
+                                                         self.file_to_extract,
+                                                         output_format='sparql')
+            self.validate_sparql_update(sparql)
         finally:
             shutil.rmtree(tmpdir, ignore_errors=True)
 
diff --git a/tests/functional-tests/fixtures.py b/tests/functional-tests/fixtures.py
index 0f1d16da1..56ebbbf6f 100644
--- a/tests/functional-tests/fixtures.py
+++ b/tests/functional-tests/fixtures.py
@@ -245,13 +245,14 @@ class TrackerMinerFTSTest (TrackerMinerTest):
         return int(result[0][0])
 
 
-def get_tracker_extract_jsonld_output(extra_env, filename, mime_type=None):
+def get_tracker_extract_output(extra_env, filename, output_format='json-ld', mime_type=None):
     """
     Runs `tracker-extract --file` to extract metadata from a file.
     """
 
     tracker_extract = os.path.join(cfg.TRACKER_EXTRACT_PATH)
-    command = [tracker_extract, '--output-format=json-ld', '--file', str(filename)]
+    command = [tracker_extract, '--output-format', output_format, '--file',
+               str(filename)]
     if mime_type is not None:
         command.extend(['--mime', mime_type])
 
@@ -285,19 +286,20 @@ def get_tracker_extract_jsonld_output(extra_env, filename, mime_type=None):
         error_output = stderr.decode('utf-8').strip()
         log.debug("Error output from tracker-extract:\n%s", error_output)
 
-    try:
-        output = stdout.decode('utf-8')
-
-        if len(output.strip()) == 0:
-            raise RuntimeError("tracker-extract didn't return any data.\n"
-                               "Error output was: %s" % error_output)
+    output = stdout.decode('utf-8')
 
-        data = json.loads(output)
-    except ValueError as e:
-        raise RuntimeError("tracker-extract did not return valid JSON data: %s\n"
-                           "Output was: %s" % (e, output))
+    if len(output.strip()) == 0:
+        raise RuntimeError("tracker-extract didn't return any data.\n"
+                            "Error output was: %s" % error_output)
 
-    return data
+    if output_format == 'json-ld':
+        try:
+            return json.loads(output)
+        except ValueError as e:
+            raise RuntimeError("tracker-extract did not return valid JSON data: %s\n"
+                               "Output was: %s" % (e, output))
+    else:
+        return output
 
 
 class TrackerExtractTestCase(ut.TestCase):
@@ -326,7 +328,7 @@ class TrackerExtractTestCase(ut.TestCase):
         """
         Checks tracker-extract json-ld output against the expected result.
 
-        Use get_tracker_extract_jsonld_output() to get the extractor output.
+        Use get_tracker_extract_output() to get the extractor output.
 
         Look in test-extraction-data/*/*.expected.json for examples of the spec
         format.
diff --git a/tests/functional-tests/test-extraction-data/office/pdf-doc.pdf b/tests/functional-tests/test-extraction-data/office/pdf-doc.pdf
index 064645c39..bdf13046b 100644
Binary files a/tests/functional-tests/test-extraction-data/office/pdf-doc.pdf and b/tests/functional-tests/test-extraction-data/office/pdf-doc.pdf differ
diff --git a/tests/functional-tests/writeback-audio.py b/tests/functional-tests/writeback-audio.py
index b5cc6df1b..93616d386 100755
--- a/tests/functional-tests/writeback-audio.py
+++ b/tests/functional-tests/writeback-audio.py
@@ -40,7 +40,7 @@ class WritebackAudioTest(fixtures.TrackerWritebackTest):
 
         self.wait_for_file_change(path, initial_mtime)
 
-        results = fixtures.get_tracker_extract_jsonld_output({}, path)
+        results = fixtures.get_tracker_extract_output({}, path, output_format='json-ld')
         self.assertIn(TEST_VALUE, results[prop])
 
     def test_writeback_mp3(self):
diff --git a/tests/functional-tests/writeback-image-details.py b/tests/functional-tests/writeback-image-details.py
index 96c16b20e..3597e63e0 100755
--- a/tests/functional-tests/writeback-image-details.py
+++ b/tests/functional-tests/writeback-image-details.py
@@ -85,7 +85,7 @@ class WritebackKeepDateTest (fixtures.TrackerWritebackTest):
         self.wait_for_file_change(jpeg_path, initial_mtime)
 
         # Check the value is written in the file
-        metadata = fixtures.get_tracker_extract_jsonld_output(self.extra_env, jpeg_path, "")
+        metadata = fixtures.get_tracker_extract_output(self.extra_env, jpeg_path, output_format='json-ld')
 
         tags = metadata.get('nao:hasTag', [])
         tag_names = [tag['nao:prefLabel'] for tag in tags]
diff --git a/tests/functional-tests/writeback-images.py b/tests/functional-tests/writeback-images.py
index 18344c907..d1182d2c2 100755
--- a/tests/functional-tests/writeback-images.py
+++ b/tests/functional-tests/writeback-images.py
@@ -63,7 +63,7 @@ class WritebackImagesTest(fixtures.TrackerWritebackTest):
         self.wait_for_file_change(path, initial_mtime)
         log.debug("Got the change")
 
-        results = fixtures.get_tracker_extract_jsonld_output({}, path, mimetype)
+        results = fixtures.get_tracker_extract_output({}, path, mime_type=mimetype, output_format='json-ld')
         keyDict = expectedKey or prop
         self.assertIn(TEST_VALUE, results[keyDict])
 
@@ -87,7 +87,7 @@ class WritebackImagesTest(fixtures.TrackerWritebackTest):
 
         self.wait_for_file_change(path, initial_mtime)
 
-        results = fixtures.get_tracker_extract_jsonld_output(self.extra_env, filename, mimetype)
+        results = fixtures.get_tracker_extract_output(self.extra_env, filename, mime_type=mimetype, output_format='json-ld')
         self.assertIn("testTag", results["nao:hasTag"])
 
     # JPEG test
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]