[tracker-miners/sam/extract-validation] functional-tests: Validate SPARQL as part of extractor tests

From: Sam Thursfield <sthursfield src gnome org>
To: commits-list gnome org
Cc:
Subject: [tracker-miners/sam/extract-validation] functional-tests: Validate SPARQL as part of extractor tests
Date: Fri, 9 Oct 2020 18:12:58 +0000 (UTC)

commit d2ff5f9339bee08789cdb0240c1647939e1f19c7
Author: Sam Thursfield <sam afuera me uk>
Date:   Fri Oct 9 19:58:44 2020 +0200

    functional-tests: Validate SPARQL as part of extractor tests
    
    The existing tests served to check that correct metadata was
    extracted. But if an extract module produced invalid SPARQL, that
    would not have been detected until now.
    
    Fixes https://gitlab.gnome.org/GNOME/tracker/-/issues/196

 tests/functional-tests/extractor-flac-cuesheet.py |  4 +--
 tests/functional-tests/extractor-generic.py       | 31 +++++++++++++++++++++--
 tests/functional-tests/fixtures.py                | 30 ++++++++++++----------
 tests/functional-tests/writeback-audio.py         |  2 +-
 tests/functional-tests/writeback-image-details.py |  2 +-
 tests/functional-tests/writeback-images.py        |  4 +--
 6 files changed, 51 insertions(+), 22 deletions(-)
---
diff --git a/tests/functional-tests/extractor-flac-cuesheet.py b/tests/functional-tests/extractor-flac-cuesheet.py
index 4662d765b..4a0624ecf 100755
--- a/tests/functional-tests/extractor-flac-cuesheet.py
+++ b/tests/functional-tests/extractor-flac-cuesheet.py
@@ -85,8 +85,8 @@ class FlacCuesheetTest(fixtures.TrackerExtractTestCase):
             audio_path = pathlib.Path(tmpdir).joinpath('cuesheet-test.flac')
             datagenerator.create_test_flac(audio_path, duration=6*60)
 
-            result = fixtures.get_tracker_extract_jsonld_output(
-                cfg.test_environment(tmpdir), audio_path)
+            result = fixtures.get_tracker_extract_output(
+                cfg.test_environment(tmpdir), audio_path, output_format='json-ld')
 
         self.assert_extract_result_matches_spec(
             self.spec(audio_path), result, audio_path, __file__)
diff --git a/tests/functional-tests/extractor-generic.py b/tests/functional-tests/extractor-generic.py
index 9098ca91f..cff159fdf 100755
--- a/tests/functional-tests/extractor-generic.py
+++ b/tests/functional-tests/extractor-generic.py
@@ -23,6 +23,10 @@ metadata is extracted. Load dynamically the test information from a data
 directory (containing xxx.expected files)
 """
 
+import gi
+gi.require_version('Tracker', '3.0')
+from gi.repository import Tracker
+
 import json
 import os
 import shutil
@@ -64,6 +68,20 @@ class GenericExtractionTestCase(fixtures.TrackerExtractTestCase):
     def __get_bugnumber(self):
         return self.spec['test'].get('Bugzilla')
 
+    def validate_sparql_update(self, sparql):
+        """Create a temporary database and run the given SPARQL update.
+
+        This gives us a smoke test to detect any situation where the
+        extractor generates invalid SPARQL.
+
+        """
+        cancellable = None
+        db = Tracker.SparqlConnection.new(Tracker.SparqlConnectionFlags.NONE,
+                                          None, # create in-memory database,
+                                          Tracker.sparql_get_ontology_nepomuk(),
+                                          cancellable)
+        db.update(sparql, cancellable)
+
     def generic_test_extraction(self):
         abs_description = os.path.abspath(self.descfile)
 
@@ -76,11 +94,20 @@ class GenericExtractionTestCase(fixtures.TrackerExtractTestCase):
         tmpdir = tempfile.mkdtemp(prefix='tracker-extract-test-')
         try:
             extra_env = cfg.test_environment(tmpdir)
-            result = fixtures.get_tracker_extract_jsonld_output(extra_env, self.file_to_extract)
-            self.__assert_extraction_ok(result)
+            jsonld = fixtures.get_tracker_extract_output(extra_env,
+                                                         self.file_to_extract,
+                                                         output_format='json-ld')
+            self.__assert_extraction_ok(jsonld)
+
+            sparql = fixtures.get_tracker_extract_output(extra_env,
+                                                         self.file_to_extract,
+                                                         output_format='sparql')
+            self.validate_sparql_update(sparql)
         finally:
             shutil.rmtree(tmpdir, ignore_errors=True)
 
+
+
     @ut.expectedFailure
     def expected_failure_test_extraction(self):
         self.generic_test_extraction()
diff --git a/tests/functional-tests/fixtures.py b/tests/functional-tests/fixtures.py
index 0f1d16da1..56ebbbf6f 100644
--- a/tests/functional-tests/fixtures.py
+++ b/tests/functional-tests/fixtures.py
@@ -245,13 +245,14 @@ class TrackerMinerFTSTest (TrackerMinerTest):
         return int(result[0][0])
 
 
-def get_tracker_extract_jsonld_output(extra_env, filename, mime_type=None):
+def get_tracker_extract_output(extra_env, filename, output_format='json-ld', mime_type=None):
     """
     Runs `tracker-extract --file` to extract metadata from a file.
     """
 
     tracker_extract = os.path.join(cfg.TRACKER_EXTRACT_PATH)
-    command = [tracker_extract, '--output-format=json-ld', '--file', str(filename)]
+    command = [tracker_extract, '--output-format', output_format, '--file',
+               str(filename)]
     if mime_type is not None:
         command.extend(['--mime', mime_type])
 
@@ -285,19 +286,20 @@ def get_tracker_extract_jsonld_output(extra_env, filename, mime_type=None):
         error_output = stderr.decode('utf-8').strip()
         log.debug("Error output from tracker-extract:\n%s", error_output)
 
-    try:
-        output = stdout.decode('utf-8')
-
-        if len(output.strip()) == 0:
-            raise RuntimeError("tracker-extract didn't return any data.\n"
-                               "Error output was: %s" % error_output)
+    output = stdout.decode('utf-8')
 
-        data = json.loads(output)
-    except ValueError as e:
-        raise RuntimeError("tracker-extract did not return valid JSON data: %s\n"
-                           "Output was: %s" % (e, output))
+    if len(output.strip()) == 0:
+        raise RuntimeError("tracker-extract didn't return any data.\n"
+                            "Error output was: %s" % error_output)
 
-    return data
+    if output_format == 'json-ld':
+        try:
+            return json.loads(output)
+        except ValueError as e:
+            raise RuntimeError("tracker-extract did not return valid JSON data: %s\n"
+                               "Output was: %s" % (e, output))
+    else:
+        return output
 
 
 class TrackerExtractTestCase(ut.TestCase):
@@ -326,7 +328,7 @@ class TrackerExtractTestCase(ut.TestCase):
         """
         Checks tracker-extract json-ld output against the expected result.
 
-        Use get_tracker_extract_jsonld_output() to get the extractor output.
+        Use get_tracker_extract_output() to get the extractor output.
 
         Look in test-extraction-data/*/*.expected.json for examples of the spec
         format.
diff --git a/tests/functional-tests/writeback-audio.py b/tests/functional-tests/writeback-audio.py
index b5cc6df1b..93616d386 100755
--- a/tests/functional-tests/writeback-audio.py
+++ b/tests/functional-tests/writeback-audio.py
@@ -40,7 +40,7 @@ class WritebackAudioTest(fixtures.TrackerWritebackTest):
 
         self.wait_for_file_change(path, initial_mtime)
 
-        results = fixtures.get_tracker_extract_jsonld_output({}, path)
+        results = fixtures.get_tracker_extract_output({}, path, output_format='json-ld')
         self.assertIn(TEST_VALUE, results[prop])
 
     def test_writeback_mp3(self):
diff --git a/tests/functional-tests/writeback-image-details.py b/tests/functional-tests/writeback-image-details.py
index 96c16b20e..3597e63e0 100755
--- a/tests/functional-tests/writeback-image-details.py
+++ b/tests/functional-tests/writeback-image-details.py
@@ -85,7 +85,7 @@ class WritebackKeepDateTest (fixtures.TrackerWritebackTest):
         self.wait_for_file_change(jpeg_path, initial_mtime)
 
         # Check the value is written in the file
-        metadata = fixtures.get_tracker_extract_jsonld_output(self.extra_env, jpeg_path, "")
+        metadata = fixtures.get_tracker_extract_output(self.extra_env, jpeg_path, output_format='json-ld')
 
         tags = metadata.get('nao:hasTag', [])
         tag_names = [tag['nao:prefLabel'] for tag in tags]
diff --git a/tests/functional-tests/writeback-images.py b/tests/functional-tests/writeback-images.py
index 18344c907..d1182d2c2 100755
--- a/tests/functional-tests/writeback-images.py
+++ b/tests/functional-tests/writeback-images.py
@@ -63,7 +63,7 @@ class WritebackImagesTest(fixtures.TrackerWritebackTest):
         self.wait_for_file_change(path, initial_mtime)
         log.debug("Got the change")
 
-        results = fixtures.get_tracker_extract_jsonld_output({}, path, mimetype)
+        results = fixtures.get_tracker_extract_output({}, path, mime_type=mimetype, output_format='json-ld')
         keyDict = expectedKey or prop
         self.assertIn(TEST_VALUE, results[keyDict])
 
@@ -87,7 +87,7 @@ class WritebackImagesTest(fixtures.TrackerWritebackTest):
 
         self.wait_for_file_change(path, initial_mtime)
 
-        results = fixtures.get_tracker_extract_jsonld_output(self.extra_env, filename, mimetype)
+        results = fixtures.get_tracker_extract_output(self.extra_env, filename, mime_type=mimetype, output_format='json-ld')
         self.assertIn("testTag", results["nao:hasTag"])
 
     # JPEG test
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]