[tracker-miners: 3/4] tracker-extract: Fix text extraction from .odg and .ods




commit 2f0f9840ef781b91d6e6133862ed30a01fbbc03c
Author: Jörn-Thorben Hinz <82747-jtdor users noreply gitlab gnome org>
Date:   Mon Jun 27 19:23:11 2022 +0000

    tracker-extract: Fix text extraction from .odg and .ods
    
    It was broken after 062e726bc ("tracker-extract-oasis.c: Improve parsing
    of .odt files").
    
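    .odg and .ods files (only?) contain text:p tags. The mentioned commit
    changed a `g_ascii_strncasecmp("text", ..., 4)` call, which compares
    only the first four characters, to `g_ascii_strcasecmp("text", ...)`,
    which compares the whole element name. As a result "text:p" no longer
    matched "text", breaking the detection of text in those files.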

 src/tracker-extract/tracker-extract-oasis.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)
---
diff --git a/src/tracker-extract/tracker-extract-oasis.c b/src/tracker-extract/tracker-extract-oasis.c
index 8018a5001..e05f33ef4 100644
--- a/src/tracker-extract/tracker-extract-oasis.c
+++ b/src/tracker-extract/tracker-extract-oasis.c
@@ -460,6 +460,12 @@ xml_start_element_handler_content (GMarkupParseContext  *context,
                        return; \
                };
 
+       #define handle_tag_and_return_n(name, id, n) \
+               if (g_ascii_strncasecmp (element_name, name, n) == 0) { \
+                       push_tag (id); \
+                       return; \
+               };
+
        switch (data->file_type) {
        case FILE_TYPE_ODT:
                handle_tag_and_return ("text:p", ODT_TAG_TYPE_WORD_TEXT);
@@ -479,12 +485,12 @@ xml_start_element_handler_content (GMarkupParseContext  *context,
                return;
 
        case FILE_TYPE_ODS:
-               handle_tag_and_return ("text", ODT_TAG_TYPE_SPREADSHEET_TEXT);
+               handle_tag_and_return_n ("text", ODT_TAG_TYPE_SPREADSHEET_TEXT, 4);
                push_tag (ODT_TAG_TYPE_UNKNOWN);
                return;
 
        case FILE_TYPE_ODG:
-               handle_tag_and_return ("text", ODT_TAG_TYPE_GRAPHICS_TEXT);
+               handle_tag_and_return_n ("text", ODT_TAG_TYPE_GRAPHICS_TEXT, 4);
                push_tag (ODT_TAG_TYPE_UNKNOWN);
                return;
 
@@ -496,6 +502,7 @@ xml_start_element_handler_content (GMarkupParseContext  *context,
 
        #undef push_tag
        #undef handle_tag_and_return
+       #undef handle_tag_and_return_n
 }
 
 static void


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]