[tracker-miners/tracker-miners-2.2: 10/47] tracker-extract-text: Try to read file even if n_bytes == 0



commit ac31e5ec5c87515444e43dba876d9f7bc755534b
Author: Sam Thursfield <sam afuera me uk>
Date:   Sat May 4 00:39:57 2019 +0200

    tracker-extract-text: Try to read file even if n_bytes == 0
    
    This makes a clear distinction between two cases:
    
      1. the user asked us not to read text files
      2. the user wants us to read a text file, but we can't
    
    For a long time, our behaviour was to always insert a resource into
    tracker-store for the text file, even if we failed to read it.
    Since 2eda05bb95419e3 we return an error code if we tried to read and
    failed, but not when org.freedesktop.Tracker.Extract.max-bytes was set
    to 0, because in that case we wouldn't even try to read the file.
    
    This commit changes the code to always read the file in order to check
    it exists. There is a performance penalty for users who set max-bytes to
    0, but it seems unlikely that someone would do that while still having
    tracker-extract enabled for other types of file.
    
    Suggested in
    https://gitlab.gnome.org/GNOME/tracker-miners/merge_requests/62#note_501732

 src/tracker-extract/tracker-extract-text.c | 14 ++++----------
 src/tracker-extract/tracker-read.c         | 20 +++++++++-----------
 2 files changed, 13 insertions(+), 21 deletions(-)
---
diff --git a/src/tracker-extract/tracker-extract-text.c b/src/tracker-extract/tracker-extract-text.c
index a75909bfd..24da9b77f 100644
--- a/src/tracker-extract/tracker-extract-text.c
+++ b/src/tracker-extract/tracker-extract-text.c
@@ -46,8 +46,6 @@ get_file_content (GFile *file,
        gchar *text, *uri, *path;
        int fd;
 
-       g_return_val_if_fail (n_bytes > 0, NULL);
-
        uri = g_file_get_uri (file);
 
        /* Get filename from URI */
@@ -82,19 +80,15 @@ tracker_extract_get_metadata (TrackerExtractInfo *info)
 {
        TrackerResource *metadata;
        TrackerConfig *config;
-       gsize n_bytes;
        gchar *content = NULL;
 
        config = tracker_main_get_config ();
 
-       n_bytes = tracker_config_get_max_bytes (config);
-       if (n_bytes > 0) {
-               content = get_file_content (tracker_extract_info_get_file (info), n_bytes);
+       content = get_file_content (tracker_extract_info_get_file (info), tracker_config_get_max_bytes (config));
 
-               if (content == NULL) {
-                       /* An error occurred, perhaps the file was deleted. */
-                       return FALSE;
-               }
+       if (content == NULL) {
+               /* An error occurred, perhaps the file was deleted. */
+               return FALSE;
        }
 
        metadata = tracker_resource_new (NULL);
diff --git a/src/tracker-extract/tracker-read.c b/src/tracker-extract/tracker-read.c
index 89ae1337a..5aac7b7c7 100644
--- a/src/tracker-extract/tracker-read.c
+++ b/src/tracker-extract/tracker-read.c
@@ -93,7 +93,7 @@ process_chunk (const gchar  *read_bytes,
                gsize         read_size,
                gsize         buffer_size,
                gsize        *remaining_size,
-               GString     **s)
+               GString      *s)
 {
        /* If no more bytes to read, halt loop */
        if (read_size == 0) {
@@ -114,7 +114,7 @@ process_chunk (const gchar  *read_bytes,
         * UTF-16LE), so we can't rely on methods which assume
         * NUL-terminated strings, as g_strstr_len().
         */
-       if (*s == NULL) {
+       if (s->len == 0) {
                if (read_size <= 3) {
                        g_debug ("  File has less than 3 characters in it, "
                                 "not indexing file");
@@ -153,9 +153,7 @@ process_chunk (const gchar  *read_bytes,
                 *remaining_size);
 
        /* Append non-NIL terminated bytes */
-       *s = (*s ?
-             g_string_append_len (*s, read_bytes, read_size) :
-             g_string_new_len (read_bytes, read_size));
+       g_string_append_len (s, read_bytes, read_size);
 
        return TRUE;
 }
@@ -305,7 +303,7 @@ tracker_read_text_from_stream (GInputStream *stream,
                                    n_bytes_read,
                                    BUFFER_SIZE,
                                    &n_bytes_remaining,
-                                   &s)) {
+                                   s)) {
                        break;
                }
        }
@@ -333,17 +331,17 @@ tracker_read_text_from_fd (gint  fd,
                            gsize max_bytes)
 {
        FILE *fz;
-       GString *s = NULL;
+       GString *s;
        gsize n_bytes_remaining = max_bytes;
 
-       g_return_val_if_fail (max_bytes > 0, NULL);
-
        if ((fz = fdopen (fd, "r")) == NULL) {
                g_warning ("Cannot read from FD... could not extract text");
                close (fd);
                return NULL;
        }
 
+       s = g_string_new ("");
+
        /* Reading in chunks of BUFFER_SIZE
         *   Loop is halted whenever one of this conditions is met:
         *     a) Read bytes reached the maximum allowed (max_bytes)
@@ -367,7 +365,7 @@ tracker_read_text_from_fd (gint  fd,
                                    n_bytes_read,
                                    BUFFER_SIZE,
                                    &n_bytes_remaining,
-                                   &s)) {
+                                   s)) {
                        break;
                }
        }
@@ -380,5 +378,5 @@ tracker_read_text_from_fd (gint  fd,
        fclose (fz);
 
        /* Validate UTF-8 if something was read, and return it */
-       return s ? process_whole_string (s) : NULL;
+       return process_whole_string (s);
 }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]