[tracker-miners/tracker-miners-2.2: 10/47] tracker-extract-text: Try to read file even if n_bytes == 0
- From: Sam Thursfield <sthursfield src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker-miners/tracker-miners-2.2: 10/47] tracker-extract-text: Try to read file even if n_bytes == 0
- Date: Mon, 5 Aug 2019 15:47:55 +0000 (UTC)
commit ac31e5ec5c87515444e43dba876d9f7bc755534b
Author: Sam Thursfield <sam afuera me uk>
Date: Sat May 4 00:39:57 2019 +0200
tracker-extract-text: Try to read file even if n_bytes == 0
This makes a clear distinction between two cases:
1. the user asked us not to read text files
2. the user wants us to read a text file, but we can't
For a long time, our behaviour was to always insert a resource into
tracker-store for the text file, even if we failed to read it.
Since 2eda05bb95419e3 we return an error code if we tried to read and
failed, but not when org.freedesktop.Tracker.Extract.max-bytes was set
to 0, as in that case we wouldn't even try to read the file.
This commit changes the code to always read the file in order to check
that it exists. There is a performance penalty for users who set max-bytes
to 0, but it seems unlikely that someone would do that while still having
tracker-extract enabled for other types of files.
Suggested in
https://gitlab.gnome.org/GNOME/tracker-miners/merge_requests/62#note_501732
src/tracker-extract/tracker-extract-text.c | 14 ++++----------
src/tracker-extract/tracker-read.c | 20 +++++++++-----------
2 files changed, 13 insertions(+), 21 deletions(-)
---
diff --git a/src/tracker-extract/tracker-extract-text.c b/src/tracker-extract/tracker-extract-text.c
index a75909bfd..24da9b77f 100644
--- a/src/tracker-extract/tracker-extract-text.c
+++ b/src/tracker-extract/tracker-extract-text.c
@@ -46,8 +46,6 @@ get_file_content (GFile *file,
gchar *text, *uri, *path;
int fd;
- g_return_val_if_fail (n_bytes > 0, NULL);
-
uri = g_file_get_uri (file);
/* Get filename from URI */
@@ -82,19 +80,15 @@ tracker_extract_get_metadata (TrackerExtractInfo *info)
{
TrackerResource *metadata;
TrackerConfig *config;
- gsize n_bytes;
gchar *content = NULL;
config = tracker_main_get_config ();
- n_bytes = tracker_config_get_max_bytes (config);
- if (n_bytes > 0) {
- content = get_file_content (tracker_extract_info_get_file (info), n_bytes);
+ content = get_file_content (tracker_extract_info_get_file (info), tracker_config_get_max_bytes
(config));
- if (content == NULL) {
- /* An error occurred, perhaps the file was deleted. */
- return FALSE;
- }
+ if (content == NULL) {
+ /* An error occurred, perhaps the file was deleted. */
+ return FALSE;
}
metadata = tracker_resource_new (NULL);
diff --git a/src/tracker-extract/tracker-read.c b/src/tracker-extract/tracker-read.c
index 89ae1337a..5aac7b7c7 100644
--- a/src/tracker-extract/tracker-read.c
+++ b/src/tracker-extract/tracker-read.c
@@ -93,7 +93,7 @@ process_chunk (const gchar *read_bytes,
gsize read_size,
gsize buffer_size,
gsize *remaining_size,
- GString **s)
+ GString *s)
{
/* If no more bytes to read, halt loop */
if (read_size == 0) {
@@ -114,7 +114,7 @@ process_chunk (const gchar *read_bytes,
* UTF-16LE), so we can't rely on methods which assume
* NUL-terminated strings, as g_strstr_len().
*/
- if (*s == NULL) {
+ if (s->len == 0) {
if (read_size <= 3) {
g_debug (" File has less than 3 characters in it, "
"not indexing file");
@@ -153,9 +153,7 @@ process_chunk (const gchar *read_bytes,
*remaining_size);
/* Append non-NIL terminated bytes */
- *s = (*s ?
- g_string_append_len (*s, read_bytes, read_size) :
- g_string_new_len (read_bytes, read_size));
+ g_string_append_len (s, read_bytes, read_size);
return TRUE;
}
@@ -305,7 +303,7 @@ tracker_read_text_from_stream (GInputStream *stream,
n_bytes_read,
BUFFER_SIZE,
&n_bytes_remaining,
- &s)) {
+ s)) {
break;
}
}
@@ -333,17 +331,17 @@ tracker_read_text_from_fd (gint fd,
gsize max_bytes)
{
FILE *fz;
- GString *s = NULL;
+ GString *s;
gsize n_bytes_remaining = max_bytes;
- g_return_val_if_fail (max_bytes > 0, NULL);
-
if ((fz = fdopen (fd, "r")) == NULL) {
g_warning ("Cannot read from FD... could not extract text");
close (fd);
return NULL;
}
+ s = g_string_new ("");
+
/* Reading in chunks of BUFFER_SIZE
* Loop is halted whenever one of this conditions is met:
* a) Read bytes reached the maximum allowed (max_bytes)
@@ -367,7 +365,7 @@ tracker_read_text_from_fd (gint fd,
n_bytes_read,
BUFFER_SIZE,
&n_bytes_remaining,
- &s)) {
+ s)) {
break;
}
}
@@ -380,5 +378,5 @@ tracker_read_text_from_fd (gint fd,
fclose (fz);
/* Validate UTF-8 if something was read, and return it */
- return s ? process_whole_string (s) : NULL;
+ return process_whole_string (s);
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]