[tracker/extractor-remove-word-counting-review] Moved the istream text reader to a separate file
- From: Aleksander Morgado <aleksm src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/extractor-remove-word-counting-review] Moved the istream text reader to a separate file
- Date: Tue, 11 May 2010 13:11:36 +0000 (UTC)
commit de531f5fdd8838884733189ab1974cfe49354b05
Author: Aleksander Morgado <aleksander lanedo com>
Date: Tue May 11 12:38:54 2010 +0200
Moved the istream text reader to a separate file
src/tracker-extract/Makefile.am | 2 +
src/tracker-extract/tracker-extract-text.c | 82 ++------------------
src/tracker-extract/tracker-istream.c | 114 ++++++++++++++++++++++++++++
src/tracker-extract/tracker-istream.h | 34 ++++++++
4 files changed, 156 insertions(+), 76 deletions(-)
---
diff --git a/src/tracker-extract/Makefile.am b/src/tracker-extract/Makefile.am
index d9b6c39..07d0593 100644
--- a/src/tracker-extract/Makefile.am
+++ b/src/tracker-extract/Makefile.am
@@ -304,6 +304,8 @@ tracker_extract_SOURCES = \
tracker-dbus.h \
tracker-extract.c \
tracker-extract.h \
+ tracker-istream.c \
+ tracker-istream.h \
tracker-main.c \
tracker-main.h \
tracker-albumart-generic.h
diff --git a/src/tracker-extract/tracker-extract-text.c b/src/tracker-extract/tracker-extract-text.c
index 07f55b3..09a7340 100644
--- a/src/tracker-extract/tracker-extract-text.c
+++ b/src/tracker-extract/tracker-extract-text.c
@@ -27,11 +27,10 @@
#include <libtracker-extract/tracker-extract.h>
#include "tracker-main.h"
+#include "tracker-istream.h"
#undef TRY_LOCALE_TO_UTF8_CONVERSION
-#define TEXT_BUFFER_SIZE 65535 /* bytes */
-
static void extract_text (const gchar *uri,
TrackerSparqlBuilder *preupdate,
TrackerSparqlBuilder *metadata);
@@ -81,10 +80,8 @@ get_file_content (const gchar *uri,
GFile *file;
GFileInputStream *stream;
GError *error = NULL;
- GString *s = NULL;
- gchar buf[TEXT_BUFFER_SIZE];
- gsize n_bytes_remaining;
- gsize n_valid_utf8_bytes;
+ GString *s;
+ gsize n_valid_utf8_bytes = 0;
file = g_file_new_for_uri (uri);
stream = g_file_read (file, NULL, &error);
@@ -102,76 +99,9 @@ get_file_content (const gchar *uri,
g_debug (" Starting to read '%s' up to %" G_GSIZE_FORMAT " bytes...",
uri, n_bytes);
- /* Reading in chunks of TEXT_BUFFER_SIZE (8192)
- * Loop is halted whenever one of this conditions is met:
- * a) Read bytes reached the maximum allowed (n_bytes)
- * b) No more bytes to read
- * c) Error reading
- * d) File has less than 3 bytes
- * e) File has a single line of TEXT_BUFFER_SIZE bytes with
- * no EOL
- */
- n_bytes_remaining = n_bytes;
- while (n_bytes_remaining > 0) {
- gssize bytes_read;
-
- /* Read n_bytes_remaining or TEXT_BUFFER_SIZE bytes */
- bytes_read = g_input_stream_read (G_INPUT_STREAM (stream),
- buf,
- MIN (TEXT_BUFFER_SIZE, n_bytes_remaining),
- NULL,
- &error);
-
- /* If any error reading, halt the loop */
- if (error) {
- g_message ("Error reading from '%s': '%s'",
- uri,
- error->message);
- g_error_free (error);
- break;
- }
-
- /* If no more bytes to read, halt loop */
- if(bytes_read == 0) {
- break;
- }
-
- /* First of all, check if this is the first time we
- * have tried to read the file up to the TEXT_BUFFER_SIZE
- * limit. Then make sure that we read the maximum size
- * of the buffer. If we don't do this, there is the
- * case where we read 10 bytes in and it is just one
- * line with no '\n'. Once we have confirmed this we
- * check that the buffer has a '\n' to make sure the
- * file is worth indexing. Similarly if the file has
- * <= 3 bytes then we drop it.
- */
- if (s == NULL) {
- if (bytes_read == TEXT_BUFFER_SIZE &&
- g_strstr_len (buf, bytes_read, "\n") == NULL) {
- g_debug (" No '\\n' in the first %" G_GSSIZE_FORMAT " bytes, not indexing file",
- bytes_read);
- break;
- } else if (bytes_read <= 2) {
- g_debug (" File has less than 3 characters in it, not indexing file");
- break;
- }
- }
-
- /* Update remaining bytes */
- n_bytes_remaining -= bytes_read;
-
- g_debug (" Read "
- "%" G_GSSIZE_FORMAT " bytes this time, "
- "%" G_GSIZE_FORMAT " bytes remaining",
- bytes_read,
- n_bytes_remaining);
-
- /* Append non-NIL terminated bytes */
- s = (s == NULL ?
- g_string_new_len (buf, bytes_read) :
- g_string_append_len (s, buf, bytes_read));
- }
+ /* Read up to n_bytes from stream */
+ s = tracker_istream_read_text (G_INPUT_STREAM (stream),
+ n_bytes);
/* If nothing really read, return here */
if (!s) {
diff --git a/src/tracker-extract/tracker-istream.c b/src/tracker-extract/tracker-istream.c
new file mode 100644
index 0000000..2d75373
--- /dev/null
+++ b/src/tracker-extract/tracker-istream.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2010, Nokia <ivan frade nokia com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include <string.h>
+
+#include <glib.h>
+#include <gio/gio.h>
+
+#include "tracker-istream.h"
+
+#define BUFFER_SIZE 65535 /* bytes */
+
+GString *
+tracker_istream_read_text (GInputStream *stream,
+ gsize max_bytes)
+{ /* Reads up to max_bytes from stream into a GString; returns NULL if no chunk was kept */
+ GString *s = NULL; /* Stays NULL until the first chunk is accepted */
+ guchar buf[BUFFER_SIZE]; /* NOTE(review): the reader this was moved from used gchar; buf is passed to g_strstr_len() and g_string_new_len() below - confirm guchar is intended */
+ gsize n_bytes_remaining;
+ GError *error = NULL;
+
+ g_return_val_if_fail (stream, NULL);
+ g_return_val_if_fail (max_bytes > 0, NULL);
+
+ /* Reading in chunks of BUFFER_SIZE
+ * Loop is halted whenever one of these conditions is met:
+ * a) Read bytes reached the maximum allowed (max_bytes)
+ * b) No more bytes to read
+ * c) Error reading
+ * d) First chunk read has less than 3 bytes
+ * e) First chunk read has BUFFER_SIZE bytes with no EOL
+ */
+ n_bytes_remaining = max_bytes;
+ while (n_bytes_remaining > 0) {
+ gssize bytes_read;
+
+ /* Read n_bytes_remaining or BUFFER_SIZE bytes, whichever is smaller */
+ bytes_read = g_input_stream_read (stream,
+ buf,
+ MIN (BUFFER_SIZE, n_bytes_remaining),
+ NULL,
+ &error);
+
+ /* If any error reading, log it, free it and halt the loop */
+ if (error) {
+ g_message ("Error reading from stream: '%s'",
+ error->message);
+ g_error_free (error);
+ break;
+ }
+
+ /* If no more bytes to read, halt loop */
+ if(bytes_read == 0) {
+ break;
+ }
+
+ /* These checks only apply to the first chunk, i.e. while
+ * nothing has been appended to the string yet. If that
+ * chunk filled the whole buffer (BUFFER_SIZE bytes) and
+ * contains no '\n', the content is considered not worth
+ * indexing. We require a full buffer for this check,
+ * otherwise a short file (e.g. 10 bytes in one line
+ * with no '\n') would be dropped as well. Similarly, if
+ * the first chunk has fewer than 3 bytes (bytes_read <= 2)
+ * then we drop it.
+ */
+ if (s == NULL) {
+ if (bytes_read == BUFFER_SIZE &&
+ g_strstr_len (buf, bytes_read, "\n") == NULL) {
+ g_debug (" No '\\n' in the first %" G_GSSIZE_FORMAT " bytes, "
+ "not indexing file",
+ bytes_read);
+ break;
+ } else if (bytes_read <= 2) {
+ g_debug (" File has less than 3 characters in it, "
+ "not indexing file");
+ break;
+ }
+ }
+
+ /* Update remaining bytes */
+ n_bytes_remaining -= bytes_read;
+
+ g_debug (" Read "
+ "%" G_GSSIZE_FORMAT " bytes this time, "
+ "%" G_GSIZE_FORMAT " bytes remaining",
+ bytes_read,
+ n_bytes_remaining);
+
+ /* Append the chunk by explicit length (bytes_read), not as a C string */
+ s = (s == NULL ?
+ g_string_new_len (buf, bytes_read) :
+ g_string_append_len (s, buf, bytes_read));
+ }
+
+ /* Return whatever we got; still NULL if no chunk was accepted */
+ return s;
+}
diff --git a/src/tracker-extract/tracker-istream.h b/src/tracker-extract/tracker-istream.h
new file mode 100644
index 0000000..f155dd2
--- /dev/null
+++ b/src/tracker-extract/tracker-istream.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2010, Nokia <ivan frade nokia com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __TRACKER_ISTREAM_H__
+#define __TRACKER_ISTREAM_H__
+
+#include <glib.h>
+#include <gio/gio.h>
+
+G_BEGIN_DECLS
+
+GString *tracker_istream_read_text (GInputStream *stream,
+ gsize max_bytes);
+
+G_END_DECLS
+
+#endif /* __TRACKER_ISTREAM_H__ */
+
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]