[tracker] Fixes GB#631391: MsOffice-XML content extraction doesn't work properly
- From: Aleksander Morgado <aleksm src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker] Fixes GB#631391: MsOffice-XML content extraction doesn't work properly
- Date: Fri, 22 Oct 2010 10:05:07 +0000 (UTC)
commit 127734d05f40e393eb2edc1e90e756aeb24c6198
Author: Aleksander Morgado <aleksander lanedo com>
Date: Fri Oct 22 11:49:47 2010 +0200
Fixes GB#631391: MsOffice-XML content extraction doesn't work properly
* We were extracting text if and only if the text had some formatting applied,
which is wrong.
src/tracker-extract/tracker-extract-msoffice.c | 34 +----------------------
1 files changed, 2 insertions(+), 32 deletions(-)
---
diff --git a/src/tracker-extract/tracker-extract-msoffice.c b/src/tracker-extract/tracker-extract-msoffice.c
index 2322699..db729a9 100644
--- a/src/tracker-extract/tracker-extract-msoffice.c
+++ b/src/tracker-extract/tracker-extract-msoffice.c
@@ -1900,42 +1900,12 @@ xml_text_handler_document_data (GMarkupParseContext *context,
GError **error)
{
MsOfficeXMLParserInfo *info = user_data;
- static gboolean found = FALSE;
static gboolean added = FALSE;
switch (info->tag_type) {
case MS_OFFICE_XML_TAG_WORD_TEXT:
- if (info->style_element_present) {
- if (atoi (text) == 0) {
- tracker_text_validate_utf8 (text, -1, &info->content, NULL);
- g_string_append_c (info->content, ' ');
- }
- }
-
- if (info->preserve_attribute_present) {
- gchar *keywords = g_strdup (text);
- if (found) {
- tracker_text_validate_utf8 (text, -1, &info->content, NULL);
- g_string_append_c (info->content, ' ');
- found = FALSE;
- } else {
- gchar *lasts;
- gchar *keyw;
-
- for (keyw = strtok_r (keywords, ",; ", &lasts);
- keyw;
- keyw = strtok_r (NULL, ",; ", &lasts)) {
- if ((g_ascii_strncasecmp (keyw, "Table", 6) == 0) ||
- (g_ascii_strncasecmp (keyw, "Figure", 6) == 0) ||
- (g_ascii_strncasecmp (keyw, "Section", 7) == 0) ||
- (g_ascii_strncasecmp (keyw, "Index", 5) == 0)) {
- found = TRUE;
- }
- }
- }
-
- g_free (keywords);
- }
+ tracker_text_validate_utf8 (text, -1, &info->content, NULL);
+ g_string_append_c (info->content, ' ');
break;
case MS_OFFICE_XML_TAG_SLIDE_TEXT:
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]