[tracker/tracker-0.14] tracker-extract: Simplify text extraction in oasis extractor for ODT files
- From: Martyn James Russell <mr src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/tracker-0.14] tracker-extract: Simplify text extraction in oasis extractor for ODT files
- Date: Thu, 3 May 2012 18:19:52 +0000 (UTC)
commit ac8e8c5487b2930907317bf16a6f2c029710a756
Author: Karl Relton <karllinuxtest relton ntlworld com>
Date: Thu May 3 17:56:29 2012 +0100
tracker-extract: Simplify text extraction in oasis extractor for ODT files
src/tracker-extract/tracker-extract-oasis.c | 65 +++------------------------
1 files changed, 6 insertions(+), 59 deletions(-)
---
diff --git a/src/tracker-extract/tracker-extract-oasis.c b/src/tracker-extract/tracker-extract-oasis.c
index d6b4aee..70e4492 100644
--- a/src/tracker-extract/tracker-extract-oasis.c
+++ b/src/tracker-extract/tracker-extract-oasis.c
@@ -61,7 +61,6 @@ typedef struct {
typedef struct {
ODTTagType current;
- gboolean styles_present;
ODTFileType file_type;
GString *content;
gulong bytes_pending;
@@ -130,7 +129,6 @@ extract_oasis_content (const gchar *uri,
/* Create parse info */
info.current = ODT_TAG_TYPE_UNKNOWN;
info.file_type = file_type;
- info.styles_present = FALSE;
info.content = g_string_new ("");
info.bytes_pending = total_bytes;
@@ -390,49 +388,14 @@ xml_start_element_handler_content (GMarkupParseContext *context,
GError **error)
{
ODTContentParseInfo *data = user_data;
- const gchar **a;
- const gchar **v;
switch (data->file_type) {
case FILE_TYPE_ODT:
- if ((g_ascii_strcasecmp (element_name, "text:table-of-content") == 0) ||
- (g_ascii_strcasecmp (element_name, "text:table-index") == 0) ||
- (g_ascii_strcasecmp (element_name, "text:illustration-index") == 0) ||
- (g_ascii_strcasecmp (element_name, "text:section") == 0)) {
- data->styles_present = TRUE;
- } else if (g_ascii_strcasecmp (element_name, "table:table-cell") == 0) {
- data->current = ODT_TAG_TYPE_WORD_TEXT;
- } else if (g_ascii_strcasecmp (element_name, "text:p") == 0) {
- if (data->styles_present) {
- data->current = ODT_TAG_TYPE_WORD_TEXT;
- break;
- }
-
- for (a = attribute_names, v = attribute_values; *a; ++a, ++v) {
- if (g_ascii_strcasecmp (*a, "text:style-name") != 0) {
- continue;
- }
-
- if ((g_ascii_strcasecmp (*v, "title-article") == 0) ||
- (g_ascii_strcasecmp (*v, "para-padding") == 0) ||
- (g_ascii_strcasecmp (*v, "para-screen") == 0)) {
- data->current = ODT_TAG_TYPE_WORD_TEXT;
- }
- }
- } else if (g_ascii_strcasecmp (element_name, "text:h") == 0) {
- for (a = attribute_names, v = attribute_values; *a; ++a, ++v) {
- if (g_ascii_strcasecmp (*a, "text:style-name") != 0) {
- continue;
- }
-
- if (g_ascii_strncasecmp (*v, "Heading", 7) == 0) {
- data->current = ODT_TAG_TYPE_WORD_TEXT;
- }
- }
- } else if (g_ascii_strcasecmp (element_name, "text:span") == 0) {
- data->current = ODT_TAG_TYPE_WORD_TEXT;
- } else if ((g_ascii_strcasecmp (element_name, "text:a") == 0) ||
- (g_ascii_strcasecmp (element_name, "text:s") == 0)) {
+ if ((g_ascii_strcasecmp (element_name, "text:p") == 0) ||
+ (g_ascii_strcasecmp (element_name, "text:h") == 0) ||
+ (g_ascii_strcasecmp (element_name, "text:a") == 0) ||
+ (g_ascii_strcasecmp (element_name, "text:span") == 0) ||
+ (g_ascii_strcasecmp (element_name, "table:table-cell")) == 0) {
data->current = ODT_TAG_TYPE_WORD_TEXT;
} else {
data->current = -1;
@@ -473,23 +436,7 @@ xml_end_element_handler_content (GMarkupParseContext *context,
{
ODTContentParseInfo *data = user_data;
- switch (data->file_type) {
- case FILE_TYPE_ODT:
- if ((g_ascii_strcasecmp (element_name, "text:table-of-content") == 0) ||
- (g_ascii_strcasecmp (element_name, "text:table-index") == 0) ||
- (g_ascii_strcasecmp (element_name, "text:illustration-index") == 0) ||
- (g_ascii_strcasecmp (element_name, "text:section") == 0)) {
- data->styles_present = FALSE;
- }
- break;
- default:
- break;
- }
-
- if ((g_ascii_strcasecmp (element_name, "text:a") != 0) &&
- (g_ascii_strcasecmp (element_name, "text:s") != 0)) {
- data->current = -1;
- }
+ data->current = -1;
}
static void
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]