[tracker/oasis-improvements: 2/2] tracker-extract: Simplify text extraction in oasis extractor for ODT files



commit 1247628de09e72ba8662910015fbe0917c78da20
Author: Karl Relton <karllinuxtest relton ntlworld com>
Date:   Thu May 3 17:56:29 2012 +0100

    tracker-extract: Simplify text extraction in oasis extractor for ODT files

 src/tracker-extract/tracker-extract-oasis.c |   65 +++------------------------
 1 files changed, 6 insertions(+), 59 deletions(-)
---
diff --git a/src/tracker-extract/tracker-extract-oasis.c b/src/tracker-extract/tracker-extract-oasis.c
index d6b4aee..70e4492 100644
--- a/src/tracker-extract/tracker-extract-oasis.c
+++ b/src/tracker-extract/tracker-extract-oasis.c
@@ -61,7 +61,6 @@ typedef struct {
 
 typedef struct {
 	ODTTagType current;
-	gboolean styles_present;
 	ODTFileType file_type;
 	GString *content;
 	gulong bytes_pending;
@@ -130,7 +129,6 @@ extract_oasis_content (const gchar          *uri,
 	/* Create parse info */
 	info.current = ODT_TAG_TYPE_UNKNOWN;
 	info.file_type = file_type;
-	info.styles_present = FALSE;
 	info.content = g_string_new ("");
 	info.bytes_pending = total_bytes;
 
@@ -390,49 +388,14 @@ xml_start_element_handler_content (GMarkupParseContext  *context,
                                    GError              **error)
 {
 	ODTContentParseInfo *data = user_data;
-	const gchar **a;
-	const gchar **v;
 
 	switch (data->file_type) {
 	case FILE_TYPE_ODT:
-		if ((g_ascii_strcasecmp (element_name, "text:table-of-content") == 0) ||
-		    (g_ascii_strcasecmp (element_name, "text:table-index") == 0) ||
-		    (g_ascii_strcasecmp (element_name, "text:illustration-index") == 0) ||
-		    (g_ascii_strcasecmp (element_name, "text:section") == 0)) {
-			data->styles_present = TRUE;
-		} else if (g_ascii_strcasecmp (element_name, "table:table-cell") == 0) {
-			data->current = ODT_TAG_TYPE_WORD_TEXT;
-		} else if (g_ascii_strcasecmp (element_name, "text:p") == 0) {
-			if (data->styles_present) {
-				data->current = ODT_TAG_TYPE_WORD_TEXT;
-				break;
-			}
-
-			for (a = attribute_names, v = attribute_values; *a; ++a, ++v) {
-				if (g_ascii_strcasecmp (*a, "text:style-name") != 0) {
-					continue;
-				}
-
-				if ((g_ascii_strcasecmp (*v, "title-article") == 0) ||
-				    (g_ascii_strcasecmp (*v, "para-padding") == 0) ||
-				    (g_ascii_strcasecmp (*v, "para-screen") == 0)) {
-					data->current = ODT_TAG_TYPE_WORD_TEXT;
-				}
-			}
-		} else if (g_ascii_strcasecmp (element_name, "text:h") == 0) {
-			for (a = attribute_names, v = attribute_values; *a; ++a, ++v) {
-				if (g_ascii_strcasecmp (*a, "text:style-name") != 0) {
-					continue;
-				}
-
-				if (g_ascii_strncasecmp (*v, "Heading", 7) == 0) {
-					data->current = ODT_TAG_TYPE_WORD_TEXT;
-				}
-			}
-		} else if (g_ascii_strcasecmp (element_name, "text:span") == 0) {
-			data->current = ODT_TAG_TYPE_WORD_TEXT;
-		} else if ((g_ascii_strcasecmp (element_name, "text:a") == 0) ||
-		           (g_ascii_strcasecmp (element_name, "text:s") == 0)) {
+		if ((g_ascii_strcasecmp (element_name, "text:p") == 0) ||
+		    (g_ascii_strcasecmp (element_name, "text:h") == 0) ||
+		    (g_ascii_strcasecmp (element_name, "text:a") == 0) ||
+		    (g_ascii_strcasecmp (element_name, "text:span") == 0) ||
+		    (g_ascii_strcasecmp (element_name, "table:table-cell")) == 0) {
 			data->current = ODT_TAG_TYPE_WORD_TEXT;
 		} else {
 			data->current = -1;
@@ -473,23 +436,7 @@ xml_end_element_handler_content (GMarkupParseContext  *context,
 {
 	ODTContentParseInfo *data = user_data;
 
-	switch (data->file_type) {
-	case FILE_TYPE_ODT:
-		if ((g_ascii_strcasecmp (element_name, "text:table-of-content") == 0) ||
-		    (g_ascii_strcasecmp (element_name, "text:table-index") == 0) ||
-		    (g_ascii_strcasecmp (element_name, "text:illustration-index") == 0) ||
-		    (g_ascii_strcasecmp (element_name, "text:section") == 0)) {
-			data->styles_present = FALSE;
-		}
-		break;
-	default:
-		break;
-	}
-
-	if ((g_ascii_strcasecmp (element_name, "text:a") != 0) &&
-	    (g_ascii_strcasecmp (element_name, "text:s") != 0)) {
-		data->current = -1;
-	}
+	data->current = -1;
 }
 
 static void



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]