[totem-pl-parser/gnome-2-26] Bug 579993 – Doesn't handle encoding property in XML files



commit a141f4a233412bf9f4af52c36de8673738d588b7
Author: Bastien Nocera <hadess hadess net>
Date:   Fri Apr 24 14:02:00 2009 +0100

    Bug 579993 – Doesn't handle encoding property in XML files
    
    2009-04-24  Bastien Nocera  <hadess hadess net>
    
    	* plparse/totem-pl-parser.c (totem_pl_parser_parse_xml_relaxed):
    	Implement XML parser helper which takes the XML encoding
    	property into account to convert source contents
    
    	* plparse/totem-pl-parser-podcast.c (totem_pl_parser_add_rss),
    	(totem_pl_parser_add_atom), (totem_pl_parser_get_feed_uri),
    	(totem_pl_parser_add_opml):
    	* plparse/totem-pl-parser-private.h:
    	* plparse/totem-pl-parser-qt.c
    	(totem_pl_parser_add_quicktime_metalink):
    	* plparse/totem-pl-parser-smil.c
    	(totem_pl_parser_add_smil_with_data):
    	* plparse/totem-pl-parser-wm.c (totem_pl_parser_add_asx):
    	Use the XML parser helper
    
    	(Closes: #579993)
---
 ChangeLog                         |   19 ++++++++++++++
 plparse/totem-pl-parser-podcast.c |   22 +++++++++-------
 plparse/totem-pl-parser-private.h |    4 ++-
 plparse/totem-pl-parser-qt.c      |    4 +-
 plparse/totem-pl-parser-smil.c    |    4 +-
 plparse/totem-pl-parser-wm.c      |    5 ++-
 plparse/totem-pl-parser.c         |   49 ++++++++++++++++++++++++++++++++++++-
 7 files changed, 89 insertions(+), 18 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 152ddc2..25a39b1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,22 @@
+2009-04-24  Bastien Nocera  <hadess hadess net>
+
+	* plparse/totem-pl-parser.c (totem_pl_parser_parse_xml_relaxed):
+	Implement XML parser helper which takes the XML encoding
+	property into account to convert source contents
+
+	* plparse/totem-pl-parser-podcast.c (totem_pl_parser_add_rss),
+	(totem_pl_parser_add_atom), (totem_pl_parser_get_feed_uri),
+	(totem_pl_parser_add_opml):
+	* plparse/totem-pl-parser-private.h:
+	* plparse/totem-pl-parser-qt.c
+	(totem_pl_parser_add_quicktime_metalink):
+	* plparse/totem-pl-parser-smil.c
+	(totem_pl_parser_add_smil_with_data):
+	* plparse/totem-pl-parser-wm.c (totem_pl_parser_add_asx):
+	Use the XML parser helper
+
+	(Closes: #579993)
+
 2009-04-22  Bastien Nocera  <hadess hadess net>
 
 	* plparse/totem-pl-parser-private.h:
diff --git a/plparse/totem-pl-parser-podcast.c b/plparse/totem-pl-parser-podcast.c
index 1638bca..e57e0c7 100644
--- a/plparse/totem-pl-parser-podcast.c
+++ b/plparse/totem-pl-parser-podcast.c
@@ -245,16 +245,16 @@ totem_pl_parser_add_rss (TotemPlParser *parser,
 	if (g_file_load_contents (file, NULL, &contents, &size, NULL, NULL) == FALSE)
 		return TOTEM_PL_PARSER_RESULT_ERROR;
 
-	totem_pl_parser_cleanup_xml (contents);
-	xml_parser_init (contents, size, XML_PARSER_CASE_INSENSITIVE);
-	if (xml_parser_build_tree_with_options (&doc, XML_PARSER_RELAXED | XML_PARSER_MULTI_TEXT) < 0) {
+	doc = totem_pl_parser_parse_xml_relaxed (contents, size);
+	if (doc == NULL) {
 		g_free (contents);
 		return TOTEM_PL_PARSER_RESULT_ERROR;
 	}
+
 	/* If the document has no name */
 	if (doc->name == NULL
 	    || (g_ascii_strcasecmp (doc->name , "rss") != 0
-	    	&& g_ascii_strcasecmp (doc->name , "rss\n") != 0)) {
+		&& g_ascii_strcasecmp (doc->name , "rss\n") != 0)) {
 		g_free (contents);
 		xml_parser_free_tree (doc);
 		return TOTEM_PL_PARSER_RESULT_ERROR;
@@ -495,11 +495,12 @@ totem_pl_parser_add_atom (TotemPlParser *parser,
 	if (g_file_load_contents (file, NULL, &contents, &size, NULL, NULL) == FALSE)
 		return TOTEM_PL_PARSER_RESULT_ERROR;
 
-	xml_parser_init (contents, size, XML_PARSER_CASE_INSENSITIVE);
-	if (xml_parser_build_tree_with_options (&doc, XML_PARSER_RELAXED | XML_PARSER_MULTI_TEXT) < 0) {
+	doc = totem_pl_parser_parse_xml_relaxed (contents, size);
+	if (doc == NULL) {
 		g_free (contents);
 		return TOTEM_PL_PARSER_RESULT_ERROR;
 	}
+
 	/* If the document has no name */
 	if (doc->name == NULL
 	    || g_ascii_strcasecmp (doc->name , "feed") != 0) {
@@ -664,8 +665,8 @@ totem_pl_parser_get_feed_uri (const char *data, gsize len)
 
 	uri = NULL;
 
-	xml_parser_init (data, len, XML_PARSER_CASE_INSENSITIVE);
-	if (xml_parser_build_tree_with_options (&doc, XML_PARSER_RELAXED | XML_PARSER_MULTI_TEXT) < 0)
+	doc = totem_pl_parser_parse_xml_relaxed (data, len);
+	if (doc == NULL)
 		return NULL;
 
 	/* If the document has no name */
@@ -871,11 +872,12 @@ totem_pl_parser_add_opml (TotemPlParser *parser,
 	if (g_file_load_contents (file, NULL, &contents, &size, NULL, NULL) == FALSE)
 		return TOTEM_PL_PARSER_RESULT_ERROR;
 
-	xml_parser_init (contents, size, XML_PARSER_CASE_INSENSITIVE);
-	if (xml_parser_build_tree_with_options (&doc, XML_PARSER_RELAXED | XML_PARSER_MULTI_TEXT) < 0) {
+	doc = totem_pl_parser_parse_xml_relaxed (contents, size);
+	if (doc == NULL) {
 		g_free (contents);
 		return TOTEM_PL_PARSER_RESULT_ERROR;
 	}
+
 	/* If the document has no name */
 	if (doc->name == NULL
 	    || g_ascii_strcasecmp (doc->name , "opml") != 0) {
diff --git a/plparse/totem-pl-parser-private.h b/plparse/totem-pl-parser-private.h
index d09b4cd..2a3d9b8 100644
--- a/plparse/totem-pl-parser-private.h
+++ b/plparse/totem-pl-parser-private.h
@@ -32,6 +32,7 @@
 #include <gio/gio.h>
 #include <gio/gio.h>
 #include <string.h>
+#include "xmlparser.h"
 #else
 #include "totem-pl-parser-mini.h"
 #endif /* !TOTEM_PL_PARSER_MINI */
@@ -126,7 +127,8 @@ void totem_pl_parser_add_uri			(TotemPlParser *parser,
 						 ...);
 gboolean totem_pl_parser_ignore			(TotemPlParser *parser,
 						 const char *uri);
-void totem_pl_parser_cleanup_xml		(char *string);
+xml_node_t * totem_pl_parser_parse_xml_relaxed	(char *contents,
+						 gsize size);
 
 #endif /* !TOTEM_PL_PARSER_MINI */
 
diff --git a/plparse/totem-pl-parser-qt.c b/plparse/totem-pl-parser-qt.c
index be00308..ef9147e 100644
--- a/plparse/totem-pl-parser-qt.c
+++ b/plparse/totem-pl-parser-qt.c
@@ -144,8 +144,8 @@ totem_pl_parser_add_quicktime_metalink (TotemPlParser *parser,
 	if (g_file_load_contents (file, NULL, &contents, &size, NULL, NULL) == FALSE)
 		return TOTEM_PL_PARSER_RESULT_ERROR;
 
-	xml_parser_init (contents, size, XML_PARSER_CASE_INSENSITIVE);
-	if (xml_parser_build_tree_with_options (&doc, XML_PARSER_RELAXED | XML_PARSER_MULTI_TEXT) < 0) {
+	doc = totem_pl_parser_parse_xml_relaxed (contents, size);
+	if (doc == NULL) {
 		g_free (contents);
 		return TOTEM_PL_PARSER_RESULT_ERROR;
 	}
diff --git a/plparse/totem-pl-parser-smil.c b/plparse/totem-pl-parser-smil.c
index 3850739..e2992ee 100644
--- a/plparse/totem-pl-parser-smil.c
+++ b/plparse/totem-pl-parser-smil.c
@@ -197,8 +197,8 @@ totem_pl_parser_add_smil_with_data (TotemPlParser *parser,
 	xml_node_t* doc;
 	TotemPlParserResult retval;
 
-	xml_parser_init (contents, size, XML_PARSER_CASE_INSENSITIVE);
-	if (xml_parser_build_tree_with_options (&doc, XML_PARSER_RELAXED | XML_PARSER_MULTI_TEXT) < 0)
+	doc = totem_pl_parser_parse_xml_relaxed (contents, size);
+	if (doc == NULL)
 		return TOTEM_PL_PARSER_RESULT_ERROR;
 
 	retval = totem_pl_parser_add_smil_with_doc (parser, file, base_file, doc);
diff --git a/plparse/totem-pl-parser-wm.c b/plparse/totem-pl-parser-wm.c
index beae8d5..8a0692b 100644
--- a/plparse/totem-pl-parser-wm.c
+++ b/plparse/totem-pl-parser-wm.c
@@ -389,11 +389,12 @@ totem_pl_parser_add_asx (TotemPlParser *parser,
 	if (g_file_load_contents (file, NULL, &contents, &size, NULL, NULL) == FALSE)
 		return TOTEM_PL_PARSER_RESULT_ERROR;
 
-	xml_parser_init (contents, size, XML_PARSER_CASE_INSENSITIVE);
-	if (xml_parser_build_tree_with_options (&doc, XML_PARSER_RELAXED | XML_PARSER_MULTI_TEXT) < 0) {
+	doc = totem_pl_parser_parse_xml_relaxed (contents, size);
+	if (doc == NULL) {
 		g_free (contents);
 		return TOTEM_PL_PARSER_RESULT_ERROR;
 	}
+
 	/* If the document has no name */
 	if (doc->name == NULL
 	    || g_ascii_strcasecmp (doc->name , "asx") != 0) {
diff --git a/plparse/totem-pl-parser.c b/plparse/totem-pl-parser.c
index 2b4d21b..83a35c3 100644
--- a/plparse/totem-pl-parser.c
+++ b/plparse/totem-pl-parser.c
@@ -1531,7 +1531,7 @@ totem_pl_parser_ignore (TotemPlParser *parser, const char *uri)
  * Removes HTML comments from a string representing the contents of an XML file.
  * The function modifies the string in place.
  */
-void
+static void
 totem_pl_parser_cleanup_xml (char *contents)
 {
 	char *needle;
@@ -1551,6 +1551,53 @@ totem_pl_parser_cleanup_xml (char *contents)
 	}
 }
 
+xml_node_t *
+totem_pl_parser_parse_xml_relaxed (char *contents,
+				   gsize size)
+{
+	xml_node_t* doc, *node;
+	char *encoding, *new_contents;
+	gsize new_size;
+
+	totem_pl_parser_cleanup_xml (contents);
+	xml_parser_init (contents, size, XML_PARSER_CASE_INSENSITIVE);
+	if (xml_parser_build_tree_with_options (&doc, XML_PARSER_RELAXED | XML_PARSER_MULTI_TEXT) < 0)
+		return NULL;
+
+	encoding = NULL;
+	for (node = doc; node != NULL; node = node->next) {
+		if (node->name == NULL || g_str_equal (node->name, "?XML") == FALSE)
+			continue;
+		encoding = g_strdup (xml_parser_get_property (node, "ENCODING"));
+		break;
+	}
+
+	if (encoding == NULL || g_str_equal (encoding, "UTF-8") != FALSE) {
+		g_free (encoding);
+		return doc;
+	}
+
+	xml_parser_free_tree (doc);
+
+	new_contents = g_convert (contents, size, "UTF-8", encoding, NULL, &new_size, NULL);
+	if (new_contents == NULL) {
+		g_warning ("Failed to convert XML data to UTF-8");
+		g_free (encoding);
+		return NULL;
+	}
+	g_free (encoding);
+
+	xml_parser_init (new_contents, new_size, XML_PARSER_CASE_INSENSITIVE);
+	if (xml_parser_build_tree_with_options (&doc, XML_PARSER_RELAXED | XML_PARSER_MULTI_TEXT) < 0) {
+		g_free (new_contents);
+		return NULL;
+	}
+
+	g_free (new_contents);
+
+	return doc;
+}
+
 static gboolean
 totem_pl_parser_ignore_from_mimetype (TotemPlParser *parser, const char *mimetype)
 {



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]