gtkhtml r9115 - trunk/gtkhtml
- From: mcrha svn gnome org
- To: svn-commits-list gnome org
- Subject: gtkhtml r9115 - trunk/gtkhtml
- Date: Fri, 23 Jan 2009 12:12:00 +0000 (UTC)
Author: mcrha
Date: Fri Jan 23 12:12:00 2009
New Revision: 9115
URL: http://svn.gnome.org/viewvc/gtkhtml?rev=9115&view=rev
Log:
2009-01-23 Denis Pauk <pauk denis gmail com>
** Fix for bug #567697
* htmlentity.h: (INVALID_ENTITY_CHARACTER_MARKER):
* htmlentity.c: (html_entity_parse):
* htmltokenizer.c: (html_tokenizer_convert_entity),
(convert_text_encoding):
Do not strip invalid entities, rather return them back as text.
Modified:
trunk/gtkhtml/ChangeLog
trunk/gtkhtml/htmlentity.c
trunk/gtkhtml/htmlentity.h
trunk/gtkhtml/htmltokenizer.c
Modified: trunk/gtkhtml/htmlentity.c
==============================================================================
--- trunk/gtkhtml/htmlentity.c (original)
+++ trunk/gtkhtml/htmlentity.c Fri Jan 23 12:12:00 2009
@@ -870,6 +870,6 @@
{
struct _EntityEntry * result = html_entity_hash( s, len);
if (result == NULL )
- return ' ';
+ return INVALID_ENTITY_CHARACTER_MARKER;
return result->value;
}
Modified: trunk/gtkhtml/htmlentity.h
==============================================================================
--- trunk/gtkhtml/htmlentity.h (original)
+++ trunk/gtkhtml/htmlentity.h Fri Jan 23 12:12:00 2009
@@ -30,6 +30,7 @@
/* We name it with correct unicode name, but OK, later... Lauris */
/* char used for &nbsp; - must correspond to table below */
#define ENTITY_NBSP 160
+#define INVALID_ENTITY_CHARACTER_MARKER '?'
#define IS_UTF8_NBSP(s) (*s == (guchar)0xc2 && *(s + 1) == (guchar)0xa0)
gulong html_entity_parse (const gchar *s, guint len);
Modified: trunk/gtkhtml/htmltokenizer.c
==============================================================================
--- trunk/gtkhtml/htmltokenizer.c (original)
+++ trunk/gtkhtml/htmltokenizer.c Fri Jan 23 12:12:00 2009
@@ -41,7 +41,6 @@
static guint html_tokenizer_signals[HTML_TOKENIZER_LAST_SIGNAL] = { 0 };
#define TOKEN_BUFFER_SIZE (1 << 10)
-#define INVALID_CHARACTER_MARKER '?'
#define dt(x)
@@ -470,11 +469,13 @@
if (read_pos < full_pos)
if (*read_pos == '&') {
/*value to add*/
- gunichar value = INVALID_CHARACTER_MARKER;
+ gunichar value = INVALID_ENTITY_CHARACTER_MARKER;
/*skip not needed &*/
read_pos ++;
count_chars = strcspn (read_pos, ";");
if (count_chars < 14 && count_chars > 1) {
+ /*save for recovery*/
+ gchar save_gchar = *(read_pos + count_chars);
*(read_pos + count_chars)=0;
/* &#******; */
if (*read_pos == '#') {
@@ -488,9 +489,18 @@
} else {
value = html_entity_parse (read_pos, strlen (read_pos));
}
- read_pos += (count_chars + 1);
- write_pos += g_unichar_to_utf8 (value, write_pos);
+ if(value != INVALID_ENTITY_CHARACTER_MARKER){
+ write_pos += g_unichar_to_utf8 (value, write_pos);
+ read_pos += (count_chars + 1);
+ } else {
+ /*recovery old value - it's not entity*/
+ write_pos += g_unichar_to_utf8 ('&', write_pos);
+ *(read_pos + count_chars) = save_gchar;
+ }
}
+ else
+ /*very large string*/
+ write_pos += g_unichar_to_utf8 ('&', write_pos);
}
}
*write_pos = 0;
@@ -527,7 +537,7 @@
g_iconv (iconv_cd, (gchar **)&current, &currlength, &newbuffer, &newlength);
if (currlength > 0) {
g_warning ("IconvError=%s", current);
- *newbuffer = INVALID_CHARACTER_MARKER;
+ *newbuffer = INVALID_ENTITY_CHARACTER_MARKER;
newbuffer ++;
current ++;
currlength --;
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]