gtkhtml r9061 - trunk/gtkhtml



Author: mcrha
Date: Wed Dec 10 18:28:18 2008
New Revision: 9061
URL: http://svn.gnome.org/viewvc/gtkhtml?rev=9061&view=rev

Log:
2008-12-10  Denis Pauk  <pauk denis gmail com>

	** Fix for bug #552357

	* gtkhtml/htmltokenizer.c
	* gtkhtml/htmltokenizer.h
	* gtkhtml/htmlradio.c
	* gtkhtml/htmlform.c
	* gtkhtml/htmlengine.c
	* gtkhtml/htmlengine.h
	* gtkhtml/htmlbutton.c
	* gtkhtml/htmltextarea.c
	* gtkhtml/htmlembedded.c
	* gtkhtml/htmlembedded.h
	* gtkhtml/htmlselect.c
	* gtkhtml/htmlcheckbox.c
	* gtkhtml/htmlhidden.c
	* gtkhtml/testgtkhtml.c
	* gtkhtml/htmlimageinput.c
	* gtkhtml/gtkhtml.c
	* gtkhtml/htmltextinput.c 
	 Support http-equiv and set the encoding (gtk_html_set_default_content_type)
	 if it exists in the HTTP headers, and re-encode the resulting form query
	 (adds support for non-UTF-8 encodings).
	To use the new behavior, either call
		gtk_html_set_default_engine(html, TRUE);
	or pass GTK_HTML_BEGIN_CHANGECONTENTTYPE in the GtkHTMLBeginFlags
	argument of
		gtk_html_begin_full.



Modified:
   trunk/gtkhtml/ChangeLog
   trunk/gtkhtml/gtkhtml-enums.h
   trunk/gtkhtml/gtkhtml-private.h
   trunk/gtkhtml/gtkhtml.c
   trunk/gtkhtml/gtkhtml.h
   trunk/gtkhtml/htmlbutton.c
   trunk/gtkhtml/htmlcheckbox.c
   trunk/gtkhtml/htmlembedded.c
   trunk/gtkhtml/htmlembedded.h
   trunk/gtkhtml/htmlengine.c
   trunk/gtkhtml/htmlengine.h
   trunk/gtkhtml/htmlform.c
   trunk/gtkhtml/htmlframe.c
   trunk/gtkhtml/htmlhidden.c
   trunk/gtkhtml/htmliframe.c
   trunk/gtkhtml/htmlimageinput.c
   trunk/gtkhtml/htmlradio.c
   trunk/gtkhtml/htmlselect.c
   trunk/gtkhtml/htmltextarea.c
   trunk/gtkhtml/htmltextinput.c
   trunk/gtkhtml/htmltokenizer.c
   trunk/gtkhtml/htmltokenizer.h
   trunk/gtkhtml/testgtkhtml.c

Modified: trunk/gtkhtml/gtkhtml-enums.h
==============================================================================
--- trunk/gtkhtml/gtkhtml-enums.h	(original)
+++ trunk/gtkhtml/gtkhtml-enums.h	Wed Dec 10 18:28:18 2008
@@ -309,6 +309,8 @@
 	GTK_HTML_BEGIN_KEEP_SCROLL = 1 << 0,
 	GTK_HTML_BEGIN_KEEP_IMAGES = 1 << 1,
 	GTK_HTML_BEGIN_BLOCK_UPDATES = 1 << 2,
-	GTK_HTML_BEGIN_BLOCK_IMAGES = 1 << 3
+	GTK_HTML_BEGIN_BLOCK_IMAGES = 1 << 3,
+	/*enable autochange content_type*/
+	GTK_HTML_BEGIN_CHANGECONTENTTYPE = 1 << 4
 } GtkHTMLBeginFlags;
 #endif

Modified: trunk/gtkhtml/gtkhtml-private.h
==============================================================================
--- trunk/gtkhtml/gtkhtml-private.h	(original)
+++ trunk/gtkhtml/gtkhtml-private.h	Wed Dec 10 18:28:18 2008
@@ -39,7 +39,6 @@
 
 	gint selection_type;
 
-	gchar *content_type;
 	char  *base_url;
 
 	GtkWidget *search_input_line;

Modified: trunk/gtkhtml/gtkhtml.c
==============================================================================
--- trunk/gtkhtml/gtkhtml.c	(original)
+++ trunk/gtkhtml/gtkhtml.c	Wed Dec 10 18:28:18 2008
@@ -767,7 +767,6 @@
 			html->priv->im_context = NULL;
 		}
 
-		g_free (html->priv->content_type);
 		g_free (html->priv->base_url);
 		g_free (html->priv->caret_first_focus_anchor);
 		g_free (html->priv);
@@ -3322,7 +3321,6 @@
 	html->priv->insertion_font_style = GTK_HTML_FONT_STYLE_DEFAULT;
 	html->priv->selection_type = -1;
 	html->priv->selection_as_cite = FALSE;
-	html->priv->content_type = g_strdup ("html/text; charset=utf-8");
 	html->priv->search_input_line = NULL;
 	html->priv->in_object_resize = FALSE;
 	html->priv->resize_cursor = gdk_cursor_new (GDK_BOTTOM_RIGHT_CORNER);
@@ -3474,7 +3472,7 @@
 	html->allow_selection = allow;
 }
 
-
+
 /**
  * gtk_html_begin_full:
  * @html: the GtkHTML widget to operate on.
@@ -3515,9 +3513,6 @@
 	else
 		html->engine->keep_scroll = FALSE;
 
-	if (!content_type)
-		content_type = html->priv->content_type;
-
 	handle = html_engine_begin (html->engine, content_type);
 	if (handle == NULL)
 		return NULL;
@@ -3530,6 +3525,10 @@
 	if (flags & GTK_HTML_BEGIN_KEEP_SCROLL)
 		html->engine->newPage = FALSE;
 
+	/* Enable change content type in engine */
+	if (flags & GTK_HTML_BEGIN_CHANGECONTENTTYPE)
+		gtk_html_set_default_engine(html, TRUE);		
+
 	return handle;
 }
 
@@ -3546,7 +3545,7 @@
 {
 	g_return_val_if_fail (GTK_IS_HTML (html), NULL);
 
-	return gtk_html_begin_full (html, NULL, html->priv->content_type, 0);
+	return gtk_html_begin_full (html, NULL, NULL, 0);
 }
 
 /**
@@ -3564,7 +3563,7 @@
 {
 	g_return_val_if_fail (! gtk_html_get_editable (html), NULL);
 
-	return gtk_html_begin_full (html, NULL, NULL, 0);
+	return gtk_html_begin_full (html, NULL, content_type , 0);
 }
 
 /**
@@ -4493,15 +4492,30 @@
 }
 
 /* misc utils */
+/* if engine_type == false - default behaviour*/
 void
-gtk_html_set_default_content_type (GtkHTML *html, gchar *content_type)
+gtk_html_set_default_engine(GtkHTML *html, gboolean engine_type)
 {
-	g_free (html->priv->content_type);
+	html_engine_set_engine_type( html->engine, engine_type);
+}
 
-	if (content_type) {
-		html->priv->content_type = g_ascii_strdown (content_type, -1);
-	} else
-		html->priv->content_type = NULL;
+gboolean
+gtk_html_get_default_engine(GtkHTML *html)
+{
+	return html_engine_get_engine_type( html->engine);
+}
+
+
+void
+gtk_html_set_default_content_type (GtkHTML *html, const gchar *content_type)
+{   
+    html_engine_set_content_type( html->engine, content_type);
+}
+
+const gchar*
+gtk_html_get_default_content_type (GtkHTML *html)
+{   
+    return html_engine_get_content_type( html->engine);
 }
 
 gpointer

Modified: trunk/gtkhtml/gtkhtml.h
==============================================================================
--- trunk/gtkhtml/gtkhtml.h	(original)
+++ trunk/gtkhtml/gtkhtml.h	Wed Dec 10 18:28:18 2008
@@ -335,8 +335,12 @@
 /* DEPRECATED */
 #if 1
 gboolean                   gtk_html_build_with_gconf              (void);
-void                       gtk_html_set_default_content_type      (GtkHTML                   *html,
-								   gchar                     *content_type);
+const gchar*               gtk_html_get_default_content_type              (GtkHTML                   *html);
+void                       gtk_html_set_default_content_type              (GtkHTML                   *html,
+								   const gchar                     *content_type);
+void			   gtk_html_set_default_engine		  (GtkHTML *html,
+								   gboolean enginetype);
+gboolean		   gtk_html_get_default_engine		  (GtkHTML *html);
 GtkWidget                 *gtk_html_new_from_string               (const gchar               *Astr,
 								   gint                       len);
 void                       gtk_html_load_empty                    (GtkHTML                   *html);

Modified: trunk/gtkhtml/htmlbutton.c
==============================================================================
--- trunk/gtkhtml/htmlbutton.c	(original)
+++ trunk/gtkhtml/htmlbutton.c	Wed Dec 10 18:28:18 2008
@@ -64,19 +64,19 @@
 
 
 static gchar *
-encode (HTMLEmbedded *e)
+encode (HTMLEmbedded *e,const gchar *codepage)
 {
 	GString *encoding = g_string_new ("");
 	gchar *ptr;
 
 	if(strlen (e->name) && (HTML_BUTTON(e)->successful)) {
-		ptr = html_embedded_encode_string (e->name);
+		ptr = html_embedded_encode_string (e->name, codepage);
 		encoding = g_string_append (encoding, ptr);
 		g_free (ptr);
 
 		encoding = g_string_append_c (encoding, '=');
 
-		ptr = html_embedded_encode_string (e->value);
+		ptr = html_embedded_encode_string (e->value, codepage);
 		encoding = g_string_append (encoding, ptr);
 		g_free (ptr);
 	}

Modified: trunk/gtkhtml/htmlcheckbox.c
==============================================================================
--- trunk/gtkhtml/htmlcheckbox.c	(original)
+++ trunk/gtkhtml/htmlcheckbox.c	Wed Dec 10 18:28:18 2008
@@ -38,20 +38,19 @@
 }
 
 static gchar *
-encode (HTMLEmbedded *e)
+encode (HTMLEmbedded *e, const gchar *codepage)
 {
 	GString *encoding = g_string_new ("");
 	gchar *ptr;
 
 	if(strlen (e->name) && gtk_toggle_button_get_active (GTK_TOGGLE_BUTTON (e->widget))) {
 
-		ptr = html_embedded_encode_string (e->name);
+		ptr = html_embedded_encode_string (e->name, codepage);
 		encoding = g_string_append (encoding, ptr);
 		g_free (ptr);
 
 		encoding = g_string_append_c (encoding, '=');
-
-		ptr = html_embedded_encode_string (e->value);
+		ptr = html_embedded_encode_string (e->value, codepage);
 		encoding = g_string_append (encoding, ptr);
 		g_free (ptr);
 	}

Modified: trunk/gtkhtml/htmlembedded.c
==============================================================================
--- trunk/gtkhtml/htmlembedded.c	(original)
+++ trunk/gtkhtml/htmlembedded.c	Wed Dec 10 18:28:18 2008
@@ -33,6 +33,8 @@
 #include "htmliframe.h"
 #include "htmlpainter.h"
 #include "htmlengine.h"
+/*For use converter based on g_iconv*/
+#include "htmltokenizer.h"
 
 HTMLEmbeddedClass html_embedded_class;
 static HTMLObjectClass *parent_class = NULL;
@@ -198,7 +200,7 @@
 }
 
 static gchar *
-encode (HTMLEmbedded *e)
+encode (HTMLEmbedded *e, const gchar* codepage)
 {
 	return g_strdup ("");
 }
@@ -210,9 +212,9 @@
 }
 
 gchar *
-html_embedded_encode (HTMLEmbedded *e)
+html_embedded_encode (HTMLEmbedded *e, const gchar* codepage)
 {
-	return HTML_EMBEDDED_CLASS (HTML_OBJECT (e)->klass)->encode (e);
+	return HTML_EMBEDDED_CLASS (HTML_OBJECT (e)->klass)->encode (e, codepage);
 }
 
 void
@@ -228,13 +230,21 @@
 }
 
 gchar *
-html_embedded_encode_string (const gchar *str)
+html_embedded_encode_string (const gchar *before, const gchar *codepage)
 {
-        static gchar *safe = "$-._!*(),"; /* RFC 1738 */
+	    const gchar* str = before;
+	    static gchar *safe = "$-._!*(),"; /* RFC 1738 */
         unsigned pos = 0;
         GString *encoded = g_string_new ("");
         gchar buffer[5], *ptr;
-	guchar c;
+		guchar c;
+		
+	    GIConv iconv_cd = generate_iconv_to (codepage);
+	    if( is_valid_g_iconv (iconv_cd))
+	    {
+	    	str= convert_text_encoding(iconv_cd, before);
+	    	g_iconv_close(iconv_cd);
+	    }
 
         while ( pos < strlen(str) ) {
 
@@ -268,7 +278,7 @@
 
 	g_string_free (encoded, FALSE);
 
-        return ptr;
+    return ptr;
 }
 
 void

Modified: trunk/gtkhtml/htmlembedded.h
==============================================================================
--- trunk/gtkhtml/htmlembedded.h	(original)
+++ trunk/gtkhtml/htmlembedded.h	Wed Dec 10 18:28:18 2008
@@ -45,7 +45,7 @@
 
 
 	void   (*reset)    (HTMLEmbedded *element);
-	gchar *(*encode)   (HTMLEmbedded *element);
+	gchar *(*encode)   (HTMLEmbedded *element, const gchar* codepage);
 	void   (*reparent) (HTMLEmbedded *element, GtkWidget *new_parent);
 };
 
@@ -71,8 +71,10 @@
 void          html_embedded_reset          (HTMLEmbedded      *element);
 void          html_embedded_reparent       (HTMLEmbedded      *element,
 					    GtkWidget         *new_parent);
-gchar        *html_embedded_encode         (HTMLEmbedded      *element);
-gchar        *html_embedded_encode_string  (const gchar       *str);
+gchar        *html_embedded_encode         (HTMLEmbedded      *element,
+						const gchar * codepage);
+gchar        *html_embedded_encode_string  (const gchar       *str,
+						const gchar * codepage);
 HTMLEmbedded *html_embedded_new_widget     (GtkWidget         *parent,
 					    GtkHTMLEmbedded   *eb,
 					    HTMLEngine        *engine);

Modified: trunk/gtkhtml/htmlengine.c
==============================================================================
--- trunk/gtkhtml/htmlengine.c	(original)
+++ trunk/gtkhtml/htmlengine.c	Wed Dec 10 18:28:18 2008
@@ -2623,7 +2623,7 @@
 		token = html_string_tokenizer_next_token (e->st);
 		if (g_ascii_strncasecmp (token, "align=", 6) == 0) {
 			style = html_style_add_text_align (style, parse_halign (token + 6, HTML_HALIGN_NONE));
-			//align = parse_halign (token + 6, align);
+			/*align = parse_halign (token + 6, align);*/
 		} else if (g_ascii_strncasecmp (token, "style=", 6) == 0) {
 			style = html_style_add_attribute (style, token + 6);
 		}
@@ -2766,11 +2766,39 @@
 	html_element_free (element);
 }
 
-
+void
+html_engine_set_engine_type( HTMLEngine *e, gboolean engine_type)
+{
+	g_return_if_fail (HTML_IS_ENGINE (e));
+	html_tokenizer_set_engine_type(e->ht, engine_type);
+}
+
+gboolean
+html_engine_get_engine_type( HTMLEngine *e)
+{
+	g_return_val_if_fail (HTML_IS_ENGINE (e), FALSE);
+	return html_tokenizer_get_engine_type(e->ht);
+}
+
+void 
+html_engine_set_content_type(HTMLEngine *e, const gchar* content_type)
+{
+	g_return_if_fail (HTML_IS_ENGINE (e));
+	html_tokenizer_change_content_type(e->ht, content_type);
+}
+
+const gchar *  
+html_engine_get_content_type(HTMLEngine *e)
+{
+	g_return_val_if_fail (HTML_IS_ENGINE (e), NULL);
+	return html_tokenizer_get_content_type(e->ht);
+}
+
 static void
 element_parse_meta (HTMLEngine *e, HTMLObject *clue, const gchar *str)
 {
 	int refresh = 0;
+	int contenttype = 0;
 	int refresh_delay = 0;
 	gchar *refresh_url = NULL;
 
@@ -2778,16 +2806,23 @@
 
 	html_string_tokenizer_tokenize(e->st, str + 5, " >");
 	while (html_string_tokenizer_has_more_tokens (e->st)) {
-
 		const gchar* token = html_string_tokenizer_next_token(e->st);
 		if (g_ascii_strncasecmp(token, "http-equiv=", 11) == 0 ) {
 			if (g_ascii_strncasecmp(token + 11, "refresh", 7) == 0 )
 				refresh = 1;
+			if (g_ascii_strncasecmp(token + 11, "content-type", 12) == 0 )
+				contenttype = 1;
 		} else if (g_ascii_strncasecmp(token, "content=", 8) == 0) {
+			const gchar *content;
+			content = token + 8;	
+			if(contenttype)
+			{
+				contenttype = 0;
+				html_engine_set_content_type(e, content);
+			}
 			if (refresh) {
-				const gchar *content;
-				content = token + 8;
-
+				refresh = 0;
+				
 				/* The time in seconds until the refresh */
 				refresh_delay = atoi(content);
 
@@ -3452,7 +3487,7 @@
 	push_block_element (e, ID_CAPTION, style, DISPLAY_TABLE_CAPTION, block_end_cell, 0, 0);
 
 	table->caption = caption;
-	//FIXME caption alignment should be based on the flow.... or something....
+	/*FIXME caption alignment should be based on the flow.... or something....*/
 	table->capAlign = capAlign;
 }
 

Modified: trunk/gtkhtml/htmlengine.h
==============================================================================
--- trunk/gtkhtml/htmlengine.h	(original)
+++ trunk/gtkhtml/htmlengine.h	Wed Dec 10 18:28:18 2008
@@ -315,6 +315,12 @@
 void           html_engine_stop_parser      (HTMLEngine  *e);
 void           html_engine_stop             (HTMLEngine  *e);
 void           html_engine_flush            (HTMLEngine  *e);
+void           html_engine_set_engine_type   (HTMLEngine *e,
+					 gboolean engine_type);
+gboolean       html_engine_get_engine_type   (HTMLEngine *e);
+void 		   html_engine_set_content_type(HTMLEngine *e,
+					const gchar* content_type);
+const gchar *  html_engine_get_content_type(HTMLEngine *e);
 
 /* Rendering control.  */
 gint  html_engine_calc_min_width       (HTMLEngine *e);

Modified: trunk/gtkhtml/htmlform.c
==============================================================================
--- trunk/gtkhtml/htmlform.c	(original)
+++ trunk/gtkhtml/htmlform.c	Wed Dec 10 18:28:18 2008
@@ -125,10 +125,11 @@
 	gint first = TRUE;
 	GList *i = form->elements;
 	gchar *ptr;
-
+	
+	const gchar * codepage = html_engine_get_content_type(form->engine);
+	
 	while (i) {
-		ptr = html_embedded_encode (HTML_EMBEDDED (i->data));
-
+		ptr = html_embedded_encode (HTML_EMBEDDED (i->data), codepage);
 		if (strlen (ptr)) {
 			if(!first)
 				encoding = g_string_append_c (encoding, '&');

Modified: trunk/gtkhtml/htmlframe.c
==============================================================================
--- trunk/gtkhtml/htmlframe.c	(original)
+++ trunk/gtkhtml/htmlframe.c	Wed Dec 10 18:28:18 2008
@@ -480,7 +480,11 @@
 	new_tokenizer = NULL;
 
 	gtk_html_set_default_content_type (new_html,
-					   parent_html->priv->content_type);
+					   gtk_html_get_default_content_type(parent_html));
+
+	gtk_html_set_default_engine (new_html,
+					   gtk_html_get_default_engine(parent_html));
+					   
 	frame->html = new_widget;
 	frame->url = g_strdup (src);
 	frame->width = width;

Modified: trunk/gtkhtml/htmlhidden.c
==============================================================================
--- trunk/gtkhtml/htmlhidden.c	(original)
+++ trunk/gtkhtml/htmlhidden.c	Wed Dec 10 18:28:18 2008
@@ -28,19 +28,19 @@
 
 
 static gchar *
-encode (HTMLEmbedded *e)
+encode (HTMLEmbedded *e, const gchar *codepage)
 {
 	GString *encoding = g_string_new ("");
 	gchar *ptr;
 
 	if(strlen (e->name)) {
-		ptr = html_embedded_encode_string (e->name);
+		ptr = html_embedded_encode_string (e->name, codepage);
 		encoding = g_string_append (encoding, ptr);
 		g_free (ptr);
 
 		encoding = g_string_append_c (encoding, '=');
 
-		ptr = html_embedded_encode_string (e->value);
+		ptr = html_embedded_encode_string (e->value, codepage);
 		encoding = g_string_append (encoding, ptr);
 		g_free (ptr);
 	}

Modified: trunk/gtkhtml/htmliframe.c
==============================================================================
--- trunk/gtkhtml/htmliframe.c	(original)
+++ trunk/gtkhtml/htmliframe.c	Wed Dec 10 18:28:18 2008
@@ -611,7 +611,11 @@
 	new_tokenizer = NULL;
 
 	gtk_html_set_default_content_type (new_html,
-					   parent_html->priv->content_type);
+					   gtk_html_get_default_content_type(parent_html));
+
+	gtk_html_set_default_engine (new_html,
+					   gtk_html_get_default_engine(parent_html));
+					   
 	iframe->html = new_widget;
 	iframe->url = g_strdup (src);
 	iframe->width = width;

Modified: trunk/gtkhtml/htmlimageinput.c
==============================================================================
--- trunk/gtkhtml/htmlimageinput.c	(original)
+++ trunk/gtkhtml/htmlimageinput.c	Wed Dec 10 18:28:18 2008
@@ -107,13 +107,13 @@
 
 
 static gchar *
-encode (HTMLEmbedded *e)
+encode (HTMLEmbedded *e, const gchar *codepage)
 {
 	GString *encoding = g_string_new ("");
 	gchar *ptr;
 
 	if(strlen (e->name)) {
-		ptr = html_embedded_encode_string (e->name);
+		ptr = html_embedded_encode_string (e->name, codepage);
 		encoding = g_string_assign (encoding, ptr);
 		g_free (ptr);
 
@@ -121,7 +121,7 @@
 		encoding = g_string_append (encoding, ptr);
 		g_free (ptr);
 
-		ptr = html_embedded_encode_string (e->name);
+		ptr = html_embedded_encode_string (e->name, codepage);
 		encoding = g_string_append (encoding, ptr);
 		g_free (ptr);
 

Modified: trunk/gtkhtml/htmlradio.c
==============================================================================
--- trunk/gtkhtml/htmlradio.c	(original)
+++ trunk/gtkhtml/htmlradio.c	Wed Dec 10 18:28:18 2008
@@ -49,20 +49,20 @@
 }
 
 static gchar *
-encode (HTMLEmbedded *e)
+encode (HTMLEmbedded *e, const gchar *codepage)
 {
 	GString *encoding = g_string_new ("");
 	gchar *ptr;
 
 	if(strlen (e->name) && gtk_toggle_button_get_active (GTK_TOGGLE_BUTTON (e->widget))) {
 
-		ptr = html_embedded_encode_string (e->name);
+		ptr = html_embedded_encode_string (e->name, codepage);
 		encoding = g_string_append (encoding, ptr);
 		g_free (ptr);
 
 		encoding = g_string_append_c (encoding, '=');
 
-		ptr = html_embedded_encode_string (e->value);
+		ptr = html_embedded_encode_string (e->value, codepage);
 		encoding = g_string_append (encoding, ptr);
 		g_free (ptr);
 	}

Modified: trunk/gtkhtml/htmlselect.c
==============================================================================
--- trunk/gtkhtml/htmlselect.c	(original)
+++ trunk/gtkhtml/htmlselect.c	Wed Dec 10 18:28:18 2008
@@ -102,7 +102,8 @@
 add_selected (GtkTreeModel *model,
               GtkTreePath *path,
               GtkTreeIter *iter,
-              struct EmbeddedSelectionInfo *info)
+              struct EmbeddedSelectionInfo *info,
+              const gchar* codepage)
 {
 	gchar *value, *encoded;
 
@@ -111,13 +112,13 @@
 	if (info->string->len)
 		g_string_append_c (info->string, '&');
 
-	encoded = html_embedded_encode_string (info->embedded->name);
+	encoded = html_embedded_encode_string (info->embedded->name, codepage);
 	g_string_append (info->string, encoded);
 	g_free (encoded);
 
 	g_string_append_c (info->string, '=');
 
-	encoded = html_embedded_encode_string (value);
+	encoded = html_embedded_encode_string (value, codepage);
 	g_string_append (info->string, encoded);
 	g_free (encoded);
 
@@ -125,7 +126,7 @@
 }
 
 static gchar *
-encode (HTMLEmbedded *e)
+encode (HTMLEmbedded *e, const gchar *codepage)
 {
 	struct EmbeddedSelectionInfo info;
 	HTMLSelect *s = HTML_SELECT(e);
@@ -146,7 +147,7 @@
 
 			combo_box = GTK_COMBO_BOX (e->widget);
 			if (gtk_combo_box_get_active_iter (combo_box, &iter))
-				add_selected (s->model, NULL, &iter, &info);
+				add_selected (s->model, NULL, &iter, &info, codepage);
 		}
 	}
 

Modified: trunk/gtkhtml/htmltextarea.c
==============================================================================
--- trunk/gtkhtml/htmltextarea.c	(original)
+++ trunk/gtkhtml/htmltextarea.c	Wed Dec 10 18:28:18 2008
@@ -63,7 +63,7 @@
 }
 
 static gchar *
-encode (HTMLEmbedded *e)
+encode (HTMLEmbedded *e, const gchar *codepage)
 {
 	GString *encoding = g_string_new ("");
 	gchar *encoded_str, *utf8_str, *gtk_text;
@@ -71,7 +71,7 @@
 	if(strlen (e->name)) {
 		GtkTextIter first, last;
 
-		utf8_str = html_embedded_encode_string (e->name);
+		utf8_str = html_embedded_encode_string (e->name, codepage);
 		encoding = g_string_append (encoding, utf8_str);
 		g_free (utf8_str);
 
@@ -80,7 +80,7 @@
 		gtk_text_buffer_get_bounds (HTML_TEXTAREA (e)->buffer, &first, &last);
 		gtk_text = gtk_text_buffer_get_text (HTML_TEXTAREA (e)->buffer, &first, &last, FALSE);
 
-		encoded_str = html_embedded_encode_string (gtk_text);
+		encoded_str = html_embedded_encode_string (gtk_text, codepage);
 		encoding = g_string_append (encoding, encoded_str);
 
 		g_free (encoded_str);

Modified: trunk/gtkhtml/htmltextinput.c
==============================================================================
--- trunk/gtkhtml/htmltextinput.c	(original)
+++ trunk/gtkhtml/htmltextinput.c	Wed Dec 10 18:28:18 2008
@@ -108,19 +108,19 @@
 /* HTMLEmbedded methods.  */
 
 static gchar *
-encode (HTMLEmbedded *e)
+encode (HTMLEmbedded *e, const gchar* codepage)
 {
 	GString *encoding = g_string_new ("");
 	gchar *ptr;
 
 	if(strlen (e->name)) {
-		ptr = html_embedded_encode_string (e->name);
+		ptr = html_embedded_encode_string (e->name, codepage);
 		encoding = g_string_append (encoding, ptr);
 		g_free (ptr);
 
 		encoding = g_string_append_c (encoding, '=');
 
-		ptr = html_embedded_encode_string (gtk_entry_get_text (GTK_ENTRY (e->widget)));
+		ptr = html_embedded_encode_string (gtk_entry_get_text (GTK_ENTRY (e->widget)), codepage);
 		encoding = g_string_append (encoding, ptr);
 		g_free (ptr);
 	}

Modified: trunk/gtkhtml/htmltokenizer.c
==============================================================================
--- trunk/gtkhtml/htmltokenizer.c	(original)
+++ trunk/gtkhtml/htmltokenizer.c	Wed Dec 10 18:28:18 2008
@@ -33,6 +33,8 @@
 enum {
 	HTML_TOKENIZER_BEGIN_SIGNAL,
 	HTML_TOKENIZER_END_SIGNAL,
+	HTML_TOKENIZER_CHANGECONTENT_SIGNAL,
+	HTML_TOKENIZER_CHANGEENGINE_SIGNAL,
 	HTML_TOKENIZER_LAST_SIGNAL
 };
 
@@ -52,6 +54,7 @@
 	gint used;
 	gchar * data;
 };
+
 struct _HTMLTokenizerPrivate {
 
 	/* token buffers list */
@@ -87,7 +90,6 @@
 	gboolean textarea; /* Are we in a <textarea> block? */
 	gint     pre; /* Are we in a <pre> block? */
 	gboolean select; /* Are we in a <select> block? */
-	gboolean charEntity; /* Are we in an &... sequence? */
 	gboolean extension; /* Are we in an <!-- +GtkHTML: sequence? */
 	gboolean aTag; /* Are we in a <a/> tag*/
 
@@ -117,11 +119,17 @@
 	GList *blocking; /* Blocking tokens */
 
 	const gchar *searchFor;
-	gboolean utf8;
-	gchar utf8_buffer[7];
-	gint utf8_length;
+	
+	gboolean enableconvert;
+	
+	gchar * content_type;
+	/*convert*/
+	GIConv iconv_cd;
+
 };
 
+
+
 static const gchar *commentStart = "<!--";
 static const gchar *scriptEnd = "</script>";
 static const gchar *styleEnd = "</style>";
@@ -144,12 +152,20 @@
 
 /* default implementations of tokenization functions */
 static void     html_tokenizer_finalize             (GObject *);
-static void     html_tokenizer_real_begin           (HTMLTokenizer *, gchar *content_type);
+static void     html_tokenizer_real_change          (HTMLTokenizer *, const gchar *content_type);
+static void     html_tokenizer_real_begin           (HTMLTokenizer *, const gchar *content_type);
+static void     html_tokenizer_real_engine_type (HTMLTokenizer *t, gboolean engine_type);
 static void     html_tokenizer_real_write           (HTMLTokenizer *, const gchar *str, size_t size);
 static void     html_tokenizer_real_end             (HTMLTokenizer *);
+static const gchar *
+				html_tokenizer_real_get_content_type(HTMLTokenizer *);
+static gboolean
+				html_tokenizer_real_get_engine_type(HTMLTokenizer *);
 static gchar   *html_tokenizer_real_peek_token      (HTMLTokenizer *);
 static gchar   *html_tokenizer_real_next_token      (HTMLTokenizer *);
 static gboolean html_tokenizer_real_has_more_tokens (HTMLTokenizer *);
+static gchar   *html_tokenizer_converted_token (HTMLTokenizer *t,const gchar* token);
+
 
 static HTMLTokenizer *html_tokenizer_real_clone     (HTMLTokenizer *);
 
@@ -160,8 +176,11 @@
 							      HTMLTokenType   tt);
 static void               html_tokenizer_tokenize_one_char   (HTMLTokenizer  *t,
 							      const gchar  **src);
+static void				  add_char(HTMLTokenizer *t, gchar c);
+
+gboolean 				  is_need_convert(const gchar* token);
 
-static void               add_unichar(HTMLTokenizer *t, gunichar wc);
+gchar*					  html_tokenizer_convert_entity(gchar * token);
 
 static GObjectClass *parent_class = NULL;
 
@@ -172,6 +191,26 @@
 
 	parent_class = g_type_class_ref (G_TYPE_OBJECT);
 
+	html_tokenizer_signals[HTML_TOKENIZER_CHANGECONTENT_SIGNAL] =
+		g_signal_new ("change",
+			      G_TYPE_FROM_CLASS (klass),
+			      G_SIGNAL_RUN_LAST,
+			      G_STRUCT_OFFSET (HTMLTokenizerClass, change),
+			      NULL, NULL,
+			      g_cclosure_marshal_VOID__POINTER,
+			      G_TYPE_NONE,
+			      1, G_TYPE_POINTER);
+
+	html_tokenizer_signals[HTML_TOKENIZER_CHANGEENGINE_SIGNAL] =
+		g_signal_new ("engine",
+			      G_TYPE_FROM_CLASS (klass),
+			      G_SIGNAL_RUN_LAST,
+			      G_STRUCT_OFFSET (HTMLTokenizerClass, engine),
+			      NULL, NULL,
+			      g_cclosure_marshal_VOID__POINTER,
+			      G_TYPE_NONE,
+			      1, G_TYPE_POINTER);
+
 	html_tokenizer_signals[HTML_TOKENIZER_BEGIN_SIGNAL] =
 		g_signal_new ("begin",
 			      G_TYPE_FROM_CLASS (klass),
@@ -194,12 +233,16 @@
 
 	object_class->finalize = html_tokenizer_finalize;
 
+	klass->change     = html_tokenizer_real_change;
+	klass->engine     = html_tokenizer_real_engine_type;
 	klass->begin      = html_tokenizer_real_begin;
 	klass->end        = html_tokenizer_real_end;
 
 	klass->write      = html_tokenizer_real_write;
 	klass->peek_token = html_tokenizer_real_peek_token;
 	klass->next_token = html_tokenizer_real_next_token;
+	klass->get_content_type = html_tokenizer_real_get_content_type;
+	klass->get_engine_type = html_tokenizer_real_get_engine_type;
 	klass->has_more   = html_tokenizer_real_has_more_tokens;
 	klass->clone      = html_tokenizer_real_clone;
 }
@@ -232,7 +275,6 @@
 	p->textarea = FALSE;
 	p->pre = 0;
 	p->select = FALSE;
-	p->charEntity = FALSE;
 	p->extension = FALSE;
 	p->aTag = FALSE;
 
@@ -250,19 +292,30 @@
 	p->blocking = NULL;
 
 	p->searchFor = NULL;
+	
+	/* Use old logic and not convert charset */
+	p->enableconvert = FALSE;
+	
+	p->content_type = g_strdup ("html/text; charset=utf-8");
 }
 
 static void
 html_tokenizer_finalize (GObject *obj)
 {
 	HTMLTokenizer *t = HTML_TOKENIZER (obj);
-
+	
 	html_tokenizer_reset (t);
-
+	
+	if(is_valid_g_iconv (t->priv->iconv_cd))
+		g_iconv_close (t->priv->iconv_cd);
+		
+	if(t->priv->content_type)
+		g_free(t->priv->content_type);
+		
 	g_free (t->priv);
 	t->priv = NULL;
 
-        G_OBJECT_CLASS (parent_class)->finalize (obj);
+    G_OBJECT_CLASS (parent_class)->finalize (obj);
 }
 
 GType
@@ -369,10 +422,153 @@
 		/* finally get first token */
 		token = buffer->data;
 	}
+	
+	return html_tokenizer_converted_token (t,token);
+}
+
+/* test iconv for valid*/
+gboolean
+is_valid_g_iconv(const GIConv iconv_cd)
+{
+	return iconv_cd != NULL && iconv_cd != (GIConv)-1;
+}
 
+/*Convert only chars when code >127*/
+gboolean
+is_need_convert (const gchar* token)
+{
+	int i=strlen (token);
+	for(;i>=0;i--)
+		if(token[i]&128)
+			return TRUE;
+	return FALSE;
+}
+
+/*Convert entity values in already converted to right charset token*/
+gchar*
+html_tokenizer_convert_entity(gchar * token)
+{	
+	char* full_pos = token + strlen (token);	
+	char* write_pos = token + strcspn (token, "&");
+	gunichar value;
+	size_t count_chars;
+	char *read_pos;
+	while(write_pos < full_pos)
+	{
+		write_pos++;
+		count_chars = strcspn(write_pos+1, ";");
+		value = INVALID_CHARACTER_MARKER;
+		if(count_chars < 14)
+		{
+			char save = *(write_pos + count_chars + 1);
+			*(write_pos + count_chars + 1)=0;
+			/* &#1234567 */
+			if (*write_pos == '#')
+			{
+				if(isdigit (*(write_pos + 1)))
+				{
+					value=strtoull (write_pos + 1, NULL, 10);
+				}
+				/* &#xdd */
+				else if(*(write_pos + 1) == 'x')
+				{
+					value=strtoull (write_pos + 2, NULL, 16);
+				}
+			}
+			else 
+			{
+				value=html_entity_parse (write_pos, 0);
+			}
+			*(write_pos+count_chars+1)=save;
+			if(count_chars>0)
+			{
+				memset (write_pos-1, ' ', count_chars + 3);
+				/* first char is & I think this not need */
+				write_pos --;
+				read_pos = write_pos + count_chars + 3;
+				write_pos += g_unichar_to_utf8 (value,write_pos);
+				memcpy (write_pos, read_pos, full_pos - read_pos + 1);
+				full_pos = write_pos + (full_pos - read_pos);
+			}
+		}	
+		write_pos = write_pos + strcspn (write_pos, "&");
+	}		
 	return token;
 }
 
+gchar* 
+convert_text_encoding(const GIConv iconv_cd,const gchar * token)
+{
+	size_t currlength;
+	gchar * newbuffer;
+	gchar * returnbuffer;
+	const gchar * current;
+	size_t newlength;
+	size_t oldlength;
+	if(token == NULL)
+		return NULL;
+	currlength = strlen (token);
+	if(is_valid_g_iconv (iconv_cd) && is_need_convert (token))
+	{
+		current = token;
+		newlength = currlength*7+1;
+		oldlength = newlength;
+		newbuffer = g_new (gchar, newlength);
+		returnbuffer = newbuffer;
+		g_assert (returnbuffer);
+		while(currlength > 0)
+		{			
+			/*function not change current, but g_iconv use not const source*/
+			g_iconv (iconv_cd, (gchar **)&current, &currlength, &newbuffer, &newlength);
+			if(currlength > 0)
+			{
+				g_warning ("IconvError=%s", current);
+				*newbuffer = INVALID_CHARACTER_MARKER;
+				newbuffer ++;
+				current ++;
+				currlength --;
+				newlength --;
+			}
+		}
+		returnbuffer[oldlength - newlength] = '\0';
+		returnbuffer = g_realloc (returnbuffer, oldlength - newlength + 1);
+		g_assert (returnbuffer);
+		return returnbuffer;
+	}
+	newbuffer = g_new (gchar, currlength + 1);
+	memcpy (newbuffer,token, currlength);
+	newbuffer[currlength] = 0;
+	return newbuffer;
+}
+
+static gchar *
+html_tokenizer_converted_token(HTMLTokenizer *t, const gchar* token)
+{
+	if(token != NULL)
+	{
+		struct _HTMLTokenizerPrivate *p = t->priv;
+		return html_tokenizer_convert_entity (convert_text_encoding (p->iconv_cd, token));
+	}
+	return NULL;
+}
+
+static const gchar *
+html_tokenizer_real_get_content_type(HTMLTokenizer *t)
+{
+	struct _HTMLTokenizerPrivate *p = t->priv;
+	if(p->content_type)
+		return p->content_type;
+	return NULL;
+}
+
+static gboolean
+html_tokenizer_real_get_engine_type(HTMLTokenizer *t)
+{
+	struct _HTMLTokenizerPrivate *p = t->priv;
+	return p->enableconvert;
+	return FALSE;
+}
+
 static gchar *
 html_tokenizer_real_next_token (HTMLTokenizer *t)
 {
@@ -411,8 +607,8 @@
 
 	p->tokens_num--;
 	g_assert (p->tokens_num >= 0);
-
-	return token;
+	
+	return html_tokenizer_converted_token (t, token);
 }
 
 static gboolean
@@ -460,14 +656,103 @@
 	p->scriptCode = NULL;
 }
 
-static gint
-charset_is_utf8 (gchar *content_type)
+static gboolean
+charset_is_utf8 (const gchar *content_type)
+{	/* A NULL content type never matches; otherwise look for "=utf-8". */
+	return (content_type != NULL) ? (strstr (content_type, "=utf-8") != NULL) : FALSE;
+}
+
+static gboolean
+is_text (const gchar *content_type)
+{	/* A NULL content type never matches; otherwise look for "text/". */
+	return (content_type != NULL) ? (strstr (content_type, "text/") != NULL) : FALSE;
+}
+
+/* Return the text following the last "charset=" (or, failing that, the last
+ * "encoding=") in content_type.  The result points into content_type itself;
+ * NULL is returned when content_type is NULL or holds neither key. */
+static const gchar*
+get_encoding_from_content_type(const gchar * content_type)
+{
+	static const gchar * const keys[] = { "charset=", "encoding=" };
+	gsize i;
+
+	for (i = 0; content_type != NULL && i < G_N_ELEMENTS (keys); i++) {
+		const gchar *hit = g_strrstr (content_type, keys[i]);
+		if (hit != NULL)
+			return hit + strlen (keys[i]);
+	}
+	return NULL;
+}
+
+/* Open a converter from the charset named in content_type to UTF-8; NULL if
+ * content_type is NULL, contains "=utf-8", or names no charset. */
+GIConv
+generate_iconv_from(const gchar * content_type)
+{
+	const gchar * encoding;
+
+	if (content_type == NULL || charset_is_utf8 (content_type))
+		return NULL;
+	encoding = get_encoding_from_content_type (content_type);
+	return encoding ? g_iconv_open ("utf-8", encoding) : NULL;
+}
+
+/* Open a converter from UTF-8 to the charset named in content_type; NULL if
+ * content_type is NULL, contains "=utf-8", or names no charset. */
+GIConv
+generate_iconv_to(const gchar * content_type)
+{
+	const gchar * encoding;
+
+	if (content_type == NULL || charset_is_utf8 (content_type))
+		return NULL;
+	encoding = get_encoding_from_content_type (content_type);
+	return encoding ? g_iconv_open (encoding, "utf-8") : NULL;
+}
+
+static void
+html_tokenizer_real_engine_type (HTMLTokenizer *t, gboolean engine_type)
 {
-	return content_type && strstr (content_type, "charset=utf-8") != NULL;
+	struct _HTMLTokenizerPrivate *p;
+	p = t->priv;
+	/* This flag is what html_tokenizer_real_change() tests before acting. */
+	p->enableconvert = engine_type;
+}
+
+/* Switch the tokenizer to a new content type.  Ignored unless content_type
+ * contains "text/" and conversion has been enabled on this tokenizer.  The
+ * lower-cased copy of content_type is stored and the input converter is
+ * reopened to match it. */
+static void
+html_tokenizer_real_change (HTMLTokenizer *t, const gchar *content_type)
+{
+	struct _HTMLTokenizerPrivate *p;
+
+	if (!is_text (content_type) || !t->priv->enableconvert)
+		return;
+	p = t->priv;
+
+	/* g_free() accepts NULL, so the old value needs no guard. */
+	g_free (p->content_type);
+	p->content_type = g_ascii_strdown (content_type, -1);
+
+	/* Drop any converter left over from the previous content type. */
+	if (is_valid_g_iconv (p->iconv_cd))
+		g_iconv_close (p->iconv_cd);
+	p->iconv_cd = generate_iconv_from (p->content_type);
+
+#if 0
+	if (charset_is_utf8 (p->content_type))
+		g_warning ("Trying UTF-8");
+	else
+		g_warning ("Trying %s",p->content_type);
+#endif
 }
 
+
 static void
-html_tokenizer_real_begin (HTMLTokenizer *t, gchar *content_type)
+html_tokenizer_real_begin (HTMLTokenizer *t, const gchar *content_type)
 {
 	struct _HTMLTokenizerPrivate *p = t->priv;
 
@@ -490,17 +775,8 @@
 	p->searchCount = 0;
 	p->searchGtkHTMLCount = 0;
 	p->title = FALSE;
-	p->charEntity = FALSE;
-
-	p->utf8 = charset_is_utf8 (content_type);
-	p->utf8_length = 0;
-#if 0
-	if (p->utf8)
-		g_warning ("Trying UTF-8");
-	else
-		g_warning ("Trying ISO-8859-1");
-#endif
 
+	html_tokenizer_real_change (t, content_type);
 }
 
 static void
@@ -561,6 +837,23 @@
 	}
 }
 
+/* Pass the byte at *c to add_char() and step *c past it. */
+static void add_byte (HTMLTokenizer *t, const gchar **c)
+{
+	add_char (t, *(*c)++);
+}
+
+/* Store c at the write cursor, advance it, and keep the buffer terminated.
+ * A NUL byte is dropped.  No capacity check is done here. */
+static void
+add_char(HTMLTokenizer *t, gchar c){
+	struct _HTMLTokenizerPrivate *p = t->priv;
+	if (c == '\0')
+		return;
+	*p->dest++ = c;
+	*p->dest = '\0';
+}
+
 static void
 html_tokenizer_append_token_buffer (HTMLTokenizer *t, gint min_size)
 {
@@ -592,31 +885,31 @@
 	struct _HTMLTokenizerPrivate *p = t->priv;
 
 	if (p->tag || p->select) {
-		add_unichar (t, ' ');
+		add_char (t, ' ');
 	}
 	else if (p->textarea) {
 		if (p->pending == LFPending)
-			add_unichar (t, '\n');
+			add_char (t, '\n');
 		else
-			add_unichar (t, ' ');
+			add_char (t, ' ');
 	}
 	else if (p->pre) {
 		switch (p->pending) {
 		case SpacePending:
-			add_unichar (t, ' ');
+			add_char (t, ' ');
 			break;
 		case LFPending:
 			if (p->dest > p->buffer) {
 				html_tokenizer_append_token (t, p->buffer, p->dest - p->buffer);
 			}
 			p->dest = p->buffer;
-			add_unichar (t, TAG_ESCAPE);
-			add_unichar (t, '\n');
+			add_char (t, TAG_ESCAPE);
+			add_char (t, '\n');
 			html_tokenizer_append_token (t, p->buffer, 2);
 			p->dest = p->buffer;
 			break;
 		case TabPending:
-			add_unichar (t, '\t');
+			add_char (t, '\t');
 			break;
 		default:
 			g_warning ("Unknown pending type: %d\n", (gint) p->pending);
@@ -624,7 +917,7 @@
 		}
 	}
 	else {
-		add_unichar (t, ' ');
+		add_char (t, ' ');
 	}
 
 	p->pending = NonePending;
@@ -779,196 +1072,6 @@
 	}
 }
 
-static gunichar win1252_to_unicode [32] = {
-	0x20ac,
-	0x81,
-	0x201a,
-	0x0192,
-	0x201e,
-	0x2026,
-	0x2020,
-	0x2021,
-	0x02c6,
-	0x2030,
-	0x0160,
-	0x2039,
-	0x0152,
-	0x8d,
-	0x017d,
-	0x8f,
-	0x90,
-	0x2018,
-	0x2019,
-	0x201c,
-	0x201d,
-	0x2022,
-	0x2013,
-	0x2014,
-	0x02dc,
-	0x2122,
-	0x0161,
-	0x203a,
-	0x0153,
-	0x9d,
-	0x017e,
-	0x0178
-};
-
-static void
-add_unichar (HTMLTokenizer *t, gunichar wc)
-{
-	struct _HTMLTokenizerPrivate *p = t->priv;
-
-	p->utf8_length = 0;
-
-	/*
-	  chars in range 128 - 159 are control characters in unicode,
-	  but most browsers treat them as windows 1252
-	  encoded characters and translate them to unicode
-	  it's broken, but we do the same here
-	*/
-	if (wc > 127 && wc < 160)
-		wc = win1252_to_unicode [wc - 128];
-
-	if (wc != '\0') {
-		p->dest += g_unichar_to_utf8 (wc, p->dest);
-		*(p->dest) = 0;
-	}
-}
-
-static void
-add_byte (HTMLTokenizer *t, const gchar **src)
-{
-	gunichar wc;
-	struct _HTMLTokenizerPrivate *p = t->priv;
-
-	if (p->utf8) {
-		p->utf8_buffer[p->utf8_length] = **src;
-		p->utf8_length++;
-
-		wc = g_utf8_get_char_validated ((const gchar *)p->utf8_buffer, p->utf8_length);
-		if (wc == -1 || p->utf8_length >= (sizeof(p->utf8_buffer)/sizeof(p->utf8_buffer[0]))) {
-			add_unichar (t, INVALID_CHARACTER_MARKER);
-			(*src)++;
-			return;
-		} else if (wc == -2) {
-			/* incomplete character check again */
-			(*src)++;
-			return;
-		}
-	} else {
-		wc = (guchar)**src;
-	}
-
-	add_unichar (t, wc);
-	(*src)++;
-}
-
-static void
-flush_entity (HTMLTokenizer *t)
-{
-	struct _HTMLTokenizerPrivate *p = t->priv;
-	/* ignore the TAG_ESCAPE when flushing */
-	const char *str = p->searchBuffer + 1;
-
-	 while (p->searchCount--) {
-		add_byte (t, &str);
-	}
-}
-
-static gboolean
-add_unichar_validated (HTMLTokenizer *t, gunichar uc)
-{
-	if (g_unichar_validate (uc)) {
-		add_unichar (t, uc);
-		return TRUE;
-	}
-
-	g_warning ("invalid character value: x%xd", uc);
-	return FALSE;
-}
-
-static void
-in_entity (HTMLTokenizer *t, const gchar **src)
-{
-	struct _HTMLTokenizerPrivate *p = t->priv;
-	gunichar entityValue = 0;
-
-	/* See http://www.mozilla.org/newlayout/testcases/layout/entities.html for a complete entity list,
-	   ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT
-	   (or 'man iso_8859_1') for the character encodings. */
-
-	p->searchBuffer [p->searchCount + 1] = **src;
-	p->searchBuffer [p->searchCount + 2] = '\0';
-
-	/* Check for &#0000 sequence */
-	if (p->searchBuffer[2] == '#') {
-		if ((p->searchCount > 1) &&
-		    (!isdigit (**src)) &&
-		    (p->searchBuffer[3] != 'x')) {
-			/* &#123 */
-			p->searchBuffer [p->searchCount + 1] = '\0';
-			entityValue = strtoul (&(p->searchBuffer [3]),
-					       NULL, 10);
-			p->charEntity = FALSE;
-		}
-		if ((p->searchCount > 1) &&
-		    (!isalnum (**src)) &&
-		    (p->searchBuffer[3] == 'x')) {
-			/* &x12AB */
-			p->searchBuffer [p->searchCount + 1] = '\0';
-
-			entityValue = strtoul (&(p->searchBuffer [4]),
-					       NULL, 16);
-			p->charEntity = FALSE;
-		}
-	}
-	else {
-		/* Check for &abc12 sequence */
-		if (!isalnum (**src)) {
-			p->charEntity = FALSE;
-			if ((p->searchBuffer [p->searchCount + 1] == ';') ||
-			    (!p->tag)) {
-				char *ename = p->searchBuffer + 2;
-
-				p->searchBuffer [p->searchCount + 1] = '\0'; /* FIXME sucks */
-				entityValue = html_entity_parse (ename, 0);
-			}
-		}
-
-	}
-
-	if (p->searchCount > 13) {
-		/* Ignore this sequence since it's too long */
-		p->charEntity = FALSE;
-		flush_entity (t);
-	}
-	else if (p->charEntity) {
-				/* Keep searching for end of character entity */
-		p->searchCount++;
-		(*src)++;
-	}
-	else {
-		/*
-		 * my reading of http://www.w3.org/TR/html4/intro/sgmltut.html#h-3.2.2 makes
-		 * seem correct to always collapse entity references, even in element names
-		 * and attributes.
-		 */
-		if (entityValue) {
-			if (entityValue != TAG_ESCAPE)
-				/* make sure the entity value is a valid character value */
-				if (!add_unichar_validated (t, entityValue))
-					add_unichar (t, INVALID_CHARACTER_MARKER);
-
-			if (**src == ';')
-				(*src)++;
-		} else {
-			/* Ignore the sequence, just add it as plaintext */
-			flush_entity (t);
-		}
-	}
-}
-
 static void
 in_tag (HTMLTokenizer *t, const gchar **src)
 {
@@ -994,7 +1097,7 @@
 				/* Invalid tag, just add it */
 		if (p->pending)
 			html_tokenizer_add_pending (t);
-		add_unichar (t, '<');
+		add_char (t, '<');
 		add_byte (t, src);
 		return;
 	}
@@ -1006,31 +1109,13 @@
 		html_tokenizer_append_token (t, p->buffer, p->dest - p->buffer);
 		p->dest = p->buffer;
 	}
-	add_unichar (t, TAG_ESCAPE);
-	add_unichar (t, '<');
+	add_char (t, TAG_ESCAPE);
+	add_char (t, '<');
 	p->tag = TRUE;
 	p->searchCount = 1; /* Look for <!-- to start comment */
 }
 
 static void
-start_entity (HTMLTokenizer *t, const gchar **src)
-{
-	struct _HTMLTokenizerPrivate *p = t->priv;
-
-	(*src)++;
-
-	p->discard = NoneDiscard;
-
-	if (p->pending)
-		html_tokenizer_add_pending (t);
-
-	p->charEntity      = TRUE;
-	p->searchBuffer[0] = TAG_ESCAPE;
-	p->searchBuffer[1] = '&';
-	p->searchCount     = 1;
-}
-
-static void
 start_tag (HTMLTokenizer *t, const gchar **src)
 {
 	(*src)++;
@@ -1046,7 +1131,7 @@
 
 	p->searchCount = 0; /* Stop looking for <!-- sequence */
 
-	add_unichar (t, '>');
+	add_char (t, '>');
 
 	/* Make the tag lower case */
 	ptr = p->buffer + 2;
@@ -1208,7 +1293,7 @@
 		t->priv->searchCount = 0; /* Stop looking for <!-- sequence */
 		if ((t->priv->tquote == SINGLE_QUOTE && **src == '\"') /* match " */
 		    || (t->priv->tquote == DOUBLE_QUOTE && **src == '\'')) {
-			add_unichar (t, **src);
+			add_char (t, **src);
 			(*src)++;
 		} else if (*(t->priv->dest-1) == '=' && !t->priv->tquote) {
 			t->priv->discard = SpaceDiscard;
@@ -1218,7 +1303,7 @@
 				t->priv->tquote = DOUBLE_QUOTE;
 			else
 				t->priv->tquote = SINGLE_QUOTE;
-			add_unichar (t, **src);
+			add_char (t, **src);
 			(*src)++;
 		}
 		else if (t->priv->tquote) {
@@ -1245,7 +1330,7 @@
 	t->priv->discard = NoneDiscard;
 	if (t->priv->tag) {
 		t->priv->searchCount = 0; /* Stop looking for <!-- sequence */
-		add_unichar (t, '=');
+		add_char (t, '=');
 		if (!t->priv->tquote) {
 			t->priv->pending = NonePending;
 			t->priv->discard = SpaceDiscard;
@@ -1255,7 +1340,7 @@
 		if (t->priv->pending)
 			html_tokenizer_add_pending (t);
 
-		add_unichar (t, '=');
+		add_char (t, '=');
 	}
 	(*src)++;
 }
@@ -1309,12 +1394,8 @@
 		in_extension (t, src);
 	else if (p->script || p->style)
 		in_script_or_style (t, src);
-	else if (p->charEntity)
-		in_entity (t, src);
 	else if (p->startTag)
 		in_tag (t, src);
-	else if (**src == '&' && !p->aTag)
-		start_entity (t, src);
 	else if (**src == '<' && !p->tag)
 		start_tag (t, src);
 	else if (**src == '>' && p->tag && !p->tquote)
@@ -1335,7 +1416,7 @@
 html_tokenizer_real_write (HTMLTokenizer *t, const gchar *string, size_t size)
 {
 	const gchar *src = string;
-
+	
 	while ((src - string) < size)
 		html_tokenizer_tokenize_one_char (t, &src);
 }
@@ -1381,14 +1462,32 @@
 /** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** **/
 
 void
-html_tokenizer_begin (HTMLTokenizer *t, gchar *content_type)
+html_tokenizer_begin (HTMLTokenizer *t, const gchar *content_type)
 {
+	
 	g_return_if_fail (t && HTML_IS_TOKENIZER (t));
 
 	g_signal_emit (t, html_tokenizer_signals [HTML_TOKENIZER_BEGIN_SIGNAL], 0, content_type);
 }
 
 void
+html_tokenizer_set_engine_type (HTMLTokenizer *t, gboolean engine_type)
+{
+	g_return_if_fail (t && HTML_IS_TOKENIZER (t));
+	/* Emit the change-engine signal on t, passing engine_type along. */
+	g_signal_emit (t, html_tokenizer_signals [HTML_TOKENIZER_CHANGEENGINE_SIGNAL], 0, engine_type);
+}
+
+void
+html_tokenizer_change_content_type (HTMLTokenizer *t,const gchar *content_type)
+{	
+	g_return_if_fail (t && HTML_IS_TOKENIZER (t));
+	/* Emit the change-content signal on t, passing content_type along. */
+	g_signal_emit (t, html_tokenizer_signals [HTML_TOKENIZER_CHANGECONTENT_SIGNAL], 0, content_type);
+}
+
+
+void
 html_tokenizer_end (HTMLTokenizer *t)
 {
 	g_return_if_fail (t && HTML_IS_TOKENIZER (t));
@@ -1427,6 +1526,39 @@
 
 }
 
+/* Ask the tokenizer's class for its content type via the get_content_type
+ * virtual method.  Returns NULL, after a warning, when the class provides no
+ * implementation, and NULL when t is not a tokenizer. */
+const gchar *
+html_tokenizer_get_content_type(HTMLTokenizer *t)
+{
+	HTMLTokenizerClass *klass;
+
+	g_return_val_if_fail (t && HTML_IS_TOKENIZER (t), NULL);
+	klass = HTML_TOKENIZER_CLASS (G_OBJECT_GET_CLASS (t));
+	if (klass->get_content_type == NULL) {
+		g_warning ("No get_content_type method defined.");
+		return NULL;
+	}
+	return klass->get_content_type (t);
+}
+
+/* Ask the tokenizer's class for its engine type via the get_engine_type
+ * virtual method; FALSE (with a warning) when the class provides none. */
+gboolean
+html_tokenizer_get_engine_type (HTMLTokenizer *t)
+{
+	HTMLTokenizerClass *klass;
+
+	g_return_val_if_fail (t && HTML_IS_TOKENIZER (t),FALSE);
+	klass = HTML_TOKENIZER_CLASS (G_OBJECT_GET_CLASS (t));
+	if (klass->get_engine_type == NULL) {
+		g_warning ("No get_engine_type method defined.");
+		return FALSE;
+	}
+	return klass->get_engine_type (t);
+}
+
 gchar *
 html_tokenizer_next_token (HTMLTokenizer *t)
 {

Modified: trunk/gtkhtml/htmltokenizer.h
==============================================================================
--- trunk/gtkhtml/htmltokenizer.h	(original)
+++ trunk/gtkhtml/htmltokenizer.h	Wed Dec 10 18:28:18 2008
@@ -48,13 +48,17 @@
 	GObjectClass parent_class;
 
 	/* signals */
-	void     (*begin)           (HTMLTokenizer *, gchar *content_type);
+	void     (*begin)           (HTMLTokenizer *, const gchar *content_type);
+	void     (*change)          (HTMLTokenizer *, const gchar *content_type);
+	void     (*engine)          (HTMLTokenizer *, gboolean enginetype);
 	void     (*end)             (HTMLTokenizer *);
 
 	/* virtual functions */
 	void           (*write)      (HTMLTokenizer *, const gchar *string, size_t size);
 	gchar         *(*peek_token) (HTMLTokenizer *);
 	gchar         *(*next_token) (HTMLTokenizer *);
+	const gchar   *(*get_content_type) (HTMLTokenizer *);
+	gboolean       (*get_engine_type) (HTMLTokenizer *);
 	gboolean       (*has_more)   (HTMLTokenizer *);
 
 	HTMLTokenizer *(*clone)      (HTMLTokenizer *);
@@ -66,7 +70,17 @@
 void           html_tokenizer_destroy         (HTMLTokenizer *tokenizer);
 
 void           html_tokenizer_begin           (HTMLTokenizer *t,
-					       gchar *content_type);
+					       const gchar *content_type);
+
+const gchar *  html_tokenizer_get_content_type(HTMLTokenizer *t);					     
+void           html_tokenizer_change_content_type
+				              (HTMLTokenizer *t,
+					       const gchar *content_type);
+					       
+void	       html_tokenizer_set_engine_type (HTMLTokenizer *t,
+						   gboolean enginetype);
+gboolean       html_tokenizer_get_engine_type (HTMLTokenizer *t);
+				
 void           html_tokenizer_write           (HTMLTokenizer *t,
 					       const gchar *string,
 					       size_t size);
@@ -77,4 +91,12 @@
 
 HTMLTokenizer *html_tokenizer_clone           (HTMLTokenizer *t);
 
+/* Build a converter from the input code page to UTF-8. */
+GIConv     generate_iconv_from (const gchar * content_type);
+/* Build a converter from UTF-8 to the encoding needed for the resulting query. */
+GIConv     generate_iconv_to (const gchar * content_type);
+/* Convert text to the needed encoding. */
+gchar*     convert_text_encoding (const GIConv iconv_cd, const gchar * token);
+/* Check whether the result of g_iconv_open is valid. */
+gboolean   is_valid_g_iconv (const GIConv iconv_cd);
 #endif /* _HTMLTOKENIZER_H_ */

Modified: trunk/gtkhtml/testgtkhtml.c
==============================================================================
--- trunk/gtkhtml/testgtkhtml.c	(original)
+++ trunk/gtkhtml/testgtkhtml.c	Wed Dec 10 18:28:18 2008
@@ -662,6 +662,7 @@
 static void
 got_data (SoupSession *session, SoupMessage *msg, gpointer user_data)
 {
+	const gchar *ContentType;
 	GtkHTMLStream *handle = user_data;
 
 	if (!SOUP_STATUS_IS_SUCCESSFUL (msg->status_code)) {
@@ -669,6 +670,13 @@
 		gtk_html_end (html, handle, GTK_HTML_STREAM_ERROR);
 		return;
 	}
+	/* Enable change content type in engine */
+	gtk_html_set_default_engine(html, TRUE);
+	
+	ContentType = soup_message_headers_get (msg->response_headers, "Content-type");
+
+	if (ContentType != NULL)
+		gtk_html_set_default_content_type (html, ContentType);
 
 	gtk_html_write (html, handle, msg->response_body->data,
 			msg->response_body->length);
@@ -701,7 +709,6 @@
 				if (nread == -1) {
 					if (errno == EINTR)
 						continue;
-
 					g_warning ("read error: %s", g_strerror (errno));
 					gtk_html_end (html, handle, GTK_HTML_STREAM_ERROR);
 					break;
@@ -846,8 +853,7 @@
 	}
 
 	/* TODO2 gnome_animator_start (GNOME_ANIMATOR (animator)); */
-
-	html_stream_handle = gtk_html_begin_content (html, "text/html; charset=utf-8");
+	html_stream_handle = gtk_html_begin_content (html, (gchar *)gtk_html_get_default_content_type (html));
 
 	/* Yuck yuck yuck.  Well this code is butt-ugly already
 	anyway.  */



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]