gtkhtml r9061 - trunk/gtkhtml
- From: mcrha svn gnome org
- To: svn-commits-list gnome org
- Subject: gtkhtml r9061 - trunk/gtkhtml
- Date: Wed, 10 Dec 2008 18:28:19 +0000 (UTC)
Author: mcrha
Date: Wed Dec 10 18:28:18 2008
New Revision: 9061
URL: http://svn.gnome.org/viewvc/gtkhtml?rev=9061&view=rev
Log:
2008-12-10 Denis Pauk <pauk denis gmail com>
** Fix for bug #552357
* gtkhtml/htmltokenizer.c
* gtkhtml/htmltokenizer.h
* gtkhtml/htmlradio.c
* gtkhtml/htmlform.c
* gtkhtml/htmlengine.c
* gtkhtml/htmlengine.h
* gtkhtml/htmlbutton.c
* gtkhtml/htmltextarea.c
* gtkhtml/htmlembedded.c
* gtkhtml/htmlembedded.h
* gtkhtml/htmlselect.c
* gtkhtml/htmlcheckbox.c
* gtkhtml/htmlhidden.c
* gtkhtml/testgtkhtml.c
* gtkhtml/htmlimageinput.c
* gtkhtml/gtkhtml.c
* gtkhtml/htmltextinput.c
Support the http-equiv meta tag: set the encoding (see gtk_html_set_default_content_type)
when it is present in the HTTP headers, and re-encode the query string generated from a form
(adds support for non-UTF-8 encodings).
To enable the new behavior, either call
gtk_html_set_default_engine(html, TRUE);
or pass
GTK_HTML_BEGIN_CHANGECONTENTTYPE in the GtkHTMLBeginFlags argument of
gtk_html_begin_full.
Modified:
trunk/gtkhtml/ChangeLog
trunk/gtkhtml/gtkhtml-enums.h
trunk/gtkhtml/gtkhtml-private.h
trunk/gtkhtml/gtkhtml.c
trunk/gtkhtml/gtkhtml.h
trunk/gtkhtml/htmlbutton.c
trunk/gtkhtml/htmlcheckbox.c
trunk/gtkhtml/htmlembedded.c
trunk/gtkhtml/htmlembedded.h
trunk/gtkhtml/htmlengine.c
trunk/gtkhtml/htmlengine.h
trunk/gtkhtml/htmlform.c
trunk/gtkhtml/htmlframe.c
trunk/gtkhtml/htmlhidden.c
trunk/gtkhtml/htmliframe.c
trunk/gtkhtml/htmlimageinput.c
trunk/gtkhtml/htmlradio.c
trunk/gtkhtml/htmlselect.c
trunk/gtkhtml/htmltextarea.c
trunk/gtkhtml/htmltextinput.c
trunk/gtkhtml/htmltokenizer.c
trunk/gtkhtml/htmltokenizer.h
trunk/gtkhtml/testgtkhtml.c
Modified: trunk/gtkhtml/gtkhtml-enums.h
==============================================================================
--- trunk/gtkhtml/gtkhtml-enums.h (original)
+++ trunk/gtkhtml/gtkhtml-enums.h Wed Dec 10 18:28:18 2008
@@ -309,6 +309,8 @@
GTK_HTML_BEGIN_KEEP_SCROLL = 1 << 0,
GTK_HTML_BEGIN_KEEP_IMAGES = 1 << 1,
GTK_HTML_BEGIN_BLOCK_UPDATES = 1 << 2,
- GTK_HTML_BEGIN_BLOCK_IMAGES = 1 << 3
+ GTK_HTML_BEGIN_BLOCK_IMAGES = 1 << 3,
+ /* enable automatic changing of the content type */
+ GTK_HTML_BEGIN_CHANGECONTENTTYPE = 1 << 4
} GtkHTMLBeginFlags;
#endif
Modified: trunk/gtkhtml/gtkhtml-private.h
==============================================================================
--- trunk/gtkhtml/gtkhtml-private.h (original)
+++ trunk/gtkhtml/gtkhtml-private.h Wed Dec 10 18:28:18 2008
@@ -39,7 +39,6 @@
gint selection_type;
- gchar *content_type;
char *base_url;
GtkWidget *search_input_line;
Modified: trunk/gtkhtml/gtkhtml.c
==============================================================================
--- trunk/gtkhtml/gtkhtml.c (original)
+++ trunk/gtkhtml/gtkhtml.c Wed Dec 10 18:28:18 2008
@@ -767,7 +767,6 @@
html->priv->im_context = NULL;
}
- g_free (html->priv->content_type);
g_free (html->priv->base_url);
g_free (html->priv->caret_first_focus_anchor);
g_free (html->priv);
@@ -3322,7 +3321,6 @@
html->priv->insertion_font_style = GTK_HTML_FONT_STYLE_DEFAULT;
html->priv->selection_type = -1;
html->priv->selection_as_cite = FALSE;
- html->priv->content_type = g_strdup ("html/text; charset=utf-8");
html->priv->search_input_line = NULL;
html->priv->in_object_resize = FALSE;
html->priv->resize_cursor = gdk_cursor_new (GDK_BOTTOM_RIGHT_CORNER);
@@ -3474,7 +3472,7 @@
html->allow_selection = allow;
}
-
+
/**
* gtk_html_begin_full:
* @html: the GtkHTML widget to operate on.
@@ -3515,9 +3513,6 @@
else
html->engine->keep_scroll = FALSE;
- if (!content_type)
- content_type = html->priv->content_type;
-
handle = html_engine_begin (html->engine, content_type);
if (handle == NULL)
return NULL;
@@ -3530,6 +3525,10 @@
if (flags & GTK_HTML_BEGIN_KEEP_SCROLL)
html->engine->newPage = FALSE;
+ /* Enable change content type in engine */
+ if (flags & GTK_HTML_BEGIN_CHANGECONTENTTYPE)
+ gtk_html_set_default_engine(html, TRUE);
+
return handle;
}
@@ -3546,7 +3545,7 @@
{
g_return_val_if_fail (GTK_IS_HTML (html), NULL);
- return gtk_html_begin_full (html, NULL, html->priv->content_type, 0);
+ return gtk_html_begin_full (html, NULL, NULL, 0);
}
/**
@@ -3564,7 +3563,7 @@
{
g_return_val_if_fail (! gtk_html_get_editable (html), NULL);
- return gtk_html_begin_full (html, NULL, NULL, 0);
+ return gtk_html_begin_full (html, NULL, content_type , 0);
}
/**
@@ -4493,15 +4492,30 @@
}
/* misc utils */
+/* if engine_type == false - default behaviour*/
void
-gtk_html_set_default_content_type (GtkHTML *html, gchar *content_type)
+gtk_html_set_default_engine(GtkHTML *html, gboolean engine_type)
{
- g_free (html->priv->content_type);
+ html_engine_set_engine_type( html->engine, engine_type);
+}
- if (content_type) {
- html->priv->content_type = g_ascii_strdown (content_type, -1);
- } else
- html->priv->content_type = NULL;
+gboolean
+gtk_html_get_default_engine(GtkHTML *html)
+{
+ return html_engine_get_engine_type( html->engine);
+}
+
+
+void
+gtk_html_set_default_content_type (GtkHTML *html, const gchar *content_type)
+{
+ html_engine_set_content_type( html->engine, content_type);
+}
+
+const gchar*
+gtk_html_get_default_content_type (GtkHTML *html)
+{
+ return html_engine_get_content_type( html->engine);
}
gpointer
Modified: trunk/gtkhtml/gtkhtml.h
==============================================================================
--- trunk/gtkhtml/gtkhtml.h (original)
+++ trunk/gtkhtml/gtkhtml.h Wed Dec 10 18:28:18 2008
@@ -335,8 +335,12 @@
/* DEPRECATED */
#if 1
gboolean gtk_html_build_with_gconf (void);
-void gtk_html_set_default_content_type (GtkHTML *html,
- gchar *content_type);
+const gchar* gtk_html_get_default_content_type (GtkHTML *html);
+void gtk_html_set_default_content_type (GtkHTML *html,
+ const gchar *content_type);
+void gtk_html_set_default_engine (GtkHTML *html,
+ gboolean enginetype);
+gboolean gtk_html_get_default_engine (GtkHTML *html);
GtkWidget *gtk_html_new_from_string (const gchar *Astr,
gint len);
void gtk_html_load_empty (GtkHTML *html);
Modified: trunk/gtkhtml/htmlbutton.c
==============================================================================
--- trunk/gtkhtml/htmlbutton.c (original)
+++ trunk/gtkhtml/htmlbutton.c Wed Dec 10 18:28:18 2008
@@ -64,19 +64,19 @@
static gchar *
-encode (HTMLEmbedded *e)
+encode (HTMLEmbedded *e,const gchar *codepage)
{
GString *encoding = g_string_new ("");
gchar *ptr;
if(strlen (e->name) && (HTML_BUTTON(e)->successful)) {
- ptr = html_embedded_encode_string (e->name);
+ ptr = html_embedded_encode_string (e->name, codepage);
encoding = g_string_append (encoding, ptr);
g_free (ptr);
encoding = g_string_append_c (encoding, '=');
- ptr = html_embedded_encode_string (e->value);
+ ptr = html_embedded_encode_string (e->value, codepage);
encoding = g_string_append (encoding, ptr);
g_free (ptr);
}
Modified: trunk/gtkhtml/htmlcheckbox.c
==============================================================================
--- trunk/gtkhtml/htmlcheckbox.c (original)
+++ trunk/gtkhtml/htmlcheckbox.c Wed Dec 10 18:28:18 2008
@@ -38,20 +38,19 @@
}
static gchar *
-encode (HTMLEmbedded *e)
+encode (HTMLEmbedded *e, const gchar *codepage)
{
GString *encoding = g_string_new ("");
gchar *ptr;
if(strlen (e->name) && gtk_toggle_button_get_active (GTK_TOGGLE_BUTTON (e->widget))) {
- ptr = html_embedded_encode_string (e->name);
+ ptr = html_embedded_encode_string (e->name, codepage);
encoding = g_string_append (encoding, ptr);
g_free (ptr);
encoding = g_string_append_c (encoding, '=');
-
- ptr = html_embedded_encode_string (e->value);
+ ptr = html_embedded_encode_string (e->value, codepage);
encoding = g_string_append (encoding, ptr);
g_free (ptr);
}
Modified: trunk/gtkhtml/htmlembedded.c
==============================================================================
--- trunk/gtkhtml/htmlembedded.c (original)
+++ trunk/gtkhtml/htmlembedded.c Wed Dec 10 18:28:18 2008
@@ -33,6 +33,8 @@
#include "htmliframe.h"
#include "htmlpainter.h"
#include "htmlengine.h"
+/* For the g_iconv-based converter helpers */
+#include "htmltokenizer.h"
HTMLEmbeddedClass html_embedded_class;
static HTMLObjectClass *parent_class = NULL;
@@ -198,7 +200,7 @@
}
static gchar *
-encode (HTMLEmbedded *e)
+encode (HTMLEmbedded *e, const gchar* codepage)
{
return g_strdup ("");
}
@@ -210,9 +212,9 @@
}
gchar *
-html_embedded_encode (HTMLEmbedded *e)
+html_embedded_encode (HTMLEmbedded *e, const gchar* codepage)
{
- return HTML_EMBEDDED_CLASS (HTML_OBJECT (e)->klass)->encode (e);
+ return HTML_EMBEDDED_CLASS (HTML_OBJECT (e)->klass)->encode (e, codepage);
}
void
@@ -228,13 +230,21 @@
}
gchar *
-html_embedded_encode_string (const gchar *str)
+html_embedded_encode_string (const gchar *before, const gchar *codepage)
{
- static gchar *safe = "$-._!*(),"; /* RFC 1738 */
+ const gchar* str = before;
+ static gchar *safe = "$-._!*(),"; /* RFC 1738 */
unsigned pos = 0;
GString *encoded = g_string_new ("");
gchar buffer[5], *ptr;
- guchar c;
+ guchar c;
+
+ GIConv iconv_cd = generate_iconv_to (codepage);
+ if( is_valid_g_iconv (iconv_cd))
+ {
+ str= convert_text_encoding(iconv_cd, before);
+ g_iconv_close(iconv_cd);
+ }
while ( pos < strlen(str) ) {
@@ -268,7 +278,7 @@
g_string_free (encoded, FALSE);
- return ptr;
+ return ptr;
}
void
Modified: trunk/gtkhtml/htmlembedded.h
==============================================================================
--- trunk/gtkhtml/htmlembedded.h (original)
+++ trunk/gtkhtml/htmlembedded.h Wed Dec 10 18:28:18 2008
@@ -45,7 +45,7 @@
void (*reset) (HTMLEmbedded *element);
- gchar *(*encode) (HTMLEmbedded *element);
+ gchar *(*encode) (HTMLEmbedded *element, const gchar* codepage);
void (*reparent) (HTMLEmbedded *element, GtkWidget *new_parent);
};
@@ -71,8 +71,10 @@
void html_embedded_reset (HTMLEmbedded *element);
void html_embedded_reparent (HTMLEmbedded *element,
GtkWidget *new_parent);
-gchar *html_embedded_encode (HTMLEmbedded *element);
-gchar *html_embedded_encode_string (const gchar *str);
+gchar *html_embedded_encode (HTMLEmbedded *element,
+ const gchar * codepage);
+gchar *html_embedded_encode_string (const gchar *str,
+ const gchar * codepage);
HTMLEmbedded *html_embedded_new_widget (GtkWidget *parent,
GtkHTMLEmbedded *eb,
HTMLEngine *engine);
Modified: trunk/gtkhtml/htmlengine.c
==============================================================================
--- trunk/gtkhtml/htmlengine.c (original)
+++ trunk/gtkhtml/htmlengine.c Wed Dec 10 18:28:18 2008
@@ -2623,7 +2623,7 @@
token = html_string_tokenizer_next_token (e->st);
if (g_ascii_strncasecmp (token, "align=", 6) == 0) {
style = html_style_add_text_align (style, parse_halign (token + 6, HTML_HALIGN_NONE));
- //align = parse_halign (token + 6, align);
+ /*align = parse_halign (token + 6, align);*/
} else if (g_ascii_strncasecmp (token, "style=", 6) == 0) {
style = html_style_add_attribute (style, token + 6);
}
@@ -2766,11 +2766,39 @@
html_element_free (element);
}
-
+void
+html_engine_set_engine_type( HTMLEngine *e, gboolean engine_type)
+{
+ g_return_if_fail (HTML_IS_ENGINE (e));
+ html_tokenizer_set_engine_type(e->ht, engine_type);
+}
+
+gboolean
+html_engine_get_engine_type( HTMLEngine *e)
+{
+ g_return_val_if_fail (HTML_IS_ENGINE (e), FALSE);
+ return html_tokenizer_get_engine_type(e->ht);
+}
+
+void
+html_engine_set_content_type(HTMLEngine *e, const gchar* content_type)
+{
+ g_return_if_fail (HTML_IS_ENGINE (e));
+ html_tokenizer_change_content_type(e->ht, content_type);
+}
+
+const gchar *
+html_engine_get_content_type(HTMLEngine *e)
+{
+ g_return_val_if_fail (HTML_IS_ENGINE (e), NULL);
+ return html_tokenizer_get_content_type(e->ht);
+}
+
static void
element_parse_meta (HTMLEngine *e, HTMLObject *clue, const gchar *str)
{
int refresh = 0;
+ int contenttype = 0;
int refresh_delay = 0;
gchar *refresh_url = NULL;
@@ -2778,16 +2806,23 @@
html_string_tokenizer_tokenize(e->st, str + 5, " >");
while (html_string_tokenizer_has_more_tokens (e->st)) {
-
const gchar* token = html_string_tokenizer_next_token(e->st);
if (g_ascii_strncasecmp(token, "http-equiv=", 11) == 0 ) {
if (g_ascii_strncasecmp(token + 11, "refresh", 7) == 0 )
refresh = 1;
+ if (g_ascii_strncasecmp(token + 11, "content-type", 12) == 0 )
+ contenttype = 1;
} else if (g_ascii_strncasecmp(token, "content=", 8) == 0) {
+ const gchar *content;
+ content = token + 8;
+ if(contenttype)
+ {
+ contenttype = 0;
+ html_engine_set_content_type(e, content);
+ }
if (refresh) {
- const gchar *content;
- content = token + 8;
-
+ refresh = 0;
+
/* The time in seconds until the refresh */
refresh_delay = atoi(content);
@@ -3452,7 +3487,7 @@
push_block_element (e, ID_CAPTION, style, DISPLAY_TABLE_CAPTION, block_end_cell, 0, 0);
table->caption = caption;
- //FIXME caption alignment should be based on the flow.... or something....
+ /*FIXME caption alignment should be based on the flow.... or something....*/
table->capAlign = capAlign;
}
Modified: trunk/gtkhtml/htmlengine.h
==============================================================================
--- trunk/gtkhtml/htmlengine.h (original)
+++ trunk/gtkhtml/htmlengine.h Wed Dec 10 18:28:18 2008
@@ -315,6 +315,12 @@
void html_engine_stop_parser (HTMLEngine *e);
void html_engine_stop (HTMLEngine *e);
void html_engine_flush (HTMLEngine *e);
+void html_engine_set_engine_type (HTMLEngine *e,
+ gboolean engine_type);
+gboolean html_engine_get_engine_type (HTMLEngine *e);
+void html_engine_set_content_type(HTMLEngine *e,
+ const gchar* content_type);
+const gchar * html_engine_get_content_type(HTMLEngine *e);
/* Rendering control. */
gint html_engine_calc_min_width (HTMLEngine *e);
Modified: trunk/gtkhtml/htmlform.c
==============================================================================
--- trunk/gtkhtml/htmlform.c (original)
+++ trunk/gtkhtml/htmlform.c Wed Dec 10 18:28:18 2008
@@ -125,10 +125,11 @@
gint first = TRUE;
GList *i = form->elements;
gchar *ptr;
-
+
+ const gchar * codepage = html_engine_get_content_type(form->engine);
+
while (i) {
- ptr = html_embedded_encode (HTML_EMBEDDED (i->data));
-
+ ptr = html_embedded_encode (HTML_EMBEDDED (i->data), codepage);
if (strlen (ptr)) {
if(!first)
encoding = g_string_append_c (encoding, '&');
Modified: trunk/gtkhtml/htmlframe.c
==============================================================================
--- trunk/gtkhtml/htmlframe.c (original)
+++ trunk/gtkhtml/htmlframe.c Wed Dec 10 18:28:18 2008
@@ -480,7 +480,11 @@
new_tokenizer = NULL;
gtk_html_set_default_content_type (new_html,
- parent_html->priv->content_type);
+ gtk_html_get_default_content_type(parent_html));
+
+ gtk_html_set_default_engine (new_html,
+ gtk_html_get_default_engine(parent_html));
+
frame->html = new_widget;
frame->url = g_strdup (src);
frame->width = width;
Modified: trunk/gtkhtml/htmlhidden.c
==============================================================================
--- trunk/gtkhtml/htmlhidden.c (original)
+++ trunk/gtkhtml/htmlhidden.c Wed Dec 10 18:28:18 2008
@@ -28,19 +28,19 @@
static gchar *
-encode (HTMLEmbedded *e)
+encode (HTMLEmbedded *e, const gchar *codepage)
{
GString *encoding = g_string_new ("");
gchar *ptr;
if(strlen (e->name)) {
- ptr = html_embedded_encode_string (e->name);
+ ptr = html_embedded_encode_string (e->name, codepage);
encoding = g_string_append (encoding, ptr);
g_free (ptr);
encoding = g_string_append_c (encoding, '=');
- ptr = html_embedded_encode_string (e->value);
+ ptr = html_embedded_encode_string (e->value, codepage);
encoding = g_string_append (encoding, ptr);
g_free (ptr);
}
Modified: trunk/gtkhtml/htmliframe.c
==============================================================================
--- trunk/gtkhtml/htmliframe.c (original)
+++ trunk/gtkhtml/htmliframe.c Wed Dec 10 18:28:18 2008
@@ -611,7 +611,11 @@
new_tokenizer = NULL;
gtk_html_set_default_content_type (new_html,
- parent_html->priv->content_type);
+ gtk_html_get_default_content_type(parent_html));
+
+ gtk_html_set_default_engine (new_html,
+ gtk_html_get_default_engine(parent_html));
+
iframe->html = new_widget;
iframe->url = g_strdup (src);
iframe->width = width;
Modified: trunk/gtkhtml/htmlimageinput.c
==============================================================================
--- trunk/gtkhtml/htmlimageinput.c (original)
+++ trunk/gtkhtml/htmlimageinput.c Wed Dec 10 18:28:18 2008
@@ -107,13 +107,13 @@
static gchar *
-encode (HTMLEmbedded *e)
+encode (HTMLEmbedded *e, const gchar *codepage)
{
GString *encoding = g_string_new ("");
gchar *ptr;
if(strlen (e->name)) {
- ptr = html_embedded_encode_string (e->name);
+ ptr = html_embedded_encode_string (e->name, codepage);
encoding = g_string_assign (encoding, ptr);
g_free (ptr);
@@ -121,7 +121,7 @@
encoding = g_string_append (encoding, ptr);
g_free (ptr);
- ptr = html_embedded_encode_string (e->name);
+ ptr = html_embedded_encode_string (e->name, codepage);
encoding = g_string_append (encoding, ptr);
g_free (ptr);
Modified: trunk/gtkhtml/htmlradio.c
==============================================================================
--- trunk/gtkhtml/htmlradio.c (original)
+++ trunk/gtkhtml/htmlradio.c Wed Dec 10 18:28:18 2008
@@ -49,20 +49,20 @@
}
static gchar *
-encode (HTMLEmbedded *e)
+encode (HTMLEmbedded *e, const gchar *codepage)
{
GString *encoding = g_string_new ("");
gchar *ptr;
if(strlen (e->name) && gtk_toggle_button_get_active (GTK_TOGGLE_BUTTON (e->widget))) {
- ptr = html_embedded_encode_string (e->name);
+ ptr = html_embedded_encode_string (e->name, codepage);
encoding = g_string_append (encoding, ptr);
g_free (ptr);
encoding = g_string_append_c (encoding, '=');
- ptr = html_embedded_encode_string (e->value);
+ ptr = html_embedded_encode_string (e->value, codepage);
encoding = g_string_append (encoding, ptr);
g_free (ptr);
}
Modified: trunk/gtkhtml/htmlselect.c
==============================================================================
--- trunk/gtkhtml/htmlselect.c (original)
+++ trunk/gtkhtml/htmlselect.c Wed Dec 10 18:28:18 2008
@@ -102,7 +102,8 @@
add_selected (GtkTreeModel *model,
GtkTreePath *path,
GtkTreeIter *iter,
- struct EmbeddedSelectionInfo *info)
+ struct EmbeddedSelectionInfo *info,
+ const gchar* codepage)
{
gchar *value, *encoded;
@@ -111,13 +112,13 @@
if (info->string->len)
g_string_append_c (info->string, '&');
- encoded = html_embedded_encode_string (info->embedded->name);
+ encoded = html_embedded_encode_string (info->embedded->name, codepage);
g_string_append (info->string, encoded);
g_free (encoded);
g_string_append_c (info->string, '=');
- encoded = html_embedded_encode_string (value);
+ encoded = html_embedded_encode_string (value, codepage);
g_string_append (info->string, encoded);
g_free (encoded);
@@ -125,7 +126,7 @@
}
static gchar *
-encode (HTMLEmbedded *e)
+encode (HTMLEmbedded *e, const gchar *codepage)
{
struct EmbeddedSelectionInfo info;
HTMLSelect *s = HTML_SELECT(e);
@@ -146,7 +147,7 @@
combo_box = GTK_COMBO_BOX (e->widget);
if (gtk_combo_box_get_active_iter (combo_box, &iter))
- add_selected (s->model, NULL, &iter, &info);
+ add_selected (s->model, NULL, &iter, &info, codepage);
}
}
Modified: trunk/gtkhtml/htmltextarea.c
==============================================================================
--- trunk/gtkhtml/htmltextarea.c (original)
+++ trunk/gtkhtml/htmltextarea.c Wed Dec 10 18:28:18 2008
@@ -63,7 +63,7 @@
}
static gchar *
-encode (HTMLEmbedded *e)
+encode (HTMLEmbedded *e, const gchar *codepage)
{
GString *encoding = g_string_new ("");
gchar *encoded_str, *utf8_str, *gtk_text;
@@ -71,7 +71,7 @@
if(strlen (e->name)) {
GtkTextIter first, last;
- utf8_str = html_embedded_encode_string (e->name);
+ utf8_str = html_embedded_encode_string (e->name, codepage);
encoding = g_string_append (encoding, utf8_str);
g_free (utf8_str);
@@ -80,7 +80,7 @@
gtk_text_buffer_get_bounds (HTML_TEXTAREA (e)->buffer, &first, &last);
gtk_text = gtk_text_buffer_get_text (HTML_TEXTAREA (e)->buffer, &first, &last, FALSE);
- encoded_str = html_embedded_encode_string (gtk_text);
+ encoded_str = html_embedded_encode_string (gtk_text, codepage);
encoding = g_string_append (encoding, encoded_str);
g_free (encoded_str);
Modified: trunk/gtkhtml/htmltextinput.c
==============================================================================
--- trunk/gtkhtml/htmltextinput.c (original)
+++ trunk/gtkhtml/htmltextinput.c Wed Dec 10 18:28:18 2008
@@ -108,19 +108,19 @@
/* HTMLEmbedded methods. */
static gchar *
-encode (HTMLEmbedded *e)
+encode (HTMLEmbedded *e, const gchar* codepage)
{
GString *encoding = g_string_new ("");
gchar *ptr;
if(strlen (e->name)) {
- ptr = html_embedded_encode_string (e->name);
+ ptr = html_embedded_encode_string (e->name, codepage);
encoding = g_string_append (encoding, ptr);
g_free (ptr);
encoding = g_string_append_c (encoding, '=');
- ptr = html_embedded_encode_string (gtk_entry_get_text (GTK_ENTRY (e->widget)));
+ ptr = html_embedded_encode_string (gtk_entry_get_text (GTK_ENTRY (e->widget)), codepage);
encoding = g_string_append (encoding, ptr);
g_free (ptr);
}
Modified: trunk/gtkhtml/htmltokenizer.c
==============================================================================
--- trunk/gtkhtml/htmltokenizer.c (original)
+++ trunk/gtkhtml/htmltokenizer.c Wed Dec 10 18:28:18 2008
@@ -33,6 +33,8 @@
enum {
HTML_TOKENIZER_BEGIN_SIGNAL,
HTML_TOKENIZER_END_SIGNAL,
+ HTML_TOKENIZER_CHANGECONTENT_SIGNAL,
+ HTML_TOKENIZER_CHANGEENGINE_SIGNAL,
HTML_TOKENIZER_LAST_SIGNAL
};
@@ -52,6 +54,7 @@
gint used;
gchar * data;
};
+
struct _HTMLTokenizerPrivate {
/* token buffers list */
@@ -87,7 +90,6 @@
gboolean textarea; /* Are we in a <textarea> block? */
gint pre; /* Are we in a <pre> block? */
gboolean select; /* Are we in a <select> block? */
- gboolean charEntity; /* Are we in an &... sequence? */
gboolean extension; /* Are we in an <!-- +GtkHTML: sequence? */
gboolean aTag; /* Are we in a <a/> tag*/
@@ -117,11 +119,17 @@
GList *blocking; /* Blocking tokens */
const gchar *searchFor;
- gboolean utf8;
- gchar utf8_buffer[7];
- gint utf8_length;
+
+ gboolean enableconvert;
+
+ gchar * content_type;
+ /*convert*/
+ GIConv iconv_cd;
+
};
+
+
static const gchar *commentStart = "<!--";
static const gchar *scriptEnd = "</script>";
static const gchar *styleEnd = "</style>";
@@ -144,12 +152,20 @@
/* default implementations of tokenization functions */
static void html_tokenizer_finalize (GObject *);
-static void html_tokenizer_real_begin (HTMLTokenizer *, gchar *content_type);
+static void html_tokenizer_real_change (HTMLTokenizer *, const gchar *content_type);
+static void html_tokenizer_real_begin (HTMLTokenizer *, const gchar *content_type);
+static void html_tokenizer_real_engine_type (HTMLTokenizer *t, gboolean engine_type);
static void html_tokenizer_real_write (HTMLTokenizer *, const gchar *str, size_t size);
static void html_tokenizer_real_end (HTMLTokenizer *);
+static const gchar *
+ html_tokenizer_real_get_content_type(HTMLTokenizer *);
+static gboolean
+ html_tokenizer_real_get_engine_type(HTMLTokenizer *);
static gchar *html_tokenizer_real_peek_token (HTMLTokenizer *);
static gchar *html_tokenizer_real_next_token (HTMLTokenizer *);
static gboolean html_tokenizer_real_has_more_tokens (HTMLTokenizer *);
+static gchar *html_tokenizer_converted_token (HTMLTokenizer *t,const gchar* token);
+
static HTMLTokenizer *html_tokenizer_real_clone (HTMLTokenizer *);
@@ -160,8 +176,11 @@
HTMLTokenType tt);
static void html_tokenizer_tokenize_one_char (HTMLTokenizer *t,
const gchar **src);
+static void add_char(HTMLTokenizer *t, gchar c);
+
+gboolean is_need_convert(const gchar* token);
-static void add_unichar(HTMLTokenizer *t, gunichar wc);
+gchar* html_tokenizer_convert_entity(gchar * token);
static GObjectClass *parent_class = NULL;
@@ -172,6 +191,26 @@
parent_class = g_type_class_ref (G_TYPE_OBJECT);
+ html_tokenizer_signals[HTML_TOKENIZER_CHANGECONTENT_SIGNAL] =
+ g_signal_new ("change",
+ G_TYPE_FROM_CLASS (klass),
+ G_SIGNAL_RUN_LAST,
+ G_STRUCT_OFFSET (HTMLTokenizerClass, change),
+ NULL, NULL,
+ g_cclosure_marshal_VOID__POINTER,
+ G_TYPE_NONE,
+ 1, G_TYPE_POINTER);
+
+ html_tokenizer_signals[HTML_TOKENIZER_CHANGEENGINE_SIGNAL] =
+ g_signal_new ("engine",
+ G_TYPE_FROM_CLASS (klass),
+ G_SIGNAL_RUN_LAST,
+ G_STRUCT_OFFSET (HTMLTokenizerClass, engine),
+ NULL, NULL,
+ g_cclosure_marshal_VOID__POINTER,
+ G_TYPE_NONE,
+ 1, G_TYPE_POINTER);
+
html_tokenizer_signals[HTML_TOKENIZER_BEGIN_SIGNAL] =
g_signal_new ("begin",
G_TYPE_FROM_CLASS (klass),
@@ -194,12 +233,16 @@
object_class->finalize = html_tokenizer_finalize;
+ klass->change = html_tokenizer_real_change;
+ klass->engine = html_tokenizer_real_engine_type;
klass->begin = html_tokenizer_real_begin;
klass->end = html_tokenizer_real_end;
klass->write = html_tokenizer_real_write;
klass->peek_token = html_tokenizer_real_peek_token;
klass->next_token = html_tokenizer_real_next_token;
+ klass->get_content_type = html_tokenizer_real_get_content_type;
+ klass->get_engine_type = html_tokenizer_real_get_engine_type;
klass->has_more = html_tokenizer_real_has_more_tokens;
klass->clone = html_tokenizer_real_clone;
}
@@ -232,7 +275,6 @@
p->textarea = FALSE;
p->pre = 0;
p->select = FALSE;
- p->charEntity = FALSE;
p->extension = FALSE;
p->aTag = FALSE;
@@ -250,19 +292,30 @@
p->blocking = NULL;
p->searchFor = NULL;
+
+ /* Use the old logic and do not convert the charset */
+ p->enableconvert = FALSE;
+
+ p->content_type = g_strdup ("html/text; charset=utf-8");
}
static void
html_tokenizer_finalize (GObject *obj)
{
HTMLTokenizer *t = HTML_TOKENIZER (obj);
-
+
html_tokenizer_reset (t);
-
+
+ if(is_valid_g_iconv (t->priv->iconv_cd))
+ g_iconv_close (t->priv->iconv_cd);
+
+ if(t->priv->content_type)
+ g_free(t->priv->content_type);
+
g_free (t->priv);
t->priv = NULL;
- G_OBJECT_CLASS (parent_class)->finalize (obj);
+ G_OBJECT_CLASS (parent_class)->finalize (obj);
}
GType
@@ -369,10 +422,153 @@
/* finally get first token */
token = buffer->data;
}
+
+ return html_tokenizer_converted_token (t,token);
+}
+
+/* Check whether the iconv descriptor is valid */
+gboolean
+is_valid_g_iconv(const GIConv iconv_cd)
+{
+ return iconv_cd != NULL && iconv_cd != (GIConv)-1;
+}
+/* Conversion is needed only when the token contains a byte with code > 127 */
+gboolean
+is_need_convert (const gchar* token)
+{
+ int i=strlen (token);
+ for(;i>=0;i--)
+ if(token[i]&128)
+ return TRUE;
+ return FALSE;
+}
+
+/* Convert entity values in a token that has already been converted to the right charset */
+gchar*
+html_tokenizer_convert_entity(gchar * token)
+{
+ char* full_pos = token + strlen (token);
+ char* write_pos = token + strcspn (token, "&");
+ gunichar value;
+ size_t count_chars;
+ char *read_pos;
+ while(write_pos < full_pos)
+ {
+ write_pos++;
+ count_chars = strcspn(write_pos+1, ";");
+ value = INVALID_CHARACTER_MARKER;
+ if(count_chars < 14)
+ {
+ char save = *(write_pos + count_chars + 1);
+ *(write_pos + count_chars + 1)=0;
+ /* &#NNNN; (decimal numeric character reference) */
+ if (*write_pos == '#')
+ {
+ if(isdigit (*(write_pos + 1)))
+ {
+ value=strtoull (write_pos + 1, NULL, 10);
+ }
+ /* &#xDD; (hexadecimal numeric character reference) */
+ else if(*(write_pos + 1) == 'x')
+ {
+ value=strtoull (write_pos + 2, NULL, 16);
+ }
+ }
+ else
+ {
+ value=html_entity_parse (write_pos, 0);
+ }
+ *(write_pos+count_chars+1)=save;
+ if(count_chars>0)
+ {
+ memset (write_pos-1, ' ', count_chars + 3);
+ /* first char is & I think this not need */
+ write_pos --;
+ read_pos = write_pos + count_chars + 3;
+ write_pos += g_unichar_to_utf8 (value,write_pos);
+ memcpy (write_pos, read_pos, full_pos - read_pos + 1);
+ full_pos = write_pos + (full_pos - read_pos);
+ }
+ }
+ write_pos = write_pos + strcspn (write_pos, "&");
+ }
return token;
}
+gchar*
+convert_text_encoding(const GIConv iconv_cd,const gchar * token)
+{
+ size_t currlength;
+ gchar * newbuffer;
+ gchar * returnbuffer;
+ const gchar * current;
+ size_t newlength;
+ size_t oldlength;
+ if(token == NULL)
+ return NULL;
+ currlength = strlen (token);
+ if(is_valid_g_iconv (iconv_cd) && is_need_convert (token))
+ {
+ current = token;
+ newlength = currlength*7+1;
+ oldlength = newlength;
+ newbuffer = g_new (gchar, newlength);
+ returnbuffer = newbuffer;
+ g_assert (returnbuffer);
+ while(currlength > 0)
+ {
+ /* g_iconv does not modify the source text, but its prototype takes a non-const source pointer */
+ g_iconv (iconv_cd, (gchar **)&current, &currlength, &newbuffer, &newlength);
+ if(currlength > 0)
+ {
+ g_warning ("IconvError=%s", current);
+ *newbuffer = INVALID_CHARACTER_MARKER;
+ newbuffer ++;
+ current ++;
+ currlength --;
+ newlength --;
+ }
+ }
+ returnbuffer[oldlength - newlength] = '\0';
+ returnbuffer = g_realloc (returnbuffer, oldlength - newlength + 1);
+ g_assert (returnbuffer);
+ return returnbuffer;
+ }
+ newbuffer = g_new (gchar, currlength + 1);
+ memcpy (newbuffer,token, currlength);
+ newbuffer[currlength] = 0;
+ return newbuffer;
+}
+
+static gchar *
+html_tokenizer_converted_token(HTMLTokenizer *t, const gchar* token)
+{
+ if(token != NULL)
+ {
+ struct _HTMLTokenizerPrivate *p = t->priv;
+ return html_tokenizer_convert_entity (convert_text_encoding (p->iconv_cd, token));
+ }
+ return NULL;
+}
+
+static const gchar *
+html_tokenizer_real_get_content_type(HTMLTokenizer *t)
+{
+ struct _HTMLTokenizerPrivate *p = t->priv;
+ if(p->content_type)
+ return p->content_type;
+ return NULL;
+}
+
+static gboolean
+html_tokenizer_real_get_engine_type(HTMLTokenizer *t)
+{
+ struct _HTMLTokenizerPrivate *p = t->priv;
+ return p->enableconvert;
+ return FALSE;
+}
+
static gchar *
html_tokenizer_real_next_token (HTMLTokenizer *t)
{
@@ -411,8 +607,8 @@
p->tokens_num--;
g_assert (p->tokens_num >= 0);
-
- return token;
+
+ return html_tokenizer_converted_token (t, token);
}
static gboolean
@@ -460,14 +656,103 @@
p->scriptCode = NULL;
}
-static gint
-charset_is_utf8 (gchar *content_type)
+static gboolean
+charset_is_utf8 (const gchar *content_type)
+{
+ return content_type && strstr (content_type, "=utf-8") != NULL;
+}
+
+static gboolean
+is_text (const gchar *content_type)
+{
+ return content_type && strstr (content_type, "text/") != NULL;
+}
+
+static const gchar*
+get_encoding_from_content_type(const gchar * content_type)
+{
+ gchar * charset;
+ if(content_type)
+ {
+ charset = g_strrstr (content_type, "charset=");
+ if(charset != NULL)
+ return charset + strlen ("charset=");
+ charset = g_strrstr (content_type, "encoding=");
+ if(charset != NULL)
+ return charset + strlen ("encoding=");
+
+ }
+ return NULL;
+}
+
+GIConv
+generate_iconv_from(const gchar * content_type)
+{
+ if(content_type)
+ if(!charset_is_utf8(content_type))
+ {
+ const gchar * encoding = get_encoding_from_content_type (content_type);
+ if(encoding)
+ return g_iconv_open ("utf-8", encoding);
+ }
+ return NULL;
+}
+
+GIConv
+generate_iconv_to(const gchar * content_type)
+{
+ if(content_type)
+ if(!charset_is_utf8 (content_type))
+ {
+ const gchar * encoding = get_encoding_from_content_type (content_type);
+ if(encoding)
+ return g_iconv_open (encoding, "utf-8");
+ }
+ return NULL;
+}
+
+static void
+html_tokenizer_real_engine_type (HTMLTokenizer *t, gboolean engine_type)
{
- return content_type && strstr (content_type, "charset=utf-8") != NULL;
+ struct _HTMLTokenizerPrivate *p;
+ p = t->priv;
+
+ p->enableconvert = engine_type;
+}
+
+static void
+html_tokenizer_real_change (HTMLTokenizer *t, const gchar *content_type)
+{
+ struct _HTMLTokenizerPrivate *p;
+ if(!is_text (content_type))
+ return;
+
+ p = t->priv;
+
+ if (!p->enableconvert)
+ return;
+
+ if(p->content_type)
+ g_free(p->content_type);
+
+ p->content_type = g_ascii_strdown ( content_type, -1);
+
+ if(is_valid_g_iconv (p->iconv_cd))
+ g_iconv_close (p->iconv_cd);
+
+ p->iconv_cd = generate_iconv_from (p->content_type);
+
+#if 0
+ if (charset_is_utf8 (p->content_type))
+ g_warning ("Trying UTF-8");
+ else
+ g_warning ("Trying %s",p->content_type);
+#endif
}
+
static void
-html_tokenizer_real_begin (HTMLTokenizer *t, gchar *content_type)
+html_tokenizer_real_begin (HTMLTokenizer *t, const gchar *content_type)
{
struct _HTMLTokenizerPrivate *p = t->priv;
@@ -490,17 +775,8 @@
p->searchCount = 0;
p->searchGtkHTMLCount = 0;
p->title = FALSE;
- p->charEntity = FALSE;
-
- p->utf8 = charset_is_utf8 (content_type);
- p->utf8_length = 0;
-#if 0
- if (p->utf8)
- g_warning ("Trying UTF-8");
- else
- g_warning ("Trying ISO-8859-1");
-#endif
+ html_tokenizer_real_change (t, content_type);
}
static void
@@ -561,6 +837,23 @@
}
}
+static void add_byte (HTMLTokenizer *t, const gchar **c)
+{
+ add_char (t,**c);
+ (*c) ++;
+}
+
+static void
+add_char(HTMLTokenizer *t, gchar c){
+ struct _HTMLTokenizerPrivate *p = t->priv;
+ if(c!='\0')
+ {
+ *(p->dest) = c;
+ p->dest ++;
+ *(p->dest) = 0;
+ }
+}
+
static void
html_tokenizer_append_token_buffer (HTMLTokenizer *t, gint min_size)
{
@@ -592,31 +885,31 @@
struct _HTMLTokenizerPrivate *p = t->priv;
if (p->tag || p->select) {
- add_unichar (t, ' ');
+ add_char (t, ' ');
}
else if (p->textarea) {
if (p->pending == LFPending)
- add_unichar (t, '\n');
+ add_char (t, '\n');
else
- add_unichar (t, ' ');
+ add_char (t, ' ');
}
else if (p->pre) {
switch (p->pending) {
case SpacePending:
- add_unichar (t, ' ');
+ add_char (t, ' ');
break;
case LFPending:
if (p->dest > p->buffer) {
html_tokenizer_append_token (t, p->buffer, p->dest - p->buffer);
}
p->dest = p->buffer;
- add_unichar (t, TAG_ESCAPE);
- add_unichar (t, '\n');
+ add_char (t, TAG_ESCAPE);
+ add_char (t, '\n');
html_tokenizer_append_token (t, p->buffer, 2);
p->dest = p->buffer;
break;
case TabPending:
- add_unichar (t, '\t');
+ add_char (t, '\t');
break;
default:
g_warning ("Unknown pending type: %d\n", (gint) p->pending);
@@ -624,7 +917,7 @@
}
}
else {
- add_unichar (t, ' ');
+ add_char (t, ' ');
}
p->pending = NonePending;
@@ -779,196 +1072,6 @@
}
}
-static gunichar win1252_to_unicode [32] = {
- 0x20ac,
- 0x81,
- 0x201a,
- 0x0192,
- 0x201e,
- 0x2026,
- 0x2020,
- 0x2021,
- 0x02c6,
- 0x2030,
- 0x0160,
- 0x2039,
- 0x0152,
- 0x8d,
- 0x017d,
- 0x8f,
- 0x90,
- 0x2018,
- 0x2019,
- 0x201c,
- 0x201d,
- 0x2022,
- 0x2013,
- 0x2014,
- 0x02dc,
- 0x2122,
- 0x0161,
- 0x203a,
- 0x0153,
- 0x9d,
- 0x017e,
- 0x0178
-};
-
-static void
-add_unichar (HTMLTokenizer *t, gunichar wc)
-{
- struct _HTMLTokenizerPrivate *p = t->priv;
-
- p->utf8_length = 0;
-
- /*
- chars in range 128 - 159 are control characters in unicode,
- but most browsers treat them as windows 1252
- encoded characters and translate them to unicode
- it's broken, but we do the same here
- */
- if (wc > 127 && wc < 160)
- wc = win1252_to_unicode [wc - 128];
-
- if (wc != '\0') {
- p->dest += g_unichar_to_utf8 (wc, p->dest);
- *(p->dest) = 0;
- }
-}
-
-static void
-add_byte (HTMLTokenizer *t, const gchar **src)
-{
- gunichar wc;
- struct _HTMLTokenizerPrivate *p = t->priv;
-
- if (p->utf8) {
- p->utf8_buffer[p->utf8_length] = **src;
- p->utf8_length++;
-
- wc = g_utf8_get_char_validated ((const gchar *)p->utf8_buffer, p->utf8_length);
- if (wc == -1 || p->utf8_length >= (sizeof(p->utf8_buffer)/sizeof(p->utf8_buffer[0]))) {
- add_unichar (t, INVALID_CHARACTER_MARKER);
- (*src)++;
- return;
- } else if (wc == -2) {
- /* incomplete character check again */
- (*src)++;
- return;
- }
- } else {
- wc = (guchar)**src;
- }
-
- add_unichar (t, wc);
- (*src)++;
-}
-
-static void
-flush_entity (HTMLTokenizer *t)
-{
- struct _HTMLTokenizerPrivate *p = t->priv;
- /* ignore the TAG_ESCAPE when flushing */
- const char *str = p->searchBuffer + 1;
-
- while (p->searchCount--) {
- add_byte (t, &str);
- }
-}
-
-static gboolean
-add_unichar_validated (HTMLTokenizer *t, gunichar uc)
-{
- if (g_unichar_validate (uc)) {
- add_unichar (t, uc);
- return TRUE;
- }
-
- g_warning ("invalid character value: x%xd", uc);
- return FALSE;
-}
-
-static void
-in_entity (HTMLTokenizer *t, const gchar **src)
-{
- struct _HTMLTokenizerPrivate *p = t->priv;
- gunichar entityValue = 0;
-
- /* See http://www.mozilla.org/newlayout/testcases/layout/entities.html for a complete entity list,
- ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT
- (or 'man iso_8859_1') for the character encodings. */
-
- p->searchBuffer [p->searchCount + 1] = **src;
- p->searchBuffer [p->searchCount + 2] = '\0';
-
- /* Check for � sequence */
- if (p->searchBuffer[2] == '#') {
- if ((p->searchCount > 1) &&
- (!isdigit (**src)) &&
- (p->searchBuffer[3] != 'x')) {
- /* { */
- p->searchBuffer [p->searchCount + 1] = '\0';
- entityValue = strtoul (&(p->searchBuffer [3]),
- NULL, 10);
- p->charEntity = FALSE;
- }
- if ((p->searchCount > 1) &&
- (!isalnum (**src)) &&
- (p->searchBuffer[3] == 'x')) {
- /* &x12AB */
- p->searchBuffer [p->searchCount + 1] = '\0';
-
- entityValue = strtoul (&(p->searchBuffer [4]),
- NULL, 16);
- p->charEntity = FALSE;
- }
- }
- else {
- /* Check for &abc12 sequence */
- if (!isalnum (**src)) {
- p->charEntity = FALSE;
- if ((p->searchBuffer [p->searchCount + 1] == ';') ||
- (!p->tag)) {
- char *ename = p->searchBuffer + 2;
-
- p->searchBuffer [p->searchCount + 1] = '\0'; /* FIXME sucks */
- entityValue = html_entity_parse (ename, 0);
- }
- }
-
- }
-
- if (p->searchCount > 13) {
- /* Ignore this sequence since it's too long */
- p->charEntity = FALSE;
- flush_entity (t);
- }
- else if (p->charEntity) {
- /* Keep searching for end of character entity */
- p->searchCount++;
- (*src)++;
- }
- else {
- /*
- * my reading of http://www.w3.org/TR/html4/intro/sgmltut.html#h-3.2.2 makes
- * seem correct to always collapse entity references, even in element names
- * and attributes.
- */
- if (entityValue) {
- if (entityValue != TAG_ESCAPE)
- /* make sure the entity value is a valid character value */
- if (!add_unichar_validated (t, entityValue))
- add_unichar (t, INVALID_CHARACTER_MARKER);
-
- if (**src == ';')
- (*src)++;
- } else {
- /* Ignore the sequence, just add it as plaintext */
- flush_entity (t);
- }
- }
-}
-
static void
in_tag (HTMLTokenizer *t, const gchar **src)
{
@@ -994,7 +1097,7 @@
/* Invalid tag, just add it */
if (p->pending)
html_tokenizer_add_pending (t);
- add_unichar (t, '<');
+ add_char (t, '<');
add_byte (t, src);
return;
}
@@ -1006,31 +1109,13 @@
html_tokenizer_append_token (t, p->buffer, p->dest - p->buffer);
p->dest = p->buffer;
}
- add_unichar (t, TAG_ESCAPE);
- add_unichar (t, '<');
+ add_char (t, TAG_ESCAPE);
+ add_char (t, '<');
p->tag = TRUE;
p->searchCount = 1; /* Look for <!-- to start comment */
}
static void
-start_entity (HTMLTokenizer *t, const gchar **src)
-{
- struct _HTMLTokenizerPrivate *p = t->priv;
-
- (*src)++;
-
- p->discard = NoneDiscard;
-
- if (p->pending)
- html_tokenizer_add_pending (t);
-
- p->charEntity = TRUE;
- p->searchBuffer[0] = TAG_ESCAPE;
- p->searchBuffer[1] = '&';
- p->searchCount = 1;
-}
-
-static void
start_tag (HTMLTokenizer *t, const gchar **src)
{
(*src)++;
@@ -1046,7 +1131,7 @@
p->searchCount = 0; /* Stop looking for <!-- sequence */
- add_unichar (t, '>');
+ add_char (t, '>');
/* Make the tag lower case */
ptr = p->buffer + 2;
@@ -1208,7 +1293,7 @@
t->priv->searchCount = 0; /* Stop looking for <!-- sequence */
if ((t->priv->tquote == SINGLE_QUOTE && **src == '\"') /* match " */
|| (t->priv->tquote == DOUBLE_QUOTE && **src == '\'')) {
- add_unichar (t, **src);
+ add_char (t, **src);
(*src)++;
} else if (*(t->priv->dest-1) == '=' && !t->priv->tquote) {
t->priv->discard = SpaceDiscard;
@@ -1218,7 +1303,7 @@
t->priv->tquote = DOUBLE_QUOTE;
else
t->priv->tquote = SINGLE_QUOTE;
- add_unichar (t, **src);
+ add_char (t, **src);
(*src)++;
}
else if (t->priv->tquote) {
@@ -1245,7 +1330,7 @@
t->priv->discard = NoneDiscard;
if (t->priv->tag) {
t->priv->searchCount = 0; /* Stop looking for <!-- sequence */
- add_unichar (t, '=');
+ add_char (t, '=');
if (!t->priv->tquote) {
t->priv->pending = NonePending;
t->priv->discard = SpaceDiscard;
@@ -1255,7 +1340,7 @@
if (t->priv->pending)
html_tokenizer_add_pending (t);
- add_unichar (t, '=');
+ add_char (t, '=');
}
(*src)++;
}
@@ -1309,12 +1394,8 @@
in_extension (t, src);
else if (p->script || p->style)
in_script_or_style (t, src);
- else if (p->charEntity)
- in_entity (t, src);
else if (p->startTag)
in_tag (t, src);
- else if (**src == '&' && !p->aTag)
- start_entity (t, src);
else if (**src == '<' && !p->tag)
start_tag (t, src);
else if (**src == '>' && p->tag && !p->tquote)
@@ -1335,7 +1416,7 @@
html_tokenizer_real_write (HTMLTokenizer *t, const gchar *string, size_t size)
{
const gchar *src = string;
-
+
while ((src - string) < size)
html_tokenizer_tokenize_one_char (t, &src);
}
@@ -1381,14 +1462,32 @@
/** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** **/
void
-html_tokenizer_begin (HTMLTokenizer *t, gchar *content_type)
+html_tokenizer_begin (HTMLTokenizer *t, const gchar *content_type)
{
+
g_return_if_fail (t && HTML_IS_TOKENIZER (t));
g_signal_emit (t, html_tokenizer_signals [HTML_TOKENIZER_BEGIN_SIGNAL], 0, content_type);
}
void
+html_tokenizer_set_engine_type (HTMLTokenizer *t, gboolean engine_type)
+{
+ g_return_if_fail (t && HTML_IS_TOKENIZER (t));
+
+ g_signal_emit (t, html_tokenizer_signals [HTML_TOKENIZER_CHANGEENGINE_SIGNAL], 0, engine_type);
+}
+
+void
+html_tokenizer_change_content_type (HTMLTokenizer *t,const gchar *content_type)
+{
+ g_return_if_fail (t && HTML_IS_TOKENIZER (t));
+
+ g_signal_emit (t, html_tokenizer_signals [HTML_TOKENIZER_CHANGECONTENT_SIGNAL], 0, content_type);
+}
+
+
+void
html_tokenizer_end (HTMLTokenizer *t)
{
g_return_if_fail (t && HTML_IS_TOKENIZER (t));
@@ -1427,6 +1526,39 @@
}
+const gchar *
+html_tokenizer_get_content_type(HTMLTokenizer *t)
+{
+ HTMLTokenizerClass *klass;
+
+ g_return_val_if_fail (t && HTML_IS_TOKENIZER (t), NULL);
+
+ klass = HTML_TOKENIZER_CLASS (G_OBJECT_GET_CLASS (t));
+
+ if(klass->get_content_type)
+ return klass->get_content_type(t);
+
+ g_warning ("No get_content_type method defined.");
+ return NULL;
+
+}
+
+gboolean
+html_tokenizer_get_engine_type (HTMLTokenizer *t)
+{
+ HTMLTokenizerClass *klass;
+
+ g_return_val_if_fail (t && HTML_IS_TOKENIZER (t),FALSE);
+
+ klass = HTML_TOKENIZER_CLASS (G_OBJECT_GET_CLASS (t));
+
+ if(klass->get_engine_type)
+ return klass->get_engine_type(t);
+
+ g_warning ("No get_engine_type method defined.");
+ return FALSE;
+}
+
gchar *
html_tokenizer_next_token (HTMLTokenizer *t)
{
Modified: trunk/gtkhtml/htmltokenizer.h
==============================================================================
--- trunk/gtkhtml/htmltokenizer.h (original)
+++ trunk/gtkhtml/htmltokenizer.h Wed Dec 10 18:28:18 2008
@@ -48,13 +48,17 @@
GObjectClass parent_class;
/* signals */
- void (*begin) (HTMLTokenizer *, gchar *content_type);
+ void (*begin) (HTMLTokenizer *, const gchar *content_type);
+ void (*change) (HTMLTokenizer *, const gchar *content_type);
+ void (*engine) (HTMLTokenizer *, gboolean enginetype);
void (*end) (HTMLTokenizer *);
/* virtual functions */
void (*write) (HTMLTokenizer *, const gchar *string, size_t size);
gchar *(*peek_token) (HTMLTokenizer *);
gchar *(*next_token) (HTMLTokenizer *);
+ const gchar *(*get_content_type) (HTMLTokenizer *);
+ gboolean (*get_engine_type) (HTMLTokenizer *);
gboolean (*has_more) (HTMLTokenizer *);
HTMLTokenizer *(*clone) (HTMLTokenizer *);
@@ -66,7 +70,17 @@
void html_tokenizer_destroy (HTMLTokenizer *tokenizer);
void html_tokenizer_begin (HTMLTokenizer *t,
- gchar *content_type);
+ const gchar *content_type);
+
+const gchar * html_tokenizer_get_content_type(HTMLTokenizer *t);
+void html_tokenizer_change_content_type
+ (HTMLTokenizer *t,
+ const gchar *content_type);
+
+void html_tokenizer_set_engine_type (HTMLTokenizer *t,
+ gboolean enginetype);
+gboolean html_tokenizer_get_engine_type (HTMLTokenizer *t);
+
void html_tokenizer_write (HTMLTokenizer *t,
const gchar *string,
size_t size);
@@ -77,4 +91,12 @@
HTMLTokenizer *html_tokenizer_clone (HTMLTokenizer *t);
+/* convert input from its source encoding to UTF-8 */
+GIConv generate_iconv_from (const gchar * content_type);
+/* convert the resulting query from UTF-8 to the required encoding */
+GIConv generate_iconv_to (const gchar * content_type);
+/* convert text to the required encoding */
+gchar* convert_text_encoding (const GIConv iconv_cd, const gchar * token);
+/* validate the result of g_iconv_open */
+gboolean is_valid_g_iconv (const GIConv iconv_cd);
#endif /* _HTMLTOKENIZER_H_ */
Modified: trunk/gtkhtml/testgtkhtml.c
==============================================================================
--- trunk/gtkhtml/testgtkhtml.c (original)
+++ trunk/gtkhtml/testgtkhtml.c Wed Dec 10 18:28:18 2008
@@ -662,6 +662,7 @@
static void
got_data (SoupSession *session, SoupMessage *msg, gpointer user_data)
{
+ const gchar *ContentType;
GtkHTMLStream *handle = user_data;
if (!SOUP_STATUS_IS_SUCCESSFUL (msg->status_code)) {
@@ -669,6 +670,13 @@
gtk_html_end (html, handle, GTK_HTML_STREAM_ERROR);
return;
}
+ /* Enable change content type in engine */
+ gtk_html_set_default_engine(html, TRUE);
+
+ ContentType = soup_message_headers_get (msg->response_headers, "Content-type");
+
+ if (ContentType != NULL)
+ gtk_html_set_default_content_type (html, ContentType);
gtk_html_write (html, handle, msg->response_body->data,
msg->response_body->length);
@@ -701,7 +709,6 @@
if (nread == -1) {
if (errno == EINTR)
continue;
-
g_warning ("read error: %s", g_strerror (errno));
gtk_html_end (html, handle, GTK_HTML_STREAM_ERROR);
break;
@@ -846,8 +853,7 @@
}
/* TODO2 gnome_animator_start (GNOME_ANIMATOR (animator)); */
-
- html_stream_handle = gtk_html_begin_content (html, "text/html; charset=utf-8");
+ html_stream_handle = gtk_html_begin_content (html, (gchar *)gtk_html_get_default_content_type (html));
/* Yuck yuck yuck. Well this code is butt-ugly already
anyway. */
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]