[evolution/wip/webkit-composer: 125/372] Improve conversion from HTML to plain text and vice versa



commit d81edc38090717cb16c522fa4636b55725633a7a
Author: Dan Vrátil <dvratil redhat com>
Date:   Thu Aug 23 12:14:42 2012 +0200

    Improve conversion from HTML to plain text and vice versa

 e-util/e-editor-widget.c |  157 +++++++++++++++++++++++++++++++++-------------
 1 files changed, 113 insertions(+), 44 deletions(-)
---
diff --git a/e-util/e-editor-widget.c b/e-util/e-editor-widget.c
index 10de3dc..cc9acfd 100644
--- a/e-util/e-editor-widget.c
+++ b/e-util/e-editor-widget.c
@@ -131,16 +131,6 @@ editor_widget_get_dom_range (EEditorWidget *widget)
 }
 
 static void
-editor_widget_strip_formatting (EEditorWidget *widget)
-{
-       gchar *plain;
-
-       plain = e_editor_widget_get_text_plain (widget);
-       e_editor_widget_set_text_plain (widget, plain);
-       g_free (plain);
-}
-
-static void
 editor_widget_user_changed_contents_cb (EEditorWidget *widget,
                                        gpointer user_data)
 {
@@ -965,8 +955,34 @@ e_editor_widget_set_mode (EEditorWidget *widget,
 
        widget->priv->mode = mode;
 
-       if (widget->priv->mode == E_EDITOR_WIDGET_MODE_PLAIN_TEXT)
-               editor_widget_strip_formatting (widget);
+       if (widget->priv->mode == E_EDITOR_WIDGET_MODE_PLAIN_TEXT) {
+               gchar *plain;
+
+               plain = e_editor_widget_get_text_plain (widget);
+               e_editor_widget_set_text_plain (widget, plain);
+
+               g_free (plain);
+       } else {
+               gchar *plain_text, *html;
+
+               plain_text = e_editor_widget_get_text_plain (widget);
+
+               /* FIXME WEBKIT: This does not process smileys! */
+               html = camel_text_to_html (
+                       plain_text,
+                       CAMEL_MIME_FILTER_TOHTML_CONVERT_NL |
+                       CAMEL_MIME_FILTER_TOHTML_CONVERT_SPACES |
+                       CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS |
+                       CAMEL_MIME_FILTER_TOHTML_MARK_CITATION |
+                       CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES |
+                       CAMEL_MIME_FILTER_TOHTML_FORMAT_FLOWED,
+                       0);
+
+               e_editor_widget_set_text_html (widget, html);
+
+               g_free (plain_text);
+               g_free (html);
+       }
 
        g_object_notify (G_OBJECT (widget), "mode");
 }
@@ -1079,62 +1095,133 @@ e_editor_widget_get_text_html (EEditorWidget *widget)
                        WEBKIT_DOM_HTML_ELEMENT (element));
 }
 
-gchar *
-e_editor_widget_get_text_plain (EEditorWidget *widget)
+/*
+ * process_elements:
+ * @node: DOM element whose child nodes are serialized
+ * @buffer: string the plain-text rendering is appended to
+ *
+ * Recursively appends the text below @node to @buffer. Text nodes are
+ * copied with each tab expanded to four spaces, an IMG carrying the
+ * "x-evo-smiley" attribute contributes the text face of the emoticon
+ * looked up by that attribute, padded with a space on either side, and
+ * a newline is added after the children of every element whose computed
+ * "display" is "block".
+ */
+static void
+process_elements (WebKitDOMNode *node,
+                 GString *buffer)
 {
        WebKitDOMDocument *document;
-       WebKitDOMNode *body;
-       WebKitDOMNodeList *imgs;
+       WebKitDOMDOMWindow *window;
+       WebKitDOMNodeList *nodes;
+       WebKitDOMCSSStyleDeclaration *style;
+       gchar *display, *tagname;
        gulong ii, length;
+       GRegex *regex;
 
-       document = webkit_web_view_get_dom_document (WEBKIT_WEB_VIEW (widget));
-       body = (WebKitDOMNode *) webkit_dom_document_get_body (document);
-       body = webkit_dom_node_clone_node (body, TRUE);
-
-       imgs = webkit_dom_element_get_elements_by_tag_name (
-                       (WebKitDOMElement *) body, "IMG");
-       length = webkit_dom_node_list_get_length (imgs);
+       document = webkit_dom_node_get_owner_document (node);
+       window = webkit_dom_document_get_default_view (document);
 
-       /* Replace all smiley images with their text representation */
-       for (ii = 0; ii < length; ii++) {
-               WebKitDOMNode *img;
+       /* Is this a block element? The answer is used after the children. */
+       style = webkit_dom_dom_window_get_computed_style (
+                       window, WEBKIT_DOM_ELEMENT (node), "");
+       display = webkit_dom_css_style_declaration_get_property_value (
+                       style, "display");
 
-               img = webkit_dom_node_list_item (imgs, ii);
+       tagname = webkit_dom_element_get_tag_name (WEBKIT_DOM_ELEMENT (node));
 
+       /* Replace smiley images with their text representation */
+       if (g_ascii_strncasecmp (tagname, "IMG", 3) == 0) {
                if (webkit_dom_element_has_attribute (
-                               WEBKIT_DOM_ELEMENT (img), "x-evo-smiley")) {
+                               WEBKIT_DOM_ELEMENT (node), "x-evo-smiley")) {
 
-                       gchar *name;
+                       gchar *smiley_name;
                        const EEmoticon *emoticon;
 
-                       name = webkit_dom_element_get_attribute (
-                               WEBKIT_DOM_ELEMENT (img), "x-evo-smiley");
-                       emoticon = e_emoticon_chooser_lookup_emoticon (name);
+                       smiley_name = webkit_dom_element_get_attribute (
+                                       WEBKIT_DOM_ELEMENT (node), "x-evo-smiley");
+                       emoticon = e_emoticon_chooser_lookup_emoticon (smiley_name);
                        if (emoticon) {
-                               WebKitDOMText *text;
+                               g_string_append_printf (
+                                       buffer, " %s ", emoticon->text_face);
+                       }
+                       g_free (smiley_name);
+                       g_free (display);
+                       g_free (tagname);
+
+                       /* An IMG has no child nodes, so there is nothing left to do */
+                       return;
+               }
+       }
 
-                               text = webkit_dom_document_create_text_node (
-                                       document, emoticon->text_face);
 
-                               webkit_dom_node_insert_before (
-                                       webkit_dom_node_get_parent_node (img),
-                                       WEBKIT_DOM_NODE (text), img, NULL);
-                               webkit_dom_node_remove_child (
-                                       webkit_dom_node_get_parent_node (img),
-                                       img, NULL);
-                       }
+       nodes = webkit_dom_node_get_child_nodes (node);
+       length = webkit_dom_node_list_get_length (nodes);
+       regex = g_regex_new ("\x9", 0, 0, NULL);
+
+       for (ii = 0; ii < length; ii++) {
+               WebKitDOMNode *child;
+
+               child = webkit_dom_node_list_item (nodes, ii);
+               /* 3 is the DOM nodeType of a text node */
+               if (webkit_dom_node_get_node_type (child) == 3) {
+                       gchar *content, *tmp;
+
+                       tmp = webkit_dom_node_get_text_content (child);
 
-                       g_free (name);
+                       /* Replace tabs with 4 spaces, otherwise they get
+                          replaced by a single space */
+                       content = g_regex_replace (
+                               regex, tmp, -1, 0, "    ",
+                               0, NULL);
+
+                       g_string_append (buffer, content);
+                       g_free (tmp);
+                       g_free (content);
+               }
+
+               /* Descend into every element (DOM nodeType 1), even a
+                * childless one: an IMG never has child nodes, so testing
+                * for children would never reach the smiley handling. */
+               if (webkit_dom_node_get_node_type (child) == 1) {
+                       process_elements (child, buffer);
                }
        }
 
-       return webkit_dom_html_element_get_inner_text ((WebKitDOMHTMLElement *) body);
+       if (g_strcmp0 (display, "block") == 0) {
+               g_string_append (buffer, "\n");
+       }
+
+       g_free (display);
+       g_free (tagname);
+       g_regex_unref (regex);
+}
+
+/* Serialize the content of <body> to plain text; the caller owns (and
+ * must g_free()) the returned string. */
+gchar *
+e_editor_widget_get_text_plain (EEditorWidget *widget)
+{
+       WebKitDOMDocument *document;
+       WebKitDOMNode *body;
+       GString *plain_text;
+
+       document = webkit_web_view_get_dom_document (WEBKIT_WEB_VIEW (widget));
+       body = (WebKitDOMNode *) webkit_dom_document_get_body (document);
+
+       plain_text = g_string_sized_new (1024);
+       process_elements (body, plain_text);
+
+       /* FALSE releases only the GString wrapper and returns its text */
+       return g_string_free (plain_text, FALSE);
 }
 
 void
 e_editor_widget_set_text_html (EEditorWidget *widget,
                               const gchar *text)
 {
+       widget->priv->reload_in_progress = TRUE; /* raised before the load starts */
+
        webkit_web_view_load_html_string (
                WEBKIT_WEB_VIEW (widget), text, "file://");
 }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]