[libgepub] Revert "Using rust epub lib instead C code"



commit 251e1192b8037c33581be10a473e8a883e4ef73f
Author: Daniel García Moreno <danigm wadobo com>
Date:   Thu Mar 30 14:02:49 2017 +0200

    Revert "Using rust epub lib instead C code"
    
    We'll maintain the C code until Rust is stable enough to integrate in
    Gnome.

 .gitignore                  |    3 -
 NEWS                        |    5 -
 configure.ac                |   32 +---
 libgepub/Makefile.am        |   50 ++----
 libgepub/gepub-archive.c    |  182 ++++++++++++++++++
 libgepub/gepub-archive.h    |   49 +++++
 libgepub/gepub-doc.c        |  432 ++++++++++++++++++++++++++++++++++---------
 libgepub/gepub-doc.h        |   10 +
 libgepub/gepub-text-chunk.c |  111 +++++++++++
 libgepub/gepub-text-chunk.h |   64 +++++++
 libgepub/gepub-utils.c      |  249 +++++++++++++++++++++++++
 libgepub/gepub-utils.h      |   30 +++
 libgepub/gepub.h            |    2 +
 libgepub/rust/Cargo.lock    |  289 -----------------------------
 libgepub/rust/Cargo.toml    |   16 --
 libgepub/rust/src/lib.rs    |  212 ---------------------
 tests/test-gepub.c          |  207 +++++++++++++++++++--
 17 files changed, 1249 insertions(+), 694 deletions(-)
---
diff --git a/NEWS b/NEWS
index 658922e..e088ef1 100644
--- a/NEWS
+++ b/NEWS
@@ -1,8 +1,3 @@
-Version 0.5
-===========
-
- - Replaced gepub-archive / gepub-doc C source with Rust epub lib
-
 Version 0.4
 ===========
 
diff --git a/configure.ac b/configure.ac
index 84a23e6..9f62ece 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,7 +1,7 @@
 AC_PREREQ(2.60)
-AC_INIT([libgepub],[0.5],[danigm wadobo com])
+AC_INIT([libgepub],[0.4],[danigm wadobo com])
 AC_CONFIG_AUX_DIR([build])
-AM_INIT_AUTOMAKE([1.10 dist-bzip2])
+AM_INIT_AUTOMAKE([1.10 -Wall -Werror dist-bzip2])
 AM_SILENT_RULES([yes])
 
 GNOME_COMMON_INIT
@@ -16,34 +16,6 @@ AM_PROG_CC_C_O
 AC_PROG_INSTALL
 AC_PROG_LIBTOOL
 
-AC_CHECK_PROG(CARGO, [cargo], [yes], [no])
-AS_IF(test x$CARGO = xno,
-    AC_MSG_ERROR([cargo is required.  Please install the Rust toolchain from https://www.rust-lang.org/])
-)
-AC_CHECK_PROG(RUSTC, [rustc], [yes], [no])
-AS_IF(test x$RUSTC = xno,
-    AC_MSG_ERROR([rustc is required.  Please install the Rust toolchain from https://www.rust-lang.org/])
-)
-
-AC_ARG_ENABLE(debug,
-              AC_HELP_STRING([--enable-debug],
-                             [Build Rust code with debugging information [default=no]]),
-              [debug_release=$enableval],
-             [debug_release=no])
-
-AC_MSG_CHECKING(whether to build Rust code with debugging information)
-if test "x$debug_release" = "xyes" ; then
-       AC_MSG_RESULT(yes)
-       RUST_TARGET_SUBDIR=debug
-else
-       AC_MSG_RESULT(no)
-       RUST_TARGET_SUBDIR=release
-fi
-AM_CONDITIONAL([DEBUG_RELEASE], [test "x$debug_release" = "xyes"])
-
-AC_SUBST([RUST_TARGET_SUBDIR])
-
-
 PKG_CHECK_MODULES(GEPUB,
                   webkit2gtk-4.0
                   libsoup-2.4
diff --git a/libgepub/Makefile.am b/libgepub/Makefile.am
index 84456d3..1ae6339 100644
--- a/libgepub/Makefile.am
+++ b/libgepub/Makefile.am
@@ -1,7 +1,12 @@
 lib_LTLIBRARIES = libgepub.la
 
+NOINST_H_FILES = \
+       gepub-utils.h
+
 INST_H_FILES = \
        gepub-widget.h          \
+       gepub-archive.h         \
+       gepub-text-chunk.h      \
        gepub-doc.h             \
        gepub.h
 
@@ -10,35 +15,13 @@ libgepubinclude_HEADERS = $(INST_H_FILES)
 
 libgepub_la_SOURCES = \
        gepub-widget.c                  \
+       gepub-archive.c                 \
+       gepub-text-chunk.c              \
        gepub-doc.c                             \
+       gepub-utils.c                   \
        $(NOINST_H_FILES)               \
        $(INST_H_FILES)
 
-
-RUST_SOURCES =                                 \
-       rust/Cargo.toml                         \
-       rust/src/lib.rs
-
-if DEBUG_RELEASE
-CARGO_RELEASE_ARGS=
-else
-CARGO_RELEASE_ARGS=--release
-endif
-
-RUST_LIB=@abs_top_builddir@/target/@RUST_TARGET_SUBDIR@/libepub_internals.a
-
-check-local:
-       cd $(srcdir)/rust && \
-       CARGO_TARGET_DIR=@abs_top_builddir@/target cargo test
-
-clean-local:
-       cd $(top_srcdir)/rust && \
-       CARGO_TARGET_DIR=@abs_top_builddir@/target cargo clean
-
-@abs_top_builddir@/target/@RUST_TARGET_SUBDIR@/libepub_internals.a: $(RUST_SOURCES)
-       cd $(top_srcdir)/libgepub/rust && \
-       CARGO_TARGET_DIR=@abs_top_builddir@/target cargo build --verbose $(CARGO_RELEASE_ARGS)
-
 libgepub_la_CPPFLAGS = \
        -I$(top_builddir)       \
        -I$(srcdir)                     \
@@ -56,8 +39,7 @@ libgepub_la_LDFLAGS = \
        $(AM_LDFLAGS)
 
 libgepub_la_LIBADD = \
-       $(GEPUB_LIBS)    \
-       $(RUST_LIB)
+       $(GEPUB_LIBS)
 
 -include $(INTROSPECTION_MAKEFILE)
 INTROSPECTION_GIRS =
@@ -67,13 +49,13 @@ INTROSPECTION_COMPILER_ARGS = --includedir=$(srcdir)
 if HAVE_INTROSPECTION
 introspection_sources = $(libgepub_la_SOURCES)
 
-Gepub-0.5.gir: libgepub.la
-Gepub_0_5_gir_INCLUDES = GObject-2.0 libxml2-2.0 WebKit2-4.0
-Gepub_0_5_gir_CFLAGS = $(INCLUDES)
-Gepub_0_5_gir_LIBS = libgepub.la
-Gepub_0_5_gir_FILES = $(introspection_sources)
-Gepub_0_5_gir_SCANNERFLAGS = --warn-all
-INTROSPECTION_GIRS += Gepub-0.5.gir
+Gepub-0.4.gir: libgepub.la
+Gepub_0_4_gir_INCLUDES = GObject-2.0 libxml2-2.0 WebKit2-4.0
+Gepub_0_4_gir_CFLAGS = $(INCLUDES)
+Gepub_0_4_gir_LIBS = libgepub.la
+Gepub_0_4_gir_FILES = $(introspection_sources)
+Gepub_0_4_gir_SCANNERFLAGS = --warn-all
+INTROSPECTION_GIRS += Gepub-0.4.gir
 
 girdir = $(datadir)/gir-1.0
 gir_DATA = $(INTROSPECTION_GIRS)
diff --git a/libgepub/gepub-archive.c b/libgepub/gepub-archive.c
new file mode 100644
index 0000000..ad246a4
--- /dev/null
+++ b/libgepub/gepub-archive.c
@@ -0,0 +1,182 @@
+/* GepubArchive
+ *
+ * Copyright (C) 2011 Daniel Garcia <danigm wadobo com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <config.h>
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include <archive.h>
+#include <archive_entry.h>
+
+#include "gepub-archive.h"
+#include "gepub-utils.h"
+
+#define BUFZISE 1024
+
+struct _GepubArchive {
+    GObject parent;
+
+    struct archive *archive;
+    gchar *path;
+};
+
+struct _GepubArchiveClass {
+    GObjectClass parent_class;
+};
+
+G_DEFINE_TYPE (GepubArchive, gepub_archive, G_TYPE_OBJECT)
+
+/* Opens archive->path as a zip archive for reading.
+ * Returns TRUE on success; on failure the handle is released and reset. */
+static gboolean
+gepub_archive_open (GepubArchive *archive)
+{
+    archive->archive = archive_read_new ();
+    archive_read_support_format_zip (archive->archive);
+    if (archive_read_open_filename (archive->archive, archive->path, 10240) != ARCHIVE_OK) {
+        archive_read_free (archive->archive);
+        /* Clear the stale handle so close()/finalize() cannot free it twice. */
+        archive->archive = NULL;
+        return FALSE;
+    }
+
+    return TRUE;
+}
+
+static void
+gepub_archive_close (GepubArchive *archive)
+{
+    if (!archive->archive)
+        return;
+
+    archive_read_free (archive->archive);
+    archive->archive = NULL;
+}
+
+static void
+gepub_archive_finalize (GObject *object)
+{
+    GepubArchive *archive = GEPUB_ARCHIVE (object);
+
+    g_clear_pointer (&archive->path, g_free);
+
+    gepub_archive_close (archive);
+
+    G_OBJECT_CLASS (gepub_archive_parent_class)->finalize (object);
+}
+
+static void
+gepub_archive_init (GepubArchive *archive)
+{
+}
+
+static void
+gepub_archive_class_init (GepubArchiveClass *klass)
+{
+    GObjectClass *object_class = G_OBJECT_CLASS (klass);
+
+    object_class->finalize = gepub_archive_finalize;
+}
+
+GepubArchive *
+gepub_archive_new (const gchar *path)
+{
+    GepubArchive *archive;
+
+    archive = GEPUB_ARCHIVE (g_object_new (GEPUB_TYPE_ARCHIVE, NULL));
+    archive->path = g_strdup (path);
+    archive->archive = NULL;
+
+    return archive;
+}
+
+/**
+ * gepub_archive_list_files:
+ * @archive: a #GepubArchive
+ *
+ * Returns: (element-type utf8) (transfer full): list of files in the archive
+ */
+GList *
+gepub_archive_list_files (GepubArchive *archive)
+{
+    struct archive_entry *entry;
+    GList *file_list = NULL;
+
+    if (!gepub_archive_open (archive))
+        return NULL;
+    while (archive_read_next_header (archive->archive, &entry) == ARCHIVE_OK) {
+        file_list = g_list_prepend (file_list, g_strdup (archive_entry_pathname (entry)));
+        archive_read_data_skip (archive->archive);
+    }
+    gepub_archive_close (archive);
+
+    return file_list;
+}
+
+/* Returns the entry matching @path as a new GBytes; NULL if the archive cannot be opened or has no such entry. */
+GBytes *
+gepub_archive_read_entry (GepubArchive *archive,
+                          const gchar *path)
+{
+    struct archive_entry *entry;
+    GBytes *bytes = NULL;
+
+    if (!gepub_archive_open (archive))
+        return NULL;
+
+    while (!bytes && archive_read_next_header (archive->archive, &entry) == ARCHIVE_OK) {
+        if (g_ascii_strcasecmp (path, archive_entry_pathname (entry)) == 0) {
+            gsize size = archive_entry_size (entry);
+            guchar *buffer = g_malloc0 (size);
+            archive_read_data (archive->archive, buffer, size);
+            bytes = g_bytes_new_take (buffer, size);
+        } else {
+            archive_read_data_skip (archive->archive);
+        }
+    }
+    gepub_archive_close (archive);
+    return bytes;
+}
+
+gchar *
+gepub_archive_get_root_file (GepubArchive *archive)
+{
+    xmlDoc *doc = NULL;
+    xmlNode *root_element = NULL;
+    xmlNode *root_node = NULL;
+    GBytes *bytes;
+    const guchar *buffer;
+    gsize bufsize;
+    gchar *root_file = NULL;
+
+    // root file is in META-INF/container.xml
+    bytes = gepub_archive_read_entry (archive, "META-INF/container.xml");
+    if (!bytes)
+        return NULL;
+
+    buffer = g_bytes_get_data (bytes, &bufsize);
+    doc = xmlRecoverMemory (buffer, bufsize);
+    root_element = xmlDocGetRootElement (doc);
+    root_node = gepub_utils_get_element_by_tag (root_element, "rootfile");
+    root_file = xmlGetProp (root_node, "full-path");
+
+    xmlFreeDoc (doc);
+    g_bytes_unref (bytes);
+
+    return root_file;
+}
diff --git a/libgepub/gepub-archive.h b/libgepub/gepub-archive.h
new file mode 100644
index 0000000..ee4ce9d
--- /dev/null
+++ b/libgepub/gepub-archive.h
@@ -0,0 +1,49 @@
+/* GepubArchive
+ *
+ * Copyright (C) 2011  Daniel Garcia <danigm wadobo com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#ifndef __GEPUB_ARCHIVE_H__
+#define __GEPUB_ARCHIVE_H__
+
+#include <glib-object.h>
+#include <gio/gio.h>
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+#define GEPUB_TYPE_ARCHIVE           (gepub_archive_get_type ())
+#define GEPUB_ARCHIVE(obj)           (G_TYPE_CHECK_INSTANCE_CAST (obj, GEPUB_TYPE_ARCHIVE, GepubArchive))
+#define GEPUB_ARCHIVE_CLASS(cls)     (G_TYPE_CHECK_CLASS_CAST (cls, GEPUB_TYPE_ARCHIVE, GepubArchiveClass))
+#define GEPUB_IS_ARCHIVE(obj)        (G_TYPE_CHECK_INSTANCE_TYPE (obj, GEPUB_TYPE_ARCHIVE))
+#define GEPUB_IS_ARCHIVE_CLASS(obj)  (G_TYPE_CHECK_CLASS_TYPE (obj, GEPUB_TYPE_ARCHIVE))
+#define GEPUB_ARCHIVE_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), GEPUB_TYPE_ARCHIVE, GepubArchiveClass))
+
+typedef struct _GepubArchive      GepubArchive;
+typedef struct _GepubArchiveClass GepubArchiveClass;
+
+GType             gepub_archive_get_type       (void) G_GNUC_CONST;
+
+GepubArchive     *gepub_archive_new            (const gchar  *path);
+GList            *gepub_archive_list_files     (GepubArchive *archive);
+GBytes           *gepub_archive_read_entry     (GepubArchive *archive,
+                                                const gchar *path);
+gchar            *gepub_archive_get_root_file  (GepubArchive *archive);
+
+G_END_DECLS
+
+#endif /* __GEPUB_ARCHIVE_H__ */
diff --git a/libgepub/gepub-doc.c b/libgepub/gepub-doc.c
index 63450c3..2273aef 100644
--- a/libgepub/gepub-doc.c
+++ b/libgepub/gepub-doc.c
@@ -18,42 +18,30 @@
  */
 
 #include <config.h>
-#include <gio/gio.h>
 #include <libxml/tree.h>
 #include <libxml/HTMLparser.h>
 #include <string.h>
 
+#include "gepub-utils.h"
 #include "gepub-doc.h"
+#include "gepub-archive.h"
+#include "gepub-text-chunk.h"
 
-
-// Rust
-void      *epub_new(char *path);
-void       epub_destroy(void *doc);
-void      *epub_get_resource(void *doc, const char *path, int *size);
-void      *epub_get_resource_by_id(void *doc, const char *id, int *size);
-void      *epub_get_metadata(void *doc, const char *mdata);
-void      *epub_get_resource_mime(void *doc, const char *path);
-void      *epub_get_resource_mime_by_id(void *doc, const char *id);
-void      *epub_get_current_mime(void *doc);
-void      *epub_get_current(void *doc, int *size);
-void      *epub_get_current_with_epub_uris(void *doc, int *size);
-void       epub_set_page(void *doc, guint page);
-guint      epub_get_num_pages(void *doc);
-guint      epub_get_page(void *doc);
-gboolean   epub_next_page(void *doc);
-gboolean   epub_prev_page(void *doc);
-void      *epub_get_cover(void *doc);
-void      *epub_resource_path(void *doc, const char *id);
-void      *epub_current_path(void *doc);
-void      *epub_current_id(void *doc);
-
-
+static void gepub_doc_fill_resources (GepubDoc *doc);
+static void gepub_doc_fill_spine (GepubDoc *doc);
 static void gepub_doc_initable_iface_init (GInitableIface *iface);
 
 struct _GepubDoc {
     GObject parent;
+
+    GepubArchive *archive;
+    GBytes *content;
+    gchar *content_base;
     gchar *path;
-    void *rust_epub_doc;
+    GHashTable *resources;
+
+    GList *spine;
+    GList *page;
 };
 
 struct _GepubDocClass {
@@ -64,7 +52,6 @@ enum {
     PROP_0,
     PROP_PATH,
     PROP_PAGE,
-    PROP_FILE,
     NUM_PROPS
 };
 
@@ -74,11 +61,27 @@ G_DEFINE_TYPE_WITH_CODE (GepubDoc, gepub_doc, G_TYPE_OBJECT,
                          G_IMPLEMENT_INTERFACE (G_TYPE_INITABLE, gepub_doc_initable_iface_init))
 
 static void
+gepub_resource_free (GepubResource *res)
+{
+    g_free (res->mime);
+    g_free (res->uri);
+    g_free (res);
+}
+
+static void
 gepub_doc_finalize (GObject *object)
 {
     GepubDoc *doc = GEPUB_DOC (object);
 
-    epub_destroy (doc->rust_epub_doc);
+    g_clear_object (&doc->archive);
+    g_clear_pointer (&doc->content, g_bytes_unref);
+    g_clear_pointer (&doc->path, g_free);
+    g_clear_pointer (&doc->resources, g_hash_table_destroy);
+
+    if (doc->spine) {
+        g_list_foreach (doc->spine, (GFunc)g_free, NULL);
+        g_clear_pointer (&doc->spine, g_list_free);
+    }
 
     G_OBJECT_CLASS (gepub_doc_parent_class)->finalize (object);
 }
@@ -98,13 +101,6 @@ gepub_doc_set_property (GObject      *object,
     case PROP_PAGE:
         gepub_doc_set_page (doc, g_value_get_int (value));
         break;
-    case PROP_FILE: {
-        GFile *f = G_FILE (g_value_get_object (value));
-        if (f) {
-            doc->path = g_file_get_path (G_FILE (g_value_get_object (value)));
-        }
-        break;
-    }
     default:
         G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
         break;
@@ -126,11 +122,6 @@ gepub_doc_get_property (GObject    *object,
     case PROP_PAGE:
         g_value_set_int (value, gepub_doc_get_page (doc));
         break;
-    case PROP_FILE: {
-        GFile *f = g_file_new_for_path (doc->path);
-        g_value_set_object (value, f);
-        break;
-    }
     default:
         G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
         break;
@@ -140,6 +131,13 @@ gepub_doc_get_property (GObject    *object,
 static void
 gepub_doc_init (GepubDoc *doc)
 {
+    /* doc resources hashtable:
+     * id : (mime, path)
+     */
+    doc->resources = g_hash_table_new_full (g_str_hash,
+                                            g_str_equal,
+                                            (GDestroyNotify)g_free,
+                                            (GDestroyNotify)gepub_resource_free);
 }
 
 static void
@@ -167,15 +165,6 @@ gepub_doc_class_init (GepubDocClass *klass)
                           G_PARAM_READWRITE |
                           G_PARAM_STATIC_STRINGS);
 
-    properties[PROP_FILE] =
-        g_param_spec_object ("file",
-                             "File",
-                             "GFile to the EPUB document",
-                             G_TYPE_FILE,
-                             G_PARAM_READWRITE |
-                             G_PARAM_CONSTRUCT_ONLY |
-                             G_PARAM_STATIC_STRINGS);
-
     g_object_class_install_properties (object_class, NUM_PROPS, properties);
 }
 
@@ -185,12 +174,35 @@ gepub_doc_initable_init (GInitable     *initable,
                          GError       **error)
 {
     GepubDoc *doc = GEPUB_DOC (initable);
+    gchar *file;
+    gsize bufsize = 0;
+    gint i = 0, len;
+
+    g_assert (doc->path != NULL);
 
-    doc->rust_epub_doc = epub_new (doc->path);
-    if (!doc->rust_epub_doc) {
+    doc->archive = gepub_archive_new (doc->path);
+    file = gepub_archive_get_root_file (doc->archive);
+    if (!file)
+        return FALSE;
+    doc->content = gepub_archive_read_entry (doc->archive, file);
+    if (!doc->content)
         return FALSE;
+
+    len = strlen (file);
+    doc->content_base = g_strdup ("");
+    for (i=0; i<len; i++) {
+        if (file[i] == '/') {
+            g_free (doc->content_base);
+            doc->content_base = g_strndup (file, i+1);
+            break;
+        }
     }
 
+    gepub_doc_fill_resources (doc);
+    gepub_doc_fill_spine (doc);
+
+    g_free (file);
+
     return TRUE;
 }
 
@@ -215,6 +227,93 @@ gepub_doc_new (const gchar *path)
                            NULL);
 }
 
+static void
+gepub_doc_fill_resources (GepubDoc *doc)
+{
+    xmlDoc *xdoc = NULL;
+    xmlNode *root_element = NULL;
+    xmlNode *mnode = NULL;
+    xmlNode *item = NULL;
+    gchar *id, *tmpuri, *uri;
+    GepubResource *res;
+    const char *data;
+    gsize size;
+
+    data = g_bytes_get_data (doc->content, &size);
+    xdoc = xmlRecoverMemory (data, size);
+    root_element = xmlDocGetRootElement (xdoc);
+    mnode = gepub_utils_get_element_by_tag (root_element, "manifest");
+
+    item = mnode->children;
+    while (item) {
+        if (item->type != XML_ELEMENT_NODE ) {
+            item = item->next;
+            continue;
+        }
+
+        id = xmlGetProp (item, "id");
+        tmpuri = xmlGetProp (item, "href");
+        uri = g_strdup_printf ("%s%s", doc->content_base, tmpuri);
+        g_free (tmpuri);
+
+        res = g_malloc (sizeof (GepubResource));
+        res->mime = xmlGetProp (item, "media-type");
+        res->uri = uri;
+        g_hash_table_insert (doc->resources, id, res);
+        item = item->next;
+    }
+
+    xmlFreeDoc (xdoc);
+}
+
+static void
+gepub_doc_fill_spine (GepubDoc *doc)
+{
+    xmlDoc *xdoc = NULL;
+    xmlNode *root_element = NULL;
+    xmlNode *snode = NULL;
+    xmlNode *item = NULL;
+    gchar *id;
+    const char *data;
+    gsize size;
+    GList *spine = NULL;
+
+    data = g_bytes_get_data (doc->content, &size);
+    xdoc = xmlRecoverMemory (data, size);
+    root_element = xmlDocGetRootElement (xdoc);
+    snode = gepub_utils_get_element_by_tag (root_element, "spine");
+
+    item = snode->children;
+    while (item) {
+        if (item->type != XML_ELEMENT_NODE ) {
+            item = item->next;
+            continue;
+        }
+
+        id = xmlGetProp (item, "idref");
+
+        spine = g_list_prepend (spine, id);
+        item = item->next;
+    }
+
+    doc->spine = g_list_reverse (spine);
+    doc->page = doc->spine;
+
+    xmlFreeDoc (xdoc);
+}
+
+/**
+ * gepub_doc_get_content:
+ * @doc: a #GepubDoc
+ *
+ * Returns: (transfer none): the document content
+ */
+GBytes *
+gepub_doc_get_content (GepubDoc *doc)
+{
+    return doc->content;
+}
+
 /**
  * gepub_doc_get_metadata:
  * @doc: a #GepubDoc
@@ -225,7 +324,40 @@ gepub_doc_new (const gchar *path)
 gchar *
 gepub_doc_get_metadata (GepubDoc *doc, const gchar *mdata)
 {
-    return epub_get_metadata (doc->rust_epub_doc, mdata);
+    xmlDoc *xdoc = NULL;
+    xmlNode *root_element = NULL;
+    xmlNode *mnode = NULL;
+    xmlNode *mdata_node = NULL;
+    gchar *ret;
+    xmlChar *text;
+    const char *data;
+    gsize size;
+
+    data = g_bytes_get_data (doc->content, &size);
+    xdoc = xmlRecoverMemory (data, size);
+    root_element = xmlDocGetRootElement (xdoc);
+    mnode = gepub_utils_get_element_by_tag (root_element, "metadata");
+    mdata_node = gepub_utils_get_element_by_tag (mnode, mdata);
+
+    text = xmlNodeGetContent (mdata_node);
+    ret = g_strdup (text);
+    xmlFree (text);
+
+    xmlFreeDoc (xdoc);
+
+    return ret;
+}
+
+/**
+ * gepub_doc_get_resources:
+ * @doc: a #GepubDoc
+ *
+ * Returns: (element-type utf8 Gepub.Resource) (transfer none): doc resource table
+ */
+GHashTable *
+gepub_doc_get_resources (GepubDoc *doc)
+{
+    return doc->resources;
 }
 
 /**
@@ -238,9 +370,13 @@ gepub_doc_get_metadata (GepubDoc *doc, const gchar *mdata)
 GBytes *
 gepub_doc_get_resource_by_id (GepubDoc *doc, const gchar *id)
 {
-    int size = 0;
-    guint8 *data = epub_get_resource_by_id (doc->rust_epub_doc, id, &size);
-    return g_bytes_new_take (data, size);
+    GepubResource *gres = g_hash_table_lookup (doc->resources, id);
+    if (!gres) {
+        // not found
+        return NULL;
+    }
+
+    return gepub_archive_read_entry (doc->archive, gres->uri);
 }
 
 /**
@@ -253,9 +389,7 @@ gepub_doc_get_resource_by_id (GepubDoc *doc, const gchar *id)
 GBytes *
 gepub_doc_get_resource (GepubDoc *doc, const gchar *path)
 {
-    int size = 0;
-    guint8 *data = epub_get_resource (doc->rust_epub_doc, path, &size);
-    return g_bytes_new_take (data, size);
+    return gepub_archive_read_entry (doc->archive, path);
 }
 
 /**
@@ -268,7 +402,17 @@ gepub_doc_get_resource (GepubDoc *doc, const gchar *path)
 gchar *
 gepub_doc_get_resource_mime_by_id (GepubDoc *doc, const gchar *id)
 {
-    return epub_get_resource_mime_by_id (doc->rust_epub_doc, id);
+    GepubResource *gres;
+
+    g_return_val_if_fail (id != NULL, NULL);
+
+    gres = g_hash_table_lookup (doc->resources, id);
+    if (!gres) {
+        // not found
+        return NULL;
+    }
+
+    return g_strdup (gres->mime);
 }
 
 /**
@@ -281,7 +425,20 @@ gepub_doc_get_resource_mime_by_id (GepubDoc *doc, const gchar *id)
 gchar *
 gepub_doc_get_resource_mime (GepubDoc *doc, const gchar *path)
 {
-    return epub_get_resource_mime (doc->rust_epub_doc, path);
+    GepubResource *gres;
+    GList *keys = g_hash_table_get_keys (doc->resources);
+
+    while (keys) {
+        gres = ((GepubResource*)g_hash_table_lookup (doc->resources, keys->data));
+        if (!strcmp (gres->uri, path))
+            break;
+        keys = keys->next;
+    }
+
+    if (keys)
+        return g_strdup (gres->mime);
+    else
+        return NULL;
 }
 
 /**
@@ -293,7 +450,7 @@ gepub_doc_get_resource_mime (GepubDoc *doc, const gchar *path)
 gchar *
 gepub_doc_get_current_mime (GepubDoc *doc)
 {
-    return epub_get_current_mime (doc->rust_epub_doc);
+    return gepub_doc_get_resource_mime_by_id (doc, doc->page->data);
 }
 
 /**
@@ -305,9 +462,7 @@ gepub_doc_get_current_mime (GepubDoc *doc)
 GBytes *
 gepub_doc_get_current (GepubDoc *doc)
 {
-    int size = 0;
-    guint8 *data = epub_get_current (doc->rust_epub_doc, &size);
-    return g_bytes_new_take (data, size);
+    return gepub_doc_get_resource_by_id (doc, doc->page->data);
 }
 
 /**
@@ -321,9 +476,96 @@ gepub_doc_get_current (GepubDoc *doc)
 GBytes *
 gepub_doc_get_current_with_epub_uris (GepubDoc *doc)
 {
-    int size = 0;
-    guint8 *data = epub_get_current_with_epub_uris (doc->rust_epub_doc, &size);
-    return g_bytes_new_take (data, size);
+    GBytes *content = gepub_doc_get_current (doc);
+    gchar *path = gepub_doc_get_current_path (doc);
+    // getting the basepath of the current xhtml loaded
+    gchar *base = g_path_get_dirname (path);
+
+    GBytes *replaced = gepub_utils_replace_resources (content, base);
+
+    g_free (path);
+    g_bytes_unref (content);
+
+    return replaced;
+}
+
+/**
+ * gepub_doc_get_text:
+ * @doc: a #GepubDoc
+ *
+ * Returns: (element-type Gepub.TextChunk) (transfer full): the list of text in the current chapter.
+ */
+GList *
+gepub_doc_get_text (GepubDoc *doc)
+{
+    xmlDoc *xdoc = NULL;
+    xmlNode *root_element = NULL;
+    GBytes *current;
+    const guchar *data;
+    gsize size;
+
+    GList *texts = NULL;
+
+    current = gepub_doc_get_current (doc);
+    if (!current) {
+        return NULL;
+    }
+    data = g_bytes_get_data (current, &size);
+    xdoc = htmlReadMemory (data, size, "", NULL, HTML_PARSE_NOWARNING | HTML_PARSE_NOERROR);
+    root_element = xmlDocGetRootElement (xdoc);
+    texts = gepub_utils_get_text_elements (root_element);
+
+    g_bytes_unref (current);
+    xmlFreeDoc (xdoc);
+
+    return texts;
+}
+
+/**
+ * gepub_doc_get_text_by_id:
+ * @doc: a #GepubDoc
+ * @id: the resource id
+ *
+ * Returns: (element-type Gepub.TextChunk) (transfer full): the list of text in the resource @id, or %NULL if it is not found.
+ */
+GList *
+gepub_doc_get_text_by_id (GepubDoc *doc, const gchar *id)
+{
+    xmlDoc *xdoc = NULL;
+    xmlNode *root_element = NULL;
+    gsize size;
+    const guchar *res;
+    GBytes *contents;
+
+    GList *texts = NULL;
+
+    contents = gepub_doc_get_resource_by_id (doc, id);
+    if (!contents) {
+        /* no resource with this id: nothing to parse */
+        return NULL;
+    }
+    res = g_bytes_get_data (contents, &size);
+    xdoc = htmlReadMemory (res, size, "", NULL, HTML_PARSE_NOWARNING | HTML_PARSE_NOERROR);
+    root_element = xmlDocGetRootElement (xdoc);
+    texts = gepub_utils_get_text_elements (root_element);
+
+    g_bytes_unref (contents);
+    xmlFreeDoc (xdoc);
+
+    return texts;
+}
+
+static gboolean
+gepub_doc_set_page_internal (GepubDoc *doc,
+                             GList    *page)
+{
+    if (!page || doc->page == page)
+        return FALSE;
+
+    doc->page = page;
+    g_object_notify_by_pspec (G_OBJECT (doc), properties[PROP_PAGE]);
+
+    return TRUE;
 }
 
 /**
@@ -335,11 +577,7 @@ gepub_doc_get_current_with_epub_uris (GepubDoc *doc)
 gboolean
 gepub_doc_go_next (GepubDoc *doc)
 {
-    gboolean isok = epub_next_page (doc->rust_epub_doc);
-    if (isok) {
-        g_object_notify_by_pspec (G_OBJECT (doc), properties[PROP_PAGE]);
-    }
-    return isok;
+    return gepub_doc_set_page_internal (doc, doc->page->next);
 }
 
 /**
@@ -351,11 +589,7 @@ gepub_doc_go_next (GepubDoc *doc)
 gboolean
 gepub_doc_go_prev (GepubDoc *doc)
 {
-    gboolean isok = epub_prev_page (doc->rust_epub_doc);
-    if (isok) {
-        g_object_notify_by_pspec (G_OBJECT (doc), properties[PROP_PAGE]);
-    }
-    return isok;
+    return gepub_doc_set_page_internal (doc, doc->page->prev);
 }
 
 /**
@@ -367,7 +601,7 @@ gepub_doc_go_prev (GepubDoc *doc)
 int
 gepub_doc_get_n_pages (GepubDoc *doc)
 {
-    return epub_get_num_pages (doc->rust_epub_doc);
+    return g_list_length (doc->spine);
 }
 
 /**
@@ -379,7 +613,7 @@ gepub_doc_get_n_pages (GepubDoc *doc)
 int
 gepub_doc_get_page (GepubDoc *doc)
 {
-    return epub_get_page (doc->rust_epub_doc);
+    return g_list_position (doc->spine, doc->page);
 }
 
 /**
@@ -393,9 +627,12 @@ void
 gepub_doc_set_page (GepubDoc *doc,
                     gint      index)
 {
+    GList *page;
+
     g_return_if_fail (index >= 0 && index <= gepub_doc_get_n_pages (doc));
-    g_object_notify_by_pspec (G_OBJECT (doc), properties[PROP_PAGE]);
-    epub_set_page (doc->rust_epub_doc, index);
+
+    page = g_list_nth (doc->spine, index);
+    gepub_doc_set_page_internal (doc, page);
 }
 
 /**
@@ -408,7 +645,26 @@ gepub_doc_set_page (GepubDoc *doc,
 gchar *
 gepub_doc_get_cover (GepubDoc *doc)
 {
-    return epub_get_cover (doc->rust_epub_doc);
+    xmlDoc *xdoc = NULL;
+    xmlNode *root_element = NULL;
+    xmlNode *mnode = NULL;
+    gchar *ret;
+    xmlChar *text;
+    const char *data;
+    gsize size;
+
+    data = g_bytes_get_data (doc->content, &size);
+    xdoc = xmlRecoverMemory (data, size);
+    root_element = xmlDocGetRootElement (xdoc);
+    mnode = gepub_utils_get_element_by_attr (root_element, "name", "cover");
+    text = xmlGetProp(mnode, "content");
+
+    ret = g_strdup (text);
+    xmlFree (text);
+
+    xmlFreeDoc (xdoc);
+
+    return ret;
 }
 
 /**
@@ -421,7 +677,13 @@ gepub_doc_get_cover (GepubDoc *doc)
 gchar *
 gepub_doc_get_resource_path (GepubDoc *doc, const gchar *id)
 {
-    return epub_resource_path(doc->rust_epub_doc, id);
+    GepubResource *gres = g_hash_table_lookup (doc->resources, id);
+    if (!gres) {
+        // not found
+        return NULL;
+    }
+
+    return g_strdup (gres->uri);
 }
 
 /**
@@ -433,7 +695,7 @@ gepub_doc_get_resource_path (GepubDoc *doc, const gchar *id)
 gchar *
 gepub_doc_get_current_path (GepubDoc *doc)
 {
-    return epub_current_path (doc->rust_epub_doc);
+    return gepub_doc_get_resource_path (doc, doc->page->data);
 }
 
 /**
@@ -441,10 +703,10 @@ gepub_doc_get_current_path (GepubDoc *doc)
  * @doc: a #GepubDoc
  *
 
- * Returns: (transfer full): the current resource id
+ * Returns: (transfer none): the current resource id
  */
 const gchar *
 gepub_doc_get_current_id (GepubDoc *doc)
 {
-    return epub_current_id (doc->rust_epub_doc);
+    return doc->page->data;
 }
diff --git a/libgepub/gepub-doc.h b/libgepub/gepub-doc.h
index ccc8f66..68c2a93 100644
--- a/libgepub/gepub-doc.h
+++ b/libgepub/gepub-doc.h
@@ -35,16 +35,26 @@ G_BEGIN_DECLS
 typedef struct _GepubDoc      GepubDoc;
 typedef struct _GepubDocClass GepubDocClass;
 
+struct _GepubResource {
+    gchar *mime;
+    gchar *uri;
+};
+
+typedef struct _GepubResource GepubResource;
 
 GType             gepub_doc_get_type                        (void) G_GNUC_CONST;
 
 GepubDoc         *gepub_doc_new                             (const gchar *path);
+GBytes           *gepub_doc_get_content                     (GepubDoc *doc);
 gchar            *gepub_doc_get_metadata                    (GepubDoc *doc, const gchar *mdata);
 GBytes           *gepub_doc_get_resource                    (GepubDoc *doc, const gchar *path);
 GBytes           *gepub_doc_get_resource_by_id              (GepubDoc *doc, const gchar *id);
+GHashTable       *gepub_doc_get_resources                   (GepubDoc *doc);
 gchar            *gepub_doc_get_resource_mime               (GepubDoc *doc, const gchar *path);
 gchar            *gepub_doc_get_resource_mime_by_id         (GepubDoc *doc, const gchar *id);
 gchar            *gepub_doc_get_current_mime                (GepubDoc *doc);
+GList            *gepub_doc_get_text                        (GepubDoc *doc);
+GList            *gepub_doc_get_text_by_id                  (GepubDoc *doc, const gchar *id);
 GBytes           *gepub_doc_get_current                     (GepubDoc *doc);
 GBytes           *gepub_doc_get_current_with_epub_uris      (GepubDoc *doc);
 gchar            *gepub_doc_get_cover                       (GepubDoc *doc);
diff --git a/libgepub/gepub-text-chunk.c b/libgepub/gepub-text-chunk.c
new file mode 100644
index 0000000..b240caa
--- /dev/null
+++ b/libgepub/gepub-text-chunk.c
@@ -0,0 +1,111 @@
+/* GepubTextChunk
+ *
+ * Copyright (C) 2011 Daniel Garcia <danigm wadobo com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <config.h>
+#include "gepub-text-chunk.h"
+
+
+G_DEFINE_TYPE (GepubTextChunk, gepub_text_chunk, G_TYPE_OBJECT)
+
+
+static void
+gepub_text_chunk_finalize (GObject *object)
+{
+    GepubTextChunk *t = GEPUB_TEXT_CHUNK (object);
+
+    g_clear_pointer (&t->text, g_free);
+
+    G_OBJECT_CLASS (gepub_text_chunk_parent_class)->finalize (object);
+}
+
+static void
+gepub_text_chunk_init (GepubTextChunk *t)
+{
+}
+
+static void
+gepub_text_chunk_class_init (GepubTextChunkClass *klass)
+{
+    GObjectClass *object_class = G_OBJECT_CLASS (klass);
+    object_class->finalize = gepub_text_chunk_finalize;
+}
+
+/**
+ * gepub_text_chunk_new:
+ * @type: a #GepubTextChunkType, the chunk type
+ * @text: the chunk text
+ *
+ * Returns: (transfer full): a new #GepubTextChunk holding its own copy of @text
+ */
+GepubTextChunk *
+gepub_text_chunk_new (GepubTextChunkType type, const guchar *text)
+{
+    GepubTextChunk *t;
+
+    t = GEPUB_TEXT_CHUNK (g_object_new (GEPUB_TYPE_TEXT_CHUNK, NULL));
+    t->type = type;
+    t->text = g_strdup (text);
+
+    return t;
+}
+
+/**
+ * gepub_text_chunk_type_str:
+ * @chunk: a #GepubTextChunk
+ *
+ * Returns: (transfer none): a static string naming the chunk type: "bold", "italic", "header" or "normal"
+ */
+gchar *
+gepub_text_chunk_type_str (GepubTextChunk *chunk)
+{
+    switch (chunk->type) {
+        case GEPUBTextBold:
+            return "bold";
+        case GEPUBTextItalic:
+            return "italic";
+        case GEPUBTextHeader:
+            return "header";
+        default:
+            return "normal";
+    }
+}
+
+/**
+ * gepub_text_chunk_text:
+ * @chunk: a #GepubTextChunk
+ *
+ * Returns: (transfer none): the chunk text
+ */
+gchar *
+gepub_text_chunk_text (GepubTextChunk *chunk)
+{
+    return chunk->text;
+}
+
+/**
+ * gepub_text_chunk_type:
+ * @chunk: a #GepubTextChunk
+ *
+ * Returns: (transfer none): the chunk type
+ */
+GepubTextChunkType
+gepub_text_chunk_type (GepubTextChunk *chunk)
+{
+    return chunk->type;
+}
diff --git a/libgepub/gepub-text-chunk.h b/libgepub/gepub-text-chunk.h
new file mode 100644
index 0000000..7c53f15
--- /dev/null
+++ b/libgepub/gepub-text-chunk.h
@@ -0,0 +1,64 @@
+/* GepubTextChunk
+ *
+ * Copyright (C) 2011  Daniel Garcia <danigm wadobo com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#ifndef __GEPUB_TEXT_CHUNK_H__
+#define __GEPUB_TEXT_CHUNK_H__
+
+#include <glib-object.h>
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+/* Standard GObject type, cast and type-check macros for GepubTextChunk. */
+#define GEPUB_TYPE_TEXT_CHUNK           (gepub_text_chunk_get_type ())
+#define GEPUB_TEXT_CHUNK(obj)           (G_TYPE_CHECK_INSTANCE_CAST (obj, GEPUB_TYPE_TEXT_CHUNK, GepubTextChunk))
+#define GEPUB_TEXT_CHUNK_CLASS(cls)     (G_TYPE_CHECK_CLASS_CAST (cls, GEPUB_TYPE_TEXT_CHUNK, GepubTextChunkClass))
+#define GEPUB_IS_TEXT_CHUNK(obj)        (G_TYPE_CHECK_INSTANCE_TYPE (obj, GEPUB_TYPE_TEXT_CHUNK))
+#define GEPUB_IS_TEXT_CHUNK_CLASS(obj)  (G_TYPE_CHECK_CLASS_TYPE (obj, GEPUB_TYPE_TEXT_CHUNK))
+#define GEPUB_TEXT_CHUNK_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), GEPUB_TYPE_TEXT_CHUNK, GepubTextChunkClass))
+
+typedef enum {
+    GEPUBTextHeader,
+    GEPUBTextBold,
+    GEPUBTextItalic,
+    GEPUBTextNormal
+} GepubTextChunkType;
+
+struct _GepubTextChunk {
+    GObject parent;
+
+    GepubTextChunkType type;
+    guchar *text;
+};
+
+struct _GepubTextChunkClass {
+    GObjectClass parent_class;
+};
+
+typedef struct _GepubTextChunk      GepubTextChunk;
+typedef struct _GepubTextChunkClass GepubTextChunkClass;
+
+GType               gepub_text_chunk_get_type     (void) G_GNUC_CONST;
+GepubTextChunk     *gepub_text_chunk_new          (GepubTextChunkType type, const guchar *text);
+gchar              *gepub_text_chunk_type_str     (GepubTextChunk *chunk);
+gchar              *gepub_text_chunk_text         (GepubTextChunk *chunk);
+GepubTextChunkType  gepub_text_chunk_type         (GepubTextChunk *chunk);
+
+G_END_DECLS
+
+#endif /* __GEPUB_TEXT_CHUNK_H__ */
diff --git a/libgepub/gepub-utils.c b/libgepub/gepub-utils.c
new file mode 100644
index 0000000..d120ffd
--- /dev/null
+++ b/libgepub/gepub-utils.c
@@ -0,0 +1,249 @@
+/*
+ * Copyright (C) 2011  Daniel Garcia <danigm wadobo com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <libsoup/soup.h>
+
+#include <libxml/tree.h>
+#include <libxml/parser.h>
+#include <stdarg.h>
+#include <string.h>
+
+#include "gepub-utils.h"
+#include "gepub-text-chunk.h"
+
+
+/* Replaces the attr value with epub:// prefix for the tagname. This
+ * function also makes the resource absolute based on the epub root.
+ *
+ * Walks @node, its following siblings and all of their descendants. For
+ * every element named @tagname that carries @attr, the attribute value is
+ * resolved against "epub://<path>/" and written back in place. A value
+ * that libsoup cannot parse is left untouched.
+ */
+static void
+set_epub_uri (xmlNode *node, const gchar *path, const gchar *tagname, const gchar *attr)
+{
+    xmlNode *cur_node = NULL;
+    xmlChar *text = NULL;
+
+    SoupURI *baseURI;
+    gchar *basepath = g_strdup_printf ("epub://%s/", path);
+
+    baseURI = soup_uri_new (basepath);
+    g_free (basepath);
+
+    for (cur_node = node; cur_node; cur_node = cur_node->next) {
+        if (cur_node->type == XML_ELEMENT_NODE ) {
+            text = xmlGetProp (cur_node, (const xmlChar *) attr);
+            if (text && !strcmp ((const char *) cur_node->name, tagname)) {
+                SoupURI *uri = soup_uri_new_with_base (baseURI, (const char *) text);
+
+                /* The parser may hand back NULL for a malformed value; do
+                 * not pass that on to soup_uri_to_string / soup_uri_free. */
+                if (uri) {
+                    gchar *value = soup_uri_to_string (uri, FALSE);
+
+                    xmlSetProp (cur_node, (const xmlChar *) attr, (const xmlChar *) value);
+
+                    soup_uri_free (uri);
+                    g_free (value);
+                }
+            }
+            if (text) {
+                /* xmlGetProp returned a copy that we own. */
+                xmlFree (text);
+                text = NULL;
+            }
+        }
+
+        if (cur_node->children)
+            set_epub_uri (cur_node->children, path, tagname, attr);
+    }
+
+    if (baseURI)
+        soup_uri_free (baseURI);
+}
+
+/* Checks whether @node or any of its ancestors is an element whose tag
+ * name matches, ignoring ASCII case, one of the given names.
+ *
+ * @node: the node to start from; the parent chain is followed upwards
+ * @name: the first tag name, followed by further names and a terminating NULL
+ *
+ * Returns TRUE on the first match, FALSE if no element in the chain matches.
+ */
+gboolean
+gepub_utils_has_parent_tag (xmlNode *node, gchar *name, ...)
+{
+    va_list ap;
+
+    xmlNode *cur_node = NULL;
+    GList *tags = NULL;
+    GList *l = NULL;
+    gchar *name2 = NULL;
+    gboolean found = FALSE;
+
+    /* Collect the NULL-terminated names; the list only borrows the strings. */
+    va_start (ap, name);
+    for (name2 = name; name2 != NULL; name2 = va_arg(ap, gchar*)) {
+        tags = g_list_append (tags, name2);
+    }
+    va_end (ap);
+
+    for (cur_node = node; cur_node && !found; cur_node = cur_node->parent) {
+        if (cur_node->type != XML_ELEMENT_NODE)
+            continue;
+
+        /* Compare in place instead of allocating upper-cased copies that
+         * were never released. */
+        for (l = tags; l && !found; l = l->next) {
+            if (!g_ascii_strcasecmp ((const gchar *) cur_node->name, (const gchar *) l->data))
+                found = TRUE;
+        }
+    }
+
+    g_list_free (tags);
+
+    return found;
+}
+
+/**
+ * gepub_utils_get_element_by_tag: (skip):
+ * @node: an #xmlNode
+ * @name: the tag name
+ *
+ * Searches @node, then its descendants, then each following sibling in
+ * the same way, for the first element whose name equals @name
+ * (case-sensitive comparison).
+ *
+ * Returns: the tag matching @name, or %NULL if there is none.
+ */
+xmlNode *
+gepub_utils_get_element_by_tag (xmlNode *node, const gchar *name)
+{
+    xmlNode *cur_node = NULL;
+    xmlNode *ret = NULL;
+
+    for (cur_node = node; cur_node; cur_node = cur_node->next) {
+        if (cur_node->type == XML_ELEMENT_NODE ) {
+            if (!strcmp (cur_node->name, name))
+                return cur_node;
+        }
+
+        ret = gepub_utils_get_element_by_tag (cur_node->children, name);
+        if (ret)
+            return ret;
+    }
+    return ret;
+}
+
+/**
+ * gepub_utils_get_element_by_attr: (skip):
+ * @node: an #xmlNode
+ * @attr: the attribute
+ * @value: the value
+ *
+ * Searches @node, then its descendants, then each following sibling in
+ * the same way, for the first element whose @attr attribute equals @value.
+ *
+ * Returns: the element matching @attr and @value, or %NULL if there is none.
+ */
+xmlNode *
+gepub_utils_get_element_by_attr (xmlNode *node, const gchar *attr, const gchar *value)
+{
+    xmlNode *cur_node = NULL;
+    xmlNode *ret = NULL;
+    xmlChar *text = NULL;
+
+    for (cur_node = node; cur_node; cur_node = cur_node->next) {
+        if (cur_node->type == XML_ELEMENT_NODE ) {
+            text = xmlGetProp (cur_node, (const xmlChar *) attr);
+            if (text) {
+                /* xmlGetProp returns a copy we own: release it on every
+                 * path, including the one that returns the match. */
+                gboolean match = !strcmp ((const char *) text, value);
+
+                xmlFree (text);
+                text = NULL;
+
+                if (match)
+                    return cur_node;
+            }
+        }
+
+        if (cur_node->children)
+            ret = gepub_utils_get_element_by_attr (cur_node->children, attr, value);
+
+        if (ret)
+            return ret;
+    }
+    return ret;
+}
+
+/**
+ * gepub_utils_get_text_elements:
+ * @node: an #xmlNode
+ *
+ * Collects the text nodes found in @node, its following siblings and their
+ * descendants. A text node is classified by its ancestors, checked in this
+ * order: b/strong (bold), i/em (italic), h1-h5 (header), p (normal). Text
+ * outside all of those is skipped. Each "p" or "br" element met after at
+ * least one chunk was collected appends a newline to the last chunk.
+ *
+ * Returns: (element-type Gepub.TextChunk) (transfer full): the text elements
+ *  of @node.
+ */
+GList *
+gepub_utils_get_text_elements (xmlNode *node)
+{
+    GList *text_list = NULL;
+    GList *sub_texts = NULL;
+
+    xmlNode *cur_node = NULL;
+
+    for (cur_node = node; cur_node; cur_node = cur_node->next) {
+        if (cur_node->type == XML_TEXT_NODE) {
+            GepubTextChunk *text_chunk = NULL;
+
+            if (gepub_utils_has_parent_tag (cur_node, "b", "strong", NULL)) {
+                text_chunk = gepub_text_chunk_new (GEPUBTextBold, cur_node->content);
+            } else if (gepub_utils_has_parent_tag (cur_node, "i", "em", NULL)) {
+                text_chunk = gepub_text_chunk_new (GEPUBTextItalic, cur_node->content);
+            } else if (gepub_utils_has_parent_tag (cur_node, "h1", "h2", "h3", "h4", "h5", NULL)) {
+                text_chunk = gepub_text_chunk_new (GEPUBTextHeader, cur_node->content);
+            } else if (gepub_utils_has_parent_tag (cur_node, "p", NULL)) {
+                text_chunk = gepub_text_chunk_new (GEPUBTextNormal, cur_node->content);
+            }
+
+            if (text_chunk)
+                text_list = g_list_append (text_list, text_chunk);
+        }
+
+        if (cur_node->type == XML_ELEMENT_NODE) {
+            /* Compare the tag name in place; the upper-cased copy made here
+             * before was never freed. */
+            const gchar *nodetag = (const gchar *) cur_node->name;
+
+            if (text_list && (!g_ascii_strcasecmp (nodetag, "P") || !g_ascii_strcasecmp (nodetag, "BR"))) {
+                GepubTextChunk *text_chunk = (GepubTextChunk*)(g_list_last (text_list)->data);
+                guchar *old_text = text_chunk->text;
+
+                /* Paragraph / line break: terminate the previous chunk. */
+                text_chunk->text = (guchar *) g_strdup_printf ("%s\n", old_text);
+                g_free (old_text);
+            }
+        }
+
+        // TODO add images to this list of objects
+
+        sub_texts = gepub_utils_get_text_elements (cur_node->children);
+        if (sub_texts)
+            text_list = g_list_concat (text_list, sub_texts);
+    }
+
+    return text_list;
+}
+
+/* Replacing epub media paths, for css, image and svg files, to be
+ * able to provide these files to webkit from the epub file.
+ *
+ * @content: the XML/XHTML document to rewrite; it is parsed, not modified
+ * @path: the directory inside the epub that relative references resolve against
+ *
+ * Returns a new GBytes with the re-serialized document. If nothing could
+ * be serialized (for example because @content did not parse), the result
+ * is an empty GBytes.
+ */
+GBytes *
+gepub_utils_replace_resources (GBytes *content, const gchar *path)
+{
+    xmlDoc *doc = NULL;
+    xmlNode *root_element = NULL;
+    xmlChar *buffer = NULL;
+    const guchar *data;
+    gsize bufsize;
+    int dumpsize = 0;
+
+    data = g_bytes_get_data (content, &bufsize);
+    doc = xmlReadMemory ((const char *) data, (int) bufsize, "", NULL, XML_PARSE_NOWARNING | XML_PARSE_NOERROR);
+    root_element = xmlDocGetRootElement (doc);
+
+    // replacing css resources
+    set_epub_uri (root_element, path, "link", "href");
+    // replacing images resources
+    set_epub_uri (root_element, path, "img", "src");
+    // replacing svg images resources
+    set_epub_uri (root_element, path, "image", "xlink:href");
+    // replacing crosslinks
+    set_epub_uri (root_element, path, "a", "href");
+
+    /* libxml2 reports the size through an int; writing it through a cast
+     * gsize pointer only fills part of the variable on 64-bit platforms. */
+    xmlDocDumpFormatMemory (doc, &buffer, &dumpsize, 1);
+    xmlFreeDoc (doc);
+
+    if (buffer == NULL)
+        return g_bytes_new (NULL, 0);
+
+    /* The buffer comes from libxml2's allocator, so release it with xmlFree
+     * rather than the g_free that g_bytes_new_take would use. */
+    return g_bytes_new_with_free_func (buffer, (gsize) dumpsize, (GDestroyNotify) xmlFree, buffer);
+}
diff --git a/libgepub/gepub-utils.h b/libgepub/gepub-utils.h
new file mode 100644
index 0000000..1dbf388
--- /dev/null
+++ b/libgepub/gepub-utils.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2011  Daniel Garcia <danigm wadobo com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#ifndef __GEPUB_UTILS_H__
+#define __GEPUB_UTILS_H__
+
+#include <glib.h>
+#include <libxml/tree.h>
+
+xmlNode * gepub_utils_get_element_by_tag  (xmlNode *node, const gchar *name);
+xmlNode * gepub_utils_get_element_by_attr (xmlNode *node, const gchar *attr, const gchar *value);
+GList *   gepub_utils_get_text_elements   (xmlNode *node);
+GBytes *  gepub_utils_replace_resources   (GBytes *content, const gchar *path);
+
+#endif
diff --git a/libgepub/gepub.h b/libgepub/gepub.h
index cdc1c3c..a8f7fd2 100644
--- a/libgepub/gepub.h
+++ b/libgepub/gepub.h
@@ -1,6 +1,8 @@
 #ifndef _GEPUB__H_
 #define _GEPUB__H_
 
+#include "gepub-archive.h"
+#include "gepub-text-chunk.h"
 #include "gepub-doc.h"
 #include "gepub-widget.h"
 
diff --git a/tests/test-gepub.c b/tests/test-gepub.c
index f237964..20048d4 100644
--- a/tests/test-gepub.c
+++ b/tests/test-gepub.c
@@ -20,6 +20,39 @@ GtkTextBuffer *page_buffer;
 #define TEST(f,arg...) PTEST ("\n### TESTING " #f " ###\n\n"); f (arg); PTEST ("\n\n");
 
 void
+update_text (GepubDoc *doc)
+{
+    GList *l, *chunks;
+    GtkTextIter start, end;
+
+    gtk_text_buffer_get_start_iter (page_buffer, &start);
+    gtk_text_buffer_get_end_iter (page_buffer, &end);
+    gtk_text_buffer_delete (page_buffer, &start, &end);
+
+    chunks = gepub_doc_get_text (doc);
+
+    for (l=chunks; l; l = l->next) {
+        GepubTextChunk *chunk = GEPUB_TEXT_CHUNK (l->data);
+        if (chunk->type == GEPUBTextHeader) {
+            gtk_text_buffer_insert_at_cursor (page_buffer, "\n", -1);
+            gtk_text_buffer_get_end_iter (page_buffer, &end);
+            gtk_text_buffer_insert_with_tags_by_name (page_buffer, &end, chunk->text, -1, "head",  NULL);
+            gtk_text_buffer_insert_at_cursor (page_buffer, "\n", -1);
+        } else if (chunk->type == GEPUBTextNormal) {
+            gtk_text_buffer_insert_at_cursor (page_buffer, "\n", -1);
+            gtk_text_buffer_insert_at_cursor (page_buffer, chunk->text, -1);
+            gtk_text_buffer_insert_at_cursor (page_buffer, "\n", -1);
+        } else if (chunk->type == GEPUBTextItalic) {
+            gtk_text_buffer_get_end_iter (page_buffer, &end);
+            gtk_text_buffer_insert_with_tags_by_name (page_buffer, &end, chunk->text, -1, "italic",  NULL);
+        } else if (chunk->type == GEPUBTextBold) {
+            gtk_text_buffer_get_end_iter (page_buffer, &end);
+            gtk_text_buffer_insert_with_tags_by_name (page_buffer, &end, chunk->text, -1, "bold",  NULL);
+        }
+    }
+}
+
+void
 print_replaced_text (GepubDoc *doc)
 {
     GBytes *content;
@@ -33,17 +66,102 @@ print_replaced_text (GepubDoc *doc)
 }
 
 void
-button_pressed (GtkButton *button, GepubDoc *doc)
+button_pressed (GtkButton *button, GepubWidget *widget)
 {
+    GepubDoc *doc = gepub_widget_get_doc (widget);
+
     if (!strcmp (gtk_button_get_label (button), "prev")) {
         gepub_doc_go_prev (doc);
     } else {
         gepub_doc_go_next (doc);
     }
-    printf ("CURRENT: %d\n", gepub_doc_get_page (doc));
-    printf ("CURRENT: %s\n", gepub_doc_get_current_id (doc));
-    printf ("CURRENT: %s\n", gepub_doc_get_current_path (doc));
-    //print_replaced_text (doc);
+    update_text (doc);
+    print_replaced_text (doc);
+}
+
+/* Opens the archive at @path and prints the number of entries followed by
+ * each entry name, or an error line when no entries are returned. */
+void
+test_open (const char *path)
+{
+    GepubArchive *a;
+    GList *list_files;
+    GList *l;
+    gint size;
+
+    a = gepub_archive_new (path);
+    list_files = gepub_archive_list_files (a);
+    if (!list_files) {
+        PTEST ("ERROR: BAD epub file");
+        g_object_unref (a);
+        return;
+    }
+
+    size = g_list_length (list_files);
+    PTEST ("%d\n", size);
+    /* Walk the list once; looking up each index with g_list_nth_data
+     * rescans from the head every time. */
+    for (l = list_files; l; l = l->next) {
+        PTEST ("file: %s\n", (char *)l->data);
+    }
+
+    /* Releases the entry names together with the list nodes. */
+    g_list_free_full (list_files, g_free);
+
+    g_object_unref (a);
+}
+
+void
+find_xhtml (gchar *key, GepubResource *value, gpointer data)
+{
+    guchar **d = (guchar **)data;
+    if (g_strcmp0 (value->mime, "application/xhtml+xml") == 0) {
+        *d = value->uri;
+    }
+}
+
+/* Looks up an XHTML resource of the document at @path (via find_xhtml)
+ * and prints that entry's raw contents as read from the archive. */
+void
+test_read (const char *path)
+{
+    GepubArchive *a;
+    GepubDoc *doc;
+    GHashTable *ht;
+    const guchar *buffer;
+    guchar *file = NULL;
+    gsize bufsize;
+    GBytes *bytes = NULL;
+
+    a = gepub_archive_new (path);
+
+    doc = gepub_doc_new (path);
+    if (!doc) {
+        PTEST ("ERROR: BAD epub file");
+        g_object_unref (a);
+        return;
+    }
+
+    ht = (GHashTable*)gepub_doc_get_resources (doc);
+    g_hash_table_foreach (ht, (GHFunc)find_xhtml, &file);
+
+    /* file stays NULL when the document lists no XHTML resource. */
+    if (file)
+        bytes = gepub_archive_read_entry (a, file);
+    if (bytes) {
+        buffer = g_bytes_get_data (bytes, &bufsize);
+        /* GBytes data carries an explicit length and need not be
+         * NUL-terminated, so bound the print. */
+        PTEST ("doc:%s\n----\n%.*s\n-----\n", file, (int) bufsize, (const char *) buffer);
+        g_bytes_unref (bytes);
+    }
+
+    /* file points at a uri stored in the resource table, so the document
+     * is only released once it is no longer used. */
+    g_object_unref (doc);
+    g_object_unref (a);
+}
+
+void
+test_root_file (const char *path)
+{
+    GepubArchive *a;
+    gchar *root_file = NULL;
+
+    a = gepub_archive_new (path);
+
+    root_file = gepub_archive_get_root_file (a);
+    PTEST ("root file: %s\n", root_file);
+    if (root_file)
+        g_free (root_file);
+
+    g_object_unref (a);
 }
 
 void
@@ -55,24 +173,60 @@ test_doc_name (const char *path)
     gchar *id = gepub_doc_get_metadata (doc, GEPUB_META_ID);
     gchar *author = gepub_doc_get_metadata (doc, GEPUB_META_AUTHOR);
     gchar *description = gepub_doc_get_metadata (doc, GEPUB_META_DESC);
-    //gchar *cover = gepub_doc_get_cover (doc);
-    //gchar *cover_mime = gepub_doc_get_resource_mime_by_id (doc, cover);
+    gchar *cover = gepub_doc_get_cover (doc);
+    gchar *cover_mime = gepub_doc_get_resource_mime_by_id (doc, cover);
 
     PTEST ("title: %s\n", title);
     PTEST ("author: %s\n", author);
     PTEST ("id: %s\n", id);
     PTEST ("lang: %s\n", lang);
     PTEST ("desc: %s\n", description);
-    //PTEST ("cover: %s\n", cover);
-    //PTEST ("cover mime: %s\n", cover_mime);
+    PTEST ("cover: %s\n", cover);
+    PTEST ("cover mime: %s\n", cover_mime);
 
     g_free (title);
     g_free (lang);
     g_free (id);
     g_free (author);
     g_free (description);
-    //g_free (cover);
-    //g_free (cover_mime);
+    g_free (cover);
+    g_object_unref (G_OBJECT (doc));
+}
+
+void
+pk (gchar *key, GepubResource *value, gpointer data)
+{
+    PTEST ("%s: %s, %s\n", key, value->mime, value->uri);
+}
+
+/* Prints every resource of the document at @path (key, mime, uri) and
+ * then the contents of the resource with id "ncx", when one is returned. */
+void
+test_doc_resources (const char *path)
+{
+    GepubDoc *doc = gepub_doc_new (path);
+    GHashTable *ht = (GHashTable*)gepub_doc_get_resources (doc);
+    GBytes *ncx;
+    const guchar *data;
+    gsize size;
+
+    g_hash_table_foreach (ht, (GHFunc)pk, NULL);
+
+    ncx = gepub_doc_get_resource_by_id (doc, "ncx");
+    /* Guard in case the lookup yields no bytes for this id. */
+    if (ncx) {
+        data = g_bytes_get_data (ncx, &size);
+        /* GBytes data carries an explicit length and need not be
+         * NUL-terminated, so bound the print. */
+        PTEST ("ncx:\n%.*s\n", (int) size, (const char *) data);
+        g_bytes_unref (ncx);
+    }
+
+    g_object_unref (G_OBJECT (doc));
+}
+
+void
+test_doc_spine (const char *path)
+{
+    GepubDoc *doc = gepub_doc_new (path);
+    int id = 0;
+
+    do {
+        PTEST ("%d: %s\n", id++, gepub_doc_get_current_id (doc));
+    } while (gepub_doc_go_next (doc));
+
     g_object_unref (G_OBJECT (doc));
 }
 
@@ -93,9 +247,8 @@ main (int argc, char **argv)
 
     GtkTextBuffer *buffer;
 
-    GError *error = NULL;
-
     GepubDoc *doc;
+    GtkWidget *textview2;
     GtkWidget *widget = gepub_widget_new ();
 
     if (argc < 2) {
@@ -110,11 +263,7 @@ main (int argc, char **argv)
     gtk_container_add (GTK_CONTAINER (window), vpaned);
 
     // gepub widget
-    GFile *file = g_file_new_for_path (argv[1]);
-    doc = g_initable_new (gepub_doc_get_type (), NULL, &error,
-                          "file", file,
-                          NULL);
-    g_object_unref (file);
+    doc = gepub_doc_new (argv[1]);
     if (!doc) {
         perror ("BAD epub FILE");
         return -1;
@@ -122,15 +271,28 @@ main (int argc, char **argv)
 
     gepub_widget_set_doc (GEPUB_WIDGET (widget), doc);
 
+    scrolled = gtk_scrolled_window_new (NULL, NULL);
+    gtk_scrolled_window_set_policy (GTK_SCROLLED_WINDOW (scrolled), GTK_POLICY_AUTOMATIC, 
GTK_POLICY_AUTOMATIC);
+    textview2 = gtk_text_view_new ();
+    gtk_text_view_set_wrap_mode (GTK_TEXT_VIEW (textview2), GTK_WRAP_WORD_CHAR);
+    page_buffer = gtk_text_view_get_buffer (GTK_TEXT_VIEW (textview2));
+    gtk_text_buffer_create_tag (page_buffer, "bold", "weight", PANGO_WEIGHT_BOLD, "foreground", "#ff0000", 
NULL);
+    gtk_text_buffer_create_tag (page_buffer, "italic", "style", PANGO_STYLE_ITALIC, "foreground", "#005500", 
NULL);
+    gtk_text_buffer_create_tag (page_buffer, "head", "size-points", 20.0, NULL);
+    update_text (doc);
+    gtk_container_add (GTK_CONTAINER (scrolled), GTK_WIDGET (textview2));
+    gtk_widget_set_size_request (GTK_WIDGET (textview2), 500, 300);
+
     vbox = gtk_box_new (GTK_ORIENTATION_VERTICAL, 5);
     hbox = gtk_box_new (GTK_ORIENTATION_HORIZONTAL, 5);
     b_prev = gtk_button_new_with_label ("prev");
-    g_signal_connect (b_prev, "clicked", (GCallback)button_pressed, GEPUB_DOC (doc));
+    g_signal_connect (b_prev, "clicked", (GCallback)button_pressed, GEPUB_WIDGET (widget));
     b_next = gtk_button_new_with_label ("next");
-    g_signal_connect (b_next, "clicked", (GCallback)button_pressed, GEPUB_DOC (doc));
+    g_signal_connect (b_next, "clicked", (GCallback)button_pressed, GEPUB_WIDGET (widget));
     gtk_container_add (GTK_CONTAINER (hbox), b_prev);
     gtk_container_add (GTK_CONTAINER (hbox), b_next);
     gtk_box_pack_start (GTK_BOX (vbox), hbox, FALSE, FALSE, 5);
+    gtk_box_pack_start (GTK_BOX (vbox), scrolled, TRUE, TRUE, 5);
 
     textview = gtk_text_view_new ();
     scrolled = gtk_scrolled_window_new (NULL, NULL);
@@ -146,7 +308,12 @@ main (int argc, char **argv)
 
 
     // Testing all
+    TEST(test_open, argv[1])
+    TEST(test_read, argv[1])
+    TEST(test_root_file, argv[1])
     TEST(test_doc_name, argv[1])
+    TEST(test_doc_resources, argv[1])
+    TEST(test_doc_spine, argv[1])
 
     // Freeing the mallocs :P
     if (buf2) {


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]