[tracker] libtracker-common: Added libenca-based encoding detection method



commit 5a7156c87f32547a24fb572de452aef888920027
Author: Aleksander Morgado <aleksander lanedo com>
Date:   Fri Feb 25 11:33:54 2011 +0100

    libtracker-common: Added libenca-based encoding detection method

 src/libtracker-common/Makefile.am        |  102 ++++++++++++++++--------------
 src/libtracker-common/tracker-common.h   |    1 +
 src/libtracker-common/tracker-encoding.c |   84 ++++++++++++++++++++++++
 src/libtracker-common/tracker-encoding.h |   40 ++++++++++++
 4 files changed, 179 insertions(+), 48 deletions(-)
---
diff --git a/src/libtracker-common/Makefile.am b/src/libtracker-common/Makefile.am
index df33113..d243760 100644
--- a/src/libtracker-common/Makefile.am
+++ b/src/libtracker-common/Makefile.am
@@ -1,9 +1,9 @@
 include $(top_srcdir)/Makefile.decl
 
-AM_CPPFLAGS =                                          \
-	$(BUILD_CFLAGS)                                \
-	-I$(top_srcdir)/src                            \
-	-DSHAREDIR=\""$(datadir)"\"                    \
+AM_CPPFLAGS = \
+	$(BUILD_CFLAGS) \
+	-I$(top_srcdir)/src \
+	-DSHAREDIR=\""$(datadir)"\" \
 	$(LIBTRACKER_COMMON_CFLAGS)
 
 libtracker_commondir = $(libdir)/tracker-$(TRACKER_API_VERSION)
@@ -15,59 +15,65 @@ else
 os_sources = tracker-os-dependant-unix.c
 endif
 
-libtracker_common_la_SOURCES =                         \
-	$(marshal_sources)                             \
-	$(os_sources)                                  \
-	tracker-config-file.c                          \
-	tracker-date-time.c                            \
-	tracker-dbus.c                                 \
-	tracker-file-utils.c                           \
-	tracker-ioprio.c                               \
-	tracker-keyfile-object.c                       \
-	tracker-log.c                                  \
-	tracker-type-utils.c                           \
-	tracker-utils.c                                \
-	tracker-crc32.c                                \
-	tracker-locale.c
-
-noinst_HEADERS =                                       \
-	tracker-dbus.h                                 \
-	tracker-ioprio.h                               \
-	tracker-log.h                                  \
-	tracker-os-dependant.h                         \
-	tracker-config-file.h                          \
-	tracker-common.h                               \
-	tracker-date-time.h                            \
-	tracker-file-utils.h                           \
-	tracker-keyfile-object.h                       \
-	tracker-ontologies.h                           \
-	tracker-type-utils.h                           \
-	tracker-utils.h                                \
-	tracker-crc32.h                                \
-	tracker-locale.h
+libtracker_common_la_SOURCES = \
+	$(marshal_sources) \
+	$(os_sources) \
+	tracker-config-file.c \
+	tracker-date-time.c \
+	tracker-dbus.c \
+	tracker-file-utils.c \
+	tracker-ioprio.c \
+	tracker-keyfile-object.c \
+	tracker-log.c \
+	tracker-type-utils.c \
+	tracker-utils.c \
+	tracker-crc32.c \
+	tracker-locale.c \
+	tracker-encoding.c
+
+noinst_HEADERS = \
+	tracker-dbus.h \
+	tracker-ioprio.h \
+	tracker-log.h \
+	tracker-os-dependant.h \
+	tracker-config-file.h \
+	tracker-common.h \
+	tracker-date-time.h \
+	tracker-file-utils.h \
+	tracker-keyfile-object.h \
+	tracker-ontologies.h \
+	tracker-type-utils.h \
+	tracker-utils.h \
+	tracker-crc32.h \
+	tracker-locale.h \
+	tracker-encoding.h
 
 if HAVE_TRACKER_FTS
-libtracker_common_la_SOURCES +=                        \
-	tracker-language.c
-noinst_HEADERS +=                                      \
-	tracker-language.h
+libtracker_common_la_SOURCES += tracker-language.c
+noinst_HEADERS += tracker-language.h
 endif
 
-libtracker_common_la_LDFLAGS =                         \
+libtracker_common_la_CFLAGS =
+
+libtracker_common_la_LDFLAGS = \
 	-version-info $(LT_CURRENT):$(LT_REVISION):$(LT_AGE)
 
-libtracker_common_la_LIBADD =                          \
-	$(BUILD_LIBS)                                  \
+libtracker_common_la_LIBADD = \
+	$(BUILD_LIBS) \
 	$(LIBTRACKER_COMMON_LIBS)
 
 if HAVE_TRACKER_FTS
-libtracker_common_la_LIBADD +=                         \
-	$(top_builddir)/src/libstemmer/libstemmer.la
+libtracker_common_la_LIBADD += $(top_builddir)/src/libstemmer/libstemmer.la
+endif
+
+if HAVE_ENCA
+libtracker_common_la_CFLAGS += $(ENCA_CFLAGS)
+libtracker_common_la_LIBADD += $(ENCA_LIBS)
 endif
 
-marshal_sources =                                      \
-        tracker-marshal.h                              \
-        tracker-marshal.c
+marshal_sources = \
+	tracker-marshal.h \
+	tracker-marshal.c
 
 tracker-marshal.h: tracker-marshal.list
 	$(AM_V_GEN)$(GLIB_GENMARSHAL) $< --prefix=tracker_marshal --header > $@
@@ -80,7 +86,7 @@ BUILT_SOURCES =	$(marshal_sources)
 
 CLEANFILES = $(marshal_sources)
 
-EXTRA_DIST =                                           \
-	tracker-marshal.list                           \
+EXTRA_DIST = \
+	tracker-marshal.list \
 	libtracker-common.vapi
 
diff --git a/src/libtracker-common/tracker-common.h b/src/libtracker-common/tracker-common.h
index bcf4e98..9eb08b5 100644
--- a/src/libtracker-common/tracker-common.h
+++ b/src/libtracker-common/tracker-common.h
@@ -41,6 +41,7 @@
 #include "tracker-type-utils.h"
 #include "tracker-utils.h"
 #include "tracker-locale.h"
+#include "tracker-encoding.h"
 
 #undef __LIBTRACKER_COMMON_INSIDE__
 
diff --git a/src/libtracker-common/tracker-encoding.c b/src/libtracker-common/tracker-encoding.c
new file mode 100644
index 0000000..6868d1b
--- /dev/null
+++ b/src/libtracker-common/tracker-encoding.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2011 Nokia <ivan frade nokia com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ */
+
+#include "config.h"
+
+#include <glib.h>
+#include "tracker-encoding.h"
+
+#ifdef HAVE_ENCA
+#include <enca.h>
+#endif
+
+#ifdef HAVE_ENCA
+/* Tries every language known to enca until one yields a known charset.
+ * Returns a newly-allocated iconv-style charset name, or NULL if none did. */
+static gchar *
+encoding_guess_enca (const gchar *buffer,
+                     gsize        size)
+{
+	gchar *encoding = NULL;
+	const gchar **langs;
+	gsize n_langs = 0;
+	gsize i;
+
+	langs = enca_get_languages (&n_langs);
+
+	for (i = 0; langs && i < n_langs && !encoding; i++) {
+		EncaAnalyser analyser = enca_analyser_alloc (langs[i]);
+		EncaEncoding eencoding;
+
+		if (!analyser) /* no analyser for this language: try the next */
+			continue;
+		/* enca_analyse_const () takes unsigned bytes; cast to match */
+		eencoding = enca_analyse_const (analyser, (const guchar *) buffer, size);
+		if (enca_charset_is_known (eencoding.charset))
+			encoding = g_strdup (enca_charset_name (eencoding.charset,
+			                                        ENCA_NAME_STYLE_ICONV));
+		enca_analyser_free (analyser);
+	}
+
+	free (langs); /* releases only the array returned by enca_get_languages () */
+	return encoding;
+}
+#endif /* HAVE_ENCA */
+
+gboolean
+tracker_encoding_can_guess (void)
+{
+#ifndef HAVE_ENCA
+	return FALSE; /* no guessing backend was compiled in */
+#else
+	return TRUE;  /* the enca-based guesser is built */
+#endif
+}
+
+/* Guesses the encoding of the first size bytes of buffer.
+ * Returns a newly-allocated charset name, or NULL if it cannot tell. */
+gchar *
+tracker_encoding_guess (const gchar *buffer,
+                        gsize        size)
+{
+#ifdef HAVE_ENCA
+	/* enca is the only guessing method implemented here */
+	return encoding_guess_enca (buffer, size);
+#else
+	return NULL;
+#endif /* HAVE_ENCA */
+}
diff --git a/src/libtracker-common/tracker-encoding.h b/src/libtracker-common/tracker-encoding.h
new file mode 100644
index 0000000..e915ad3
--- /dev/null
+++ b/src/libtracker-common/tracker-encoding.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2011 Nokia <ivan frade nokia com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __LIBTRACKER_COMMON_ENCODING_H__
+#define __LIBTRACKER_COMMON_ENCODING_H__
+
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+#if !defined (__LIBTRACKER_COMMON_INSIDE__) && !defined (TRACKER_COMPILATION)
+#error "only <libtracker-common/tracker-common.h> must be included directly."
+#endif
+
+/* Returns TRUE if some method for guessing encodings was compiled in */
+gboolean  tracker_encoding_can_guess (void);
+
+/* Returns a newly-allocated charset name (free with g_free()), or NULL */
+gchar    *tracker_encoding_guess     (const gchar *buffer,
+                                      gsize        size);
+
+G_END_DECLS
+
+#endif /* __LIBTRACKER_COMMON_ENCODING_H__ */



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]