[tracker] tracker-extract, mp3: Use encoding detection from libtracker-common



commit 99b832cdd99194ff7ec280b17f8e4671f250e389
Author: Aleksander Morgado <aleksander lanedo com>
Date:   Fri Feb 25 11:46:43 2011 +0100

    tracker-extract,mp3: Use encoding detection from libtracker-common

 configure.ac                              |    4 +-
 src/libtracker-common/Makefile.am         |    7 --
 src/tracker-extract/Makefile.am           |    5 --
 src/tracker-extract/tracker-extract-mp3.c |   89 ++++++++++-------------------
 4 files changed, 33 insertions(+), 72 deletions(-)
---
diff --git a/configure.ac b/configure.ac
index a5bfbee..076c9a7 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1312,8 +1312,8 @@ AC_ARG_WITH(enca,
 if test "x$enable_enca" != "xno" ; then
    PKG_CHECK_MODULES(ENCA, [enca >= 1.9], have_enca=yes, have_enca=no)
 
-   AC_SUBST(ENCA_CFLAGS)
-   AC_SUBST(ENCA_LIBS)
+   LIBTRACKER_COMMON_CFLAGS="$LIBTRACKER_COMMON_CFLAGS $ENCA_CFLAGS"
+   LIBTRACKER_COMMON_LIBS="$LIBTRACKER_COMMON_LIBS $ENCA_LIBS"
 
    if test "x$have_enca" = "xyes"; then
      AC_DEFINE(HAVE_ENCA, [], [Enca language detection aid])
diff --git a/src/libtracker-common/Makefile.am b/src/libtracker-common/Makefile.am
index d243760..9f05a61 100644
--- a/src/libtracker-common/Makefile.am
+++ b/src/libtracker-common/Makefile.am
@@ -53,8 +53,6 @@ libtracker_common_la_SOURCES += tracker-language.c
 noinst_HEADERS += tracker-language.h
 endif
 
-libtracker_common_la_CFLAGS =
-
 libtracker_common_la_LDFLAGS = \
 	-version-info $(LT_CURRENT):$(LT_REVISION):$(LT_AGE)
 
@@ -66,11 +64,6 @@ if HAVE_TRACKER_FTS
 libtracker_common_la_LIBADD += $(top_builddir)/src/libstemmer/libstemmer.la
 endif
 
-if HAVE_ENCA
-libtracker_common_la_CFLAGS += $(ENCA_CFLAGS)
-libtracker_common_la_LIBADD += $(ENCA_LIBS)
-endif
-
 marshal_sources = \
 	tracker-marshal.h \
 	tracker-marshal.c
diff --git a/src/tracker-extract/Makefile.am b/src/tracker-extract/Makefile.am
index e2027a9..ce71311 100644
--- a/src/tracker-extract/Makefile.am
+++ b/src/tracker-extract/Makefile.am
@@ -106,11 +106,6 @@ libextract_mp3_la_LIBADD =  \
 	$(BUILD_LIBS) \
 	$(TRACKER_EXTRACT_MODULES_LIBS)
 
-if HAVE_ENCA
-libextract_mp3_la_CFLAGS += $(ENCA_CFLAGS)
-libextract_mp3_la_LIBADD += $(ENCA_LIBS)
-endif
-
 # Vorbis (OGG)
 libextract_vorbis_la_SOURCES = tracker-extract-vorbis.c $(escape_sources)
 libextract_vorbis_la_CFLAGS = $(TRACKER_EXTRACT_MODULES_CFLAGS)
diff --git a/src/tracker-extract/tracker-extract-mp3.c b/src/tracker-extract/tracker-extract-mp3.c
index 29d9c5f..01d5ba9 100644
--- a/src/tracker-extract/tracker-extract-mp3.c
+++ b/src/tracker-extract/tracker-extract-mp3.c
@@ -40,10 +40,6 @@
 #include <sys/mman.h>
 #endif /* G_OS_WIN32 */
 
-#ifdef HAVE_ENCA
-#include <enca.h>
-#endif
-
 #include <libtracker-common/tracker-common.h>
 
 #include <libtracker-extract/tracker-extract.h>
@@ -639,46 +635,24 @@ un_unsync (const unsigned char *source,
 	*dest_size = new_size;
 }
 
-static char*
-get_encoding (const char *data,
-              gssize      size,
-              gboolean   *encoding_found)
+static gchar *
+get_encoding (const gchar *data,
+              gsize        size,
+              gboolean    *encoding_found)
 {
-	gchar *encoding = NULL;
-#ifdef HAVE_ENCA
-	const char **langs;
-	size_t s, i;
-#endif
-
-	if (encoding_found) {
-		*encoding_found = FALSE;
-	}
-
-#ifdef HAVE_ENCA
-	langs = enca_get_languages (&s);
-
-	for (i = 0; i < s && !encoding; i++) {
-		EncaAnalyser analyser;
-		EncaEncoding eencoding;
-
-		analyser = enca_analyser_alloc (langs[i]);
-		eencoding = enca_analyse_const (analyser, data, size);
-
-		if (enca_charset_is_known (eencoding.charset)) {
-			if (encoding_found) {
-				*encoding_found = TRUE;
-			}
+	gchar *encoding;
 
-			encoding = g_strdup (enca_charset_name (eencoding.charset,
-			                                        ENCA_NAME_STYLE_ICONV));
-		}
+	/* Try to guess encoding */
+	encoding = (data && size ?
+	            tracker_encoding_guess (data, size) :
+	            NULL);
 
-		enca_analyser_free (analyser);
+	/* Notify if a proper detection was done */
+	if (encoding_found) {
+		*encoding_found = (encoding ? TRUE : FALSE);;
 	}
 
-	free (langs);
-#endif
-
+	/* If no proper detection was done, return default */
 	if (!encoding) {
 		/* Use Windows-1252 instead of ISO-8859-1 as the former is a
 		   superset in terms of printable characters and some
@@ -737,10 +711,6 @@ get_id3 (const gchar *data,
          size_t       size,
          id3tag      *id3)
 {
-#ifdef HAVE_ENCA
-	GString *s;
-	gboolean encoding_was_found;
-#endif /* HAVE_ENCA */
 	gchar *encoding, *year;
 	const gchar *pos;
 
@@ -765,22 +735,27 @@ get_id3 (const gchar *data,
 	 * have a better way to collect a bit more data before we let
 	 * enca loose on it for v1.
 	 */
-#ifdef HAVE_ENCA
-	/* Get the encoding for ALL the data we are extracting here */
-	s = g_string_new_len (pos, 30);
-	g_string_append_len (s, pos + 30, 30);
-	g_string_append_len (s, pos + 60, 30);
+	if (tracker_encoding_can_guess ()) {
+		GString *s;
+		gboolean encoding_was_found;
 
-	encoding = get_encoding (s->str, 90, &encoding_was_found);
+		/* Get the encoding for ALL the data we are extracting here */
+		s = g_string_new_len (pos, 30);
+		g_string_append_len (s, pos + 30, 30);
+		g_string_append_len (s, pos + 60, 30);
 
-	if (encoding_was_found) {
-		id3->encoding = encoding;
-	}
+		encoding = get_encoding (s->str, 90, &encoding_was_found);
+
+		if (encoding_was_found) {
+			id3->encoding = g_strdup (encoding);
+		}
 
-	g_string_free (s, TRUE);
-#else  /* HAVE_ENCA */
-	encoding = get_encoding (NULL, 0, NULL);
-#endif /* HAVE_ENCA */
+		g_string_free (s, TRUE);
+	} else {
+		/* If we cannot guess encoding, don't even try it, just
+		 * use the default one */
+		encoding = get_encoding (NULL, 0, NULL);
+	}
 
 	id3->title = g_convert (pos, 30, "UTF-8", encoding, NULL, NULL, NULL);
 
@@ -818,9 +793,7 @@ get_id3 (const gchar *data,
 		id3->genre = g_strdup ("");
 	}
 
-#ifndef HAVE_ENCA
 	g_free (encoding);
-#endif /* HAVE_ENCA */
 
 	return TRUE;
 }



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]