[tracker] libtracker-common: Only use encoding guessing when confidence >30%
- From: Martyn James Russell <mr src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker] libtracker-common: Only use encoding guessing when confidence >30%
- Date: Thu, 31 Mar 2011 17:35:10 +0000 (UTC)
commit 91ce0b167644e68365bdb729fcbfab37de387708
Author: Philip Van Hoof <philip codeminded be>
Date: Thu Mar 31 11:16:39 2011 +0200
libtracker-common: Only use encoding guessing when confidence >30%
.../tracker-encoding-meegotouch.cpp | 42 +++++++++++++-------
src/tracker-extract/tracker-extract-mp3.c | 4 +-
2 files changed, 30 insertions(+), 16 deletions(-)
---
diff --git a/src/libtracker-common/tracker-encoding-meegotouch.cpp b/src/libtracker-common/tracker-encoding-meegotouch.cpp
index 28573d0..29502f5 100644
--- a/src/libtracker-common/tracker-encoding-meegotouch.cpp
+++ b/src/libtracker-common/tracker-encoding-meegotouch.cpp
@@ -24,6 +24,7 @@
#include <glib.h>
#include "tracker-encoding-meegotouch.h"
+#include "tracker-locale.h"
/*
* See http://apidocs.meego.com/git-tip/mtf/class_m_charset_detector.html
@@ -35,6 +36,8 @@ tracker_encoding_guess_meegotouch (const gchar *buffer,
{
/* Initialize detector */
MCharsetDetector detector ((const char *)buffer, (int)size);
+ gchar *locale;
+ gchar *encoding = NULL;
if (detector.hasError ()) {
g_warning ("Charset detector error when creating: %s",
@@ -50,26 +53,37 @@ tracker_encoding_guess_meegotouch (const gchar *buffer,
return NULL;
}
- gchar *encoding = g_strdup (bestMatch.name ().toUtf8 ().data ());
+ locale = tracker_locale_get (TRACKER_LOCALE_LANGUAGE);
+ detector.setDeclaredLocale (locale);
+
+ if (bestMatch.confidence () > 30) {
+ encoding = g_strdup (bestMatch.name ().toUtf8 ().data ());
#if 0
- QList<MCharsetMatch> mCharsetMatchList = detector.detectAll();
+ QList<MCharsetMatch> mCharsetMatchList = detector.detectAll();
- if (detector.hasError ()) {
- g_warning ("Charset detector error when detecting all: %s",
- detector.errorString ().toUtf8 (). data ());
- }
+ if (detector.hasError ()) {
+ g_warning ("Charset detector error when detecting all: %s",
+ detector.errorString ().toUtf8 (). data ());
+ }
- g_debug ("Detecting all charsets...");
- for (gint i = 0; i < mCharsetMatchList.size (); ++i) {
- g_debug (" Charset '%s' with %d%% confidence...",
- mCharsetMatchList[i].name (). toUtf8 ().data (),
- mCharsetMatchList[i].confidence ());
- }
+ g_debug ("Detecting all charsets...");
+ for (gint i = 0; i < mCharsetMatchList.size (); ++i) {
+ g_debug (" Charset '%s' with %d%% confidence...",
+ mCharsetMatchList[i].name (). toUtf8 ().data (),
+ mCharsetMatchList[i].confidence ());
+ }
#endif
- g_debug ("Guessing charset as '%s' with %d%% confidence",
- encoding, bestMatch.confidence ());
+ g_debug ("Guessing charset as '%s' with %d%% confidence",
+ encoding, bestMatch.confidence ());
+ } else {
+ g_debug ("Ignoring charset as '%s' with %d%% (< 30%%) confidence",
+ bestMatch.name ().toUtf8 ().data (),
+ bestMatch.confidence ());
+ }
+
+ g_free (locale);
return encoding;
}
diff --git a/src/tracker-extract/tracker-extract-mp3.c b/src/tracker-extract/tracker-extract-mp3.c
index 791d999..51cf1e7 100644
--- a/src/tracker-extract/tracker-extract-mp3.c
+++ b/src/tracker-extract/tracker-extract-mp3.c
@@ -513,10 +513,10 @@ read_id3v1_buffer (int fd,
static gchar *
ucs2_to_utf8(const gchar *data, guint len)
{
- const gchar *encoding = NULL;
+ const gchar *encoding = NULL;
guint16 c;
gboolean be;
- gchar *utf8 = NULL;
+ gchar *utf8 = NULL;
memcpy (&c, data, 2);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]