[tracker/tracker-0.6] Fixs according to specs and improve handwritten MP3 genres
- From: Martyn James Russell <mr src gnome org>
- To: svn-commits-list gnome org
- Subject: [tracker/tracker-0.6] Fixs according to specs and improve handwritten MP3 genres
- Date: Wed, 24 Jun 2009 11:59:45 +0000 (UTC)
commit c47d8a6700fd1d621072b427ec1ff3b21d4184a0
Author: Martyn Russell <martyn imendio com>
Date: Wed Jun 24 12:51:30 2009 +0100
Fixs according to specs and improve handwritten MP3 genres
configure.ac | 1 +
src/tracker-extract/tracker-extract-mp3.c | 143 +++++++++++++++---
utils/Makefile.am | 9 +-
utils/mp3-genre-leading-uppercase/Makefile.am | 18 ++
.../mp3-genre-leading-uppercase.c | 166 ++++++++++++++++++++
5 files changed, 314 insertions(+), 23 deletions(-)
---
diff --git a/configure.ac b/configure.ac
index ebda38e..776d353 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1463,6 +1463,7 @@ AC_CONFIG_FILES([
tests/tracker-extract/Makefile
utils/Makefile
utils/albumart/Makefile
+ utils/mp3-genre-leading-uppercase/Makefile
utils/qdbm/Makefile
utils/sqlite/Makefile
utils/tracker-fts/Makefile
diff --git a/src/tracker-extract/tracker-extract-mp3.c b/src/tracker-extract/tracker-extract-mp3.c
index ca3a095..7dcde3b 100644
--- a/src/tracker-extract/tracker-extract-mp3.c
+++ b/src/tracker-extract/tracker-extract-mp3.c
@@ -114,7 +114,23 @@ enum {
static void extract_mp3 (const gchar *filename,
GHashTable *metadata);
+/* This list is based on the comprehensive list on the French wiki
+ * page here:
+ *
+ * http://fr.wikipedia.org/wiki/ID3
+ *
+ * The actual list as explained by the standard is available here but
+ * has some ~17 or so genres missing which are on the French list:
+ *
+ * http://www.id3.org/id3v2.3.0#head-129376727ebe5309c1de1888987d070288d7c7e7
+ *
+ * Since the index is the most important thing here and this list is
+ * not sorted alphabetically, all new IDs are only ever appended to
+ * the list and that's why we can still use the French ID3 list over
+ * the actual list on the standards website.
+ */
static const char *const genre_names[] = {
+ /* Standard genres */
"Blues",
"Classic Rock",
"Country",
@@ -155,7 +171,7 @@ static const char *const genre_names[] = {
"Sound Clip",
"Gospel",
"Noise",
- "Alt. Rock",
+ "AlternRock",
"Bass",
"Soul",
"Punk",
@@ -174,7 +190,7 @@ static const char *const genre_names[] = {
"Southern Rock",
"Comedy",
"Cult",
- "Gangsta Rap",
+ "Gangsta",
"Top 40",
"Christian Rap",
"Pop/Funk",
@@ -182,7 +198,7 @@ static const char *const genre_names[] = {
"Native American",
"Cabaret",
"New Wave",
- "Psychedelic",
+ "Psychadelic",
"Rave",
"Showtunes",
"Trailer",
@@ -195,11 +211,13 @@ static const char *const genre_names[] = {
"Musical",
"Rock & Roll",
"Hard Rock",
+
+ /* Added on December 12, 1997 in cooperation with Winamp: */
"Folk",
- "Folk/Rock",
+ "Folk-Rock",
"National Folk",
"Swing",
- "Fast-Fusion",
+ "Fast Fusion",
"Bebob",
"Latin",
"Revival",
@@ -226,11 +244,15 @@ static const char *const genre_names[] = {
"Primus",
"Porn Groove",
"Satire",
+
+ /* Added on January 26, 1998 to ensure compatibility with Winamp 1.7: */
"Slow Jam",
"Club",
"Tango",
"Samba",
"Folklore",
+
+ /* Added on April 13, 1998 to ensure compatibility with Winamp 1.90: */
"Ballad",
"Power Ballad",
"Rhythmic Soul",
@@ -238,7 +260,7 @@ static const char *const genre_names[] = {
"Duet",
"Punk Rock",
"Drum Solo",
- "A Cappella",
+ "A capella",
"Euro-House",
"Dance Hall",
"Goa",
@@ -257,6 +279,8 @@ static const char *const genre_names[] = {
"Crossover",
"Contemporary Christian",
"Christian Rock",
+
+ /* Added on Jun 1, 1998 to ensure compatibility with Winamp 1.91: */
"Merengue",
"Salsa",
"Thrash Metal",
@@ -308,6 +332,80 @@ static TrackerExtractData extract_data[] = {
{ NULL, NULL }
};
+static void
+improve_handwritten_genre (gchar *genre)
+{
+	/* Upper-cases, in place, the first letter of every word in a
+	 * free-form (hand written) genre string so it looks more like
+	 * the standard genre names, e.g. "Fusion jazz" becomes
+	 * "Fusion Jazz". Letters that are not at the start of a word
+	 * are left untouched.
+	 *
+	 * @genre: NUL-terminated UTF-8 string to modify, may be NULL.
+	 *
+	 * NULL or invalid UTF-8 input is left unchanged. A character
+	 * is only replaced when its upper case form has the same
+	 * encoded length, because the buffer can not grow or shrink.
+	 */
+	gchar    *p;
+	gboolean  set_next;
+
+	/* Walking invalid UTF-8 with g_utf8_next_char() can step over
+	 * the terminating NUL, so refuse to touch such input.
+	 */
+	if (!genre || !g_utf8_validate (genre, -1, NULL)) {
+		return;
+	}
+
+	/* The very first character always starts a word. */
+	set_next = TRUE;
+
+	for (p = genre; *p; p = g_utf8_next_char (p)) {
+		gunichar c;
+
+		c = g_utf8_get_char (p);
+
+		if (set_next) {
+			gchar    buf[6];
+			gunichar upper;
+			gint     len, i;
+
+			upper = g_unichar_toupper (c);
+			len = g_unichar_to_utf8 (upper, buf);
+
+			/* Write back the whole encoded character, not
+			 * just one byte, and only if it fits exactly.
+			 */
+			if (upper != c && len == g_utf8_next_char (p) - p) {
+				for (i = 0; i < len; i++) {
+					p[i] = buf[i];
+				}
+			}
+
+			set_next = FALSE;
+		}
+
+		/* Characters of these line break classes are treated
+		 * as word separators: the character following one of
+		 * them gets upper-cased. Every other class (letters,
+		 * digits, quotes, ...) continues the current word.
+		 */
+		switch (g_unichar_break_type (c)) {
+		case G_UNICODE_BREAK_MANDATORY:
+		case G_UNICODE_BREAK_CARRIAGE_RETURN:
+		case G_UNICODE_BREAK_LINE_FEED:
+		case G_UNICODE_BREAK_COMBINING_MARK:
+		case G_UNICODE_BREAK_SURROGATE:
+		case G_UNICODE_BREAK_ZERO_WIDTH_SPACE:
+		case G_UNICODE_BREAK_INSEPARABLE:
+		case G_UNICODE_BREAK_NON_BREAKING_GLUE:
+		case G_UNICODE_BREAK_CONTINGENT:
+		case G_UNICODE_BREAK_SPACE:
+		case G_UNICODE_BREAK_HYPHEN:
+		case G_UNICODE_BREAK_EXCLAMATION:
+		case G_UNICODE_BREAK_WORD_JOINER:
+		case G_UNICODE_BREAK_NEXT_LINE:
+		case G_UNICODE_BREAK_SYMBOL:
+			set_next = TRUE;
+			break;
+
+		default:
+			break;
+		}
+	}
+}
+
static char *
read_id3v1_buffer (int fd, goffset size)
{
@@ -1005,10 +1103,13 @@ get_id3v24_tags (const gchar *data,
if (get_genre_number (word, &genre)) {
g_free (word);
word = g_strdup (get_genre_name (genre));
- }
+ } else {
+ if (g_ascii_strcasecmp (word, "unknown") == 0) {
+ g_free (word);
+ break;
+ }
- if (!word || strcasecmp (word, "unknown") == 0) {
- break;
+ improve_handwritten_genre (word);
}
} else if (strcmp (tmap[i].text, "TLEN") == 0) {
guint32 duration;
@@ -1255,6 +1356,7 @@ get_id3v23_tags (const gchar *data,
parts = g_strsplit (word, "/", 2);
g_free (word);
+
word = g_strdup (parts[0]);
g_strfreev (parts);
} else if (strcmp (tmap[i].text, "TCON") == 0) {
@@ -1263,10 +1365,13 @@ get_id3v23_tags (const gchar *data,
if (get_genre_number (word, &genre)) {
g_free (word);
word = g_strdup (get_genre_name (genre));
- }
+ } else {
+ if (g_ascii_strcasecmp (word, "unknown") == 0) {
+ g_free (word);
+ break;
+ }
- if (!word || strcasecmp (word, "unknown") == 0) {
- break;
+ improve_handwritten_genre (word);
}
} else if (strcmp (tmap[i].text, "TLEN") == 0) {
guint32 duration;
@@ -1492,19 +1597,19 @@ get_id3v20_tags (const gchar *data,
s = g_strdup (word + strlen (word) + 1);
g_free (word);
word = s;
- }
-
- if (strcmp (tmap[i].text, "TCO") == 0) {
+ } else if (strcmp (tmap[i].text, "TCO") == 0) {
gint genre;
if (get_genre_number (word, &genre)) {
g_free (word);
word = g_strdup (get_genre_name (genre));
- }
+ } else {
+ if (g_ascii_strcasecmp (word, "unknown") == 0) {
+ g_free (word);
+ break;
+ }
- if (!word || strcasecmp (word, "unknown") == 0) {
- g_free (word);
- break;
+ improve_handwritten_genre (word);
}
} else if (strcmp (tmap[i].text, "TLE") == 0) {
guint32 duration;
diff --git a/utils/Makefile.am b/utils/Makefile.am
index a7c271e..d2f1baa 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -4,8 +4,9 @@ if ENABLE_SQLITE_FTS
build_sqlite_fts = tracker-fts
endif
-SUBDIRS = \
- $(build_sqlite_fts) \
- albumart \
- qdbm \
+SUBDIRS = \
+ $(build_sqlite_fts) \
+ albumart \
+ mp3-genre-leading-uppercase \
+ qdbm \
sqlite
diff --git a/utils/mp3-genre-leading-uppercase/Makefile.am b/utils/mp3-genre-leading-uppercase/Makefile.am
new file mode 100644
index 0000000..0a0c71d
--- /dev/null
+++ b/utils/mp3-genre-leading-uppercase/Makefile.am
@@ -0,0 +1,18 @@
+include $(top_srcdir)/Makefile.decl
+
+noinst_PROGRAMS = mp3-genre-leading-uppercase
+
+INCLUDES = \
+ -DG_LOG_DOMAIN=\"Tracker\" \
+ -DTRACKER_COMPILATION \
+ -I$(top_srcdir)/src \
+ $(WARN_CFLAGS) \
+ $(GLIB2_CFLAGS)
+
+mp3_genre_leading_uppercase_SOURCES = \
+ mp3-genre-leading-uppercase.c
+
+mp3_genre_leading_uppercase_LDADD = \
+ $(top_builddir)/src/libtracker-common/libtracker-common.la \
+ $(GLIB2_LIBS)
+
diff --git a/utils/mp3-genre-leading-uppercase/mp3-genre-leading-uppercase.c b/utils/mp3-genre-leading-uppercase/mp3-genre-leading-uppercase.c
new file mode 100644
index 0000000..bb2fe6a
--- /dev/null
+++ b/utils/mp3-genre-leading-uppercase/mp3-genre-leading-uppercase.c
@@ -0,0 +1,166 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Copyright (C) 2008, Nokia
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <locale.h>
+
+#include <glib/gi18n.h>
+
+#include <libtracker-common/tracker-albumart.h>
+
+static gchar **text; /* Positional command line arguments, filled in by the option parser; main() treats each one as a genre string. */
+
+static GOptionEntry entries[] = { /* Option table handed to g_option_context_add_main_entries() in main(). */
+ { G_OPTION_REMAINING, 0, 0, /* No named option: collects the remaining arguments. */
+ G_OPTION_ARG_STRING_ARRAY, &text, /* Stored as a NULL-terminated string array in 'text'. */
+ N_("genre"),
+ N_("EXPRESSION") /* NOTE(review): placeholder looks copied from another utility; "GENRE" may be intended - confirm. */
+ },
+ { NULL } /* Terminator entry. */
+};
+
+static void
+improve_handwritten_genre (gchar *genre)
+{
+	/* Upper-cases, in place, the first letter of every word in a
+	 * free-form (hand written) genre string so it looks more like
+	 * the standard genre names, e.g. "Fusion jazz" becomes
+	 * "Fusion Jazz". Letters that are not at the start of a word
+	 * are left untouched.
+	 *
+	 * @genre: NUL-terminated UTF-8 string to modify, may be NULL.
+	 *
+	 * NULL or invalid UTF-8 input is left unchanged. A character
+	 * is only replaced when its upper case form has the same
+	 * encoded length, because the buffer can not grow or shrink.
+	 */
+	gchar    *p;
+	gboolean  set_next;
+
+	/* Walking invalid UTF-8 with g_utf8_next_char() can step over
+	 * the terminating NUL, so refuse to touch such input.
+	 */
+	if (!genre || !g_utf8_validate (genre, -1, NULL)) {
+		return;
+	}
+
+	/* The very first character always starts a word. */
+	set_next = TRUE;
+
+	for (p = genre; *p; p = g_utf8_next_char (p)) {
+		gunichar c;
+
+		c = g_utf8_get_char (p);
+
+		if (set_next) {
+			gchar    buf[6];
+			gunichar upper;
+			gint     len, i;
+
+			upper = g_unichar_toupper (c);
+			len = g_unichar_to_utf8 (upper, buf);
+
+			/* Write back the whole encoded character, not
+			 * just one byte, and only if it fits exactly.
+			 */
+			if (upper != c && len == g_utf8_next_char (p) - p) {
+				for (i = 0; i < len; i++) {
+					p[i] = buf[i];
+				}
+			}
+
+			set_next = FALSE;
+		}
+
+		/* Characters of these line break classes are treated
+		 * as word separators: the character following one of
+		 * them gets upper-cased. Every other class (letters,
+		 * digits, quotes, ...) continues the current word.
+		 */
+		switch (g_unichar_break_type (c)) {
+		case G_UNICODE_BREAK_MANDATORY:
+		case G_UNICODE_BREAK_CARRIAGE_RETURN:
+		case G_UNICODE_BREAK_LINE_FEED:
+		case G_UNICODE_BREAK_COMBINING_MARK:
+		case G_UNICODE_BREAK_SURROGATE:
+		case G_UNICODE_BREAK_ZERO_WIDTH_SPACE:
+		case G_UNICODE_BREAK_INSEPARABLE:
+		case G_UNICODE_BREAK_NON_BREAKING_GLUE:
+		case G_UNICODE_BREAK_CONTINGENT:
+		case G_UNICODE_BREAK_SPACE:
+		case G_UNICODE_BREAK_HYPHEN:
+		case G_UNICODE_BREAK_EXCLAMATION:
+		case G_UNICODE_BREAK_WORD_JOINER:
+		case G_UNICODE_BREAK_NEXT_LINE:
+		case G_UNICODE_BREAK_SYMBOL:
+			set_next = TRUE;
+			break;
+
+		default:
+			break;
+		}
+	}
+}
+
+/* Entry point of the test utility: every positional argument is
+ * treated as a hand written genre string, converted in place with
+ * improve_handwritten_genre() and printed.
+ *
+ * Returns EXIT_SUCCESS when at least one genre was converted, or
+ * EXIT_FAILURE when the command line could not be parsed or no genre
+ * text was given.
+ */
+int
+main (int argc, char *argv[])
+{
+	GOptionContext *context;
+	GError *error = NULL;
+	gchar *summary;
+	gchar **p;
+
+	setlocale (LC_ALL, "");
+
+	context = g_option_context_new (_("- Test MP3 handwritten genre conversion to leading uppercase"));
+	summary = g_strconcat (_("You can use this to check genre strings get converted correctly, for example:"),
+			       "\n",
+			       "\n",
+			       " \"fOo-bar bAz ping/pong sliff's & sloffs\"",
+			       "\n",
+			       "\n",
+			       _("Should be converted to"),
+			       "\n",
+			       "\n",
+			       " \"FOo-Bar BAz Ping/Pong Sliff's & Sloffs\"",
+			       NULL);
+	g_option_context_set_summary (context, summary);
+	g_free (summary);
+	g_option_context_add_main_entries (context, entries, NULL);
+
+	/* Report unknown options or malformed arguments instead of
+	 * silently carrying on with a half parsed command line.
+	 */
+	if (!g_option_context_parse (context, &argc, &argv, &error)) {
+		g_printerr ("%s\n", error ? error->message : "");
+		g_clear_error (&error);
+		g_option_context_free (context);
+		g_strfreev (text);
+
+		return EXIT_FAILURE;
+	}
+
+	if (!text || !*text) {
+		gchar *help;
+
+		g_printerr ("%s\n\n",
+			    _("No genre text was provided"));
+
+		help = g_option_context_get_help (context, TRUE, NULL);
+		g_option_context_free (context);
+		g_printerr ("%s", help);
+		g_free (help);
+		g_strfreev (text);
+
+		return EXIT_FAILURE;
+	}
+
+	g_option_context_free (context);
+
+	for (p = text; *p; p++) {
+		g_print ("\n");
+
+		g_print ("%s:\n", _("Converted to"));
+
+		improve_handwritten_genre (*p);
+		g_print (" %s\n", *p);
+	}
+
+	/* The option parser allocated the argument vector for us. */
+	g_strfreev (text);
+
+	return EXIT_SUCCESS;
+}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]