[tracker] Updated albumart strip code to work with utf8 and be clearer
- From: Martyn James Russell <mr src gnome org>
- To: svn-commits-list gnome org
- Subject: [tracker] Updated albumart strip code to work with utf8 and be clearer
- Date: Wed, 22 Apr 2009 10:59:09 -0400 (EDT)
commit b4ffe1d9270dcbedda1767db315e50b877ce7356
Author: Martyn Russell <martyn imendio com>
Date: Wed Apr 22 15:32:19 2009 +0100
Updated albumart strip code to work with utf8 and be clearer
* utils/albumart/albumart-strip.c: Added to test these strings out
using the libtracker-common API call.
---
configure.ac | 1 +
src/libtracker-common/tracker-albumart.c | 187 ++++++++++++++++++------------
src/libtracker-common/tracker-albumart.h | 41 ++++---
utils/Makefile.am | 2 +-
utils/albumart/Makefile.am | 18 +++
utils/albumart/albumart-strip.c | 91 +++++++++++++++
6 files changed, 246 insertions(+), 94 deletions(-)
diff --git a/configure.ac b/configure.ac
index 63ceb9c..2072d2b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1390,6 +1390,7 @@ AC_CONFIG_FILES([
tests/tracker-indexer/Makefile
tests/tracker-extract/Makefile
utils/Makefile
+ utils/albumart/Makefile
utils/tracker-fts/Makefile
utils/services/Makefile
])
diff --git a/src/libtracker-common/tracker-albumart.c b/src/libtracker-common/tracker-albumart.c
index f220db3..85e7481 100644
--- a/src/libtracker-common/tracker-albumart.c
+++ b/src/libtracker-common/tracker-albumart.c
@@ -175,89 +175,130 @@ make_directory_with_parents (GFile *file,
return g_file_make_directory (file, cancellable, error);
}
-static gchar*
-strip_characters (const gchar *original)
+static gboolean
+strip_find_next_block (const gchar *original,
+ const gunichar open_char,
+ const gunichar close_char,
+ gint *open_pos,
+ gint *close_pos)
{
- const gchar *foo = "()[]<>{}_! #$^&*+=|\\/\"'?~";
- guint osize = strlen (original);
- gchar *retval = (gchar *) g_malloc0 (sizeof (gchar *) * osize + 1);
- guint i = 0, y = 0;
-
- while (i < osize) {
- /* Remove (anything) */
- if (original[i] == '(') {
- gchar *loc = strchr (original+i, ')');
- if (loc) {
- i = loc - original + 1;
- continue;
- }
- }
-
- /* Remove [anything] */
- if (original[i] == '[') {
- gchar *loc = strchr (original+i, ']');
- if (loc) {
- i = loc - original + 1;
- continue;
- }
- }
+ const gchar *p1, *p2;
- /* Remove {anything} */
- if (original[i] == '{') {
- gchar *loc = strchr (original+i, '}');
- if (loc) {
- i = loc - original + 1;
- continue;
- }
- }
+ if (open_pos) {
+ *open_pos = -1;
+ }
- /* Remove <anything> */
- if (original[i] == '<') {
- gchar *loc = strchr (original+i, '>');
- if (loc) {
- i = loc - original + 1;
- continue;
- }
- }
+ if (close_pos) {
+ *close_pos = -1;
+ }
- /* Remove double whitespaces */
- if ((y > 0) &&
- (original[i] == ' ' || original[i] == '\t') &&
- (retval[y-1] == ' ' || retval[y-1] == '\t')) {
- i++;
- continue;
+ p1 = g_utf8_strchr (original, -1, open_char);
+ if (p1) {
+ if (open_pos) {
+ *open_pos = p1 - original;
}
- /* Remove strange characters */
- if (!strchr (foo, original[i])) {
- retval[y] = original[i]!='\t'?original[i]:' ';
- y++;
+ p2 = g_utf8_strchr (g_utf8_next_char (p1), -1, close_char);
+ if (p2) {
+ if (close_pos) {
+ *close_pos = p2 - original;
+ }
+
+ return TRUE;
}
-
- i++;
}
- retval[y] = 0;
-
- y--;
- while (retval[y] == ' ') {
- retval[y] = 0;
- y--;
- }
+ return FALSE;
+}
- if (retval[0] == ' ') {
- guint r = 0;
- gchar *newr;
+gchar *
+tracker_albumart_strip_invalid_entities (const gchar *original)
+{
+ GString *str_no_blocks;
+ gchar **strv;
+ gchar *str;
+ gboolean blocks_done = FALSE;
+ const gchar *p;
+ const gchar *invalid_chars = "()[]<>{}_! #$^&*+=|\\/\"'?~";
+ const gchar *invalid_chars_delimiter = "*";
+ const gchar *convert_chars = "\t";
+ const gchar *convert_chars_delimiter = " ";
+ const gunichar blocks[5][2] = {
+ { '(', ')' },
+ { '{', '}' },
+ { '[', ']' },
+ { '<', '>' },
+ { 0, 0 }
+ };
+
+ str_no_blocks = g_string_new ("");
+
+ p = original;
+
+ while (!blocks_done) {
+ gint pos1, pos2, i;
+
+ pos1 = -1;
+ pos2 = -1;
+
+ for (i = 0; blocks[i][0] != 0; i++) {
+ gint start, end;
+
+ /* Go through blocks, find the earliest block we can */
+ if (strip_find_next_block (p, blocks[i][0], blocks[i][1], &start, &end)) {
+ if (pos1 == -1 || start < pos1) {
+ pos1 = start;
+ pos2 = end;
+ }
+ }
+ }
+
+ /* If either is -1 we didn't find any */
+ if (pos1 == -1) {
+ /* This means no blocks were found */
+ g_string_append (str_no_blocks, p);
+ blocks_done = TRUE;
+ } else {
+ /* Append the text BEFORE the block */
+ if (pos1 > 0) {
+ g_string_append_len (str_no_blocks, p, pos1);
+ }
- while (retval[r] == ' ')
- r++;
+ p = g_utf8_next_char (p + pos2);
- newr = g_strdup (retval + r);
- g_free (retval);
- retval = newr;
+ /* Do same again for position AFTER block */
+ if (*p == '\0') {
+ blocks_done = TRUE;
+ }
+ }
}
- return retval;
+ str = g_string_free (str_no_blocks, FALSE);
+
+ /* Now strip invalid chars */
+ g_strdelimit (str, invalid_chars, *invalid_chars_delimiter);
+ strv = g_strsplit (str, invalid_chars_delimiter, -1);
+ g_free (str);
+ str = g_strjoinv (NULL, strv);
+ g_strfreev (strv);
+
+ /* Now convert chars */
+ g_strdelimit (str, convert_chars, *convert_chars_delimiter);
+ strv = g_strsplit (str, convert_chars_delimiter, -1);
+ g_free (str);
+ str = g_strjoinv (convert_chars_delimiter, strv);
+ g_strfreev (strv);
+
+ /* Now remove double spaces */
+ strv = g_strsplit (str, " ", -1);
+ g_free (str);
+ str = g_strjoinv (" ", strv);
+ g_strfreev (strv);
+
+ /* Now strip leading/trailing white space */
+ g_strstrip (str);
+
+ return str;
}
void
@@ -405,11 +446,11 @@ tracker_albumart_heuristic (const gchar *artist_,
}
if (artist_) {
- artist = strip_characters (artist_);
+ artist = tracker_albumart_strip_invalid_entities (artist_);
}
if (album_) {
- album = strip_characters (album_);
+ album = tracker_albumart_strip_invalid_entities (album_);
}
/* If amount of files and amount of tracks in the album somewhat match */
@@ -612,13 +653,13 @@ tracker_albumart_get_path (const gchar *a,
if (!a) {
f_a = g_strdup (" ");
} else {
- f_a = strip_characters (a);
+ f_a = tracker_albumart_strip_invalid_entities (a);
}
if (!b) {
f_b = g_strdup (" ");
} else {
- f_b = strip_characters (b);
+ f_b = tracker_albumart_strip_invalid_entities (b);
}
down1 = g_utf8_strdown (f_a, -1);
diff --git a/src/libtracker-common/tracker-albumart.h b/src/libtracker-common/tracker-albumart.h
index 9e7e15a..390e727 100644
--- a/src/libtracker-common/tracker-albumart.h
+++ b/src/libtracker-common/tracker-albumart.h
@@ -32,26 +32,27 @@ G_BEGIN_DECLS
#include "tracker-hal.h"
-gboolean tracker_albumart_heuristic (const gchar *artist_,
- const gchar *album_,
- const gchar *tracks_str,
- const gchar *filename,
- const gchar *local_uri,
- gboolean *copied);
-void tracker_albumart_copy_to_local (TrackerHal *hal,
- const gchar *filename,
- const gchar *local_uri);
-void tracker_albumart_get_path (const gchar *a,
- const gchar *b,
- const gchar *prefix,
- const gchar *uri,
- gchar **path,
- gchar **local);
-void tracker_albumart_request_download (TrackerHal *hal,
- const gchar *album,
- const gchar *artist,
- const gchar *local_uri,
- const gchar *art_path);
+gboolean tracker_albumart_heuristic (const gchar *artist_,
+ const gchar *album_,
+ const gchar *tracks_str,
+ const gchar *filename,
+ const gchar *local_uri,
+ gboolean *copied);
+gchar * tracker_albumart_strip_invalid_entities (const gchar *original);
+void tracker_albumart_copy_to_local (TrackerHal *hal,
+ const gchar *filename,
+ const gchar *local_uri);
+void tracker_albumart_get_path (const gchar *a,
+ const gchar *b,
+ const gchar *prefix,
+ const gchar *uri,
+ gchar **path,
+ gchar **local);
+void tracker_albumart_request_download (TrackerHal *hal,
+ const gchar *album,
+ const gchar *artist,
+ const gchar *local_uri,
+ const gchar *art_path);
G_END_DECLS
diff --git a/utils/Makefile.am b/utils/Makefile.am
index 88852c9..4419038 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -1,6 +1,6 @@
include $(top_srcdir)/Makefile.decl
SUBDIRS = \
+ albumart \
services \
tracker-fts
-
diff --git a/utils/albumart/Makefile.am b/utils/albumart/Makefile.am
new file mode 100644
index 0000000..d3a6e09
--- /dev/null
+++ b/utils/albumart/Makefile.am
@@ -0,0 +1,18 @@
+include $(top_srcdir)/Makefile.decl
+
+noinst_PROGRAMS = albumart-strip
+
+INCLUDES = \
+ -DG_LOG_DOMAIN=\"Tracker\" \
+ -DTRACKER_COMPILATION \
+ -I$(top_srcdir)/src \
+ $(WARN_CFLAGS) \
+ $(GLIB2_CFLAGS)
+
+albumart_strip_SOURCES = \
+ albumart-strip.c
+
+albumart_strip_LDADD = \
+ $(top_builddir)/src/libtracker-common/libtracker-common.la \
+ $(GLIB2_LIBS)
+
diff --git a/utils/albumart/albumart-strip.c b/utils/albumart/albumart-strip.c
new file mode 100644
index 0000000..2fdb9a1
--- /dev/null
+++ b/utils/albumart/albumart-strip.c
@@ -0,0 +1,91 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Copyright (C) 2008, Nokia
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <locale.h>
+
+#include <glib/gi18n.h>
+
+#include <libtracker-common/tracker-albumart.h>
+
+static gchar **text;
+
+static GOptionEntry entries[] = {
+ { G_OPTION_REMAINING, 0, 0,
+ G_OPTION_ARG_STRING_ARRAY, &text,
+ N_("album or artist"),
+ N_("EXPRESSION")
+ },
+ { NULL }
+};
+
+int
+main (int argc, char *argv[])
+{
+ GOptionContext *context;
+ gchar *summary;
+ gchar **p;
+
+ setlocale (LC_ALL, "");
+
+ context = g_option_context_new (_("- Test albumart text stripping"));
+ summary = g_strconcat (_("You can use this utility to check album/artist strings, for example:"),
+ "\n",
+ "\n",
+ " \"[I_am_da_man dwz m1 mp3ieez] Jhonny Dee - "
+ "The tale of bla {Moo 2009, we are great}\"",
+ NULL);
+ g_option_context_set_summary (context, summary);
+ g_option_context_add_main_entries (context, entries, NULL);
+ g_option_context_parse (context, &argc, &argv, NULL);
+ g_free (summary);
+
+ if (!text || !*text) {
+ gchar *help;
+
+ g_printerr ("%s\n\n",
+ _("No album/artist text was provided"));
+
+ help = g_option_context_get_help (context, TRUE, NULL);
+ g_option_context_free (context);
+ g_printerr ("%s", help);
+ g_free (help);
+
+ return EXIT_FAILURE;
+ }
+
+ g_option_context_free (context);
+
+ for (p = text; *p; p++) {
+ gchar *output;
+
+ g_print ("\n");
+
+ g_print ("%s:\n", _("Converted to"));
+
+ output = tracker_albumart_strip_invalid_entities (*p);
+ g_print (" %s\n", output);
+
+ g_free (output);
+ }
+
+ return EXIT_SUCCESS;
+}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]