[tracker] Updated albumart strip code to work with utf8 and be clearer



commit b4ffe1d9270dcbedda1767db315e50b877ce7356
Author: Martyn Russell <martyn imendio com>
Date:   Wed Apr 22 15:32:19 2009 +0100

    Updated albumart strip code to work with utf8 and be clearer
    
    * utils/albumart/albumart-strip.c: Added to test these strings out
      using the libtracker-common API call.
---
 configure.ac                             |    1 +
 src/libtracker-common/tracker-albumart.c |  187 ++++++++++++++++++------------
 src/libtracker-common/tracker-albumart.h |   41 ++++---
 utils/Makefile.am                        |    2 +-
 utils/albumart/Makefile.am               |   18 +++
 utils/albumart/albumart-strip.c          |   91 +++++++++++++++
 6 files changed, 246 insertions(+), 94 deletions(-)

diff --git a/configure.ac b/configure.ac
index 63ceb9c..2072d2b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1390,6 +1390,7 @@ AC_CONFIG_FILES([
 	tests/tracker-indexer/Makefile
 	tests/tracker-extract/Makefile
 	utils/Makefile
+	utils/albumart/Makefile
 	utils/tracker-fts/Makefile
 	utils/services/Makefile
 ])
diff --git a/src/libtracker-common/tracker-albumart.c b/src/libtracker-common/tracker-albumart.c
index f220db3..85e7481 100644
--- a/src/libtracker-common/tracker-albumart.c
+++ b/src/libtracker-common/tracker-albumart.c
@@ -175,89 +175,130 @@ make_directory_with_parents (GFile         *file,
   return g_file_make_directory (file, cancellable, error);
 }
 
-static gchar*
-strip_characters (const gchar *original)
+static gboolean
+strip_find_next_block (const gchar    *original,
+		       const gunichar  open_char,
+		       const gunichar  close_char,
+		       gint           *open_pos,
+		       gint           *close_pos)
 {
-	const gchar *foo = "()[]<>{}_! #$^&*+=|\\/\"'?~";
-	guint osize = strlen (original);
-	gchar *retval = (gchar *) g_malloc0 (sizeof (gchar *) * osize + 1);
-	guint i = 0, y = 0;
-
-	while (i < osize) {
-		/* Remove (anything) */
-		if (original[i] == '(') {
-			gchar *loc = strchr (original+i, ')');
-			if (loc) {
-				i = loc - original + 1;
-				continue;
-			}
-		}
-
-		/* Remove [anything] */
-		if (original[i] == '[') {
-			gchar *loc = strchr (original+i, ']');
-			if (loc) {
-				i = loc - original + 1;
-				continue;
-			}
-		}
+	const gchar *p1, *p2;
 
-		/* Remove {anything} */
-		if (original[i] == '{') {
-			gchar *loc = strchr (original+i, '}');
-			if (loc) {
-				i = loc - original + 1;
-				continue;
-			}
-		}
+	if (open_pos) {
+		*open_pos = -1;
+	}
 
-		/* Remove <anything> */
-		if (original[i] == '<') {
-			gchar *loc = strchr (original+i, '>');
-			if (loc) {
-				i = loc - original + 1;
-				continue;
-			}
-		}
+	if (close_pos) {
+		*close_pos = -1;
+	}
 
-		/* Remove double whitespaces */
-		if ((y > 0) &&
-		    (original[i] == ' ' || original[i] == '\t') &&
-		    (retval[y-1] == ' ' || retval[y-1] == '\t')) {
-			i++;
-			continue;
+	p1 = g_utf8_strchr (original, -1, open_char);
+	if (p1) {
+		if (open_pos) {
+			*open_pos = p1 - original;
 		}
 
-		/* Remove strange characters */
-		if (!strchr (foo, original[i])) {
-			retval[y] = original[i]!='\t'?original[i]:' ';
-			y++;
+		p2 = g_utf8_strchr (g_utf8_next_char (p1), -1, close_char);
+		if (p2) {
+			if (close_pos) {
+				*close_pos = p2 - original;
+			}
+			
+			return TRUE;
 		}
-
-		i++;
 	}
 
-	retval[y] = 0;
-
-	y--;
-	while (retval[y] == ' ') {
-		retval[y] = 0;
-		y--;
-	}
+	return FALSE;
+}
 
-	if (retval[0] == ' ') {
-		guint r = 0;
-		gchar *newr;
+gchar *
+tracker_albumart_strip_invalid_entities (const gchar *original)
+{
+	GString         *str_no_blocks;
+	gchar          **strv;
+	gchar           *str;
+	gboolean         blocks_done = FALSE;
+	const gchar     *p;
+	const gchar     *invalid_chars = "()[]<>{}_! #$^&*+=|\\/\"'?~";
+	const gchar     *invalid_chars_delimiter = "*";
+	const gchar     *convert_chars = "\t";
+	const gchar     *convert_chars_delimiter = " ";
+	const gunichar   blocks[5][2] = {
+		{ '(', ')' },
+		{ '{', '}' }, 
+		{ '[', ']' }, 
+		{ '<', '>' }, 
+		{  0,   0  }
+	};
+
+	str_no_blocks = g_string_new ("");
+
+	p = original;
+
+	while (!blocks_done) {
+		gint pos1, pos2, i;
+
+		pos1 = -1;
+		pos2 = -1;
+	
+		for (i = 0; blocks[i][0] != 0; i++) {
+			gint start, end;
+			
+			/* Go through blocks, find the earliest block we can */
+			if (strip_find_next_block (p, blocks[i][0], blocks[i][1], &start, &end)) {
+				if (pos1 == -1 || start < pos1) {
+					pos1 = start;
+					pos2 = end;
+				}
+			}
+		}
+		
+		/* If pos1 is still -1, no complete block was found */
+		if (pos1 == -1) {
+			/* This means no blocks were found */
+			g_string_append (str_no_blocks, p);
+			blocks_done = TRUE;
+		} else {
+			/* Append the text BEFORE the block */
+                        if (pos1 > 0) {
+                                g_string_append_len (str_no_blocks, p, pos1);
+                        }
 
-		while (retval[r] == ' ')
-			r++;
+                        p = g_utf8_next_char (p + pos2);
 
-		newr = g_strdup (retval + r);
-		g_free (retval);
-		retval = newr;
+			/* Do same again for position AFTER block */
+			if (*p == '\0') {
+				blocks_done = TRUE;
+			}
+		}	
 	}
 
-	return retval;
+	str = g_string_free (str_no_blocks, FALSE);
+
+	/* Now strip invalid chars */
+	g_strdelimit (str, invalid_chars, *invalid_chars_delimiter);
+	strv = g_strsplit (str, invalid_chars_delimiter, -1);
+	g_free (str);
+        str = g_strjoinv (NULL, strv);
+	g_strfreev (strv);
+
+	/* Now convert chars */
+	g_strdelimit (str, convert_chars, *convert_chars_delimiter);
+	strv = g_strsplit (str, convert_chars_delimiter, -1);
+	g_free (str);
+        str = g_strjoinv (convert_chars_delimiter, strv);
+	g_strfreev (strv);
+
+        /* Now remove double spaces */
+	strv = g_strsplit (str, "  ", -1);
+	g_free (str);
+        str = g_strjoinv (" ", strv);
+	g_strfreev (strv);
+        
+        /* Now strip leading/trailing white space */
+        g_strstrip (str);
+
+	return str;
 }
 
 void
@@ -405,11 +446,11 @@ tracker_albumart_heuristic (const gchar *artist_,
 	}
 
 	if (artist_) {
-		artist = strip_characters (artist_);
+		artist = tracker_albumart_strip_invalid_entities (artist_);
 	}
 
 	if (album_) {
-		album = strip_characters (album_);
+		album = tracker_albumart_strip_invalid_entities (album_);
 	}
 
 	/* If amount of files and amount of tracks in the album somewhat match */
@@ -612,13 +653,13 @@ tracker_albumart_get_path (const gchar  *a,
 	if (!a) {
 		f_a = g_strdup (" ");
 	} else {
-		f_a = strip_characters (a);
+		f_a = tracker_albumart_strip_invalid_entities (a);
 	}
 
 	if (!b) {
 		f_b = g_strdup (" ");
 	} else {
-		f_b = strip_characters (b); 
+		f_b = tracker_albumart_strip_invalid_entities (b); 
 	}
 
 	down1 = g_utf8_strdown (f_a, -1);
diff --git a/src/libtracker-common/tracker-albumart.h b/src/libtracker-common/tracker-albumart.h
index 9e7e15a..390e727 100644
--- a/src/libtracker-common/tracker-albumart.h
+++ b/src/libtracker-common/tracker-albumart.h
@@ -32,26 +32,27 @@ G_BEGIN_DECLS
 
 #include "tracker-hal.h"
 
-gboolean tracker_albumart_heuristic        (const gchar *artist_,  
-					    const gchar *album_, 
-					    const gchar *tracks_str, 
-					    const gchar *filename,
-					    const gchar *local_uri,
-					    gboolean    *copied);
-void     tracker_albumart_copy_to_local    (TrackerHal  *hal,
-					    const gchar *filename, 
-					    const gchar *local_uri);
-void     tracker_albumart_get_path         (const gchar  *a, 
-					    const gchar  *b, 
-					    const gchar  *prefix, 
-					    const gchar  *uri,
-					    gchar       **path,
-					    gchar       **local);
-void     tracker_albumart_request_download (TrackerHal  *hal,
-					    const gchar *album, 
-					    const gchar *artist, 
-					    const gchar *local_uri, 
-					    const gchar *art_path);
+gboolean tracker_albumart_heuristic              (const gchar  *artist_,
+						  const gchar  *album_,
+						  const gchar  *tracks_str,
+						  const gchar  *filename,
+						  const gchar  *local_uri,
+						  gboolean     *copied);
+gchar *  tracker_albumart_strip_invalid_entities (const gchar  *original);
+void     tracker_albumart_copy_to_local          (TrackerHal   *hal,
+						  const gchar  *filename,
+						  const gchar  *local_uri);
+void     tracker_albumart_get_path               (const gchar  *a,
+						  const gchar  *b,
+						  const gchar  *prefix,
+						  const gchar  *uri,
+						  gchar       **path,
+						  gchar       **local);
+void     tracker_albumart_request_download       (TrackerHal   *hal,
+						  const gchar  *album,
+						  const gchar  *artist,
+						  const gchar  *local_uri,
+						  const gchar  *art_path);
 
 G_END_DECLS
 
diff --git a/utils/Makefile.am b/utils/Makefile.am
index 88852c9..4419038 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -1,6 +1,6 @@
 include $(top_srcdir)/Makefile.decl
 
 SUBDIRS = 			\
+	albumart		\
 	services		\
 	tracker-fts
-
diff --git a/utils/albumart/Makefile.am b/utils/albumart/Makefile.am
new file mode 100644
index 0000000..d3a6e09
--- /dev/null
+++ b/utils/albumart/Makefile.am
@@ -0,0 +1,18 @@
+include $(top_srcdir)/Makefile.decl
+
+noinst_PROGRAMS = albumart-strip
+
+INCLUDES = 								\
+	-DG_LOG_DOMAIN=\"Tracker\"					\
+	-DTRACKER_COMPILATION						\
+	-I$(top_srcdir)/src						\
+	$(WARN_CFLAGS)							\
+	$(GLIB2_CFLAGS)
+
+albumart_strip_SOURCES =						\
+	albumart-strip.c
+
+albumart_strip_LDADD =							\
+	$(top_builddir)/src/libtracker-common/libtracker-common.la 	\
+	$(GLIB2_LIBS)
+
diff --git a/utils/albumart/albumart-strip.c b/utils/albumart/albumart-strip.c
new file mode 100644
index 0000000..2fdb9a1
--- /dev/null
+++ b/utils/albumart/albumart-strip.c
@@ -0,0 +1,91 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Copyright (C) 2008, Nokia
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <locale.h>
+
+#include <glib/gi18n.h>
+
+#include <libtracker-common/tracker-albumart.h>
+
+static gchar	    **text;
+
+static GOptionEntry   entries[] = {
+	{ G_OPTION_REMAINING, 0, 0,
+	  G_OPTION_ARG_STRING_ARRAY, &text,
+	  N_("album or artist"),
+	  N_("EXPRESSION")
+	},
+	{ NULL }
+};
+
+int
+main (int argc, char *argv[])
+{
+	GOptionContext  *context;
+        gchar           *summary;
+	gchar          **p;
+
+	setlocale (LC_ALL, "");
+
+	context = g_option_context_new (_("- Test albumart text stripping"));
+	summary = g_strconcat (_("You can use this utility to check album/artist strings, for example:"),
+			       "\n",
+			       "\n",
+			       "  \"[I_am_da_man dwz m1 mp3ieez] Jhonny Dee - "
+			       "The tale of bla {Moo 2009, we are great}\"",
+			       NULL);
+	g_option_context_set_summary (context, summary);
+	g_option_context_add_main_entries (context, entries, NULL);
+	g_option_context_parse (context, &argc, &argv, NULL);
+	g_free (summary);
+
+	if (!text || !*text) {
+		gchar *help;
+
+		g_printerr ("%s\n\n",
+			    _("No album/artist text was provided"));
+
+		help = g_option_context_get_help (context, TRUE, NULL);
+		g_option_context_free (context);
+		g_printerr ("%s", help);
+		g_free (help);
+
+		return EXIT_FAILURE;
+	}
+
+	g_option_context_free (context);
+
+	for (p = text; *p; p++) {
+		gchar *output;
+
+		g_print ("\n");
+
+		g_print ("%s:\n", _("Converted to"));
+
+		output = tracker_albumart_strip_invalid_entities (*p);
+		g_print ("  %s\n", output);
+
+		g_free (output);
+	}
+       
+        return EXIT_SUCCESS;
+}



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]