[g-a-devel] Blessing needed on recode1.patch



Hello,

A couple of days ago I posted the (attached) patch for gnome-speech that
solves the problems with the Italian synthesis.

The patch follows the discussion we had on the list:
 - it asks the Festival voices what encoding they prefer, as was
   suggested by Milan Zamazal[1]
 - it defaults to ISO-8859-1

I also tried to make the patch as unintrusive as possible: I chose to
maintain a static, private list of per-voice preferred encodings, so
that I didn't need to change any .h file, nor the gnome-speech IDL, to
store the encoding information.  While there can probably be better
ways, this one should add the functionality without breaking anything
code-wise.

What this patch could break is the Indian festival voices, which want
UTF-8 on input and probably don't report the coding in their
(voice.description).  I volunteer to prepare a patch for the Indian
festival voices to fix this, if someone tells me where they are.

Now, in order to carry on my work towards a speaking Italian Ubuntu
Dapper Gnome desktop, I'd like this patch to be accepted into
gnome-speech.

Once the patch has some official blessing, I can probably prod the Ubuntu
maintainers to release a gnome-speech update which would allow the
Italian desktop to speak on Dapper almost out of the box.

Please?


Ciao,

Enrico kneeling and patiently waiting for the spray of holy water.


[1] I'd love to post links to the list archives, but they seem not to be
    updated.
-- 
GPG key: 1024D/797EBFAB 2000-12-05 Enrico Zini <enrico debian org>
--- festivalsynthesisdriver.c	2006-05-14 15:49:21.000000000 +0100
+++ festivalsynthesisdriver.c.buono	2006-06-29 17:23:25.000000000 +0100
@@ -45,6 +45,8 @@
 #undef FESTIVAL_DEBUG_SEND
 #undef FESTIVAL_DEBUG_TEXT
 
+#define DEFAULT_ENCODING "ISO-8859-1"
+
 static gint 		text_id   = 0;
 static GObjectClass 	*parent_class;
 static gboolean 	festival_server_exists = FALSE;
@@ -52,6 +54,7 @@
 static GSList		*driver_list = NULL;
 static GSList		*markers_list = NULL;
 static GSList		*voices_list = NULL;
+static GSList		*encodings_list = NULL;
 
 typedef struct
 {
@@ -98,6 +101,37 @@
 static void 	festival_process_text_out 			(FestivalTextOut *text_out);
 static void 	festival_free_list 				(FestivalSynthesisDriver *d);
 
+static void encoding_set(GNOME_Speech_VoiceInfo* voice, gchar* encoding)
+{
+	/* Store a "name:encoding" string for this voice in encodings_list. First, try to see if we have it already */
+	GSList* cur = encodings_list;
+	int len = strlen(voice->name);
+	for ( ; cur != NULL; cur = cur->next)
+		if (strncmp((gchar*)cur->data, voice->name, len) == 0) /* NOTE(review): compares only the first len chars, so an entry for a longer name with the same prefix also matches */
+		{
+			/* If we have it, free the old string and replace it with a freshly built one */
+			g_free(cur->data);
+			cur->data = g_strdup_printf("%s:%s", voice->name, encoding);
+			return;
+		}
+	/* Otherwise, insert a new "name:encoding" string at the head of the list */
+	encodings_list = g_slist_prepend (encodings_list, g_strdup_printf("%s:%s", voice->name, encoding));
+}
+
+static gchar* encoding_get(FestivalSpeaker* speaker)
+{
+	/* Look for the item in the list; the result points into the matching list entry or is DEFAULT_ENCODING */
+	GSList* cur = encodings_list;
+	int len = strlen(speaker->voice);
+	/* Speaker should be at least "(voice_?)\n": 7 leading chars, a non-empty name, 2 trailing chars */
+	if (len < 10)
+		return DEFAULT_ENCODING;
+	for ( ; cur != NULL; cur = cur->next)
+		if (strncmp((gchar*)cur->data, speaker->voice + 7, len - 9) == 0) /* compare only the len - 9 name chars that follow the 7-char prefix */
+			return (gchar*)cur->data + len - 9 + 1; /* skip the name plus one separator char; NOTE(review): match is prefix-only, so for a longer entry name this does not land after the ':' */
+	/* If not found, default to latin1 */
+	return DEFAULT_ENCODING;
+}
 
 static FestivalTextMarker *
 festival_text_marker_new ()
@@ -329,12 +363,19 @@
 		p = strstr (voices[i], ")\n");
 		if (p != NULL)
 		    *p = 0; 
-		query_voice_des = g_strdup_printf ("(list 'VOICEDESC: "
-						   "(nth 0 (voice.description '%s)) "
-						   "(nth 1 (nth 0 (nth 1 (voice.description '%s)))) "
-						   "(nth 1 (nth 2 (nth 1 (voice.description '%s)))) "
-						   "(nth 1 (nth 1 (nth 1 (voice.description '%s)))))\n",
-						   voices[i], voices[i], voices[i], voices[i]);
+		/* Adds an extra nil to help tokenization later */
+		query_voice_des = g_strdup_printf (
+			"(list 'VOICEDESC: "
+			"(nth 0 (voice.description '%s)) "
+			"(cadr (assoc 'language (cadr (voice.description '%s)))) "
+			"(cadr (assoc 'dialect  (cadr (voice.description '%s)))) "
+			"(cadr (assoc 'gender   (cadr (voice.description '%s)))) "
+			"(or "
+			"  (cadr (assoc 'coding (cadr (voice.description '%s)))) "
+			"  \"" DEFAULT_ENCODING "\") "
+			"nil)\n",
+				voices[i], voices[i], voices[i], voices[i], voices[i]);
+
 		festival_synthesis_driver_say_raw (driver, query_voice_des);
 		voices_waiting_for_description++;
 		g_free (query_voice_des);
@@ -349,7 +390,8 @@
 		voice[1] != NULL && strcmp (voice[1], "nil") && 
 		voice[2] != NULL && strcmp (voice[2], "nil") && 
 		voice[3] != NULL && strcmp (voice[3], "nil") && 
-		voice[4] != NULL && strcmp (voice[4], "nil"))
+		voice[4] != NULL && strcmp (voice[4], "nil") &&
+		voice[5] != NULL && strcmp (voice[5], "nil"))
 	    {
 		GNOME_Speech_VoiceInfo *new_info;
 		new_info = GNOME_Speech_VoiceInfo__alloc ();
@@ -360,6 +402,7 @@
 		else
 		    new_info->gender = GNOME_Speech_gender_male;
 		voices_list = g_slist_append (voices_list, new_info);
+		encoding_set(new_info, voice[5]);
 	    }
 	    voices_waiting_for_description--;
 	    if (voices_waiting_for_description == 0) 
@@ -932,6 +975,7 @@
 	    festival_synthesis_driver_say_raw (d, s->voice);
 	    speaker_refresh_parameters (SPEAKER(s));
 	    d->last_speaker = s;
+	    g_io_channel_set_encoding(d->channel_sock, encoding_get(d->last_speaker), NULL);
 	}
 
 	clb_list_free (d->crt_clbs);

Attachment: signature.asc
Description: Digital signature



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]