[rhythmbox] Português Lyrics - grab lyrics from letras.terra.com.br



commit 6e956c822b242d3d1d33bacdee74b61345d611e8
Author: Hardy Beltran Monasterios <hbm hardy com bo>
Date:   Sun Sep 20 23:55:25 2009 -0400

    Português Lyrics - grab lyrics from letras.terra.com.br
    
    Added a new site to grab lyrics in Brazilian Portuguese.

 plugins/lyrics/lyrics/LyricsSites.py |    5 +-
 plugins/lyrics/lyrics/Makefile.am    |    3 +-
 plugins/lyrics/lyrics/TerraParser.py |  110 ++++++++++++++++++++++++++++++++++
 3 files changed, 116 insertions(+), 2 deletions(-)
---
diff --git a/plugins/lyrics/lyrics/LyricsSites.py b/plugins/lyrics/lyrics/LyricsSites.py
index 971b3dc..e995e51 100644
--- a/plugins/lyrics/lyrics/LyricsSites.py
+++ b/plugins/lyrics/lyrics/LyricsSites.py
@@ -29,11 +29,14 @@ from LyrcParser import LyrcParser
 from AstrawebParser import AstrawebParser
 from LeoslyricsParser import LeoslyricsParser
 from WinampcnParser import WinampcnParser
+from TerraParser import TerraParser
 
 lyrics_sites = [
 	{ 'id': 'lyrc.com.ar', 		'class': LyrcParser, 		'name': _("Lyrc (lyrc.com.ar)") 		},
 	{ 'id': 'astraweb.com', 	'class': AstrawebParser, 	'name': _("Astraweb (www.astraweb.com)") 	},
 	{ 'id': 'leoslyrics.com', 	'class': LeoslyricsParser, 	'name': _("Leo's Lyrics (www.leoslyrics.com)") 	},
-	{ 'id': 'winampcn.com', 	'class': WinampcnParser, 	'name': _("WinampCN (www.winampcn.com)") 	}
+	{ 'id': 'winampcn.com', 	'class': WinampcnParser, 	'name': _("WinampCN (www.winampcn.com)") 	},
+	{ 'id': 'terra.com.br',		'class': TerraParser,		'name': _("TerraBrasil (terra.com.br)")		}
+
 ]
 
diff --git a/plugins/lyrics/lyrics/Makefile.am b/plugins/lyrics/lyrics/Makefile.am
index e4c4137..3a5dd82 100644
--- a/plugins/lyrics/lyrics/Makefile.am
+++ b/plugins/lyrics/lyrics/Makefile.am
@@ -10,4 +10,5 @@ plugin_PYTHON =				\
        AstrawebParser.py		\
        LeoslyricsParser.py		\
        LyricWikiParser.py		\
-       WinampcnParser.py
+       WinampcnParser.py		\
+       TerraParser.py
diff --git a/plugins/lyrics/lyrics/TerraParser.py b/plugins/lyrics/lyrics/TerraParser.py
new file mode 100644
index 0000000..6b200aa
--- /dev/null
+++ b/plugins/lyrics/lyrics/TerraParser.py
@@ -0,0 +1,110 @@
+# -*- Mode: python; coding: utf-8; tab-width: 8; indent-tabs-mode: t; -*-
+#
+# Copyright (C) 2009 Hardy Beltran Monasterios
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# The Rhythmbox authors hereby grant permission for non-GPL compatible
+# GStreamer plugins to be used and distributed together with GStreamer
+# and Rhythmbox. This permission is above and beyond the permissions granted
+# by the GPL license by which Rhythmbox is covered. If you modify this code
+# you may extend this exception to your version of the code, but you are not
+# obligated to do so. If you do not wish to do so, delete this exception
+# statement from your version.
+#
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA.
+
+
+import urllib
+import rb
+import re
+import sys
+
+# Deal with HTML entities and UTF-8
+# code taken from django/utils/text.py
+
+from htmlentitydefs import name2codepoint
+
+pattern = re.compile(r"&(#?\w+?);")  # '#?' so numeric references (&#233;, &#xE9;) are matched too
+
+def _replace_entity(match):
+	"""re.sub callback: return the character for one entity, or the entity text if unknown."""
+	text = match.group(1)
+	try:
+		if text[0] == u'#':
+			# Numeric reference: "#xHH" is hexadecimal, "#NNN" is decimal.
+			text = text[1:]
+			if text[0] in u'xX':
+				c = int(text[1:], 16)
+			else:
+				c = int(text)
+		else:
+			c = name2codepoint[text]
+		# unichr raises ValueError or OverflowError for values it cannot represent.
+		return unichr(c)
+	except (ValueError, KeyError, OverflowError):
+		return match.group(0)
+
+def unescape_entities(text):  # run _replace_entity over every match of pattern in text
+	return re.sub(pattern, _replace_entity, text)
+
+class TerraParser (object):
+	"""Fetch and parse song lyrics from letras.terra.com.br."""
+
+	def __init__(self, artist, title):
+		self.artist = artist
+		self.title = title
+
+	def search(self, callback, *data):
+		"""Build the query URL and hand it to rb.Loader with got_lyrics as the handler."""
+		path = 'http://letras.terra.com.br/'
+
+		artist = urllib.quote(self.artist)
+		title = urllib.quote(self.title)
+		join = urllib.quote(' - ')
+
+		wurl = 'winamp.php?t=%s%s%s' % (artist, join, title)
+		print "search URL: " + wurl
+
+		loader = rb.Loader()
+		loader.get_url (path + wurl, self.got_lyrics, callback, *data)
+
+	def got_lyrics(self, result, callback, *data):
+		"""Call callback with the parsed lyrics, or with None if there are none."""
+		lyrics = None
+		if result is not None:
+			result = result.decode('iso-8859-1').encode('UTF-8')
+			if re.search('M&uacute;sica n&atilde;o encontrada', result):
+				print "not found"
+			elif re.search('<div id="letra">', result):
+				try:
+					lyrics = self.parse_lyrics(result)
+				except IndexError:
+					# No <p> after the marker: report "no lyrics" rather than raise.
+					lyrics = None
+		callback (lyrics, *data)
+
+	def parse_lyrics(self, source):
+		"""Return the heading and lyrics after <div id="letra">, plus a site credit."""
+		source = re.split('<div id="letra">', source)[1]
+		source = re.split('<p>', source)
+		# Parse artist and title
+		artistitle = re.sub('<.*?>', '', source[0])
+		# Parse lyrics
+		lyrics = re.split('</p>', source[1])[0]
+		lyrics = re.sub('<[Bb][Rr]/>', '', lyrics)
+
+		lyrics = unescape_entities(artistitle) + unescape_entities(lyrics)
+		lyrics += "\n\nEsta letra foi disponibilizada pelo site\nhttp://letras.terra.com.br"
+		return lyrics



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]