[rhythmbox] Português Lyrics - grab lyrics from letras.terra.com.br
- From: Jonathan Matthew <jmatthew src gnome org>
- To: svn-commits-list gnome org
- Cc:
- Subject: [rhythmbox] Português Lyrics - grab lyrics from letras.terra.com.br
- Date: Sun, 20 Dec 2009 11:02:12 +0000 (UTC)
commit 6e956c822b242d3d1d33bacdee74b61345d611e8
Author: Hardy Beltran Monasterios <hbm hardy com bo>
Date: Sun Sep 20 23:55:25 2009 -0400
Português Lyrics - grab lyrics from letras.terra.com.br
Added a new site to grab lyrics in Brazilian Portuguese.
plugins/lyrics/lyrics/LyricsSites.py | 5 +-
plugins/lyrics/lyrics/Makefile.am | 3 +-
plugins/lyrics/lyrics/TerraParser.py | 110 ++++++++++++++++++++++++++++++++++
3 files changed, 116 insertions(+), 2 deletions(-)
---
diff --git a/plugins/lyrics/lyrics/LyricsSites.py b/plugins/lyrics/lyrics/LyricsSites.py
index 971b3dc..e995e51 100644
--- a/plugins/lyrics/lyrics/LyricsSites.py
+++ b/plugins/lyrics/lyrics/LyricsSites.py
@@ -29,11 +29,14 @@ from LyrcParser import LyrcParser
from AstrawebParser import AstrawebParser
from LeoslyricsParser import LeoslyricsParser
from WinampcnParser import WinampcnParser
+from TerraParser import TerraParser
lyrics_sites = [
{ 'id': 'lyrc.com.ar', 'class': LyrcParser, 'name': _("Lyrc (lyrc.com.ar)") },
{ 'id': 'astraweb.com', 'class': AstrawebParser, 'name': _("Astraweb (www.astraweb.com)") },
{ 'id': 'leoslyrics.com', 'class': LeoslyricsParser, 'name': _("Leo's Lyrics (www.leoslyrics.com)") },
- { 'id': 'winampcn.com', 'class': WinampcnParser, 'name': _("WinampCN (www.winampcn.com)") }
+ { 'id': 'winampcn.com', 'class': WinampcnParser, 'name': _("WinampCN (www.winampcn.com)") },
+ { 'id': 'terra.com.br', 'class': TerraParser, 'name': _("TerraBrasil (terra.com.br)") }
+
]
diff --git a/plugins/lyrics/lyrics/Makefile.am b/plugins/lyrics/lyrics/Makefile.am
index e4c4137..3a5dd82 100644
--- a/plugins/lyrics/lyrics/Makefile.am
+++ b/plugins/lyrics/lyrics/Makefile.am
@@ -10,4 +10,5 @@ plugin_PYTHON = \
AstrawebParser.py \
LeoslyricsParser.py \
LyricWikiParser.py \
- WinampcnParser.py
+ WinampcnParser.py \
+ TerraParser.py
diff --git a/plugins/lyrics/lyrics/TerraParser.py b/plugins/lyrics/lyrics/TerraParser.py
new file mode 100644
index 0000000..6b200aa
--- /dev/null
+++ b/plugins/lyrics/lyrics/TerraParser.py
@@ -0,0 +1,110 @@
+# -*- Mode: python; coding: utf-8; tab-width: 8; indent-tabs-mode: t; -*-
+#
+# Copyright (C) 2009 Hardy Beltran Monasterios
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# The Rhythmbox authors hereby grant permission for non-GPL compatible
+# GStreamer plugins to be used and distributed together with GStreamer
+# and Rhythmbox. This permission is above and beyond the permissions granted
+# by the GPL license by which Rhythmbox is covered. If you modify this code
+# you may extend this exception to your version of the code, but you are not
+# obligated to do so. If you do not wish to do so, delete this exception
+# statement from your version.
+#
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import urllib
+import rb
+import re
+import sys
+
+# Deal with HTML entities and UTF-8
+# code taken from django/utils/text.py
+
+from htmlentitydefs import name2codepoint
+
+# Matches named entities ("&amp;") as well as decimal ("&#233;") and
+# hexadecimal ("&#xE9;") character references.  The optional '#' has to be
+# inside the captured group: _replace_entity() looks at it to tell numeric
+# references from named ones, and without it numeric references are never
+# matched at all ('#' is not a \w character).
+pattern = re.compile(r"&(#?\w+?);")
+
+def _replace_entity(match):
+	"""Return the character denoted by one matched HTML entity.
+
+	match -- a regex match object whose group 1 holds the entity body
+	         without the surrounding '&' and ';', e.g. 'amp', '#233'
+	         or '#xE9'.
+
+	Returns a unicode string holding the character, or the complete
+	matched text unchanged when the entity cannot be resolved.
+	"""
+	text = match.group(1)
+	try:
+		if text[0] != u'#':
+			# Named entity: look its code point up in the HTML table.
+			return unichr(name2codepoint[text])
+		# Numeric reference: '#NNN' is decimal, '#xHH' is hexadecimal.
+		digits = text[1:]
+		if digits[0] in u'xX':
+			return unichr(int(digits[1:], 16))
+		return unichr(int(digits))
+	except (ValueError, KeyError, OverflowError):
+		# Unknown name, malformed number, or a code point unichr() cannot
+		# represent (OverflowError for absurdly large values): leave the
+		# original text in place instead of aborting the substitution.
+		return match.group(0)
+
+def unescape_entities(text):
+	"""Return text with its HTML entities expanded.
+
+	Every '&...;' sequence found by the module-level pattern is passed to
+	_replace_entity(); sequences that cannot be resolved stay as they are.
+	"""
+	expanded = pattern.sub(_replace_entity, text)
+	return expanded
+
+class TerraParser (object):
+	"""Lyrics source backed by letras.terra.com.br.
+
+	An instance is created for one artist/title pair; search() requests
+	the page and the outcome is reported through the supplied callback.
+	"""
+
+	def __init__(self, artist, title):
+		self.artist = artist
+		self.title = title
+
+	def search(self, callback, *data):
+		"""Request the lyrics page for the stored artist and title.
+
+		callback -- invoked as callback(lyrics, *data); lyrics is None
+		            when no usable lyrics were obtained.
+		data     -- extra arguments passed through to callback untouched.
+		"""
+		path = 'http://letras.terra.com.br/'
+
+		# The query has the form "<artist> - <title>", each part URL-quoted.
+		artist = urllib.quote(self.artist)
+		title = urllib.quote(self.title)
+		join = urllib.quote(' - ')
+
+		wurl = 'winamp.php?t=%s%s%s' % (artist, join, title)
+		print "search URL: " + wurl
+
+		# The loader hands the fetched page to got_lyrics().
+		loader = rb.Loader()
+		loader.get_url (path + wurl, self.got_lyrics, callback, *data)
+
+	def got_lyrics(self, result, callback, *data):
+		"""Handle the page delivered by the loader and invoke callback.
+
+		result   -- page contents as delivered by the loader, or None.
+		callback -- invoked exactly once, with the lyrics or with None.
+		"""
+		if result is None:
+			callback (None, *data)
+			return
+
+		# The page is decoded as ISO-8859-1 and handled as UTF-8 from here on.
+		result = result.decode('iso-8859-1').encode('UTF-8')
+		if 'Música não encontrada' in result:
+			print "not found"
+			callback (None, *data)
+		elif '<div id="letra">' in result:
+			callback(self.parse_lyrics(result), *data)
+		else:
+			callback (None, *data)
+
+	def parse_lyrics(self, source):
+		"""Extract plain-text lyrics from a page with a '<div id="letra">'.
+
+		source -- page contents; got_lyrics() passes UTF-8 encoded bytes.
+
+		Returns UTF-8 encoded text made of the heading (artist and title),
+		the lyrics and an attribution footer, or None when the page has no
+		'<p>' block to take the lyrics from.
+		"""
+		# Entity replacement produces unicode characters.  Combining those
+		# with non-ASCII byte strings raises UnicodeDecodeError, so all text
+		# handling is done on unicode and encoded once at the end.
+		if isinstance(source, str):
+			source = source.decode('UTF-8')
+
+		parts = source.split('<div id="letra">')[1].split('<p>')
+		if len(parts) < 2:
+			# No lyrics paragraph: report "nothing found" instead of
+			# raising IndexError inside the loader callback.
+			return None
+
+		# Everything before the first <p> is the heading; strip its tags.
+		artistitle = re.sub('<.*?>', '', parts[0])
+		# The lyrics are the contents of the first paragraph.
+		lyrics = parts[1].split('</p>')[0]
+		# Accept <br>, <br/> and <br /> in any letter case.
+		lyrics = re.sub(r'<[Bb][Rr]\s*/?>', '', lyrics)
+
+		lyrics = unescape_entities(artistitle) + unescape_entities(lyrics)
+		lyrics += "\n\nEsta letra foi disponibilizada pelo site\nhttp://letras.terra.com.br"
+
+		return lyrics.encode('UTF-8')
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]