[rhythmbox] lyrics: add a lyrics parser for darklyrics.com (bug #525094)
- From: Jonathan Matthew <jmatthew src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [rhythmbox] lyrics: add a lyrics parser for darklyrics.com (bug #525094)
- Date: Sat, 29 May 2010 06:24:22 +0000 (UTC)
commit 06d99662b27d1f8e8cb396f02f62436853b9262a
Author: Edgar Luna <edgar luna gmail com>
Date: Sat May 29 16:23:02 2010 +1000
lyrics: add a lyrics parser for darklyrics.com (bug #525094)
plugins/lyrics/lyrics/DarkLyricsParser.py | 163 +++++++++++++++++++++++++++++
plugins/lyrics/lyrics/LyricsSites.py | 4 +-
plugins/lyrics/lyrics/Makefile.am | 3 +-
3 files changed, 168 insertions(+), 2 deletions(-)
---
diff --git a/plugins/lyrics/lyrics/DarkLyricsParser.py b/plugins/lyrics/lyrics/DarkLyricsParser.py
new file mode 100644
index 0000000..7370ece
--- /dev/null
+++ b/plugins/lyrics/lyrics/DarkLyricsParser.py
@@ -0,0 +1,163 @@
+# -*- Mode: python; coding: utf-8; tab-width: 8; indent-tabs-mode: t; -*-
+#
+# Copyright (C) 2008, 2009, 2010 Edgar Luna
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# The Rhythmbox authors hereby grant permission for non-GPL compatible
+# GStreamer plugins to be used and distributed together with GStreamer
+# and Rhythmbox. This permission is above and beyond the permissions granted
+# by the GPL license by which Rhythmbox is covered. If you modify this code
+# you may extend this exception to your version of the code, but you are not
+# obligated to do so. If you do not wish to do so, delete this exception
+# statement from your version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+
+import re
+import string
+import rb
+
+min_artist_match = .5
+min_song_match = .5
+from rb.stringmatch import string_match
+
+class DarkLyricsParser (object):
+ """Parser for Lyrics from www.darklyrics.com"""
+
+
+ def __init__(self, artist, title):
+ self.artist = artist
+ self.title = title
+ self.artist_ascii = ''
+ self.titlenumber = ''
+
+ def search(self, callback, *data):
+ """Request the artist index page for the first letter of the artist's name."""
+
+ self.artist_ascii = ''.join(c for c in self.artist.lower() \
+ if c in string.ascii_letters)
+ self.artist_ascii = self.artist_ascii.lower()
+ firstcharurl = 'http://www.darklyrics.com/%s.html' % (self.artist_ascii[0])
+ loader = rb.Loader()
+ loader.get_url (firstcharurl, self.search_artist, callback, *data)
+
+ def search_artist(self, artist_page, callback, *data):
+ """Search artist_page for the link to the artist's own page.
+ """
+ if artist_page is None:
+ callback (None, *data)
+ return
+
+ link_section = re.split ('<SCRIPT LANGUAGE="javascript" src="tban2.js"></SCRIPT>',
+ artist_page, 1)[1]
+ pattern_link = '<a href="'
+ pattern_artist = '([^"]*)">*([^<]*)</a><br><br>'
+ links = re.split (pattern_link, link_section.lower())
+ links.pop(0)
+ best_match = ()
+ for line in links:
+ artist = re.findall(pattern_artist, line)
+ if len(artist) == 0:
+ continue
+ artist_link, artist_name = artist[0]
+ artist_url = 'http://www.darklyrics.com/%s' % (artist_link)
+ if artist_link[:5] == 'http:':
+ continue
+ artist_name = artist_name.strip()
+ smvalue = string_match (artist_name, self.artist_ascii)
+ if smvalue > min_artist_match:
+ best_match = (smvalue, artist_url, artist_name)
+
+ if not best_match:
+ # No local artist link matched (external http: links are skipped above)
+ callback (None, *data)
+ return
+ loader = rb.Loader ()
+ self.artist = best_match[2]
+ loader.get_url (best_match[1], self.search_song, callback, *data)
+
+ class SongFound (object):
+ def __init__ (self, smvalue, title, number, album, artist):
+ self.smvalue = smvalue
+ self.title = title
+ self.number = number
+ self.album = album
+ self.artist = artist
+
+ def __str__(self):
+ return '(' + str(self.smvalue) + '. ' + self.title + '. ' + self.album + '. ' + self.artist + ')'
+
+ def search_song (self, songlist, callback, *data):
+ """Once the artist's page has been fetched, look for the song in it.
+
+ The artist page lists all of the artist's albums, with a link
+ to the lyrics of each song.
+ """
+ if songlist is None:
+ callback (None, *data)
+ return
+ # Find every song link (<a>) in the album listing,
+ # compare each link's title with the requested title
+ # using string_match (case and spaces are ignored),
+ # and keep a link that scores above min_song_match.
+ # NOTE: the last qualifying link wins, not the best-scoring one.
+ link_section = re.split('LYRICS<BR></FONT>', songlist)[1]
+ link_section = link_section.lower()
+ pattern_song = '<a href="../lyrics/(.*)/(.*).html#([^"]+)" target="_blank"><FONT COLOR="#CCCCCC">(.*)</FONT></a><br>'
+ matches = re.findall (pattern_song.lower(), link_section)
+ best_match = ""
+ for line in matches:
+ artist, album, number, title = line
+ smvalue = string_match (title.lower().replace(' ', '' ),
+ self.title.lower().replace(' ', ''))
+ if smvalue > min_song_match:
+ best_match = self.SongFound(smvalue,
+ title,
+ number,
+ album,
+ artist)
+ if not best_match:
+ callback (None, *data)
+ return
+ loader = rb.Loader ()
+ url = 'http://www.darklyrics.com/lyrics/%s/%s.html' % (best_match.artist, best_match.album)
+ self.title = best_match.title
+ self.titlenumber = best_match.number
+ loader.get_url (url, self.parse_lyrics, callback, *data)
+
+ def parse_lyrics (self, album, callback, *data):
+ """Extract the lyrics of the song from the album's page.
+
+ The page contains the lyrics of every song on the album;
+ this method gets rid of everything that isn't the
+ lyrics of self.title."""
+ if album is None:
+ callback (None, *data)
+ return
+ titleline = '(?mis)<a name=%s><FONT color=#DDDDDD><b>%s. %s</b></font>(.+?)<[a|f]' % \
+ (self.titlenumber, self.titlenumber, re.escape(self.title.title()))
+ lyricmatch = re.split (titleline, album)
+ if len (lyricmatch) > 1:
+ lyrics = lyricmatch[1]
+ lyrics = lyrics.replace ('\r', "")
+ lyrics = re.sub (r'<.*?>', "", lyrics)
+ lyrics = lyrics.strip ("\n")
+ title = "%s - %s\n\n" % (self.artist.title(), self.title.title())
+
+ lyrics = title + str (lyrics)
+ lyrics += "\n\nLyrics provided by Dark Lyrics"
+ callback (lyrics, *data)
+ else:
+ callback (None, *data)
+ return
diff --git a/plugins/lyrics/lyrics/LyricsSites.py b/plugins/lyrics/lyrics/LyricsSites.py
index e995e51..eb13011 100644
--- a/plugins/lyrics/lyrics/LyricsSites.py
+++ b/plugins/lyrics/lyrics/LyricsSites.py
@@ -30,13 +30,15 @@ from AstrawebParser import AstrawebParser
from LeoslyricsParser import LeoslyricsParser
from WinampcnParser import WinampcnParser
from TerraParser import TerraParser
+from DarkLyricsParser import DarkLyricsParser
lyrics_sites = [
{ 'id': 'lyrc.com.ar', 'class': LyrcParser, 'name': _("Lyrc (lyrc.com.ar)") },
{ 'id': 'astraweb.com', 'class': AstrawebParser, 'name': _("Astraweb (www.astraweb.com)") },
{ 'id': 'leoslyrics.com', 'class': LeoslyricsParser, 'name': _("Leo's Lyrics (www.leoslyrics.com)") },
{ 'id': 'winampcn.com', 'class': WinampcnParser, 'name': _("WinampCN (www.winampcn.com)") },
- { 'id': 'terra.com.br', 'class': TerraParser, 'name': _("TerraBrasil (terra.com.br)") }
+ { 'id': 'terra.com.br', 'class': TerraParser, 'name': _("TerraBrasil (terra.com.br)") },
+ { 'id': 'darklyrics.com', 'class': DarkLyricsParser, 'name': _("Dark Lyrics (darklyrics.com)") }
]
diff --git a/plugins/lyrics/lyrics/Makefile.am b/plugins/lyrics/lyrics/Makefile.am
index 3a5dd82..05026bd 100644
--- a/plugins/lyrics/lyrics/Makefile.am
+++ b/plugins/lyrics/lyrics/Makefile.am
@@ -11,4 +11,5 @@ plugin_PYTHON = \
LeoslyricsParser.py \
LyricWikiParser.py \
WinampcnParser.py \
- TerraParser.py
+ TerraParser.py \
+ DarkLyricsParser.py
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]