[grilo-plugins] metrolyrics: fix html parser
- From: Victor Toso de Carvalho <victortoso src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [grilo-plugins] metrolyrics: fix html parser
- Date: Mon, 5 Sep 2016 14:42:27 +0000 (UTC)
commit 1596afd2ee226b1994f5429a7a892e052dfa052e
Author: Victor Toso <me victortoso com>
Date: Sat Sep 3 16:56:34 2016 +0200
metrolyrics: fix html parser
Seems that in some lyrics a new <div></div> can be included with some
info. That was breaking the html parser.
Instead of relying on the ending </div> for the lyric, let's use something
else that is present and not so common (<p class="writers") as the ending
point for the lyrics and remove what is not interesting for us.
https://bugzilla.gnome.org/show_bug.cgi?id=770806
src/lua-factory/sources/grl-metrolyrics.lua | 4 +++-
1 files changed, 3 insertions(+), 1 deletions(-)
---
diff --git a/src/lua-factory/sources/grl-metrolyrics.lua b/src/lua-factory/sources/grl-metrolyrics.lua
index 4239a87..067ede1 100644
--- a/src/lua-factory/sources/grl-metrolyrics.lua
+++ b/src/lua-factory/sources/grl-metrolyrics.lua
@@ -88,8 +88,10 @@ end
function metrolyrics_get_lyrics(feed)
local media = {}
- local lyrics_body = '<div id="lyrics%-body%-text".->(.-)</div>'
+ local lyrics_body = '<div id="lyrics%-body%-text".->(.-)<p class="writers"'
local noise_array = {
+ { noise = '<div id="mid%-song%-discussion".->.+</div>\n<p', sub = "<p" },
+ { noise = '</div>', sub = "" },
{ noise = "</p>", sub = "\n\n" },
{ noise = "<p class='verse'><p class='verse'>", sub = "\n\n" },
{ noise = "<p class='verse'>", sub = "" },
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]