rhythmbox r6237 - in trunk: . plugins/jamendo/jamendo
- From: jmatthew svn gnome org
- To: svn-commits-list gnome org
- Subject: rhythmbox r6237 - in trunk: . plugins/jamendo/jamendo
- Date: Fri, 20 Mar 2009 03:02:36 +0000 (UTC)
Author: jmatthew
Date: Fri Mar 20 03:02:36 2009
New Revision: 6237
URL: http://svn.gnome.org/viewvc/rhythmbox?rev=6237&view=rev
Log:
2009-03-20 Jonathan Matthew <jonathan d14n org>
patch by: Kim Sullivan <alicebot seznam cz>
* plugins/jamendo/jamendo/JamendoSaxHandler.py:
* plugins/jamendo/jamendo/JamendoSource.py:
Rework the jamendo xml parser to create database entries in a single
pass, rather than creating an intermediate structure and converting
that to database entries. Speeds up catalogue loading and reduces
memory consumption. From #424423.
Modified:
trunk/ChangeLog
trunk/plugins/jamendo/jamendo/JamendoSaxHandler.py
trunk/plugins/jamendo/jamendo/JamendoSource.py
Modified: trunk/plugins/jamendo/jamendo/JamendoSaxHandler.py
==============================================================================
--- trunk/plugins/jamendo/jamendo/JamendoSaxHandler.py (original)
+++ trunk/plugins/jamendo/jamendo/JamendoSaxHandler.py Fri Mar 20 03:02:36 2009
@@ -18,110 +18,84 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+import rhythmdb
import xml.sax, xml.sax.handler
+import datetime
-markups = ["JamendoData", "Artists", "artist", "Albums", "album", "Tracks", "track"]
-ignore = ["location", "country", "state", "city", "latitude", "longitude"]
+data = {"artist" : ["name"],
+ "album" : ["name","id","releasedate","id3genre"],
+ "track" : ["name","id","numalbum","duration","id3genre"]}
+
+stream_url = "http://api.jamendo.com/get2/stream/track/redirect/?id=%s&streamencoding=ogg2"
class JamendoSaxHandler(xml.sax.handler.ContentHandler):
- def __init__(self):
+ def __init__(self,db,entry_type):
xml.sax.handler.ContentHandler.__init__(self)
+ self.__db = db
+ self.__entry_type = entry_type
+ self.__data = {}
+ for section in data:
+ self.__data[section]={}
+ self.__section = ""
+ self.__num_tracks = 0
- self.current = {}
def startElement(self, name, attrs):
self.__text = ""
- self.__ignore = False
+ self.__parse_content = False
- if name in markups:
- fct = getattr (self, "start" + name)
- fct (attrs)
-
- if name in ignore:
- self.__ignore = True
+ if name in data:
+ self.__section = name
+ elif self.__section and name in data[self.__section]:
+ self.__parse_content = True
def endElement(self, name):
- if name in markups:
- fct = getattr (self, "end" + name)
- fct ()
- elif self.__ignore is False:
- self.current[name] = self.__text
+ if self.__parse_content:
+ self.__data[self.__section][name] = self.__text
+ elif name == "track":
+ self.__num_tracks = self.__num_tracks + 1
+
+ track_url = stream_url % (self.__data["track"]["id"])
+
+ release_date = self.__data["album"]["releasedate"]
+ year = int(release_date[0:4])
+ date = datetime.date(year, 1, 1).toordinal()
+
+ try:
+ albumgenre = genre_id3[int(self.__data["album"]["id3genre"])]
+ except Exception:
+ albumgenre = _('Unknown')
+
+ try:
+ duration = int(float(self.__data["track"]["duration"]))
+ except Exception:
+ duration = 0
+
+ entry = self.__db.entry_lookup_by_location (track_url)
+ if entry == None:
+ entry = self.__db.entry_new(self.__entry_type, track_url)
+ self.__db.set(entry, rhythmdb.PROP_ARTIST, self.__data["artist"]["name"])
+ self.__db.set(entry, rhythmdb.PROP_ALBUM, self.__data["album"]["name"])
+ self.__db.set(entry, rhythmdb.PROP_TITLE, self.__data["track"]["name"])
+ self.__db.set(entry, rhythmdb.PROP_TRACK_NUMBER, int(self.__data["track"]["numalbum"]))
+ self.__db.set(entry, rhythmdb.PROP_DATE, date)
+ self.__db.set(entry, rhythmdb.PROP_GENRE, albumgenre)
+ self.__db.set(entry, rhythmdb.PROP_DURATION, duration)
+
+ # slight misuse, but this is far more efficient than having a python dict
+ # containing this data.
+ self.__db.set(entry, rhythmdb.PROP_MUSICBRAINZ_ALBUMID, self.__data["album"]["id"])
+
+ if self.__num_tracks % 1000 == 0:
+ self.__db.commit()
+ elif name == "JamendoData":
+ self.__db.commit()
+ #clean up data
+ if name in data:
+ self.__data[name].clear ()
def characters(self, content):
- if self.__ignore is False:
+ if self.__parse_content:
self.__text = self.__text + content
- # start markups
- def startJamendoData (self, attrs):
- pass
-
- def startArtists (self, attrs):
- self.artists = {}
-
- def startartist (self, attrs):
- self.artist = {}
- for attr in attrs.getNames():
- self.artist[attr] = attrs[attr]
- self.current = self.artist
-
- def startAlbums (self, attrs):
- self.albums = {}
-
- def startalbum (self, attrs):
- self.album = {}
- for attr in attrs.getNames():
- self.album[attr] = attrs[attr]
- self.current = self.album
-
- def startTracks (self, attrs):
- self.tracks = {}
-
- def starttrack (self, attrs):
- self.track = {}
- for attr in attrs.getNames():
- self.track[attr] = attrs[attr]
- self.current = self.track
-
- # end markups
- def endJamendoData (self):
- pass # end of file
-
- def endArtists (self):
- pass # we have load all artists
-
- def endartist (self):
- self.artists[self.artist['id']] = self.artist
-
- def endAlbums (self):
- self.artist['ALBUMS'] = self.albums
-
- def endalbum (self):
- self.albums[self.album['id']] = self.album
-
- def endTracks (self):
- self.album['TRACKS'] = self.tracks
-
- def endtrack (self):
- self.tracks[self.track['id']] = self.track
-
-
-if __name__ == "__main__":
- parser = xml.sax.make_parser()
- handler = JamendoSaxHandler()
- parser.setContentHandler(handler)
- datasource = open("/tmp/dbdump.en.xml")
- #datasource = open("exemple_jamendo.xml")
- parser.parse(datasource)
- #print handler.artists
- #print handler.albums
- #print handler.tracks
-
- tracks = handler.tracks
- artists = handler.artists
- albums = handler.albums
- for track_key in tracks.keys():
- track = tracks[track_key]
- album = albums[track['albumID']]
- artist = artists[album['artistID']]
- #print track['dispname'], track['trackno'], track['lengths'], album['dispname'], artist['dispname']
- print album['P2PLinks']
+genre_id3 = ["Blues","Classic Rock","Country","Dance","Disco","Funk","Grunge","Hip-Hop","Jazz","Metal","New Age","Oldies","Other","Pop","R&B","Rap","Reggae","Rock","Techno","Industrial","Alternative","Ska","Death Metal","Pranks","Soundtrack","Euro-Techno","Ambient","Trip-Hop","Vocal","Jazz+Funk","Fusion","Trance","Classical","Instrumental","Acid","House","Game","Sound Clip","Gospel","Noise","AlternRock","Bass","Soul","Punk","Space","Meditative","Instrumental Pop","Instrumental Rock","Ethnic","Gothic","Darkwave","Techno-Industrial","Electronic","Pop-Folk","Eurodance","Dream","Southern Rock","Comedy","Cult","Gangsta","Top 40","Christian Rap","Pop/Funk","Jungle","Native American","Cabaret","New Wave","Psychadelic","Rave","Showtunes","Trailer","Lo-Fi","Tribal","Acid Punk","Acid Jazz","Polka","Retro","Musical","Rock & Roll","Hard Rock","Folk","Folk-Rock","National Folk","Swing","Fast Fusion","Bebob","Latin","Revival","Celtic","Bluegrass","Avantgarde","Gothic Rock","Progressive Ro
ck","Psychedelic Rock","Symphonic Rock","Slow Rock","Big Band","Chorus","Easy Listening","Acoustic","Humour","Speech","Chanson","Opera","Chamber Music","Sonata","Symphony","Booty Bass","Primus","Porn Groove","Satire","Slow Jam","Club","Tango","Samba","Folklore","Ballad","Power Ballad","Rhythmic Soul","Freestyle","Duet","Punk Rock","Drum Solo","Acapella","Euro-House","Dance Hall"]
Modified: trunk/plugins/jamendo/jamendo/JamendoSource.py
==============================================================================
--- trunk/plugins/jamendo/jamendo/JamendoSource.py (original)
+++ trunk/plugins/jamendo/jamendo/JamendoSource.py Fri Mar 20 03:02:36 2009
@@ -49,12 +49,9 @@
# Album Covers are available here: http://api.jamendo.com/get2/image/album/redirect/?id={ALBUMID}&imagesize={100-600}
-stream_url = "http://api.jamendo.com/get2/stream/track/redirect/?id=%s&streamencoding=ogg2"
artwork_url = "http://api.jamendo.com/get2/image/album/redirect/?id=%s&imagesize=200"
artist_url = "http://www.jamendo.com/get/artist/id/album/page/plain/"
-genre_id3 = ["Blues","Classic Rock","Country","Dance","Disco","Funk","Grunge","Hip-Hop","Jazz","Metal","New Age","Oldies","Other","Pop","R&B","Rap","Reggae","Rock","Techno","Industrial","Alternative","Ska","Death Metal","Pranks","Soundtrack","Euro-Techno","Ambient","Trip-Hop","Vocal","Jazz+Funk","Fusion","Trance","Classical","Instrumental","Acid","House","Game","Sound Clip","Gospel","Noise","AlternRock","Bass","Soul","Punk","Space","Meditative","Instrumental Pop","Instrumental Rock","Ethnic","Gothic","Darkwave","Techno-Industrial","Electronic","Pop-Folk","Eurodance","Dream","Southern Rock","Comedy","Cult","Gangsta","Top 40","Christian Rap","Pop/Funk","Jungle","Native American","Cabaret","New Wave","Psychadelic","Rave","Showtunes","Trailer","Lo-Fi","Tribal","Acid Punk","Acid Jazz","Polka","Retro","Musical","Rock & Roll","Hard Rock","Folk","Folk-Rock","National Folk","Swing","Fast Fusion","Bebob","Latin","Revival","Celtic","Bluegrass","Avantgarde","Gothic Rock","Progressive Ro
ck","Psychedelic Rock","Symphonic Rock","Slow Rock","Big Band","Chorus","Easy Listening","Acoustic","Humour","Speech","Chanson","Opera","Chamber Music","Sonata","Symphony","Booty Bass","Primus","Porn Groove","Satire","Slow Jam","Club","Tango","Samba","Folklore","Ballad","Power Ballad","Rhythmic Soul","Freestyle","Duet","Punk Rock","Drum Solo","Acapella","Euro-House","Dance Hall"]
-
class JamendoSource(rb.BrowserSource):
__gproperties__ = {
'plugin': (rb.Plugin, 'plugin', 'plugin', gobject.PARAM_WRITABLE|gobject.PARAM_CONSTRUCT_ONLY),
@@ -179,9 +176,15 @@
self.__parser.close()
self.__db_load_finished = True
self.__updating = False
- self.__load_db ()
+ self.__saxHandler = None
self.__show_loading_screen (False)
- self.__catalogue_loader = None
+
+ # hack around bug 575781: if the catalogue loader is destroyed in this callback
+ # we'll crash, but afterwards is OK.
+ def done(self):
+ self.__catalogue_loader = None
+ return False
+ gobject.idle_add(done, self)
return
self.__parser.feed(result)
@@ -194,7 +197,7 @@
self.__notify_status_changed()
self.__db_load_finished = False
- self.__saxHandler = JamendoSaxHandler()
+ self.__saxHandler = JamendoSaxHandler(self.__db, self.__entry_type)
self.__parser = xml.sax.make_parser()
self.__parser.setContentHandler(self.__saxHandler)
@@ -267,63 +270,6 @@
self.__info_screen.set_property("visible", show)
self.__paned_box.set_property("visible", not show)
- def __load_db(self):
- artists = self.__saxHandler.artists
-
- nbAlbums = 0
- nbTracks = 0
- for artist_key in artists.keys():
- artist = artists[artist_key]
- for album_key in artist['ALBUMS'].keys():
- nbAlbums = nbAlbums + 1
- album = artist['ALBUMS'][album_key]
- for track_key in album['TRACKS'].keys():
- nbTracks = nbTracks + 1
- track = album['TRACKS'][track_key]
- track_id = track['id']
- stream = stream_url % (track_id)
- entry = self.__db.entry_lookup_by_location (stream)
- if entry == None:
- entry = self.__db.entry_new(self.__entry_type, stream)
-
- release_date = album['releasedate']
- if release_date:
- year = int(release_date[0:4])
- date = datetime.date(year, 1, 1).toordinal()
- self.__db.set(entry, rhythmdb.PROP_DATE, date)
-
- self.__db.set(entry, rhythmdb.PROP_TITLE, track['name'])
- self.__db.set(entry, rhythmdb.PROP_ARTIST, artist['name'])
- try:
- genre = genre_id3[int(album['id3genre'])]
- except Exception:
- genre = _('Unknown')
-
- self.__db.set(entry, rhythmdb.PROP_GENRE, genre)
- self.__db.set(entry, rhythmdb.PROP_ALBUM, album['name'])
-
- trackno = int(track['numalbum'])
- if trackno >= 0:
- self.__db.set(entry, rhythmdb.PROP_TRACK_NUMBER, trackno)
-
- try:
- duration = float(track['duration'])
- self.__db.set(entry, rhythmdb.PROP_DURATION, int(duration))
- except Exception:
- # No length, nevermind
- pass
-
- # slight misuse, but this is far more efficient than having a python dict
- # containing this data.
- self.__db.set(entry, rhythmdb.PROP_MUSICBRAINZ_ALBUMID, album['id'])
-
- print "Nb artistes : " + str(len(artists))
- print "Nb albums : " + str(nbAlbums)
- print "Nb tracks : " + str(nbTracks)
-
- self.__db.commit()
- self.__saxHandler = None
-
def __notify_status_changed(self):
def change_idle_cb():
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]