rhythmbox r6237 - in trunk: . plugins/jamendo/jamendo



Author: jmatthew
Date: Fri Mar 20 03:02:36 2009
New Revision: 6237
URL: http://svn.gnome.org/viewvc/rhythmbox?rev=6237&view=rev

Log:
2009-03-20  Jonathan Matthew  <jonathan d14n org>

	patch by:  Kim Sullivan  <alicebot seznam cz>

	* plugins/jamendo/jamendo/JamendoSaxHandler.py:
	* plugins/jamendo/jamendo/JamendoSource.py:
	Rework the jamendo xml parser to create database entries in a single
	pass, rather than creating an intermediate structure and converting
	that to database entries.  Speeds up catalogue loading and reduces
	memory consumption.  From #424423.


Modified:
   trunk/ChangeLog
   trunk/plugins/jamendo/jamendo/JamendoSaxHandler.py
   trunk/plugins/jamendo/jamendo/JamendoSource.py

Modified: trunk/plugins/jamendo/jamendo/JamendoSaxHandler.py
==============================================================================
--- trunk/plugins/jamendo/jamendo/JamendoSaxHandler.py	(original)
+++ trunk/plugins/jamendo/jamendo/JamendoSaxHandler.py	Fri Mar 20 03:02:36 2009
@@ -18,110 +18,84 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 
+import rhythmdb
 import xml.sax, xml.sax.handler
+import datetime
 
-markups = ["JamendoData", "Artists", "artist", "Albums", "album", "Tracks", "track"]
-ignore = ["location", "country", "state", "city", "latitude", "longitude"]
+data = {"artist" : ["name"],
+        "album" : ["name","id","releasedate","id3genre"],
+        "track" : ["name","id","numalbum","duration","id3genre"]}
+        
+stream_url = "http://api.jamendo.com/get2/stream/track/redirect/?id=%s&streamencoding=ogg2";
 
 class JamendoSaxHandler(xml.sax.handler.ContentHandler):
-	def __init__(self):
+	def __init__(self,db,entry_type):
 		xml.sax.handler.ContentHandler.__init__(self)
+		self.__db = db
+		self.__entry_type = entry_type
+		self.__data = {}
+		for section in data:
+			self.__data[section]={}
+		self.__section = ""
+		self.__num_tracks = 0
 
-		self.current = {}
 
 	def startElement(self, name, attrs):
 		self.__text = ""
-		self.__ignore = False
+		self.__parse_content = False
 
-		if name in markups:
-			fct = getattr (self, "start" + name)
-			fct (attrs)
-
-		if name in ignore:
-			self.__ignore = True
+		if name in data:
+			self.__section = name
+		elif self.__section and name in data[self.__section]:
+			self.__parse_content = True
 
 	def endElement(self, name):
-		if name in markups:
-			fct = getattr (self, "end" + name)
-			fct ()
-		elif self.__ignore is False:
-			self.current[name] = self.__text
+		if self.__parse_content:
+			self.__data[self.__section][name] = self.__text
+		elif name == "track":
+			self.__num_tracks = self.__num_tracks + 1
+
+			track_url = stream_url % (self.__data["track"]["id"])	
+						
+			release_date = self.__data["album"]["releasedate"]
+			year = int(release_date[0:4])
+			date = datetime.date(year, 1, 1).toordinal()
+
+			try:
+				albumgenre = genre_id3[int(self.__data["album"]["id3genre"])]
+			except Exception:
+				albumgenre = _('Unknown')			
+
+			try:
+				duration = int(float(self.__data["track"]["duration"]))
+			except Exception:
+				duration = 0
+			
+			entry = self.__db.entry_lookup_by_location (track_url)
+			if entry == None:
+				entry = self.__db.entry_new(self.__entry_type, track_url)
+			self.__db.set(entry, rhythmdb.PROP_ARTIST, self.__data["artist"]["name"])
+			self.__db.set(entry, rhythmdb.PROP_ALBUM, self.__data["album"]["name"])
+			self.__db.set(entry, rhythmdb.PROP_TITLE, self.__data["track"]["name"])
+			self.__db.set(entry, rhythmdb.PROP_TRACK_NUMBER, int(self.__data["track"]["numalbum"]))
+			self.__db.set(entry, rhythmdb.PROP_DATE, date)
+			self.__db.set(entry, rhythmdb.PROP_GENRE, albumgenre)
+			self.__db.set(entry, rhythmdb.PROP_DURATION, duration)
+
+			# slight misuse, but this is far more efficient than having a python dict
+			# containing this data.
+			self.__db.set(entry, rhythmdb.PROP_MUSICBRAINZ_ALBUMID, self.__data["album"]["id"])
+			
+			if self.__num_tracks % 1000 == 0:
+				self.__db.commit()
+		elif name == "JamendoData":
+			self.__db.commit()
+		#clean up data
+		if name in data:
+			self.__data[name].clear ()
 
 	def characters(self, content):
-		if self.__ignore is False:
+		if self.__parse_content:
 			self.__text = self.__text + content
 
-	# start markups
-	def startJamendoData (self, attrs):
-		pass
-
-	def startArtists (self, attrs):
-		self.artists = {}
-
-	def startartist (self, attrs):
-		self.artist = {}
-		for attr in attrs.getNames():
-			self.artist[attr] = attrs[attr]
-		self.current = self.artist
-
-	def startAlbums (self, attrs):
-		self.albums = {}
-
-	def startalbum (self, attrs):
-		self.album = {}
-		for attr in attrs.getNames():
-			self.album[attr] = attrs[attr]
-		self.current = self.album
-
-	def startTracks (self, attrs):
-		self.tracks = {}
-
-	def starttrack (self, attrs):
-		self.track = {}
-		for attr in attrs.getNames():
-			self.track[attr] = attrs[attr]
-		self.current = self.track
-
-	# end markups
-	def endJamendoData (self):
-		pass # end of file
-
-	def endArtists (self):
-		pass # we have load all artists
-
-	def endartist (self):
-		self.artists[self.artist['id']] = self.artist
-		
-	def endAlbums (self):
-		self.artist['ALBUMS'] = self.albums
-
-	def endalbum (self):
-		self.albums[self.album['id']] = self.album
-
-	def endTracks (self):
-		self.album['TRACKS'] = self.tracks
-
-	def endtrack (self):
-		self.tracks[self.track['id']] = self.track
-
-
-if __name__ == "__main__":
-	parser = xml.sax.make_parser()
-	handler = JamendoSaxHandler()
-	parser.setContentHandler(handler)
-	datasource = open("/tmp/dbdump.en.xml")
-	#datasource = open("exemple_jamendo.xml")
-	parser.parse(datasource)
-	#print handler.artists
-	#print handler.albums
-	#print handler.tracks
-
-	tracks = handler.tracks
-	artists = handler.artists
-	albums = handler.albums
-	for track_key in tracks.keys():
-		track = tracks[track_key]
-		album = albums[track['albumID']]
-		artist = artists[album['artistID']]
-		#print track['dispname'], track['trackno'], track['lengths'], album['dispname'], artist['dispname']
-		print album['P2PLinks']
+genre_id3 = ["Blues","Classic Rock","Country","Dance","Disco","Funk","Grunge","Hip-Hop","Jazz","Metal","New Age","Oldies","Other","Pop","R&B","Rap","Reggae","Rock","Techno","Industrial","Alternative","Ska","Death Metal","Pranks","Soundtrack","Euro-Techno","Ambient","Trip-Hop","Vocal","Jazz+Funk","Fusion","Trance","Classical","Instrumental","Acid","House","Game","Sound Clip","Gospel","Noise","AlternRock","Bass","Soul","Punk","Space","Meditative","Instrumental Pop","Instrumental Rock","Ethnic","Gothic","Darkwave","Techno-Industrial","Electronic","Pop-Folk","Eurodance","Dream","Southern Rock","Comedy","Cult","Gangsta","Top 40","Christian Rap","Pop/Funk","Jungle","Native American","Cabaret","New Wave","Psychadelic","Rave","Showtunes","Trailer","Lo-Fi","Tribal","Acid Punk","Acid Jazz","Polka","Retro","Musical","Rock & Roll","Hard Rock","Folk","Folk-Rock","National Folk","Swing","Fast Fusion","Bebob","Latin","Revival","Celtic","Bluegrass","Avantgarde","Gothic Rock","Progressive Rock","Psychedelic Rock","Symphonic Rock","Slow Rock","Big Band","Chorus","Easy Listening","Acoustic","Humour","Speech","Chanson","Opera","Chamber Music","Sonata","Symphony","Booty Bass","Primus","Porn Groove","Satire","Slow Jam","Club","Tango","Samba","Folklore","Ballad","Power Ballad","Rhythmic Soul","Freestyle","Duet","Punk Rock","Drum Solo","Acapella","Euro-House","Dance Hall"]

Modified: trunk/plugins/jamendo/jamendo/JamendoSource.py
==============================================================================
--- trunk/plugins/jamendo/jamendo/JamendoSource.py	(original)
+++ trunk/plugins/jamendo/jamendo/JamendoSource.py	Fri Mar 20 03:02:36 2009
@@ -49,12 +49,9 @@
 
 # Album Covers are available here: http://api.jamendo.com/get2/image/album/redirect/?id={ALBUMID}&imagesize={100-600}
 
-stream_url = "http://api.jamendo.com/get2/stream/track/redirect/?id=%s&streamencoding=ogg2";
 artwork_url = "http://api.jamendo.com/get2/image/album/redirect/?id=%s&imagesize=200";
 artist_url = "http://www.jamendo.com/get/artist/id/album/page/plain/";
 
-genre_id3 = ["Blues","Classic Rock","Country","Dance","Disco","Funk","Grunge","Hip-Hop","Jazz","Metal","New Age","Oldies","Other","Pop","R&B","Rap","Reggae","Rock","Techno","Industrial","Alternative","Ska","Death Metal","Pranks","Soundtrack","Euro-Techno","Ambient","Trip-Hop","Vocal","Jazz+Funk","Fusion","Trance","Classical","Instrumental","Acid","House","Game","Sound Clip","Gospel","Noise","AlternRock","Bass","Soul","Punk","Space","Meditative","Instrumental Pop","Instrumental Rock","Ethnic","Gothic","Darkwave","Techno-Industrial","Electronic","Pop-Folk","Eurodance","Dream","Southern Rock","Comedy","Cult","Gangsta","Top 40","Christian Rap","Pop/Funk","Jungle","Native American","Cabaret","New Wave","Psychadelic","Rave","Showtunes","Trailer","Lo-Fi","Tribal","Acid Punk","Acid Jazz","Polka","Retro","Musical","Rock & Roll","Hard Rock","Folk","Folk-Rock","National Folk","Swing","Fast Fusion","Bebob","Latin","Revival","Celtic","Bluegrass","Avantgarde","Gothic Rock","Progressive Rock","Psychedelic Rock","Symphonic Rock","Slow Rock","Big Band","Chorus","Easy Listening","Acoustic","Humour","Speech","Chanson","Opera","Chamber Music","Sonata","Symphony","Booty Bass","Primus","Porn Groove","Satire","Slow Jam","Club","Tango","Samba","Folklore","Ballad","Power Ballad","Rhythmic Soul","Freestyle","Duet","Punk Rock","Drum Solo","Acapella","Euro-House","Dance Hall"]
-
 class JamendoSource(rb.BrowserSource):
 	__gproperties__ = {
 		'plugin': (rb.Plugin, 'plugin', 'plugin', gobject.PARAM_WRITABLE|gobject.PARAM_CONSTRUCT_ONLY),
@@ -179,9 +176,15 @@
 			self.__parser.close()
 			self.__db_load_finished = True
 			self.__updating = False
-			self.__load_db ()
+			self.__saxHandler = None
 			self.__show_loading_screen (False)
-			self.__catalogue_loader = None
+
+			# hack around bug 575781: if the catalogue loader is destroyed in this callback
+			# we'll crash, but afterwards is OK.
+			def done(self):
+				self.__catalogue_loader = None
+				return False
+			gobject.idle_add(done, self)
 			return
 
 		self.__parser.feed(result)
@@ -194,7 +197,7 @@
 		self.__notify_status_changed()
 		self.__db_load_finished = False
 
-		self.__saxHandler = JamendoSaxHandler()
+		self.__saxHandler = JamendoSaxHandler(self.__db, self.__entry_type)
 		self.__parser = xml.sax.make_parser()
 		self.__parser.setContentHandler(self.__saxHandler)
 
@@ -267,63 +270,6 @@
 		self.__info_screen.set_property("visible", show)
 		self.__paned_box.set_property("visible", not show)
 
-	def __load_db(self):
-		artists = self.__saxHandler.artists
-
-		nbAlbums = 0
-		nbTracks = 0
-		for artist_key in artists.keys():
-			artist = artists[artist_key]
-			for album_key in artist['ALBUMS'].keys():
-				nbAlbums = nbAlbums + 1
-				album = artist['ALBUMS'][album_key]
-				for track_key in album['TRACKS'].keys():
-					nbTracks = nbTracks + 1
-					track = album['TRACKS'][track_key]
-					track_id = track['id']
-					stream = stream_url % (track_id)
-					entry = self.__db.entry_lookup_by_location (stream)
-					if entry == None:
-						entry = self.__db.entry_new(self.__entry_type, stream)
-
-					release_date = album['releasedate']
-					if release_date:
-						year = int(release_date[0:4])
-						date = datetime.date(year, 1, 1).toordinal()
-						self.__db.set(entry, rhythmdb.PROP_DATE, date)
-
-					self.__db.set(entry, rhythmdb.PROP_TITLE, track['name'])
-					self.__db.set(entry, rhythmdb.PROP_ARTIST, artist['name'])
-					try:
-						genre = genre_id3[int(album['id3genre'])]
-					except Exception:
-						genre = _('Unknown')
-						
-					self.__db.set(entry, rhythmdb.PROP_GENRE, genre)
-					self.__db.set(entry, rhythmdb.PROP_ALBUM, album['name'])
-
-					trackno = int(track['numalbum'])
-					if trackno >= 0:
-						self.__db.set(entry, rhythmdb.PROP_TRACK_NUMBER, trackno)
-
-					try:
-						duration = float(track['duration'])
-						self.__db.set(entry, rhythmdb.PROP_DURATION, int(duration))
-					except Exception:
-						# No length, nevermind
-						pass
-					
-					# slight misuse, but this is far more efficient than having a python dict
-					# containing this data.
-					self.__db.set(entry, rhythmdb.PROP_MUSICBRAINZ_ALBUMID, album['id'])
-
-		print "Nb artistes : " + str(len(artists))
-		print "Nb albums : " + str(nbAlbums)
-		print "Nb tracks : " + str(nbTracks)
-
-		self.__db.commit()
-		self.__saxHandler = None
-
 
 	def __notify_status_changed(self):
 		def change_idle_cb():



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]