[rhythmbox] rb.URLCache: add a simple disk-based cache for web service data
- From: Jonathan Matthew <jmatthew src gnome org>
- To: svn-commits-list gnome org
- Cc:
- Subject: [rhythmbox] rb.URLCache: add a simple disk-based cache for web service data
- Date: Sun, 1 Nov 2009 06:00:28 +0000 (UTC)
commit e2d328d2f150d8d2ab298d0b2375623f59cc79e7
Author: Jonathan Matthew <jonathan d14n org>
Date: Sat Oct 31 12:56:56 2009 +1000
rb.URLCache: add a simple disk-based cache for web service data
I wrote this for the context pane plugin, but it could also be useful
for the lyrics and artdisplay plugins.
plugins/rb/Makefile.am | 1 +
plugins/rb/URLCache.py | 209 ++++++++++++++++++++++++++++++++++++++++++++++++
plugins/rb/__init__.py | 1 +
3 files changed, 211 insertions(+), 0 deletions(-)
---
diff --git a/plugins/rb/Makefile.am b/plugins/rb/Makefile.am
index bde518c..32baf37 100644
--- a/plugins/rb/Makefile.am
+++ b/plugins/rb/Makefile.am
@@ -3,6 +3,7 @@ plugindir = $(PLUGINDIR)/rb
plugin_PYTHON = \
Loader.py \
Coroutine.py \
+ URLCache.py \
stringmatch.py \
__init__.py
diff --git a/plugins/rb/URLCache.py b/plugins/rb/URLCache.py
new file mode 100644
index 0000000..e304dbf
--- /dev/null
+++ b/plugins/rb/URLCache.py
@@ -0,0 +1,209 @@
+# -*- Mode: python; coding: utf-8; tab-width: 8; indent-tabs-mode: t; -*-
+#
+# Copyright (C) 2009 Jonathan Matthew
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# The Rhythmbox authors hereby grant permission for non-GPL compatible
+# GStreamer plugins to be used and distributed together with GStreamer
+# and Rhythmbox. This permission is above and beyond the permissions granted
+# by the GPL license by which Rhythmbox is covered. If you modify this code
+# you may extend this exception to your version of the code, but you are not
+# obligated to do so. If you do not wish to do so, delete this exception
+# statement from your version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+
+import os
+import os.path
+import time
+import errno
+
+import rb
+
+SECS_PER_DAY = 86400
+
+class URLCache(object):
+ def __init__(self, name, path, refresh=-1, discard=-1, lifetime=-1):
+ """
+ Creates a new cache. 'name' is a symbolic name for the cache.
+ 'path' is either an absolute path to the cache directory, or a
+ path relative to the user cache directory.
+ 'refresh' is the length of time for which cache entries are always
+ considered valid. 'lifetime' is the maximum time an entry can live
+ in the cache. 'discard' is the length of time for which a cache entry
+ can go unused before being discarded. These are all specified in days,
+ with -1 meaning unlimited.
+ """
+ self.name = name
+ if path.startswith("/"):
+ self.path = path
+ else:
+ self.path = os.path.join(rb.user_cache_dir(), path)
+
+ self.refresh = refresh
+ self.discard = discard
+ self.lifetime = lifetime
+
+ def clean(self):
+ """
+ This sweeps all entries stored in the cache, removing entries that
+ are past the cache lifetime limit, or have not been used for longer
+ than the cache discard time. This should be called on plugin activation,
+ and perhaps periodically (infrequently) after that.
+ """
+ now = time.time()
+ if os.path.exists(self.path) == False:
+ print "cache directory %s does not exist" % self.path
+ return
+
+
+ print "cleaning cache directory %s" % self.path
+ for f in os.listdir(self.path):
+ try:
+ path = os.path.join(self.path, f)
+ stat = os.stat(path)
+
+ if self.lifetime != -1:
+ if stat.st_ctime + (self.lifetime * SECS_PER_DAY) < now:
+ print "removing stale cache file %s:%s: age %s (past lifetime limit)" % (self.name, f, int(now - stat.st_ctime))
+ os.unlink(path)
+ continue
+
+ if self.discard != -1:
+ # hmm, noatime mounts will break this, probably
+ if stat.st_atime + (self.discard * SECS_PER_DAY) < now:
+ print "removing stale cache file %s:%s: age %s (past discard limit)" % (self.name, f, int(now - stat.st_atime))
+ os.unlink(path)
+ continue
+
+ except Exception, e:
+ print "error while checking cache entry %s:%s: %s" % (self.name, f, str(e))
+ print "finished cleaning cache directory %s" % self.path
+
+ def cachefile(self, key):
+ """
+ Constructs the full path of the file used to store a given cache key.
+ """
+ fkey = key.replace('/', '_')
+ return os.path.join(self.path, fkey)
+
+ def check(self, key, can_refresh=True):
+ """
+ Checks for a fresh cache entry with a given key.
+ If can_refresh is True, only cache entries that are within the
+ refresh time will be considered.
+ If can_refresh is False, cache entries that are older than the
+ refresh time, but not past the lifetime limit or discard period,
+ will also be considered.
+ The intent is to allow older cache entries to be used if a network
+ connection is not available or if the origin site is down.
+
+ If successful, this returns the name of the file storing the cached data.
+ Otherwise, it returns None.
+ """
+ now = time.time()
+ try:
+ path = self.cachefile(key)
+ stat = os.stat(path)
+
+ # check freshness
+ stale = False
+ if can_refresh and self.refresh != -1:
+ if stat.st_ctime + (self.refresh * SECS_PER_DAY) < now:
+ stale = True
+
+ if self.lifetime != -1:
+ if stat.st_ctime + (self.lifetime * SECS_PER_DAY) < now:
+ stale = True
+
+ if stale:
+ print "removing stale cache entry %s:%s" % (self.name, key)
+ os.unlink(path)
+ return None
+
+ return path
+
+ except Exception, e:
+ if hasattr(e, 'errno') is False or (e.errno != errno.ENOENT):
+ print "error checking cache for %s:%s: %s" % (self.name, key, e)
+ return None
+
+
+ def store(self, key, data):
+ """
+ Stores an entry in the cache.
+ """
+ try:
+ # construct cache filename
+ if not os.path.exists(self.path):
+ os.makedirs(self.path, mode=0700)
+ path = self.cachefile(key)
+
+ # consider using gio set contents async?
+ f = open(path, 'w')
+ f.write(data)
+ f.close()
+
+ print "stored cache data %s:%s" % (self.name, key)
+ except Exception, e:
+ print "exception storing cache data %s:%s: %s" % (self.name, key, e)
+
+
+ def __fetch_cb(self, data, url, key, callback, args):
+ if data is None:
+ cachefile = self.check(key, False)
+ if cachefile is not None:
+ f = open(cachefile)
+ data = f.read()
+ f.close()
+ if callback(data, *args) is False:
+ print "cache entry %s:%s invalidated by callback" % (self.name, key)
+ os.unlink(cachefile)
+ else:
+ callback(None, *args)
+ else:
+ if callback(data, *args) is False:
+ print "cache entry %s:%s invalidated by callback" % (self.name, key)
+ else:
+ self.store(key, data)
+
+ def fetch(self, key, url, callback, *args):
+ """
+ Retrieve the specified URL, satisfying the request from the cache
+ if possible, and refreshing the cache if necessary.
+
+ The callback function may return False to indicate that the data
+ passed to it is invalid. Generally this should only happen if the
+ data cannot be parsed and it is likely that a later attempt to fetch
+ from the origin site will result in valid data.
+ """
+ # check if we've got a fresh entry in the cache
+ print "fetching cache entry %s:%s [%s]" % (self.name, key, url)
+ cachefile = self.check(key, True)
+ if cachefile is not None:
+ # could use a loader here, maybe
+ f = open(cachefile)
+ data = f.read()
+ f.close()
+ if callback(data, *args) is not False:
+ return
+
+ print "cache entry %s:%s invalidated by callback" % (self.name, key)
+ os.unlink(cachefile)
+
+ ld = rb.Loader()
+ ld.get_url(url, self.__fetch_cb, url, key, callback, args)
+
+
+# vim: set ts=4 sw=4 expandtab :
diff --git a/plugins/rb/__init__.py b/plugins/rb/__init__.py
index 62b4f3d..7b65e61 100644
--- a/plugins/rb/__init__.py
+++ b/plugins/rb/__init__.py
@@ -37,6 +37,7 @@ from Loader import Loader
from Loader import ChunkLoader
from Loader import UpdateCheck
from Coroutine import Coroutine
+from URLCache import URLCache
#def _excepthandler (exc_class, exc_inst, trace):
# import sys
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]