[kupfer] plugin.epiphany: Parse Epiphany bookmarks with ElementTree



commit 1e047a540cf9bb801d0041b1d7c148d267393e61
Author: Ulrik Sverdrup <ulrik sverdrup gmail com>
Date:   Wed Oct 28 16:18:04 2009 +0100

    plugin.epiphany: Parse Epiphany bookmarks with ElementTree
    
    Remove lots of old code that we never used for Favicon parsing and
    History parsing (we can retrieve it in the history later if we need
    it).
    
    Parse Epiphany's RDF bookmarks file with ElementTree. This makes the
    code smaller, and more parts of kupfer now use the same XML parsing
    package.

 kupfer/plugin/epiphany.py         |   18 +++-
 kupfer/plugin/epiphany_support.py |  237 +++++++------------------------------
 2 files changed, 58 insertions(+), 197 deletions(-)
---
diff --git a/kupfer/plugin/epiphany.py b/kupfer/plugin/epiphany.py
index 8de7b75..dfb507c 100644
--- a/kupfer/plugin/epiphany.py
+++ b/kupfer/plugin/epiphany.py
@@ -1,7 +1,11 @@
+import os
+
 from kupfer.objects import Leaf, Action, Source, AppLeafContentMixin
 from kupfer.objects import UrlLeaf
 from kupfer import plugin_support
 
+from kupfer.plugin import epiphany_support
+
 __kupfer_name__ = _("Epiphany Bookmarks")
 __kupfer_sources__ = ("EpiphanySource", )
 __kupfer_contents__ = ("EpiphanySource", )
@@ -19,9 +23,17 @@ class EpiphanySource (AppLeafContentMixin, Source):
 		super(EpiphanySource, self).__init__(_("Epiphany Bookmarks"))
 	
 	def get_items(self):
-		from epiphany_support import EpiphanyBookmarksParser
-		parser = EpiphanyBookmarksParser()
-		bookmarks = parser.get_items()
+		fpath = os.path.expanduser(epiphany_support.EPHY_BOOKMARKS_FILE)
+		if not os.path.exists(fpath):
+			self.output_debug("Epiphany bookmarks file not found:", fpath)
+			return ()
+
+		try:
+			bookmarks = list(epiphany_support.parse_epiphany_bookmarks(fpath))
+		except EnvironmentError, exc:
+			self.output_error(exc)
+			return ()
+
 		return (UrlLeaf(href, title) for title, href in bookmarks)
 
 	def get_description(self):
diff --git a/kupfer/plugin/epiphany_support.py b/kupfer/plugin/epiphany_support.py
index 4fb878d..08ae2dc 100644
--- a/kupfer/plugin/epiphany_support.py
+++ b/kupfer/plugin/epiphany_support.py
@@ -1,197 +1,46 @@
 """
-This file was originally the Epiphany handler from the deskbar project
-
-/deskbar/handlers/epiphany.py
-
-It was downloaded from http://ftp.gnome.org/pub/GNOME/sources/deskbar-applet/
-
-Copyright Holder: Nigel Tao  <nigel tao myrealbox com>
-                  Raphael Slinckx  <rslinckx cvs gnome org>
-                  Mikkel Kamstrup Erlandsen  <kamstrup daimi au dk>
-
-License:
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software Foundation,
-    Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+Parse Epiphany's bookmarks file
+Inspired by the Epiphany handler from the deskbar project
 """
 
-
-import xml.sax
-from os.path import join, expanduser, exists
-
-EPHY_BOOKMARKS_FILE = expanduser("~/.gnome2/epiphany/bookmarks.rdf")
-EPHY_HISTORY_FILE   = expanduser("~/.gnome2/epiphany/ephy-history.xml")
-
-favicon_cache = None
-bookmarks = None
-smart_bookmarks = None
-shortcuts_to_smart_bookmarks_map = {}
-
-		
-class EpiphanyBookmarksParser(xml.sax.ContentHandler):
-	def __init__(self):
-		xml.sax.ContentHandler.__init__(self)
-		
-		self.chars = ""
-		self.title = None
-		self.href = None
-		
-		self._indexer = set() 
-	
-	def get_items(self):
-		"""
-		Returns a completed indexer with the contents of bookmark file
-		"""
-		if not self._indexer:
-			self._index_bookmarks()
-		return self._indexer
-
-	def _index_bookmarks(self):
-		if exists(EPHY_BOOKMARKS_FILE):
-			parser = xml.sax.make_parser()
-			parser.setContentHandler(self)
-			parser.parse(EPHY_BOOKMARKS_FILE)
-	
-	def characters(self, chars):
-		self.chars = self.chars + chars
-		
-	def startElement(self, name, attrs):
-		self.chars = ""
-		if name == "item":
-			self.title = None
-			self.href = None
-
-	def endElement(self, name):
-		if name == "title":
-			self.title = self.chars.encode('utf8')
-		elif name == "link":
-			self.href = self.chars.encode('utf8')
-		elif name == "item":
-			if self.href.startswith("javascript:"):
-				return
-			else:
-				# save bookmark
-				if self.href:
-					self._indexer.add((self.title, self.href))
-
-class EpiphanyFaviconCacheParser(xml.sax.ContentHandler):
-	def __init__(self):
-		xml.sax.ContentHandler.__init__(self)
-		self.ephy_dir = expanduser("~/.gnome2/epiphany")
-		self.filename = join(self.ephy_dir, "ephy-favicon-cache.xml")
-		
-		self.cache = None
-		
-		self.chars = ""
-		self.url = None
-		self.name = None
-	
-	def get_cache(self):
-		"""
-		Returns a dictionary of (host, favicon path) entries where
-		  host is the hostname, like google.com (without www)
-		  favicon path is the on-disk path to the favicon image file.
-		"""
-		if self.cache != None:
-			return self.cache
-		
-		self.cache = {}
-		if exists(self.filename):
-			parser = xml.sax.make_parser()
-			parser.setContentHandler(self)
-			parser.parse(self.filename)
-			
-		return self.cache
-	
-	def characters(self, chars):
-		self.chars = self.chars + chars
-		
-	def startElement(self, name, attrs):
-		self.chars = ""
-		if name == "property" and attrs['id'] == "2":
-			self.url = None
-		if name == "property" and attrs['id'] == "3":
-			self.name = None
-
-	def endElement(self, name):
-		if name == "property":
-			if self.url == None:
-				self.url = self.chars
-			elif self.name == None:
-				self.name = self.chars
-		elif name == "node":
-			# Splithost requires //xxxx[:port]/xxxx, so we remove "http:"
-			host = get_url_host(self.url)
-			self.cache[host] = join(self.ephy_dir, "favicon_cache", self.name.encode('utf8'))
-
-class EpiphanyHistoryParser(xml.sax.ContentHandler):
-	def __init__(self, handler, cache):
-		xml.sax.ContentHandler.__init__(self)
-
-		self.handler = handler;
-		self._cache = cache;
-		
-		self.url = None
-		self.title = None
-		self.icon = None
-		self._id = None;
-	
-		self._indexer = deskbar.Indexer.Indexer()
-
-		self._index_history();
-
-	def get_indexer(self):
-		"""
-		Returns a completed indexer with the contents of the history file
-		"""
-		return self._indexer;
-
-	def _index_history(self):
-		if exists(EPHY_HISTORY_FILE):
-			parser = xml.sax.make_parser()
-			parser.setContentHandler(self)
-			try:
-				parser.parse(EPHY_HISTORY_FILE)
-			except Exception, e:
-				print "Couldn't parse epiphany history file:", e
-
-	
-	def characters(self, chars):
-		self.chars = self.chars + chars
-		
-	def startElement(self, name, attrs):
-		self.chars = ""
-		if name == "property":
-			self._id = attrs['id']
-
-		if name == "node":
-			self.title = None
-			self.url = None
-			self.icon = None
-
-	def endElement(self, name):
-		if name == "property":
-			if self._id == "2":
-				self.title = self.chars.encode('utf8')
-			elif self._id == "3":
-				self.url = self.chars.encode('utf8')
-			elif self._id == "9":
-				self.icon = self.chars.encode('utf8')
-		elif name == "node":
-			icon = None
-			if self.icon in self._cache:
-				icon = self._cache[self.icon]
-
-			item = BrowserMatch(self.handler, self.title, self.url, True, icon=icon)
-			self._indexer.add("%s %s" % (self.title, self.url), item)
+__author__ = "Ulrik Sverdrup <ulrik sverdrup gmail com>"
+
+
+import xml.etree.cElementTree as ElementTree
+
+EPHY_BOOKMARKS_FILE = "~/.gnome2/epiphany/bookmarks.rdf"
+
+def parse_epiphany_bookmarks(filename):
+	"""
+	Yield a sequence of bookmarks
+	"""
+	UNWANTED_SCHEME = set(("data", "javascript"))
+
+	ns = u"{http://purl.org/rss/1.0/}";
+	ITEM_NAME = ns + "item"
+	HREF_NAME = ns + "link"
+	TITLE_NAME = ns + "title"
+
+	def get_item(entry):
+		"""Return a bookmarks item or None if not good"""
+		title, href = None, None
+		for child in entry.getchildren():
+			if child.tag == HREF_NAME:
+				href = child.text
+				if not href or href.split(":", 1)[0].lower() in UNWANTED_SCHEME:
+					return None
+			if child.tag == TITLE_NAME:
+				title = child.text
+		return title and href and (title, href)
+
+	for event, entry in ElementTree.iterparse(filename):
+		if entry.tag != ITEM_NAME:
+			continue
+		item = get_item(entry)
+		if item:
+			yield item
+
+if __name__ == '__main__':
+	import os
+	f = os.path.expanduser(EPHY_BOOKMARKS_FILE)
+	print "Got ET # bookmarks:", len(list(parse_epiphany_bookmarks(f)))



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]