[tracker] Add util to generate real webhistory



commit e3d0bcb91f59616eb0aa8cc56f72315c362493cf
Author: Ivan Frade <ivan frade gmail com>
Date:   Tue Jul 14 00:48:52 2009 +0300

    Add util to generate real webhistory
    
    Added a program that reads the Epiphany web browsing history and prints it
    in Turtle format.

 utils/webhistory/epiphany-history-to-ttl.py |   53 +++++++++++++++++++++++++++
 1 files changed, 53 insertions(+), 0 deletions(-)
---
diff --git a/utils/webhistory/epiphany-history-to-ttl.py b/utils/webhistory/epiphany-history-to-ttl.py
new file mode 100644
index 0000000..0ca4a56
--- /dev/null
+++ b/utils/webhistory/epiphany-history-to-ttl.py
@@ -0,0 +1,53 @@
+import xml.dom.minidom
+from xml.dom.minidom import Node
+import time
+import sys, os
+
+PROPERTIES = {2: ("nie:title", str),
+              3: ("nfo:uri", str),
+              4: ("nie:usageCounter", int),
+              6: ("nie:lastRefreshed", time.struct_time)}
+# Use time.struct_time as type for dates, even when the format is not that!
+
+def get_text (node):
+    text = ""
+    for subnode in node.childNodes:
+      if subnode.nodeType == Node.TEXT_NODE:
+        text += subnode.data
+    return text.encode ('utf8').replace ('"', '') # Use a safer method!
+
+def process_file (filename):
+    doc = xml.dom.minidom.parse(filename)
+    
+    for node in doc.getElementsByTagName ("node"):
+        print "<uri:uuid:epiphany-webhistory-%s> a nfo:WebHistory" % (node.getAttribute ("id")),
+        
+        for prop in node.getElementsByTagName ("property"):
+            prop_id = int(prop.getAttribute ("id"))
+
+            if (PROPERTIES.has_key (prop_id)):
+                prop_name, prop_type = PROPERTIES [prop_id]
+
+                if (prop_type == str):
+                    print  ';\n\t%s "%s"' % (prop_name, get_text (prop)),
+
+                elif (prop_type == int):
+                    print ';\n\t%s %s' % (prop_name, get_text (prop)),
+
+                elif (prop_type == time.struct_time):
+                    print ';\n\t%s "%s"' % (prop_name, time.strftime ("%Y%m%dT%H:%m:%S",time.localtime (int(get_text (prop))))),
+        print ".\n"
+        
+
+def print_headers ():
+    print "@prefix nfo: <http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#>."
+    print "@prefix nie: <http://www.semanticdesktop.org/ontologies/2007/01/19/nie#> ."
+
+if __name__ == "__main__":
+
+    epi_history = os.path.join (os.getenv ("HOME"), ".gnome2", "epiphany", "ephy-history.xml")
+    print >> sys.stderr, "Scanning", epi_history
+
+    print_headers ()
+    if (os.path.exists (epi_history)):
+        process_file (epi_history)



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]