r7465 - dumbhippo/trunk/firehose/firehose/jobs
- From: commits mugshot org
- To: online-desktop-list gnome org
- Subject: r7465 - dumbhippo/trunk/firehose/firehose/jobs
- Date: Thu, 1 May 2008 16:04:08 -0500 (CDT)
Author: walters
Date: 2008-05-01 16:04:07 -0500 (Thu, 01 May 2008)
New Revision: 7465
Modified:
dumbhippo/trunk/firehose/firehose/jobs/poller.py
Log:
Add some more feed processing
Modified: dumbhippo/trunk/firehose/firehose/jobs/poller.py
===================================================================
--- dumbhippo/trunk/firehose/firehose/jobs/poller.py 2008-05-01 20:10:26 UTC (rev 7464)
+++ dumbhippo/trunk/firehose/firehose/jobs/poller.py 2008-05-01 21:04:07 UTC (rev 7465)
@@ -68,6 +68,20 @@
parent.remove(node)
return lxml.etree.tostring(tree, pretty_print=True)
+class RegexpEater(FeedPostProcessor):
+ def __init__(self, regexps):
+ self.__regexps = map(re.compile, regexps)
+
+ def get_value(self):
+ value = StringIO(super(RegexpEater, self).get_value())
+ outvalue = StringIO()
+ for line in value:
+ for regexp in self.__regexps:
+ if regexp.search(line):
+ continue
+ outvalue.write(line)
+ return outvalue
+
class ChainedProcessors(object):
def __init__(self, processors):
super(ChainedProcessors, self).__init__()
@@ -85,9 +99,14 @@
buf = processor.get_value()
return buf
+# Define a shared eater for rss which has a lastBuildDate
+rss_eater = XmlElementEater(['/rss/channel/lastBuildDate', '/rss/channel/pubDate'])
# This maps from a regular expression matching a URL to a list of processors
feed_transformations = [
- (r'digg.com/users/.*/history/diggs.rss', [XmlElementEater(['/rss/channel/lastBuildDate', '/rss/channel/pubDate'])]),
+ (r'digg.com/users/.*/history/diggs.rss', [rss_eater]),
+ (r'picasaweb.google.com.*feed.*base.*album', [rss_eater]),
+ (r'google.com/reader/public', [XmlElementEater(['/feed/updated'])]),
+ (r'blogs.gnome.org', [RegexpEater(['<!--.*page served in.*seconds.*-->'])]),
]
feed_transformations = [(re.compile(r'^https?://([A-Z0-9]+\.)*' + x[0]), x[1]) for x in feed_transformations]
@@ -294,4 +313,7 @@
processor = ChainedProcessors(transformers)
processor.feed(testdata)
print processor.get_value()
+ processor = ChainedProcessors([])
+ processor.feed(testdata)
+ print processor.get_value()
\ No newline at end of file
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]