r7470 - in dumbhippo/trunk/firehose/firehose: . jobs
- From: commits mugshot org
- To: online-desktop-list gnome org
- Subject: r7470 - in dumbhippo/trunk/firehose/firehose: . jobs
- Date: Tue, 6 May 2008 17:19:31 -0500 (CDT)
Author: walters
Date: 2008-05-06 17:19:31 -0500 (Tue, 06 May 2008)
New Revision: 7470
Modified:
dumbhippo/trunk/firehose/firehose/jobs/poller.py
dumbhippo/trunk/firehose/firehose/logstats.groovy
Log:
Tweaks to log parsing and feed caching
Modified: dumbhippo/trunk/firehose/firehose/jobs/poller.py
===================================================================
--- dumbhippo/trunk/firehose/firehose/jobs/poller.py 2008-05-06 19:48:53 UTC (rev 7469)
+++ dumbhippo/trunk/firehose/firehose/jobs/poller.py 2008-05-06 22:19:31 UTC (rev 7470)
@@ -109,7 +109,7 @@
class FeedTaskHandler(object):
FAMILY = 'FEED'
- def run(self, id, prev_hash, prev_timestamp, outpath=None):
+ def run(self, id, prev_hash, prev_timestamp, cachedir=None):
targeturl = id
transformlist = get_transformations(targeturl)
parsedurl = urlparse.urlparse(targeturl)
@@ -130,13 +130,15 @@
if response.status == 304:
_logger.info("Got 304 Unmodified for %r", targeturl)
return (prev_hash, prev_timestamp)
- if outpath is not None:
- outpath_tmpname = outpath+'.tmp'
+ if cachedir is not None:
+ quotedname = urllib.quote_plus(targeturl)
+ ts = int(time.time())
+ outpath = os.path.join(cachedir, quotedname + '.' + unicode(ts))
+ outpath_tmpname = outpath + '.tmp'
outfile = open(outpath_tmpname, 'w')
else:
outpath_tmpname = None
outfile = None
- processor = ChainedProcessors(transformlist)
data = StringIO()
buf = response.read(8192)
while buf:
@@ -145,13 +147,17 @@
data.write(buf)
buf = response.read(8192)
datavalue = data.getvalue()
+ processor = ChainedProcessors(transformlist)
processed = processor.process(datavalue)
hash = sha.new()
hash.update(processed)
hash_hex = hash.hexdigest()
if outfile is not None:
outfile.close()
- os.rename(outpath_tmpname, outpath)
+ if prev_hash != hash_hex:
+ os.rename(outpath_tmpname, outpath)
+ else:
+ os.unlink(outpath_tmpname)
timestamp_str = response.getheader('Last-Modified', None)
if timestamp_str is not None:
timestamp = mktime_tz(parsedate_tz(timestamp_str))
@@ -241,10 +247,8 @@
inst = fclass()
kwargs = {}
if self.__savefetches:
- quotedname = urllib.quote_plus(taskid)
- ts = int(time.time())
- outpath = os.path.join(os.getcwd(), 'data', quotedname + '.' + unicode(ts))
- kwargs['outpath'] = outpath
+ outpath = os.path.join(os.getcwd(), 'data', 'feedcache')
+ kwargs['cachedir'] = outpath
try:
(new_hash, new_timestamp) = inst.run(tid, prev_hash, prev_timestamp, **kwargs)
except Exception, e:
Modified: dumbhippo/trunk/firehose/firehose/logstats.groovy
===================================================================
--- dumbhippo/trunk/firehose/firehose/logstats.groovy 2008-05-06 19:48:53 UTC (rev 7469)
+++ dumbhippo/trunk/firehose/firehose/logstats.groovy 2008-05-06 22:19:31 UTC (rev 7470)
@@ -79,11 +79,16 @@
return
}
-def printTopKeys(String prefix, domainHash) {
+def printTopKeys(String prefix, domainHash, other) {
def keys = new ArrayList(domainHash.keySet())
keys.sort({ a,b -> domainHash[b].compareTo(domainHash[a])})
keys.subList(0, 5).each { k ->
- println " ${prefix} ${k} -> ${domainHash[k]}"
+ def otherValue = other != null ? other[k] : null;
+ print " ${prefix} ${k} -> ${domainHash[k]}"
+ if (otherValue != null)
+ println " (${otherValue})"
+ else
+ println ""
}
}
@@ -92,6 +97,6 @@
Date d = new Date(group.startDate)
def pollsPerSec = group.updates/(timeSliceMilliseconds/1000)
println "updates from ${d}: ${group.updates} (${pollsPerSec} checks per second)"
- printTopKeys("U", group.domainUnmodified)
- printTopKeys("M", group.domainModified)
+ printTopKeys("U", group.domainUnmodified, group.domainModified)
+ printTopKeys("M", group.domainModified, group.domainUnmodified)
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]