Re: Contributed work statistic from git
- From: Milan Crha <mcrha redhat com>
- To: gnome-bugsquad gnome org
- Subject: Re: Contributed work statistic from git
- Date: Sun, 31 Jan 2010 23:17:14 +0100
Hi again,
I just made a few improvements to the script. The main change is that it
no longer uses a local git checkout, but the cgit interface, accessible at
http://git.gnome.org/browse/
I ran it for the year 2009, for all 688 listed modules, and it found 90250
commits in total. It does not look for the word 'bug' in the whole commit
message, but only in the first line, as shown on the log page when
'expand' is not enabled. I chose the word 'bug' because it's used in gtk+,
evolution and other modules, though it may not be commonly used elsewhere;
I do not know. Nonetheless, if you are interested, here are the results:
Commits with bug reference:
Milan Crha 507
Matthew Barnes 230
Behdad Esfahbod 142
Bastien Nocera 136
Richard Hughes 104
Colin Walters 101
Christian Persch 99
Hans Breuer 93
Srinivasa Ragavan 87
Alexander Larsson 86
--------------------------------------------------------------------------------
Total commits:
Ulrik Sverdrup 1730
Bastien Nocera 1352
Jürg Billeter 1276
Richard Hughes 1232
Morten Welinder 1171
Matthew Barnes 1166
Zeeshan Ali (Khattak) 1141
Alexander Larsson 1098
Philippe Rouquier 1083
Daniel Nylander 1050
I also have the commits-per-module numbers, but that's pretty boring reading.
Bye,
Milan
#!/usr/bin/python
from HTMLParser import HTMLParser
from datetime import datetime, timedelta
from urllib2 import urlopen
from sys import stdout
# Reporting window as inclusive dates in YYYY-MM-DD form; the commit
# parser compares them as plain strings against each row's date.
dt_start = "2009-01-01"
dt_end = "2009-12-31"

# Root of the cgit web interface; keep the last slash there.
cgit_base_url = "http://git.gnome.org/browse/"

# Modules to check; if left empty the list is filled from the base URI.
modules_list = list()

# Per-author tallies: commits mentioning 'bug', and all commits.
cbugs = dict()
ctotal = dict()

# Running total of in-range commits found over all checked modules.
total_commits_read = 0
class ModulesHTMLParser(HTMLParser):
    """Collect module names from the repository index page.

    Once a ``<td>`` whose ``class`` is ``sublevel-repo`` has been seen, the
    next ``<a>`` tag is inspected: if its ``href`` starts with ``/browse/``
    the rest of the path, without a trailing slash, is appended to the
    module-level ``modules_list``.
    """

    # Path prefix a module link must carry to be accepted.
    _PREFIX = "/browse/"

    # True between a "sublevel-repo" cell and the next <a> tag.
    read_repo = False

    def handle_starttag(self, tag, attrs):
        """Track repository cells and harvest the link that follows them."""
        global modules_list
        if tag == "td":
            for name, value in attrs:
                # HTMLParser reports None for attributes written without
                # a value, so guard before calling string methods.
                if name == "class" and value and value.lower() == "sublevel-repo":
                    self.read_repo = True
        if self.read_repo and tag == "a":
            for name, value in attrs:
                if name == "href" and value and value.startswith(self._PREFIX):
                    # Drop the prefix and an optional trailing slash instead
                    # of blindly cutting the last character off.
                    module = value[len(self._PREFIX):].rstrip("/")
                    if module:
                        modules_list.append(module)
            # Only the first link after the cell is considered.
            self.read_repo = False
class CommitsHTMLParser(HTMLParser):
    """Tally commits from the table rows of a commit-log HTML page.

    Only rows inside a ``<div class="content">`` are looked at.  Within a
    row the first three ``<td>`` cells are read as date, commit text and
    author.  When a row ends and all three are non-empty:

    * if the date lies within ``dt_start``..``dt_end`` (inclusive, compared
      as strings) the row is counted in ``read_commits`` and in the
      module-level ``ctotal`` dict, and also in ``cbugs`` when the text
      contains ``bug`` (case-insensitive substring);
    * if the date is before ``dt_start``, ``read_done`` is set.

    The same instance may be fed several pages; the caller resets
    ``found_content`` between pages to detect an empty page.
    """

    # Markers of a relative age in the date cell, paired with the matching
    # ``timedelta`` keyword.  Order matters: the first marker found wins.
    _RELATIVE_UNITS = (("days", "days"), ("hours", "hours"), ("min.", "minutes"))

    in_content = 0         # > 0 while inside a content div
    found_content = False  # set once any date cell text has been seen
    col = -1               # zero-based index of the current <td> in the row
    val_when = ""          # date of the current row
    val_text = ""          # commit text of the current row
    val_author = ""        # author of the current row
    read_commits = 0       # number of rows counted as in-range commits
    read_done = False      # set when a row older than dt_start was seen

    def handle_starttag(self, tag, attrs):
        """Enter the content div and keep track of the current column."""
        # Membership test instead of attrs[0]: a <div> without attributes
        # must not raise IndexError, and "class" need not come first.
        if tag == "div" and ("class", "content") in attrs:
            self.in_content += 1
        if self.in_content > 0:
            if tag == "tr":
                self.col = -1
            elif tag == "td":
                self.col += 1

    def handle_endtag(self, tag):
        """Account for a finished row, or leave the content div."""
        global dt_start, dt_end, cbugs, ctotal
        if self.in_content <= 0:
            return
        if tag == "tr":
            if self.val_when != "" and self.val_author != "" and self.val_text != "":
                if dt_start <= self.val_when <= dt_end:
                    self.read_commits += 1
                    if 'bug' in self.val_text.lower():
                        cbugs[self.val_author] = cbugs.get(self.val_author, 0) + 1
                    ctotal[self.val_author] = ctotal.get(self.val_author, 0) + 1
                if self.val_when < dt_start:
                    self.read_done = True
            # Start the next row from a clean slate.
            self.val_when = ""
            self.val_author = ""
            self.val_text = ""
        elif tag == "div":
            # NOTE(review): every </div> seen while inside the content
            # region decrements the counter, including the end of a nested
            # div that never incremented it.
            self.in_content -= 1

    def handle_data(self, data):
        """Store cell text according to the column it belongs to."""
        if self.in_content <= 0:
            return
        if self.col == 0:
            self.val_when = self._absolute_date(data)
            self.found_content = True
        elif self.col == 1:
            # Text may arrive in several pieces; keep all of them.
            self.val_text += data
        elif self.col == 2:
            self.val_author = data

    def _absolute_date(self, text):
        """Turn a relative age such as ``3 days`` into an ISO date string.

        The leading number (everything before the first space) is
        subtracted from the current local time.  Text without a known
        unit marker is returned unchanged.  Raises ``ValueError`` when a
        marker is present but the leading part is not an integer.
        """
        lowered = text.lower()
        for marker, unit in self._RELATIVE_UNITS:
            # "> 0" rather than ">= 0": the marker must follow something.
            if lowered.find(marker) > 0:
                amount = int(text[0:text.find(" ")])
                moment = datetime.now() - timedelta(**{unit: amount})
                return moment.date().isoformat()
        return text
def check_module(name, idx, tot):
global total_commits_read
print (" Checking in '%s' " % name) + ("(%d" % idx) + ("/%d)" % tot) ,
stdout.flush()
offset = 0
url = cgit_base_url + name + "/log/?ofs="
commitsParser = CommitsHTMLParser()
while not commitsParser.read_done :
u = url + "%d" % offset
offset += 50
commitsParser.found_content = False
f = urlopen(u)
try:
for line in f:
commitsParser.feed (line)
finally:
f.close()
if (not commitsParser.found_content) :
break
print " found %d commits" % commitsParser.read_commits
total_commits_read += commitsParser.read_commits
def count_commits(items, limit=10):
    """Print the authors with the most commits, highest count first.

    *items* maps an author name to a commit count.  At most *limit* lines
    are printed (default 10; ``None`` prints every author), each with the
    name left-aligned in a 40-character column followed by the count.
    Authors with equal counts are listed in descending name order.
    """
    # Sorting on (count, name) in reverse gives the ranking and the
    # tie-break order in a single pass.
    ranked = sorted(items.items(),
                    key=lambda entry: (entry[1], entry[0]),
                    reverse=True)
    for name, value in ranked[:limit]:
        print("%-40s %s" % (name, value))
# Script body: discover the modules if needed, scan each one, then report.
print("Calculate git commit statistics between %s" % dt_start + " and %s" % dt_end)

if not modules_list:
    # No modules configured: read them from the cgit index page.
    print(" * Retrieving list of modules...")
    modulesParser = ModulesHTMLParser()
    f = urlopen(cgit_base_url)
    try:
        for line in f:
            modulesParser.feed(line)
    finally:
        f.close()

modules_count = len(modules_list)
for mod_idx, m in enumerate(modules_list, 1):
    check_module(m, mod_idx, modules_count)

print(" * Done with all modules (total commits read: %d)" % total_commits_read)

separator = 80 * "-"

print("")
print(separator)
print("")
print("Commits with bug reference:")
count_commits(cbugs)

print("")
print(separator)
print("")
print("Total commits:")
count_commits(ctotal)
[
Date Prev][Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]