Re: Contributed work statistic from git

From: Milan Crha <mcrha redhat com>
To: gnome-bugsquad gnome org
Subject: Re: Contributed work statistic from git
Date: Sun, 31 Jan 2010 23:17:14 +0100

	Hi again,
I just made a few improvements to the script. The main change is that it
doesn't use a local git checkout, but the cgit web interface, accessed at
http://git.gnome.org/browse/

I ran it for the year 2009, for all 688 listed modules, and it found 90250
commits in total. It doesn't look for the word 'bug' in the whole commit
message, but only in the first line, as shown on the log page when
expand is not enabled. I chose the word 'bug' because it's used in gtk+,
evolution and other modules, but it may not be commonly used elsewhere; I
don't know. Nonetheless, if you are interested, here are the results:

Commits with bug reference:
Milan Crha                               507
Matthew Barnes                           230
Behdad Esfahbod                          142
Bastien Nocera                           136
Richard Hughes                           104
Colin Walters                            101
Christian Persch                         99
Hans Breuer                              93
Srinivasa Ragavan                        87
Alexander Larsson                        86

--------------------------------------------------------------------------------

Total commits:
Ulrik Sverdrup                           1730
Bastien Nocera                           1352
Jürg Billeter                            1276
Richard Hughes                           1232
Morten Welinder                          1171
Matthew Barnes                           1166
Zeeshan Ali (Khattak)                    1141
Alexander Larsson                        1098
Philippe Rouquier                        1083
Daniel Nylander                          1050

I also have per-module commit numbers, but those make for pretty boring reading.
	Bye,
	Milan

#!/usr/bin/python


from HTMLParser import HTMLParser
from datetime import datetime, timedelta
from urllib2 import urlopen
from sys import stdout

# Reporting interval as ISO dates. Commit dates are compared against these
# with >= and <=, so both bounds are inclusive.
dt_start, dt_end = "2009-01-01", "2009-12-31"

# Root of the cgit web interface. Module names are appended directly to it,
# so keep the last slash there.
cgit_base_url = "http://git.gnome.org/browse/"

# Modules to check; when left empty it is filled from the page at the base URL.
modules_list = list()

# Per-author tallies: commits whose log text mentions 'bug', and all commits.
cbugs = dict()
ctotal = dict()

# Number of counted commits, summed over every checked module.
total_commits_read = 0

class ModulesHTMLParser(HTMLParser):
    """Collect module names from the cgit index page into ``modules_list``.

    A ``<td>`` whose class is ``sublevel-repo`` (compared case-insensitively)
    arms the parser. The next ``<a>`` whose ``href`` starts with ``/browse/``
    supplies the module name, which is appended to the module-level
    ``modules_list``; the parser is then disarmed until the next such cell.
    """

    # True between a "sublevel-repo" cell and the repository link that follows.
    read_repo = False

    def handle_starttag(self, tag, attrs):
        """Arm on repository cells and record the module a link points to.

        tag is the lower-cased tag name; attrs is the list of (name, value)
        pairs supplied by HTMLParser.
        """
        prefix = "/browse/"
        if tag == "td":
            for name, value in attrs:
                # HTMLParser reports value as None for an attribute written
                # without a value (e.g. <td class>), so guard before .lower().
                if name == "class" and value is not None and value.lower() == "sublevel-repo":
                    self.read_repo = True
        if self.read_repo and tag == "a":
            for name, value in attrs:
                if name == "href" and value is not None and value.startswith(prefix):
                    # Strip the prefix and a trailing slash only if one is
                    # there; blindly cutting the last character would damage
                    # names from links written without the final slash.
                    modules_list.append(value[len(prefix):].rstrip("/"))
                    self.read_repo = False

class CommitsHTMLParser(HTMLParser):
    """Tally commits per author from cgit log pages.

    Only table rows inside ``<div class="content">`` are examined. Within a
    row the first ``<td>`` is read as the commit date (or a relative age),
    the second as the commit text and the third as the author. When a row
    ends, a commit dated within ``dt_start``..``dt_end`` is added to the
    module-level ``ctotal`` dict and, if its text contains 'bug'
    (case-insensitive), to ``cbugs`` as well.

    One instance may be fed several pages in a row: ``read_commits``
    accumulates, ``read_done`` becomes True once a row older than
    ``dt_start`` is seen, and ``found_content`` reports whether any date
    cell was read since the caller last cleared it.
    """

    in_content = 0         # div nesting depth inside the content div; 0 = outside
    found_content = False  # set when a non-blank date cell has been read
    col = -1               # index of the current <td> in the row; -1 = none yet
    val_when = ""          # date of the row being read (ISO format)
    val_text = ""          # commit text of the row being read
    val_author = ""        # author of the row being read
    read_commits = 0       # commits counted inside the date interval
    read_done = False      # True after a commit older than dt_start was seen

    def handle_starttag(self, tag, attrs):
        """Track entry into the content div and the current table column."""
        if tag == "div":
            if self.in_content > 0:
                # Count nested divs too, so that their closing tags do not
                # end the content area prematurely.
                self.in_content += 1
            elif dict(attrs).get("class") == "content":
                # Looking the class up by name copes with divs that have no
                # attributes at all or list 'class' after other attributes.
                self.in_content = 1
        elif self.in_content > 0:
            if tag == "tr":
                self.col = -1
            elif tag == "td":
                self.col += 1

    def handle_endtag(self, tag):
        """Finish the current row on </tr>; leave one div level on </div>."""
        if self.in_content <= 0:
            return
        if tag == "tr":
            self._finish_row()
        elif tag == "div":
            self.in_content -= 1

    def _finish_row(self):
        """Count the collected row if it is complete, then reset the row state."""
        if self.val_when != "" and self.val_author != "" and self.val_text != "":
            # ISO dates sort correctly as plain strings.
            if dt_start <= self.val_when <= dt_end:
                self.read_commits += 1
                if "bug" in self.val_text.lower():
                    cbugs[self.val_author] = cbugs.get(self.val_author, 0) + 1
                ctotal[self.val_author] = ctotal.get(self.val_author, 0) + 1
            if self.val_when < dt_start:
                self.read_done = True
        self.val_when = ""
        self.val_author = ""
        self.val_text = ""

    def _to_iso_date(self, when):
        """Turn a relative age ('N days', 'N hours', 'N min.') into an ISO date.

        Text that matches none of the relative forms is returned unchanged.
        Raises ValueError if the text before the first space is not an integer.
        """
        lowered = when.lower()
        for marker, unit in (("days", "days"), ("hours", "hours"), ("min.", "minutes")):
            if lowered.find(marker) > 0:
                amount = int(when[0:when.find(" ")])
                return (datetime.now() - timedelta(**{unit: amount})).date().isoformat()
        return when

    def handle_data(self, data):
        """Store cell text for the date, commit text and author columns."""
        if self.in_content <= 0:
            return
        if self.col == 0:
            when = data.strip()
            if not when:
                # A whitespace-only chunk must not overwrite a date that was
                # already read for this row.
                return
            self.val_when = self._to_iso_date(when)
            self.found_content = True
        elif self.col == 1:
            self.val_text += data
        elif self.col == 2:
            author = data.strip()
            if author:
                self.val_author = author

def check_module(name, idx, tot):
	global total_commits_read

	print ("   Checking in '%s' " % name) + ("(%d" % idx) + ("/%d)" % tot) ,
	stdout.flush()

	offset = 0
	url = cgit_base_url + name + "/log/?ofs="
	commitsParser = CommitsHTMLParser()
	while not commitsParser.read_done :
		u = url + "%d" % offset
		offset += 50
		commitsParser.found_content = False
		f = urlopen(u)
		try:
			for line in f:
				commitsParser.feed (line)
		finally:
			f.close()
		if (not commitsParser.found_content) :
			break

	print " found %d commits" % commitsParser.read_commits
	total_commits_read += commitsParser.read_commits

def count_commits(items, limit=10):
    """Print the authors with the most commits, highest count first.

    items is a dict mapping author name to commit count. At most limit rows
    are printed (10 by default). Authors with equal counts are ordered by
    name, descending. Each row is the name left-justified to 40 characters,
    a space, and the count.
    """
    ranked = sorted(items.items(), key=lambda entry: (entry[1], entry[0]), reverse=True)
    for name, value in ranked[:limit]:
        # A single parenthesised argument prints the same text under both
        # the print statement and the print function.
        print("%-40s %s" % (name, value))


print "Calculate git commit statistics between %s" % dt_start + " and %s" % dt_end

if (modules_list == []) :
	print " * Retrieving list of modules..."
	modulesParser = ModulesHTMLParser()
	f = urlopen (cgit_base_url)
	try:
		for line in f:
			modulesParser.feed (line)
	finally:
		f.close()

modules_count = 0
for m in modules_list:
	modules_count += 1

mod_idx = 1
for m in modules_list:
	check_module(m, mod_idx, modules_count)
	mod_idx += 1
print " * Done with all modules (total commits read: %d)" % total_commits_read
print ""
print 80 * "-"
print ""

print "Commits with bug reference:"
count_commits(cbugs)
print
print 80 * "-"
print
print "Total commits:"
count_commits(ctotal)

References:
- Contributed work statistic from git
  - From: Milan Crha

[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]