+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
"""
gerrit-stats: Generate codereview stats based from Gerrit commits
Copyright (C) 2012 Diederik van Liere, Wikimedia Foundation
from datetime import datetime
class Metric(object):
    """A named Gerrit query plus the ssh command line that executes it.

    Attributes set by the constructor:
        name: label identifying the metric.
        raw_query: the search expression appended to ``gerrit query``.
        query: the complete ``ssh ... gerrit query`` command string.
    """

    def __init__(self, name, raw_query, settings):
        """Store the query and build the command from ``settings``.

        ``settings`` must expose ``port``, ``host`` and ``format``
        attributes; they are interpolated into the command as-is.
        """
        self.name = name
        self.raw_query = raw_query
        connection = (settings.port, settings.host, settings.format)
        self.query = 'ssh -p %s %s gerrit query --format=%s %s' % (
            connection + (self.raw_query,))
class Settings(object):
    """Configuration shared by every repository.

    Holds the Gerrit queries that drive the statistics, one Metric per
    query, the repository-name fragments to ignore, the parent project
    names, and ``whitelist``: a set of email addresses outside the
    wikimedia.org domain.
    """

    def __init__(self, settings):
        """Build the query table and one Metric for each entry.

        ``settings`` is handed unchanged to every Metric, which reads its
        ``port``, ``host`` and ``format`` attributes.
        """
        self.queries = {
            'only+1': '-- CodeReview+1 -CodeReview+2 -CodeReview-1 -CodeReview-2',
            'no_review': '-- -CodeReview+1 -CodeReview-1 -CodeReview+2 -CodeReview-2',
        }
        self.whitelist = set([
            'niklas.laxstrom@gmail.com',
            'roan.kattouw@gmail.com',
            'maxsem.wiki@gmail.com',
            's.mazeland@xs4all.nl',
            'jeroendedauw@gmail.com',
            'mediawiki@danielfriesen.name',
            'jdlrobson@gmail.com',
            'hashar@free.fr',
        ])
        self.ignore_repos = ['test']
        self.parents = [
            'mediawiki/core',
            'mediawiki/extensions',
            'operations',
            'analytics',
        ]
        # One Metric per configured query, keyed by the query's name.
        self.metrics = dict(
            (name, Metric(name, query, settings))
            for name, query in self.queries.items()
        )

    def __str__(self):
        return 'Metrics container object'
class Gerrit(object):
    """Default values for talking to the Gerrit server.

    Attributes:
        data_location: directory name used for the generated data files.
        host: Gerrit server host name.
        port: port number used in the ssh command.
        format: output format requested from ``gerrit query``.
    """

    def __init__(self):
        self.host, self.port = 'gerrit.wikimedia.org', 29418
        self.format = 'JSON'
        self.data_location = 'data'

    def __str__(self):
        return 'Codereview settings object.'
class Repo(object):
    """Per-repository counters and output-file bookkeeping.

    Attributes set by the constructor:
        name: repository name as passed in.
        touched: False until a caller flips it.
        dataset: one dict per metric name with the keys 'oldest',
            'wikimedian', 'volunteer' and 'total'.
        email: dict with 'wikimedian' and 'volunteer' sets of addresses.
        num_metrics: number of metrics in ``dataset``.
        filename: '<basename of name>.csv'.
        filemode: 'w' when the CSV does not exist yet, otherwise 'a'.
        today: datetime.today() at construction time.
    """

    def __init__(self, name, settings, gerrit):
        """Create the data directory if needed and zero all counters.

        ``settings.metrics`` is iterated for the metric names and
        ``gerrit.data_location`` is the root directory for output.
        """
        self.touched = False
        self.name = name
        self.dataset = {}
        self.create_path(self.name, gerrit)
        self.filename = '%s.csv' % self.determine_filename(self.name)
        self.filemode = self.determine_filemode(self.filename, gerrit)

        self.today = datetime.today()
        self.email = {'wikimedian': set(), 'volunteer': set()}
        self.num_metrics = 0

        for metric in settings.metrics:
            # Key order is kept as oldest/wikimedian/volunteer/total because
            # the CSV columns follow dict iteration order.
            self.dataset[metric] = {
                'oldest': datetime(2030, 1, 1),
                'wikimedian': 0,
                'volunteer': 0,
                'total': 0,
            }
            self.num_metrics += 1

    def __str__(self):
        return self.name

    def create_path(self, filename, gerrit):
        """Create ``<data_location>/<dirname of filename>`` if it is missing.

        Names without a directory component are left alone. An already
        existing directory is not an error; any other OSError (permission
        denied, a path component that is a regular file, ...) is re-raised
        instead of being silently discarded.
        """
        # Function-scope import keeps this block self-contained.
        import errno

        print(filename)
        subdir = os.path.dirname(filename)
        if subdir == '':
            return
        target = os.path.join(gerrit.data_location, subdir)
        try:
            os.makedirs(target)
        except OSError as err:
            if err.errno != errno.EEXIST:
                raise
        else:
            print('Creating %s...' % target)

    def determine_filename(self, filename):
        """Return the last path component of ``filename``."""
        return os.path.basename(filename)

    def determine_filemode(self, filename, settings):
        """Return 'a' if the file exists under ``settings.data_location``, else 'w'."""
        if os.path.isfile('%s/%s' % (settings.data_location, filename)):
            return 'a'
        return 'w'
-"""\r
-gerrit-stats: Generate codereview stats based from Gerrit commits\r
-Copyright (C) 2012 Diederik van Liere, Wikimedia Foundation\r
-\r
-This program is free software; you can redistribute it and/or\r
-modify it under the terms of the GNU General Public License\r
-as published by the Free Software Foundation; either version 2\r
-of the License, or (at your option) any later version.\r
-\r
-This program is distributed in the hope that it will be useful,\r
-but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
-GNU General Public License for more details.\r
-\r
-You should have received a copy of the GNU General Public License\r
-along with this program; if not, write to the Free Software\r
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.\r
-"""\r
-import subprocess\r
-import json\r
-import sys\r
-import os\r
-from datetime import datetime\r
-\r
-from classes import Gerrit, Settings, Metric, Repo\r
-\r
-def create_repo_set(gerrit, settings):\r
- repos = {}\r
- output = run_gerrit_query('ssh -p 29418 gerrit.wikimedia.org gerrit ls-projects')\r
- output = output.split('\n')\r
- for repo in output:\r
- repo = repo.strip()\r
- if len(repo) > 1:\r
- tests = [repo.find(ignore) == -1 for ignore in settings.ignore_repos]\r
- if all(tests):\r
- rp = Repo(repo, settings, gerrit)\r
- repos[rp.name] = rp\r
- return repos\r
-\r
-\r
-def is_wikimedian(email, whitelist):\r
- if email in whitelist:\r
- return True\r
- if email.endswith('wikimedia.org'):\r
- return True\r
- else:\r
- return False\r
-\r
-\r
-def set_delimiter(fields, counter):\r
- num_fields = len(fields)\r
- if num_fields-counter != 1:\r
- return ','\r
- else:\r
- return ''\r
-\r
-def output_results(fh, *args):\r
- args = [str(arg) for arg in args]\r
- output = ''.join(args)\r
- fh.write(output)\r
- sys.stdout.write(output)\r
-\r
-def write_heading(fh, repo):\r
- output_results(fh, 'data',',','repository',',')\r
- #fh.write('%s,%s,' % ('date', 'repository'))\r
- #sys.stdout.write('%s,%s,' % ('date', 'repository'))\r
- for metric_counter, (name, metric) in enumerate(repo.dataset.iteritems()):\r
- headings = metric.keys()\r
- for counter, heading in enumerate(headings):\r
- if metric_counter +1 == repo.num_metrics:\r
- delim = set_delimiter(headings, counter)\r
- else:\r
- delim = ','\r
- #fh.write('%s_%s%s' % (name, heading, delim))\r
- #sys.stdout.write('%s_%s%s' % (name, heading, delim))\r
- output_results(fh, name,'_', heading, delim)\r
- fh.write('\n')\r
- sys.stdout.write('\n')\r
-\r
-\r
-def construct_timestamp(epoch):\r
- return datetime.fromtimestamp(epoch)\r
-\r
-\r
-def run_gerrit_query(query):\r
- query = query.split(' ')\r
- output = subprocess.Popen(query, shell=False, stdout=subprocess.PIPE).communicate()[0]\r
- return output\r
-\r
-\r
-def create_dataset(repos, gerrit):\r
- for key, repo in repos.iteritems():\r
- fh = open('%s/%s' % (gerrit.data_location, repo.filename), repo.filemode)\r
- if repo.filemode == 'w':\r
- write_heading(fh, repo)\r
- #sys.stdout.write('%s-%s-%s,%s,' % (repo.today.month,repo.today.day,repo.today.year, repo.name))\r
- #fh.write('%s-%s-%s,%s,' % (repo.today.month,repo.today.day,repo.today.year, repo.name))\r
- output_results(fh, repo.today.month,'-',repo.today.day,'-',repo.today.year,',',repo.name,',')\r
- print_dict(repo, fh)\r
- sys.stdout.write('\n*****************\n')\r
- sys.stdout.write('\n')\r
- fh.write('\n')\r
- fh.close()\r
-\r
-\r
-def print_dict(repo, fh, ident = '', braces=1):\r
- """ Recursively prints nested dictionaries."""\r
- dataset = repo.dataset\r
- for metric_counter, metric in enumerate(dataset):\r
- fields = dataset[metric].keys()\r
- for counter, field in enumerate(fields):\r
- if metric_counter +1 == repo.num_metrics:\r
- delim = set_delimiter(fields, counter)\r
- else:\r
- delim = ','\r
- #print delim\r
- sys.stdout.write('%s%s' % (dataset[metric][field], delim))\r
- fh.write('%s%s' % (dataset[metric][field], delim))\r
-\r
-\r
-def cleanup_volunteers(repos, whitelist):\r
- for name, repo in repos.iteritems():\r
- for ws in whitelist:\r
- if ws in repo.email['volunteer']:\r
- repo.email['wikimedian'].add(ws)\r
- repo.email['email']['volunteer'].remove(ws)\r
- return repos\r
-\r
-\r
-def construct_dataset(settings, repos, metric, output, gerrit): \r
- output=output.split('\n')\r
- for obs in output:\r
- try:\r
- obs= json.loads(obs)\r
- except ValueError, e:\r
- print e\r
-\r
- if isinstance(obs, dict) and 'rowCount' not in obs:\r
- try:\r
- project = obs['project']\r
- except KeyError, e:\r
- print e, obs\r
- email = obs['owner']['email']\r
- repo = repos.get(project, {})\r
- if repo == {}:\r
- continue\r
- dt = construct_timestamp(obs['createdOn'])\r
- \r
- # print "REPO: %s" % repo\r
- # print "PROJECT: %s" % project\r
- # print "METRIC: %s" % metric\r
- # print "DATASET: %s" % repo.dataset\r
-\r
- if repo.dataset[metric]['oldest'] > dt:\r
- repo.dataset[metric]['oldest'] = dt\r
- repo.dataset[metric]['total'] +=1\r
- if is_wikimedian(email, settings.whitelist) == True:\r
- repo.dataset[metric]['wikimedian'] +=1\r
- repo.email['wikimedian'].add(email)\r
- else:\r
- repo.dataset[metric]['volunteer'] +=1\r
- repo.email['volunteer'].add(email)\r
- repo.touched = True\r
-\r
-\r
-def main():\r
- gerrit = Gerrit()\r
- settings = Settings(gerrit)\r
- print 'Fetching list of all gerrit repositories...'\r
- repos = create_repo_set(gerrit, settings)\r
- \r
- for metric in settings.metrics.itervalues():\r
- #query = 'ssh -p %s %s gerrit query --format=%s %s' % (gerrit.port, gerrit.host, gerrit.format, question)\r
- output = run_gerrit_query(metric.query)\r
- print 'Running %s' % metric.query\r
- construct_dataset(settings, repos, metric.name, output, gerrit)\r
-\r
- print 'Fixing miscategorization of volunteer engineers...'\r
- repos = cleanup_volunteers(repos, settings.whitelist)\r
- print 'Creating datasets...'\r
- create_dataset(repos, gerrit)\r
-\r
-\r
-if __name__== '__main__':\r
- main()
\ No newline at end of file
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+gerrit-stats: Generate codereview stats based from Gerrit commits
+Copyright (C) 2012 Diederik van Liere, Wikimedia Foundation
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+"""
+import subprocess
+import json
+import sys
+import os
+from datetime import datetime
+
+from classes import Gerrit, Settings, Metric, Repo
+
def create_repo_set(gerrit, settings):
    """Return a dict mapping repository name to a Repo for each listed project.

    Runs ``gerrit ls-projects`` over ssh against ``gerrit.host`` and
    ``gerrit.port`` (rather than a hard-coded server) and builds one Repo
    per output line. Lines of at most one character are skipped, as are
    names containing any fragment from ``settings.ignore_repos``.
    """
    listing = run_gerrit_query(
        'ssh -p %s %s gerrit ls-projects' % (gerrit.port, gerrit.host))
    repos = {}
    for line in listing.split('\n'):
        name = line.strip()
        if len(name) <= 1:
            continue
        if any(ignored in name for ignored in settings.ignore_repos):
            continue
        repo = Repo(name, settings, gerrit)
        repos[repo.name] = repo
    return repos
+
+
def is_wikimedian(email, whitelist):
    """Return True if ``email`` is whitelisted or is a wikimedia.org address.

    The domain is compared as a whole: ``wikimedia.org`` itself and any of
    its subdomains match, while look-alike domains that merely end in the
    same characters (``notwikimedia.org``) do not.
    """
    if email in whitelist:
        return True
    # Everything after the last '@'; the whole string when there is no '@'.
    domain = email.rpartition('@')[2]
    return domain == 'wikimedia.org' or domain.endswith('.wikimedia.org')
+
+
def set_delimiter(fields, counter):
    """Return '' when ``counter`` is the last index of ``fields``, else ','."""
    is_last = (len(fields) - counter == 1)
    return '' if is_last else ','
+
def output_results(fh, *args):
    """Concatenate ``str()`` of every argument and write it to ``fh`` and stdout."""
    text = ''.join(map(str, args))
    # The file handle is written first, then standard output.
    for stream in (fh, sys.stdout):
        stream.write(text)
+
def write_heading(fh, repo):
    """Write the CSV header row for ``repo`` to ``fh`` and stdout.

    The row starts with ``date,repository,`` followed by one
    ``<metric>_<field>`` column per entry of every dict in
    ``repo.dataset``. Only the final column of the final metric is
    written without a trailing comma.
    """
    # The first column holds the run date, so it is labelled 'date'.
    output_results(fh, 'date', ',', 'repository', ',')
    last_metric = repo.num_metrics - 1
    for metric_counter, (name, metric) in enumerate(repo.dataset.items()):
        headings = list(metric)
        for counter, heading in enumerate(headings):
            if metric_counter == last_metric:
                delim = set_delimiter(headings, counter)
            else:
                delim = ','
            output_results(fh, name, '_', heading, delim)
    fh.write('\n')
    sys.stdout.write('\n')
+
+
def construct_timestamp(epoch):
    """Convert ``epoch`` (seconds) to a datetime via ``datetime.fromtimestamp``."""
    timestamp = datetime.fromtimestamp(epoch)
    return timestamp
+
+
def run_gerrit_query(query):
    """Run ``query`` as a subprocess and return everything it wrote to stdout.

    The command string is split on single spaces into an argument list and
    executed without a shell.
    """
    argv = query.split(' ')
    process = subprocess.Popen(argv, shell=False, stdout=subprocess.PIPE)
    stdout, _ = process.communicate()
    return stdout
+
+
def create_dataset(repos, gerrit):
    """Write one CSV data row per repository, echoing everything to stdout.

    Each repository's file is ``<gerrit.data_location>/<repo.filename>``,
    opened with ``repo.filemode``. The header row is written only when the
    mode is 'w'. The file is opened in a ``with`` block so the handle is
    closed even if one of the writes raises.
    """
    for repo in repos.values():
        path = '%s/%s' % (gerrit.data_location, repo.filename)
        with open(path, repo.filemode) as fh:
            if repo.filemode == 'w':
                write_heading(fh, repo)
            # Row prefix: month-day-year, then the repository name.
            output_results(fh, repo.today.month, '-', repo.today.day, '-',
                           repo.today.year, ',', repo.name, ',')
            print_dict(repo, fh)
            sys.stdout.write('\n*****************\n')
            sys.stdout.write('\n')
            fh.write('\n')
+
+
def print_dict(repo, fh, ident = '', braces=1):
    """Write every value in ``repo.dataset`` as one comma-separated row.

    Each value goes to stdout and to ``fh``. Values are followed by a
    comma, except the last field of the last metric. ``ident`` and
    ``braces`` are accepted but not used.
    """
    dataset = repo.dataset
    final_metric = repo.num_metrics - 1
    for metric_index, metric in enumerate(dataset):
        values = dataset[metric]
        fields = values.keys()
        for field_index, field in enumerate(fields):
            if metric_index == final_metric:
                delim = set_delimiter(fields, field_index)
            else:
                delim = ','
            cell = '%s%s' % (values[field], delim)
            sys.stdout.write(cell)
            fh.write(cell)
+
+
def cleanup_volunteers(repos, whitelist):
    """Move whitelisted addresses from each repo's volunteer set to wikimedian.

    ``repos`` maps names to objects whose ``email`` attribute is a dict
    with 'wikimedian' and 'volunteer' sets. The sets are modified in place
    and the same ``repos`` mapping is returned.
    """
    for repo in repos.values():
        volunteers = repo.email['volunteer']
        # Collect matches first so the set is not mutated while it is read.
        misfiled = volunteers.intersection(whitelist)
        repo.email['wikimedian'].update(misfiled)
        volunteers.difference_update(misfiled)
    return repos
+
+
def construct_dataset(settings, repos, metric, output, gerrit):
    """Fold the JSON lines in ``output`` into the counters of ``repos``.

    Every non-blank line is parsed as JSON. Lines that fail to parse,
    values that are not dicts, and dicts containing 'rowCount' are
    skipped. A record without a 'project' key is reported and skipped
    rather than processed with an undefined or stale project name.
    Records for projects missing from ``repos`` are ignored.

    For each remaining record the matching repo's ``dataset[metric]`` has
    'total' incremented, 'oldest' lowered to the record's creation time
    when that is earlier, and either 'wikimedian' or 'volunteer'
    incremented according to ``is_wikimedian``. The owner's address is
    added to the corresponding ``repo.email`` set and ``repo.touched`` is
    set to True.

    ``gerrit`` is accepted but not used.
    """
    for line in output.split('\n'):
        if not line.strip():
            # Blank lines carry no record; skip them without reporting an error.
            continue
        try:
            obs = json.loads(line)
        except ValueError as e:
            print(e)
            continue

        if not isinstance(obs, dict) or 'rowCount' in obs:
            continue

        try:
            project = obs['project']
        except KeyError as e:
            print('%s %s' % (e, obs))
            continue

        email = obs['owner']['email']
        repo = repos.get(project)
        if repo is None:
            continue
        dt = construct_timestamp(obs['createdOn'])

        counters = repo.dataset[metric]
        if counters['oldest'] > dt:
            counters['oldest'] = dt
        counters['total'] += 1
        if is_wikimedian(email, settings.whitelist):
            counters['wikimedian'] += 1
            repo.email['wikimedian'].add(email)
        else:
            counters['volunteer'] += 1
            repo.email['volunteer'].add(email)
        repo.touched = True
+
+
def main():
    """Fetch the repository list, run every metric query and write the CSVs."""
    server = Gerrit()
    config = Settings(server)
    print('Fetching list of all gerrit repositories...')
    repos = create_repo_set(server, config)

    for metric in config.metrics.values():
        # The query runs first; the message is printed once it has returned.
        raw_output = run_gerrit_query(metric.query)
        print('Running %s' % metric.query)
        construct_dataset(config, repos, metric.name, raw_output, server)

    print('Fixing miscategorization of volunteer engineers...')
    repos = cleanup_volunteers(repos, config.whitelist)
    print('Creating datasets...')
    create_dataset(repos, server)


if __name__ == '__main__':
    main()
\ No newline at end of file