Normalizes whitespace.
author dsc <dsc@less.ly>
Fri, 25 May 2012 17:39:49 +0000 (10:39 -0700)
committer dsc <dsc@less.ly>
Fri, 25 May 2012 17:39:49 +0000 (10:39 -0700)
classes.py
stats.py

index 04b3096..7844d5f 100644 (file)
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 """
 gerrit-stats: Generate codereview stats based from Gerrit commits
 Copyright (C) 2012  Diederik van Liere, Wikimedia Foundation
@@ -21,98 +23,100 @@ import os
 from datetime import datetime
 
 class Metric(object):
-       '''
-       The Metric class 
-       '''
-       def __init__(self, name, raw_query, settings):
-               self.raw_query = raw_query
-               self.name = name
-               self.query = 'ssh -p %s %s gerrit query --format=%s %s' % (settings.port, settings.host, settings.format, self.raw_query)
+    '''
+    The Metric class
+    '''
+    def __init__(self, name, raw_query, settings):
+        '''
+        Keep the metric's name and raw query, and pre-build the ssh command
+        line that runs the query from the port, host and format attributes
+        of settings.
+        '''
+        self.name = name
+        self.raw_query = raw_query
+        connection = (settings.port, settings.host, settings.format)
+        self.query = 'ssh -p %s %s gerrit query --format=%s %s' % (connection + (raw_query,))
 
 
 class Settings(object):
-       '''
-       This object contains properties that apply to all repositories, including the queries that will be 
-       run to generate the statistics, a list of repositories to ignore and a set of engineers that do not use
-       a WMF email address and hence will be classified as volunteer.
-       '''
-       def __init__(self, settings):
-               self.queries = {'only+1':'-- CodeReview+1 -CodeReview+2 -CodeReview-1 -CodeReview-2',
-                                               'no_review':'-- -CodeReview+1 -CodeReview-1 -CodeReview+2 -CodeReview-2',
-                                               }
-               self.whitelist=set(['niklas.laxstrom@gmail.com','roan.kattouw@gmail.com','maxsem.wiki@gmail.com','s.mazeland@xs4all.nl','jeroendedauw@gmail.com','mediawiki@danielfriesen.name','jdlrobson@gmail.com','hashar@free.fr'])
-               self.ignore_repos = ['test']
-               self.metrics =  {}
-               self.parents = ['mediawiki/core',
-                                               'mediawiki/extensions',
-                                               'operations',
-                                               'analytics',    
-                                               ]
-
-               for name, query in self.queries.iteritems():
-                       self.metrics[name] = Metric(name, query, settings)
-
-       def __str__(self):
-               return 'Metrics container object'
+    '''
+    This object contains properties that apply to all repositories, including the queries that will be
+    run to generate the statistics, a list of repositories to ignore and a whitelist of engineers who do
+    not use a WMF email address and would otherwise be classified as volunteers.
+    '''
+    def __init__(self, settings):
+        '''
+        Define the gerrit queries, the e-mail whitelist, the repositories to
+        ignore and the parent projects, then wrap every query in a Metric
+        built with the given connection settings.
+        '''
+        self.queries = {
+            'only+1': '-- CodeReview+1 -CodeReview+2 -CodeReview-1 -CodeReview-2',
+            'no_review': '-- -CodeReview+1 -CodeReview-1 -CodeReview+2 -CodeReview-2',
+        }
+        # Explicitly listed addresses, kept in a set for membership tests.
+        self.whitelist = set([
+            'niklas.laxstrom@gmail.com',
+            'roan.kattouw@gmail.com',
+            'maxsem.wiki@gmail.com',
+            's.mazeland@xs4all.nl',
+            'jeroendedauw@gmail.com',
+            'mediawiki@danielfriesen.name',
+            'jdlrobson@gmail.com',
+            'hashar@free.fr',
+        ])
+        self.ignore_repos = ['test']
+        self.parents = [
+            'mediawiki/core',
+            'mediawiki/extensions',
+            'operations',
+            'analytics',
+        ]
+        # One Metric per query, keyed by the query's name.
+        self.metrics = dict(
+            (name, Metric(name, query, settings))
+            for name, query in self.queries.iteritems()
+        )
+    
+    def __str__(self):
+        '''Return a fixed label; it does not depend on the instance's contents.'''
+        label = 'Metrics container object'
+        return label
 
 
 class Gerrit(object):
-       '''
-       This object contains the setings to interact with the gerrit server, nothing fancy these are just
-       sensible defaults.
-       '''
-       def __init__(self):
-               self.data_location = 'data'
-               self.host = 'gerrit.wikimedia.org'
-               self.port = 29418
-               self.format = 'JSON'
-
-       def __str__(self):
-               return 'Codereview settings object.'
+    '''
+    This object contains the settings to interact with the gerrit server. Nothing fancy: these are just
+    sensible defaults.
+    '''
+    def __init__(self):
+        '''Fill in the default connection details and the output directory.'''
+        # How to reach the gerrit server over ssh.
+        self.host = 'gerrit.wikimedia.org'
+        self.port = 29418
+        # Value passed to gerrit's --format option.
+        self.format = 'JSON'
+        # Directory the CSV files are written to.
+        self.data_location = 'data'
+    
+    def __str__(self):
+        '''Return a fixed label; it does not depend on the instance's contents.'''
+        label = 'Codereview settings object.'
+        return label
 
 
 class Repo(object):
-       def __init__(self, name, settings, gerrit):
-               self.touched = False
-               self.name = name
-               self.dataset = {}
-               self.create_path(self.name, gerrit)
-               self.filename = ('%s.csv' % (self.determine_filename(self.name)))
-               self.filemode = self.determine_filemode(self.filename, gerrit)
-
-               self.today = datetime.today()
-               self.email = {}
-               self.email['wikimedian'] = set()
-               self.email['volunteer'] = set()
-               self.num_metrics = 0
-               for metric in settings.metrics:
-                       self.dataset[metric] = {}
-                       self.dataset[metric]['oldest'] = datetime(2030,1,1)
-                       self.dataset[metric]['wikimedian'] = 0
-                       self.dataset[metric]['volunteer'] = 0
-                       self.dataset[metric]['total'] = 0
-                       self.num_metrics +=1
-
-       def __str__(self):
-               return self.name
-
-       def create_path(self, filename, gerrit):
-               print filename
-               dir= os.path.dirname(filename)
-               if dir != '':
-                       dir = os.path.join(gerrit.data_location, dir)
-                       try:
-                               os.makedirs(dir)
-                               print 'Creating %s...' % dir
-                       except OSError:
-                               pass
-               
-       def determine_filename(self, filename):
-               return os.path.basename(filename)
-
-       def determine_filemode(self, filename, settings):
-               if os.path.isfile('%s/%s' % (settings.data_location, filename)) == False:
-                       return 'w'
-               else:
-                       return 'a'
+    
+    def __init__(self, name, settings, gerrit):
+        '''
+        Set up the bookkeeping for one repository: its CSV file name and
+        file mode, the e-mail sets per contributor group and one zeroed
+        counter dict per metric in settings.metrics.
+
+        NOTE(review): the file name only keeps the last path component of
+        name, while create_path prepares the directory part -- confirm that
+        this is intended.
+        '''
+        self.touched = False
+        self.name = name
+        self.dataset = {}
+        self.create_path(name, gerrit)
+        self.filename = '%s.csv' % self.determine_filename(name)
+        self.filemode = self.determine_filemode(self.filename, gerrit)
+
+        self.today = datetime.today()
+        self.email = {'wikimedian': set(), 'volunteer': set()}
+
+        self.num_metrics = 0
+        for metric in settings.metrics:
+            # Keys are inserted in the original order: stats.py iterates
+            # this dict to lay out the CSV columns.
+            self.dataset[metric] = {
+                'oldest': datetime(2030, 1, 1),
+                'wikimedian': 0,
+                'volunteer': 0,
+                'total': 0,
+            }
+            self.num_metrics += 1
+    
+    def __str__(self):
+        '''A repository is displayed as its name.'''
+        name = self.name
+        return name
+    
+    def create_path(self, filename, gerrit):
+        '''
+        Make sure the directory part of filename exists below
+        gerrit.data_location.
+
+        filename is echoed to stdout first. When it has no directory part
+        nothing else happens. A directory that already exists is left alone;
+        any other failure to create it is reported on stdout instead of
+        being dropped silently.
+        '''
+        print(filename)
+        # Named subdir rather than dir so the builtin is not shadowed.
+        subdir = os.path.dirname(filename)
+        if subdir == '':
+            return
+        path = os.path.join(gerrit.data_location, subdir)
+        try:
+            os.makedirs(path)
+        except OSError as e:
+            # makedirs raises when the directory is already there; that is
+            # the expected case on every run after the first.
+            if not os.path.isdir(path):
+                print('Could not create %s: %s' % (path, e))
+        else:
+            print('Creating %s...' % path)
+    
+    def determine_filename(self, filename):
+        '''Return the last path component of filename.'''
+        _, leaf = os.path.split(filename)
+        return leaf
+    
+    def determine_filemode(self, filename, settings):
+        '''
+        Return 'a' when filename already exists as a file inside
+        settings.data_location, and 'w' otherwise.
+        '''
+        target = '%s/%s' % (settings.data_location, filename)
+        if os.path.isfile(target):
+            return 'a'
+        return 'w'
 
index 40a9985..d1a295c 100644 (file)
--- a/stats.py
+++ b/stats.py
-"""\r
-gerrit-stats: Generate codereview stats based from Gerrit commits\r
-Copyright (C) 2012  Diederik van Liere, Wikimedia Foundation\r
-\r
-This program is free software; you can redistribute it and/or\r
-modify it under the terms of the GNU General Public License\r
-as published by the Free Software Foundation; either version 2\r
-of the License, or (at your option) any later version.\r
-\r
-This program is distributed in the hope that it will be useful,\r
-but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\r
-GNU General Public License for more details.\r
-\r
-You should have received a copy of the GNU General Public License\r
-along with this program; if not, write to the Free Software\r
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.\r
-"""\r
-import subprocess\r
-import json\r
-import sys\r
-import os\r
-from datetime import datetime\r
-\r
-from classes import Gerrit, Settings, Metric, Repo\r
-\r
-def create_repo_set(gerrit, settings):\r
-       repos = {}\r
-       output = run_gerrit_query('ssh -p 29418 gerrit.wikimedia.org gerrit ls-projects')\r
-       output = output.split('\n')\r
-       for repo in output:\r
-               repo = repo.strip()\r
-               if len(repo) > 1:\r
-                       tests = [repo.find(ignore) == -1 for ignore in settings.ignore_repos]\r
-                       if all(tests):\r
-                               rp = Repo(repo, settings, gerrit)\r
-                               repos[rp.name] = rp\r
-       return repos\r
-\r
-\r
-def is_wikimedian(email, whitelist):\r
-       if email in whitelist:\r
-               return True\r
-       if email.endswith('wikimedia.org'):\r
-               return True\r
-       else:\r
-               return False\r
-\r
-\r
-def set_delimiter(fields, counter):\r
-       num_fields = len(fields)\r
-       if num_fields-counter != 1:\r
-               return ','\r
-       else:\r
-               return ''\r
-\r
-def output_results(fh, *args):\r
-       args = [str(arg) for arg in args]\r
-       output = ''.join(args)\r
-       fh.write(output)\r
-       sys.stdout.write(output)\r
-\r
-def write_heading(fh, repo):\r
-       output_results(fh, 'data',',','repository',',')\r
-       #fh.write('%s,%s,' % ('date', 'repository'))\r
-       #sys.stdout.write('%s,%s,' % ('date', 'repository'))\r
-       for metric_counter, (name, metric) in enumerate(repo.dataset.iteritems()):\r
-               headings = metric.keys()\r
-               for counter, heading in enumerate(headings):\r
-                       if metric_counter +1 == repo.num_metrics:\r
-                               delim = set_delimiter(headings, counter)\r
-                       else:\r
-                               delim = ','\r
-                       #fh.write('%s_%s%s' % (name, heading, delim))\r
-                       #sys.stdout.write('%s_%s%s' % (name, heading, delim))\r
-                       output_results(fh, name,'_', heading, delim)\r
-       fh.write('\n')\r
-       sys.stdout.write('\n')\r
-\r
-\r
-def construct_timestamp(epoch):\r
-       return datetime.fromtimestamp(epoch)\r
-\r
-\r
-def run_gerrit_query(query):\r
-       query = query.split(' ')\r
-       output = subprocess.Popen(query, shell=False, stdout=subprocess.PIPE).communicate()[0]\r
-       return output\r
-\r
-\r
-def create_dataset(repos, gerrit):\r
-       for key, repo in repos.iteritems():\r
-               fh = open('%s/%s' % (gerrit.data_location, repo.filename), repo.filemode)\r
-               if repo.filemode == 'w':\r
-                       write_heading(fh, repo)\r
-               #sys.stdout.write('%s-%s-%s,%s,' % (repo.today.month,repo.today.day,repo.today.year, repo.name))\r
-               #fh.write('%s-%s-%s,%s,' % (repo.today.month,repo.today.day,repo.today.year, repo.name))\r
-               output_results(fh, repo.today.month,'-',repo.today.day,'-',repo.today.year,',',repo.name,',')\r
-               print_dict(repo, fh)\r
-               sys.stdout.write('\n*****************\n')\r
-               sys.stdout.write('\n')\r
-               fh.write('\n')\r
-               fh.close()\r
-\r
-\r
-def print_dict(repo, fh, ident = '', braces=1):\r
-       """ Recursively prints nested dictionaries."""\r
-       dataset = repo.dataset\r
-       for metric_counter, metric in enumerate(dataset):\r
-               fields = dataset[metric].keys()\r
-               for counter, field in enumerate(fields):\r
-                       if metric_counter +1 == repo.num_metrics:\r
-                               delim = set_delimiter(fields, counter)\r
-                       else:\r
-                               delim = ','\r
-                       #print delim\r
-                       sys.stdout.write('%s%s' % (dataset[metric][field], delim))\r
-                       fh.write('%s%s' % (dataset[metric][field], delim))\r
-\r
-\r
-def cleanup_volunteers(repos, whitelist):\r
-       for name, repo in repos.iteritems():\r
-               for ws in whitelist:\r
-                       if ws in repo.email['volunteer']:\r
-                               repo.email['wikimedian'].add(ws)\r
-                               repo.email['email']['volunteer'].remove(ws)\r
-       return repos\r
-\r
-\r
-def construct_dataset(settings, repos, metric, output, gerrit):                \r
-       output=output.split('\n')\r
-       for obs in output:\r
-               try:\r
-                       obs= json.loads(obs)\r
-               except ValueError, e:\r
-                       print e\r
-\r
-               if isinstance(obs, dict) and 'rowCount' not in obs:\r
-                       try:\r
-                               project = obs['project']\r
-                       except KeyError, e:\r
-                               print e, obs\r
-                       email = obs['owner']['email']\r
-                       repo = repos.get(project, {})\r
-                       if repo == {}:\r
-                               continue\r
-                       dt = construct_timestamp(obs['createdOn'])\r
-                       \r
-                       # print "REPO: %s" % repo\r
-                       # print "PROJECT: %s" % project\r
-                       # print "METRIC: %s" % metric\r
-                       # print "DATASET: %s" % repo.dataset\r
-\r
-                       if repo.dataset[metric]['oldest'] > dt:\r
-                               repo.dataset[metric]['oldest'] = dt\r
-                       repo.dataset[metric]['total'] +=1\r
-                       if is_wikimedian(email, settings.whitelist) == True:\r
-                               repo.dataset[metric]['wikimedian'] +=1\r
-                               repo.email['wikimedian'].add(email)\r
-                       else:\r
-                               repo.dataset[metric]['volunteer'] +=1\r
-                               repo.email['volunteer'].add(email)\r
-                       repo.touched = True\r
-\r
-\r
-def main():\r
-       gerrit = Gerrit()\r
-       settings = Settings(gerrit)\r
-       print 'Fetching list of all gerrit repositories...'\r
-       repos = create_repo_set(gerrit, settings)\r
\r
-       for metric in settings.metrics.itervalues():\r
-               #query = 'ssh -p %s %s gerrit query --format=%s %s' % (gerrit.port, gerrit.host, gerrit.format, question)\r
-               output = run_gerrit_query(metric.query)\r
-               print 'Running %s' % metric.query\r
-               construct_dataset(settings, repos, metric.name, output, gerrit)\r
-\r
-       print 'Fixing miscategorization of volunteer engineers...'\r
-       repos = cleanup_volunteers(repos, settings.whitelist)\r
-       print 'Creating datasets...'\r
-       create_dataset(repos, gerrit)\r
-\r
-\r
-if __name__== '__main__':\r
-       main()
\ No newline at end of file
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+gerrit-stats: Generate codereview stats based from Gerrit commits
+Copyright (C) 2012  Diederik van Liere, Wikimedia Foundation
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+"""
+import subprocess
+import json
+import sys
+import os
+from datetime import datetime
+
+from classes import Gerrit, Settings, Metric, Repo
+
+def create_repo_set(gerrit, settings):
+    """
+    Ask the gerrit server for its project list and build a Repo for each one.
+
+    Projects whose name contains any entry of settings.ignore_repos are
+    skipped, as are names of at most one character. Returns a dict that maps
+    each repository name to its Repo object.
+    """
+    # Use the configured port and host instead of hard-coded values, so the
+    # project listing talks to the same server as the metric queries.
+    command = 'ssh -p %s %s gerrit ls-projects' % (gerrit.port, gerrit.host)
+    repos = {}
+    for line in run_gerrit_query(command).split('\n'):
+        name = line.strip()
+        if len(name) <= 1:
+            continue
+        if any(ignore in name for ignore in settings.ignore_repos):
+            continue
+        repo = Repo(name, settings, gerrit)
+        repos[repo.name] = repo
+    return repos
+
+
+def is_wikimedian(email, whitelist):
+    """
+    Return True when email is whitelisted or belongs to wikimedia.org.
+
+    The domain test requires the suffix to start at a domain boundary
+    ('@' or '.'), so an address such as 'someone@notwikimedia.org' is not
+    counted as a Wikimedia address.
+    """
+    if email in whitelist:
+        return True
+    return email.endswith(('@wikimedia.org', '.wikimedia.org'))
+
+
+def set_delimiter(fields, counter):
+    """Return '' when counter is the index of the last entry of fields, ',' otherwise."""
+    is_last = len(fields) - counter == 1
+    return '' if is_last else ','
+
+def output_results(fh, *args):
+    """Concatenate the string form of every argument and write it to fh, then to stdout."""
+    text = ''.join(map(str, args))
+    for stream in (fh, sys.stdout):
+        stream.write(text)
+
+def write_heading(fh, repo):
+    """
+    Write the CSV header line for repo to fh, echoing it to stdout.
+
+    The first two columns are 'date' and 'repository'. They are followed by
+    one '<metric>_<field>' column for every field of every metric in
+    repo.dataset, and the line ends with a newline.
+    """
+    # The first column holds the date that create_dataset writes; it used to
+    # be labelled 'data' by mistake.
+    output_results(fh, 'date', ',', 'repository', ',')
+    for metric_counter, (name, metric) in enumerate(repo.dataset.iteritems()):
+        headings = metric.keys()
+        is_last_metric = metric_counter + 1 == repo.num_metrics
+        for counter, heading in enumerate(headings):
+            # Only the final column of the last metric gets no trailing comma.
+            delim = set_delimiter(headings, counter) if is_last_metric else ','
+            output_results(fh, name, '_', heading, delim)
+    fh.write('\n')
+    sys.stdout.write('\n')
+
+
+def construct_timestamp(epoch):
+    """Convert epoch with datetime.fromtimestamp and return the resulting datetime."""
+    moment = datetime.fromtimestamp(epoch)
+    return moment
+
+
+def run_gerrit_query(query):
+    """
+    Run query, a command line whose arguments are separated by single
+    spaces, without a shell and return what it wrote to stdout.
+    """
+    argv = query.split(' ')
+    process = subprocess.Popen(argv, shell=False, stdout=subprocess.PIPE)
+    captured, _ = process.communicate()
+    return captured
+
+
+def create_dataset(repos, gerrit):
+    """
+    Write one CSV row for every repository in repos.
+
+    Each repository's file lives in gerrit.data_location and is opened with
+    repo.filemode; when that mode is 'w' the header line is written first.
+    A row consists of the date (month-day-year), the repository name and the
+    dataset values. Everything written to the file is echoed to stdout.
+    """
+    for repo in repos.itervalues():
+        path = '%s/%s' % (gerrit.data_location, repo.filename)
+        # The with-block closes the file even when writing the row fails.
+        with open(path, repo.filemode) as fh:
+            if repo.filemode == 'w':
+                write_heading(fh, repo)
+            output_results(fh, repo.today.month, '-', repo.today.day, '-',
+                           repo.today.year, ',', repo.name, ',')
+            print_dict(repo, fh)
+            sys.stdout.write('\n*****************\n')
+            sys.stdout.write('\n')
+            fh.write('\n')
+
+
+def print_dict(repo, fh, ident = '', braces=1):
+    """
+    Write every value in repo.dataset, metric by metric, to stdout and to fh.
+
+    Each value is followed by a comma, except the last field of the metric
+    at position repo.num_metrics. No newline is written. ident and braces
+    are accepted but not used.
+    """
+    dataset = repo.dataset
+    for metric_number, metric in enumerate(dataset, 1):
+        values = dataset[metric]
+        fields = values.keys()
+        closes_row = metric_number == repo.num_metrics
+        for counter, field in enumerate(fields):
+            delim = set_delimiter(fields, counter) if closes_row else ','
+            cell = '%s%s' % (values[field], delim)
+            sys.stdout.write(cell)
+            fh.write(cell)
+
+
+def cleanup_volunteers(repos, whitelist):
+    """
+    Move whitelisted addresses from every repo's volunteer set to its
+    wikimedian set.
+
+    Each value of repos must have an email dict with 'wikimedian' and
+    'volunteer' sets; those sets are updated in place. Returns repos.
+    """
+    whitelisted = set(whitelist)
+    for repo in repos.values():
+        misfiled = repo.email['volunteer'] & whitelisted
+        repo.email['wikimedian'] |= misfiled
+        # This used to remove from repo.email['email']['volunteer'], which
+        # raised KeyError because there is no 'email' key.
+        repo.email['volunteer'] -= misfiled
+    return repos
+
+
+def construct_dataset(settings, repos, metric, output, gerrit):
+    """
+    Fold the output of one gerrit query into the per-repository counters.
+
+    output holds one JSON document per line. Every JSON object without a
+    'rowCount' key is counted for the repository named by its 'project'
+    value, provided that repository is in repos: the metric's 'total' and
+    either its 'wikimedian' or 'volunteer' counter go up by one, 'oldest'
+    is lowered to the record's 'createdOn' time when that is earlier, and
+    the owner's e-mail address is added to the matching set in repo.email.
+    Lines that are blank, are not valid JSON or lack a 'project' key are
+    skipped. gerrit is accepted but not used.
+    """
+    for line in output.split('\n'):
+        if not line.strip():
+            # Blank lines, such as the one after the final newline, carry no data.
+            continue
+        try:
+            obs = json.loads(line)
+        except ValueError as e:
+            print(e)
+            continue
+        if not isinstance(obs, dict) or 'rowCount' in obs:
+            continue
+        try:
+            project = obs['project']
+        except KeyError as e:
+            # Skip the record. Carrying on would count it against the
+            # previous record's project, or fail on an unbound name.
+            print('%s %s' % (e, obs))
+            continue
+        email = obs['owner']['email']
+        repo = repos.get(project)
+        if repo is None:
+            continue
+        created = construct_timestamp(obs['createdOn'])
+
+        stats = repo.dataset[metric]
+        if stats['oldest'] > created:
+            stats['oldest'] = created
+        stats['total'] += 1
+        if is_wikimedian(email, settings.whitelist):
+            group = 'wikimedian'
+        else:
+            group = 'volunteer'
+        stats[group] += 1
+        repo.email[group].add(email)
+        repo.touched = True
+
+
+def main():
+    """
+    Fetch the repository list, run every metric query against gerrit, fix
+    up the volunteer sets and write the CSV datasets.
+    """
+    gerrit = Gerrit()
+    settings = Settings(gerrit)
+
+    print('Fetching list of all gerrit repositories...')
+    repos = create_repo_set(gerrit, settings)
+
+    for metric in settings.metrics.itervalues():
+        command = metric.query
+        output = run_gerrit_query(command)
+        print('Running %s' % command)
+        construct_dataset(settings, repos, metric.name, output, gerrit)
+
+    print('Fixing miscategorization of volunteer engineers...')
+    repos = cleanup_volunteers(repos, settings.whitelist)
+
+    print('Creating datasets...')
+    create_dataset(repos, gerrit)
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file