I want to see the number of removed/added lines, grouped by author, for a given branch in git history. There is git shortlog -s, which shows me the number of commits per author.
On my repos I've gotten a lot of trash output from the one-liners floating around, so here is a Python script to do it right:
import collections
import re
import subprocess
import sys
def get_lines_from_call(command):
    """
    Run a command and return its standard output as a list of text lines.

    `command` is an argument list passed to `subprocess.check_output`; a
    non-zero exit status raises `subprocess.CalledProcessError`.

    On Python 3 `check_output` returns bytes, which cannot be split with
    text separators such as '('.  Each line is therefore decoded as UTF-8,
    with undecodable bytes replaced instead of raising.  On Python 2 the
    output is already `str` and is returned as-is.
    """
    # Split while still bytes so only \n, \r and \r\n count as line breaks;
    # str.splitlines() would also break on form feeds and similar characters.
    raw_lines = subprocess.check_output(command).splitlines()
    if str is bytes:
        # Python 2: byte strings are the native text type.
        return raw_lines
    return [raw.decode('utf-8', 'replace') for raw in raw_lines]
def get_files(paths=()):
    """
    Return the output lines of `git ls-files`, one entry per listed file.

    Every item of `paths` is appended verbatim to the `git ls-files`
    command line.
    """
    return get_lines_from_call(['git', 'ls-files'] + list(paths))
def get_blame(path):
    """
    Return the output lines of `git blame` for a single file.

    The `--` separator makes git treat `path` strictly as a file name, so a
    tracked file whose name starts with '-' is not mistaken for an option.
    """
    return get_lines_from_call(['git', 'blame', '--', path])
# Tail of the parenthesised blame header: "<date> <time> <tz> <line-no>)",
# including the whitespace that separates it from the author name.
_BLAME_STAMP = re.compile(
    r'\s+\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} [+-]\d{4}\s+\d+\)')


def extract_name(line):
    """
    Extract the author from a line of a standard git blame.

    The author is the text between the first '(' of the line and the
    "<date> <time> <timezone> <line number>)" tail of the header.  Anchoring
    on that tail, rather than on the first ')', keeps author names that
    themselves contain parentheses intact.

    If the tail is not found, the original heuristic is used: cut at the
    first ')' and drop the last four whitespace-separated fields.

    Raises IndexError when the line contains no '('.
    """
    header = line.split('(', 1)[1]
    stamp = _BLAME_STAMP.search(header)
    if stamp is not None:
        # The match starts at the padding after the name, so slicing up to
        # it also drops the alignment whitespace.
        return header[:stamp.start()]
    return header.split(')', 1)[0].rsplit(None, 4)[0]
def get_file_authors(path):
    """
    Return the author of each blamed line of `path`, in blame order.
    """
    authors = []
    for blame_line in get_blame(path):
        authors.append(extract_name(blame_line))
    return authors
def blame_stats(paths=()):
    """
    Count blamed lines per author across the files selected by `paths`.

    Returns a `collections.Counter` mapping each author name to the number
    of lines attributed to that author.
    """
    return collections.Counter(
        author
        for tracked_file in get_files(paths)
        for author in get_file_authors(tracked_file)
    )
def main():
    """
    Print the per-author line counts, smallest first.

    Command-line arguments are forwarded to `blame_stats`.  Counts are
    right-aligned to the width of the largest count.  Nothing is printed
    when no lines were counted.
    """
    counter = blame_stats(sys.argv[1:])
    if not counter:
        # With an empty counter most_common() is empty, and indexing its
        # first entry would raise IndexError.
        return
    ranked = counter.most_common()
    # The first entry carries the largest count, which sets the column width.
    max_width = len(str(ranked[0][1]))
    for name, count in reversed(ranked):
        print('%s %s' % (str(count).rjust(max_width), name))


if __name__ == '__main__':
    main()
Note that the arguments to the script will be passed to git ls-files, so if you only want to show Python files:
blame_stats.py '**/*.py'
If you only want to show files in one subdirectory:
blame_stats.py some_dir
And so on.