aboutsummaryrefslogtreecommitdiff
path: root/gcc/rust/monthly-diff.py
blob: c14cf14fa5e1a051883def976d2ddc82883179f6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
#!/usr/bin/env python3

import matplotlib.pyplot as plt
import numpy as np

import matplotlib
import subprocess
import sys
import re
import os


# Parse the single required command-line argument: the number of weeks of
# history to analyse.  Only the two failures that can actually happen here are
# caught (argument missing, argument not an integer).  Zero or a negative
# count is rejected as well, because it would leave no weeks to average over
# or to plot.
try:
    number_of_weeks = int(sys.argv[1])
    if number_of_weeks <= 0:
        raise ValueError("number of weeks must be positive")
except (IndexError, ValueError):
    print("script requires a positive integer argument for the number of weeks")
    sys.exit(-1)


# The git commands in this script are run without an explicit working
# directory, so insist on being started from the top of a checkout.  `.git` is
# a directory in an ordinary clone but a regular file in a linked worktree or
# a submodule, hence exists() rather than isdir().
cwd_is_root_repo = os.path.exists('.git')
if not cwd_is_root_repo:
    print("script must be ran from the root of the repo")
    sys.exit(-1)
    

# Collect one statistics record per week, newest week first, into change_list.
#
# Each record is a dict with the keys:
#   files_changed / insertions / deletions -- numbers taken from --shortstat
#   contributors  -- set of distinct "Name <email>" strings seen that week
#   contributions -- list with one "Name <email>" entry per commit

# Paths whose history counts towards the statistics.
specific_paths = [ 'gcc/rust', 'gcc/testsuite/rust', 'gcc/testsuite/rust.test' ]

# git prints commit headers in column 0 and indents commit messages, so
# anchoring at the start of a line stops SHAs quoted inside a message (for
# example "This reverts commit <sha>") from being counted as extra commits.
_COMMIT_HEADER = re.compile(r'^commit ([0-9a-f]{40})', re.MULTILINE)

# files_changed value used to recognise the week polluted by the gcc merge.
_GCC_MERGE_FILES_CHANGED = 23432


def _git_stdout(arguments):
    """Run ``git`` with *arguments* and return its standard output as text.

    A non-zero exit status is reported on stderr rather than being silently
    read as "no output"; it is not treated as fatal.
    """
    finished = subprocess.run(
        ['git'] + arguments, capture_output=True, text=True
    )
    if finished.returncode != 0:
        print("warning: git %s failed: %s"
              % (' '.join(arguments), finished.stderr.strip()),
              file=sys.stderr)
    return finished.stdout


def _empty_week():
    """Return a fresh statistics record with every counter at zero."""
    return {
        'files_changed': 0,
        'insertions': 0,
        'deletions': 0,
        'contributors': set(),
        'contributions': list()
    }


def _shortstat_count(noun, shortstat):
    """Return the number written before *noun* in *shortstat*, or 0 if absent.

    git leaves out the "insertions" or "deletions" part of a --shortstat line
    when that count is zero, so a missing part is a normal case.
    """
    found = re.search(r'(\d+) %s' % noun, shortstat)
    return int(found.group(1)) if found else 0


def _author_of(sha):
    """Return "Name <email>" from the Author line of *sha*, or None.

    The text after "Author: " is taken verbatim, so names containing
    punctuation or non-ASCII characters are handled like any other.
    """
    for line in _git_stdout(['show', '-s', sha]).splitlines():
        if line.startswith('Author: '):
            return line[len('Author: '):]
    return None


change_list = []
for i in range(number_of_weeks):
    # Week i runs from (i + 1) weeks ago up to i weeks ago; the most recent
    # week (i == 0) has no upper bound.
    # NOTE(review): recent Git releases mark `git whatchanged` as deprecated
    # -- confirm, and consider moving to `git log`.
    log_args = [ 'whatchanged', '--since=%i weeks ago' % (i + 1) ]
    if i > 0:
        log_args.append('--until=%i weeks ago' % i)
    log_args += ['--'] + specific_paths

    # SHAs in the order git printed them.
    commit_shas = _COMMIT_HEADER.findall(_git_stdout(log_args))

    changes = _empty_week()

    short_stat_out = ''
    if len(commit_shas) == 1:
        # `git show` prints the commit header and message first; the summary
        # is the last non-empty line of the output.
        stat_lines = _git_stdout(
            ['show', '%s' % commit_shas[0], '--shortstat']
        ).strip().splitlines()
        short_stat_out = stat_lines[-1].strip() if stat_lines else ''
    elif commit_shas:
        # NOTE(review): this diff is not limited to specific_paths, and
        # "last..first" leaves out the changes made by the last-listed commit
        # itself -- confirm whether either is intended.
        short_stat_out = _git_stdout(
            ['diff', '%s..%s' % (commit_shas[-1], commit_shas[0]), '--shortstat']
        ).strip()

    # Any part git did not print simply stays at zero.
    changes['files_changed'] = _shortstat_count('file', short_stat_out)
    changes['insertions'] = _shortstat_count('insertion', short_stat_out)
    changes['deletions'] = _shortstat_count('deletion', short_stat_out)

    # figure out the unique contributors each week
    for sha in commit_shas:
        author = _author_of(sha)
        if author is None:
            print("warning: no Author line found for commit %s" % sha,
                  file=sys.stderr)
            continue
        changes['contributors'].add(author)
        changes['contributions'].append(author)

    # lets see it then
    print(changes)

    # hack to get rid of the gcc-merge: reuse the previously recorded week in
    # its place.  When no week has been recorded yet there is nothing to
    # reuse, so an all-zero record is stored instead of indexing an empty
    # list.
    if changes['files_changed'] == _GCC_MERGE_FILES_CHANGED:
        change_list.append(change_list[-1] if change_list else _empty_week())
    else:
        change_list.append(changes)


# Merge the per-week contributor sets into one set holding every author that
# appears in any recorded week.
unique_contributors = set().union(
    *(week['contributors'] for week in change_list)
)

# calculate average lines added and removed per week
week_count = len(change_list)
total_ins_lines = sum(week['insertions'] for week in change_list)
total_del_lines = sum(week['deletions'] for week in change_list)

# With no weeks recorded there is nothing to divide by; report 0.0 rather
# than raising ZeroDivisionError.
avg_ins_lines_per_week = total_ins_lines / week_count if week_count else 0.0
avg_del_lines_per_week = total_del_lines / week_count if week_count else 0.0

print("average lines added per week:", avg_ins_lines_per_week)
print("average lines deleted per week:", avg_del_lines_per_week)

# find number of contributions per unique contributor
#
# A single pass over every recorded contribution replaces a rescan of all
# weeks for each author.  Authors enter the dict in first-seen order, so the
# listing no longer depends on set iteration order.
num_contribs = {}
for change in change_list:
    for author in change['contributions']:
        num_contribs[author] = num_contribs.get(author, 0) + 1

# Most active author first.  sorted() is stable, so authors with the same
# count keep their first-seen order.
contribs = sorted(
    num_contribs.items(), key=lambda entry: entry[1], reverse=True
)
for name, count in contribs:
    print(name, count)
    
                
# graph the change_list
#
# Every per-week series is pulled out of change_list and flipped, together
# with the matching list of week indices, before being handed to matplotlib.
weeks = list(range(number_of_weeks))[::-1]
insertions = [week['insertions'] for week in change_list][::-1]
deletions = [week['deletions'] for week in change_list][::-1]
# Only needed by the disabled plot call further down.
files_changed = [week['files_changed'] for week in change_list][::-1]
num_contributors = [len(week['contributors']) for week in change_list][::-1]

# Two stacked panels: line counts on top, contributor counts underneath.
fig, axs = plt.subplots(2)

axs[0].set_title("number of lines added + removed")
for series in (insertions, deletions):
    axs[0].plot(weeks, series)
# plt.plot(weeks, files_changed)

axs[1].set_title("number of contributors")
axs[1].plot(weeks, num_contributors)

plt.show()