path: root/libcxx/utils/compare-benchmarks
#!/usr/bin/env python3

import argparse
import functools
import pathlib
import re
import statistics
import sys
import tempfile

import numpy
import pandas
import plotly.express
import plotly.io
import tabulate

def parse_lnt(lines, aggregate=statistics.median):
    """
    Parse lines in LNT format and return a list of dictionaries of the form:

        [
            {
                'benchmark': <benchmark1>,
                <metric1>: float,
                <metric2>: float,
                ...
            },
            {
                'benchmark': <benchmark2>,
                <metric1>: float,
                <metric2>: float,
                ...
            },
            ...
        ]

    If a metric has multiple values associated with it, they are aggregated into a single
    value using the provided aggregation function.
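
    For example, the following (illustrative) input lines:

        my_benchmark.execution_time 10.0
        my_benchmark.execution_time 12.0

    produce, with the default median aggregation, the single entry
    {'benchmark': 'my_benchmark', 'execution_time': 11.0}.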
    """
    results = {}
    for line in lines:
        line = line.strip()
        if not line:
            continue

        (identifier, value) = line.split(' ')
        (benchmark, metric) = identifier.split('.')
        if benchmark not in results:
            results[benchmark] = {'benchmark': benchmark}

        entry = results[benchmark]
        if metric not in entry:
            entry[metric] = []
        entry[metric].append(float(value))

    for (bm, entry) in results.items():
        for metric in entry:
            if isinstance(entry[metric], list):
                entry[metric] = aggregate(entry[metric])

    return list(results.values())

def plain_text_comparison(data, metric, baseline_name=None, candidate_name=None):
    """
    Create a tabulated comparison of the baseline and the candidate for the given metric.
    """
    # Compute additional info in new columns. In text mode, we can assume that we are
    # comparing exactly two data sets (suffixed _0 and _1).
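    # For example, a baseline value of 10.0 and a candidate value of 12.0 yield a
    # difference of 2.0 and a percent difference of +20.0.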
    data['difference'] = data[f'{metric}_1'] - data[f'{metric}_0']
    data['percent'] = 100 * (data['difference'] / data[f'{metric}_0'])

    data = data.replace(numpy.nan, None).sort_values(by='benchmark') # avoid NaNs in tabulate output
    headers = ['Benchmark', baseline_name, candidate_name, 'Difference', '% Difference']
    fmt = (None, '.2f', '.2f', '.2f', '.2f')
    table = data[['benchmark', f'{metric}_0', f'{metric}_1', 'difference', 'percent']].set_index('benchmark')
    return tabulate.tabulate(table, headers=headers, floatfmt=fmt, numalign='right')

def create_chart(data, metric, subtitle=None, series_names=None):
    """
    Create a bar chart comparing the given metric across the provided series.
    """
    data = data.sort_values(by='benchmark').rename(columns={f'{metric}_{i}': series_names[i] for i in range(len(series_names))})
    title = ' vs '.join(series_names)
    figure = plotly.express.bar(data, title=title, subtitle=subtitle, x='benchmark', y=series_names, barmode='group')
    figure.update_layout(xaxis_title='', yaxis_title='', legend_title='')
    return figure

def main(argv):
    parser = argparse.ArgumentParser(
        prog='compare-benchmarks',
        description='Compare the results of multiple sets of benchmarks in LNT format.',
        epilog='This script depends on the modules listed in `libcxx/utils/requirements.txt`.')
    parser.add_argument('files', type=argparse.FileType('r'), nargs='+',
        help='Paths to LNT-format files containing the benchmark results to compare. With the `text` '
             'output format, exactly two files must be provided.')
    parser.add_argument('--output', '-o', type=pathlib.Path, required=False,
        help='Path of a file to write the resulting comparison to. If the output format is `text`, this '
             'defaults to stdout. If the output format is `chart`, this defaults to a temporary file which '
             'is opened automatically once generated, but not removed after creation.')
    parser.add_argument('--metric', type=str, default='execution_time',
        help='The metric to compare. LNT data may contain multiple metrics (e.g. code size, execution time, etc) -- '
             'this option allows selecting which metric is being analyzed. The default is `execution_time`.')
    parser.add_argument('--filter', type=str, required=False,
        help='An optional regular expression used to filter the benchmarks included in the comparison. '
             'Only benchmarks whose names match the regular expression will be included.')
    parser.add_argument('--format', type=str, choices=['text', 'chart'], default='text',
        help='Select the output format. `text` generates a plain-text comparison in tabular form, and `chart` '
             'generates a self-contained HTML graph that can be opened in a browser. The default is `text`.')
    parser.add_argument('--open', action='store_true',
        help='Whether to automatically open the generated HTML file when finished. This option only makes sense '
             'when the output format is `chart`.')
    parser.add_argument('--series-names', type=str, required=False,
        help='Optional comma-delimited list of names to use for the various series. By default, two input '
             'files are named Baseline and Candidate, and more than two inputs are named Baseline, '
             'Candidate1, Candidate2, and so on.')
    parser.add_argument('--subtitle', type=str, required=False,
        help='Optional subtitle to use for the chart. This can be used to help identify the contents of the chart. '
             'This option cannot be used with the plain text output.')
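
    # Illustrative invocations (file names, series names and the filter pattern are placeholders):
    #   compare-benchmarks baseline.lnt candidate.lnt
    #   compare-benchmarks baseline.lnt candidate.lnt --filter='std::vector.*'
    #   compare-benchmarks a.lnt b.lnt c.lnt --format=chart --series-names=Base,FixA,FixB --open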
    args = parser.parse_args(argv)

    if args.format == 'text':
        if len(args.files) != 2:
            parser.error('--format=text requires exactly two input files to compare')
        if args.subtitle is not None:
            parser.error('Passing --subtitle makes no sense with --format=text')
        if args.open:
            parser.error('Passing --open makes no sense with --format=text')

    if args.series_names is None:
        args.series_names = ['Baseline']
        if len(args.files) == 2:
            args.series_names += ['Candidate']
        elif len(args.files) > 2:
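            # e.g. three input files with no --series-names yield ['Baseline', 'Candidate1', 'Candidate2']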
            args.series_names.extend(f'Candidate{n}' for n in range(1, len(args.files)))
    else:
        args.series_names = args.series_names.split(',')
        if len(args.series_names) != len(args.files):
            parser.error(f'Passed incorrect number of series names: got {len(args.series_names)} series names but {len(args.files)} inputs to compare')

    # Parse the raw LNT data and store each input in a dataframe
    lnt_inputs = [parse_lnt(file.readlines()) for file in args.files]
    inputs = [pandas.DataFrame(lnt).rename(columns={args.metric: f'{args.metric}_{i}'}) for (i, lnt) in enumerate(lnt_inputs)]

    # Join the inputs into a single dataframe
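    # (one row per benchmark name and one metric column per input, suffixed _0, _1, ...;
    # benchmarks missing from an input get NaN in that input's column)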
    data = functools.reduce(lambda a, b: a.merge(b, how='outer', on='benchmark'), inputs)

    if args.filter is not None:
        keeplist = [b for b in data['benchmark'] if re.search(args.filter, b) is not None]
        data = data[data['benchmark'].isin(keeplist)]

    if args.format == 'chart':
        figure = create_chart(data, args.metric, subtitle=args.subtitle, series_names=args.series_names)
        do_open = args.output is None or args.open
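        # When no --output is given, write to a generated temporary path; plotly creates the HTML
        # file there and it is left on disk afterwards (see the --output help text).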
        output = args.output or tempfile.NamedTemporaryFile(suffix='.html').name
        plotly.io.write_html(figure, file=output, auto_open=do_open)
    else:
        diff = plain_text_comparison(data, args.metric, baseline_name=args.series_names[0],
                                                        candidate_name=args.series_names[1])
        diff += '\n'
        if args.output is not None:
            with open(args.output, 'w') as out:
                out.write(diff)
        else:
            sys.stdout.write(diff)

if __name__ == '__main__':
    main(sys.argv[1:])