Diffstat (limited to 'libcxx/utils/compare-benchmarks')
-rwxr-xr-x | libcxx/utils/compare-benchmarks | 131 |
1 file changed, 73 insertions, 58 deletions
diff --git a/libcxx/utils/compare-benchmarks b/libcxx/utils/compare-benchmarks
index ea4816e..43db3d8 100755
--- a/libcxx/utils/compare-benchmarks
+++ b/libcxx/utils/compare-benchmarks
@@ -7,28 +7,33 @@ import statistics
 import sys
 import tempfile
 
-import plotly
+import numpy
+import pandas
+import plotly.express
 import tabulate
 
-def parse_lnt(lines):
+def parse_lnt(lines, aggregate=statistics.median):
     """
-    Parse lines in LNT format and return a dictionnary of the form:
+    Parse lines in LNT format and return a list of dictionnaries of the form:
 
-        {
-            'benchmark1': {
-                'metric1': [float],
-                'metric2': [float],
+        [
+            {
+                'benchmark': <benchmark1>,
+                <metric1>: float,
+                <metric2>: float,
                 ...
             },
-            'benchmark2': {
-                'metric1': [float],
-                'metric2': [float],
+            {
+                'benchmark': <benchmark2>,
+                <metric1>: float,
+                <metric2>: float,
                 ...
             },
             ...
-        }
+        ]
 
-    Each metric may have multiple values.
+    If a metric has multiple values associated to it, they are aggregated into a single
+    value using the provided aggregation function.
     """
     results = {}
     for line in lines:
@@ -37,56 +42,51 @@ def parse_lnt(lines):
             continue
 
         (identifier, value) = line.split(' ')
-        (name, metric) = identifier.split('.')
-        if name not in results:
-            results[name] = {}
-        if metric not in results[name]:
-            results[name][metric] = []
-        results[name][metric].append(float(value))
-    return results
-
-def plain_text_comparison(benchmarks, baseline, candidate):
+        (benchmark, metric) = identifier.split('.')
+        if benchmark not in results:
+            results[benchmark] = {'benchmark': benchmark}
+
+        entry = results[benchmark]
+        if metric not in entry:
+            entry[metric] = []
+        entry[metric].append(float(value))
+
+    for (bm, entry) in results.items():
+        for metric in entry:
+            if isinstance(entry[metric], list):
+                entry[metric] = aggregate(entry[metric])
+
+    return list(results.values())
+
+def plain_text_comparison(data, metric, baseline_name=None, candidate_name=None):
     """
-    Create a tabulated comparison of the baseline and the candidate.
+    Create a tabulated comparison of the baseline and the candidate for the given metric.
     """
-    headers = ['Benchmark', 'Baseline', 'Candidate', 'Difference', '% Difference']
+    data = data.replace(numpy.nan, None).sort_values(by='benchmark') # avoid NaNs in tabulate output
+    headers = ['Benchmark', baseline_name, candidate_name, 'Difference', '% Difference']
     fmt = (None, '.2f', '.2f', '.2f', '.2f')
-    table = []
-    for (bm, base, cand) in zip(benchmarks, baseline, candidate):
-        diff = (cand - base) if base and cand else None
-        percent = 100 * (diff / base) if base and cand else None
-        row = [bm, base, cand, diff, percent]
-        table.append(row)
+    table = data[['benchmark', f'{metric}_baseline', f'{metric}_candidate', 'difference', 'percent']].set_index('benchmark')
     return tabulate.tabulate(table, headers=headers, floatfmt=fmt, numalign='right')
 
-def create_chart(benchmarks, baseline, candidate):
+def create_chart(data, metric, subtitle=None, baseline_name=None, candidate_name=None):
     """
-    Create a bar chart comparing 'baseline' and 'candidate'.
+    Create a bar chart comparing the given metric between the baseline and the candidate.
     """
-    figure = plotly.graph_objects.Figure()
-    figure.add_trace(plotly.graph_objects.Bar(x=benchmarks, y=baseline, name='Baseline'))
-    figure.add_trace(plotly.graph_objects.Bar(x=benchmarks, y=candidate, name='Candidate'))
+    data = data.sort_values(by='benchmark').rename(columns={
+        f'{metric}_baseline': baseline_name,
+        f'{metric}_candidate': candidate_name
+    })
+    figure = plotly.express.bar(data, title=f'{baseline_name} vs {candidate_name}',
+                                subtitle=subtitle,
+                                x='benchmark', y=[baseline_name, candidate_name], barmode='group')
+    figure.update_layout(xaxis_title='', yaxis_title='', legend_title='')
     return figure
 
-def prepare_series(baseline, candidate, metric, aggregate=statistics.median):
-    """
-    Prepare the data for being formatted or displayed as a chart.
-
-    Metrics that have more than one value are aggregated using the given aggregation function.
-    """
-    all_benchmarks = sorted(list(set(baseline.keys()) | set(candidate.keys())))
-    baseline_series = []
-    candidate_series = []
-    for bm in all_benchmarks:
-        baseline_series.append(aggregate(baseline[bm][metric]) if bm in baseline and metric in baseline[bm] else None)
-        candidate_series.append(aggregate(candidate[bm][metric]) if bm in candidate and metric in candidate[bm] else None)
-    return (all_benchmarks, baseline_series, candidate_series)
-
 def main(argv):
     parser = argparse.ArgumentParser(
         prog='compare-benchmarks',
         description='Compare the results of two sets of benchmarks in LNT format.',
-        epilog='This script requires the `tabulate` and the `plotly` Python modules.')
+        epilog='This script depends on the modules listed in `libcxx/utils/requirements.txt`.')
     parser.add_argument('baseline', type=argparse.FileType('r'),
                         help='Path to a LNT format file containing the benchmark results for the baseline.')
     parser.add_argument('candidate', type=argparse.FileType('r'),
@@ -107,28 +107,43 @@ def main(argv):
     parser.add_argument('--open', action='store_true',
                         help='Whether to automatically open the generated HTML file when finished. This option only makes sense '
                              'when the output format is `chart`.')
+    parser.add_argument('--baseline-name', type=str, default='Baseline',
+                        help='Optional name to use for the "baseline" label.')
+    parser.add_argument('--candidate-name', type=str, default='Candidate',
+                        help='Optional name to use for the "candidate" label.')
+    parser.add_argument('--subtitle', type=str, required=False,
+                        help='Optional subtitle to use for the chart. This can be used to help identify the contents of the chart. '
+                             'This option cannot be used with the plain text output.')
     args = parser.parse_args(argv)
 
+    if args.format == 'text' and args.subtitle is not None:
+        parser.error('Passing --subtitle makes no sense with --format=text')
+
     if args.format == 'text' and args.open:
         parser.error('Passing --open makes no sense with --format=text')
 
-    baseline = parse_lnt(args.baseline.readlines())
-    candidate = parse_lnt(args.candidate.readlines())
+    baseline = pandas.DataFrame(parse_lnt(args.baseline.readlines()))
+    candidate = pandas.DataFrame(parse_lnt(args.candidate.readlines()))
 
-    if args.filter is not None:
-        regex = re.compile(args.filter)
-        baseline = {k: v for (k, v) in baseline.items() if regex.search(k)}
-        candidate = {k: v for (k, v) in candidate.items() if regex.search(k)}
+    # Join the baseline and the candidate into a single dataframe and add some new columns
+    data = baseline.merge(candidate, how='outer', on='benchmark', suffixes=('_baseline', '_candidate'))
+    data['difference'] = data[f'{args.metric}_candidate'] - data[f'{args.metric}_baseline']
+    data['percent'] = 100 * (data['difference'] / data[f'{args.metric}_baseline'])
 
-    (benchmarks, baseline_series, candidate_series) = prepare_series(baseline, candidate, args.metric)
+    if args.filter is not None:
+        keeplist = [b for b in data['benchmark'] if re.search(args.filter, b) is not None]
+        data = data[data['benchmark'].isin(keeplist)]
 
     if args.format == 'chart':
-        figure = create_chart(benchmarks, baseline_series, candidate_series)
+        figure = create_chart(data, args.metric, subtitle=args.subtitle,
+                              baseline_name=args.baseline_name,
+                              candidate_name=args.candidate_name)
         do_open = args.output is None or args.open
         output = args.output or tempfile.NamedTemporaryFile(suffix='.html').name
         plotly.io.write_html(figure, file=output, auto_open=do_open)
     else:
-        diff = plain_text_comparison(benchmarks, baseline_series, candidate_series)
+        diff = plain_text_comparison(data, args.metric, baseline_name=args.baseline_name,
+                                     candidate_name=args.candidate_name)
        diff += '\n'
        if args.output is not None:
            with open(args.output, 'w') as out:
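For readers unfamiliar with the data shapes involved, the following standalone Python sketch shows what the reworked flow produces: one dict per benchmark from the LNT input, then an outer-joined pandas dataframe with <metric>_baseline/<metric>_candidate, difference and percent columns. The benchmark names (bm_sort, bm_copy) and the execution_time metric are made-up sample data, and parse() is a trimmed stand-in for the patch's parse_lnt rather than an import of the script; it only assumes pandas is installed, as the patch itself does.

    import statistics
    import pandas

    # Two tiny LNT-format inputs: "<benchmark>.<metric> <value>" per line.
    baseline_lnt = """\
    bm_sort.execution_time 10.0
    bm_sort.execution_time 12.0
    bm_copy.execution_time 5.0
    """
    candidate_lnt = """\
    bm_sort.execution_time 9.0
    bm_copy.execution_time 6.0
    """

    def parse(lines, aggregate=statistics.median):
        # Mirror of the new parse_lnt: one dict per benchmark, with repeated
        # metric values collapsed into a single aggregated number.
        results = {}
        for line in lines:
            line = line.strip()
            if not line:
                continue
            (identifier, value) = line.split(' ')
            (benchmark, metric) = identifier.split('.')
            entry = results.setdefault(benchmark, {'benchmark': benchmark})
            entry.setdefault(metric, []).append(float(value))
        for entry in results.values():
            for metric in entry:
                if isinstance(entry[metric], list):
                    entry[metric] = aggregate(entry[metric])
        return list(results.values())

    baseline = pandas.DataFrame(parse(baseline_lnt.splitlines()))
    candidate = pandas.DataFrame(parse(candidate_lnt.splitlines()))

    # Outer join on the benchmark name; metric columns get _baseline/_candidate suffixes.
    data = baseline.merge(candidate, how='outer', on='benchmark',
                          suffixes=('_baseline', '_candidate'))
    data['difference'] = data['execution_time_candidate'] - data['execution_time_baseline']
    data['percent'] = 100 * (data['difference'] / data['execution_time_baseline'])
    print(data)
    # Columns: benchmark, execution_time_baseline, execution_time_candidate, difference, percent.
    # bm_sort's baseline value is the median of 10.0 and 12.0, i.e. 11.0.

As a usage note, the new labeling flags added by this patch would be passed alongside the existing options, for example: compare-benchmarks old.lnt new.lnt --baseline-name main --candidate-name my-patch --subtitle 'sort microbenchmarks'. The spellings of the pre-existing metric/format/filter/output options are not visible in the hunks above, so they are omitted here.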