Diffstat (limited to 'libcxx/utils/compare-benchmarks')
-rwxr-xr-x  libcxx/utils/compare-benchmarks  131
1 file changed, 73 insertions(+), 58 deletions(-)
diff --git a/libcxx/utils/compare-benchmarks b/libcxx/utils/compare-benchmarks
index ea4816e..43db3d8 100755
--- a/libcxx/utils/compare-benchmarks
+++ b/libcxx/utils/compare-benchmarks
@@ -7,28 +7,33 @@ import statistics
import sys
import tempfile
-import plotly
+import numpy
+import pandas
+import plotly.express
import tabulate
-def parse_lnt(lines):
+def parse_lnt(lines, aggregate=statistics.median):
"""
- Parse lines in LNT format and return a dictionnary of the form:
+ Parse lines in LNT format and return a list of dictionaries of the form:
- {
- 'benchmark1': {
- 'metric1': [float],
- 'metric2': [float],
+ [
+ {
+ 'benchmark': <benchmark1>,
+ <metric1>: float,
+ <metric2>: float,
...
},
- 'benchmark2': {
- 'metric1': [float],
- 'metric2': [float],
+ {
+ 'benchmark': <benchmark2>,
+ <metric1>: float,
+ <metric2>: float,
...
},
...
- }
+ ]
- Each metric may have multiple values.
+ If a metric has multiple values associated with it, they are aggregated into a single
+ value using the provided aggregation function.
"""
results = {}
for line in lines:
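
For illustration, a minimal sketch of the new return shape, assuming the patched parse_lnt above is in scope (the benchmark names, metric, and values here are made up):

    import statistics

    lines = [
        'bm1.execution_time 10.0',
        'bm1.execution_time 12.0',
        'bm2.execution_time 5.0',
    ]
    # The two bm1 samples are collapsed by the default aggregation
    # function, statistics.median, into a single value per metric.
    assert parse_lnt(lines) == [
        {'benchmark': 'bm1', 'execution_time': 11.0},
        {'benchmark': 'bm2', 'execution_time': 5.0},
    ]
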
@@ -37,56 +42,51 @@ def parse_lnt(lines):
continue
(identifier, value) = line.split(' ')
- (name, metric) = identifier.split('.')
- if name not in results:
- results[name] = {}
- if metric not in results[name]:
- results[name][metric] = []
- results[name][metric].append(float(value))
- return results
-
-def plain_text_comparison(benchmarks, baseline, candidate):
+ (benchmark, metric) = identifier.split('.')
+ if benchmark not in results:
+ results[benchmark] = {'benchmark': benchmark}
+
+ entry = results[benchmark]
+ if metric not in entry:
+ entry[metric] = []
+ entry[metric].append(float(value))
+
+ for (bm, entry) in results.items():
+ for metric in entry:
+ if isinstance(entry[metric], list):
+ entry[metric] = aggregate(entry[metric])
+
+ return list(results.values())
+
+def plain_text_comparison(data, metric, baseline_name=None, candidate_name=None):
"""
- Create a tabulated comparison of the baseline and the candidate.
+ Create a tabulated comparison of the baseline and the candidate for the given metric.
"""
- headers = ['Benchmark', 'Baseline', 'Candidate', 'Difference', '% Difference']
+ data = data.replace(numpy.nan, None).sort_values(by='benchmark') # avoid NaNs in tabulate output
+ headers = ['Benchmark', baseline_name, candidate_name, 'Difference', '% Difference']
fmt = (None, '.2f', '.2f', '.2f', '.2f')
- table = []
- for (bm, base, cand) in zip(benchmarks, baseline, candidate):
- diff = (cand - base) if base and cand else None
- percent = 100 * (diff / base) if base and cand else None
- row = [bm, base, cand, diff, percent]
- table.append(row)
+ table = data[['benchmark', f'{metric}_baseline', f'{metric}_candidate', 'difference', 'percent']].set_index('benchmark')
return tabulate.tabulate(table, headers=headers, floatfmt=fmt, numalign='right')
-def create_chart(benchmarks, baseline, candidate):
+def create_chart(data, metric, subtitle=None, baseline_name=None, candidate_name=None):
"""
- Create a bar chart comparing 'baseline' and 'candidate'.
+ Create a bar chart comparing the given metric between the baseline and the candidate.
"""
- figure = plotly.graph_objects.Figure()
- figure.add_trace(plotly.graph_objects.Bar(x=benchmarks, y=baseline, name='Baseline'))
- figure.add_trace(plotly.graph_objects.Bar(x=benchmarks, y=candidate, name='Candidate'))
+ data = data.sort_values(by='benchmark').rename(columns={
+ f'{metric}_baseline': baseline_name,
+ f'{metric}_candidate': candidate_name
+ })
+ figure = plotly.express.bar(data, title=f'{baseline_name} vs {candidate_name}',
+ subtitle=subtitle,
+ x='benchmark', y=[baseline_name, candidate_name], barmode='group')
+ figure.update_layout(xaxis_title='', yaxis_title='', legend_title='')
return figure
-def prepare_series(baseline, candidate, metric, aggregate=statistics.median):
- """
- Prepare the data for being formatted or displayed as a chart.
-
- Metrics that have more than one value are aggregated using the given aggregation function.
- """
- all_benchmarks = sorted(list(set(baseline.keys()) | set(candidate.keys())))
- baseline_series = []
- candidate_series = []
- for bm in all_benchmarks:
- baseline_series.append(aggregate(baseline[bm][metric]) if bm in baseline and metric in baseline[bm] else None)
- candidate_series.append(aggregate(candidate[bm][metric]) if bm in candidate and metric in candidate[bm] else None)
- return (all_benchmarks, baseline_series, candidate_series)
-
def main(argv):
parser = argparse.ArgumentParser(
prog='compare-benchmarks',
description='Compare the results of two sets of benchmarks in LNT format.',
- epilog='This script requires the `tabulate` and the `plotly` Python modules.')
+ epilog='This script depends on the modules listed in `libcxx/utils/requirements.txt`.')
parser.add_argument('baseline', type=argparse.FileType('r'),
help='Path to a LNT format file containing the benchmark results for the baseline.')
parser.add_argument('candidate', type=argparse.FileType('r'),
@@ -107,28 +107,43 @@ def main(argv):
parser.add_argument('--open', action='store_true',
help='Whether to automatically open the generated HTML file when finished. This option only makes sense '
'when the output format is `chart`.')
+ parser.add_argument('--baseline-name', type=str, default='Baseline',
+ help='Optional name to use for the "baseline" label.')
+ parser.add_argument('--candidate-name', type=str, default='Candidate',
+ help='Optional name to use for the "candidate" label.')
+ parser.add_argument('--subtitle', type=str, required=False,
+ help='Optional subtitle to use for the chart. This can be used to help identify the contents of the chart. '
+ 'This option cannot be used with the plain text output.')
args = parser.parse_args(argv)
+ if args.format == 'text' and args.subtitle is not None:
+ parser.error('Passing --subtitle makes no sense with --format=text')
+
if args.format == 'text' and args.open:
parser.error('Passing --open makes no sense with --format=text')
- baseline = parse_lnt(args.baseline.readlines())
- candidate = parse_lnt(args.candidate.readlines())
+ baseline = pandas.DataFrame(parse_lnt(args.baseline.readlines()))
+ candidate = pandas.DataFrame(parse_lnt(args.candidate.readlines()))
- if args.filter is not None:
- regex = re.compile(args.filter)
- baseline = {k: v for (k, v) in baseline.items() if regex.search(k)}
- candidate = {k: v for (k, v) in candidate.items() if regex.search(k)}
+ # Join the baseline and the candidate into a single dataframe and add some new columns
+ data = baseline.merge(candidate, how='outer', on='benchmark', suffixes=('_baseline', '_candidate'))
+ data['difference'] = data[f'{args.metric}_candidate'] - data[f'{args.metric}_baseline']
+ data['percent'] = 100 * (data['difference'] / data[f'{args.metric}_baseline'])
- (benchmarks, baseline_series, candidate_series) = prepare_series(baseline, candidate, args.metric)
+ if args.filter is not None:
+ keeplist = [b for b in data['benchmark'] if re.search(args.filter, b) is not None]
+ data = data[data['benchmark'].isin(keeplist)]
if args.format == 'chart':
- figure = create_chart(benchmarks, baseline_series, candidate_series)
+ figure = create_chart(data, args.metric, subtitle=args.subtitle,
+ baseline_name=args.baseline_name,
+ candidate_name=args.candidate_name)
do_open = args.output is None or args.open
output = args.output or tempfile.NamedTemporaryFile(suffix='.html').name
plotly.io.write_html(figure, file=output, auto_open=do_open)
else:
- diff = plain_text_comparison(benchmarks, baseline_series, candidate_series)
+ diff = plain_text_comparison(data, args.metric, baseline_name=args.baseline_name,
+ candidate_name=args.candidate_name)
diff += '\n'
if args.output is not None:
with open(args.output, 'w') as out:
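
To make the outer-merge semantics in main concrete, here is a self-contained sketch using the same column naming scheme (benchmarks and numbers are made up):

    import pandas

    baseline = pandas.DataFrame([{'benchmark': 'bm1', 'execution_time': 10.0}])
    candidate = pandas.DataFrame([{'benchmark': 'bm1', 'execution_time': 12.0},
                                  {'benchmark': 'bm2', 'execution_time': 5.0}])

    # how='outer' keeps benchmarks present in only one of the two files;
    # their missing side becomes NaN, which the plain-text path later
    # replaces with None before tabulating.
    data = baseline.merge(candidate, how='outer', on='benchmark',
                          suffixes=('_baseline', '_candidate'))
    data['difference'] = data['execution_time_candidate'] - data['execution_time_baseline']
    data['percent'] = 100 * (data['difference'] / data['execution_time_baseline'])
    # bm1 -> difference 2.0, percent 20.0; bm2 -> NaN for both.
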