diff options
author | Wilco Dijkstra <wdijkstr@arm.com> | 2017-08-17 16:27:20 +0100 |
---|---|---|
committer | Wilco Dijkstra <wdijkstr@arm.com> | 2017-08-17 16:27:20 +0100 |
commit | d4505b895f2aa577a6b974ab4bd0a1fd0db60ceb (patch) | |
tree | 5ae35fc88eda5f2ad45b63f4617f4a87d470d885 /benchtests/bench-skeleton.c | |
parent | 34d6a3cbf2be45aa039a7eb9f0084a4b710437b8 (diff) | |
download | glibc-d4505b895f2aa577a6b974ab4bd0a1fd0db60ceb.zip glibc-d4505b895f2aa577a6b974ab4bd0a1fd0db60ceb.tar.gz glibc-d4505b895f2aa577a6b974ab4bd0a1fd0db60ceb.tar.bz2 |
Add math benchmark latency test
This patch further improves math function benchmarking by adding a latency
test in addition to throughput. This enables more accurate comparisons of the
math functions. The latency test works by making each call depend on the
result of the previous iteration: func_res = F (func_res * zero + input[i]).
Multiplying the previous result by zero creates that dependency without
changing the input value.
It reports reciprocal throughput and latency in nanoseconds (the actual unit
depends on the timing header used) and max/min throughput in iterations per
second:
"workload-spec2006.wrf": {
"reciprocal-throughput": 100,
"latency": 200,
"max-throughput": 1.0e+07,
"min-throughput": 5.0e+06
}
* benchtests/bench-skeleton.c (main): Add support for
latency benchmarking.
* benchtests/scripts/bench.py: Add support for latency benchmarking.
Diffstat (limited to 'benchtests/bench-skeleton.c')
-rw-r--r-- | benchtests/bench-skeleton.c | 27 |
1 file changed, 23 insertions, 4 deletions
diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c
index 3c6dad7..955b2e1 100644
--- a/benchtests/bench-skeleton.c
+++ b/benchtests/bench-skeleton.c
@@ -71,8 +71,10 @@ main (int argc, char **argv)
       bool is_bench = strncmp (VARIANT (v), "workload-", 9) == 0;
       double d_total_i = 0;
       timing_t total = 0, max = 0, min = 0x7fffffffffffffff;
+      timing_t throughput = 0, latency = 0;
       int64_t c = 0;
       uint64_t cur;
+      BENCH_VARS;
       while (1)
         {
           if (is_bench)
@@ -86,7 +88,16 @@ main (int argc, char **argv)
                   BENCH_FUNC (v, i);
               TIMING_NOW (end);
               TIMING_DIFF (cur, start, end);
-              TIMING_ACCUM (total, cur);
+              TIMING_ACCUM (throughput, cur);
+
+              TIMING_NOW (start);
+              for (k = 0; k < iters; k++)
+                for (i = 0; i < NUM_SAMPLES (v); i++)
+                  BENCH_FUNC_LAT (v, i);
+              TIMING_NOW (end);
+              TIMING_DIFF (cur, start, end);
+              TIMING_ACCUM (latency, cur);
+
               d_total_i += iters * NUM_SAMPLES (v);
             }
           else
@@ -131,12 +142,20 @@ main (int argc, char **argv)
       /* Begin variant. */
       json_attr_object_begin (&json_ctx, VARIANT (v));
 
-      json_attr_double (&json_ctx, "duration", d_total_s);
-      json_attr_double (&json_ctx, "iterations", d_total_i);
       if (is_bench)
-        json_attr_double (&json_ctx, "throughput", d_total_s / d_total_i);
+        {
+          json_attr_double (&json_ctx, "reciprocal-throughput",
+                            throughput / d_total_i);
+          json_attr_double (&json_ctx, "latency", latency / d_total_i);
+          json_attr_double (&json_ctx, "max-throughput",
+                            d_total_i / throughput * 1000000000.0);
+          json_attr_double (&json_ctx, "min-throughput",
+                            d_total_i / latency * 1000000000.0);
+        }
       else
         {
+          json_attr_double (&json_ctx, "duration", d_total_s);
+          json_attr_double (&json_ctx, "iterations", d_total_i);
           json_attr_double (&json_ctx, "max", max / d_iters);
           json_attr_double (&json_ctx, "min", min / d_iters);
           json_attr_double (&json_ctx, "mean", d_total_s / d_total_i);