diff options
author | Siddhesh Poyarekar <siddhesh@redhat.com> | 2013-05-13 13:44:32 +0530 |
---|---|---|
committer | Siddhesh Poyarekar <siddhesh@redhat.com> | 2013-05-13 13:44:32 +0530 |
commit | 43fe811b73d8f585a4ae837d4a9d4c0f5d46b779 (patch) | |
tree | 7280e44dba0f9839d1a5fbfcd890446e0c220953 /benchtests/bench-skeleton.c | |
parent | 0f7d347bd0530562257d7c03c62b8c50d810b655 (diff) | |
download | glibc-43fe811b73d8f585a4ae837d4a9d4c0f5d46b779.zip glibc-43fe811b73d8f585a4ae837d4a9d4c0f5d46b779.tar.gz glibc-43fe811b73d8f585a4ae837d4a9d4c0f5d46b779.tar.bz2 |
Use HP_TIMING for benchmarks if available
HP_TIMING uses native timestamping instructions if available, thus
greatly reducing the overhead of recording start and end times for
function calls. For architectures that don't have HP_TIMING
available, we fall back to clock_gettime. One may also force the
clock_gettime path by invoking the benchmark as follows:
    make USE_CLOCK_GETTIME=1 bench
which reports the benchmark results using clock_gettime. When switching
between the two, run `make bench-clean` first to ensure that the
benchmark programs are rebuilt.
Diffstat (limited to 'benchtests/bench-skeleton.c')
-rw-r--r-- | benchtests/bench-skeleton.c | 35 |
1 files changed, 14 insertions, 21 deletions
```diff
diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c
index 404900b..4e3a507 100644
--- a/benchtests/bench-skeleton.c
+++ b/benchtests/bench-skeleton.c
@@ -21,6 +21,7 @@
 #include <stdio.h>
 #include <time.h>
 #include <inttypes.h>
+#include "bench-timing.h"
 
 volatile unsigned int dontoptimize = 0;
 
@@ -45,21 +46,16 @@ int
 main (int argc, char **argv)
 {
   unsigned long i, k;
-  struct timespec start, end, runtime;
+  struct timespec runtime;
+  timing_t start, end;
 
   startup();
 
   memset (&runtime, 0, sizeof (runtime));
-  memset (&start, 0, sizeof (start));
-  memset (&end, 0, sizeof (end));
 
-  clock_getres (CLOCK_PROCESS_CPUTIME_ID, &start);
+  unsigned long iters;
 
-  /* Measure 1000 times the resolution of the clock. So for a 1ns resolution
-     clock, we measure 1000 iterations of the function call at a time.
-     Measurements close to the minimum clock resolution won't make much sense,
-     but it's better than having nothing at all. */
-  unsigned long iters = 1000 * start.tv_nsec;
+  TIMING_INIT (iters);
 
   for (int v = 0; v < NUM_VARIANTS; v++)
     {
@@ -68,19 +64,18 @@ main (int argc, char **argv)
       runtime.tv_sec += DURATION;
 
       double d_total_i = 0;
-      uint64_t total = 0, max = 0, min = 0x7fffffffffffffff;
+      timing_t total = 0, max = 0, min = 0x7fffffffffffffff;
       while (1)
         {
           for (i = 0; i < NUM_SAMPLES (v); i++)
             {
-              clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &start);
+              uint64_t cur;
+              TIMING_NOW (start);
               for (k = 0; k < iters; k++)
                 BENCH_FUNC (v, i);
-              clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &end);
+              TIMING_NOW (end);
 
-              uint64_t cur = (end.tv_nsec - start.tv_nsec
-                              + ((end.tv_sec - start.tv_sec)
-                                 * (uint64_t) 1000000000));
+              TIMING_DIFF (cur, start, end);
 
               if (cur > max)
                 max = cur;
@@ -88,7 +83,7 @@ main (int argc, char **argv)
               if (cur < min)
                 min = cur;
 
-              total += cur;
+              TIMING_ACCUM (total, cur);
 
               d_total_i += iters;
             }
@@ -104,13 +99,11 @@ main (int argc, char **argv)
       double d_iters;
 
     done:
-      d_total_s = total * 1e-9;
+      d_total_s = total;
       d_iters = iters;
 
-      printf ("%s: ITERS:%g: TOTAL:%gs, MAX:%gns, MIN:%gns, %g iter/s\n",
-              VARIANT (v),
-              d_total_i, d_total_s, max / d_iters, min / d_iters,
-              d_total_i / d_total_s);
+      TIMING_PRINT_STATS (VARIANT (v), d_total_s, d_iters, d_total_i, max,
+                          min);
     }
 
   return 0;
```