From c1f75dc386d533806d29b7e94118363a7b50eed8 Mon Sep 17 00:00:00 2001 From: Siddhesh Poyarekar Date: Tue, 16 Apr 2013 17:37:24 +0530 Subject: Begin porting string performance tests to benchtests This is the initial support for string function performance tests, along with copying tests for memcpy and memcpy-ifunc as proof of concept. The string function benchmarks perform operations at different alignments and for different sizes and compare performance between plain operations and the optimized string operations. Due to this their output is incompatible with the function benchmarks where we're interested in fastest time, throughput, etc. In future, the correctness checks in the benchmark tests can be removed. Same goes for the performance measurements in the string/test-*. --- ChangeLog | 13 +++ benchtests/Makefile | 27 ++++- benchtests/README | 15 +++ benchtests/bench-memcpy-ifunc.c | 20 ++++ benchtests/bench-memcpy.c | 163 ++++++++++++++++++++++++++++++ benchtests/bench-string.h | 212 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 447 insertions(+), 3 deletions(-) create mode 100644 benchtests/bench-memcpy-ifunc.c create mode 100644 benchtests/bench-memcpy.c create mode 100644 benchtests/bench-string.h diff --git a/ChangeLog b/ChangeLog index 0afbce7..737c746 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +2013-06-11 Siddhesh Poyarekar + + * benchtests/Makefile: Disable parallel execution of targets. + (string-bench): Add memcpy. + (benchset): New variable to store a list of benchmark sets. + (bench-func): Renamed from bench. + (bench-set): New target. + (bench): Depend on bench-func and bench-set. + * benchtests/README: Add section on benchmark sets. + * benchtests/bench-memcpy-ifunc.c: New file. + * benchtests/bench-memcpy.c: New file. + * benchtests/bench-string.h: New file. + 2013-06-11 Andreas Schwab [BZ #15577] diff --git a/benchtests/Makefile b/benchtests/Makefile index 680440f..27d83f4 100644 --- a/benchtests/Makefile +++ b/benchtests/Makefile @@ -23,6 +23,13 @@ subdir := benchtests bench := acos acosh asin asinh atan atanh cos cosh exp log modf pow rint sin \ sinh tan tanh +# String function benchmarks. +string-bench := memcpy +string-bench-ifunc := $(addsuffix -ifunc, $(string-bench)) +string-bench-all := $(string-bench) $(string-bench-ifunc) + +benchset := $(string-bench-all) + acos-ARGLIST = double acos-RET = double LDFLAGS-bench-acos = -lm @@ -92,10 +99,15 @@ LDFLAGS-bench-tanh = -lm # Rules to build and execute the benchmarks. Do not put any benchmark # parameters beyond this point. +# We don't want the benchmark programs to run in parallel since that could +# affect their performance. +.NOTPARALLEL: + include ../Makeconfig include ../Rules binaries-bench := $(addprefix $(objpfx)bench-,$(bench)) +binaries-benchset := $(addprefix $(objpfx)bench-,$(benchset)) # The default duration: 10 seconds. ifndef BENCH_DURATION @@ -112,7 +124,7 @@ endif # This makes sure CPPFLAGS-nonlib and CFLAGS-nonlib are passed # for all these modules. -cpp-srcs-left := $(binaries-bench:=.c) +cpp-srcs-left := $(binaries-benchset:=.c) $(binaries-bench:=.c) lib := nonlib include $(patsubst %,$(..)cppflags-iterator.mk,$(cpp-srcs-left)) @@ -124,8 +136,17 @@ run-bench = $(test-wrapper-env) \ bench-clean: rm -f $(binaries-bench) $(addsuffix .o,$(binaries-bench)) + rm -f $(binaries-benchset) $(addsuffix .o,$(binaries-benchset)) + +bench: bench-set bench-func + +bench-set: $(binaries-benchset) + for run in $^; do \ + echo "Running $${run}"; \ + $(run-bench) > $${run}.out; \ + done -bench: $(binaries-bench) +bench-func: $(binaries-bench) { for run in $^; do \ echo "Running $${run}" >&2; \ $(run-bench); \ @@ -135,7 +156,7 @@ bench: $(binaries-bench) fi; \ mv -f $(objpfx)bench.out-tmp $(objpfx)bench.out -$(binaries-bench): %: %.o \ +$(binaries-bench) $(binaries-benchset): %: %.o \ $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \ $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit) $(+link) diff --git a/benchtests/README b/benchtests/README index 8135069..045b7a6 100644 --- a/benchtests/README +++ b/benchtests/README @@ -72,3 +72,18 @@ the same file by using the `name' directive that looks something like this: See the pow-inputs file for an example of what such a partitioned input file would look like. + +Benchmark Sets: +============== + +In addition to standard benchmarking of functions, one may also generate +custom outputs for a set of functions. This is currently used by string +function benchmarks where the aim is to compare performance between +implementations at various alignments and for various sizes. + +To add a benchset for `foo': + +- Add `foo' to the benchset variable. +- Write your bench-foo.c that prints out the measurements to stdout. +- On execution, a bench-foo.out is created in $(objpfx) with the contents of + stdout. diff --git a/benchtests/bench-memcpy-ifunc.c b/benchtests/bench-memcpy-ifunc.c new file mode 100644 index 0000000..b5a89f7 --- /dev/null +++ b/benchtests/bench-memcpy-ifunc.c @@ -0,0 +1,20 @@ +/* Measure IFUNC implementations of memcpy function. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#define TEST_IFUNC 1 +#include "bench-memcpy.c" diff --git a/benchtests/bench-memcpy.c b/benchtests/bench-memcpy.c new file mode 100644 index 0000000..1b12671 --- /dev/null +++ b/benchtests/bench-memcpy.c @@ -0,0 +1,163 @@ +/* Measure memcpy functions. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef MEMCPY_RESULT +# define MEMCPY_RESULT(dst, len) dst +# define MIN_PAGE_SIZE 131072 +# define TEST_MAIN +# define TEST_NAME "memcpy" +# include "bench-string.h" + +char *simple_memcpy (char *, const char *, size_t); +char *builtin_memcpy (char *, const char *, size_t); + +IMPL (simple_memcpy, 0) +IMPL (builtin_memcpy, 0) +IMPL (memcpy, 1) + +char * +simple_memcpy (char *dst, const char *src, size_t n) +{ + char *ret = dst; + while (n--) + *dst++ = *src++; + return ret; +} + +char * +builtin_memcpy (char *dst, const char *src, size_t n) +{ + return __builtin_memcpy (dst, src, n); +} +#endif + +typedef char *(*proto_t) (char *, const char *, size_t); + +static void +do_one_test (impl_t *impl, char *dst, const char *src, + size_t len) +{ + if (CALL (impl, dst, src, len) != MEMCPY_RESULT (dst, len)) + { + error (0, 0, "Wrong result in function %s %p %p", impl->name, + CALL (impl, dst, src, len), MEMCPY_RESULT (dst, len)); + ret = 1; + return; + } + + if (memcmp (dst, src, len) != 0) + { + error (0, 0, "Wrong result in function %s dst \"%s\" src \"%s\"", + impl->name, dst, src); + ret = 1; + return; + } + + if (HP_TIMING_AVAIL) + { + hp_timing_t start __attribute ((unused)); + hp_timing_t stop __attribute ((unused)); + hp_timing_t best_time = ~ (hp_timing_t) 0; + size_t i; + + for (i = 0; i < 32; ++i) + { + HP_TIMING_NOW (start); + CALL (impl, dst, src, len); + HP_TIMING_NOW (stop); + HP_TIMING_BEST (best_time, start, stop); + } + + printf ("\t%zd", (size_t) best_time); + } +} + +static void +do_test (size_t align1, size_t align2, size_t len) +{ + size_t i, j; + char *s1, *s2; + + align1 &= 63; + if (align1 + len >= page_size) + return; + + align2 &= 63; + if (align2 + len >= page_size) + return; + + s1 = (char *) (buf1 + align1); + s2 = (char *) (buf2 + align2); + + for (i = 0, j = 1; i < len; i++, j += 23) + s1[i] = j; + + if (HP_TIMING_AVAIL) + printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2); + + FOR_EACH_IMPL (impl, 0) + do_one_test (impl, s2, s1, len); + + if (HP_TIMING_AVAIL) + putchar ('\n'); +} + +int +test_main (void) +{ + size_t i; + + test_init (); + + printf ("%23s", ""); + FOR_EACH_IMPL (impl, 0) + printf ("\t%s", impl->name); + putchar ('\n'); + + for (i = 0; i < 18; ++i) + { + do_test (0, 0, 1 << i); + do_test (i, 0, 1 << i); + do_test (0, i, 1 << i); + do_test (i, i, 1 << i); + } + + for (i = 0; i < 32; ++i) + { + do_test (0, 0, i); + do_test (i, 0, i); + do_test (0, i, i); + do_test (i, i, i); + } + + for (i = 3; i < 32; ++i) + { + if ((i & (i - 1)) == 0) + continue; + do_test (0, 0, 16 * i); + do_test (i, 0, 16 * i); + do_test (0, i, 16 * i); + do_test (i, i, 16 * i); + } + + do_test (0, 0, getpagesize ()); + + return ret; +} + +#include "../test-skeleton.c" diff --git a/benchtests/bench-string.h b/benchtests/bench-string.h new file mode 100644 index 0000000..2fe8d9f --- /dev/null +++ b/benchtests/bench-string.h @@ -0,0 +1,212 @@ +/* Measure string and memory functions. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +typedef struct +{ + const char *name; + void (*fn) (void); + long test; +} impl_t; +extern impl_t __start_impls[], __stop_impls[]; + +#define IMPL(name, test) \ + impl_t tst_ ## name \ + __attribute__ ((section ("impls"), aligned (sizeof (void *)))) \ + = { __STRING (name), (void (*) (void))name, test }; + +#ifdef TEST_MAIN + +# ifndef _GNU_SOURCE +# define _GNU_SOURCE +# endif + +# undef __USE_STRING_INLINES + +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# define GL(x) _##x +# define GLRO(x) _##x +# include + + +# define TEST_FUNCTION test_main () +# define TIMEOUT (4 * 60) +# define OPT_ITERATIONS 10000 +# define OPT_RANDOM 10001 +# define OPT_SEED 10002 + +unsigned char *buf1, *buf2; +int ret, do_srandom; +unsigned int seed; +size_t page_size; + +hp_timing_t _dl_hp_timing_overhead; + +# ifndef ITERATIONS +size_t iterations = 100000; +# define ITERATIONS_OPTIONS \ + { "iterations", required_argument, NULL, OPT_ITERATIONS }, +# define ITERATIONS_PROCESS \ + case OPT_ITERATIONS: \ + iterations = strtoul (optarg, NULL, 0); \ + break; +# define ITERATIONS iterations +# else +# define ITERATIONS_OPTIONS +# define ITERATIONS_PROCESS +# endif + +# define CMDLINE_OPTIONS ITERATIONS_OPTIONS \ + { "random", no_argument, NULL, OPT_RANDOM }, \ + { "seed", required_argument, NULL, OPT_SEED }, +# define CMDLINE_PROCESS ITERATIONS_PROCESS \ + case OPT_RANDOM: \ + { \ + int fdr = open ("/dev/urandom", O_RDONLY); \ + \ + if (fdr < 0 || read (fdr, &seed, sizeof(seed)) != sizeof (seed)) \ + seed = time (NULL); \ + if (fdr >= 0) \ + close (fdr); \ + do_srandom = 1; \ + break; \ + } \ + \ + case OPT_SEED: \ + seed = strtoul (optarg, NULL, 0); \ + do_srandom = 1; \ + break; + +# define CALL(impl, ...) \ + (* (proto_t) (impl)->fn) (__VA_ARGS__) + +# if defined TEST_IFUNC && defined TEST_NAME +/* Increase size of FUNC_LIST if assert is triggered at run-time. */ +static struct libc_ifunc_impl func_list[32]; +static int func_count; +static int impl_count = -1; +static impl_t *impl_array; + +# define FOR_EACH_IMPL(impl, notall) \ + impl_t *impl; \ + int count; \ + if (impl_count == -1) \ + { \ + impl_count = 0; \ + if (func_count != 0) \ + { \ + int f; \ + impl_t *skip = NULL, *a; \ + for (impl = __start_impls; impl < __stop_impls; ++impl) \ + if (strcmp (impl->name, TEST_NAME) == 0) \ + skip = impl; \ + else \ + impl_count++; \ + a = impl_array = malloc ((impl_count + func_count) * \ + sizeof (impl_t)); \ + for (impl = __start_impls; impl < __stop_impls; ++impl) \ + if (impl != skip) \ + *a++ = *impl; \ + for (f = 0; f < func_count; f++) \ + if (func_list[f].usable) \ + { \ + a->name = func_list[f].name; \ + a->fn = func_list[f].fn; \ + a->test = 1; \ + a++; \ + } \ + impl_count = a - impl_array; \ + } \ + else \ + { \ + impl_count = __stop_impls - __start_impls; \ + impl_array = __start_impls; \ + } \ + } \ + impl = impl_array; \ + for (count = 0; count < impl_count; ++count, ++impl) \ + if (!notall || impl->test) +# else /* ! (defined TEST_IFUNC && defined TEST_NAME) */ +# define FOR_EACH_IMPL(impl, notall) \ + for (impl_t *impl = __start_impls; impl < __stop_impls; ++impl) \ + if (!notall || impl->test) +# endif /* ! (defined TEST_IFUNC && defined TEST_NAME) */ + +# define HP_TIMING_BEST(best_time, start, end) \ + do \ + { \ + hp_timing_t tmptime; \ + HP_TIMING_DIFF (tmptime, start + _dl_hp_timing_overhead, end); \ + if (best_time > tmptime) \ + best_time = tmptime; \ + } \ + while (0) + +# ifndef BUF1PAGES +# define BUF1PAGES 1 +# endif + +static void +test_init (void) +{ +# if defined TEST_IFUNC && defined TEST_NAME + func_count = __libc_ifunc_impl_list (TEST_NAME, func_list, + (sizeof func_list + / sizeof func_list[0])); +# endif + + page_size = 2 * getpagesize (); +# ifdef MIN_PAGE_SIZE + if (page_size < MIN_PAGE_SIZE) + page_size = MIN_PAGE_SIZE; +# endif + buf1 = mmap (0, (BUF1PAGES + 1) * page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (buf1 == MAP_FAILED) + error (EXIT_FAILURE, errno, "mmap failed"); + if (mprotect (buf1 + BUF1PAGES * page_size, page_size, PROT_NONE)) + error (EXIT_FAILURE, errno, "mprotect failed"); + buf2 = mmap (0, 2 * page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (buf2 == MAP_FAILED) + error (EXIT_FAILURE, errno, "mmap failed"); + if (mprotect (buf2 + page_size, page_size, PROT_NONE)) + error (EXIT_FAILURE, errno, "mprotect failed"); + HP_TIMING_DIFF_INIT (); + if (do_srandom) + { + printf ("Setting seed to 0x%x\n", seed); + srandom (seed); + } + + memset (buf1, 0xa5, BUF1PAGES * page_size); + memset (buf2, 0x5a, page_size); +} + +#endif /* TEST_MAIN */ -- cgit v1.1