Framework for performance benchmarking of functions

See benchtests/Makefile to know how to use it.
author: Siddhesh Poyarekar <siddhesh@redhat.com> 2013-03-15 12:30:03 +0530
committer: Siddhesh Poyarekar <siddhesh@redhat.com> 2013-03-15 12:30:03 +0530
commit: 8cfdb7e0560ab27e70a1d2e898fb4a0a67a13c70 (patch)
tree: 7eb91b35e7d04f1c4889563b3c922e512cfe2045
parent: d22ca8cdfb98001d03772ef264b244930d439b3f (diff)
download: glibc-8cfdb7e0560ab27e70a1d2e898fb4a0a67a13c70.zip
glibc-8cfdb7e0560ab27e70a1d2e898fb4a0a67a13c70.tar.gz
glibc-8cfdb7e0560ab27e70a1d2e898fb4a0a67a13c70.tar.bz2
9 files changed, 278 insertions, 2 deletions
diff --git a/ChangeLog b/ChangeLog
index 6fc99c4..2e8affb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,21 @@
 2013-03-15  Siddhesh Poyarekar  <siddhesh@redhat.com>
+	    Richard Henderson  <rth@redhat.com>
+	    Tulio Magno Quites Machado Filho  <tuliom@linux.vnet.ibm.com>
+
+	* Makefile.in (bench): New target.
+	* NEWS: Mention the benchmark framework.
+	* Rules (bench): New target.
+	(binaries-bench): Generate binaries for functions to
+	benchmark.
+	* benchtests/Makefile: New makefile for benchmark tests.
+	* benchtests/bench-skeleton.c: New skeleton file for benchmark
+	programs.
+	* benchtests/exp-inputs: New input file for EXP function.
+	* benchtests/pow-inputs: New input file for POW function.
+	* scripts/bench.pl: New script to generate source files for
+	benchmark programs.
+
+2013-03-15  Siddhesh Poyarekar  <siddhesh@redhat.com>
 
 	* sysdeps/ieee754/dbl-64/mpa-arch.h: New file.
 	* sysdeps/ieee754/dbl-64/mpa.c (norm): Use MANTISSA_T to store
diff --git a/Makefile.in b/Makefile.in
index d73a78f..df75b8f 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -3,7 +3,7 @@ srcdir = @srcdir@
 # Uncomment the line below if you want to do parallel build.
 # PARALLELMFLAGS = -j 4
 
-.PHONY: all install
+.PHONY: all install bench
 
 all .DEFAULT:
 	$(MAKE) -r PARALLELMFLAGS="$(PARALLELMFLAGS)" -C $(srcdir) objdir=`pwd` $@
@@ -11,3 +11,6 @@ all .DEFAULT:
 install:
 	LANGUAGE=C LC_ALL=C; export LANGUAGE LC_ALL; \
 	$(MAKE) -r PARALLELMFLAGS="$(PARALLELMFLAGS)" -C $(srcdir) objdir=`pwd` $@
+
+bench:
+	$(MAKE) -C $(srcdir)/benchtests $(PARALLELMFLAGS) objdir=`pwd` $@
diff --git a/NEWS b/NEWS
index 1f63e3f..cb96a74 100644
--- a/NEWS
+++ b/NEWS
@@ -23,6 +23,8 @@ Version 2.18
 * Support for priority inherited mutexes in pthread condition variables on
   non-x86 architectures.
 
+* Added a benchmark framework to track performance of functions in glibc.
+
 
 Version 2.17
 
diff --git a/Rules b/Rules
index 301a748..bc5dacd 100644
--- a/Rules
+++ b/Rules
@@ -83,7 +83,7 @@ common-generated += dummy.o dummy.c
 
 # This makes all the auxiliary and test programs.
 
-.PHONY: others tests
+.PHONY: others tests bench
 ifeq ($(multi-arch),no)
 tests := $(filter-out $(tests-ifunc), $(tests))
 xtests := $(filter-out $(xtests-ifunc), $(xtests))
@@ -188,6 +188,31 @@ $(objpfx)%.out: /dev/null $(objpfx)%	# Make it 2nd arg for canned sequence.
 	$(make-test-out) > $@
 
 endif	# tests
+
+# Build and run benchmark programs.
+binaries-bench := $(addprefix $(objpfx)bench-,$(bench))
+
+run-bench = $(test-wrapper-env) \
+	    GCONV_PATH=$(common-objpfx)iconvdata LC_ALL=C \
+	    $($*-ENV) $(run-via-rtld-prefix) $${run}
+
+bench: $(binaries-bench)
+	if [ -f $(objpfx)bench.out ]; then \
+	  mv -f $(objpfx)bench.out $(objpfx)bench.out.old; \
+	fi
+	for run in $^; do \
+	  eval $(run-bench) >>  $(objpfx)bench.out; \
+	done
+
+$(binaries-bench): %: %.o \
+  $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
+  $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
+	$(+link)
+# Write via a temporary so that a failed bench.pl run leaves no target behind.
+$(objpfx)bench-%.c: %-inputs bench-skeleton.c $(..)scripts/bench.pl
+	$(..)scripts/bench.pl $(patsubst %-inputs,%,$<) \
+	  $($*-ITER) $($*-ARGLIST) $($*-RET) > $@-tmp && mv -f $@-tmp $@
+
 
 .PHONY: distclean realclean subdir_distclean subdir_realclean \
 	subdir_clean subdir_mostlyclean subdir_testclean
diff --git a/benchtests/Makefile b/benchtests/Makefile
new file mode 100644
index 0000000..74938b9
--- /dev/null
+++ b/benchtests/Makefile
@@ -0,0 +1,59 @@
+# Copyright (C) 2013 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+
+
+# Makefile for benchmark tests.  The only useful target here is `bench`.
+
+# Adding a new function `foo`:
+# ---------------------------
+
+# - Append the function name to the bench variable
+
+# - Define foo-ITER with the number of iterations you want to run.  Keep it
+#   high enough that the overhead of clock_gettime is only a small fraction of
+#   the total run time of the test.
+
+# - Define foo-ARGLIST as a colon separated list of types of the input
+#   arguments.  Use `void` if function does not take any inputs.  Put in quotes
+#   if the input argument is a pointer, e.g.:
+
+#      malloc-ARGLIST = "void *"
+
+# - Define foo-RET as the type the function returns.  Skip if the function
+#   returns void.  One could even skip foo-ARGLIST if the function does not
+#   take any inputs AND the function returns void.
+
+
+# - Make a file called `foo-inputs` with one input value per line, an input
+#   being a comma separated list of arguments to be passed into the function.
+#   See pow-inputs for an example.
+
+subdir := benchtests
+bench := exp pow
+
+exp-ITER = 100000
+exp-ARGLIST = double
+exp-RET = double
+LDFLAGS-bench-exp = -lm
+
+pow-ITER = 100000
+pow-ARGLIST = double:double
+pow-RET = double
+LDFLAGS-bench-pow = -lm
+
+include ../Makeconfig
+include ../Rules
diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c
new file mode 100644
index 0000000..13f986d
--- /dev/null
+++ b/benchtests/bench-skeleton.c
@@ -0,0 +1,75 @@
+/* Skeleton for benchmark programs.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <string.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <time.h>
+#include <inttypes.h>
+
+/* Time about ITER calls of BENCH_FUNC on each of the NUM_SAMPLES inputs and
+   print the total time, slowest and fastest batch averages and call rate.  */
+int
+main (int argc, char **argv)
+{
+  unsigned long i, j, k;
+  uint64_t total = 0, max = 0, min = 0x7fffffffffffffff;
+  struct timespec start, end;
+
+  memset (&start, 0, sizeof (start));
+  memset (&end, 0, sizeof (end));
+  clock_getres (CLOCK_PROCESS_CPUTIME_ID, &start);
+
+  /* Time the calls in batches of 1000 per nanosecond of clock resolution, so
+     a 1ns clock times 1000 calls at a time.  START was zeroed above and
+     clock_getres is unchecked, so treat a zero resolution as a batch of one
+     instead of dividing by zero, and always run at least one batch.  */
+  unsigned long iters = start.tv_nsec > 0 ? 1000 * start.tv_nsec : 1;
+  unsigned long total_iters = ITER / iters > 0 ? ITER / iters : 1;
+
+  for (i = 0; i < NUM_SAMPLES; i++)
+    {
+      for (j = 0; j < total_iters; j ++)
+	{
+	  clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &start);
+	  for (k = 0; k < iters; k++)
+	    BENCH_FUNC(i);
+	  clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &end);
+	  uint64_t cur = (end.tv_nsec - start.tv_nsec
+			 + ((end.tv_sec - start.tv_sec)
+			    * (uint64_t) 1000000000));
+
+	  if (cur > max)
+	    max = cur;
+	  if (cur < min)
+	    min = cur;
+	  total += cur;
+	}
+    }
+
+  /* Report the number of calls actually timed; it differs from ITER when
+     ITER is not a multiple of the batch size.  */
+  double d_total_s = total * 1e-9;
+  double d_iters = iters;
+  double d_total_i = (double) total_iters * iters * NUM_SAMPLES;
+  printf (FUNCNAME ": ITERS:%g: TOTAL:%gs, MAX:%gns, MIN:%gns, %g iter/s\n",
+	  d_total_i, d_total_s, max / d_iters, min / d_iters,
+	  d_total_i / d_total_s);
+
+  return 0;
+}
diff --git a/benchtests/exp-inputs b/benchtests/exp-inputs
new file mode 100644
index 0000000..a2086ba
--- /dev/null
+++ b/benchtests/exp-inputs
@@ -0,0 +1 @@
+708.00096423260981737257679924368858
diff --git a/benchtests/pow-inputs b/benchtests/pow-inputs
new file mode 100644
index 0000000..dbb1270
--- /dev/null
+++ b/benchtests/pow-inputs
@@ -0,0 +1 @@
+1.0000000000000020, 1.5
diff --git a/scripts/bench.pl b/scripts/bench.pl
new file mode 100755
index 0000000..bb7f648
--- /dev/null
+++ b/scripts/bench.pl
@@ -0,0 +1,93 @@
+#! /usr/bin/perl -w
+# Copyright (C) 2013 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+
+
+use strict;
+use warnings;
+# Generate a benchmark source file for a given input.  The C source is written
+# to standard output; the inputs are read from <function>-inputs in the
+# current directory, one comma separated argument list per line.
+
+if (@ARGV < 2) {
+  die "Usage: bench.pl <function> <iterations> [parameter types] [return type]\n"
+}
+
+my $func = $ARGV[0];
+my $iters = $ARGV[1];
+my $getret = "";
+
+# The parameter types come as one colon separated list; the return type
+# defaults to void when it is not given.
+my @args = @ARGV >= 3 ? split(':', $ARGV[2]) : ();
+my $ret = @ARGV >= 4 ? $ARGV[3] : "void";
+
+my $decl = "extern $ret $func (";
+
+if (@args == 0 || $args[0] eq "void") {
+  # No inputs: the function is called without arguments, as a single sample.
+  print "$decl void);\n";
+  print "#define CALL_BENCH_FUNC(j) $func();\n";
+  print "#define NUM_SAMPLES (1)\n";
+}
+else {
+  my $num = 0;
+  my $bench_func = "#define CALL_BENCH_FUNC(j) $func (";
+  my $struct = "struct args {";
+
+  # Build the prototype, the call and the input struct one argument at a time.
+  foreach my $arg (@args) {
+    if ($num > 0) {
+      $bench_func = "$bench_func,";
+      $decl = "$decl,";
+    }
+
+    $struct = "$struct $arg arg$num;";
+    $bench_func = "$bench_func in[j].arg$num";
+    $decl = "$decl $arg";
+    $num = $num + 1;
+  }
+
+  print "$decl);\n";
+  print "$bench_func);\n";
+  print "$struct } in[] = {";
+
+  # Each non-blank line of the inputs file becomes one initializer of in[].
+  open(my $inputs, '<', "$func-inputs") or die "$func-inputs: $!\n";
+  while (<$inputs>) {
+    chomp;
+    next if /^\s*$/;
+    print "{$_},\n";
+  }
+  close($inputs);
+  print "};\n";
+  print "#define NUM_SAMPLES (sizeof (in) / sizeof (struct args))\n"
+}
+
+# In some cases not storing a return value seems to result in the function call
+# being optimized out.  The object has static storage and so starts out zeroed;
+# an explicit 0.0 initializer would not compile for pointer return types.
+if ($ret ne "void") {
+  print "static volatile $ret ret;\n";
+  $getret = "ret = ";
+}
+
+print "#define BENCH_FUNC(j) ({$getret CALL_BENCH_FUNC (j);})\n";
+
+print "#define ITER $iters\n";
+print "#define FUNCNAME \"$func\"\n";
+print "#include \"bench-skeleton.c\"\n";
author	Siddhesh Poyarekar <siddhesh@redhat.com>	2013-03-15 12:30:03 +0530
committer	Siddhesh Poyarekar <siddhesh@redhat.com>	2013-03-15 12:30:03 +0530
commit	8cfdb7e0560ab27e70a1d2e898fb4a0a67a13c70 (patch)
tree	7eb91b35e7d04f1c4889563b3c922e512cfe2045
parent	d22ca8cdfb98001d03772ef264b244930d439b3f (diff)
download	glibc-8cfdb7e0560ab27e70a1d2e898fb4a0a67a13c70.zip glibc-8cfdb7e0560ab27e70a1d2e898fb4a0a67a13c70.tar.gz glibc-8cfdb7e0560ab27e70a1d2e898fb4a0a67a13c70.tar.bz2