/* Copyright (C) 2002-2013 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@redhat.com>, 2002. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ #define _GNU_SOURCE 1 #include <argp.h> #include <error.h> #include <errno.h> #include <fcntl.h> #include <inttypes.h> #include <limits.h> #include <pthread.h> #include <signal.h> #include <stdbool.h> #include <stdlib.h> #include <string.h> #include <time.h> #include <unistd.h> #include <sys/param.h> #include <sys/types.h> #ifndef MAX_THREADS # define MAX_THREADS 100000 #endif #ifndef DEFAULT_THREADS # define DEFAULT_THREADS 50 #endif #define OPT_TO_THREAD 300 #define OPT_TO_PROCESS 301 #define OPT_SYNC_SIGNAL 302 #define OPT_SYNC_JOIN 303 #define OPT_TOPLEVEL 304 static const struct argp_option options[] = { { NULL, 0, NULL, 0, "\ This is a test for threads so we allow ther user to selection the number of \ threads which are used at any one time. Independently the total number of \ rounds can be selected. This is the total number of threads which will have \ run when the process terminates:" }, { "threads", 't', "NUMBER", 0, "Number of threads used at once" }, { "starts", 's', "NUMBER", 0, "Total number of working threads" }, { "toplevel", OPT_TOPLEVEL, "NUMBER", 0, "Number of toplevel threads which start the other threads; this \ implies --sync-join" }, { NULL, 0, NULL, 0, "\ Each thread can do one of two things: sleep or do work. The latter is 100% \ CPU bound. The work load is the probability a thread does work. All values \ from zero to 100 (inclusive) are valid. How often each thread repeats this \ can be determined by the number of rounds. The work cost determines how long \ each work session (not sleeping) takes. If it is zero a thread would \ effectively nothing. By setting the number of rounds to zero the thread \ does no work at all and pure thread creation times can be measured." }, { "workload", 'w', "PERCENT", 0, "Percentage of time spent working" }, { "workcost", 'c', "NUMBER", 0, "Factor in the cost of each round of working" }, { "rounds", 'r', "NUMBER", 0, "Number of rounds each thread runs" }, { NULL, 0, NULL, 0, "\ There are a number of different methods how thread creation can be \ synchronized. Synchronization is necessary since the number of concurrently \ running threads is limited." }, { "sync-signal", OPT_SYNC_SIGNAL, NULL, 0, "Synchronize using a signal (default)" }, { "sync-join", OPT_SYNC_JOIN, NULL, 0, "Synchronize using pthread_join" }, { NULL, 0, NULL, 0, "\ One parameter for each threads execution is the size of the stack. If this \ parameter is not used the system's default stack size is used. If many \ threads are used the stack size should be chosen quite small." }, { "stacksize", 'S', "BYTES", 0, "Size of threads stack" }, { "guardsize", 'g', "BYTES", 0, "Size of stack guard area; must fit into the stack" }, { NULL, 0, NULL, 0, "Signal options:" }, { "to-thread", OPT_TO_THREAD, NULL, 0, "Send signal to main thread" }, { "to-process", OPT_TO_PROCESS, NULL, 0, "Send signal to process (default)" }, { NULL, 0, NULL, 0, "Administrative options:" }, { "progress", 'p', NULL, 0, "Show signs of progress" }, { "timing", 'T', NULL, 0, "Measure time from startup to the last thread finishing" }, { NULL, 0, NULL, 0, NULL } }; /* Prototype for option handler. */ static error_t parse_opt (int key, char *arg, struct argp_state *state); /* Data structure to communicate with argp functions. */ static struct argp argp = { options, parse_opt }; static unsigned long int threads = DEFAULT_THREADS; static unsigned long int workload = 75; static unsigned long int workcost = 20; static unsigned long int rounds = 10; static long int starts = 5000; static unsigned long int stacksize; static long int guardsize = -1; static bool progress; static bool timing; static bool to_thread; static unsigned long int toplevel = 1; static long int running; static pthread_mutex_t running_mutex = PTHREAD_MUTEX_INITIALIZER; static pid_t pid; static pthread_t tmain; static clockid_t cl; static struct timespec start_time; static pthread_mutex_t sum_mutex = PTHREAD_MUTEX_INITIALIZER; unsigned int sum; static enum { sync_signal, sync_join } sync_method; /* We use 64bit values for the times. */ typedef unsigned long long int hp_timing_t; /* Attributes for all created threads. */ static pthread_attr_t attr; static void * work (void *arg) { unsigned long int i; unsigned int state = (unsigned long int) arg; for (i = 0; i < rounds; ++i) { /* Determine what to do. */ unsigned int rnum; /* Uniform distribution. */ do rnum = rand_r (&state); while (rnum >= UINT_MAX - (UINT_MAX % 100)); rnum %= 100; if (rnum < workload) { int j; int a[4] = { i, rnum, i + rnum, rnum - i }; if (progress) write (STDERR_FILENO, "c", 1); for (j = 0; j < workcost; ++j) { a[0] += a[3] >> 12; a[1] += a[2] >> 20; a[2] += a[1] ^ 0x3423423; a[3] += a[0] - a[1]; } pthread_mutex_lock (&sum_mutex); sum += a[0] + a[1] + a[2] + a[3]; pthread_mutex_unlock (&sum_mutex); } else { /* Just sleep. */ struct timespec tv; tv.tv_sec = 0; tv.tv_nsec = 10000000; if (progress) write (STDERR_FILENO, "w", 1); nanosleep (&tv, NULL); } } return NULL; } static void * thread_function (void *arg) { work (arg); pthread_mutex_lock (&running_mutex); if (--running <= 0 && starts <= 0) { /* We are done. */ if (progress) write (STDERR_FILENO, "\n", 1); if (timing) { struct timespec end_time; if (clock_gettime (cl, &end_time) == 0) { end_time.tv_sec -= start_time.tv_sec; end_time.tv_nsec -= start_time.tv_nsec; if (end_time.tv_nsec < 0) { end_time.tv_nsec += 1000000000; --end_time.tv_sec; } printf ("\nRuntime: %lu.%09lu seconds\n", (unsigned long int) end_time.tv_sec, (unsigned long int) end_time.tv_nsec); } } printf ("Result: %08x\n", sum); exit (0); } pthread_mutex_unlock (&running_mutex); if (sync_method == sync_signal) { if (to_thread) /* This code sends a signal to the main thread. */ pthread_kill (tmain, SIGUSR1); else /* Use this code to test sending a signal to the process. */ kill (pid, SIGUSR1); } if (progress) write (STDERR_FILENO, "f", 1); return NULL; } struct start_info { unsigned int starts; unsigned int threads; }; static void * start_threads (void *arg) { struct start_info *si = arg; unsigned int starts = si->starts; pthread_t ths[si->threads]; unsigned int state = starts; unsigned int n; unsigned int i = 0; int err; if (progress) write (STDERR_FILENO, "T", 1); memset (ths, '\0', sizeof (pthread_t) * si->threads); while (starts-- > 0) { if (ths[i] != 0) { /* Wait for the threads in the order they were created. */ err = pthread_join (ths[i], NULL); if (err != 0) error (EXIT_FAILURE, err, "cannot join thread"); if (progress) write (STDERR_FILENO, "f", 1); } err = pthread_create (&ths[i], &attr, work, (void *) (long) (rand_r (&state) + starts + i)); if (err != 0) error (EXIT_FAILURE, err, "cannot start thread"); if (progress) write (STDERR_FILENO, "t", 1); if (++i == si->threads) i = 0; } n = i; do { if (ths[i] != 0) { err = pthread_join (ths[i], NULL); if (err != 0) error (EXIT_FAILURE, err, "cannot join thread"); if (progress) write (STDERR_FILENO, "f", 1); } if (++i == si->threads) i = 0; } while (i != n); if (progress) write (STDERR_FILENO, "F", 1); return NULL; } int main (int argc, char *argv[]) { int remaining; sigset_t ss; pthread_t th; pthread_t *ths = NULL; int empty = 0; int last; bool cont = true; /* Parse and process arguments. */ argp_parse (&argp, argc, argv, 0, &remaining, NULL); if (sync_method == sync_join) { ths = (pthread_t *) calloc (threads, sizeof (pthread_t)); if (ths == NULL) error (EXIT_FAILURE, errno, "cannot allocate memory for thread descriptor array"); last = threads; } else { ths = &th; last = 1; } if (toplevel > threads) { printf ("resetting number of toplevel threads to %lu to not surpass number to concurrent threads\n", threads); toplevel = threads; } if (timing) { if (clock_getcpuclockid (0, &cl) != 0 || clock_gettime (cl, &start_time) != 0) timing = false; } /* We need this later. */ pid = getpid (); tmain = pthread_self (); /* We use signal SIGUSR1 for communication between the threads and the main thread. We only want sychronous notification. */ if (sync_method == sync_signal) { sigemptyset (&ss); sigaddset (&ss, SIGUSR1); if (sigprocmask (SIG_BLOCK, &ss, NULL) != 0) error (EXIT_FAILURE, errno, "cannot set signal mask"); } /* Create the thread attributes. */ pthread_attr_init (&attr); /* If the user provided a stack size use it. */ if (stacksize != 0 && pthread_attr_setstacksize (&attr, stacksize) != 0) puts ("could not set stack size; will use default"); /* And stack guard size. */ if (guardsize != -1 && pthread_attr_setguardsize (&attr, guardsize) != 0) puts ("invalid stack guard size; will use default"); /* All threads are created detached if we are not using pthread_join to synchronize. */ if (sync_method != sync_join) pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED); if (sync_method == sync_signal) { while (1) { int err; bool do_wait = false; pthread_mutex_lock (&running_mutex); if (starts-- < 0) cont = false; else do_wait = ++running >= threads && starts > 0; pthread_mutex_unlock (&running_mutex); if (! cont) break; if (progress) write (STDERR_FILENO, "t", 1); err = pthread_create (&ths[empty], &attr, thread_function, (void *) starts); if (err != 0) error (EXIT_FAILURE, err, "cannot start thread %lu", starts); if (++empty == last) empty = 0; if (do_wait) sigwaitinfo (&ss, NULL); } /* Do nothing anymore. On of the threads will terminate the program. */ sigfillset (&ss); sigdelset (&ss, SIGINT); while (1) sigsuspend (&ss); } else { pthread_t ths[toplevel]; struct start_info si[toplevel]; unsigned int i; for (i = 0; i < toplevel; ++i) { unsigned int child_starts = starts / (toplevel - i); unsigned int child_threads = threads / (toplevel - i); int err; si[i].starts = child_starts; si[i].threads = child_threads; err = pthread_create (&ths[i], &attr, start_threads, &si[i]); if (err != 0) error (EXIT_FAILURE, err, "cannot start thread"); starts -= child_starts; threads -= child_threads; } for (i = 0; i < toplevel; ++i) { int err = pthread_join (ths[i], NULL); if (err != 0) error (EXIT_FAILURE, err, "cannot join thread"); } /* We are done. */ if (progress) write (STDERR_FILENO, "\n", 1); if (timing) { struct timespec end_time; if (clock_gettime (cl, &end_time) == 0) { end_time.tv_sec -= start_time.tv_sec; end_time.tv_nsec -= start_time.tv_nsec; if (end_time.tv_nsec < 0) { end_time.tv_nsec += 1000000000; --end_time.tv_sec; } printf ("\nRuntime: %lu.%09lu seconds\n", (unsigned long int) end_time.tv_sec, (unsigned long int) end_time.tv_nsec); } } printf ("Result: %08x\n", sum); exit (0); } /* NOTREACHED */ return 0; } /* Handle program arguments. */ static error_t parse_opt (int key, char *arg, struct argp_state *state) { unsigned long int num; long int snum; switch (key) { case 't': num = strtoul (arg, NULL, 0); if (num <= MAX_THREADS) threads = num; else printf ("\ number of threads limited to %u; recompile with a higher limit if necessary", MAX_THREADS); break; case 'w': num = strtoul (arg, NULL, 0); if (num <= 100) workload = num; else puts ("workload must be between 0 and 100 percent"); break; case 'c': workcost = strtoul (arg, NULL, 0); break; case 'r': rounds = strtoul (arg, NULL, 0); break; case 's': starts = strtoul (arg, NULL, 0); break; case 'S': num = strtoul (arg, NULL, 0); if (num >= PTHREAD_STACK_MIN) stacksize = num; else printf ("minimum stack size is %d\n", PTHREAD_STACK_MIN); break; case 'g': snum = strtol (arg, NULL, 0); if (snum < 0) printf ("invalid guard size %s\n", arg); else guardsize = snum; break; case 'p': progress = true; break; case 'T': timing = true; break; case OPT_TO_THREAD: to_thread = true; break; case OPT_TO_PROCESS: to_thread = false; break; case OPT_SYNC_SIGNAL: sync_method = sync_signal; break; case OPT_SYNC_JOIN: sync_method = sync_join; break; case OPT_TOPLEVEL: num = strtoul (arg, NULL, 0); if (num < MAX_THREADS) toplevel = num; else printf ("\ number of threads limited to %u; recompile with a higher limit if necessary", MAX_THREADS); sync_method = sync_join; break; default: return ARGP_ERR_UNKNOWN; } return 0; } static hp_timing_t get_clockfreq (void) { /* We read the information from the /proc filesystem. It contains at least one line like cpu MHz : 497.840237 or also cpu MHz : 497.841 We search for this line and convert the number in an integer. */ static hp_timing_t result; int fd; /* If this function was called before, we know the result. */ if (result != 0) return result; fd = open ("/proc/cpuinfo", O_RDONLY); if (__builtin_expect (fd != -1, 1)) { /* XXX AFAIK the /proc filesystem can generate "files" only up to a size of 4096 bytes. */ char buf[4096]; ssize_t n; n = read (fd, buf, sizeof buf); if (__builtin_expect (n, 1) > 0) { char *mhz = memmem (buf, n, "cpu MHz", 7); if (__builtin_expect (mhz != NULL, 1)) { char *endp = buf + n; int seen_decpoint = 0; int ndigits = 0; /* Search for the beginning of the string. */ while (mhz < endp && (*mhz < '0' || *mhz > '9') && *mhz != '\n') ++mhz; while (mhz < endp && *mhz != '\n') { if (*mhz >= '0' && *mhz <= '9') { result *= 10; result += *mhz - '0'; if (seen_decpoint) ++ndigits; } else if (*mhz == '.') seen_decpoint = 1; ++mhz; } /* Compensate for missing digits at the end. */ while (ndigits++ < 6) result *= 10; } } close (fd); } return result; } int clock_getcpuclockid (pid_t pid, clockid_t *clock_id) { /* We don't allow any process ID but our own. */ if (pid != 0 && pid != getpid ()) return EPERM; #ifdef CLOCK_PROCESS_CPUTIME_ID /* Store the number. */ *clock_id = CLOCK_PROCESS_CPUTIME_ID; return 0; #else /* We don't have a timer for that. */ return ENOENT; #endif } #ifdef i386 #define HP_TIMING_NOW(Var) __asm__ __volatile__ ("rdtsc" : "=A" (Var)) #elif defined __x86_64__ # define HP_TIMING_NOW(Var) \ ({ unsigned int _hi, _lo; \ asm volatile ("rdtsc" : "=a" (_lo), "=d" (_hi)); \ (Var) = ((unsigned long long int) _hi << 32) | _lo; }) #elif defined __ia64__ #define HP_TIMING_NOW(Var) __asm__ __volatile__ ("mov %0=ar.itc" : "=r" (Var) : : "memory") #else #error "HP_TIMING_NOW missing" #endif /* Get current value of CLOCK and store it in TP. */ int clock_gettime (clockid_t clock_id, struct timespec *tp) { int retval = -1; switch (clock_id) { case CLOCK_PROCESS_CPUTIME_ID: { static hp_timing_t freq; hp_timing_t tsc; /* Get the current counter. */ HP_TIMING_NOW (tsc); if (freq == 0) { freq = get_clockfreq (); if (freq == 0) return EINVAL; } /* Compute the seconds. */ tp->tv_sec = tsc / freq; /* And the nanoseconds. This computation should be stable until we get machines with about 16GHz frequency. */ tp->tv_nsec = ((tsc % freq) * UINT64_C (1000000000)) / freq; retval = 0; } break; default: errno = EINVAL; break; } return retval; }