From db373e4c57159ac82df4b07b596dd29c4cfe9d86 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella
Date: Wed, 24 Mar 2021 18:56:34 -0300
Subject: Remove architecture specific sched_cpucount optimizations

And replace the generic algorithm with Brian Kernighan's.  GCC
optimizes it with popcnt if the architecture supports it, so there is
no need to add the extra POPCNT define to enable it.

This is really a micro-optimization that only adds complexity: recent
ABIs already support it (x86-64-v2 or power64le) and it simplifies the
code for internal usage, since i686 does not allow an internal iFUNC
call.

Checked on x86_64-linux-gnu, aarch64-linux-gnu, and
powerpc64le-linux-gnu.
---
 posix/sched_cpucount.c | 40 ++++++++++++++--------------------------
 1 file changed, 14 insertions(+), 26 deletions(-)

(limited to 'posix')

diff --git a/posix/sched_cpucount.c b/posix/sched_cpucount.c
index b0ca4ea..63d0e99 100644
--- a/posix/sched_cpucount.c
+++ b/posix/sched_cpucount.c
@@ -17,36 +17,24 @@
 
 #include <sched.h>
 
+/* Counting bits set, Brian Kernighan's way.
+   Using an open-coded routine is slightly better for architectures that
+   do not have a popcount instruction (compiler might emit a library
+   call).
/* Return the number of bits set in V.

   Brian Kernighan's method: each `v &= v - 1' clears the lowest set
   bit, so the loop iterates once per set bit instead of once per bit
   position.  */
static inline int
countbits (__cpu_mask v)
{
  int s = 0;
  for (; v != 0; s++)
    v &= v - 1;
  return s;
}

/* Return the number of bits set in the first SETSIZE bytes of
   SETP->__bits.  Only whole __cpu_mask words are examined: trailing
   bytes of SETSIZE that do not fill a complete word are ignored, so a
   SETSIZE smaller than one word yields 0.  */
int
__sched_cpucount (size_t setsize, const cpu_set_t *setp)
{
  /* Number of complete mask words covered by SETSIZE.  */
  const size_t nwords = setsize / sizeof (__cpu_mask);
  int s = 0;

  /* The index has the same type as the bound.  A signed int index
     would be converted to unsigned for the comparison and could
     overflow for a very large SETSIZE.  */
  for (size_t i = 0; i < nwords; i++)
    s += countbits (setp->__bits[i]);
  return s;
}