diff options
author | H.J. Lu <hongjiu.lu@intel.com> | 2009-08-07 09:39:36 -0700 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2009-08-07 09:39:36 -0700 |
commit | a546baa9cd2e5176e9851811d5df6f23e35d3bb8 (patch) | |
tree | 1db7355ddca82021eef7f9f2d2baa11e3da78a2b | |
parent | 77c84aeb81808c3109665949448dba59965c391e (diff) | |
download | glibc-a546baa9cd2e5176e9851811d5df6f23e35d3bb8.zip glibc-a546baa9cd2e5176e9851811d5df6f23e35d3bb8.tar.gz glibc-a546baa9cd2e5176e9851811d5df6f23e35d3bb8.tar.bz2 |
Properly count number of logical processors on Intel CPUs.
The meaning of the 25-14 bits in EAX returned from cpuid with EAX = 4
has been changed from "the maximum number of threads sharing the cache"
to "the maximum number of addressable IDs for logical processors sharing
the cache" if cpuid takes EAX = 11. We need to use results from both
EAX = 4 and EAX = 11 to get the number of threads sharing the cache.
The 25-14 bits in EAX on Core i7 is 15 although the number of logical
processors is 8. Here is a white paper on this:
http://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration/
This patch correctly counts number of logical processors on Intel CPUs
with EAX = 11 support on cpuid. Tested on Dinnington, Core i7 and
Nehalem EX/EP.
It also fixed Pentium Ds workaround since EBX may not have the right
value returned from cpuid with EAX = 1.
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | sysdeps/x86_64/cacheinfo.c | 42 |
2 files changed, 43 insertions, 4 deletions
@@ -1,3 +1,8 @@ +2009-08-05 H.J. Lu <hongjiu.lu@intel.com> + + * sysdeps/x86_64/cacheinfo.c (init_cacheinfo): Properly use + EBX from EAX = 1. Handle EAX = 11. + 2009-08-07 Andreas Schwab <schwab@redhat.com> * Makefile (TAGS): Use separate sed -e expressions to avoid \ diff --git a/sysdeps/x86_64/cacheinfo.c b/sysdeps/x86_64/cacheinfo.c index f252fc2..ddad63b 100644 --- a/sysdeps/x86_64/cacheinfo.c +++ b/sysdeps/x86_64/cacheinfo.c @@ -516,13 +516,15 @@ init_cacheinfo (void) shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid); } + unsigned int ebx_1; + #ifdef USE_MULTIARCH eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax; - ebx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx; + ebx_1 = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx; ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx; edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx; #else - __cpuid (1, eax, ebx, ecx, edx); + __cpuid (1, eax, ebx_1, ecx, edx); #endif #ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION @@ -554,14 +556,46 @@ init_cacheinfo (void) } while (((eax >> 5) & 0x7) != level); - threads = ((eax >> 14) & 0x3ff) + 1; + threads = (eax >> 14) & 0x3ff; + + /* If max_cpuid >= 11, THREADS is the maximum number of + addressable IDs for logical processors sharing the + cache, instead of the maximum number of threads + sharing the cache. */ + if (threads && max_cpuid >= 11) + { + /* Find the number of logical processors shipped in + one core and apply count mask. */ + i = 0; + while (1) + { + __cpuid_count (11, i++, eax, ebx, ecx, edx); + + int shipped = ebx & 0xff; + int type = ecx & 0xff0; + if (shipped == 0 || type == 0) + break; + else if (type == 0x200) + { + int count_mask; + + /* Compute count mask. */ + asm ("bsr %1, %0" + : "=r" (count_mask) : "g" (threads)); + count_mask = ~(-1 << (count_mask + 1)); + threads = (shipped - 1) & count_mask; + break; + } + } + } + threads += 1; } else { intel_bug_no_cache_info: /* Assume that all logical threads share the highest cache level. */ - threads = (ebx >> 16) & 0xff; + threads = (ebx_1 >> 16) & 0xff; } /* Cap usage of highest cache level to the number of supported |