1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
|
/**
* Identify the characteristics of the host CPU, providing information
* about cache sizes and assembly optimisation hints. This module is
* provided primarily for assembly language programmers.
*
* References:
* Some of this information was extremely difficult to track down. Some of the
* documents below were found only in cached versions stored by search engines!
* This code relies on information found in:
*
* $(UL
* $(LI "Intel(R) 64 and IA-32 Architectures Software Developers Manual,
* Volume 2A: Instruction Set Reference, A-M" (2007).
* )
* $(LI "AMD CPUID Specification", Advanced Micro Devices, Rev 2.28 (2008).
* )
* $(LI "AMD Processor Recognition Application Note For Processors Prior to AMD
* Family 0Fh Processors", Advanced Micro Devices, Rev 3.13 (2005).
* )
* $(LI "AMD Geode(TM) GX Processors Data Book",
* Advanced Micro Devices, Publication ID 31505E, (2005).
* )
* $(LI "AMD K6 Processor Code Optimisation", Advanced Micro Devices, Rev D (2000).
* )
* $(LI "Application note 106: Software Customization for the 6x86 Family",
* Cyrix Corporation, Rev 1.5 (1998)
* )
* $(LI $(LINK http://www.datasheetcatalog.org/datasheet/nationalsemiconductor/GX1.pdf))
* $(LI "Geode(TM) GX1 Processor Series Low Power Integrated X86 Solution",
* National Semiconductor, (2002)
* )
* $(LI "The VIA Isaiah Architecture", G. Glenn Henry, Centaur Technology, Inc (2008).
* )
* $(LI $(LINK http://www.sandpile.org/ia32/cpuid.htm))
* $(LI $(LINK http://www.akkadia.org/drepper/cpumemory.pdf))
* $(LI "What every programmer should know about memory",
* Ulrich Drepper, Red Hat, Inc., (2007).
* )
* $(LI "CPU Identification by the Windows Kernel", G. Chappell (2009).
* $(LINK http://www.geoffchappell.com/viewer.htm?doc=studies/windows/km/cpu/cx8.htm)
* )
* $(LI "Intel(R) Processor Identification and the CPUID Instruction, Application
* Note 485" (2009).
* )
* )
*
* Bugs: Currently only works on x86 and Itanium CPUs.
* Many processors have bugs in their microcode for the CPUID instruction,
* so sometimes the cache information may be incorrect.
*
* Copyright: Copyright Don Clugston 2007 - 2009.
* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
* Authors: Don Clugston, Tomas Lindquist Olsen <tomas@famolsen.dk>
* Source: $(DRUNTIMESRC core/_cpuid.d)
*/
module core.cpuid;
version (GNU) version = GNU_OR_LDC;
version (LDC) version = GNU_OR_LDC;
@trusted:
nothrow:
@nogc:
// If optimizing for a particular processor, it is generally better
// to identify based on features rather than model. NOTE: Normally
// it's only worthwhile to optimise for the latest Intel and AMD CPU,
// with a backup for other CPUs.
// Pentium -- preferPentium1()
// PMMX -- + mmx()
// PPro -- default
// PII -- + mmx()
// PIII -- + mmx() + sse()
// PentiumM -- + mmx() + sse() + sse2()
// Pentium4 -- preferPentium4()
// PentiumD -- + isX86_64()
// Core2 -- default + isX86_64()
// AMD K5 -- preferPentium1()
// AMD K6 -- + mmx()
// AMD K6-II -- + mmx() + 3dnow()
// AMD K7 -- preferAthlon()
// AMD K8 -- + sse2()
// AMD K10 -- + isX86_64()
// Cyrix 6x86 -- preferPentium1()
// 6x86MX -- + mmx()
// GDC support uses extended inline assembly:
// https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html (general information and hints)
// https://gcc.gnu.org/onlinedocs/gcc/Simple-Constraints.html (binding variables to registers)
// https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html (x86 specific register short names)
public:
/// Cache size and behaviour of a single cache level
/// (the element type of $(D dataCaches)).
struct CacheInfo
{
/// Size of the cache, in kilobytes, per CPU.
/// For L1 unified (data + code) caches, this size is half the physical size.
/// (we don't halve it for larger sizes, since normally
/// data size is much greater than code size for critical loops).
size_t size;
/// Number of ways of associativity, eg:
/// $(UL
/// $(LI 1 = direct mapped)
/// $(LI 2 = 2-way set associative)
/// $(LI 3 = 3-way set associative)
/// $(LI ubyte.max = fully associative)
/// )
ubyte associativity;
/// Number of bytes read into the cache when a cache miss occurs
/// (i.e. the cache line size, in bytes).
uint lineSize;
}
public:
/// $(RED Scheduled for deprecation. Please use $(D dataCaches) instead.)
// Note: When we deprecate it, we simply make it private.
// Indexed by cache level (index 0 is the L1 data cache). The entries are
// written by cpuidX86() and its cache-detection helpers in this module.
__gshared CacheInfo[5] datacache;
// Read-only query properties. Each one simply returns the corresponding
// private immutable field declared further down in this module.
@property pure
{
/// The data caches. If there are fewer than 5 physical caches levels,
/// the remaining levels are set to size_t.max (== entire memory space)
const(CacheInfo)[5] dataCaches() { return _dataCaches; }
/// Returns vendor string, for display purposes only.
/// Do NOT use this to determine features!
/// Note that some CPUs have programmable vendorIDs.
string vendor() {return _vendor;}
/// Returns processor string, for display purposes only
string processor() {return _processor;}
/// Does it have an x87 FPU on-chip?
bool x87onChip() {return _x87onChip;}
/// Is MMX supported?
bool mmx() {return _mmx;}
/// Is SSE supported?
bool sse() {return _sse;}
/// Is SSE2 supported?
bool sse2() {return _sse2;}
/// Is SSE3 supported?
bool sse3() {return _sse3;}
/// Is SSSE3 supported?
bool ssse3() {return _ssse3;}
/// Is SSE4.1 supported?
bool sse41() {return _sse41;}
/// Is SSE4.2 supported?
bool sse42() {return _sse42;}
/// Is SSE4a supported?
bool sse4a() {return _sse4a;}
/// Is AES supported
bool aes() {return _aes;}
/// Is pclmulqdq supported
bool hasPclmulqdq() {return _hasPclmulqdq;}
/// Is rdrand supported
bool hasRdrand() {return _hasRdrand;}
/// Is AVX supported
bool avx() {return _avx;}
/// Is VEX-Encoded AES supported
bool vaes() {return _vaes;}
/// Is vpclmulqdq supported
bool hasVpclmulqdq(){return _hasVpclmulqdq; }
/// Is FMA supported
bool fma() {return _fma;}
/// Is FP16C supported
bool fp16c() {return _fp16c;}
/// Is AVX2 supported
bool avx2() {return _avx2;}
/// Is HLE (hardware lock elision) supported
bool hle() {return _hle;}
/// Is RTM (restricted transactional memory) supported
bool rtm() {return _rtm;}
/// Is AVX512F supported
bool avx512f() {return _avx512f;}
/// Is rdseed supported
bool hasRdseed() {return _hasRdseed;}
/// Is SHA supported
bool hasSha() {return _hasSha;}
/// Is AMD 3DNOW supported?
bool amd3dnow() {return _amd3dnow;}
/// Is AMD 3DNOW Ext supported?
bool amd3dnowExt() {return _amd3dnowExt;}
/// Are AMD extensions to MMX supported?
bool amdMmx() {return _amdMmx;}
/// Is fxsave/fxrstor supported?
bool hasFxsr() {return _hasFxsr;}
/// Is cmov supported?
bool hasCmov() {return _hasCmov;}
/// Is rdtsc supported?
bool hasRdtsc() {return _hasRdtsc;}
/// Is cmpxchg8b supported?
bool hasCmpxchg8b() {return _hasCmpxchg8b;}
/// Is cmpxchg16b supported?
bool hasCmpxchg16b() {return _hasCmpxchg16b;}
/// Is SYSENTER/SYSEXIT supported?
bool hasSysEnterSysExit() {return _hasSysEnterSysExit;}
/// Is 3DNow prefetch supported?
bool has3dnowPrefetch() {return _has3dnowPrefetch;}
/// Are LAHF and SAHF supported in 64-bit mode?
bool hasLahfSahf() {return _hasLahfSahf;}
/// Is POPCNT supported?
bool hasPopcnt() {return _hasPopcnt;}
/// Is LZCNT supported?
bool hasLzcnt() {return _hasLzcnt;}
/// Is this an Intel64 or AMD 64?
bool isX86_64() {return _isX86_64;}
/// Is this an IA64 (Itanium) processor?
bool isItanium() { return _isItanium; }
/// Is hyperthreading supported?
bool hyperThreading() { return _hyperThreading; }
/// Returns number of threads per CPU
uint threadsPerCPU() {return _threadsPerCPU;}
/// Returns number of cores in CPU
uint coresPerCPU() {return _coresPerCPU;}
/// Optimisation hints for assembly code.
///
/// For forward compatibility, the CPU is compared against different
/// microarchitectures. For 32-bit x86, comparisons are made against
/// the Intel PPro/PII/PIII/PM family.
///
/// The major 32-bit x86 microarchitecture 'dynasties' have been:
///
/// $(UL
/// $(LI Intel P6 (PentiumPro, PII, PIII, PM, Core, Core2). )
/// $(LI AMD Athlon (K7, K8, K10). )
/// $(LI Intel NetBurst (Pentium 4, Pentium D). )
/// $(LI In-order Pentium (Pentium1, PMMX, Atom) )
/// )
///
/// Other early CPUs (Nx586, AMD K5, K6, Centaur C3, Transmeta,
/// Cyrix, Rise) were mostly in-order.
///
/// Some new processors do not fit into the existing categories:
///
/// $(UL
/// $(LI Intel Atom 230/330 (family 6, model 0x1C) is an in-order core. )
/// $(LI Centaur Isaiah = VIA Nano (family 6, model F) is an out-of-order core. )
/// )
///
/// Within each dynasty, the optimisation techniques are largely
/// identical (eg, use instruction pairing for group 4). Major
/// instruction set improvements occur within each dynasty.
/// Does this CPU perform better on AMD K7 code than PentiumPro..Core2 code?
bool preferAthlon() { return _preferAthlon; }
/// Does this CPU perform better on Pentium4 code than PentiumPro..Core2 code?
bool preferPentium4() { return _preferPentium4; }
/// Does this CPU perform better on Pentium I code than Pentium Pro code?
bool preferPentium1() { return _preferPentium1; }
}
// Backing storage for the public query properties: each property named
// `foo` returns the field `_foo` declared here.
private immutable
{
/* These exist as immutables so that the query property functions can
* be backwards compatible with code that called them with ().
* Also, immutables can only be set by the static this().
*/
const(CacheInfo)[5] _dataCaches;
string _vendor;
string _processor;
bool _x87onChip;
bool _mmx;
bool _sse;
bool _sse2;
bool _sse3;
bool _ssse3;
bool _sse41;
bool _sse42;
bool _sse4a;
bool _aes;
bool _hasPclmulqdq;
bool _hasRdrand;
bool _avx;
bool _vaes;
bool _hasVpclmulqdq;
bool _fma;
bool _fp16c;
bool _avx2;
bool _hle;
bool _rtm;
bool _avx512f;
bool _hasRdseed;
bool _hasSha;
bool _amd3dnow;
bool _amd3dnowExt;
bool _amdMmx;
bool _hasFxsr;
bool _hasCmov;
bool _hasRdtsc;
bool _hasCmpxchg8b;
bool _hasCmpxchg16b;
bool _hasSysEnterSysExit;
bool _has3dnowPrefetch;
bool _hasLahfSahf;
bool _hasPopcnt;
bool _hasLzcnt;
bool _isX86_64;
bool _isItanium;
bool _hyperThreading;
uint _threadsPerCPU;
uint _coresPerCPU;
bool _preferAthlon;
bool _preferPentium4;
bool _preferPentium1;
}
__gshared:
// All these values are set only once, and never subsequently modified.
public:
/// $(RED Warning: This field will be turned into a property in a future release.)
///
/// Processor type (vendor-dependent), as decoded from EAX of CPUID leaf 1.
/// This should be visible ONLY for display purposes.
uint stepping, model, family;
/// $(RED This field has been deprecated. Please use $(D cacheLevels) instead.)
uint numCacheLevels = 1;
/// The number of cache levels in the CPU.
/// (Returns the current value of $(D numCacheLevels).)
@property uint cacheLevels() { return numCacheLevels; }
private:
// Raw detection results collected by cpuidX86() and its helpers:
// the vendor/brand strings, the unprocessed CPUID feature words and
// the core/thread counts.
struct CpuFeatures
{
bool probablyIntel; // true = _probably_ an Intel processor, might be faking
bool probablyAMD; // true = _probably_ an AMD or Hygon processor
string processorName; // trimmed slice of processorNameBuffer, or "Unknown CPU"
char [12] vendorID = 0; // EBX, EDX, ECX of CPUID leaf 0, in that order
char [48] processorNameBuffer = 0; // raw output of CPUID leaves 0x8000_0002..0x8000_0004
uint features = 0; // mmx, sse, sse2, hyperthreading, etc
uint miscfeatures = 0; // sse3, etc.
uint extfeatures = 0; // HLE, AVX2, RTM, etc.
uint amdfeatures = 0; // 3DNow!, mmxext, etc
uint amdmiscfeatures = 0; // sse4a, sse5, svm, etc
ulong xfeatures = 0; // XFEATURES_ENABLED_MASK
uint maxCores = 1;
uint maxThreads = 1;
}
// The single module-wide instance; access it through getCpuFeatures().
CpuFeatures cpuFeatures;
/* Hide from the optimizer where cf (a register) is coming from, so that
* cf doesn't get "optimized away". The idea is to reference
* the global data through cf so not so many fixups are inserted
* into the executable image.
*
* Returns: the address of the module-level cpuFeatures instance.
*/
CpuFeatures* getCpuFeatures() @nogc nothrow
{
// Inlining is disabled on purpose; see the comment above.
pragma(inline, false);
return &cpuFeatures;
}
// True when HTT_BIT is set in cpuFeatures.features.
// Note that this may indicate multi-core rather than hyperthreading.
@property bool hyperThreadingBit() { return (cpuFeatures.features&HTT_BIT)!=0;}
// feature flags CPUID1_EDX
// (masks for CpuFeatures.features, which cpuidX86() loads from EDX of CPUID leaf 1)
enum : uint
{
FPU_BIT = 1,
TIMESTAMP_BIT = 1<<4, // rdtsc
MDSR_BIT = 1<<5, // RDMSR/WRMSR
CMPXCHG8B_BIT = 1<<8,
SYSENTERSYSEXIT_BIT = 1<<11,
CMOV_BIT = 1<<15,
MMX_BIT = 1<<23,
FXSR_BIT = 1<<24,
SSE_BIT = 1<<25,
SSE2_BIT = 1<<26,
HTT_BIT = 1<<28,
IA64_BIT = 1<<30
}
// feature flags misc CPUID1_ECX
// (masks for CpuFeatures.miscfeatures, which cpuidX86() loads from ECX of CPUID leaf 1)
enum : uint
{
SSE3_BIT = 1,
PCLMULQDQ_BIT = 1<<1, // from AVX
MWAIT_BIT = 1<<3,
SSSE3_BIT = 1<<9,
FMA_BIT = 1<<12, // from AVX
CMPXCHG16B_BIT = 1<<13,
SSE41_BIT = 1<<19,
SSE42_BIT = 1<<20,
POPCNT_BIT = 1<<23,
AES_BIT = 1<<25, // AES instructions from AVX
OSXSAVE_BIT = 1<<27, // Used for AVX
AVX_BIT = 1<<28,
FP16C_BIT = 1<<29,
RDRAND_BIT = 1<<30,
}
// Feature flags for cpuid.{EAX = 7, ECX = 0}.EBX.
// (masks for CpuFeatures.extfeatures, which cpuidX86() loads when max_cpuid >= 7)
enum : uint
{
FSGSBASE_BIT = 1 << 0,
SGX_BIT = 1 << 2,
BMI1_BIT = 1 << 3,
HLE_BIT = 1 << 4,
AVX2_BIT = 1 << 5,
SMEP_BIT = 1 << 7,
BMI2_BIT = 1 << 8,
ERMS_BIT = 1 << 9,
INVPCID_BIT = 1 << 10,
RTM_BIT = 1 << 11,
AVX512F_BIT = 1 << 16,
AVX512DQ_BIT = 1 << 17,
RDSEED_BIT = 1 << 18,
ADX_BIT = 1 << 19,
AVX512IFMA_BIT = 1 << 21,
CLFLUSHOPT_BIT = 1 << 23,
CLWB_BIT = 1 << 24,
AVX512PF_BIT = 1 << 26,
AVX512ER_BIT = 1 << 27,
AVX512CD_BIT = 1 << 28,
SHA_BIT = 1 << 29,
AVX512BW_BIT = 1 << 30,
AVX512VL_BIT = 1 << 31,
}
// feature flags XFEATURES_ENABLED_MASK
// (masks for CpuFeatures.xfeatures, which cpuidX86() reads with xgetbv, ECX = 0,
// when OSXSAVE_BIT is set)
enum : ulong
{
XF_FP_BIT = 0x1,
XF_SSE_BIT = 0x2,
XF_YMM_BIT = 0x4,
}
// AMD feature flags CPUID80000001_EDX
// (masks for CpuFeatures.amdfeatures, which cpuidX86() loads from EDX of leaf 0x8000_0001)
enum : uint
{
AMD_MMX_BIT = 1<<22,
// FXR_OR_CYRIXMMX_BIT = 1<<24, // Cyrix/NS: 6x86MMX instructions.
FFXSR_BIT = 1<<25,
PAGE1GB_BIT = 1<<26, // support for 1GB pages
RDTSCP_BIT = 1<<27,
AMD64_BIT = 1<<29,
AMD_3DNOW_EXT_BIT = 1<<30,
AMD_3DNOW_BIT = 1<<31
}
// AMD misc feature flags CPUID80000001_ECX
// (masks for CpuFeatures.amdmiscfeatures, which cpuidX86() loads from ECX of leaf 0x8000_0001)
enum : uint
{
LAHFSAHF_BIT = 1,
LZCNT_BIT = 1<<5,
SSE4A_BIT = 1<<6,
AMD_3DNOW_PREFETCH_BIT = 1<<8,
}
// supportedX86 selects, at compile time, between the real CPUID-based
// implementation and the stub fallback further down.
// GDC/LDC use extended inline asm, so they key off the target architecture;
// other compilers key off the availability of D inline asm for x86/x86_64.
version (GNU_OR_LDC) {
version (X86)
enum supportedX86 = true;
else version (X86_64)
enum supportedX86 = true;
else
enum supportedX86 = false;
} else version (D_InlineAsm_X86) {
enum supportedX86 = true;
} else version (D_InlineAsm_X86_64) {
enum supportedX86 = true;
} else {
enum supportedX86 = false;
}
static if (supportedX86) {
// Note that this code will also work for Itanium in x86 mode.
// EAX returned by CPUID leaf 0 and by CPUID leaf 0x8000_0000 respectively;
// both are stored by cpuidX86() and compared against leaf numbers before
// those leaves are queried.
__gshared uint max_cpuid, max_extended_cpuid;
// CPUID2: "cache and tlb information"
//
// Executes CPUID leaf 2 and decodes the one-byte descriptors it returns.
// For every descriptor found in the tables below, the size (KB),
// associativity and line size of the matching data cache level are written
// to datacache[]. Unknown descriptors are ignored.
void getcacheinfoCPUID2()
{
    // We are only interested in the data caches
    void decipherCpuid2(ubyte x) @nogc nothrow {
        if (x==0) return;
        // Values from http://www.sandpile.org/ia32/cpuid.htm.
        // Includes Itanium and non-Intel CPUs.
        //
        // The three tables are parallel: ids[i] has size sizes[i] (KB) and
        // associativity ways[i]. Layout: 9 L1 entries, 29 L2 entries, 25 L3 entries.
        static immutable ubyte [63] ids = [
            0x0A, 0x0C, 0x0D, 0x2C, 0x60, 0x0E, 0x66, 0x67, 0x68,
            // level 2 cache
            0x41, 0x42, 0x43, 0x44, 0x45, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7F,
            0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x49, 0x4E,
            0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x48, 0x80, 0x81,
            // level 3 cache
            0x22, 0x23, 0x25, 0x29, 0x46, 0x47, 0x4A, 0x4B, 0x4C, 0x4D,
            0xD0, 0xD1, 0xD2, 0xD6, 0xD7, 0xD8, 0xDC, 0xDD, 0xDE,
            0xE2, 0xE3, 0xE4, 0xEA, 0xEB, 0xEC
        ];
        static immutable uint [63] sizes = [
            8, 16, 16, 64, 16, 24, 8, 16, 32,
            128, 256, 512, 1024, 2048, 1024, 128, 256, 512, 1024, 2048, 512,
            256, 512, 1024, 2048, 512, 1024, 4096, 6*1024,
            128, 192, 128, 256, 384, 512, 3072, 512, 128,
            512, 1024, 2048, 4096, 4096, 8192, 6*1024, 8192, 12*1024, 16*1024,
            512, 1024, 2048, 1024, 2048, 4096, 1024+512, 3*1024, 6*1024,
            2*1024, 4*1024, 8*1024, 12*1024, 28*1024, 24*1024
        ];
        // CPUBUG: Pentium M reports 0x2C but tests show it is only 4-way associative
        static immutable ubyte [63] ways = [
            2, 4, 4, 8, 8, 6, 4, 4, 4,
            4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 2,
            8, 8, 8, 8, 4, 8, 16, 24,
            4, 6, 2, 4, 6, 4, 12, 8, 8,
            4, 8, 8, 8, 4, 8, 12, 16, 12, 16,
            4, 4, 4, 8, 8, 8, 12, 12, 12,
            16, 16, 16, 24, 24, 24
        ];
        // Index of the first L2 and of the first L3 descriptor in the tables.
        // There are 9 L1 descriptors followed by 29 L2 descriptors; the old
        // values (8 and 28+9) misfiled 0x68 as L2 and 0x81 as L3.
        enum { FIRSTDATA2 = 9, FIRSTDATA3 = 29+9 }
        for (size_t i=0; i< ids.length; ++i) {
            if (x==ids[i]) {
                int level = i< FIRSTDATA2 ? 0: i<FIRSTDATA3 ? 1 : 2;
                if (x==0x49 && family==0xF && model==0x6) level=2;
                datacache[level].size=sizes[i];
                datacache[level].associativity=ways[i];
                // `level` is a zero-based index, so L3 is level 2 (the old test
                // `level == 3` could never be true). 0x0E is an L1 descriptor
                // with 64-byte lines that the original test missed.
                if (level == 2 || x==0x2C || x==0x0D || x==0x0E || (x>=0x48 && x<=0x80)
                    || x==0x86 || x==0x87
                    || (x>=0x66 && x<=0x68) || (x>=0x39 && x<=0x3E)){
                    datacache[level].lineSize = 64;
                } else datacache[level].lineSize = 32;
                break; // descriptors are unique; nothing more to match
            }
        }
    }
    uint[4] a;
    bool firstTime = true;
    // On a multi-core system, this could theoretically fail, but it's only used
    // for old single-core CPUs.
    uint numinfos = 1;
    do {
        version (GNU_OR_LDC) asm pure nothrow @nogc {
            "cpuid" : "=a" (a[0]), "=b" (a[1]), "=c" (a[2]), "=d" (a[3]) : "a" (2);
        } else asm pure nothrow @nogc {
            mov EAX, 2;
            cpuid;
            mov a+0, EAX;
            mov a+4, EBX;
            mov a+8, ECX;
            mov a+12, EDX;
        }
        if (firstTime) {
            if (a[0]==0x0000_7001 && a[3]==0x80 && a[1]==0 && a[2]==0) {
                // Cyrix MediaGX MMXEnhanced returns: EAX= 00007001, EDX=00000080.
                // These are NOT standard Intel values
                // (TLB = 32 entry, 4 way associative, 4K pages)
                // (L1 cache = 16K, 4way, linesize16)
                datacache[0].size=8;
                datacache[0].associativity=4;
                datacache[0].lineSize=16;
                return;
            }
            // lsb of a is how many times to loop.
            numinfos = a[0] & 0xFF;
            // A count of 0 would make `--numinfos` below wrap around and spin
            // for ~2^32 iterations; treat it as a single pass instead.
            if (numinfos == 0) numinfos = 1;
            // and otherwise it should be ignored
            a[0] &= 0xFFFF_FF00;
            firstTime = false;
        }
        for (int c=0; c<4;++c) {
            // high bit set == no info.
            if (a[c] & 0x8000_0000) continue;
            decipherCpuid2(cast(ubyte)(a[c] & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>8) & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>16) & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>24) & 0xFF));
        }
    } while (--numinfos);
}
// CPUID4: "Deterministic cache parameters" leaf
//
// Enumerates the sub-leaves of CPUID leaf 4 until one reports cache type 0.
// For each data or unified cache it fills in datacache[level], and it raises
// cpuFeatures.maxCores to the largest core count reported by any sub-leaf.
void getcacheinfoCPUID4()
{
    int cachenum = 0;
    for (;;) {
        uint a, b, number_of_sets;
        version (GNU_OR_LDC) asm pure nothrow @nogc {
            "cpuid" : "=a" (a), "=b" (b), "=c" (number_of_sets) : "a" (4), "c" (cachenum) : "edx";
        } else asm pure nothrow @nogc {
            mov EAX, 4;
            mov ECX, cachenum;
            cpuid;
            mov a, EAX;
            mov b, EBX;
            mov number_of_sets, ECX;
        }
        ++cachenum;
        // Bits 4..0 of EAX hold the cache type.
        immutable uint cachetype = a & 0x1F;
        if (cachetype==0) break; // no more caches
        immutable uint numthreads = ((a>>14) & 0xFFF) + 1;
        immutable uint numcores = ((a>>26) & 0x3F) + 1;
        if (numcores > cpuFeatures.maxCores) cpuFeatures.maxCores = numcores;
        if (cachetype!=1 && cachetype!=3) continue; // we only want data & unified caches
        ++number_of_sets; // the register holds (number of sets - 1)
        // Zero-based level; a reported level of 0 wraps to 255 and is skipped below.
        immutable ubyte level = cast(ubyte)(((a>>5)&7)-1);
        // Valid indices are 0 .. datacache.length-1, so the test must be `>=`
        // (the old `>` let level == datacache.length index past the array).
        if (level >= datacache.length) continue; // ignore deep caches
        datacache[level].associativity = a & 0x200 ? ubyte.max :cast(ubyte)((b>>22)+1);
        datacache[level].lineSize = (b & 0xFFF)+ 1; // system coherency line size
        immutable uint line_partitions = ((b >> 12)& 0x3FF) + 1;
        // Size = number of sets * associativity * cachelinesize * linepartitions
        // and must convert to Kb, also dividing by the number of hyperthreads using this cache.
        immutable ulong sz = (datacache[level].associativity< ubyte.max)? number_of_sets *
            datacache[level].associativity : number_of_sets;
        datacache[level].size = cast(size_t)(
            (sz * datacache[level].lineSize * line_partitions ) / (numthreads *1024));
        if (level == 0 && cachetype==3) {
            // Halve the size for unified L1 caches
            datacache[level].size/=2;
        }
    }
}
// CPUID8000_0005 & 6
//
// Reads the AMD-style extended cache leaves and fills in datacache[0]
// (from leaf 0x8000_0005) and, when leaf 0x8000_0006 is available,
// datacache[1] and datacache[2]. May also raise cpuFeatures.maxCores
// using leaf 0x8000_0008.
void getAMDcacheinfo()
{
uint dummy, c5, c6, d6;
version (GNU_OR_LDC) asm pure nothrow @nogc {
"cpuid" : "=a" (dummy), "=c" (c5) : "a" (0x8000_0005) : "ebx", "edx";
} else asm pure nothrow @nogc {
mov EAX, 0x8000_0005; // L1 cache
cpuid;
// EAX has L1_TLB_4M.
// EBX has L1_TLB_4K
// EDX has L1 instruction cache
mov c5, ECX;
}
// c5 layout as decoded here: bits 31..24 = size, 23..16 = associativity,
// 7..0 = line size.
datacache[0].size = ( (c5>>24) & 0xFF);
datacache[0].associativity = cast(ubyte)( (c5 >> 16) & 0xFF);
datacache[0].lineSize = c5 & 0xFF;
if (max_extended_cpuid >= 0x8000_0006) {
// AMD K6-III or K6-2+ or later.
uint numcores = 1;
if (max_extended_cpuid >= 0x8000_0008) {
// read the number of physical cores (minus 1) from the 8 lowest ECX bits
version (GNU_OR_LDC) asm pure nothrow @nogc {
"cpuid" : "=a" (dummy), "=c" (numcores) : "a" (0x8000_0008) : "ebx", "edx";
} else asm pure nothrow @nogc {
mov EAX, 0x8000_0008;
cpuid;
mov numcores, ECX;
}
numcores = (numcores & 0xFF) + 1;
if (numcores>cpuFeatures.maxCores) cpuFeatures.maxCores = numcores;
}
version (GNU_OR_LDC) asm pure nothrow @nogc {
"cpuid" : "=a" (dummy), "=c" (c6), "=d" (d6) : "a" (0x8000_0006) : "ebx";
} else asm pure nothrow @nogc {
mov EAX, 0x8000_0006; // L2/L3 cache
cpuid;
mov c6, ECX; // L2 cache info
mov d6, EDX; // L3 cache info
}
// Bits 15..12 of c6/d6 hold an encoded associativity; assocmap translates
// the 4-bit code into a way count (0xFF == ubyte.max, i.e. fully associative).
static immutable ubyte [] assocmap = [ 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 0xFF ];
datacache[1].size = (c6>>16) & 0xFFFF;
datacache[1].associativity = assocmap[(c6>>12)&0xF];
datacache[1].lineSize = c6 & 0xFF;
// The L3 cache value is TOTAL, not per core.
// Bits 31..18 of d6 are scaled by 512 to obtain kilobytes.
datacache[2].size = ((d6>>18)*512)/numcores; // could be up to 2 * this, -1.
datacache[2].associativity = assocmap[(d6>>12)&0xF];
datacache[2].lineSize = d6 & 0xFF;
}
}
// For Intel CoreI7 and later, use function 0x0B
// to determine number of processors.
//
// Queries sub-leaves 0 and 1 of CPUID leaf 0x0B. Sub-leaf 0 supplies the
// number of threads per core; sub-leaf 1 supplies cpuFeatures.maxThreads,
// from which cpuFeatures.maxCores is derived.
void getCpuInfo0B()
{
    int threadsPerCore;
    uint a, b, c, d;
    // I'm not sure about this. The docs state that there
    // are 2 hyperthreads per core if HT is factory enabled.
    for (int level = 0; level < 2; level++)
    {
        version (GNU_OR_LDC) asm pure nothrow @nogc {
            "cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (0x0B), "c" (level);
        } else asm pure nothrow @nogc {
            mov EAX, 0x0B;
            mov ECX, level;
            cpuid;
            mov a, EAX;
            mov b, EBX;
            mov c, ECX;
            mov d, EDX;
        }
        if (b != 0)
        {
            if (level == 0)
                threadsPerCore = b & 0xFFFF;
            else if (level == 1)
            {
                cpuFeatures.maxThreads = b & 0xFFFF;
                // threadsPerCore stays 0 if sub-leaf 0 reported EBX == 0 (or
                // a zero low word); dividing by it would trap, so leave
                // maxCores untouched in that case.
                if (threadsPerCore != 0)
                    cpuFeatures.maxCores = cpuFeatures.maxThreads / threadsPerCore;
            }
        }
        // Got "invalid domain" returned from cpuid
        if (a == 0 && b == 0)
            break;
    }
}
// Runs the CPUID instruction (and xgetbv where available) and records the
// results in module state:
//   - max_cpuid / max_extended_cpuid and the vendor string,
//   - stepping / model / family,
//   - the raw feature words and core/thread counts in cpuFeatures,
//   - the processor brand string,
//   - the data cache description in datacache[].
// Callers are expected to have checked hasCPUID() first.
void cpuidX86()
{
    auto cf = getCpuFeatures();
    uint a, b, c, d;
    uint* venptr = cast(uint*)cf.vendorID.ptr;
    version (GNU_OR_LDC)
    {
        asm pure nothrow @nogc {
            "cpuid" : "=a" (max_cpuid), "=b" (venptr[0]), "=d" (venptr[1]), "=c" (venptr[2]) : "a" (0);
            "cpuid" : "=a" (max_extended_cpuid) : "a" (0x8000_0000) : "ebx", "ecx", "edx";
        }
    }
    else
    {
        uint a2;
        version (D_InlineAsm_X86)
        {
            asm pure nothrow @nogc {
                mov EAX, 0;
                cpuid;
                mov a, EAX;
                mov EAX, venptr;
                mov [EAX], EBX;
                mov [EAX + 4], EDX;
                mov [EAX + 8], ECX;
            }
        }
        else version (D_InlineAsm_X86_64)
        {
            asm pure nothrow @nogc {
                mov EAX, 0;
                cpuid;
                mov a, EAX;
                mov RAX, venptr;
                mov [RAX], EBX;
                mov [RAX + 4], EDX;
                mov [RAX + 8], ECX;
            }
        }
        asm pure nothrow @nogc {
            mov EAX, 0x8000_0000;
            cpuid;
            mov a2, EAX;
        }
        max_cpuid = a;
        max_extended_cpuid = a2;
    }
    cf.probablyIntel = cf.vendorID == "GenuineIntel";
    cf.probablyAMD = (cf.vendorID == "AuthenticAMD" || cf.vendorID == "HygonGenuine");
    uint apic = 0; // brand index, apic id
    version (GNU_OR_LDC) asm pure nothrow @nogc {
        "cpuid" : "=a" (a), "=b" (apic), "=c" (cf.miscfeatures), "=d" (cf.features) : "a" (1);
    } else {
        asm pure nothrow @nogc {
            mov EAX, 1; // model, stepping
            cpuid;
            mov a, EAX;
            mov apic, EBX;
            mov c, ECX;
            mov d, EDX;
        }
        cf.features = d;
        cf.miscfeatures = c;
    }
    stepping = a & 0xF;
    immutable uint fbase = (a >> 8) & 0xF;
    immutable uint mbase = (a >> 4) & 0xF;
    // `+` binds tighter than `&`, so the extended-family field must be masked
    // in its own parentheses before it is added to the base family.
    family = ((fbase == 0xF) || (fbase == 0)) ? fbase + ((a >> 20) & 0xFF) : fbase;
    model = ((fbase == 0xF) || (fbase == 6 && cf.probablyIntel) ) ?
        mbase + ((a >> 12) & 0xF0) : mbase;
    if (max_cpuid >= 7)
    {
        version (GNU_OR_LDC) asm pure nothrow @nogc {
            "cpuid" : "=a" (a), "=b" (cf.extfeatures), "=c" (c) : "a" (7), "c" (0) : "edx";
        } else {
            uint ext;
            asm pure nothrow @nogc {
                mov EAX, 7; // Structured extended feature leaf.
                mov ECX, 0; // Main leaf.
                cpuid;
                mov ext, EBX; // HLE, AVX2, RTM, etc.
            }
            cf.extfeatures = ext;
        }
    }
    if (cf.miscfeatures & OSXSAVE_BIT)
    {
        version (GNU_OR_LDC) asm pure nothrow @nogc {
            /* Old assemblers do not recognize xgetbv, and there is no easy way
             * to conditionally compile based on the assembler used, so use the
             * raw .byte sequence instead. */
            ".byte 0x0f, 0x01, 0xd0" : "=a" (a), "=d" (d) : "c" (0);
        } else asm pure nothrow @nogc {
            mov ECX, 0;
            xgetbv;
            mov d, EDX;
            mov a, EAX;
        }
        cf.xfeatures = cast(ulong)d << 32 | a;
    }
    cf.amdfeatures = 0;
    cf.amdmiscfeatures = 0;
    if (max_extended_cpuid >= 0x8000_0001) {
        version (GNU_OR_LDC) asm pure nothrow @nogc {
            "cpuid" : "=a" (a), "=c" (cf.amdmiscfeatures), "=d" (cf.amdfeatures) : "a" (0x8000_0001) : "ebx";
        } else {
            asm pure nothrow @nogc {
                mov EAX, 0x8000_0001;
                cpuid;
                mov c, ECX;
                mov d, EDX;
            }
            cf.amdmiscfeatures = c;
            cf.amdfeatures = d;
        }
    }
    // Try to detect fraudulent vendorIDs
    // Test the raw feature word gathered above: the public `amd3dnow`
    // property reads the immutable `_amd3dnow`, which this function does not set.
    if (cf.amdfeatures & AMD_3DNOW_BIT) cf.probablyIntel = false;
    if (!cf.probablyIntel && max_extended_cpuid >= 0x8000_0008) {
        //http://support.amd.com/TechDocs/25481.pdf pg.36
        cf.maxCores = 1;
        if (hyperThreadingBit) {
            // determine max number of cores for AMD
            version (GNU_OR_LDC) asm pure nothrow @nogc {
                "cpuid" : "=a" (a), "=c" (c) : "a" (0x8000_0008) : "ebx", "edx";
            } else asm pure nothrow @nogc {
                mov EAX, 0x8000_0008;
                cpuid;
                mov c, ECX;
            }
            cf.maxCores += c & 0xFF;
        }
    }
    if (max_extended_cpuid >= 0x8000_0004) {
        uint* pnb = cast(uint*)cf.processorNameBuffer.ptr;
        version (GNU_OR_LDC)
        {
            asm pure nothrow @nogc {
                "cpuid" : "=a" (pnb[0]), "=b" (pnb[1]), "=c" (pnb[ 2]), "=d" (pnb[ 3]) : "a" (0x8000_0002);
                "cpuid" : "=a" (pnb[4]), "=b" (pnb[5]), "=c" (pnb[ 6]), "=d" (pnb[ 7]) : "a" (0x8000_0003);
                "cpuid" : "=a" (pnb[8]), "=b" (pnb[9]), "=c" (pnb[10]), "=d" (pnb[11]) : "a" (0x8000_0004);
            }
        }
        else version (D_InlineAsm_X86)
        {
            asm pure nothrow @nogc {
                push ESI;
                mov ESI, pnb;
                mov EAX, 0x8000_0002;
                cpuid;
                mov [ESI], EAX;
                mov [ESI+4], EBX;
                mov [ESI+8], ECX;
                mov [ESI+12], EDX;
                mov EAX, 0x8000_0003;
                cpuid;
                mov [ESI+16], EAX;
                mov [ESI+20], EBX;
                mov [ESI+24], ECX;
                mov [ESI+28], EDX;
                mov EAX, 0x8000_0004;
                cpuid;
                mov [ESI+32], EAX;
                mov [ESI+36], EBX;
                mov [ESI+40], ECX;
                mov [ESI+44], EDX;
                pop ESI;
            }
        }
        else version (D_InlineAsm_X86_64)
        {
            asm pure nothrow @nogc {
                push RSI;
                mov RSI, pnb;
                mov EAX, 0x8000_0002;
                cpuid;
                mov [RSI], EAX;
                mov [RSI+4], EBX;
                mov [RSI+8], ECX;
                mov [RSI+12], EDX;
                mov EAX, 0x8000_0003;
                cpuid;
                mov [RSI+16], EAX;
                mov [RSI+20], EBX;
                mov [RSI+24], ECX;
                mov [RSI+28], EDX;
                mov EAX, 0x8000_0004;
                cpuid;
                mov [RSI+32], EAX;
                mov [RSI+36], EBX;
                mov [RSI+40], ECX;
                mov [RSI+44], EDX;
                pop RSI;
            }
        }
        // Intel P4 and PM pad at front with spaces.
        // Other CPUs pad at end with nulls.
        // Both scans are bounded so that a buffer consisting only of spaces
        // and/or nulls cannot be indexed out of range.
        immutable size_t len = cf.processorNameBuffer.length;
        size_t start = 0, end = 0;
        while (start < len && cf.processorNameBuffer[start] == ' ') { ++start; }
        while (end < len - start && cf.processorNameBuffer[len-end-1] == 0) { ++end; }
        if (start + end < len)
            cf.processorName = cast(string)(cf.processorNameBuffer[start..$-end]);
        else
            cf.processorName = "Unknown CPU"; // nothing but padding was returned
    } else {
        cf.processorName = "Unknown CPU";
    }
    // Determine cache sizes
    // Intel docs specify that they return 0 for 0x8000_0005.
    // AMD docs do not specify the behaviour for 0004 and 0002.
    // Centaur/VIA and most other manufacturers use the AMD method,
    // except Cyrix MediaGX MMX Enhanced uses their OWN form of CPUID2!
    // NS Geode GX1 provides CyrixCPUID2 _and_ does the same wrong behaviour
    // for CPUID80000005. But Geode GX uses the AMD method
    // Deal with Geode GX1 - make it same as MediaGX MMX.
    if (max_extended_cpuid==0x8000_0005 && max_cpuid==2) {
        max_extended_cpuid = 0x8000_0004;
    }
    // Therefore, we try the AMD method unless it's an Intel chip.
    // If we still have no info, try the Intel methods.
    datacache[0].size = 0;
    if (max_cpuid<2 || !cf.probablyIntel) {
        if (max_extended_cpuid >= 0x8000_0005) {
            getAMDcacheinfo();
        } else if (cf.probablyAMD) {
            // According to AMDProcRecognitionAppNote, this means CPU
            // K5 model 0, or Am5x86 (model 4), or Am4x86DX4 (model 4)
            // Am5x86 has 16Kb 4-way unified data & code cache.
            datacache[0].size = 8;
            datacache[0].associativity = 4;
            datacache[0].lineSize = 32;
        } else {
            // Some obscure CPU.
            // Values for Cyrix 6x86MX (family 6, model 0)
            datacache[0].size = 64;
            datacache[0].associativity = 4;
            datacache[0].lineSize = 32;
        }
    }
    if ((datacache[0].size == 0) && max_cpuid>=4) {
        getcacheinfoCPUID4();
    }
    if ((datacache[0].size == 0) && max_cpuid>=2) {
        getcacheinfoCPUID2();
    }
    if (datacache[0].size == 0) {
        // Pentium, PMMX, late model 486, or an obscure CPU
        // As above, test the raw CPUID bit rather than the `mmx` property,
        // whose backing immutable is not set by this function.
        if (cf.features & MMX_BIT) { // Pentium MMX. Also has 8kB code cache.
            datacache[0].size = 16;
            datacache[0].associativity = 4;
            datacache[0].lineSize = 32;
        } else { // Pentium 1 (which also has 8kB code cache)
            // or 486.
            // Cyrix 6x86: 16, 4way, 32 linesize
            datacache[0].size = 8;
            datacache[0].associativity = 2;
            datacache[0].lineSize = 32;
        }
    }
    if (cf.probablyIntel && max_cpuid >= 0x0B) {
        // For Intel i7 and later, use function 0x0B to determine
        // cores and hyperthreads.
        getCpuInfo0B();
    } else {
        if (hyperThreadingBit) cf.maxThreads = (apic>>>16) & 0xFF;
        else cf.maxThreads = cf.maxCores;
        if (cf.probablyAMD && max_extended_cpuid >= 0x8000_001E) {
            version (GNU_OR_LDC) asm pure nothrow @nogc {
                "cpuid" : "=a" (a), "=b" (b) : "a" (0x8000_001E) : "ecx", "edx";
            } else {
                asm pure nothrow @nogc {
                    mov EAX, 0x8000_001e;
                    cpuid;
                    mov b, EBX;
                }
            }
            // Bits 9..8 of EBX, plus one, give the divisor used to turn the
            // thread count into a core count.
            ubyte coresPerComputeUnit = ((b >> 8) & 3) + 1;
            cf.maxCores = cf.maxThreads / coresPerComputeUnit;
        }
    }
}
// Return true if the cpuid instruction is supported.
// BUG(WONTFIX): Returns false for Cyrix 6x86 and 6x86L. They will be treated as 486 machines.
//
// Method: try to toggle bit 21 of EFLAGS (mask 0x0020_0000, the "ID bit"
// per the asm comments below) and read EFLAGS back. If the bit changed,
// the function reports CPUID as available.
// On X86_64 builds no probing is done and the answer is always true.
bool hasCPUID()
{
version (X86_64)
return true;
else
{
// Receives the set of EFLAGS bits that changed during the probe.
// Stays at its default value (0) when neither asm variant below is
// compiled in, in which case the function returns false.
uint flags;
version (GNU_OR_LDC)
{
// GCC-style extended asm; the result is delivered in EAX ("=a").
// http://wiki.osdev.org/CPUID#Checking_CPUID_availability
asm nothrow @nogc { "
pushfl # Save EFLAGS
pushfl # Store EFLAGS
xorl $0x00200000, (%%esp) # Invert the ID bit in stored EFLAGS
popfl # Load stored EFLAGS (with ID bit inverted)
pushfl # Store EFLAGS again (ID bit may or may not be inverted)
popl %%eax # eax = modified EFLAGS (ID bit may or may not be inverted)
xorl (%%esp), %%eax # eax = whichever bits were changed
popfl # Restore original EFLAGS
" : "=a" (flags);
}
}
else version (D_InlineAsm_X86)
{
// DMD-style inline asm performing the same probe.
// NOTE(review): unlike the GNU/LDC sequence above, nothing here writes
// the original EFLAGS value back after the test -- confirm this is intended.
asm nothrow @nogc {
pushfd;
pop EAX; // EAX = current EFLAGS
mov flags, EAX; // keep the original value for the comparison below
xor EAX, 0x0020_0000; // flip bit 21 in the copy
push EAX;
popfd; // try to load the modified value into EFLAGS
pushfd;
pop EAX; // EAX = EFLAGS as actually stored by the CPU
xor flags, EAX; // flags = bits that differ from the original
}
}
// CPUID is reported as present only if bit 21 could be changed.
return (flags & 0x0020_0000) != 0;
}
}
} else { // supported X86
/// Fallback for the `else` side of the enclosing version block:
/// no probing is attempted and CPUID is always reported as unavailable.
bool hasCPUID()
{
    return false;
}
/// Fallback for the `else` side of the enclosing version block.
/// Performs no hardware query; it only stores fixed values
/// (size 8, 2-way, line size 32) into the first data-cache entry.
void cpuidX86()
{
    auto l1 = &datacache[0];
    l1.lineSize = 32;
    l1.associativity = 2;
    l1.size = 8;
}
}
/*
// TODO: Implement this function with OS support
void cpuidPPC()
{
enum :int { PPC601, PPC603, PPC603E, PPC604,
PPC604E, PPC620, PPCG3, PPCG4, PPCG5 }
// TODO:
// asm { mfpvr; } returns the CPU version but unfortunately it can
// only be used in kernel mode. So OS support is required.
int cputype = PPC603;
// The 601 has an 8KB combined data & code L1 cache.
uint sizes[] = [4, 8, 16, 16, 32, 32, 32, 32, 64];
ubyte ways[] = [8, 2, 4, 4, 4, 8, 8, 8, 8];
uint L2size[]= [0, 0, 0, 0, 0, 0, 0, 256, 512];
uint L3size[]= [0, 0, 0, 0, 0, 0, 0, 2048, 0];
datacache[0].size = sizes[cputype];
datacache[0].associativity = ways[cputype];
datacache[0].lineSize = (cputype==PPCG5)? 128 :
(cputype == PPC620 || cputype == PPCG3)? 64 : 32;
datacache[1].size = L2size[cputype];
datacache[2].size = L3size[cputype];
datacache[1].lineSize = datacache[0].lineSize;
datacache[2].lineSize = datacache[0].lineSize;
}
// TODO: Implement this function with OS support
void cpuidSparc()
{
// UltraSparcIIi : L1 = 16, 2way. L2 = 512, 4 way.
// UltraSparcIII : L1 = 64, 4way. L2= 4096 or 8192.
// UltraSparcIIIi: L1 = 64, 4way. L2= 1024, 4 way
// UltraSparcIV : L1 = 64, 4way. L2 = 16*1024.
// UltraSparcIV+ : L1 = 64, 4way. L2 = 2048, L3=32*1024.
// Sparc64V : L1 = 128, 2way. L2 = 4096 4way.
}
*/
/**
 * C-runtime constructor that probes the processor once and publishes the
 * results in the module's `_xxx` variables.
 *
 * Steps, in order:
 *   1. Call cpuidX86() if hasCPUID() says CPUID is usable.
 *   2. If the first data-cache entry still has size 0, fill in a guess.
 *   3. Count the populated cache levels; pad every empty level with a
 *      pseudo-cache spanning the full address space.
 *   4. Copy vendor/processor strings, feature flags and core/thread
 *      counts into the published variables.
 */
pragma(crt_constructor) void cpuid_initialization()
{
    auto info = getCpuFeatures();

    // Without CPUID it's a 386 or 486, or a Cyrix 6x86, and nothing can
    // be queried. (Such a machine probably still has an external cache.)
    if (hasCPUID())
        cpuidX86();

    if (datacache[0].size == 0)
    {
        // Nothing was detected: guess same as Pentium 1.
        datacache[0].lineSize = 32;
        datacache[0].associativity = 2;
        datacache[0].size = 8;
    }

    // Level 0 always counts. Every further level either was detected
    // (count it) or is filled with the full memory space.
    numCacheLevels = 1;
    foreach (level; 1 .. datacache.length)
    {
        if (datacache[level].size != 0)
        {
            ++numCacheLevels;
            continue;
        }
        // Unused level: make it equal to the full address space and
        // inherit the line size of the level just below it.
        datacache[level].size = size_t.max / 1024;
        datacache[level].associativity = 1;
        datacache[level].lineSize = datacache[level - 1].lineSize;
    }

    // Snapshot the raw feature words; they are only read from here on.
    const stdBits     = info.features;
    const miscBits    = info.miscfeatures;
    const extBits     = info.extfeatures;
    const amdBits     = info.amdfeatures;
    const amdMiscBits = info.amdmiscfeatures;
    const intel       = info.probablyIntel;

    // Set the immortals
    _dataCaches = datacache;
    _vendor     = cast(string) info.vendorID;
    _processor  = info.processorName;

    _x87onChip    = (stdBits & FPU_BIT) != 0;
    _mmx          = (stdBits & MMX_BIT) != 0;
    _sse          = (stdBits & SSE_BIT) != 0;
    _sse2         = (stdBits & SSE2_BIT) != 0;
    _sse3         = (miscBits & SSE3_BIT) != 0;
    _ssse3        = (miscBits & SSSE3_BIT) != 0;
    _sse41        = (miscBits & SSE41_BIT) != 0;
    _sse42        = (miscBits & SSE42_BIT) != 0;
    _sse4a        = (amdMiscBits & SSE4A_BIT) != 0;
    _aes          = (miscBits & AES_BIT) != 0;
    _hasPclmulqdq = (miscBits & PCLMULQDQ_BIT) != 0;
    _hasRdrand    = (miscBits & RDRAND_BIT) != 0;

    // AVX requires both bits of the xfeatures mask in addition to the
    // CPU's own AVX bit.
    enum requiredXState = XF_SSE_BIT | XF_YMM_BIT;
    const xStateOk = (info.xfeatures & requiredXState) == requiredXState;
    _avx = xStateOk && (miscBits & AVX_BIT) != 0;

    // The following lines read back avx / aes / hasPclmulqdq, so they
    // must stay after the stores to _avx, _aes and _hasPclmulqdq above.
    _vaes          = avx && aes;
    _hasVpclmulqdq = avx && hasPclmulqdq;
    _fma           = avx && (miscBits & FMA_BIT) != 0;
    _fp16c         = avx && (miscBits & FP16C_BIT) != 0;
    _avx2          = avx && (extBits & AVX2_BIT) != 0;

    _hle       = (extBits & HLE_BIT) != 0;
    _rtm       = (extBits & RTM_BIT) != 0;
    _avx512f   = (extBits & AVX512F_BIT) != 0;
    _hasRdseed = (extBits & RDSEED_BIT) != 0;
    _hasSha    = (extBits & SHA_BIT) != 0;

    _amd3dnow    = (amdBits & AMD_3DNOW_BIT) != 0;
    _amd3dnowExt = (amdBits & AMD_3DNOW_EXT_BIT) != 0;
    _amdMmx      = (amdBits & AMD_MMX_BIT) != 0;

    _hasFxsr       = (stdBits & FXSR_BIT) != 0;
    _hasCmov       = (stdBits & CMOV_BIT) != 0;
    _hasRdtsc      = (stdBits & TIMESTAMP_BIT) != 0;
    _hasCmpxchg8b  = (stdBits & CMPXCHG8B_BIT) != 0;
    _hasCmpxchg16b = (miscBits & CMPXCHG16B_BIT) != 0;

    // The SYSENTER/SYSEXIT features were buggy on Pentium Pro and early
    // PentiumII (REF: www.geoffchappell.com), so the feature bit is
    // ignored on Intel parts older than family 6, model 3, stepping 3.
    const buggySysEnter = intel
        && (family < 6
            || (family == 6 && (model < 3 || (model == 3 && stepping < 3))));
    _hasSysEnterSysExit = !buggySysEnter && (stdBits & SYSENTERSYSEXIT_BIT) != 0;

    _has3dnowPrefetch = (amdMiscBits & AMD_3DNOW_PREFETCH_BIT) != 0;
    _hasLahfSahf      = (amdMiscBits & LAHFSAHF_BIT) != 0;
    _hasPopcnt        = (miscBits & POPCNT_BIT) != 0;
    _hasLzcnt         = (amdMiscBits & LZCNT_BIT) != 0;
    _isX86_64         = (amdBits & AMD64_BIT) != 0;
    _isItanium        = (stdBits & IA64_BIT) != 0;

    // Topology: more threads than cores is reported as hyper-threading.
    _hyperThreading = info.maxThreads > info.maxCores;
    _threadsPerCPU  = info.maxThreads;
    _coresPerCPU    = info.maxCores;

    // Optimisation-preference hints derived from vendor guess and family/model.
    _preferAthlon   = info.probablyAMD && family >= 6;
    _preferPentium4 = intel && family == 0xF;
    _preferPentium1 = family < 6 || (family == 6 && model < 0xF && !intel);
}
|