From 933d6be8e8c4a81f6409f4daaf704e7f363c6508 Mon Sep 17 00:00:00 2001 From: Alexandros Lamprineas Date: Thu, 13 Jun 2024 11:45:39 +0100 Subject: [FMV][compiler-rt] Fix cpu features initialization. (#95149) To detect features we either use HWCAPs or directly extract system register bitfields and compare with a value. In many cases equality comparisons give wrong results; for example, FEAT_SVE is not set if SVE2 is available (see issue #93651). I am also making the access to __aarch64_cpu_features atomic. The corresponding PR for the ACLE specification is https://github.com/ARM-software/acle/pull/322. --- compiler-rt/lib/builtins/aarch64/sme-abi-vg.c | 12 +- .../lib/builtins/cpu_model/aarch64/fmv/android.inc | 4 +- .../lib/builtins/cpu_model/aarch64/fmv/freebsd.inc | 4 +- .../lib/builtins/cpu_model/aarch64/fmv/fuchsia.inc | 8 +- .../lib/builtins/cpu_model/aarch64/fmv/mrs.inc | 135 +++++++-------------- .../lib/builtins/cpu_model/aarch64/fmv/sysauxv.inc | 4 +- .../lib/builtins/cpu_model/aarch64/hwcap.inc | 6 + 7 files changed, 68 insertions(+), 105 deletions(-) (limited to 'compiler-rt') diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c b/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c index e384ab7..062cf80 100644 --- a/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c +++ b/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c @@ -5,7 +5,7 @@ #include "../cpu_model/aarch64.h" struct FEATURES { - long long features; + unsigned long long features; }; extern struct FEATURES __aarch64_cpu_features; @@ -23,14 +23,18 @@ extern bool __aarch64_has_sme_and_tpidr2_el0; #pragma GCC diagnostic ignored "-Wprio-ctor-dtor" #endif __attribute__((constructor(90))) static void get_aarch64_cpu_features(void) { - if (!__aarch64_cpu_features.features) - __init_cpu_features(); + if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) + return; + + __init_cpu_features(); } __attribute__((target("sve"))) long __arm_get_current_vg(void) __arm_streaming_compatible { struct
SME_STATE State = __arm_sme_state(); - bool HasSVE = __aarch64_cpu_features.features & (1ULL << FEAT_SVE); + unsigned long long features = + __atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED); + bool HasSVE = features & (1ULL << FEAT_SVE); if (!HasSVE && !__aarch64_has_sme_and_tpidr2_el0) return 0; diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/android.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/android.inc index f7114314..a9e3594 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/android.inc +++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/android.inc @@ -1,6 +1,6 @@ void __init_cpu_features_resolver(unsigned long hwcap, const __ifunc_arg_t *arg) { - if (__aarch64_cpu_features.features) + if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) return; // ifunc resolvers don't have hwcaps in arguments on Android API lower @@ -17,7 +17,7 @@ void __init_cpu_features_resolver(unsigned long hwcap, void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) { // CPU features already initialized. - if (__aarch64_cpu_features.features) + if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) return; // Don't set any CPU features, diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/freebsd.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/freebsd.inc index 793adef..aa975dc 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/freebsd.inc +++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/freebsd.inc @@ -1,6 +1,6 @@ void __init_cpu_features_resolver(unsigned long hwcap, const __ifunc_arg_t *arg) { - if (__aarch64_cpu_features.features) + if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) return; __init_cpu_features_constructor(hwcap, arg); @@ -10,7 +10,7 @@ void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) { unsigned long hwcap = 0; unsigned long hwcap2 = 0; // CPU features already initialized. 
- if (__aarch64_cpu_features.features) + if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) return; int res = 0; diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/fuchsia.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/fuchsia.inc index d8e0280..1ae4780 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/fuchsia.inc +++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/fuchsia.inc @@ -2,7 +2,7 @@ #include void __init_cpu_features_resolver() { - if (__aarch64_cpu_features.features) + if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) return; // This ensures the vDSO is a direct link-time dependency of anything that @@ -13,8 +13,8 @@ void __init_cpu_features_resolver() { if (status != ZX_OK) return; -#define setCPUFeature(cpu_feature) \ - __aarch64_cpu_features.features |= 1ULL << cpu_feature + unsigned long long feat = 0; +#define setCPUFeature(cpu_feature) feat |= 1ULL << cpu_feature if (features & ZX_ARM64_FEATURE_ISA_FP) setCPUFeature(FEAT_FP); @@ -48,4 +48,6 @@ void __init_cpu_features_resolver() { setCPUFeature(FEAT_SVE); setCPUFeature(FEAT_INIT); + + __atomic_store_n(&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED); } diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc index 32a21a2..e4d5e7f 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc +++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc @@ -3,11 +3,10 @@ #define HAVE_SYS_AUXV_H #endif - - static void __init_cpu_features_constructor(unsigned long hwcap, const __ifunc_arg_t *arg) { -#define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F + unsigned long long feat = 0; +#define setCPUFeature(F) feat |= 1ULL << F #define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr)) #define extractBits(val, start, number) \ (val & ((1ULL << number) - 1ULL) << start) >> start @@ -20,26 +19,20 @@ static void 
__init_cpu_features_constructor(unsigned long hwcap, setCPUFeature(FEAT_PMULL); if (hwcap & HWCAP_FLAGM) setCPUFeature(FEAT_FLAGM); - if (hwcap2 & HWCAP2_FLAGM2) { - setCPUFeature(FEAT_FLAGM); + if (hwcap2 & HWCAP2_FLAGM2) setCPUFeature(FEAT_FLAGM2); - } - if (hwcap & HWCAP_SM3 && hwcap & HWCAP_SM4) + if (hwcap & HWCAP_SM4) setCPUFeature(FEAT_SM4); if (hwcap & HWCAP_ASIMDDP) setCPUFeature(FEAT_DOTPROD); if (hwcap & HWCAP_ASIMDFHM) setCPUFeature(FEAT_FP16FML); - if (hwcap & HWCAP_FPHP) { + if (hwcap & HWCAP_FPHP) setCPUFeature(FEAT_FP16); - setCPUFeature(FEAT_FP); - } if (hwcap & HWCAP_DIT) setCPUFeature(FEAT_DIT); if (hwcap & HWCAP_ASIMDRDM) setCPUFeature(FEAT_RDM); - if (hwcap & HWCAP_ILRCPC) - setCPUFeature(FEAT_RCPC2); if (hwcap & HWCAP_AES) setCPUFeature(FEAT_AES); if (hwcap & HWCAP_SHA1) @@ -52,23 +45,20 @@ static void __init_cpu_features_constructor(unsigned long hwcap, setCPUFeature(FEAT_FCMA); if (hwcap & HWCAP_SB) setCPUFeature(FEAT_SB); - if (hwcap & HWCAP_SSBS) + if (hwcap & HWCAP_SSBS) { + setCPUFeature(FEAT_SSBS); setCPUFeature(FEAT_SSBS2); + } if (hwcap2 & HWCAP2_MTE) { setCPUFeature(FEAT_MEMTAG); setCPUFeature(FEAT_MEMTAG2); } - if (hwcap2 & HWCAP2_MTE3) { - setCPUFeature(FEAT_MEMTAG); - setCPUFeature(FEAT_MEMTAG2); + if (hwcap2 & HWCAP2_MTE3) setCPUFeature(FEAT_MEMTAG3); - } if (hwcap2 & HWCAP2_SVEAES) setCPUFeature(FEAT_SVE_AES); - if (hwcap2 & HWCAP2_SVEPMULL) { - setCPUFeature(FEAT_SVE_AES); + if (hwcap2 & HWCAP2_SVEPMULL) setCPUFeature(FEAT_SVE_PMULL128); - } if (hwcap2 & HWCAP2_SVEBITPERM) setCPUFeature(FEAT_SVE_BITPERM); if (hwcap2 & HWCAP2_SVESHA3) @@ -105,6 +95,8 @@ static void __init_cpu_features_constructor(unsigned long hwcap, setCPUFeature(FEAT_WFXT); if (hwcap2 & HWCAP2_SME) setCPUFeature(FEAT_SME); + if (hwcap2 & HWCAP2_SME2) + setCPUFeature(FEAT_SME2); if (hwcap2 & HWCAP2_SME_I16I64) setCPUFeature(FEAT_SME_I64); if (hwcap2 & HWCAP2_SME_F64F64) @@ -113,86 +105,45 @@ static void __init_cpu_features_constructor(unsigned long hwcap, 
setCPUFeature(FEAT_MOPS); if (hwcap & HWCAP_CPUID) { unsigned long ftr; - getCPUFeature(ID_AA64PFR1_EL1, ftr); - // ID_AA64PFR1_EL1.MTE >= 0b0001 - if (extractBits(ftr, 8, 4) >= 0x1) - setCPUFeature(FEAT_MEMTAG); - // ID_AA64PFR1_EL1.SSBS == 0b0001 - if (extractBits(ftr, 4, 4) == 0x1) - setCPUFeature(FEAT_SSBS); - // ID_AA64PFR1_EL1.SME == 0b0010 - if (extractBits(ftr, 24, 4) == 0x2) - setCPUFeature(FEAT_SME2); - getCPUFeature(ID_AA64PFR0_EL1, ftr); - // ID_AA64PFR0_EL1.FP != 0b1111 - if (extractBits(ftr, 16, 4) != 0xF) { - setCPUFeature(FEAT_FP); - // ID_AA64PFR0_EL1.AdvSIMD has the same value as ID_AA64PFR0_EL1.FP - setCPUFeature(FEAT_SIMD); - } - // ID_AA64PFR0_EL1.SVE != 0b0000 - if (extractBits(ftr, 32, 4) != 0x0) { - // get ID_AA64ZFR0_EL1, that name supported - // if sve enabled only - getCPUFeature(S3_0_C0_C4_4, ftr); - // ID_AA64ZFR0_EL1.SVEver == 0b0000 - if (extractBits(ftr, 0, 4) == 0x0) - setCPUFeature(FEAT_SVE); - // ID_AA64ZFR0_EL1.SVEver == 0b0001 - if (extractBits(ftr, 0, 4) == 0x1) - setCPUFeature(FEAT_SVE2); - // ID_AA64ZFR0_EL1.BF16 != 0b0000 - if (extractBits(ftr, 20, 4) != 0x0) - setCPUFeature(FEAT_SVE_BF16); - } - getCPUFeature(ID_AA64ISAR0_EL1, ftr); - // ID_AA64ISAR0_EL1.SHA3 != 0b0000 - if (extractBits(ftr, 32, 4) != 0x0) - setCPUFeature(FEAT_SHA3); + getCPUFeature(ID_AA64ISAR1_EL1, ftr); - // ID_AA64ISAR1_EL1.DPB >= 0b0001 - if (extractBits(ftr, 0, 4) >= 0x1) - setCPUFeature(FEAT_DPB); - // ID_AA64ISAR1_EL1.LRCPC != 0b0000 - if (extractBits(ftr, 20, 4) != 0x0) - setCPUFeature(FEAT_RCPC); - // ID_AA64ISAR1_EL1.LRCPC == 0b0011 - if (extractBits(ftr, 20, 4) == 0x3) - setCPUFeature(FEAT_RCPC3); - // ID_AA64ISAR1_EL1.SPECRES == 0b0001 - if (extractBits(ftr, 40, 4) == 0x2) + /* ID_AA64ISAR1_EL1.SPECRES >= 0b0001 */ + if (extractBits(ftr, 40, 4) >= 0x1) setCPUFeature(FEAT_PREDRES); - // ID_AA64ISAR1_EL1.BF16 != 0b0000 - if (extractBits(ftr, 44, 4) != 0x0) - setCPUFeature(FEAT_BF16); - // ID_AA64ISAR1_EL1.LS64 >= 0b0001 + /* ID_AA64ISAR1_EL1.LS64 
>= 0b0001 */ if (extractBits(ftr, 60, 4) >= 0x1) setCPUFeature(FEAT_LS64); - // ID_AA64ISAR1_EL1.LS64 >= 0b0010 + /* ID_AA64ISAR1_EL1.LS64 >= 0b0010 */ if (extractBits(ftr, 60, 4) >= 0x2) setCPUFeature(FEAT_LS64_V); - // ID_AA64ISAR1_EL1.LS64 >= 0b0011 + /* ID_AA64ISAR1_EL1.LS64 >= 0b0011 */ if (extractBits(ftr, 60, 4) >= 0x3) setCPUFeature(FEAT_LS64_ACCDATA); - } else { - // Set some features in case of no CPUID support - if (hwcap & (HWCAP_FP | HWCAP_FPHP)) { - setCPUFeature(FEAT_FP); - // FP and AdvSIMD fields have the same value - setCPUFeature(FEAT_SIMD); - } - if (hwcap & HWCAP_DCPOP || hwcap2 & HWCAP2_DCPODP) - setCPUFeature(FEAT_DPB); - if (hwcap & HWCAP_LRCPC || hwcap & HWCAP_ILRCPC) - setCPUFeature(FEAT_RCPC); - if (hwcap2 & HWCAP2_BF16 || hwcap2 & HWCAP2_EBF16) - setCPUFeature(FEAT_BF16); - if (hwcap2 & HWCAP2_SVEBF16) - setCPUFeature(FEAT_SVE_BF16); - if (hwcap2 & HWCAP2_SVE2 && hwcap & HWCAP_SVE) - setCPUFeature(FEAT_SVE2); - if (hwcap & HWCAP_SHA3) - setCPUFeature(FEAT_SHA3); } + if (hwcap & HWCAP_FP) { + setCPUFeature(FEAT_FP); + // FP and AdvSIMD fields have the same value + setCPUFeature(FEAT_SIMD); + } + if (hwcap & HWCAP_DCPOP) + setCPUFeature(FEAT_DPB); + if (hwcap & HWCAP_LRCPC) + setCPUFeature(FEAT_RCPC); + if (hwcap & HWCAP_ILRCPC) + setCPUFeature(FEAT_RCPC2); + if (hwcap2 & HWCAP2_LRCPC3) + setCPUFeature(FEAT_RCPC3); + if (hwcap2 & HWCAP2_BF16) + setCPUFeature(FEAT_BF16); + if (hwcap2 & HWCAP2_SVEBF16) + setCPUFeature(FEAT_SVE_BF16); + if (hwcap & HWCAP_SVE) + setCPUFeature(FEAT_SVE); + if (hwcap2 & HWCAP2_SVE2) + setCPUFeature(FEAT_SVE2); + if (hwcap & HWCAP_SHA3) + setCPUFeature(FEAT_SHA3); setCPUFeature(FEAT_INIT); + + __atomic_store_n(&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED); } diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/sysauxv.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/sysauxv.inc index fb5722c..486f77a 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/sysauxv.inc +++ 
b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/sysauxv.inc @@ -1,13 +1,13 @@ void __init_cpu_features_resolver(unsigned long hwcap, const __ifunc_arg_t *arg) { - if (__aarch64_cpu_features.features) + if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) return; __init_cpu_features_constructor(hwcap, arg); } void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) { // CPU features already initialized. - if (__aarch64_cpu_features.features) + if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) return; unsigned long hwcap = getauxval(AT_HWCAP); diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc index 7ddc125..41aba82 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc +++ b/compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc @@ -178,6 +178,12 @@ #ifndef HWCAP2_SVE_EBF16 #define HWCAP2_SVE_EBF16 (1ULL << 33) #endif +#ifndef HWCAP2_SME2 +#define HWCAP2_SME2 (1UL << 37) +#endif #ifndef HWCAP2_MOPS #define HWCAP2_MOPS (1ULL << 43) #endif +#ifndef HWCAP2_LRCPC3 +#define HWCAP2_LRCPC3 (1UL << 46) +#endif -- cgit v1.1