aboutsummaryrefslogtreecommitdiff
path: root/compiler-rt
diff options
context:
space:
mode:
authorAlexandros Lamprineas <alexandros.lamprineas@arm.com>2024-06-13 11:45:39 +0100
committerGitHub <noreply@github.com>2024-06-13 11:45:39 +0100
commit933d6be8e8c4a81f6409f4daaf704e7f363c6508 (patch)
treeb17cf471070db4f18e9145b571686ccb8056bdb5 /compiler-rt
parent5563d914a776e11c22fa6f9617e8cb6c46e977cf (diff)
downloadllvm-933d6be8e8c4a81f6409f4daaf704e7f363c6508.zip
llvm-933d6be8e8c4a81f6409f4daaf704e7f363c6508.tar.gz
llvm-933d6be8e8c4a81f6409f4daaf704e7f363c6508.tar.bz2
[FMV][compiler-rt] Fix cpu features initialization. (#95149)
To detect features we either use HWCAPs or directly extract system register bitfields and compare them with a value. In many cases equality comparisons give wrong results; for example, FEAT_SVE is not set if SVE2 is available (see issue #93651). I am also making the accesses to __aarch64_cpu_features atomic. The corresponding PR for the ACLE specification is https://github.com/ARM-software/acle/pull/322.
Diffstat (limited to 'compiler-rt')
-rw-r--r--compiler-rt/lib/builtins/aarch64/sme-abi-vg.c12
-rw-r--r--compiler-rt/lib/builtins/cpu_model/aarch64/fmv/android.inc4
-rw-r--r--compiler-rt/lib/builtins/cpu_model/aarch64/fmv/freebsd.inc4
-rw-r--r--compiler-rt/lib/builtins/cpu_model/aarch64/fmv/fuchsia.inc8
-rw-r--r--compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc135
-rw-r--r--compiler-rt/lib/builtins/cpu_model/aarch64/fmv/sysauxv.inc4
-rw-r--r--compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc6
7 files changed, 68 insertions, 105 deletions
diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c b/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c
index e384ab7..062cf80 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c
+++ b/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c
@@ -5,7 +5,7 @@
#include "../cpu_model/aarch64.h"
struct FEATURES {
- long long features;
+ unsigned long long features;
};
extern struct FEATURES __aarch64_cpu_features;
@@ -23,14 +23,18 @@ extern bool __aarch64_has_sme_and_tpidr2_el0;
#pragma GCC diagnostic ignored "-Wprio-ctor-dtor"
#endif
__attribute__((constructor(90))) static void get_aarch64_cpu_features(void) {
- if (!__aarch64_cpu_features.features)
- __init_cpu_features();
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
+ return;
+
+ __init_cpu_features();
}
__attribute__((target("sve"))) long
__arm_get_current_vg(void) __arm_streaming_compatible {
struct SME_STATE State = __arm_sme_state();
- bool HasSVE = __aarch64_cpu_features.features & (1ULL << FEAT_SVE);
+ unsigned long long features =
+ __atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED);
+ bool HasSVE = features & (1ULL << FEAT_SVE);
if (!HasSVE && !__aarch64_has_sme_and_tpidr2_el0)
return 0;
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/android.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/android.inc
index f7114314..a9e3594 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/android.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/android.inc
@@ -1,6 +1,6 @@
void __init_cpu_features_resolver(unsigned long hwcap,
const __ifunc_arg_t *arg) {
- if (__aarch64_cpu_features.features)
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;
// ifunc resolvers don't have hwcaps in arguments on Android API lower
@@ -17,7 +17,7 @@ void __init_cpu_features_resolver(unsigned long hwcap,
void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
// CPU features already initialized.
- if (__aarch64_cpu_features.features)
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;
// Don't set any CPU features,
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/freebsd.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/freebsd.inc
index 793adef..aa975dc 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/freebsd.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/freebsd.inc
@@ -1,6 +1,6 @@
void __init_cpu_features_resolver(unsigned long hwcap,
const __ifunc_arg_t *arg) {
- if (__aarch64_cpu_features.features)
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;
__init_cpu_features_constructor(hwcap, arg);
@@ -10,7 +10,7 @@ void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
unsigned long hwcap = 0;
unsigned long hwcap2 = 0;
// CPU features already initialized.
- if (__aarch64_cpu_features.features)
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;
int res = 0;
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/fuchsia.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/fuchsia.inc
index d8e0280..1ae4780 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/fuchsia.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/fuchsia.inc
@@ -2,7 +2,7 @@
#include <zircon/syscalls.h>
void __init_cpu_features_resolver() {
- if (__aarch64_cpu_features.features)
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;
// This ensures the vDSO is a direct link-time dependency of anything that
@@ -13,8 +13,8 @@ void __init_cpu_features_resolver() {
if (status != ZX_OK)
return;
-#define setCPUFeature(cpu_feature) \
- __aarch64_cpu_features.features |= 1ULL << cpu_feature
+ unsigned long long feat = 0;
+#define setCPUFeature(cpu_feature) feat |= 1ULL << cpu_feature
if (features & ZX_ARM64_FEATURE_ISA_FP)
setCPUFeature(FEAT_FP);
@@ -48,4 +48,6 @@ void __init_cpu_features_resolver() {
setCPUFeature(FEAT_SVE);
setCPUFeature(FEAT_INIT);
+
+ __atomic_store_n(&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED);
}
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc
index 32a21a2..e4d5e7f 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc
@@ -3,11 +3,10 @@
#define HAVE_SYS_AUXV_H
#endif
-
-
static void __init_cpu_features_constructor(unsigned long hwcap,
const __ifunc_arg_t *arg) {
-#define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F
+ unsigned long long feat = 0;
+#define setCPUFeature(F) feat |= 1ULL << F
#define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr))
#define extractBits(val, start, number) \
(val & ((1ULL << number) - 1ULL) << start) >> start
@@ -20,26 +19,20 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
setCPUFeature(FEAT_PMULL);
if (hwcap & HWCAP_FLAGM)
setCPUFeature(FEAT_FLAGM);
- if (hwcap2 & HWCAP2_FLAGM2) {
- setCPUFeature(FEAT_FLAGM);
+ if (hwcap2 & HWCAP2_FLAGM2)
setCPUFeature(FEAT_FLAGM2);
- }
- if (hwcap & HWCAP_SM3 && hwcap & HWCAP_SM4)
+ if (hwcap & HWCAP_SM4)
setCPUFeature(FEAT_SM4);
if (hwcap & HWCAP_ASIMDDP)
setCPUFeature(FEAT_DOTPROD);
if (hwcap & HWCAP_ASIMDFHM)
setCPUFeature(FEAT_FP16FML);
- if (hwcap & HWCAP_FPHP) {
+ if (hwcap & HWCAP_FPHP)
setCPUFeature(FEAT_FP16);
- setCPUFeature(FEAT_FP);
- }
if (hwcap & HWCAP_DIT)
setCPUFeature(FEAT_DIT);
if (hwcap & HWCAP_ASIMDRDM)
setCPUFeature(FEAT_RDM);
- if (hwcap & HWCAP_ILRCPC)
- setCPUFeature(FEAT_RCPC2);
if (hwcap & HWCAP_AES)
setCPUFeature(FEAT_AES);
if (hwcap & HWCAP_SHA1)
@@ -52,23 +45,20 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
setCPUFeature(FEAT_FCMA);
if (hwcap & HWCAP_SB)
setCPUFeature(FEAT_SB);
- if (hwcap & HWCAP_SSBS)
+ if (hwcap & HWCAP_SSBS) {
+ setCPUFeature(FEAT_SSBS);
setCPUFeature(FEAT_SSBS2);
+ }
if (hwcap2 & HWCAP2_MTE) {
setCPUFeature(FEAT_MEMTAG);
setCPUFeature(FEAT_MEMTAG2);
}
- if (hwcap2 & HWCAP2_MTE3) {
- setCPUFeature(FEAT_MEMTAG);
- setCPUFeature(FEAT_MEMTAG2);
+ if (hwcap2 & HWCAP2_MTE3)
setCPUFeature(FEAT_MEMTAG3);
- }
if (hwcap2 & HWCAP2_SVEAES)
setCPUFeature(FEAT_SVE_AES);
- if (hwcap2 & HWCAP2_SVEPMULL) {
- setCPUFeature(FEAT_SVE_AES);
+ if (hwcap2 & HWCAP2_SVEPMULL)
setCPUFeature(FEAT_SVE_PMULL128);
- }
if (hwcap2 & HWCAP2_SVEBITPERM)
setCPUFeature(FEAT_SVE_BITPERM);
if (hwcap2 & HWCAP2_SVESHA3)
@@ -105,6 +95,8 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
setCPUFeature(FEAT_WFXT);
if (hwcap2 & HWCAP2_SME)
setCPUFeature(FEAT_SME);
+ if (hwcap2 & HWCAP2_SME2)
+ setCPUFeature(FEAT_SME2);
if (hwcap2 & HWCAP2_SME_I16I64)
setCPUFeature(FEAT_SME_I64);
if (hwcap2 & HWCAP2_SME_F64F64)
@@ -113,86 +105,45 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
setCPUFeature(FEAT_MOPS);
if (hwcap & HWCAP_CPUID) {
unsigned long ftr;
- getCPUFeature(ID_AA64PFR1_EL1, ftr);
- // ID_AA64PFR1_EL1.MTE >= 0b0001
- if (extractBits(ftr, 8, 4) >= 0x1)
- setCPUFeature(FEAT_MEMTAG);
- // ID_AA64PFR1_EL1.SSBS == 0b0001
- if (extractBits(ftr, 4, 4) == 0x1)
- setCPUFeature(FEAT_SSBS);
- // ID_AA64PFR1_EL1.SME == 0b0010
- if (extractBits(ftr, 24, 4) == 0x2)
- setCPUFeature(FEAT_SME2);
- getCPUFeature(ID_AA64PFR0_EL1, ftr);
- // ID_AA64PFR0_EL1.FP != 0b1111
- if (extractBits(ftr, 16, 4) != 0xF) {
- setCPUFeature(FEAT_FP);
- // ID_AA64PFR0_EL1.AdvSIMD has the same value as ID_AA64PFR0_EL1.FP
- setCPUFeature(FEAT_SIMD);
- }
- // ID_AA64PFR0_EL1.SVE != 0b0000
- if (extractBits(ftr, 32, 4) != 0x0) {
- // get ID_AA64ZFR0_EL1, that name supported
- // if sve enabled only
- getCPUFeature(S3_0_C0_C4_4, ftr);
- // ID_AA64ZFR0_EL1.SVEver == 0b0000
- if (extractBits(ftr, 0, 4) == 0x0)
- setCPUFeature(FEAT_SVE);
- // ID_AA64ZFR0_EL1.SVEver == 0b0001
- if (extractBits(ftr, 0, 4) == 0x1)
- setCPUFeature(FEAT_SVE2);
- // ID_AA64ZFR0_EL1.BF16 != 0b0000
- if (extractBits(ftr, 20, 4) != 0x0)
- setCPUFeature(FEAT_SVE_BF16);
- }
- getCPUFeature(ID_AA64ISAR0_EL1, ftr);
- // ID_AA64ISAR0_EL1.SHA3 != 0b0000
- if (extractBits(ftr, 32, 4) != 0x0)
- setCPUFeature(FEAT_SHA3);
+
getCPUFeature(ID_AA64ISAR1_EL1, ftr);
- // ID_AA64ISAR1_EL1.DPB >= 0b0001
- if (extractBits(ftr, 0, 4) >= 0x1)
- setCPUFeature(FEAT_DPB);
- // ID_AA64ISAR1_EL1.LRCPC != 0b0000
- if (extractBits(ftr, 20, 4) != 0x0)
- setCPUFeature(FEAT_RCPC);
- // ID_AA64ISAR1_EL1.LRCPC == 0b0011
- if (extractBits(ftr, 20, 4) == 0x3)
- setCPUFeature(FEAT_RCPC3);
- // ID_AA64ISAR1_EL1.SPECRES == 0b0001
- if (extractBits(ftr, 40, 4) == 0x2)
+ /* ID_AA64ISAR1_EL1.SPECRES >= 0b0001 */
+ if (extractBits(ftr, 40, 4) >= 0x1)
setCPUFeature(FEAT_PREDRES);
- // ID_AA64ISAR1_EL1.BF16 != 0b0000
- if (extractBits(ftr, 44, 4) != 0x0)
- setCPUFeature(FEAT_BF16);
- // ID_AA64ISAR1_EL1.LS64 >= 0b0001
+ /* ID_AA64ISAR1_EL1.LS64 >= 0b0001 */
if (extractBits(ftr, 60, 4) >= 0x1)
setCPUFeature(FEAT_LS64);
- // ID_AA64ISAR1_EL1.LS64 >= 0b0010
+ /* ID_AA64ISAR1_EL1.LS64 >= 0b0010 */
if (extractBits(ftr, 60, 4) >= 0x2)
setCPUFeature(FEAT_LS64_V);
- // ID_AA64ISAR1_EL1.LS64 >= 0b0011
+ /* ID_AA64ISAR1_EL1.LS64 >= 0b0011 */
if (extractBits(ftr, 60, 4) >= 0x3)
setCPUFeature(FEAT_LS64_ACCDATA);
- } else {
- // Set some features in case of no CPUID support
- if (hwcap & (HWCAP_FP | HWCAP_FPHP)) {
- setCPUFeature(FEAT_FP);
- // FP and AdvSIMD fields have the same value
- setCPUFeature(FEAT_SIMD);
- }
- if (hwcap & HWCAP_DCPOP || hwcap2 & HWCAP2_DCPODP)
- setCPUFeature(FEAT_DPB);
- if (hwcap & HWCAP_LRCPC || hwcap & HWCAP_ILRCPC)
- setCPUFeature(FEAT_RCPC);
- if (hwcap2 & HWCAP2_BF16 || hwcap2 & HWCAP2_EBF16)
- setCPUFeature(FEAT_BF16);
- if (hwcap2 & HWCAP2_SVEBF16)
- setCPUFeature(FEAT_SVE_BF16);
- if (hwcap2 & HWCAP2_SVE2 && hwcap & HWCAP_SVE)
- setCPUFeature(FEAT_SVE2);
- if (hwcap & HWCAP_SHA3)
- setCPUFeature(FEAT_SHA3);
}
+ if (hwcap & HWCAP_FP) {
+ setCPUFeature(FEAT_FP);
+ // FP and AdvSIMD fields have the same value
+ setCPUFeature(FEAT_SIMD);
+ }
+ if (hwcap & HWCAP_DCPOP)
+ setCPUFeature(FEAT_DPB);
+ if (hwcap & HWCAP_LRCPC)
+ setCPUFeature(FEAT_RCPC);
+ if (hwcap & HWCAP_ILRCPC)
+ setCPUFeature(FEAT_RCPC2);
+ if (hwcap2 & HWCAP2_LRCPC3)
+ setCPUFeature(FEAT_RCPC3);
+ if (hwcap2 & HWCAP2_BF16)
+ setCPUFeature(FEAT_BF16);
+ if (hwcap2 & HWCAP2_SVEBF16)
+ setCPUFeature(FEAT_SVE_BF16);
+ if (hwcap & HWCAP_SVE)
+ setCPUFeature(FEAT_SVE);
+ if (hwcap2 & HWCAP2_SVE2)
+ setCPUFeature(FEAT_SVE2);
+ if (hwcap & HWCAP_SHA3)
+ setCPUFeature(FEAT_SHA3);
setCPUFeature(FEAT_INIT);
+
+ __atomic_store_n(&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED);
}
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/sysauxv.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/sysauxv.inc
index fb5722c..486f77a 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/sysauxv.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/sysauxv.inc
@@ -1,13 +1,13 @@
void __init_cpu_features_resolver(unsigned long hwcap,
const __ifunc_arg_t *arg) {
- if (__aarch64_cpu_features.features)
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;
__init_cpu_features_constructor(hwcap, arg);
}
void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
// CPU features already initialized.
- if (__aarch64_cpu_features.features)
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;
unsigned long hwcap = getauxval(AT_HWCAP);
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc
index 7ddc125..41aba82 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc
@@ -178,6 +178,12 @@
#ifndef HWCAP2_SVE_EBF16
#define HWCAP2_SVE_EBF16 (1ULL << 33)
#endif
+#ifndef HWCAP2_SME2
+#define HWCAP2_SME2 (1UL << 37)
+#endif
#ifndef HWCAP2_MOPS
#define HWCAP2_MOPS (1ULL << 43)
#endif
+#ifndef HWCAP2_LRCPC3
+#define HWCAP2_LRCPC3 (1UL << 46)
+#endif