aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexandros Lamprineas <alexandros.lamprineas@arm.com>2024-01-23 15:48:12 +0000
committerGitHub <noreply@github.com>2024-01-23 15:48:12 +0000
commit179ba129f50aefe6e670800aec7091d958aa6f90 (patch)
treed2596f5a33f84be8bdba326b4ba034625945fa38
parent8c41e3fcb18c2bf7c369f50473367e9cdd072ecc (diff)
downloadllvm-179ba129f50aefe6e670800aec7091d958aa6f90.zip
llvm-179ba129f50aefe6e670800aec7091d958aa6f90.tar.gz
llvm-179ba129f50aefe6e670800aec7091d958aa6f90.tar.bz2
[AArch64][FMV] Support feature MOPS in Function Multi Versioning. (#78788)
The patch adds support for FEAT_MOPS (Memory Copy and Memory Set instructions) in Function Multi Versioning. The bits [19:16] of the system register ID_AA64ISAR2_EL1 indicate whether FEAT_MOPS is implemented in AArch64 state. This information is accessible via ELF hwcaps.
-rw-r--r--clang/test/CodeGen/attr-target-version.c30
-rw-r--r--clang/test/Sema/attr-target-clones-aarch64.c2
-rw-r--r--clang/test/SemaCXX/attr-target-version.cpp1
-rw-r--r--compiler-rt/lib/builtins/cpu_model/aarch64.c1
-rw-r--r--compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc2
-rw-r--r--compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc3
-rw-r--r--llvm/include/llvm/TargetParser/AArch64TargetParser.h3
7 files changed, 25 insertions, 17 deletions
diff --git a/clang/test/CodeGen/attr-target-version.c b/clang/test/CodeGen/attr-target-version.c
index 13b895a..8927985 100644
--- a/clang/test/CodeGen/attr-target-version.c
+++ b/clang/test/CodeGen/attr-target-version.c
@@ -35,7 +35,7 @@ inline int __attribute__((target_version("sve+sve-bf16"))) fmv_inline(void) { re
inline int __attribute__((target_version("sve2-aes+sve2-sha3"))) fmv_inline(void) { return 5; }
inline int __attribute__((target_version("sve2+sve2-pmull128+sve2-bitperm"))) fmv_inline(void) { return 9; }
inline int __attribute__((target_version("sve2-sm4+memtag2"))) fmv_inline(void) { return 10; }
-inline int __attribute__((target_version("memtag3+rcpc3"))) fmv_inline(void) { return 11; }
+inline int __attribute__((target_version("memtag3+rcpc3+mops"))) fmv_inline(void) { return 11; }
inline int __attribute__((target_version("default"))) fmv_inline(void) { return 3; }
__attribute__((target_version("ls64"))) int fmv_e(void);
@@ -272,36 +272,36 @@ int hoo(void) {
// CHECK-NEXT: ret ptr @fmv_inline._Mfp16Mfp16MfcmaMsme
// CHECK: resolver_else:
// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 893353197568
-// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 893353197568
+// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 864726312827224064
+// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 864726312827224064
// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
// CHECK: resolver_return1:
-// CHECK-NEXT: ret ptr @fmv_inline._Msve2Msve2-pmull128Msve2-bitperm
+// CHECK-NEXT: ret ptr @fmv_inline._Mrcpc3Mmemtag3Mmops
// CHECK: resolver_else2:
// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 34359773184
-// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 34359773184
+// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 893353197568
+// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 893353197568
// CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]]
// CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]]
// CHECK: resolver_return3:
-// CHECK-NEXT: ret ptr @fmv_inline._Msha1MpmullMf64mm
+// CHECK-NEXT: ret ptr @fmv_inline._Msve2Msve2-pmull128Msve2-bitperm
// CHECK: resolver_else4:
// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 17246986240
-// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 17246986240
+// CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 34359773184
+// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 34359773184
// CHECK-NEXT: [[TMP15:%.*]] = and i1 true, [[TMP14]]
// CHECK-NEXT: br i1 [[TMP15]], label [[RESOLVER_RETURN5:%.*]], label [[RESOLVER_ELSE6:%.*]]
// CHECK: resolver_return5:
-// CHECK-NEXT: ret ptr @fmv_inline._Msha3Mi8mmMf32mm
+// CHECK-NEXT: ret ptr @fmv_inline._Msha1MpmullMf64mm
// CHECK: resolver_else6:
// CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 288265560523800576
-// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 288265560523800576
+// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 17246986240
+// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 17246986240
// CHECK-NEXT: [[TMP19:%.*]] = and i1 true, [[TMP18]]
// CHECK-NEXT: br i1 [[TMP19]], label [[RESOLVER_RETURN7:%.*]], label [[RESOLVER_ELSE8:%.*]]
// CHECK: resolver_return7:
-// CHECK-NEXT: ret ptr @fmv_inline._Mrcpc3Mmemtag3
+// CHECK-NEXT: ret ptr @fmv_inline._Msha3Mi8mmMf32mm
// CHECK: resolver_else8:
// CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
// CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 19791209299968
@@ -609,7 +609,7 @@ int hoo(void) {
//
//
// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@fmv_inline._Mrcpc3Mmemtag3
+// CHECK-LABEL: define {{[^@]+}}@fmv_inline._Mrcpc3Mmemtag3Mmops
// CHECK-SAME: () #[[ATTR23:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 11
@@ -768,7 +768,7 @@ int hoo(void) {
// CHECK: attributes #[[ATTR20]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+neon,+sve,+sve2,+sve2-aes,+sve2-sha3" }
// CHECK: attributes #[[ATTR21]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+neon,+sve,+sve2,+sve2-aes,+sve2-bitperm" }
// CHECK: attributes #[[ATTR22]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+mte,+neon,+sve,+sve2,+sve2-sm4" }
-// CHECK: attributes #[[ATTR23]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+ls64,+mte,+rcpc,+rcpc3" }
+// CHECK: attributes #[[ATTR23]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+ls64,+mops,+mte,+rcpc,+rcpc3" }
// CHECK: attributes #[[ATTR24]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+ls64,+sb" }
//.
// CHECK-NOFMV: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" }
diff --git a/clang/test/Sema/attr-target-clones-aarch64.c b/clang/test/Sema/attr-target-clones-aarch64.c
index 9adabf8..4054b7c 100644
--- a/clang/test/Sema/attr-target-clones-aarch64.c
+++ b/clang/test/Sema/attr-target-clones-aarch64.c
@@ -1,6 +1,6 @@
// RUN: %clang_cc1 -triple aarch64-linux-gnu -fsyntax-only -verify %s
-void __attribute__((target_clones("fp16+sve2-aes", "sb+sve2-sha3+rcpc3"))) no_def(void);
+void __attribute__((target_clones("fp16+sve2-aes", "sb+sve2-sha3+rcpc3+mops"))) no_def(void);
// expected-warning@+1 {{unsupported 'default' in the 'target_clones' attribute string; 'target_clones' attribute ignored}}
void __attribute__((target_clones("default+sha3"))) warn1(void);
diff --git a/clang/test/SemaCXX/attr-target-version.cpp b/clang/test/SemaCXX/attr-target-version.cpp
index 2e262cd..5c542ad 100644
--- a/clang/test/SemaCXX/attr-target-version.cpp
+++ b/clang/test/SemaCXX/attr-target-version.cpp
@@ -6,6 +6,7 @@ void __attribute__((target_version("vmull"))) wrong_tv(void);
void __attribute__((target_version("dotprod"))) no_def(void);
void __attribute__((target_version("rdm+fp"))) no_def(void);
void __attribute__((target_version("rcpc3"))) no_def(void);
+void __attribute__((target_version("mops"))) no_def(void);
// expected-error@+1 {{no matching function for call to 'no_def'}}
void foo(void) { no_def(); }
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64.c b/compiler-rt/lib/builtins/cpu_model/aarch64.c
index 44e1cf4..17bddfc 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64.c
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64.c
@@ -115,6 +115,7 @@ enum CPUFeatures {
FEAT_SME_I64,
FEAT_SME2,
FEAT_RCPC3,
+ FEAT_MOPS,
FEAT_MAX,
FEAT_EXT = 62, // Reserved to indicate presence of additional features field
// in __aarch64_cpu_features
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc
index 2f712f4..32a21a2 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc
@@ -109,6 +109,8 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
setCPUFeature(FEAT_SME_I64);
if (hwcap2 & HWCAP2_SME_F64F64)
setCPUFeature(FEAT_SME_F64);
+ if (hwcap2 & HWCAP2_MOPS)
+ setCPUFeature(FEAT_MOPS);
if (hwcap & HWCAP_CPUID) {
unsigned long ftr;
getCPUFeature(ID_AA64PFR1_EL1, ftr);
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc
index 328d9c4..7ddc125 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc
@@ -178,3 +178,6 @@
#ifndef HWCAP2_SVE_EBF16
#define HWCAP2_SVE_EBF16 (1ULL << 33)
#endif
+#ifndef HWCAP2_MOPS
+#define HWCAP2_MOPS (1ULL << 43)
+#endif
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index ddf5ab4..623fdc2 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -94,6 +94,7 @@ enum CPUFeatures {
FEAT_SME_I64,
FEAT_SME2,
FEAT_RCPC3,
+ FEAT_MOPS,
FEAT_MAX,
FEAT_EXT = 62,
FEAT_INIT
@@ -246,7 +247,7 @@ inline constexpr ExtensionInfo Extensions[] = {
{"memtag", AArch64::AEK_MTE, "+mte", "-mte", FEAT_MEMTAG, "", 440},
{"memtag2", AArch64::AEK_NONE, {}, {}, FEAT_MEMTAG2, "+mte", 450},
{"memtag3", AArch64::AEK_NONE, {}, {}, FEAT_MEMTAG3, "+mte", 460},
- {"mops", AArch64::AEK_MOPS, "+mops", "-mops", FEAT_INIT, "", 0},
+ {"mops", AArch64::AEK_MOPS, "+mops", "-mops", FEAT_MOPS, "+mops", 650},
{"pauth", AArch64::AEK_PAUTH, "+pauth", "-pauth", FEAT_INIT, "", 0},
{"pmull", AArch64::AEK_NONE, {}, {}, FEAT_PMULL, "+aes,+fp-armv8,+neon", 160},
{"pmuv3", AArch64::AEK_PERFMON, "+perfmon", "-perfmon", FEAT_INIT, "", 0},