From 179ba129f50aefe6e670800aec7091d958aa6f90 Mon Sep 17 00:00:00 2001 From: Alexandros Lamprineas Date: Tue, 23 Jan 2024 15:48:12 +0000 Subject: [AArch64][FMV] Support feature MOPS in Function Multi Versioning. (#78788) The patch adds support for FEAT_MOPS (Memory Copy and Memory Set instructions) in Function Multi Versioning. The bits [19:16] of the system register ID_AA64ISAR2_EL1 indicate whether FEAT_MOPS is implemented in AArch64 state. This information is accessible via ELF hwcaps. --- clang/test/CodeGen/attr-target-version.c | 30 +++++++++++----------- clang/test/Sema/attr-target-clones-aarch64.c | 2 +- clang/test/SemaCXX/attr-target-version.cpp | 1 + compiler-rt/lib/builtins/cpu_model/aarch64.c | 1 + .../lib/builtins/cpu_model/aarch64/fmv/mrs.inc | 2 ++ .../lib/builtins/cpu_model/aarch64/hwcap.inc | 3 +++ .../llvm/TargetParser/AArch64TargetParser.h | 3 ++- 7 files changed, 25 insertions(+), 17 deletions(-) diff --git a/clang/test/CodeGen/attr-target-version.c b/clang/test/CodeGen/attr-target-version.c index 13b895a..8927985 100644 --- a/clang/test/CodeGen/attr-target-version.c +++ b/clang/test/CodeGen/attr-target-version.c @@ -35,7 +35,7 @@ inline int __attribute__((target_version("sve+sve-bf16"))) fmv_inline(void) { re inline int __attribute__((target_version("sve2-aes+sve2-sha3"))) fmv_inline(void) { return 5; } inline int __attribute__((target_version("sve2+sve2-pmull128+sve2-bitperm"))) fmv_inline(void) { return 9; } inline int __attribute__((target_version("sve2-sm4+memtag2"))) fmv_inline(void) { return 10; } -inline int __attribute__((target_version("memtag3+rcpc3"))) fmv_inline(void) { return 11; } +inline int __attribute__((target_version("memtag3+rcpc3+mops"))) fmv_inline(void) { return 11; } inline int __attribute__((target_version("default"))) fmv_inline(void) { return 3; } __attribute__((target_version("ls64"))) int fmv_e(void); @@ -272,36 +272,36 @@ int hoo(void) { // CHECK-NEXT: ret ptr @fmv_inline._Mfp16Mfp16MfcmaMsme // CHECK: resolver_else: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 893353197568 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 893353197568 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 864726312827224064 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 864726312827224064 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK: resolver_return1: -// CHECK-NEXT: ret ptr @fmv_inline._Msve2Msve2-pmull128Msve2-bitperm +// CHECK-NEXT: ret ptr @fmv_inline._Mrcpc3Mmemtag3Mmops // CHECK: resolver_else2: // CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 34359773184 -// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 34359773184 +// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 893353197568 +// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 893353197568 // CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]] // CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] // CHECK: resolver_return3: -// CHECK-NEXT: ret ptr @fmv_inline._Msha1MpmullMf64mm +// CHECK-NEXT: ret ptr @fmv_inline._Msve2Msve2-pmull128Msve2-bitperm // CHECK: resolver_else4: // CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 17246986240 -// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 17246986240 +// CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 34359773184 +// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 34359773184 // CHECK-NEXT: [[TMP15:%.*]] = and i1 true, [[TMP14]] // CHECK-NEXT: br i1 [[TMP15]], label [[RESOLVER_RETURN5:%.*]], label [[RESOLVER_ELSE6:%.*]] // CHECK: resolver_return5: -// CHECK-NEXT: ret ptr @fmv_inline._Msha3Mi8mmMf32mm +// CHECK-NEXT: ret ptr @fmv_inline._Msha1MpmullMf64mm // CHECK: resolver_else6: // CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 288265560523800576 -// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 288265560523800576 +// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 17246986240 +// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 17246986240 // CHECK-NEXT: [[TMP19:%.*]] = and i1 true, [[TMP18]] // CHECK-NEXT: br i1 [[TMP19]], label [[RESOLVER_RETURN7:%.*]], label [[RESOLVER_ELSE8:%.*]] // CHECK: resolver_return7: -// CHECK-NEXT: ret ptr @fmv_inline._Mrcpc3Mmemtag3 +// CHECK-NEXT: ret ptr @fmv_inline._Msha3Mi8mmMf32mm // CHECK: resolver_else8: // CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 // CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 19791209299968 @@ -609,7 +609,7 @@ int hoo(void) { // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: define {{[^@]+}}@fmv_inline._Mrcpc3Mmemtag3 +// CHECK-LABEL: define {{[^@]+}}@fmv_inline._Mrcpc3Mmemtag3Mmops // CHECK-SAME: () #[[ATTR23:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 11 @@ -768,7 +768,7 @@ int hoo(void) { // CHECK: attributes #[[ATTR20]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+neon,+sve,+sve2,+sve2-aes,+sve2-sha3" } // CHECK: attributes #[[ATTR21]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+neon,+sve,+sve2,+sve2-aes,+sve2-bitperm" } // CHECK: attributes #[[ATTR22]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+mte,+neon,+sve,+sve2,+sve2-sm4" } -// CHECK: attributes #[[ATTR23]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+ls64,+mte,+rcpc,+rcpc3" } +// CHECK: attributes #[[ATTR23]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+ls64,+mops,+mte,+rcpc,+rcpc3" } // CHECK: attributes #[[ATTR24]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+ls64,+sb" } //. // CHECK-NOFMV: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" } diff --git a/clang/test/Sema/attr-target-clones-aarch64.c b/clang/test/Sema/attr-target-clones-aarch64.c index 9adabf8..4054b7c 100644 --- a/clang/test/Sema/attr-target-clones-aarch64.c +++ b/clang/test/Sema/attr-target-clones-aarch64.c @@ -1,6 +1,6 @@ // RUN: %clang_cc1 -triple aarch64-linux-gnu -fsyntax-only -verify %s -void __attribute__((target_clones("fp16+sve2-aes", "sb+sve2-sha3+rcpc3"))) no_def(void); +void __attribute__((target_clones("fp16+sve2-aes", "sb+sve2-sha3+rcpc3+mops"))) no_def(void); // expected-warning@+1 {{unsupported 'default' in the 'target_clones' attribute string; 'target_clones' attribute ignored}} void __attribute__((target_clones("default+sha3"))) warn1(void); diff --git a/clang/test/SemaCXX/attr-target-version.cpp b/clang/test/SemaCXX/attr-target-version.cpp index 2e262cd..5c542ad 100644 --- a/clang/test/SemaCXX/attr-target-version.cpp +++ b/clang/test/SemaCXX/attr-target-version.cpp @@ -6,6 +6,7 @@ void __attribute__((target_version("vmull"))) wrong_tv(void); void __attribute__((target_version("dotprod"))) no_def(void); void __attribute__((target_version("rdm+fp"))) no_def(void); void __attribute__((target_version("rcpc3"))) no_def(void); +void __attribute__((target_version("mops"))) no_def(void); // expected-error@+1 {{no matching function for call to 'no_def'}} void foo(void) { no_def(); } diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64.c b/compiler-rt/lib/builtins/cpu_model/aarch64.c index 44e1cf4..17bddfc 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64.c +++ b/compiler-rt/lib/builtins/cpu_model/aarch64.c @@ -115,6 +115,7 @@ enum CPUFeatures { FEAT_SME_I64, FEAT_SME2, FEAT_RCPC3, + FEAT_MOPS, FEAT_MAX, FEAT_EXT = 62, // Reserved to indicate presence of additional features field // in __aarch64_cpu_features diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc index 2f712f4..32a21a2 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc +++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc @@ -109,6 +109,8 @@ static void __init_cpu_features_constructor(unsigned long hwcap, setCPUFeature(FEAT_SME_I64); if (hwcap2 & HWCAP2_SME_F64F64) setCPUFeature(FEAT_SME_F64); + if (hwcap2 & HWCAP2_MOPS) + setCPUFeature(FEAT_MOPS); if (hwcap & HWCAP_CPUID) { unsigned long ftr; getCPUFeature(ID_AA64PFR1_EL1, ftr); diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc index 328d9c4..7ddc125 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc +++ b/compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc @@ -178,3 +178,6 @@ #ifndef HWCAP2_SVE_EBF16 #define HWCAP2_SVE_EBF16 (1ULL << 33) #endif +#ifndef HWCAP2_MOPS +#define HWCAP2_MOPS (1ULL << 43) +#endif diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h index ddf5ab4..623fdc2 100644 --- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -94,6 +94,7 @@ enum CPUFeatures { FEAT_SME_I64, FEAT_SME2, FEAT_RCPC3, + FEAT_MOPS, FEAT_MAX, FEAT_EXT = 62, FEAT_INIT @@ -246,7 +247,7 @@ inline constexpr ExtensionInfo Extensions[] = { {"memtag", AArch64::AEK_MTE, "+mte", "-mte", FEAT_MEMTAG, "", 440}, {"memtag2", AArch64::AEK_NONE, {}, {}, FEAT_MEMTAG2, "+mte", 450}, {"memtag3", AArch64::AEK_NONE, {}, {}, FEAT_MEMTAG3, "+mte", 460}, - {"mops", AArch64::AEK_MOPS, "+mops", "-mops", FEAT_INIT, "", 0}, + {"mops", AArch64::AEK_MOPS, "+mops", "-mops", FEAT_MOPS, "+mops", 650}, {"pauth", AArch64::AEK_PAUTH, "+pauth", "-pauth", FEAT_INIT, "", 0}, {"pmull", AArch64::AEK_NONE, {}, {}, FEAT_PMULL, "+aes,+fp-armv8,+neon", 160}, {"pmuv3", AArch64::AEK_PERFMON, "+perfmon", "-perfmon", FEAT_INIT, "", 0}, -- cgit v1.1