aboutsummaryrefslogtreecommitdiff
path: root/clang
diff options
context:
space:
mode:
authorDavid Green <david.green@arm.com>2025-01-31 07:57:43 +0000
committerGitHub <noreply@github.com>2025-01-31 07:57:43 +0000
commit9f1c825fb62319b94ac9604f733afd59e9eb461b (patch)
tree9ebc67c2f31bacffe669bbbba3aaaa872aec3513 /clang
parentd2d8e2e0306ab1f0eac6406b5f2ec4d231b1f7ff (diff)
downloadllvm-9f1c825fb62319b94ac9604f733afd59e9eb461b.zip
llvm-9f1c825fb62319b94ac9604f733afd59e9eb461b.tar.gz
llvm-9f1c825fb62319b94ac9604f733afd59e9eb461b.tar.bz2
[AArch64] Enable vscale_range with +sme (#124466)
If we have +sme but not +sve, we would not set vscale_range on functions. It should be valid to apply it with the same range with just +sme, which can help mitigate some performance regressions in cases such as scalable vector bitcasts (https://godbolt.org/z/exhe4jd8d).
Diffstat (limited to 'clang')
-rw-r--r--clang/include/clang/Basic/TargetInfo.h3
-rw-r--r--clang/lib/AST/ASTContext.cpp3
-rw-r--r--clang/lib/AST/ItaniumMangle.cpp2
-rw-r--r--clang/lib/Basic/Targets/AArch64.cpp5
-rw-r--r--clang/lib/Basic/Targets/AArch64.h3
-rw-r--r--clang/lib/Basic/Targets/RISCV.cpp5
-rw-r--r--clang/lib/Basic/Targets/RISCV.h3
-rw-r--r--clang/lib/CodeGen/CodeGenFunction.cpp17
-rw-r--r--clang/lib/CodeGen/Targets/RISCV.cpp4
-rw-r--r--clang/lib/Sema/SemaType.cpp3
-rw-r--r--clang/test/CodeGen/AArch64/sme-intrinsics/aarch64-sme-attrs.cpp4
11 files changed, 30 insertions, 22 deletions
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 43c09cf..d762144 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1023,7 +1023,8 @@ public:
/// Returns target-specific min and max values VScale_Range.
virtual std::optional<std::pair<unsigned, unsigned>>
- getVScaleRange(const LangOptions &LangOpts) const {
+ getVScaleRange(const LangOptions &LangOpts,
+ bool IsArmStreamingFunction) const {
return std::nullopt;
}
/// The __builtin_clz* and __builtin_ctz* built-in
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 4e387da..2dc9669 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -10363,7 +10363,8 @@ bool ASTContext::areLaxCompatibleSveTypes(QualType FirstType,
/// getRVVTypeSize - Return RVV vector register size.
static uint64_t getRVVTypeSize(ASTContext &Context, const BuiltinType *Ty) {
assert(Ty->isRVVVLSBuiltinType() && "Invalid RVV Type");
- auto VScale = Context.getTargetInfo().getVScaleRange(Context.getLangOpts());
+ auto VScale =
+ Context.getTargetInfo().getVScaleRange(Context.getLangOpts(), false);
if (!VScale)
return 0;
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index e889b74..e380d41 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -4198,7 +4198,7 @@ void CXXNameMangler::mangleRISCVFixedRVVVectorType(const VectorType *T) {
// Apend the LMUL suffix.
auto VScale = getASTContext().getTargetInfo().getVScaleRange(
- getASTContext().getLangOpts());
+ getASTContext().getLangOpts(), false);
unsigned VLen = VScale->first * llvm::RISCV::RVVBitsPerBlock;
if (T->getVectorKind() == VectorKind::RVVFixedLengthData) {
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 0b89913..57c9849 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -703,12 +703,13 @@ ArrayRef<Builtin::Info> AArch64TargetInfo::getTargetBuiltins() const {
}
std::optional<std::pair<unsigned, unsigned>>
-AArch64TargetInfo::getVScaleRange(const LangOptions &LangOpts) const {
+AArch64TargetInfo::getVScaleRange(const LangOptions &LangOpts,
+ bool IsArmStreamingFunction) const {
if (LangOpts.VScaleMin || LangOpts.VScaleMax)
return std::pair<unsigned, unsigned>(
LangOpts.VScaleMin ? LangOpts.VScaleMin : 1, LangOpts.VScaleMax);
- if (hasFeature("sve"))
+ if (hasFeature("sve") || (IsArmStreamingFunction && hasFeature("sme")))
return std::pair<unsigned, unsigned>(1, 16);
return std::nullopt;
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index 8695c07..79e012f 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -184,7 +184,8 @@ public:
ArrayRef<Builtin::Info> getTargetBuiltins() const override;
std::optional<std::pair<unsigned, unsigned>>
- getVScaleRange(const LangOptions &LangOpts) const override;
+ getVScaleRange(const LangOptions &LangOpts,
+ bool IsArmStreamingFunction) const override;
bool doesFeatureAffectCodeGen(StringRef Name) const override;
bool validateCpuSupports(StringRef FeatureStr) const override;
bool hasFeature(StringRef Feature) const override;
diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
index 8167d76..61b8ae9 100644
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -222,7 +222,7 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts,
// Currently we support the v1.0 RISC-V V intrinsics.
Builder.defineMacro("__riscv_v_intrinsic", Twine(getVersionValue(1, 0)));
- auto VScale = getVScaleRange(Opts);
+ auto VScale = getVScaleRange(Opts, false);
if (VScale && VScale->first && VScale->first == VScale->second)
Builder.defineMacro("__riscv_v_fixed_vlen",
Twine(VScale->first * llvm::RISCV::RVVBitsPerBlock));
@@ -289,7 +289,8 @@ bool RISCVTargetInfo::initFeatureMap(
}
std::optional<std::pair<unsigned, unsigned>>
-RISCVTargetInfo::getVScaleRange(const LangOptions &LangOpts) const {
+RISCVTargetInfo::getVScaleRange(const LangOptions &LangOpts,
+ bool IsArmStreamingFunction) const {
// RISCV::RVVBitsPerBlock is 64.
unsigned VScaleMin = ISAInfo->getMinVLen() / llvm::RISCV::RVVBitsPerBlock;
diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h
index bb3f3a5..d31c46f 100644
--- a/clang/lib/Basic/Targets/RISCV.h
+++ b/clang/lib/Basic/Targets/RISCV.h
@@ -99,7 +99,8 @@ public:
const std::vector<std::string> &FeaturesVec) const override;
std::optional<std::pair<unsigned, unsigned>>
- getVScaleRange(const LangOptions &LangOpts) const override;
+ getVScaleRange(const LangOptions &LangOpts,
+ bool IsArmStreamingFunction) const override;
bool hasFeature(StringRef Feature) const override;
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index bbef277..08165e0 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -551,14 +551,6 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
CurFn->addFnAttr("min-legal-vector-width",
llvm::utostr(LargestVectorWidth));
- // Add vscale_range attribute if appropriate.
- std::optional<std::pair<unsigned, unsigned>> VScaleRange =
- getContext().getTargetInfo().getVScaleRange(getLangOpts());
- if (VScaleRange) {
- CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(
- getLLVMContext(), VScaleRange->first, VScaleRange->second));
- }
-
// If we generated an unreachable return block, delete it now.
if (ReturnBlock.isValid() && ReturnBlock.getBlock()->use_empty()) {
Builder.ClearInsertionPoint();
@@ -1110,6 +1102,15 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
if (FD && FD->isMain())
Fn->removeFnAttr("zero-call-used-regs");
+ // Add vscale_range attribute if appropriate.
+ std::optional<std::pair<unsigned, unsigned>> VScaleRange =
+ getContext().getTargetInfo().getVScaleRange(
+ getLangOpts(), FD ? IsArmStreamingFunction(FD, true) : false);
+ if (VScaleRange) {
+ CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(
+ getLLVMContext(), VScaleRange->first, VScaleRange->second));
+ }
+
llvm::BasicBlock *EntryBB = createBasicBlock("entry", CurFn);
// Create a marker to make it easy to insert allocas into the entryblock
diff --git a/clang/lib/CodeGen/Targets/RISCV.cpp b/clang/lib/CodeGen/Targets/RISCV.cpp
index 2b70f2b..2c48ba3 100644
--- a/clang/lib/CodeGen/Targets/RISCV.cpp
+++ b/clang/lib/CodeGen/Targets/RISCV.cpp
@@ -367,8 +367,8 @@ ABIArgInfo RISCVABIInfo::coerceVLSVector(QualType Ty) const {
const auto *VT = Ty->castAs<VectorType>();
assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
- auto VScale =
- getContext().getTargetInfo().getVScaleRange(getContext().getLangOpts());
+ auto VScale = getContext().getTargetInfo().getVScaleRange(
+ getContext().getLangOpts(), false);
unsigned NumElts = VT->getNumElements();
llvm::Type *EltType = llvm::Type::getInt1Ty(getVMContext());
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 2781651..1fa5239 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -8492,7 +8492,8 @@ static void HandleRISCVRVVVectorBitsTypeAttr(QualType &CurType,
return;
}
- auto VScale = S.Context.getTargetInfo().getVScaleRange(S.getLangOpts());
+ auto VScale =
+ S.Context.getTargetInfo().getVScaleRange(S.getLangOpts(), false);
if (!VScale || !VScale->first || VScale->first != VScale->second) {
S.Diag(Attr.getLoc(), diag::err_attribute_riscv_rvv_bits_unsupported)
<< Attr;
diff --git a/clang/test/CodeGen/AArch64/sme-intrinsics/aarch64-sme-attrs.cpp b/clang/test/CodeGen/AArch64/sme-intrinsics/aarch64-sme-attrs.cpp
index 54762c8..c734c69 100644
--- a/clang/test/CodeGen/AArch64/sme-intrinsics/aarch64-sme-attrs.cpp
+++ b/clang/test/CodeGen/AArch64/sme-intrinsics/aarch64-sme-attrs.cpp
@@ -300,12 +300,12 @@ int test_variadic_template() __arm_inout("za") {
preserves_za_decl);
}
-// CHECK: attributes #[[SM_ENABLED]] = { mustprogress noinline nounwind "aarch64_pstate_sm_enabled" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
+// CHECK: attributes #[[SM_ENABLED]] = { mustprogress noinline nounwind vscale_range(1,16) "aarch64_pstate_sm_enabled" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
// CHECK: attributes #[[NORMAL_DECL]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
// CHECK: attributes #[[SM_ENABLED_DECL]] = { "aarch64_pstate_sm_enabled" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
// CHECK: attributes #[[SM_COMPATIBLE]] = { mustprogress noinline nounwind "aarch64_pstate_sm_compatible" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
// CHECK: attributes #[[SM_COMPATIBLE_DECL]] = { "aarch64_pstate_sm_compatible" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
-// CHECK: attributes #[[SM_BODY]] = { mustprogress noinline nounwind "aarch64_pstate_sm_body" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
+// CHECK: attributes #[[SM_BODY]] = { mustprogress noinline nounwind vscale_range(1,16) "aarch64_pstate_sm_body" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
// CHECK: attributes #[[ZA_SHARED]] = { mustprogress noinline nounwind "aarch64_inout_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
// CHECK: attributes #[[ZA_SHARED_DECL]] = { "aarch64_inout_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
// CHECK: attributes #[[ZA_PRESERVED]] = { mustprogress noinline nounwind "aarch64_preserves_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }