author    | David Green <david.green@arm.com> | 2025-01-31 07:57:43 +0000
committer | GitHub <noreply@github.com>       | 2025-01-31 07:57:43 +0000
commit    | 9f1c825fb62319b94ac9604f733afd59e9eb461b (patch)
tree      | 9ebc67c2f31bacffe669bbbba3aaaa872aec3513 /clang
parent    | d2d8e2e0306ab1f0eac6406b5f2ec4d231b1f7ff (diff)
[AArch64] Enable vscale_range with +sme (#124466)
If we have +sme but not +sve, we currently do not set vscale_range on
functions. It should be valid to apply the same range with just +sme,
which can help mitigate some performance regressions in cases such as
scalable vector bitcasts (https://godbolt.org/z/exhe4jd8d).
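
For illustration only (not part of the commit), a minimal sketch of the kind of source this affects: a streaming function built with +sme but without +sve, compiled with something like clang --target=aarch64-linux-gnu -march=armv9-a+sme -O2 -S -emit-llvm. With this change, the emitted IR definition of such a function carries vscale_range(1,16), matching the updated CHECK lines in aarch64-sme-attrs.cpp below. The function name and loop shape are hypothetical.

// Hypothetical example (not from the patch): streaming-compatible SVE
// intrinsics used from a streaming function that only requires +sme.
// After this change the emitted definition gets vscale_range(1,16).
#include <arm_sme.h>  // pulls in the streaming-compatible SVE intrinsics

void scale_streaming(float *dst, const float *src, float s, int n)
    __arm_streaming {
  // Predicated strip-mined loop over n floats, scaling each element by s.
  for (int i = 0; i < n; i += svcntw()) {
    svbool_t pg = svwhilelt_b32(i, n);
    svfloat32_t v = svld1_f32(pg, src + i);
    svst1_f32(pg, dst + i, svmul_n_f32_x(pg, v, s));
  }
}

The (1, 16) range matches what AArch64TargetInfo::getVScaleRange already returns for +sve; the patch extends it to streaming functions when only +sme is present.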
Diffstat (limited to 'clang')
 clang/include/clang/Basic/TargetInfo.h                          |  3
 clang/lib/AST/ASTContext.cpp                                    |  3
 clang/lib/AST/ItaniumMangle.cpp                                 |  2
 clang/lib/Basic/Targets/AArch64.cpp                             |  5
 clang/lib/Basic/Targets/AArch64.h                               |  3
 clang/lib/Basic/Targets/RISCV.cpp                               |  5
 clang/lib/Basic/Targets/RISCV.h                                 |  3
 clang/lib/CodeGen/CodeGenFunction.cpp                           | 17
 clang/lib/CodeGen/Targets/RISCV.cpp                             |  4
 clang/lib/Sema/SemaType.cpp                                     |  3
 clang/test/CodeGen/AArch64/sme-intrinsics/aarch64-sme-attrs.cpp |  4
11 files changed, 30 insertions, 22 deletions
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 43c09cf..d762144 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1023,7 +1023,8 @@ public:
   /// Returns target-specific min and max values VScale_Range.
   virtual std::optional<std::pair<unsigned, unsigned>>
-  getVScaleRange(const LangOptions &LangOpts) const {
+  getVScaleRange(const LangOptions &LangOpts,
+                 bool IsArmStreamingFunction) const {
     return std::nullopt;
   }
 
   /// The __builtin_clz* and __builtin_ctz* built-in
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 4e387da..2dc9669 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -10363,7 +10363,8 @@ bool ASTContext::areLaxCompatibleSveTypes(QualType FirstType,
 /// getRVVTypeSize - Return RVV vector register size.
 static uint64_t getRVVTypeSize(ASTContext &Context, const BuiltinType *Ty) {
   assert(Ty->isRVVVLSBuiltinType() && "Invalid RVV Type");
-  auto VScale = Context.getTargetInfo().getVScaleRange(Context.getLangOpts());
+  auto VScale =
+      Context.getTargetInfo().getVScaleRange(Context.getLangOpts(), false);
   if (!VScale)
     return 0;
 
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index e889b74..e380d41 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -4198,7 +4198,7 @@ void CXXNameMangler::mangleRISCVFixedRVVVectorType(const VectorType *T) {
 
   // Apend the LMUL suffix.
   auto VScale = getASTContext().getTargetInfo().getVScaleRange(
-      getASTContext().getLangOpts());
+      getASTContext().getLangOpts(), false);
   unsigned VLen = VScale->first * llvm::RISCV::RVVBitsPerBlock;
 
   if (T->getVectorKind() == VectorKind::RVVFixedLengthData) {
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 0b89913..57c9849 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -703,12 +703,13 @@ ArrayRef<Builtin::Info> AArch64TargetInfo::getTargetBuiltins() const {
 }
 
 std::optional<std::pair<unsigned, unsigned>>
-AArch64TargetInfo::getVScaleRange(const LangOptions &LangOpts) const {
+AArch64TargetInfo::getVScaleRange(const LangOptions &LangOpts,
+                                  bool IsArmStreamingFunction) const {
   if (LangOpts.VScaleMin || LangOpts.VScaleMax)
     return std::pair<unsigned, unsigned>(
         LangOpts.VScaleMin ? LangOpts.VScaleMin : 1, LangOpts.VScaleMax);
 
-  if (hasFeature("sve"))
+  if (hasFeature("sve") || (IsArmStreamingFunction && hasFeature("sme")))
     return std::pair<unsigned, unsigned>(1, 16);
 
   return std::nullopt;
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index 8695c07..79e012f 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -184,7 +184,8 @@ public:
   ArrayRef<Builtin::Info> getTargetBuiltins() const override;
 
   std::optional<std::pair<unsigned, unsigned>>
-  getVScaleRange(const LangOptions &LangOpts) const override;
+  getVScaleRange(const LangOptions &LangOpts,
+                 bool IsArmStreamingFunction) const override;
   bool doesFeatureAffectCodeGen(StringRef Name) const override;
   bool validateCpuSupports(StringRef FeatureStr) const override;
   bool hasFeature(StringRef Feature) const override;
diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
index 8167d76..61b8ae9 100644
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -222,7 +222,7 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts,
     // Currently we support the v1.0 RISC-V V intrinsics.
     Builder.defineMacro("__riscv_v_intrinsic", Twine(getVersionValue(1, 0)));
 
-    auto VScale = getVScaleRange(Opts);
+    auto VScale = getVScaleRange(Opts, false);
     if (VScale && VScale->first && VScale->first == VScale->second)
       Builder.defineMacro("__riscv_v_fixed_vlen",
                           Twine(VScale->first * llvm::RISCV::RVVBitsPerBlock));
@@ -289,7 +289,8 @@ bool RISCVTargetInfo::initFeatureMap(
 }
 
 std::optional<std::pair<unsigned, unsigned>>
-RISCVTargetInfo::getVScaleRange(const LangOptions &LangOpts) const {
+RISCVTargetInfo::getVScaleRange(const LangOptions &LangOpts,
+                                bool IsArmStreamingFunction) const {
   // RISCV::RVVBitsPerBlock is 64.
   unsigned VScaleMin = ISAInfo->getMinVLen() / llvm::RISCV::RVVBitsPerBlock;
 
diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h
index bb3f3a5..d31c46f 100644
--- a/clang/lib/Basic/Targets/RISCV.h
+++ b/clang/lib/Basic/Targets/RISCV.h
@@ -99,7 +99,8 @@ public:
                       const std::vector<std::string> &FeaturesVec) const override;
 
   std::optional<std::pair<unsigned, unsigned>>
-  getVScaleRange(const LangOptions &LangOpts) const override;
+  getVScaleRange(const LangOptions &LangOpts,
+                 bool IsArmStreamingFunction) const override;
 
   bool hasFeature(StringRef Feature) const override;
 
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index bbef277..08165e0 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -551,14 +551,6 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
   CurFn->addFnAttr("min-legal-vector-width",
                    llvm::utostr(LargestVectorWidth));
 
-  // Add vscale_range attribute if appropriate.
-  std::optional<std::pair<unsigned, unsigned>> VScaleRange =
-      getContext().getTargetInfo().getVScaleRange(getLangOpts());
-  if (VScaleRange) {
-    CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(
-        getLLVMContext(), VScaleRange->first, VScaleRange->second));
-  }
-
   // If we generated an unreachable return block, delete it now.
   if (ReturnBlock.isValid() && ReturnBlock.getBlock()->use_empty()) {
     Builder.ClearInsertionPoint();
@@ -1110,6 +1102,15 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
   if (FD && FD->isMain())
     Fn->removeFnAttr("zero-call-used-regs");
 
+  // Add vscale_range attribute if appropriate.
+  std::optional<std::pair<unsigned, unsigned>> VScaleRange =
+      getContext().getTargetInfo().getVScaleRange(
+          getLangOpts(), FD ? IsArmStreamingFunction(FD, true) : false);
+  if (VScaleRange) {
+    CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(
+        getLLVMContext(), VScaleRange->first, VScaleRange->second));
+  }
+
   llvm::BasicBlock *EntryBB = createBasicBlock("entry", CurFn);
 
   // Create a marker to make it easy to insert allocas into the entryblock
diff --git a/clang/lib/CodeGen/Targets/RISCV.cpp b/clang/lib/CodeGen/Targets/RISCV.cpp
index 2b70f2b..2c48ba3 100644
--- a/clang/lib/CodeGen/Targets/RISCV.cpp
+++ b/clang/lib/CodeGen/Targets/RISCV.cpp
@@ -367,8 +367,8 @@ ABIArgInfo RISCVABIInfo::coerceVLSVector(QualType Ty) const {
   const auto *VT = Ty->castAs<VectorType>();
   assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
 
-  auto VScale =
-      getContext().getTargetInfo().getVScaleRange(getContext().getLangOpts());
+  auto VScale = getContext().getTargetInfo().getVScaleRange(
+      getContext().getLangOpts(), false);
 
   unsigned NumElts = VT->getNumElements();
   llvm::Type *EltType = llvm::Type::getInt1Ty(getVMContext());
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 2781651..1fa5239 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -8492,7 +8492,8 @@ static void HandleRISCVRVVVectorBitsTypeAttr(QualType &CurType,
     return;
   }
 
-  auto VScale = S.Context.getTargetInfo().getVScaleRange(S.getLangOpts());
+  auto VScale =
+      S.Context.getTargetInfo().getVScaleRange(S.getLangOpts(), false);
   if (!VScale || !VScale->first || VScale->first != VScale->second) {
     S.Diag(Attr.getLoc(), diag::err_attribute_riscv_rvv_bits_unsupported)
         << Attr;
diff --git a/clang/test/CodeGen/AArch64/sme-intrinsics/aarch64-sme-attrs.cpp b/clang/test/CodeGen/AArch64/sme-intrinsics/aarch64-sme-attrs.cpp
index 54762c8..c734c69 100644
--- a/clang/test/CodeGen/AArch64/sme-intrinsics/aarch64-sme-attrs.cpp
+++ b/clang/test/CodeGen/AArch64/sme-intrinsics/aarch64-sme-attrs.cpp
@@ -300,12 +300,12 @@ int test_variadic_template() __arm_inout("za") {
                       preserves_za_decl);
 }
 
-// CHECK: attributes #[[SM_ENABLED]] = { mustprogress noinline nounwind "aarch64_pstate_sm_enabled" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
+// CHECK: attributes #[[SM_ENABLED]] = { mustprogress noinline nounwind vscale_range(1,16) "aarch64_pstate_sm_enabled" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
 // CHECK: attributes #[[NORMAL_DECL]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
 // CHECK: attributes #[[SM_ENABLED_DECL]] = { "aarch64_pstate_sm_enabled" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
 // CHECK: attributes #[[SM_COMPATIBLE]] = { mustprogress noinline nounwind "aarch64_pstate_sm_compatible" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
 // CHECK: attributes #[[SM_COMPATIBLE_DECL]] = { "aarch64_pstate_sm_compatible" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
-// CHECK: attributes #[[SM_BODY]] = { mustprogress noinline nounwind "aarch64_pstate_sm_body" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
+// CHECK: attributes #[[SM_BODY]] = { mustprogress noinline nounwind vscale_range(1,16) "aarch64_pstate_sm_body" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
 // CHECK: attributes #[[ZA_SHARED]] = { mustprogress noinline nounwind "aarch64_inout_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
 // CHECK: attributes #[[ZA_SHARED_DECL]] = { "aarch64_inout_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
 // CHECK: attributes #[[ZA_PRESERVED]] = { mustprogress noinline nounwind "aarch64_preserves_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }