-rw-r--r--  clang-tools-extra/clang-include-fixer/IncludeFixer.cpp  2
-rw-r--r--  clang-tools-extra/include-cleaner/unittests/RecordTest.cpp  7
-rw-r--r--  clang/docs/ReleaseNotes.rst  2
-rw-r--r--  clang/include/clang/AST/OpenACCClause.h  21
-rw-r--r--  clang/include/clang/AST/TypeBase.h  17
-rw-r--r--  clang/include/clang/AST/TypeProperties.td  5
-rw-r--r--  clang/include/clang/Analysis/CFG.h  1
-rw-r--r--  clang/include/clang/Basic/Attr.td  6
-rw-r--r--  clang/include/clang/CIR/MissingFeatures.h  2
-rw-r--r--  clang/include/clang/Frontend/CompilerInstance.h  6
-rw-r--r--  clang/lib/AST/ItaniumMangle.cpp  2
-rw-r--r--  clang/lib/AST/StmtProfile.cpp  8
-rw-r--r--  clang/lib/AST/TypePrinter.cpp  3
-rw-r--r--  clang/lib/Analysis/CFG.cpp  7
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenDecl.cpp  22
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenFunction.h  28
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp  19
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp  13
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h  20
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp  15
-rw-r--r--  clang/lib/CodeGen/Targets/SPIR.cpp  6
-rw-r--r--  clang/lib/ExtractAPI/ExtractAPIConsumer.cpp  3
-rw-r--r--  clang/lib/Format/ContinuationIndenter.cpp  50
-rw-r--r--  clang/lib/Format/Format.cpp  2
-rw-r--r--  clang/lib/Format/FormatToken.cpp  4
-rw-r--r--  clang/lib/Format/FormatToken.h  5
-rw-r--r--  clang/lib/Format/FormatTokenLexer.cpp  4
-rw-r--r--  clang/lib/Format/MacroExpander.cpp  2
-rw-r--r--  clang/lib/Format/NamespaceEndCommentsFixer.cpp  4
-rw-r--r--  clang/lib/Format/ObjCPropertyAttributeOrderFixer.cpp  2
-rw-r--r--  clang/lib/Format/QualifierAlignmentFixer.cpp  2
-rw-r--r--  clang/lib/Format/SortJavaScriptImports.cpp  4
-rw-r--r--  clang/lib/Format/TokenAnnotator.cpp  106
-rw-r--r--  clang/lib/Format/UnwrappedLineFormatter.cpp  6
-rw-r--r--  clang/lib/Format/UnwrappedLineParser.cpp  42
-rw-r--r--  clang/lib/Format/WhitespaceManager.cpp  2
-rw-r--r--  clang/lib/Frontend/ChainedIncludesSource.cpp  2
-rw-r--r--  clang/lib/Frontend/CompilerInstance.cpp  13
-rw-r--r--  clang/lib/Frontend/FrontendAction.cpp  11
-rw-r--r--  clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp  110
-rw-r--r--  clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h  16
-rw-r--r--  clang/lib/Sema/HLSLExternalSemaSource.cpp  26
-rw-r--r--  clang/lib/Sema/SemaChecking.cpp  3
-rw-r--r--  clang/lib/Sema/SemaHLSL.cpp  11
-rw-r--r--  clang/lib/Sema/SemaOpenACC.cpp  30
-rw-r--r--  clang/lib/Serialization/ASTReader.cpp  15
-rw-r--r--  clang/lib/Serialization/ASTWriter.cpp  9
-rw-r--r--  clang/lib/Testing/TestAST.cpp  2
-rw-r--r--  clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp  11
-rw-r--r--  clang/lib/Tooling/Tooling.cpp  2
-rw-r--r--  clang/test/AST/HLSL/StructuredBuffers-AST.hlsl  23
-rw-r--r--  clang/test/CodeGen/X86/avx512ifma-builtins.c  5
-rw-r--r--  clang/test/CodeGen/X86/avx512ifmavl-builtins.c  6
-rw-r--r--  clang/test/CodeGen/X86/avxifma-builtins.c  6
-rw-r--r--  clang/test/CodeGenHLSL/resources/RWStructuredBuffer-elementtype.hlsl  47
-rw-r--r--  clang/test/CodeGenHLSL/resources/RasterizerOrderedStructuredBuffer-elementtype.hlsl  26
-rw-r--r--  clang/test/CodeGenHLSL/resources/StructuredBuffers-constructors.hlsl  4
-rw-r--r--  clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-lib.hlsl  6
-rw-r--r--  clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-ps.hlsl  2
-rw-r--r--  clang/test/CodeGenHLSL/resources/resource-bindings.hlsl  2
-rw-r--r--  clang/test/SemaCXX/builtin-assume-aligned.cpp  6
-rw-r--r--  clang/tools/clang-import-test/clang-import-test.cpp  2
-rw-r--r--  clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp  7
-rw-r--r--  clang/tools/libclang/CIndex.cpp  9
-rw-r--r--  clang/unittests/Analysis/CFGTest.cpp  153
-rw-r--r--  clang/unittests/CodeGen/TestCompiler.h  2
-rw-r--r--  clang/unittests/Serialization/ForceCheckFileInputTest.cpp  4
-rw-r--r--  clang/unittests/Tooling/DependencyScanning/DependencyScannerTest.cpp  2
-rw-r--r--  flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp  294
-rw-r--r--  flang/lib/Parser/parsing.cpp  1
-rw-r--r--  flang/lib/Parser/prescan.cpp  5
-rw-r--r--  flang/test/Lower/OpenMP/declare-mapper.f90  39
-rw-r--r--  flang/test/Semantics/OpenACC/acc-sentinel.f90  14
-rw-r--r--  libc/config/linux/aarch64/entrypoints.txt  1
-rw-r--r--  libc/config/linux/x86_64/entrypoints.txt  1
-rw-r--r--  libc/fuzzing/stdlib/strtointeger_differential_fuzz.cpp  4
-rw-r--r--  libc/include/llvm-libc-macros/linux/fcntl-macros.h  3
-rw-r--r--  libc/include/sys/syscall.h.def  4
-rw-r--r--  libc/include/unistd.yaml  9
-rw-r--r--  libc/src/unistd/CMakeLists.txt  7
-rw-r--r--  libc/src/unistd/faccessat.h  20
-rw-r--r--  libc/src/unistd/linux/CMakeLists.txt  13
-rw-r--r--  libc/src/unistd/linux/access.cpp  2
-rw-r--r--  libc/src/unistd/linux/faccessat.cpp  37
-rw-r--r--  libc/test/src/unistd/CMakeLists.txt  17
-rw-r--r--  libc/test/src/unistd/faccessat_test.cpp  115
-rw-r--r--  lld/COFF/Driver.cpp  10
-rw-r--r--  lld/COFF/Options.td  4
-rw-r--r--  lld/ELF/Driver.cpp  5
-rw-r--r--  lld/ELF/Options.td  6
-rw-r--r--  lldb/source/API/SBTarget.cpp  2
-rw-r--r--  lldb/source/Commands/CommandObjectTarget.cpp  2
-rw-r--r--  lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp  2
-rw-r--r--  llvm/docs/TableGen/ProgRef.rst  22
-rw-r--r--  llvm/include/llvm/ADT/BitVector.h  5
-rw-r--r--  llvm/include/llvm/ADT/ConcurrentHashtable.h  5
-rw-r--r--  llvm/include/llvm/ADT/DirectedGraph.h  10
-rw-r--r--  llvm/include/llvm/Analysis/MemoryProfileInfo.h  8
-rw-r--r--  llvm/include/llvm/CodeGen/MachineFrameInfo.h  6
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h  1
-rw-r--r--  llvm/include/llvm/IR/IntrinsicInst.h  20
-rw-r--r--  llvm/lib/Analysis/MemoryProfileInfo.cpp  22
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp  7
-rw-r--r--  llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp  2
-rw-r--r--  llvm/lib/Support/APFloat.cpp  2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp  3
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FrameLowering.cpp  487
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FrameLowering.h  26
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp  4
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp  4
-rw-r--r--  llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp  20
-rw-r--r--  llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h  83
-rw-r--r--  llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp  443
-rw-r--r--  llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp  15
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp  5
-rw-r--r--  llvm/lib/Target/AMDGPU/SOPInstructions.td  2
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp  5
-rw-r--r--  llvm/lib/Target/RISCV/RISCVGISel.td  2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoA.td  9
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td  52
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td  5
-rw-r--r--  llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp  2
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp  48
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineInternal.h  1
-rw-r--r--  llvm/lib/Transforms/Utils/LoopPeel.cpp  121
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp  10
-rw-r--r--  llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll  126
-rw-r--r--  llvm/test/CodeGen/AArch64/framelayout-split-sve.mir  587
-rw-r--r--  llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir  16
-rw-r--r--  llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll  824
-rw-r--r--  llvm/test/CodeGen/AArch64/stack-hazard.ll  876
-rw-r--r--  llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll  2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll  92
-rw-r--r--  llvm/test/CodeGen/AMDGPU/lower-module-lds-precise-allocate-to-module-struct.ll  89
-rw-r--r--  llvm/test/CodeGen/NVPTX/f32x2-convert-i32x2.ll  145
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/remat.ll  132
-rw-r--r--  llvm/test/DebugInfo/AArch64/asan-stack-vars.mir  3
-rw-r--r--  llvm/test/DebugInfo/AArch64/compiler-gen-bbs-livedebugvalues.mir  3
-rw-r--r--  llvm/test/DebugInfo/X86/dynamic-bitfield.ll  13
-rw-r--r--  llvm/test/Transforms/InstCombine/fcmp.ll  40
-rw-r--r--  llvm/test/Transforms/InstCombine/icmp-clamp.ll  295
-rw-r--r--  llvm/test/Transforms/LoopUnroll/peel-branch-weights-freq.ll  75
-rw-r--r--  llvm/test/Transforms/LoopUnroll/peel-branch-weights.ll  64
-rw-r--r--  llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll  11
-rw-r--r--  llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll  13
-rw-r--r--  llvm/test/Transforms/LoopUnroll/scev-invalidation-lcssa.ll  4
-rw-r--r--  llvm/test/Transforms/LoopVectorize/first-order-recurrence-tail-folding.ll  244
-rw-r--r--  llvm/test/Transforms/LoopVectorize/reduction-order.ll  116
-rw-r--r--  llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll  47
-rw-r--r--  llvm/unittests/ADT/APFloatTest.cpp  7
-rw-r--r--  llvm/unittests/Analysis/MemoryProfileInfoTest.cpp  21
-rw-r--r--  mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td  29
-rw-r--r--  mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td  69
-rw-r--r--  mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td  63
-rw-r--r--  mlir/lib/Dialect/Arith/Transforms/EmulateUnsupportedFloats.cpp  3
-rw-r--r--  mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  154
-rw-r--r--  mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp  24
-rw-r--r--  mlir/lib/Target/IRDLToCpp/IRDLToCpp.cpp  162
-rw-r--r--  mlir/lib/Target/IRDLToCpp/Templates/PerOperationDecl.txt  53
-rw-r--r--  mlir/lib/Target/IRDLToCpp/Templates/PerOperationDef.txt  14
-rw-r--r--  mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp  42
-rw-r--r--  mlir/test/Dialect/Arith/emulate-unsupported-floats.mlir  11
-rw-r--r--  mlir/test/Dialect/OpenMP/cli-tile.mlir  138
-rw-r--r--  mlir/test/Dialect/OpenMP/invalid-tile.mlir  119
-rw-r--r--  mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir  19
-rw-r--r--  mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir  101
-rw-r--r--  mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir  190
-rw-r--r--  mlir/test/lib/Dialect/TestIRDLToCpp/CMakeLists.txt  2
-rw-r--r--  mlir/test/lib/Dialect/TestIRDLToCpp/TestIRDLToCppDialect.cpp  31
-rw-r--r--  mlir/test/lib/Dialect/TestIRDLToCpp/test_conversion.testd.mlir  18
-rw-r--r--  mlir/test/lib/Dialect/TestIRDLToCpp/test_irdl_to_cpp.irdl.mlir  51
-rw-r--r--  mlir/test/lib/Dialect/TestIRDLToCpp/test_irdl_to_cpp_invalid_unsupported_types.irdl.mlir  27
-rw-r--r--  mlir/test/mlir-tblgen/op-format-invalid.td  2
-rw-r--r--  mlir/test/mlir-tblgen/op-format-spec.td  2
-rw-r--r--  mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp  1
-rw-r--r--  mlir/tools/mlir-tblgen/FormatGen.cpp  2
-rw-r--r--  mlir/tools/mlir-tblgen/OpFormatGen.cpp  1
-rw-r--r--  offload/libomptarget/OpenMP/InteropAPI.cpp  41
-rw-r--r--  offload/libomptarget/exports  5
-rw-r--r--  offload/plugins-nextgen/amdgpu/src/rtl.cpp  31
-rw-r--r--  offload/plugins-nextgen/cuda/src/rtl.cpp  44
-rw-r--r--  offload/test/offloading/fortran/target-declare-mapper-parent-allocatable.f90  43
182 files changed, 6817 insertions, 1617 deletions
diff --git a/clang-tools-extra/clang-include-fixer/IncludeFixer.cpp b/clang-tools-extra/clang-include-fixer/IncludeFixer.cpp
index d2ae13c..e825547 100644
--- a/clang-tools-extra/clang-include-fixer/IncludeFixer.cpp
+++ b/clang-tools-extra/clang-include-fixer/IncludeFixer.cpp
@@ -96,7 +96,7 @@ bool IncludeFixerActionFactory::runInvocation(
// diagnostics here.
Compiler.createDiagnostics(new clang::IgnoringDiagConsumer,
/*ShouldOwnClient=*/true);
- Compiler.createSourceManager(*Files);
+ Compiler.createSourceManager();
// We abort on fatal errors so don't let a large number of errors become
// fatal. A missing #include can cause thousands of errors.
diff --git a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp
index 3fb49796..cbf7bae 100644
--- a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp
+++ b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp
@@ -649,11 +649,12 @@ TEST_F(PragmaIncludeTest, ExportInUnnamedBuffer) {
Clang->createVirtualFileSystem(VFS);
Clang->createDiagnostics();
- auto *FM = Clang->createFileManager();
+ Clang->createFileManager();
+ FileManager &FM = Clang->getFileManager();
ASSERT_TRUE(Clang->ExecuteAction(*Inputs.MakeAction()));
EXPECT_THAT(
- PI.getExporters(llvm::cantFail(FM->getFileRef("foo.h")), *FM),
- testing::ElementsAre(llvm::cantFail(FM->getFileRef("exporter.h"))));
+ PI.getExporters(llvm::cantFail(FM.getFileRef("foo.h")), FM),
+ testing::ElementsAre(llvm::cantFail(FM.getFileRef("exporter.h"))));
}
TEST_F(PragmaIncludeTest, OutlivesFMAndSM) {
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 74b0647..145a83a 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -246,8 +246,6 @@ Non-comprehensive list of changes in this release
- ``__builtin_assume_dereferenceable`` now accepts non-constant size operands.
-- Fixed a crash when the second argument to ``__builtin_assume_aligned`` was not constant (#GH161314)
-
New Compiler Flags
------------------
- New option ``-fno-sanitize-debug-trap-reasons`` added to disable emitting trap reasons into the debug info when compiling with trapping UBSan (e.g. ``-fsanitize-trap=undefined``).
diff --git a/clang/include/clang/AST/OpenACCClause.h b/clang/include/clang/AST/OpenACCClause.h
index 5f06117..58ba8d91 100644
--- a/clang/include/clang/AST/OpenACCClause.h
+++ b/clang/include/clang/AST/OpenACCClause.h
@@ -840,14 +840,13 @@ public:
// alloca at the level of the base, and the init at the element level.
struct OpenACCPrivateRecipe {
VarDecl *AllocaDecl;
- Expr *InitExpr;
- OpenACCPrivateRecipe(VarDecl *A, Expr *I) : AllocaDecl(A), InitExpr(I) {}
+ OpenACCPrivateRecipe(VarDecl *A) : AllocaDecl(A) {}
bool isSet() const { return AllocaDecl; }
static OpenACCPrivateRecipe Empty() {
- return OpenACCPrivateRecipe(nullptr, nullptr);
+ return OpenACCPrivateRecipe(/*AllocaDecl=*/nullptr);
}
};
@@ -899,18 +898,17 @@ public:
// InitFromTemporary is the 'temp' declaration we put in to be 'copied from'.
struct OpenACCFirstPrivateRecipe {
VarDecl *AllocaDecl;
- Expr *InitExpr;
VarDecl *InitFromTemporary;
- OpenACCFirstPrivateRecipe(VarDecl *A, Expr *I, VarDecl *T)
- : AllocaDecl(A), InitExpr(I), InitFromTemporary(T) {
- assert(!AllocaDecl || AllocaDecl->getInit() == nullptr);
+ OpenACCFirstPrivateRecipe(VarDecl *A, VarDecl *T)
+ : AllocaDecl(A), InitFromTemporary(T) {
assert(!InitFromTemporary || InitFromTemporary->getInit() == nullptr);
}
bool isSet() const { return AllocaDecl; }
static OpenACCFirstPrivateRecipe Empty() {
- return OpenACCFirstPrivateRecipe(nullptr, nullptr, nullptr);
+ return OpenACCFirstPrivateRecipe(/*AllocaDecl=*/nullptr,
+ /*InitFromTemporary=*/nullptr);
}
};
@@ -1282,16 +1280,13 @@ public:
// 'main' declaration used for initialization, which is fixed.
struct OpenACCReductionRecipe {
VarDecl *AllocaDecl;
- Expr *InitExpr;
// TODO: OpenACC: this should eventually have the operations here too.
- OpenACCReductionRecipe(VarDecl *A, Expr *I) : AllocaDecl(A), InitExpr(I) {
- assert(!AllocaDecl || AllocaDecl->getInit() == nullptr);
- }
+ OpenACCReductionRecipe(VarDecl *A) : AllocaDecl(A) {}
bool isSet() const { return AllocaDecl; }
static OpenACCReductionRecipe Empty() {
- return OpenACCReductionRecipe(nullptr, nullptr);
+ return OpenACCReductionRecipe(/*AllocaDecl=*/nullptr);
}
};
diff --git a/clang/include/clang/AST/TypeBase.h b/clang/include/clang/AST/TypeBase.h
index e0d00b8..6786b2f 100644
--- a/clang/include/clang/AST/TypeBase.h
+++ b/clang/include/clang/AST/TypeBase.h
@@ -6702,15 +6702,21 @@ public:
LLVM_PREFERRED_TYPE(bool)
uint8_t RawBuffer : 1;
+ LLVM_PREFERRED_TYPE(bool)
+ uint8_t IsCounter : 1;
+
Attributes(llvm::dxil::ResourceClass ResourceClass, bool IsROV = false,
- bool RawBuffer = false)
- : ResourceClass(ResourceClass), IsROV(IsROV), RawBuffer(RawBuffer) {}
+ bool RawBuffer = false, bool IsCounter = false)
+ : ResourceClass(ResourceClass), IsROV(IsROV), RawBuffer(RawBuffer),
+ IsCounter(IsCounter) {}
- Attributes() : Attributes(llvm::dxil::ResourceClass::UAV, false, false) {}
+ Attributes()
+ : Attributes(llvm::dxil::ResourceClass::UAV, false, false, false) {}
friend bool operator==(const Attributes &LHS, const Attributes &RHS) {
- return std::tie(LHS.ResourceClass, LHS.IsROV, LHS.RawBuffer) ==
- std::tie(RHS.ResourceClass, RHS.IsROV, RHS.RawBuffer);
+ return std::tie(LHS.ResourceClass, LHS.IsROV, LHS.RawBuffer,
+ LHS.IsCounter) == std::tie(RHS.ResourceClass, RHS.IsROV,
+ RHS.RawBuffer, RHS.IsCounter);
}
friend bool operator!=(const Attributes &LHS, const Attributes &RHS) {
return !(LHS == RHS);
@@ -6751,6 +6757,7 @@ public:
ID.AddInteger(static_cast<uint32_t>(Attrs.ResourceClass));
ID.AddBoolean(Attrs.IsROV);
ID.AddBoolean(Attrs.RawBuffer);
+ ID.AddBoolean(Attrs.IsCounter);
}
static bool classof(const Type *T) {
diff --git a/clang/include/clang/AST/TypeProperties.td b/clang/include/clang/AST/TypeProperties.td
index b3932a6..9dc85fb 100644
--- a/clang/include/clang/AST/TypeProperties.td
+++ b/clang/include/clang/AST/TypeProperties.td
@@ -662,6 +662,9 @@ let Class = HLSLAttributedResourceType in {
def : Property<"rawBuffer", Bool> {
let Read = [{ node->getAttrs().RawBuffer }];
}
+ def : Property<"isCounter", Bool> {
+ let Read = [{ node->getAttrs().IsCounter }];
+ }
def : Property<"wrappedTy", QualType> {
let Read = [{ node->getWrappedType() }];
}
@@ -669,7 +672,7 @@ let Class = HLSLAttributedResourceType in {
let Read = [{ node->getContainedType() }];
}
def : Creator<[{
- HLSLAttributedResourceType::Attributes attrs(static_cast<llvm::dxil::ResourceClass>(resClass), isROV, rawBuffer);
+ HLSLAttributedResourceType::Attributes attrs(static_cast<llvm::dxil::ResourceClass>(resClass), isROV, rawBuffer, isCounter);
return ctx.getHLSLAttributedResourceType(wrappedTy, containedTy, attrs);
}]>;
}
diff --git a/clang/include/clang/Analysis/CFG.h b/clang/include/clang/Analysis/CFG.h
index 1b1ff5e..6dd7d13 100644
--- a/clang/include/clang/Analysis/CFG.h
+++ b/clang/include/clang/Analysis/CFG.h
@@ -1251,6 +1251,7 @@ public:
bool MarkElidedCXXConstructors = false;
bool AddVirtualBaseBranches = false;
bool OmitImplicitValueInitializers = false;
+ bool AssumeReachableDefaultInSwitchStatements = false;
BuildOptions() = default;
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index fe3ca70..3c697ed 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -5074,6 +5074,12 @@ def HLSLRawBuffer : TypeAttr {
let Documentation = [InternalOnly];
}
+def HLSLIsCounter : TypeAttr {
+ let Spellings = [CXX11<"hlsl", "is_counter">];
+ let LangOpts = [HLSL];
+ let Documentation = [InternalOnly];
+}
+
def HLSLGroupSharedAddressSpace : TypeAttr {
let Spellings = [CustomKeyword<"groupshared">];
let Subjects = SubjectList<[Var]>;
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index 7a6c084..3dfcafc 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -133,7 +133,6 @@ struct MissingFeatures {
// RecordType
static bool skippedLayout() { return false; }
static bool astRecordDeclAttr() { return false; }
- static bool recordZeroInit() { return false; }
static bool recordZeroInitPadding() { return false; }
static bool zeroSizeRecordMembers() { return false; }
@@ -192,6 +191,7 @@ struct MissingFeatures {
static bool builtinCheckKind() { return false; }
static bool cgCapturedStmtInfo() { return false; }
static bool cgFPOptionsRAII() { return false; }
+ static bool checkBitfieldClipping() { return false; }
static bool cirgenABIInfo() { return false; }
static bool cleanupAfterErrorDiags() { return false; }
static bool cleanupsToDeactivate() { return false; }
diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h
index a6b6993..44fff69 100644
--- a/clang/include/clang/Frontend/CompilerInstance.h
+++ b/clang/include/clang/Frontend/CompilerInstance.h
@@ -712,12 +712,10 @@ public:
const CodeGenOptions *CodeGenOpts = nullptr);
/// Create the file manager and replace any existing one with it.
- ///
- /// \return The new file manager on success, or null on failure.
- FileManager *createFileManager();
+ void createFileManager();
/// Create the source manager and replace any existing one with it.
- void createSourceManager(FileManager &FileMgr);
+ void createSourceManager();
/// Create the preprocessor, using the invocation, file, and source managers,
/// and replace any existing one with it.
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 2173aed..844db79 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -4624,6 +4624,8 @@ void CXXNameMangler::mangleType(const HLSLAttributedResourceType *T) {
Str += "_ROV";
if (Attrs.RawBuffer)
Str += "_Raw";
+ if (Attrs.IsCounter)
+ Str += "_Counter";
if (T->hasContainedType())
Str += "_CT";
mangleVendorQualifier(Str);
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 589a156..f3b5478 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -2655,8 +2655,6 @@ void OpenACCClauseProfiler::VisitPrivateClause(
for (auto &Recipe : Clause.getInitRecipes()) {
Profiler.VisitDecl(Recipe.AllocaDecl);
- if (Recipe.InitExpr)
- Profiler.VisitExpr(Recipe.InitExpr);
}
}
@@ -2666,8 +2664,6 @@ void OpenACCClauseProfiler::VisitFirstPrivateClause(
for (auto &Recipe : Clause.getInitRecipes()) {
Profiler.VisitDecl(Recipe.AllocaDecl);
- if (Recipe.InitExpr)
- Profiler.VisitExpr(Recipe.InitExpr);
Profiler.VisitDecl(Recipe.InitFromTemporary);
}
}
@@ -2773,12 +2769,10 @@ void OpenACCClauseProfiler::VisitReductionClause(
for (auto &Recipe : Clause.getRecipes()) {
Profiler.VisitDecl(Recipe.AllocaDecl);
- if (Recipe.InitExpr)
- Profiler.VisitExpr(Recipe.InitExpr);
// TODO: OpenACC: Make sure we remember to update this when we figure out
// what we're adding for the operation recipe, in the meantime, a static
// assert will make sure we don't add something.
- static_assert(sizeof(OpenACCReductionRecipe) == 2 * sizeof(int *));
+ static_assert(sizeof(OpenACCReductionRecipe) == sizeof(int *));
}
}
diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp
index f3448af..66a1b68 100644
--- a/clang/lib/AST/TypePrinter.cpp
+++ b/clang/lib/AST/TypePrinter.cpp
@@ -2062,6 +2062,7 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
case attr::HLSLROV:
case attr::HLSLRawBuffer:
case attr::HLSLContainedType:
+ case attr::HLSLIsCounter:
llvm_unreachable("HLSL resource type attributes handled separately");
case attr::OpenCLPrivateAddressSpace:
@@ -2210,6 +2211,8 @@ void TypePrinter::printHLSLAttributedResourceAfter(
OS << " [[hlsl::is_rov]]";
if (Attrs.RawBuffer)
OS << " [[hlsl::raw_buffer]]";
+ if (Attrs.IsCounter)
+ OS << " [[hlsl::is_counter]]";
QualType ContainedTy = T->getContainedType();
if (!ContainedTy.isNull()) {
diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp
index 60a2d11..cdde849 100644
--- a/clang/lib/Analysis/CFG.cpp
+++ b/clang/lib/Analysis/CFG.cpp
@@ -4516,10 +4516,13 @@ CFGBlock *CFGBuilder::VisitSwitchStmt(SwitchStmt *Terminator) {
//
// Note: We add a successor to a switch that is considered covered yet has no
// case statements if the enumeration has no enumerators.
+ // We also consider this successor reachable if
+ // BuildOpts.AssumeReachableDefaultInSwitchStatements is true.
bool SwitchAlwaysHasSuccessor = false;
SwitchAlwaysHasSuccessor |= switchExclusivelyCovered;
- SwitchAlwaysHasSuccessor |= Terminator->isAllEnumCasesCovered() &&
- Terminator->getSwitchCaseList();
+ SwitchAlwaysHasSuccessor |=
+ !BuildOpts.AssumeReachableDefaultInSwitchStatements &&
+ Terminator->isAllEnumCasesCovered() && Terminator->getSwitchCaseList();
addSuccessor(SwitchTerminatedBlock, DefaultCaseBlock,
!SwitchAlwaysHasSuccessor);
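
For context on the new AssumeReachableDefaultInSwitchStatements build option: a switch that covers every enumerator can still be fallen out of when the scrutinee holds a value outside the declared enumerators, so the implicit default edge is not necessarily dead. A generic illustration of that situation (my own sketch, not code from this patch):

```cpp
// Sketch: every enumerator of Color has a case, yet the post-switch code can
// still execute if C carries an out-of-range value. The new CFG option keeps
// that implicit default edge reachable instead of pruning it.
#include <cstdio>

enum Color { Red, Green, Blue };

const char *name(Color C) {
  switch (C) {
  case Red:   return "red";
  case Green: return "green";
  case Blue:  return "blue";
  }
  // Reachable for e.g. static_cast<Color>(42).
  return "out of range";
}

int main() { std::printf("%s\n", name(static_cast<Color>(42))); }
```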
diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
index 10b8255..563a753 100644
--- a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
@@ -35,8 +35,8 @@ CIRGenFunction::emitAutoVarAlloca(const VarDecl &d,
getContext().getLangOpts().ElideConstructors && d.isNRVOVariable();
CIRGenFunction::AutoVarEmission emission(d);
- emission.IsEscapingByRef = d.isEscapingByref();
- if (emission.IsEscapingByRef)
+ emission.isEscapingByRef = d.isEscapingByref();
+ if (emission.isEscapingByRef)
cgm.errorNYI(d.getSourceRange(),
"emitAutoVarDecl: decl escaping by reference");
@@ -78,7 +78,7 @@ CIRGenFunction::emitAutoVarAlloca(const VarDecl &d,
alignment);
}
- emission.Addr = address;
+ emission.addr = address;
setAddrOfLocalVar(&d, address);
return emission;
@@ -101,13 +101,13 @@ bool CIRGenFunction::isTrivialInitializer(const Expr *init) {
void CIRGenFunction::emitAutoVarInit(
const CIRGenFunction::AutoVarEmission &emission) {
- assert(emission.Variable && "emission was not valid!");
+ assert(emission.variable && "emission was not valid!");
// If this was emitted as a global constant, we're done.
if (emission.wasEmittedAsGlobal())
return;
- const VarDecl &d = *emission.Variable;
+ const VarDecl &d = *emission.variable;
QualType type = d.getType();
@@ -124,7 +124,7 @@ void CIRGenFunction::emitAutoVarInit(
return;
}
- const Address addr = emission.Addr;
+ const Address addr = emission.addr;
// Check whether this is a byref variable that's potentially
// captured and moved by its own initializer. If so, we'll need to
@@ -153,7 +153,7 @@ void CIRGenFunction::emitAutoVarInit(
}
mlir::Attribute constant;
- if (emission.IsConstantAggregate ||
+ if (emission.isConstantAggregate ||
d.mightBeUsableInConstantExpressions(getContext())) {
// FIXME: Differently from LLVM we try not to emit / lower too much
// here for CIR since we are interested in seeing the ctor in some
@@ -196,7 +196,7 @@ void CIRGenFunction::emitAutoVarInit(
// FIXME(cir): migrate most of this file to use mlir::TypedAttr directly.
auto typedConstant = mlir::dyn_cast<mlir::TypedAttr>(constant);
assert(typedConstant && "expected typed attribute");
- if (!emission.IsConstantAggregate) {
+ if (!emission.isConstantAggregate) {
// For simple scalar/complex initialization, store the value directly.
LValue lv = makeAddrLValue(addr, type);
assert(init && "expected initializer");
@@ -209,7 +209,7 @@ void CIRGenFunction::emitAutoVarInit(
void CIRGenFunction::emitAutoVarCleanups(
const CIRGenFunction::AutoVarEmission &emission) {
- const VarDecl &d = *emission.Variable;
+ const VarDecl &d = *emission.variable;
// Check the type for a cleanup.
if (QualType::DestructionKind dtorKind = d.needsDestruction(getContext()))
@@ -821,7 +821,7 @@ void CIRGenFunction::emitAutoVarTypeCleanup(
// original stack object, not the possibly forwarded object.
Address addr = emission.getObjectAddress(*this);
- const VarDecl *var = emission.Variable;
+ const VarDecl *var = emission.variable;
QualType type = var->getType();
CleanupKind cleanupKind = NormalAndEHCleanup;
@@ -834,7 +834,7 @@ void CIRGenFunction::emitAutoVarTypeCleanup(
case QualType::DK_cxx_destructor:
// If there's an NRVO flag on the emission, we need a different
// cleanup.
- if (emission.NRVOFlag) {
+ if (emission.nrvoFlag) {
cgm.errorNYI(var->getSourceRange(), "emitAutoVarTypeCleanup: NRVO");
return;
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index c0ed8b4..cb7cf98 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -479,55 +479,55 @@ public:
ConstantEmission tryEmitAsConstant(const MemberExpr *me);
struct AutoVarEmission {
- const clang::VarDecl *Variable;
+ const clang::VarDecl *variable;
/// The address of the alloca for languages with explicit address space
/// (e.g. OpenCL) or alloca casted to generic pointer for address space
/// agnostic languages (e.g. C++). Invalid if the variable was emitted
/// as a global constant.
- Address Addr;
+ Address addr;
/// True if the variable is of aggregate type and has a constant
/// initializer.
- bool IsConstantAggregate = false;
+ bool isConstantAggregate = false;
/// True if the variable is a __block variable that is captured by an
/// escaping block.
- bool IsEscapingByRef = false;
+ bool isEscapingByRef = false;
/// True if the variable was emitted as an offload recipe, and thus doesn't
/// have the same sort of alloca initialization.
- bool EmittedAsOffload = false;
+ bool emittedAsOffload = false;
- mlir::Value NRVOFlag{};
+ mlir::Value nrvoFlag{};
struct Invalid {};
- AutoVarEmission(Invalid) : Variable(nullptr), Addr(Address::invalid()) {}
+ AutoVarEmission(Invalid) : variable(nullptr), addr(Address::invalid()) {}
AutoVarEmission(const clang::VarDecl &variable)
- : Variable(&variable), Addr(Address::invalid()) {}
+ : variable(&variable), addr(Address::invalid()) {}
static AutoVarEmission invalid() { return AutoVarEmission(Invalid()); }
- bool wasEmittedAsGlobal() const { return !Addr.isValid(); }
+ bool wasEmittedAsGlobal() const { return !addr.isValid(); }
- bool wasEmittedAsOffloadClause() const { return EmittedAsOffload; }
+ bool wasEmittedAsOffloadClause() const { return emittedAsOffload; }
/// Returns the raw, allocated address, which is not necessarily
/// the address of the object itself. It is casted to default
/// address space for address space agnostic languages.
- Address getAllocatedAddress() const { return Addr; }
+ Address getAllocatedAddress() const { return addr; }
// Changes the stored address for the emission. This function should only
// be used in extreme cases, and isn't required to model normal AST
// initialization/variables.
- void setAllocatedAddress(Address A) { Addr = A; }
+ void setAllocatedAddress(Address a) { addr = a; }
/// Returns the address of the object within this declaration.
/// Note that this does not chase the forwarding pointer for
/// __block decls.
Address getObjectAddress(CIRGenFunction &cgf) const {
- if (!IsEscapingByRef)
- return Addr;
+ if (!isEscapingByRef)
+ return addr;
assert(!cir::MissingFeatures::opAllocaEscapeByReference());
return Address::invalid();
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
index f22f3e8..3d86f71 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
@@ -1001,7 +1001,7 @@ public:
OpenACCRecipeBuilder<mlir::acc::PrivateRecipeOp>(cgf, builder)
.getOrCreateRecipe(
cgf.getContext(), recipeInsertLocation, varExpr,
- varRecipe.AllocaDecl, varRecipe.InitExpr,
+ varRecipe.AllocaDecl,
/*temporary=*/nullptr, OpenACCReductionOperator::Invalid,
Decl::castToDeclContext(cgf.curFuncDecl), opInfo.origType,
opInfo.bounds.size(), opInfo.boundTypes, opInfo.baseType,
@@ -1036,20 +1036,13 @@ public:
{
mlir::OpBuilder::InsertionGuard guardCase(builder);
- // TODO: OpenACC: At the moment this is a bit of a hacky way of doing
- // this, and won't work when we get to bounds/etc. Do this for now to
- // limit the scope of this refactor.
- VarDecl *allocaDecl = varRecipe.AllocaDecl;
- allocaDecl->setInit(varRecipe.InitExpr);
- allocaDecl->setInitStyle(VarDecl::CallInit);
auto recipe =
OpenACCRecipeBuilder<mlir::acc::FirstprivateRecipeOp>(cgf,
builder)
.getOrCreateRecipe(
cgf.getContext(), recipeInsertLocation, varExpr,
- varRecipe.AllocaDecl, varRecipe.InitExpr,
- varRecipe.InitFromTemporary,
+ varRecipe.AllocaDecl, varRecipe.InitFromTemporary,
OpenACCReductionOperator::Invalid,
Decl::castToDeclContext(cgf.curFuncDecl), opInfo.origType,
opInfo.bounds.size(), opInfo.boundTypes, opInfo.baseType,
@@ -1086,18 +1079,12 @@ public:
{
mlir::OpBuilder::InsertionGuard guardCase(builder);
- // TODO: OpenACC: At the moment this is a bit of a hacky way of doing
- // this, and won't work when we get to bounds/etc. Do this for now to
- // limit the scope of this refactor.
- VarDecl *allocaDecl = varRecipe.AllocaDecl;
- allocaDecl->setInit(varRecipe.InitExpr);
- allocaDecl->setInitStyle(VarDecl::CallInit);
auto recipe =
OpenACCRecipeBuilder<mlir::acc::ReductionRecipeOp>(cgf, builder)
.getOrCreateRecipe(
cgf.getContext(), recipeInsertLocation, varExpr,
- varRecipe.AllocaDecl, varRecipe.InitExpr,
+ varRecipe.AllocaDecl,
/*temporary=*/nullptr, clause.getReductionOp(),
Decl::castToDeclContext(cgf.curFuncDecl), opInfo.origType,
opInfo.bounds.size(), opInfo.boundTypes, opInfo.baseType,
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp
index f8e511e..ea6ea2c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp
@@ -408,7 +408,7 @@ void OpenACCRecipeBuilderBase::makeBoundsInit(
CIRGenFunction::LexicalScope ls(cgf, loc, block);
CIRGenFunction::AutoVarEmission tempDeclEmission{*allocaDecl};
- tempDeclEmission.EmittedAsOffload = true;
+ tempDeclEmission.emittedAsOffload = true;
// The init section is the only one of the handful that only has a single
// argument for the 'type', so we have to drop 1 for init, and future calls
@@ -435,7 +435,7 @@ void OpenACCRecipeBuilderBase::createPrivateInitRecipe(
mlir::Location loc, mlir::Location locEnd, SourceRange exprRange,
mlir::Value mainOp, mlir::acc::PrivateRecipeOp recipe, size_t numBounds,
llvm::ArrayRef<QualType> boundTypes, const VarDecl *allocaDecl,
- QualType origType, const Expr *initExpr) {
+ QualType origType) {
assert(allocaDecl && "Required recipe variable not set?");
CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, allocaDecl};
@@ -473,9 +473,10 @@ void OpenACCRecipeBuilderBase::createPrivateInitRecipe(
// If the initializer is trivial, there is nothing to do here, so save
// ourselves some effort.
- if (initExpr && (!cgf.isTrivialInitializer(initExpr) ||
- cgf.getContext().getLangOpts().getTrivialAutoVarInit() !=
- LangOptions::TrivialAutoVarInitKind::Uninitialized))
+ if (allocaDecl->getInit() &&
+ (!cgf.isTrivialInitializer(allocaDecl->getInit()) ||
+ cgf.getContext().getLangOpts().getTrivialAutoVarInit() !=
+ LangOptions::TrivialAutoVarInitKind::Uninitialized))
makeBoundsInit(alloca, loc, block, allocaDecl, origType,
/*isInitSection=*/true);
}
@@ -504,7 +505,7 @@ void OpenACCRecipeBuilderBase::createFirstprivateRecipeCopy(
// that instead of the variable in the other block.
tempDeclEmission.setAllocatedAddress(
Address{toArg, elementTy, cgf.getContext().getDeclAlign(varRecipe)});
- tempDeclEmission.EmittedAsOffload = true;
+ tempDeclEmission.emittedAsOffload = true;
CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, temporary};
cgf.setAddrOfLocalVar(
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h
index 203eaff..a05b0bd 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h
@@ -70,8 +70,7 @@ protected:
mlir::acc::PrivateRecipeOp recipe,
size_t numBounds,
llvm::ArrayRef<QualType> boundTypes,
- const VarDecl *allocaDecl, QualType origType,
- const Expr *initExpr);
+ const VarDecl *allocaDecl, QualType origType);
void createRecipeDestroySection(mlir::Location loc, mlir::Location locEnd,
mlir::Value mainOp, CharUnits alignment,
@@ -212,15 +211,12 @@ public:
OpenACCRecipeBuilder(CIRGen::CIRGenFunction &cgf,
CIRGen::CIRGenBuilderTy &builder)
: OpenACCRecipeBuilderBase(cgf, builder) {}
- RecipeTy getOrCreateRecipe(ASTContext &astCtx,
- mlir::OpBuilder::InsertPoint &insertLocation,
- const Expr *varRef, const VarDecl *varRecipe,
- const Expr *initExpr, const VarDecl *temporary,
- OpenACCReductionOperator reductionOp,
- DeclContext *dc, QualType origType,
- size_t numBounds,
- llvm::ArrayRef<QualType> boundTypes,
- QualType baseType, mlir::Value mainOp) {
+ RecipeTy getOrCreateRecipe(
+ ASTContext &astCtx, mlir::OpBuilder::InsertPoint &insertLocation,
+ const Expr *varRef, const VarDecl *varRecipe, const VarDecl *temporary,
+ OpenACCReductionOperator reductionOp, DeclContext *dc, QualType origType,
+ size_t numBounds, llvm::ArrayRef<QualType> boundTypes, QualType baseType,
+ mlir::Value mainOp) {
assert(!varRecipe->getType()->isSpecificBuiltinType(
BuiltinType::ArraySection) &&
"array section shouldn't make it to recipe creation");
@@ -266,7 +262,7 @@ public:
if constexpr (std::is_same_v<RecipeTy, mlir::acc::PrivateRecipeOp>) {
createPrivateInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp,
recipe, numBounds, boundTypes, varRecipe,
- origType, initExpr);
+ origType);
} else {
createRecipeInitCopy(loc, locEnd, varRef->getSourceRange(), mainOp,
recipe, varRecipe, temporary);
diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
index 2baeb43..87f2340 100644
--- a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
@@ -296,9 +296,8 @@ void CIRRecordLowering::lower(bool nonVirtualBaseType) {
}
llvm::stable_sort(members);
- // TODO: implement clipTailPadding once bitfields are implemented
- assert(!cir::MissingFeatures::bitfields());
- assert(!cir::MissingFeatures::recordZeroInit());
+ // TODO: Verify bitfield clipping
+ assert(!cir::MissingFeatures::checkBitfieldClipping());
members.push_back(makeStorageInfo(size, getUIntNType(8)));
determinePacked(nonVirtualBaseType);
@@ -319,9 +318,11 @@ void CIRRecordLowering::fillOutputFields() {
fieldIdxMap[member.fieldDecl->getCanonicalDecl()] =
fieldTypes.size() - 1;
// A field without storage must be a bitfield.
- assert(!cir::MissingFeatures::bitfields());
- if (!member.data)
+ if (!member.data) {
+ assert(member.fieldDecl &&
+ "member.data is a nullptr so member.fieldDecl should not be");
setBitFieldInfo(member.fieldDecl, member.offset, fieldTypes.back());
+ }
} else if (member.kind == MemberInfo::InfoKind::Base) {
nonVirtualBases[member.cxxRecordDecl] = fieldTypes.size() - 1;
} else if (member.kind == MemberInfo::InfoKind::VBase) {
@@ -697,13 +698,9 @@ CIRGenTypes::computeRecordLayout(const RecordDecl *rd, cir::RecordType *ty) {
ty ? *ty : cir::RecordType{}, baseTy ? baseTy : cir::RecordType{},
(bool)lowering.zeroInitializable, (bool)lowering.zeroInitializableAsBase);
- assert(!cir::MissingFeatures::recordZeroInit());
-
rl->nonVirtualBases.swap(lowering.nonVirtualBases);
rl->completeObjectVirtualBases.swap(lowering.virtualBases);
- assert(!cir::MissingFeatures::bitfields());
-
// Add all the field numbers.
rl->fieldIdxMap.swap(lowering.fieldIdxMap);
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index 2e3fc53..4aa6314 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -486,6 +486,12 @@ llvm::Type *CommonSPIRTargetCodeGenInfo::getHLSLType(
return getSPIRVImageTypeFromHLSLResource(ResAttrs, ContainedTy, CGM);
}
+ if (ResAttrs.IsCounter) {
+ llvm::Type *ElemType = llvm::Type::getInt32Ty(Ctx);
+ uint32_t StorageClass = /* StorageBuffer storage class */ 12;
+ return llvm::TargetExtType::get(Ctx, "spirv.VulkanBuffer", {ElemType},
+ {StorageClass, true});
+ }
llvm::Type *ElemType = CGM.getTypes().ConvertTypeForMem(ContainedTy);
llvm::ArrayType *RuntimeArrayType = llvm::ArrayType::get(ElemType, 0);
uint32_t StorageClass = /* StorageBuffer storage class */ 12;
diff --git a/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp b/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp
index 1087eb3..6966d40 100644
--- a/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp
+++ b/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp
@@ -444,8 +444,7 @@ bool ExtractAPIAction::PrepareToExecuteAction(CompilerInstance &CI) {
return true;
if (!CI.hasFileManager())
- if (!CI.createFileManager())
- return false;
+ CI.createFileManager();
auto Kind = Inputs[0].getKind();
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
index 9413c13..cd4c1aa 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -368,7 +368,7 @@ bool ContinuationIndenter::canBreak(const LineState &State) {
// If binary operators are moved to the next line (including commas for some
// styles of constructor initializers), that's always ok.
- if (!Current.isOneOf(TT_BinaryOperator, tok::comma) &&
+ if (Current.isNoneOf(TT_BinaryOperator, tok::comma) &&
// Allow breaking opening brace of lambdas (when passed as function
// arguments) to a new line when BeforeLambdaBody brace wrapping is
// enabled.
@@ -445,7 +445,7 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
(!Style.BreakBeforeTernaryOperators &&
Previous.is(TT_ConditionalExpr))) &&
CurrentState.BreakBeforeParameter && !Current.isTrailingComment() &&
- !Current.isOneOf(tok::r_paren, tok::r_brace)) {
+ Current.isNoneOf(tok::r_paren, tok::r_brace)) {
return true;
}
if (CurrentState.IsChainedConditional &&
@@ -523,9 +523,9 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
if (Style.AlwaysBreakBeforeMultilineStrings &&
(NewLineColumn == State.FirstIndent + Style.ContinuationIndentWidth ||
Previous.is(tok::comma) || Current.NestingLevel < 2) &&
- !Previous.isOneOf(tok::kw_return, tok::lessless, tok::at,
+ Previous.isNoneOf(tok::kw_return, tok::lessless, tok::at,
Keywords.kw_dollar) &&
- !Previous.isOneOf(TT_InlineASMColon, TT_ConditionalExpr) &&
+ Previous.isNoneOf(TT_InlineASMColon, TT_ConditionalExpr) &&
nextIsMultilineString(State)) {
return true;
}
@@ -648,7 +648,7 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
// into the ColumnLimit, they are checked here in the ContinuationIndenter.
if (Style.ColumnLimit != 0 && Previous.is(BK_Block) &&
Previous.is(tok::l_brace) &&
- !Current.isOneOf(tok::r_brace, tok::comment)) {
+ Current.isNoneOf(tok::r_brace, tok::comment)) {
return true;
}
@@ -752,7 +752,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
return false;
const auto *Next = Comma->getNextNonComment();
- return Next && !Next->isOneOf(TT_LambdaLSquare, tok::l_brace, tok::caret);
+ return Next && Next->isNoneOf(TT_LambdaLSquare, tok::l_brace, tok::caret);
};
if (DisallowLineBreaks())
@@ -835,7 +835,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
return Tok.is(tok::l_brace) && Tok.isNot(BK_Block) &&
Style.Cpp11BracedListStyle;
};
- if (!Tok.isOneOf(tok::l_paren, TT_TemplateOpener, tok::l_square) &&
+ if (Tok.isNoneOf(tok::l_paren, TT_TemplateOpener, tok::l_square) &&
!IsStartOfBracedList()) {
return false;
}
@@ -843,7 +843,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
return true;
if (Tok.Previous->isIf())
return Style.AlignAfterOpenBracket == FormatStyle::BAS_AlwaysBreak;
- return !Tok.Previous->isOneOf(TT_CastRParen, tok::kw_for, tok::kw_while,
+ return Tok.Previous->isNoneOf(TT_CastRParen, tok::kw_for, tok::kw_while,
tok::kw_switch) &&
!(Style.isJavaScript() && Tok.Previous->is(Keywords.kw_await));
};
@@ -882,8 +882,8 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
Tok.isOneOf(tok::ellipsis, Keywords.kw_await))) {
return true;
}
- const auto *Previous = Tok.Previous;
- if (!Previous || (!Previous->isOneOf(TT_FunctionDeclarationLParen,
+ if (const auto *Previous = Tok.Previous;
+ !Previous || (Previous->isNoneOf(TT_FunctionDeclarationLParen,
TT_LambdaDefinitionLParen) &&
!IsFunctionCallParen(*Previous))) {
return true;
@@ -920,9 +920,9 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
// align the commas with the opening paren.
if (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign &&
!CurrentState.IsCSharpGenericTypeConstraint && Previous.opensScope() &&
- Previous.isNot(TT_ObjCMethodExpr) && Previous.isNot(TT_RequiresClause) &&
- Previous.isNot(TT_TableGenDAGArgOpener) &&
- Previous.isNot(TT_TableGenDAGArgOpenerToBreak) &&
+ Previous.isNoneOf(TT_ObjCMethodExpr, TT_RequiresClause,
+ TT_TableGenDAGArgOpener,
+ TT_TableGenDAGArgOpenerToBreak) &&
!(Current.MacroParent && Previous.MacroParent) &&
(Current.isNot(TT_LineComment) ||
Previous.isOneOf(BK_BracedInit, TT_VerilogMultiLineListLParen)) &&
@@ -962,7 +962,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
if (Current.isNot(tok::comment) && P &&
(P->isOneOf(TT_BinaryOperator, tok::comma) ||
(P->is(TT_ConditionalExpr) && P->is(tok::colon))) &&
- !P->isOneOf(TT_OverloadedOperator, TT_CtorInitializerComma) &&
+ P->isNoneOf(TT_OverloadedOperator, TT_CtorInitializerComma) &&
P->getPrecedence() != prec::Assignment &&
P->getPrecedence() != prec::Relational &&
P->getPrecedence() != prec::Spaceship) {
@@ -992,7 +992,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
// parameter, i.e. let nested calls have a continuation indent.
CurrentState.LastSpace = State.Column;
CurrentState.NestedBlockIndent = State.Column;
- } else if (!Current.isOneOf(tok::comment, tok::caret) &&
+ } else if (Current.isNoneOf(tok::comment, tok::caret) &&
((Previous.is(tok::comma) &&
Previous.isNot(TT_OverloadedOperator)) ||
(Previous.is(tok::colon) && Previous.is(TT_ObjCMethodExpr)))) {
@@ -1099,7 +1099,7 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
if (Current.isNot(TT_LambdaArrow) &&
(!Style.isJavaScript() || Current.NestingLevel != 0 ||
!PreviousNonComment || PreviousNonComment->isNot(tok::equal) ||
- !Current.isOneOf(Keywords.kw_async, Keywords.kw_function))) {
+ Current.isNoneOf(Keywords.kw_async, Keywords.kw_function))) {
CurrentState.NestedBlockIndent = State.Column;
}
@@ -1239,11 +1239,11 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
}
if (PreviousNonComment &&
- !PreviousNonComment->isOneOf(tok::comma, tok::colon, tok::semi) &&
+ PreviousNonComment->isNoneOf(tok::comma, tok::colon, tok::semi) &&
((PreviousNonComment->isNot(TT_TemplateCloser) &&
!PreviousNonComment->ClosesRequiresClause) ||
Current.NestingLevel != 0) &&
- !PreviousNonComment->isOneOf(
+ PreviousNonComment->isNoneOf(
TT_BinaryOperator, TT_FunctionAnnotationRParen, TT_JavaAnnotation,
TT_LeadingJavaAnnotation) &&
Current.isNot(TT_BinaryOperator) && !PreviousNonComment->opensScope() &&
@@ -1281,8 +1281,8 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
bool AllowAllConstructorInitializersOnNextLine =
Style.PackConstructorInitializers == FormatStyle::PCIS_NextLine ||
Style.PackConstructorInitializers == FormatStyle::PCIS_NextLineOnly;
- if (!(Previous.isOneOf(tok::l_paren, tok::l_brace, TT_BinaryOperator) ||
- PreviousIsBreakingCtorInitializerColon) ||
+ if ((Previous.isNoneOf(tok::l_paren, tok::l_brace, TT_BinaryOperator) &&
+ !PreviousIsBreakingCtorInitializerColon) ||
(!Style.AllowAllParametersOfDeclarationOnNextLine &&
State.Line->MustBeDeclaration) ||
(!Style.AllowAllArgumentsOnNextLine &&
@@ -1576,7 +1576,7 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) {
if (Previous.is(tok::r_paren) &&
Previous.isNot(TT_TableGenDAGArgOperatorToBreak) &&
!Current.isBinaryOperator() &&
- !Current.isOneOf(tok::colon, tok::comment)) {
+ Current.isNoneOf(tok::colon, tok::comment)) {
return ContinuationIndent;
}
if (Current.is(TT_ProtoExtensionLSquare))
@@ -1591,7 +1591,7 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) {
NextNonComment->SpacesRequiredBefore;
}
if (CurrentState.Indent == State.FirstIndent && PreviousNonComment &&
- !PreviousNonComment->isOneOf(tok::r_brace, TT_CtorInitializerComma)) {
+ PreviousNonComment->isNoneOf(tok::r_brace, TT_CtorInitializerComma)) {
// Ensure that we fall back to the continuation indent width instead of
// just flushing continuations left.
return CurrentState.Indent + Style.ContinuationIndentWidth;
@@ -1734,7 +1734,7 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
}
if (Previous && (Previous->isOneOf(TT_BinaryOperator, TT_ConditionalExpr) ||
(Previous->isOneOf(tok::l_paren, tok::comma, tok::colon) &&
- !Previous->isOneOf(TT_DictLiteral, TT_ObjCMethodExpr,
+ Previous->isNoneOf(TT_DictLiteral, TT_ObjCMethodExpr,
TT_CtorInitializerColon)))) {
CurrentState.NestedBlockInlined =
!Newline && hasNestedBlockInlined(Previous, Current, Style);
@@ -1758,7 +1758,7 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
State.StartOfStringLiteral = State.Column + 1;
} else if (Current.isStringLiteral() && State.StartOfStringLiteral == 0) {
State.StartOfStringLiteral = State.Column;
- } else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) &&
+ } else if (Current.isNoneOf(tok::comment, tok::identifier, tok::hash) &&
!Current.isStringLiteral()) {
State.StartOfStringLiteral = 0;
}
@@ -2057,7 +2057,7 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State,
// array literals as these follow different indentation rules.
bool NoLineBreak =
Current.Children.empty() &&
- !Current.isOneOf(TT_DictLiteral, TT_ArrayInitializerLSquare) &&
+ Current.isNoneOf(TT_DictLiteral, TT_ArrayInitializerLSquare) &&
(CurrentState.NoLineBreak || CurrentState.NoLineBreakInOperand ||
(Current.is(TT_TemplateOpener) &&
CurrentState.ContainsUnwrappedBuilder));
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index 835071d..2bf6244 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -2435,7 +2435,7 @@ private:
const auto *NextLine = I + 1 == End ? nullptr : I[1];
for (const auto *Token = Line->First; Token && !Token->Finalized;
Token = Token->Next) {
- if (!Token->Optional || !Token->isOneOf(tok::l_brace, tok::r_brace))
+ if (!Token->Optional || Token->isNoneOf(tok::l_brace, tok::r_brace))
continue;
auto *Next = Token->Next;
assert(Next || Token == Line->Last);
diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp
index c60ae8f..c2956a1 100644
--- a/clang/lib/Format/FormatToken.cpp
+++ b/clang/lib/Format/FormatToken.cpp
@@ -108,7 +108,7 @@ unsigned CommaSeparatedList::formatAfterToken(LineState &State,
// Ensure that we start on the opening brace.
const FormatToken *LBrace =
State.NextToken->Previous->getPreviousNonComment();
- if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
+ if (!LBrace || LBrace->isNoneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
LBrace->is(BK_Block) || LBrace->is(TT_DictLiteral) ||
LBrace->Next->is(TT_DesignatedInitializerPeriod)) {
return 0;
@@ -177,7 +177,7 @@ static unsigned CodePointsBetween(const FormatToken *Begin,
void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
// FIXME: At some point we might want to do this for other lists, too.
if (!Token->MatchingParen ||
- !Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare)) {
+ Token->isNoneOf(tok::l_brace, TT_ArrayInitializerLSquare)) {
return;
}
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index a28446a..e4ddd61 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -645,6 +645,9 @@ public:
return is(K1) || isOneOf(K2, Ks...);
}
template <typename T> bool isNot(T Kind) const { return !is(Kind); }
+ template <typename... Ts> bool isNoneOf(Ts... Ks) const {
+ return !isOneOf(Ks...);
+ }
bool isIf(bool AllowConstexprMacro = true) const {
return is(tok::kw_if) || endsSequence(tok::kw_constexpr, tok::kw_if) ||
@@ -748,7 +751,7 @@ public:
/// Returns \c true if this is a "." or "->" accessing a member.
bool isMemberAccess() const {
return isOneOf(tok::arrow, tok::period, tok::arrowstar) &&
- !isOneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow,
+ isNoneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow,
TT_LambdaArrow, TT_LeadingJavaAnnotation);
}
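
Most of the clang/lib/Format churn in this commit replaces `!Tok.isOneOf(...)` with the `Tok.isNoneOf(...)` helper added in FormatToken.h above. A minimal standalone sketch of that pattern, using a hypothetical `Token`/`Kind` pair rather than the real FormatToken class:

```cpp
// Sketch: a variadic isNoneOf() that simply negates isOneOf(), so call sites
// can drop the leading '!' and read positively, mirroring the new helper.
#include <cassert>

enum class Kind { LParen, RParen, Comma, Identifier };

struct Token {
  Kind K;
  bool is(Kind Other) const { return K == Other; }
  template <typename... Ts> bool isOneOf(Ts... Ks) const {
    return (is(Ks) || ...);
  }
  template <typename... Ts> bool isNoneOf(Ts... Ks) const {
    return !isOneOf(Ks...);
  }
};

int main() {
  Token Tok{Kind::Identifier};
  assert(Tok.isNoneOf(Kind::LParen, Kind::RParen, Kind::Comma));
  assert(!Tok.isNoneOf(Kind::Identifier, Kind::Comma));
}
```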
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 3f4aa52..86a5185 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -733,7 +733,7 @@ void FormatTokenLexer::tryParseJavaTextBlock() {
// its text if successful.
void FormatTokenLexer::tryParseJSRegexLiteral() {
FormatToken *RegexToken = Tokens.back();
- if (!RegexToken->isOneOf(tok::slash, tok::slashequal))
+ if (RegexToken->isNoneOf(tok::slash, tok::slashequal))
return;
FormatToken *Prev = nullptr;
@@ -1041,7 +1041,7 @@ void FormatTokenLexer::handleTemplateStrings() {
void FormatTokenLexer::tryParsePythonComment() {
FormatToken *HashToken = Tokens.back();
- if (!HashToken->isOneOf(tok::hash, tok::hashhash))
+ if (HashToken->isNoneOf(tok::hash, tok::hashhash))
return;
// Turn the remainder of this line into a comment.
const char *CommentBegin =
diff --git a/clang/lib/Format/MacroExpander.cpp b/clang/lib/Format/MacroExpander.cpp
index 85a53c9..445e173 100644
--- a/clang/lib/Format/MacroExpander.cpp
+++ b/clang/lib/Format/MacroExpander.cpp
@@ -86,7 +86,7 @@ private:
}
bool parseExpansion() {
- if (!Current->isOneOf(tok::equal, tok::eof))
+ if (Current->isNoneOf(tok::equal, tok::eof))
return false;
if (Current->is(tok::equal))
nextToken();
diff --git a/clang/lib/Format/NamespaceEndCommentsFixer.cpp b/clang/lib/Format/NamespaceEndCommentsFixer.cpp
index 08f8d68..95ccfac 100644
--- a/clang/lib/Format/NamespaceEndCommentsFixer.cpp
+++ b/clang/lib/Format/NamespaceEndCommentsFixer.cpp
@@ -70,7 +70,7 @@ std::string computeName(const FormatToken *NamespaceTok) {
// and closing parenthesis or comma.
assert(Tok && Tok->is(tok::l_paren) && "expected an opening parenthesis");
Tok = Tok->getNextNonComment();
- while (Tok && !Tok->isOneOf(tok::r_paren, tok::comma)) {
+ while (Tok && Tok->isNoneOf(tok::r_paren, tok::comma)) {
name += Tok->TokenText;
Tok = Tok->getNextNonComment();
}
@@ -85,7 +85,7 @@ std::string computeName(const FormatToken *NamespaceTok) {
// one token before that up until the '{'. A '(' might be a macro with
// arguments.
const FormatToken *FirstNSTok = nullptr;
- while (Tok && !Tok->isOneOf(tok::l_brace, tok::coloncolon, tok::l_paren)) {
+ while (Tok && Tok->isNoneOf(tok::l_brace, tok::coloncolon, tok::l_paren)) {
if (FirstNSTok)
FirstNSName += FirstNSTok->TokenText;
FirstNSTok = Tok;
diff --git a/clang/lib/Format/ObjCPropertyAttributeOrderFixer.cpp b/clang/lib/Format/ObjCPropertyAttributeOrderFixer.cpp
index b885942..b12b370 100644
--- a/clang/lib/Format/ObjCPropertyAttributeOrderFixer.cpp
+++ b/clang/lib/Format/ObjCPropertyAttributeOrderFixer.cpp
@@ -61,7 +61,7 @@ void ObjCPropertyAttributeOrderFixer::sortPropertyAttributes(
}
// Most attributes look like identifiers, but `class` is a keyword.
- if (!Tok->isOneOf(tok::identifier, tok::kw_class)) {
+ if (Tok->isNoneOf(tok::identifier, tok::kw_class)) {
// If we hit any other kind of token, just bail.
return;
}
diff --git a/clang/lib/Format/QualifierAlignmentFixer.cpp b/clang/lib/Format/QualifierAlignmentFixer.cpp
index 043d957..e3e30ca 100644
--- a/clang/lib/Format/QualifierAlignmentFixer.cpp
+++ b/clang/lib/Format/QualifierAlignmentFixer.cpp
@@ -508,7 +508,7 @@ const FormatToken *LeftRightQualifierAlignmentFixer::analyzeLeft(
// Don't change declarations such as
// `foo(struct Foo const a);` -> `foo(struct Foo const a);`
- if (!Previous || !Previous->isOneOf(tok::kw_struct, tok::kw_class)) {
+ if (!Previous || Previous->isNoneOf(tok::kw_struct, tok::kw_class)) {
insertQualifierBefore(SourceMgr, Fixes, TypeToken, Qualifier);
removeToken(SourceMgr, Fixes, Tok);
}
diff --git a/clang/lib/Format/SortJavaScriptImports.cpp b/clang/lib/Format/SortJavaScriptImports.cpp
index ace3dff..a403a4f 100644
--- a/clang/lib/Format/SortJavaScriptImports.cpp
+++ b/clang/lib/Format/SortJavaScriptImports.cpp
@@ -439,7 +439,7 @@ private:
// for grammar EBNF (production ModuleItem).
bool parseModuleReference(const AdditionalKeywords &Keywords,
JsModuleReference &Reference) {
- if (!Current || !Current->isOneOf(Keywords.kw_import, tok::kw_export))
+ if (!Current || Current->isNoneOf(Keywords.kw_import, tok::kw_export))
return false;
Reference.IsExport = Current->is(tok::kw_export);
@@ -570,7 +570,7 @@ private:
Symbol.Range.setEnd(Current->Tok.getLocation());
Reference.Symbols.push_back(Symbol);
- if (!Current->isOneOf(tok::r_brace, tok::comma))
+ if (Current->isNoneOf(tok::r_brace, tok::comma))
return false;
}
Reference.SymbolsEnd = Current->Tok.getLocation();
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 0c9c88a..59f81b3 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -203,7 +203,7 @@ private:
return false;
}
if (InExpr && SeenTernaryOperator &&
- (!Next || !Next->isOneOf(tok::l_paren, tok::l_brace))) {
+ (!Next || Next->isNoneOf(tok::l_paren, tok::l_brace))) {
return false;
}
if (!MaybeAngles)
@@ -577,7 +577,7 @@ private:
if (IsIf && CurrentToken->is(tok::semi)) {
for (auto *Tok = OpeningParen.Next;
Tok != CurrentToken &&
- !Tok->isOneOf(tok::equal, tok::l_paren, tok::l_brace);
+ Tok->isNoneOf(tok::equal, tok::l_paren, tok::l_brace);
Tok = Tok->Next) {
if (Tok->isPointerOrReference())
Tok->setFinalizedType(TT_PointerOrReference);
@@ -704,7 +704,7 @@ private:
!IsCppStructuredBinding && !InsideInlineASM && !CppArrayTemplates &&
IsCpp && !IsCpp11AttributeSpecifier && !IsCSharpAttributeSpecifier &&
Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) &&
- !CurrentToken->isOneOf(tok::l_brace, tok::r_square) &&
+ CurrentToken->isNoneOf(tok::l_brace, tok::r_square) &&
(!Parent ||
Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
tok::kw_return, tok::kw_throw) ||
@@ -1334,7 +1334,7 @@ private:
if (Style.isJavaScript()) {
if (Contexts.back().ColonIsForRangeExpr || // colon in for loop
(Contexts.size() == 1 && // switch/case labels
- !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) ||
+ Line.First->isNoneOf(tok::kw_enum, tok::kw_case)) ||
Contexts.back().ContextKind == tok::l_paren || // function params
Contexts.back().ContextKind == tok::l_square || // array type
(!Contexts.back().IsExpression &&
@@ -1411,7 +1411,7 @@ private:
} else if (Contexts.back().ColonIsForRangeExpr) {
Tok->setType(TT_RangeBasedForLoopColon);
for (auto *Token = Prev;
- Token && !Token->isOneOf(tok::semi, tok::l_paren);
+ Token && Token->isNoneOf(tok::semi, tok::l_paren);
Token = Token->Previous) {
if (Token->isPointerOrReference())
Token->setFinalizedType(TT_PointerOrReference);
@@ -1425,7 +1425,7 @@ private:
Scopes.back() == ST_Class)) {
Tok->setType(TT_BitFieldColon);
} else if (Contexts.size() == 1 &&
- !Line.getFirstNonComment()->isOneOf(tok::kw_enum, tok::kw_case,
+ Line.getFirstNonComment()->isNoneOf(tok::kw_enum, tok::kw_case,
tok::kw_default) &&
!Line.startsWith(tok::kw_typedef, tok::kw_enum)) {
if (Prev->isOneOf(tok::r_paren, tok::kw_noexcept) ||
@@ -1562,10 +1562,10 @@ private:
if (Line.MustBeDeclaration && Contexts.size() == 1 &&
!Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) &&
!Line.startsWith(tok::l_paren) &&
- !Tok->isOneOf(TT_TypeDeclarationParen, TT_RequiresExpressionLParen)) {
+ Tok->isNoneOf(TT_TypeDeclarationParen, TT_RequiresExpressionLParen)) {
if (!Prev ||
(!Prev->isAttribute() &&
- !Prev->isOneOf(TT_RequiresClause, TT_LeadingJavaAnnotation,
+ Prev->isNoneOf(TT_RequiresClause, TT_LeadingJavaAnnotation,
TT_BinaryOperator))) {
Line.MightBeFunctionDecl = true;
Tok->MightBeFunctionDeclParen = true;
@@ -1664,7 +1664,7 @@ private:
}
}
while (CurrentToken &&
- !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
+ CurrentToken->isNoneOf(tok::l_paren, tok::semi, tok::r_paren)) {
if (CurrentToken->isOneOf(tok::star, tok::amp))
CurrentToken->setType(TT_PointerOrReference);
auto Next = CurrentToken->getNextNonComment();
@@ -1728,8 +1728,8 @@ private:
// cond ? id : "B";
// cond ? cond2 ? "A" : "B" : "C";
if (!Contexts.back().IsExpression && Line.MustBeDeclaration &&
- (!Next || !Next->isOneOf(tok::identifier, tok::string_literal) ||
- !Next->Next || !Next->Next->isOneOf(tok::colon, tok::question))) {
+ (!Next || Next->isNoneOf(tok::identifier, tok::string_literal) ||
+ !Next->Next || Next->Next->isNoneOf(tok::colon, tok::question))) {
Tok->setType(TT_CSharpNullable);
break;
}
@@ -1796,7 +1796,7 @@ private:
if (!parseTableGenValue())
return false;
} else if (Tok->isOneOf(Keywords.kw_def, Keywords.kw_defm) &&
- (!Next || !Next->isOneOf(tok::colon, tok::l_brace))) {
+ (!Next || Next->isNoneOf(tok::colon, tok::l_brace))) {
// The case NameValue appears.
if (!parseTableGenValue(true))
return false;
@@ -2094,7 +2094,7 @@ private:
// Reset token type in case we have already looked at it and then
// recovered from an error (e.g. failure to find the matching >).
if (!CurrentToken->isTypeFinalized() &&
- !CurrentToken->isOneOf(
+ CurrentToken->isNoneOf(
TT_LambdaLSquare, TT_LambdaLBrace, TT_AttributeMacro, TT_IfMacro,
TT_ForEachMacro, TT_TypenameMacro, TT_FunctionLBrace,
TT_ImplicitStringLiteral, TT_InlineASMBrace, TT_FatArrow,
@@ -2230,7 +2230,7 @@ private:
// type or non-type.
if (Contexts.back().ContextKind == tok::less) {
assert(Current.Previous->Previous);
- return !Current.Previous->Previous->isOneOf(tok::kw_typename,
+ return Current.Previous->Previous->isNoneOf(tok::kw_typename,
tok::kw_class);
}
@@ -2266,7 +2266,7 @@ private:
if (!Line.startsWith(TT_UnaryOperator)) {
for (FormatToken *Previous = Current.Previous;
Previous && Previous->Previous &&
- !Previous->Previous->isOneOf(tok::comma, tok::semi);
+ Previous->Previous->isNoneOf(tok::comma, tok::semi);
Previous = Previous->Previous) {
if (Previous->isOneOf(tok::r_square, tok::r_paren, tok::greater)) {
Previous = Previous->MatchingParen;
@@ -2430,7 +2430,7 @@ private:
Current.setType(TT_BinaryOperator);
} else if (Current.is(tok::arrow) && AutoFound &&
Line.MightBeFunctionDecl && Current.NestingLevel == 0 &&
- !Current.Previous->isOneOf(tok::kw_operator, tok::identifier)) {
+ Current.Previous->isNoneOf(tok::kw_operator, tok::identifier)) {
// not auto operator->() -> xxx;
Current.setType(TT_TrailingReturnArrow);
} else if (Current.is(tok::arrow) && Current.Previous &&
@@ -2511,7 +2511,7 @@ private:
Current.setType(TT_CastRParen);
if (Current.MatchingParen && Current.Next &&
!Current.Next->isBinaryOperator() &&
- !Current.Next->isOneOf(
+ Current.Next->isNoneOf(
tok::semi, tok::colon, tok::l_brace, tok::l_paren, tok::comma,
tok::period, tok::arrow, tok::coloncolon, tok::kw_noexcept)) {
if (FormatToken *AfterParen = Current.MatchingParen->Next;
@@ -2569,7 +2569,7 @@ private:
} else if (Current.isOneOf(tok::identifier, tok::kw_const, tok::kw_noexcept,
tok::kw_requires) &&
Current.Previous &&
- !Current.Previous->isOneOf(tok::equal, tok::at,
+ Current.Previous->isNoneOf(tok::equal, tok::at,
TT_CtorInitializerComma,
TT_CtorInitializerColon) &&
Line.MightBeFunctionDecl && Contexts.size() == 1) {
@@ -2658,7 +2658,7 @@ private:
if (PreviousNotConst->is(TT_TemplateCloser)) {
return PreviousNotConst && PreviousNotConst->MatchingParen &&
PreviousNotConst->MatchingParen->Previous &&
- !PreviousNotConst->MatchingParen->Previous->isOneOf(
+ PreviousNotConst->MatchingParen->Previous->isNoneOf(
tok::period, tok::kw_template);
}
@@ -2780,7 +2780,7 @@ private:
// If there is an identifier (or with a few exceptions a keyword) right
// before the parentheses, this is unlikely to be a cast.
if (LeftOfParens->Tok.getIdentifierInfo() &&
- !LeftOfParens->isOneOf(Keywords.kw_in, tok::kw_return, tok::kw_case,
+ LeftOfParens->isNoneOf(Keywords.kw_in, tok::kw_return, tok::kw_case,
tok::kw_delete, tok::kw_throw)) {
return false;
}
@@ -2918,7 +2918,7 @@ private:
const bool NextIsAmpOrStar = AfterRParen->isOneOf(tok::amp, tok::star);
if (!(AfterRParen->isUnaryOperator() || NextIsAmpOrStar) ||
AfterRParen->is(tok::plus) ||
- !AfterRParen->Next->isOneOf(tok::identifier, tok::numeric_constant)) {
+ AfterRParen->Next->isNoneOf(tok::identifier, tok::numeric_constant)) {
return false;
}
@@ -2948,7 +2948,7 @@ private:
// Search for unexpected tokens.
for (Prev = BeforeRParen; Prev != LParen; Prev = Prev->Previous)
- if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon))
+ if (Prev->isNoneOf(tok::kw_const, tok::identifier, tok::coloncolon))
return false;
return true;
@@ -3740,7 +3740,7 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) {
const bool InRequiresExpression = Line.Type == LT_RequiresExpression;
for (auto &Child : Line.Children) {
if (InRequiresExpression &&
- !Child->First->isOneOf(tok::kw_typename, tok::kw_requires,
+ Child->First->isNoneOf(tok::kw_typename, tok::kw_requires,
TT_CompoundRequirementLBrace)) {
Child->Type = LT_SimpleRequirement;
}
@@ -3857,7 +3857,7 @@ static bool isFunctionDeclarationName(const LangOptions &LangOpts,
// Find parentheses of parameter list.
if (Current.is(tok::kw_operator)) {
if (Previous.Tok.getIdentifierInfo() &&
- !Previous.isOneOf(tok::kw_return, tok::kw_co_return)) {
+ Previous.isNoneOf(tok::kw_return, tok::kw_co_return)) {
return true;
}
if (Previous.is(tok::r_paren) && Previous.is(TT_TypeDeclarationParen)) {
@@ -4328,7 +4328,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
// Slightly prefer formatting local lambda definitions like functions.
if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal))
return 35;
- if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
+ if (Right.isNoneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
TT_ArrayInitializerLSquare,
TT_DesignatedInitializerLSquare, TT_AttributeSquare)) {
return 500;
@@ -4519,7 +4519,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
const FormatToken &Left,
const FormatToken &Right) const {
if (Left.is(tok::kw_return) &&
- !Right.isOneOf(tok::semi, tok::r_paren, tok::hashhash)) {
+ Right.isNoneOf(tok::semi, tok::r_paren, tok::hashhash)) {
return true;
}
if (Left.is(tok::kw_throw) && Right.is(tok::l_paren) && Right.MatchingParen &&
@@ -4579,7 +4579,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
}
// co_await (x), co_yield (x), co_return (x)
if (Left.isOneOf(tok::kw_co_await, tok::kw_co_yield, tok::kw_co_return) &&
- !Right.isOneOf(tok::semi, tok::r_paren)) {
+ Right.isNoneOf(tok::semi, tok::r_paren)) {
return true;
}
@@ -4656,7 +4656,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
return getTokenPointerOrReferenceAlignment(Right) !=
FormatStyle::PAS_Left;
}
- return !Left.isOneOf(TT_PointerOrReference, tok::l_paren) &&
+ return Left.isNoneOf(TT_PointerOrReference, tok::l_paren) &&
(getTokenPointerOrReferenceAlignment(Right) !=
FormatStyle::PAS_Left ||
(Line.IsMultiVariableDeclStmt &&
@@ -4729,7 +4729,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
const auto *LParen = Right.Next->MatchingParen;
return !LParen || LParen->isNot(TT_FunctionTypeLParen);
}
- return !BeforeLeft->isOneOf(tok::l_paren, tok::l_square);
+ return BeforeLeft->isNoneOf(tok::l_paren, tok::l_square);
}
// Ensure right pointer alignment with ellipsis e.g. int *...P
if (Left.is(tok::ellipsis) && BeforeLeft &&
@@ -4808,10 +4808,10 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
TT_LambdaLSquare)));
}
if (Right.is(tok::l_square) &&
- !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
+ Right.isNoneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
TT_DesignatedInitializerLSquare,
TT_StructuredBindingLSquare, TT_AttributeSquare) &&
- !Left.isOneOf(tok::numeric_constant, TT_DictLiteral) &&
+ Left.isNoneOf(tok::numeric_constant, TT_DictLiteral) &&
!(Left.isNot(tok::r_square) && Style.SpaceBeforeSquareBrackets &&
Right.is(TT_ArraySubscriptLSquare))) {
return false;
@@ -4894,7 +4894,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
return Style.SpaceBeforeParensOptions.AfterFunctionDefinitionName ||
spaceRequiredBeforeParens(Right);
}
- if (!BeforeLeft || !BeforeLeft->isOneOf(tok::period, tok::arrow)) {
+ if (!BeforeLeft || BeforeLeft->isNoneOf(tok::period, tok::arrow)) {
if (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch)) {
return Style.SpaceBeforeParensOptions.AfterControlStatements ||
spaceRequiredBeforeParens(Right);
@@ -4917,7 +4917,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
if (Left.is(tok::at) && Right.isNot(tok::objc_not_keyword))
return false;
if (Right.is(TT_UnaryOperator)) {
- return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
+ return Left.isNoneOf(tok::l_paren, tok::l_square, tok::at) &&
(Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr));
}
// No space between the variable name and the initializer list.
@@ -5260,7 +5260,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
if (Left.is(tok::ellipsis))
return false;
if (Left.is(TT_TemplateCloser) &&
- !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square,
+ Right.isNoneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square,
Keywords.kw_implements, Keywords.kw_extends)) {
// Type assertions ('<type>expr') are not followed by whitespace. Other
// locations that should have whitespace following are identified by the
@@ -5299,7 +5299,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
// Add space between things in a primitive's state table unless in a
// transition like `(0?)`.
if ((Left.is(TT_VerilogTableItem) &&
- !Right.isOneOf(tok::r_paren, tok::semi)) ||
+ Right.isNoneOf(tok::r_paren, tok::semi)) ||
(Right.is(TT_VerilogTableItem) && Left.isNot(tok::l_paren))) {
const FormatToken *Next = Right.getNextNonComment();
return !(Next && Next->is(tok::r_paren));
@@ -5348,8 +5348,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
// previous rule.
if ((Right.is(Keywords.kw_apostrophe) ||
(Right.is(BK_BracedInit) && Right.is(tok::l_brace))) &&
- !(Left.isOneOf(Keywords.kw_assign, Keywords.kw_unique) ||
- Keywords.isVerilogWordOperator(Left)) &&
+ Left.isNoneOf(Keywords.kw_assign, Keywords.kw_unique) &&
+ !Keywords.isVerilogWordOperator(Left) &&
(Left.isOneOf(tok::r_square, tok::r_paren, tok::r_brace,
tok::numeric_constant) ||
Keywords.isWordLike(Left))) {
@@ -5549,14 +5549,14 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
return Right.hasWhitespaceBefore();
}
if (Right.is(tok::coloncolon) &&
- !Left.isOneOf(tok::l_brace, tok::comment, tok::l_paren)) {
+ Left.isNoneOf(tok::l_brace, tok::comment, tok::l_paren)) {
// Put a space between < and :: in vector< ::std::string >
return (Left.is(TT_TemplateOpener) &&
((Style.Standard < FormatStyle::LS_Cpp11) ||
ShouldAddSpacesInAngles())) ||
- !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
- tok::kw___super, TT_TemplateOpener,
- TT_TemplateCloser)) ||
+ Left.isNoneOf(tok::l_paren, tok::r_paren, tok::l_square,
+ tok::kw___super, TT_TemplateOpener,
+ TT_TemplateCloser) ||
(Left.is(tok::l_paren) && Style.SpacesInParensOptions.Other);
}
if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser)))
@@ -5567,7 +5567,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
}
// Space before TT_StructuredBindingLSquare.
if (Right.is(TT_StructuredBindingLSquare)) {
- return !Left.isOneOf(tok::amp, tok::ampamp) ||
+ return Left.isNoneOf(tok::amp, tok::ampamp) ||
getTokenReferenceAlignment(Left) != FormatStyle::PAS_Right;
}
// Space before & or && following a TT_StructuredBindingLSquare.
@@ -5599,7 +5599,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
// Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style.
static bool isAllmanBrace(const FormatToken &Tok) {
return Tok.is(tok::l_brace) && Tok.is(BK_Block) &&
- !Tok.isOneOf(TT_ObjCBlockLBrace, TT_LambdaLBrace, TT_DictLiteral);
+ Tok.isNoneOf(TT_ObjCBlockLBrace, TT_LambdaLBrace, TT_DictLiteral);
}
// Returns 'true' if 'Tok' is a function argument.
@@ -5617,7 +5617,7 @@ isEmptyLambdaAllowed(const FormatToken &Tok,
static bool isAllmanLambdaBrace(const FormatToken &Tok) {
return Tok.is(tok::l_brace) && Tok.is(BK_Block) &&
- !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral);
+ Tok.isNoneOf(TT_ObjCBlockLBrace, TT_DictLiteral);
}
bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
@@ -5686,7 +5686,7 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
tok::kw_const) &&
// kw_var/kw_let are pseudo-tokens that are tok::identifier, so match
// above.
- !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let)) {
+ Line.First->isNoneOf(Keywords.kw_var, Keywords.kw_let)) {
// Object literals on the top level of a file are treated as "enum-style".
// Each key/value pair is put on a separate line, instead of bin-packing.
return true;
@@ -5831,7 +5831,7 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
}
if (Right.is(tok::comment)) {
- return !Left.isOneOf(BK_BracedInit, TT_CtorInitializerColon) &&
+ return Left.isNoneOf(BK_BracedInit, TT_CtorInitializerColon) &&
Right.NewlinesBefore > 0 && Right.HasUnescapedNewline;
}
if (Left.isTrailingComment())
@@ -5873,7 +5873,7 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
case FormatStyle::RCPS_WithPreceding:
return Right.isNot(tok::semi);
case FormatStyle::RCPS_OwnLineWithBrace:
- return !Right.isOneOf(tok::semi, tok::l_brace);
+ return Right.isNoneOf(tok::semi, tok::l_brace);
default:
break;
}
@@ -6000,7 +6000,7 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
// Put multiple Java annotation on a new line.
if ((Style.isJava() || Style.isJavaScript()) &&
Left.is(TT_LeadingJavaAnnotation) &&
- !Right.isOneOf(TT_LeadingJavaAnnotation, tok::l_paren) &&
+ Right.isNoneOf(TT_LeadingJavaAnnotation, tok::l_paren) &&
(Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations)) {
return true;
}
@@ -6206,7 +6206,7 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
return false;
// Avoid to break after '(' in the cases that is in bang operators.
if (Right.is(tok::l_paren)) {
- return !Left.isOneOf(TT_TableGenBangOperator, TT_TableGenCondOperator,
+ return Left.isNoneOf(TT_TableGenBangOperator, TT_TableGenCondOperator,
TT_TemplateCloser);
}
// Avoid to break between the value and its suffix part.
@@ -6294,7 +6294,7 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
}
if (Right.is(tok::colon) &&
- !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon,
+ Right.isNoneOf(TT_CtorInitializerColon, TT_InlineASMColon,
TT_BitFieldColon)) {
return false;
}
@@ -6378,7 +6378,7 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
}
if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator, tok::kw_operator))
return false;
- if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) &&
+ if (Left.is(tok::equal) && Right.isNoneOf(tok::kw_default, tok::kw_delete) &&
Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0) {
return false;
}
@@ -6405,7 +6405,7 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
// Allow breaking after a trailing annotation, e.g. after a method
// declaration.
if (Left.is(TT_TrailingAnnotation)) {
- return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren,
+ return Right.isNoneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren,
tok::less, tok::coloncolon);
}
@@ -6448,7 +6448,7 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const))
return true;
if ((Left.isBinaryOperator() || Left.is(TT_BinaryOperator)) &&
- !Left.isOneOf(tok::arrowstar, tok::lessless) &&
+ Left.isNoneOf(tok::arrowstar, tok::lessless) &&
Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All &&
(Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None ||
Left.getPrecedence() == prec::Assignment)) {
diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp
index ac9d147..ac9c81d 100644
--- a/clang/lib/Format/UnwrappedLineFormatter.cpp
+++ b/clang/lib/Format/UnwrappedLineFormatter.cpp
@@ -506,7 +506,7 @@ private:
(NextLine.First->is(tok::r_brace) &&
!Style.BraceWrapping.SplitEmptyRecord);
} else if (TheLine->InPPDirective ||
- !TheLine->First->isOneOf(tok::kw_class, tok::kw_enum,
+ TheLine->First->isNoneOf(tok::kw_class, tok::kw_enum,
tok::kw_struct)) {
// Try to merge a block with left brace unwrapped that wasn't yet
// covered.
@@ -686,8 +686,8 @@ private:
}
Limit = limitConsideringMacros(I + 1, E, Limit);
AnnotatedLine &Line = **I;
- if (Line.First->isNot(tok::kw_do) && Line.First->isNot(tok::kw_else) &&
- Line.Last->isNot(tok::kw_else) && Line.Last->isNot(tok::r_paren)) {
+ if (Line.First->isNoneOf(tok::kw_do, tok::kw_else) &&
+ Line.Last->isNoneOf(tok::kw_else, tok::r_paren)) {
return 0;
}
// Only merge `do while` if `do` is the only statement on the line.
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 6948b3d..2879743 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -405,7 +405,7 @@ bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
case tok::r_brace:
if (OpeningBrace) {
if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
- !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
+ OpeningBrace->isNoneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
return false;
}
if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
@@ -427,7 +427,7 @@ bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
unsigned StoredPosition = Tokens->getPosition();
auto *Next = Tokens->getNextNonComment();
FormatTok = Tokens->setPosition(StoredPosition);
- if (!Next->isOneOf(tok::colon, tok::arrow)) {
+ if (Next->isNoneOf(tok::colon, tok::arrow)) {
// default not followed by `:` or `->` is not a case label; treat it
// like an identifier.
parseStructuralElement();
@@ -584,7 +584,7 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
ProbablyBracedList =
ProbablyBracedList ||
(NextTok->is(tok::identifier) &&
- !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
+ PrevTok->isNoneOf(tok::semi, tok::r_brace, tok::l_brace));
ProbablyBracedList = ProbablyBracedList ||
(NextTok->is(tok::semi) &&
@@ -607,7 +607,7 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
// A statement can end with only `;` (simple statement), a block
// closing brace (compound statement), or `:` (label statement).
// If PrevTok is a block opening brace, Tok ends an empty block.
- !PrevTok->isOneOf(tok::semi, BK_Block, tok::colon)) {
+ PrevTok->isNoneOf(tok::semi, BK_Block, tok::colon)) {
ProbablyBracedList = true;
}
}
@@ -1157,7 +1157,7 @@ void UnwrappedLineParser::parsePPDefine() {
IncludeGuard = IG_Defined;
IncludeGuardToken = nullptr;
for (auto &Line : Lines) {
- if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
+ if (Line.Tokens.front().Tok->isNoneOf(tok::comment, tok::hash)) {
IncludeGuard = IG_Rejected;
break;
}
@@ -1233,7 +1233,7 @@ void UnwrappedLineParser::parsePPUnknown() {
static bool tokenCanStartNewLine(const FormatToken &Tok) {
// Semicolon can be a null-statement, l_square can be a start of a macro or
// a C++11 attribute, but this doesn't seem to be common.
- return !Tok.isOneOf(tok::semi, tok::l_brace,
+ return Tok.isNoneOf(tok::semi, tok::l_brace,
// Tokens that can only be used as binary operators and a
// part of overloaded operator names.
tok::period, tok::periodstar, tok::arrow, tok::arrowstar,
@@ -1256,7 +1256,7 @@ static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
// FIXME: This returns true for C/C++ keywords like 'struct'.
return FormatTok->is(tok::identifier) &&
(!FormatTok->Tok.getIdentifierInfo() ||
- !FormatTok->isOneOf(
+ FormatTok->isNoneOf(
Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
@@ -1322,7 +1322,7 @@ static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
return false;
if (!isC78Type(*Tok) &&
- !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
+ Tok->isNoneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
return false;
}
@@ -1345,7 +1345,7 @@ bool UnwrappedLineParser::parseModuleImport() {
if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
!Token->Tok.getIdentifierInfo() &&
- !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
+ Token->isNoneOf(tok::colon, tok::less, tok::string_literal)) {
return false;
}
@@ -1357,7 +1357,7 @@ bool UnwrappedLineParser::parseModuleImport() {
// Handle import <foo/bar.h> as we would an include statement.
else if (FormatTok->is(tok::less)) {
nextToken();
- while (!FormatTok->isOneOf(tok::semi, tok::greater) && !eof()) {
+ while (FormatTok->isNoneOf(tok::semi, tok::greater) && !eof()) {
// Mark tokens up to the trailing line comments as implicit string
// literals.
if (FormatTok->isNot(tok::comment) &&
@@ -2394,13 +2394,13 @@ bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
const auto *BeforeRParen = Previous->getPreviousNonComment();
// Lambdas can be cast to function types only, e.g. `std::function<int()>`
// and `int (*)()`.
- if (!BeforeRParen || !BeforeRParen->isOneOf(tok::greater, tok::r_paren))
+ if (!BeforeRParen || BeforeRParen->isNoneOf(tok::greater, tok::r_paren))
return false;
} else if (Previous->is(tok::star)) {
Previous = Previous->getPreviousNonComment();
}
if (Previous && Previous->Tok.getIdentifierInfo() &&
- !Previous->isOneOf(tok::kw_return, tok::kw_co_await, tok::kw_co_yield,
+ Previous->isNoneOf(tok::kw_return, tok::kw_co_await, tok::kw_co_yield,
tok::kw_co_return)) {
return false;
}
@@ -2450,7 +2450,7 @@ void UnwrappedLineParser::tryToParseJSFunction() {
if (FormatTok->is(tok::l_brace))
tryToParseBracedList();
else
- while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
+ while (FormatTok->isNoneOf(tok::l_brace, tok::semi) && !eof())
nextToken();
}
@@ -3108,11 +3108,11 @@ void UnwrappedLineParser::parseTryCatch() {
for (bool SeenCatch = false;;) {
if (FormatTok->is(tok::at))
nextToken();
- if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
- tok::kw___finally, tok::objc_catch,
- tok::objc_finally) ||
- ((Style.isJava() || Style.isJavaScript()) &&
- FormatTok->is(Keywords.kw_finally)))) {
+ if (FormatTok->isNoneOf(tok::kw_catch, Keywords.kw___except,
+ tok::kw___finally, tok::objc_catch,
+ tok::objc_finally) &&
+ !((Style.isJava() || Style.isJavaScript()) &&
+ FormatTok->is(Keywords.kw_finally))) {
break;
}
if (FormatTok->is(tok::kw_catch))
@@ -3290,7 +3290,7 @@ void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
Keywords.kw_repeat))) &&
"'for', 'while' or foreach macro expected");
const bool KeepBraces = !Style.RemoveBracesLLVM ||
- !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
+ FormatTok->isNoneOf(tok::kw_for, tok::kw_while);
nextToken();
// JS' for await ( ...
@@ -4339,7 +4339,7 @@ void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
// to the terminating `;`. For everything else, just return and continue
// parsing the structural element, i.e. the declaration or expression for
// `export default`.
- if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
+ if (!IsImport && FormatTok->isNoneOf(tok::l_brace, tok::star) &&
!FormatTok->isStringLiteral() &&
!(FormatTok->is(Keywords.kw_type) &&
Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
@@ -4886,7 +4886,7 @@ void UnwrappedLineParser::readToken(int LevelDifference) {
const auto *Next = Tokens->peekNextToken();
if ((Style.isVerilog() && !Keywords.isVerilogPPDirective(*Next)) ||
(Style.isTableGen() &&
- !Next->isOneOf(tok::kw_else, tok::pp_define, tok::pp_ifdef,
+ Next->isNoneOf(tok::kw_else, tok::pp_define, tok::pp_ifdef,
tok::pp_ifndef, tok::pp_endif))) {
break;
}
diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp
index 30c06bb..54f366f 100644
--- a/clang/lib/Format/WhitespaceManager.cpp
+++ b/clang/lib/Format/WhitespaceManager.cpp
@@ -462,7 +462,7 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End,
if ((Style.PointerAlignment == FormatStyle::PAS_Right ||
Style.ReferenceAlignment == FormatStyle::RAS_Right) &&
CurrentChange.Spaces != 0 &&
- !CurrentChange.Tok->isOneOf(tok::equal, tok::r_paren,
+ CurrentChange.Tok->isNoneOf(tok::equal, tok::r_paren,
TT_TemplateCloser)) {
const bool ReferenceNotRightAligned =
Style.ReferenceAlignment != FormatStyle::RAS_Right &&
diff --git a/clang/lib/Frontend/ChainedIncludesSource.cpp b/clang/lib/Frontend/ChainedIncludesSource.cpp
index 82249f8..049277c 100644
--- a/clang/lib/Frontend/ChainedIncludesSource.cpp
+++ b/clang/lib/Frontend/ChainedIncludesSource.cpp
@@ -129,7 +129,7 @@ clang::createChainedIncludesSource(CompilerInstance &CI,
Clang->setTarget(TargetInfo::CreateTargetInfo(
Clang->getDiagnostics(), Clang->getInvocation().getTargetOpts()));
Clang->createFileManager();
- Clang->createSourceManager(Clang->getFileManager());
+ Clang->createSourceManager();
Clang->createPreprocessor(TU_Prefix);
Clang->getDiagnosticClient().BeginSourceFile(Clang->getLangOpts(),
&Clang->getPreprocessor());
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index b1fb905..5844366 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -382,17 +382,18 @@ IntrusiveRefCntPtr<DiagnosticsEngine> CompilerInstance::createDiagnostics(
// File Manager
-FileManager *CompilerInstance::createFileManager() {
+void CompilerInstance::createFileManager() {
assert(VFS && "CompilerInstance needs a VFS for creating FileManager");
FileMgr = llvm::makeIntrusiveRefCnt<FileManager>(getFileSystemOpts(), VFS);
- return FileMgr.get();
}
// Source Manager
-void CompilerInstance::createSourceManager(FileManager &FileMgr) {
- SourceMgr =
- llvm::makeIntrusiveRefCnt<SourceManager>(getDiagnostics(), FileMgr);
+void CompilerInstance::createSourceManager() {
+ assert(Diagnostics && "DiagnosticsEngine needed for creating SourceManager");
+ assert(FileMgr && "FileManager needed for creating SourceManager");
+ SourceMgr = llvm::makeIntrusiveRefCnt<SourceManager>(getDiagnostics(),
+ getFileManager());
}
// Initialize the remapping of files to alternative contents, e.g.,
@@ -1186,7 +1187,7 @@ std::unique_ptr<CompilerInstance> CompilerInstance::cloneForModuleCompileImpl(
if (llvm::is_contained(DiagOpts.SystemHeaderWarningsModules, ModuleName))
Instance.getDiagnostics().setSuppressSystemWarnings(false);
- Instance.createSourceManager(Instance.getFileManager());
+ Instance.createSourceManager();
SourceManager &SourceMgr = Instance.getSourceManager();
if (ThreadSafeConfig) {
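For callers, the CompilerInstance change amounts to dropping the explicit FileManager argument: createFileManager() now returns void, and createSourceManager() pulls the FileManager and DiagnosticsEngine from the instance, asserting that both exist. A sketch of the updated setup sequence, mirroring the call sites updated elsewhere in this diff (ChainedIncludesSource, FrontendAction, TestAST, Tooling):

    #include "clang/Frontend/CompilerInstance.h"

    // Sketch only: assumes CI already has a virtual file system and a
    // DiagnosticsEngine configured, as the new asserts require.
    void ensureManagers(clang::CompilerInstance &CI) {
      if (!CI.hasFileManager())
        CI.createFileManager();   // no longer returns the FileManager
      if (!CI.hasSourceManager())
        CI.createSourceManager(); // no longer takes a FileManager argument
    }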
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 6cc3b65..1b63c40 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -879,7 +879,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
// file, otherwise the CompilerInstance will happily destroy them.
CI.setVirtualFileSystem(AST->getFileManager().getVirtualFileSystemPtr());
CI.setFileManager(AST->getFileManagerPtr());
- CI.createSourceManager(CI.getFileManager());
+ CI.createSourceManager();
CI.getSourceManager().initializeForReplay(AST->getSourceManager());
// Preload all the module files loaded transitively by the AST unit. Also
@@ -971,13 +971,10 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
// Set up the file system, file and source managers, if needed.
if (!CI.hasVirtualFileSystem())
CI.createVirtualFileSystem();
- if (!CI.hasFileManager()) {
- if (!CI.createFileManager()) {
- return false;
- }
- }
+ if (!CI.hasFileManager())
+ CI.createFileManager();
if (!CI.hasSourceManager()) {
- CI.createSourceManager(CI.getFileManager());
+ CI.createSourceManager();
if (CI.getDiagnosticOpts().getFormat() == DiagnosticOptions::SARIF) {
static_cast<SARIFDiagnosticPrinter *>(&CI.getDiagnosticClient())
->setSarifWriter(
diff --git a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp
index 97a6a7f..3c20ccd 100644
--- a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp
+++ b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp
@@ -138,7 +138,16 @@ private:
// LastStmt - refers to the last statement in the method body; referencing
// LastStmt will remove the statement from the method body since
// it will be linked from the new expression being constructed.
- enum class PlaceHolder { _0, _1, _2, _3, _4, Handle = 128, LastStmt };
+ enum class PlaceHolder {
+ _0,
+ _1,
+ _2,
+ _3,
+ _4,
+ Handle = 128,
+ CounterHandle,
+ LastStmt
+ };
Expr *convertPlaceholder(PlaceHolder PH);
Expr *convertPlaceholder(LocalVar &Var);
@@ -178,10 +187,14 @@ public:
template <typename ResourceT, typename ValueT>
BuiltinTypeMethodBuilder &setHandleFieldOnResource(ResourceT ResourceRecord,
ValueT HandleValue);
+ template <typename T>
+ BuiltinTypeMethodBuilder &
+ accessCounterHandleFieldOnResource(T ResourceRecord);
template <typename T> BuiltinTypeMethodBuilder &returnValue(T ReturnValue);
BuiltinTypeMethodBuilder &returnThis();
BuiltinTypeDeclBuilder &finalize();
Expr *getResourceHandleExpr();
+ Expr *getResourceCounterHandleExpr();
private:
void createDecl();
@@ -346,6 +359,8 @@ TemplateParameterListBuilder::finalizeTemplateArgs(ConceptDecl *CD) {
Expr *BuiltinTypeMethodBuilder::convertPlaceholder(PlaceHolder PH) {
if (PH == PlaceHolder::Handle)
return getResourceHandleExpr();
+ if (PH == PlaceHolder::CounterHandle)
+ return getResourceCounterHandleExpr();
if (PH == PlaceHolder::LastStmt) {
assert(!StmtsList.empty() && "no statements in the list");
@@ -467,6 +482,18 @@ Expr *BuiltinTypeMethodBuilder::getResourceHandleExpr() {
OK_Ordinary);
}
+Expr *BuiltinTypeMethodBuilder::getResourceCounterHandleExpr() {
+ ensureCompleteDecl();
+
+ ASTContext &AST = DeclBuilder.SemaRef.getASTContext();
+ CXXThisExpr *This = CXXThisExpr::Create(
+ AST, SourceLocation(), Method->getFunctionObjectParameterType(), true);
+ FieldDecl *HandleField = DeclBuilder.getResourceCounterHandleField();
+ return MemberExpr::CreateImplicit(AST, This, false, HandleField,
+ HandleField->getType(), VK_LValue,
+ OK_Ordinary);
+}
+
BuiltinTypeMethodBuilder &
BuiltinTypeMethodBuilder::declareLocalVar(LocalVar &Var) {
ensureCompleteDecl();
@@ -584,6 +611,22 @@ BuiltinTypeMethodBuilder::setHandleFieldOnResource(ResourceT ResourceRecord,
}
template <typename T>
+BuiltinTypeMethodBuilder &
+BuiltinTypeMethodBuilder::accessCounterHandleFieldOnResource(T ResourceRecord) {
+ ensureCompleteDecl();
+
+ Expr *ResourceExpr = convertPlaceholder(ResourceRecord);
+
+ ASTContext &AST = DeclBuilder.SemaRef.getASTContext();
+ FieldDecl *HandleField = DeclBuilder.getResourceCounterHandleField();
+ MemberExpr *HandleExpr = MemberExpr::CreateImplicit(
+ AST, ResourceExpr, false, HandleField, HandleField->getType(), VK_LValue,
+ OK_Ordinary);
+ StmtsList.push_back(HandleExpr);
+ return *this;
+}
+
+template <typename T>
BuiltinTypeMethodBuilder &BuiltinTypeMethodBuilder::returnValue(T ReturnValue) {
ensureCompleteDecl();
@@ -722,8 +765,31 @@ BuiltinTypeDeclBuilder::addMemberVariable(StringRef Name, QualType Type,
return *this;
}
+BuiltinTypeDeclBuilder &
+BuiltinTypeDeclBuilder::addBufferHandles(ResourceClass RC, bool IsROV,
+ bool RawBuffer, bool HasCounter,
+ AccessSpecifier Access) {
+ addHandleMember(RC, IsROV, RawBuffer, Access);
+ if (HasCounter)
+ addCounterHandleMember(RC, IsROV, RawBuffer, Access);
+ return *this;
+}
+
BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::addHandleMember(
ResourceClass RC, bool IsROV, bool RawBuffer, AccessSpecifier Access) {
+ return addResourceMember("__handle", RC, IsROV, RawBuffer,
+ /*IsCounter=*/false, Access);
+}
+
+BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::addCounterHandleMember(
+ ResourceClass RC, bool IsROV, bool RawBuffer, AccessSpecifier Access) {
+ return addResourceMember("__counter_handle", RC, IsROV, RawBuffer,
+ /*IsCounter=*/true, Access);
+}
+
+BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::addResourceMember(
+ StringRef MemberName, ResourceClass RC, bool IsROV, bool RawBuffer,
+ bool IsCounter, AccessSpecifier Access) {
assert(!Record->isCompleteDefinition() && "record is already complete");
ASTContext &Ctx = SemaRef.getASTContext();
@@ -739,9 +805,12 @@ BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::addHandleMember(
ElementTypeInfo
? HLSLContainedTypeAttr::CreateImplicit(Ctx, ElementTypeInfo)
: nullptr};
+ if (IsCounter)
+ Attrs.push_back(HLSLIsCounterAttr::CreateImplicit(Ctx));
+
if (CreateHLSLAttributedResourceType(SemaRef, Ctx.HLSLResourceTy, Attrs,
AttributedResTy))
- addMemberVariable("__handle", AttributedResTy, {}, Access);
+ addMemberVariable(MemberName, AttributedResTy, {}, Access);
return *this;
}
@@ -844,12 +913,17 @@ BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::addCopyConstructor() {
using PH = BuiltinTypeMethodBuilder::PlaceHolder;
- return BuiltinTypeMethodBuilder(*this, /*Name=*/"", AST.VoidTy,
- /*IsConst=*/false, /*IsCtor=*/true)
- .addParam("other", ConstRecordRefType)
+ BuiltinTypeMethodBuilder MMB(*this, /*Name=*/"", AST.VoidTy,
+ /*IsConst=*/false, /*IsCtor=*/true);
+ MMB.addParam("other", ConstRecordRefType)
.accessHandleFieldOnResource(PH::_0)
- .assign(PH::Handle, PH::LastStmt)
- .finalize();
+ .assign(PH::Handle, PH::LastStmt);
+
+ if (getResourceCounterHandleField())
+ MMB.accessCounterHandleFieldOnResource(PH::_0).assign(PH::CounterHandle,
+ PH::LastStmt);
+
+ return MMB.finalize();
}
BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::addCopyAssignmentOperator() {
@@ -863,12 +937,16 @@ BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::addCopyAssignmentOperator() {
using PH = BuiltinTypeMethodBuilder::PlaceHolder;
DeclarationName Name = AST.DeclarationNames.getCXXOperatorName(OO_Equal);
- return BuiltinTypeMethodBuilder(*this, Name, RecordRefType)
- .addParam("other", ConstRecordRefType)
+ BuiltinTypeMethodBuilder MMB(*this, Name, RecordRefType);
+ MMB.addParam("other", ConstRecordRefType)
.accessHandleFieldOnResource(PH::_0)
- .assign(PH::Handle, PH::LastStmt)
- .returnThis()
- .finalize();
+ .assign(PH::Handle, PH::LastStmt);
+
+ if (getResourceCounterHandleField())
+ MMB.accessCounterHandleFieldOnResource(PH::_0).assign(PH::CounterHandle,
+ PH::LastStmt);
+
+ return MMB.returnThis().finalize();
}
BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::addArraySubscriptOperators() {
@@ -903,6 +981,14 @@ FieldDecl *BuiltinTypeDeclBuilder::getResourceHandleField() const {
return I->second;
}
+FieldDecl *BuiltinTypeDeclBuilder::getResourceCounterHandleField() const {
+ auto I = Fields.find("__counter_handle");
+ if (I == Fields.end() ||
+ !I->second->getType()->isHLSLAttributedResourceType())
+ return nullptr;
+ return I->second;
+}
+
QualType BuiltinTypeDeclBuilder::getFirstTemplateTypeParam() {
assert(Template && "record it not a template");
if (const auto *TTD = dyn_cast<TemplateTypeParmDecl>(
diff --git a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h
index 9448af1..a981602 100644
--- a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h
+++ b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h
@@ -72,8 +72,9 @@ public:
AccessSpecifier Access = AccessSpecifier::AS_private);
BuiltinTypeDeclBuilder &
- addHandleMember(ResourceClass RC, bool IsROV, bool RawBuffer,
- AccessSpecifier Access = AccessSpecifier::AS_private);
+ addBufferHandles(ResourceClass RC, bool IsROV, bool RawBuffer,
+ bool HasCounter,
+ AccessSpecifier Access = AccessSpecifier::AS_private);
BuiltinTypeDeclBuilder &addArraySubscriptOperators();
// Builtin types constructors
@@ -95,7 +96,18 @@ public:
BuiltinTypeDeclBuilder &addConsumeMethod();
private:
+ BuiltinTypeDeclBuilder &addResourceMember(StringRef MemberName,
+ ResourceClass RC, bool IsROV,
+ bool RawBuffer, bool IsCounter,
+ AccessSpecifier Access);
+ BuiltinTypeDeclBuilder &
+ addHandleMember(ResourceClass RC, bool IsROV, bool RawBuffer,
+ AccessSpecifier Access = AccessSpecifier::AS_private);
+ BuiltinTypeDeclBuilder &
+ addCounterHandleMember(ResourceClass RC, bool IsROV, bool RawBuffer,
+ AccessSpecifier Access = AccessSpecifier::AS_private);
FieldDecl *getResourceHandleField() const;
+ FieldDecl *getResourceCounterHandleField() const;
QualType getFirstTemplateTypeParam();
QualType getHandleElementType();
Expr *getConstantIntExpr(int value);
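With addHandleMember and addCounterHandleMember now private, external code declares buffer handles through addBufferHandles. A hedged sketch of the intended call (namespaces and includes omitted; the argument values are the ones the RWStructuredBuffer setup below uses, where HasCounter=true makes the builder emit both __handle and __counter_handle fields):

    // Sketch of a counted UAV buffer definition, paralleling setupBufferType
    // in HLSLExternalSemaSource.cpp below.
    BuiltinTypeDeclBuilder(S, Decl)
        .addBufferHandles(ResourceClass::UAV, /*IsROV=*/false,
                          /*RawBuffer=*/true, /*HasCounter=*/true)
        .addDefaultHandleConstructor()
        .addCopyConstructor()
        .addCopyAssignmentOperator()
        .completeDefinition();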
diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp b/clang/lib/Sema/HLSLExternalSemaSource.cpp
index 464922b..cc43e94 100644
--- a/clang/lib/Sema/HLSLExternalSemaSource.cpp
+++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp
@@ -230,9 +230,9 @@ void HLSLExternalSemaSource::defineTrivialHLSLTypes() {
/// Set up common members and attributes for buffer types
static BuiltinTypeDeclBuilder setupBufferType(CXXRecordDecl *Decl, Sema &S,
ResourceClass RC, bool IsROV,
- bool RawBuffer) {
+ bool RawBuffer, bool HasCounter) {
return BuiltinTypeDeclBuilder(S, Decl)
- .addHandleMember(RC, IsROV, RawBuffer)
+ .addBufferHandles(RC, IsROV, RawBuffer, HasCounter)
.addDefaultHandleConstructor()
.addCopyConstructor()
.addCopyAssignmentOperator()
@@ -377,7 +377,7 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() {
onCompletion(Decl, [this](CXXRecordDecl *Decl) {
setupBufferType(Decl, *SemaPtr, ResourceClass::SRV, /*IsROV=*/false,
- /*RawBuffer=*/false)
+ /*RawBuffer=*/false, /*HasCounter=*/false)
.addArraySubscriptOperators()
.addLoadMethods()
.completeDefinition();
@@ -389,7 +389,7 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() {
onCompletion(Decl, [this](CXXRecordDecl *Decl) {
setupBufferType(Decl, *SemaPtr, ResourceClass::UAV, /*IsROV=*/false,
- /*RawBuffer=*/false)
+ /*RawBuffer=*/false, /*HasCounter=*/false)
.addArraySubscriptOperators()
.addLoadMethods()
.completeDefinition();
@@ -401,7 +401,7 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() {
.finalizeForwardDeclaration();
onCompletion(Decl, [this](CXXRecordDecl *Decl) {
setupBufferType(Decl, *SemaPtr, ResourceClass::UAV, /*IsROV=*/true,
- /*RawBuffer=*/false)
+ /*RawBuffer=*/false, /*HasCounter=*/false)
.addArraySubscriptOperators()
.addLoadMethods()
.completeDefinition();
@@ -412,7 +412,7 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() {
.finalizeForwardDeclaration();
onCompletion(Decl, [this](CXXRecordDecl *Decl) {
setupBufferType(Decl, *SemaPtr, ResourceClass::SRV, /*IsROV=*/false,
- /*RawBuffer=*/true)
+ /*RawBuffer=*/true, /*HasCounter=*/false)
.addArraySubscriptOperators()
.addLoadMethods()
.completeDefinition();
@@ -423,7 +423,7 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() {
.finalizeForwardDeclaration();
onCompletion(Decl, [this](CXXRecordDecl *Decl) {
setupBufferType(Decl, *SemaPtr, ResourceClass::UAV, /*IsROV=*/false,
- /*RawBuffer=*/true)
+ /*RawBuffer=*/true, /*HasCounter=*/true)
.addArraySubscriptOperators()
.addLoadMethods()
.addIncrementCounterMethod()
@@ -437,7 +437,7 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() {
.finalizeForwardDeclaration();
onCompletion(Decl, [this](CXXRecordDecl *Decl) {
setupBufferType(Decl, *SemaPtr, ResourceClass::UAV, /*IsROV=*/false,
- /*RawBuffer=*/true)
+ /*RawBuffer=*/true, /*HasCounter=*/true)
.addAppendMethod()
.completeDefinition();
});
@@ -448,7 +448,7 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() {
.finalizeForwardDeclaration();
onCompletion(Decl, [this](CXXRecordDecl *Decl) {
setupBufferType(Decl, *SemaPtr, ResourceClass::UAV, /*IsROV=*/false,
- /*RawBuffer=*/true)
+ /*RawBuffer=*/true, /*HasCounter=*/true)
.addConsumeMethod()
.completeDefinition();
});
@@ -459,7 +459,7 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() {
.finalizeForwardDeclaration();
onCompletion(Decl, [this](CXXRecordDecl *Decl) {
setupBufferType(Decl, *SemaPtr, ResourceClass::UAV, /*IsROV=*/true,
- /*RawBuffer=*/true)
+ /*RawBuffer=*/true, /*HasCounter=*/true)
.addArraySubscriptOperators()
.addLoadMethods()
.addIncrementCounterMethod()
@@ -471,14 +471,14 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() {
.finalizeForwardDeclaration();
onCompletion(Decl, [this](CXXRecordDecl *Decl) {
setupBufferType(Decl, *SemaPtr, ResourceClass::SRV, /*IsROV=*/false,
- /*RawBuffer=*/true)
+ /*RawBuffer=*/true, /*HasCounter=*/false)
.completeDefinition();
});
Decl = BuiltinTypeDeclBuilder(*SemaPtr, HLSLNamespace, "RWByteAddressBuffer")
.finalizeForwardDeclaration();
onCompletion(Decl, [this](CXXRecordDecl *Decl) {
setupBufferType(Decl, *SemaPtr, ResourceClass::UAV, /*IsROV=*/false,
- /*RawBuffer=*/true)
+ /*RawBuffer=*/true, /*HasCounter=*/false)
.completeDefinition();
});
Decl = BuiltinTypeDeclBuilder(*SemaPtr, HLSLNamespace,
@@ -486,7 +486,7 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() {
.finalizeForwardDeclaration();
onCompletion(Decl, [this](CXXRecordDecl *Decl) {
setupBufferType(Decl, *SemaPtr, ResourceClass::UAV, /*IsROV=*/true,
- /*RawBuffer=*/true)
+ /*RawBuffer=*/true, /*HasCounter=*/false)
.completeDefinition();
});
}
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 3cc61b1..7ce3513 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -5954,9 +5954,6 @@ bool Sema::BuiltinAssumeAligned(CallExpr *TheCall) {
if (Result > Sema::MaximumAlignment)
Diag(TheCall->getBeginLoc(), diag::warn_assume_aligned_too_great)
<< SecondArg->getSourceRange() << Sema::MaximumAlignment;
-
- TheCall->setArg(1,
- ConstantExpr::Create(Context, SecondArg, APValue(Result)));
}
if (NumArgs > 2) {
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 129b03c..fa30c66b 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -1810,6 +1810,13 @@ bool clang::CreateHLSLAttributedResourceType(
}
ResAttrs.RawBuffer = true;
break;
+ case attr::HLSLIsCounter:
+ if (ResAttrs.IsCounter) {
+ S.Diag(A->getLocation(), diag::warn_duplicate_attribute_exact) << A;
+ return false;
+ }
+ ResAttrs.IsCounter = true;
+ break;
case attr::HLSLContainedType: {
const HLSLContainedTypeAttr *CTAttr = cast<HLSLContainedTypeAttr>(A);
QualType Ty = CTAttr->getType();
@@ -1902,6 +1909,10 @@ bool SemaHLSL::handleResourceTypeAttr(QualType T, const ParsedAttr &AL) {
A = HLSLRawBufferAttr::Create(getASTContext(), ACI);
break;
+ case ParsedAttr::AT_HLSLIsCounter:
+ A = HLSLIsCounterAttr::Create(getASTContext(), ACI);
+ break;
+
case ParsedAttr::AT_HLSLContainedType: {
if (AL.getNumArgs() != 1 && !AL.hasParsedType()) {
Diag(AL.getLoc(), diag::err_attribute_wrong_number_arguments) << AL << 1;
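The new HLSLIsCounter handling follows the same shape as the existing ROV and RawBuffer cases: the first occurrence sets a flag on the attributed-resource info, and a repeat is diagnosed with warn_duplicate_attribute_exact and rejected. A tiny hedged sketch of that guard pattern (hypothetical struct, not the real attribute record):

    struct ResAttrsSketch {
      unsigned RawBuffer : 1;
      unsigned IsCounter : 1;
    };

    // Returns false on a duplicate attribute, mirroring the early return in
    // CreateHLSLAttributedResourceType above.
    bool setIsCounter(ResAttrsSketch &RA) {
      if (RA.IsCounter)
        return false;
      RA.IsCounter = true;
      return true;
    }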
diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp
index a64f207..9aaf7f4 100644
--- a/clang/lib/Sema/SemaOpenACC.cpp
+++ b/clang/lib/Sema/SemaOpenACC.cpp
@@ -2789,7 +2789,7 @@ OpenACCPrivateRecipe SemaOpenACC::CreatePrivateInitRecipe(const Expr *VarExpr) {
AllocaDecl->setInitStyle(VarDecl::CallInit);
}
- return OpenACCPrivateRecipe(AllocaDecl, Init.get());
+ return OpenACCPrivateRecipe(AllocaDecl);
}
OpenACCFirstPrivateRecipe
@@ -2828,7 +2828,14 @@ SemaOpenACC::CreateFirstPrivateInitRecipe(const Expr *VarExpr) {
if (!ArrTy) {
ExprResult Init = FinishValueInit(
SemaRef.SemaRef, Entity, VarExpr->getBeginLoc(), VarTy, TemporaryDRE);
- return OpenACCFirstPrivateRecipe(AllocaDecl, Init.get(), Temporary);
+
+ // For 'no bounds' version, we can use this as a shortcut, so set the init
+ // anyway.
+ if (Init.isUsable()) {
+ AllocaDecl->setInit(Init.get());
+ AllocaDecl->setInitStyle(VarDecl::CallInit);
+ }
+ return OpenACCFirstPrivateRecipe(AllocaDecl, Temporary);
}
// Arrays need to have each individual element initialized as there
@@ -2875,8 +2882,16 @@ SemaOpenACC::CreateFirstPrivateInitRecipe(const Expr *VarExpr) {
ExprResult Init = FinishValueInit(SemaRef.SemaRef, Entity,
VarExpr->getBeginLoc(), VarTy, InitExpr);
- return OpenACCFirstPrivateRecipe(AllocaDecl, Init.get(), Temporary);
+ // For 'no bounds' version, we can use this as a shortcut, so set the init
+ // anyway.
+ if (Init.isUsable()) {
+ AllocaDecl->setInit(Init.get());
+ AllocaDecl->setInitStyle(VarDecl::CallInit);
+ }
+
+ return OpenACCFirstPrivateRecipe(AllocaDecl, Temporary);
}
+
OpenACCReductionRecipe SemaOpenACC::CreateReductionInitRecipe(
OpenACCReductionOperator ReductionOperator, const Expr *VarExpr) {
// TODO: OpenACC: This shouldn't be necessary, see PrivateInitRecipe
@@ -2932,5 +2947,12 @@ OpenACCReductionRecipe SemaOpenACC::CreateReductionInitRecipe(
ExprResult Init = FinishValueInit(SemaRef.SemaRef, Entity,
VarExpr->getBeginLoc(), VarTy, InitExpr);
- return OpenACCReductionRecipe(AllocaDecl, Init.get());
+
+ // For 'no bounds' version, we can use this as a shortcut, so set the init
+ // anyway.
+ if (Init.isUsable()) {
+ AllocaDecl->setInit(Init.get());
+ AllocaDecl->setInitStyle(VarDecl::CallInit);
+ }
+ return OpenACCReductionRecipe(AllocaDecl);
}
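Across SemaOpenACC and the serialization changes below, the recipes stop carrying a separate InitExpr: the initializer is attached to the alloca VarDecl itself (setInit/CallInit above), so it is reachable via AllocaDecl->getInit(). A hedged sketch of the resulting shapes, with field names taken from the writer code and sizes matching the updated static_asserts:

    // Sketch only; the real structs live in OpenACCClause.h.
    struct PrivateRecipeSketch {           // was { VarDecl*, Expr* }
      clang::VarDecl *AllocaDecl;
    };
    struct FirstPrivateRecipeSketch {      // was { VarDecl*, Expr*, VarDecl* }
      clang::VarDecl *AllocaDecl;
      clang::VarDecl *InitFromTemporary;
    };
    struct ReductionRecipeSketch {         // was { VarDecl*, Expr* }
      clang::VarDecl *AllocaDecl;
    };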
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index c05e428..6acf79a 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -12860,10 +12860,9 @@ OpenACCClause *ASTRecordReader::readOpenACCClause() {
llvm::SmallVector<OpenACCPrivateRecipe> RecipeList;
for (unsigned I = 0; I < VarList.size(); ++I) {
- static_assert(sizeof(OpenACCPrivateRecipe) == 2 * sizeof(int *));
+ static_assert(sizeof(OpenACCPrivateRecipe) == 1 * sizeof(int *));
VarDecl *Alloca = readDeclAs<VarDecl>();
- Expr *InitExpr = readSubExpr();
- RecipeList.push_back({Alloca, InitExpr});
+ RecipeList.push_back({Alloca});
}
return OpenACCPrivateClause::Create(getContext(), BeginLoc, LParenLoc,
@@ -12886,11 +12885,10 @@ OpenACCClause *ASTRecordReader::readOpenACCClause() {
llvm::SmallVector<Expr *> VarList = readOpenACCVarList();
llvm::SmallVector<OpenACCFirstPrivateRecipe> RecipeList;
for (unsigned I = 0; I < VarList.size(); ++I) {
- static_assert(sizeof(OpenACCFirstPrivateRecipe) == 3 * sizeof(int *));
+ static_assert(sizeof(OpenACCFirstPrivateRecipe) == 2 * sizeof(int *));
VarDecl *Recipe = readDeclAs<VarDecl>();
- Expr *InitExpr = readSubExpr();
VarDecl *RecipeTemp = readDeclAs<VarDecl>();
- RecipeList.push_back({Recipe, InitExpr, RecipeTemp});
+ RecipeList.push_back({Recipe, RecipeTemp});
}
return OpenACCFirstPrivateClause::Create(getContext(), BeginLoc, LParenLoc,
@@ -13011,10 +13009,9 @@ OpenACCClause *ASTRecordReader::readOpenACCClause() {
llvm::SmallVector<OpenACCReductionRecipe> RecipeList;
for (unsigned I = 0; I < VarList.size(); ++I) {
- static_assert(sizeof(OpenACCReductionRecipe) == 2 * sizeof(int *));
+ static_assert(sizeof(OpenACCReductionRecipe) == sizeof(int *));
VarDecl *Recipe = readDeclAs<VarDecl>();
- Expr *InitExpr = readSubExpr();
- RecipeList.push_back({Recipe, InitExpr});
+ RecipeList.push_back({Recipe});
}
return OpenACCReductionClause::Create(getContext(), BeginLoc, LParenLoc, Op,
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index cdf95ba..09b1e58 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -8779,9 +8779,8 @@ void ASTRecordWriter::writeOpenACCClause(const OpenACCClause *C) {
writeOpenACCVarList(PC);
for (const OpenACCPrivateRecipe &R : PC->getInitRecipes()) {
- static_assert(sizeof(R) == 2 * sizeof(int *));
+ static_assert(sizeof(R) == 1 * sizeof(int *));
AddDeclRef(R.AllocaDecl);
- AddStmt(const_cast<Expr *>(R.InitExpr));
}
return;
}
@@ -8803,9 +8802,8 @@ void ASTRecordWriter::writeOpenACCClause(const OpenACCClause *C) {
writeOpenACCVarList(FPC);
for (const OpenACCFirstPrivateRecipe &R : FPC->getInitRecipes()) {
- static_assert(sizeof(R) == 3 * sizeof(int *));
+ static_assert(sizeof(R) == 2 * sizeof(int *));
AddDeclRef(R.AllocaDecl);
- AddStmt(const_cast<Expr *>(R.InitExpr));
AddDeclRef(R.InitFromTemporary);
}
return;
@@ -8927,9 +8925,8 @@ void ASTRecordWriter::writeOpenACCClause(const OpenACCClause *C) {
writeOpenACCVarList(RC);
for (const OpenACCReductionRecipe &R : RC->getRecipes()) {
- static_assert(sizeof(OpenACCReductionRecipe) == 2 * sizeof(int *));
+ static_assert(sizeof(OpenACCReductionRecipe) == 1 * sizeof(int *));
AddDeclRef(R.AllocaDecl);
- AddStmt(const_cast<Expr *>(R.InitExpr));
}
return;
}
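The adjusted static_asserts double as a size guard: they pin each recipe to a pointer count, so adding a member without updating the reader and writer fails to compile rather than silently desynchronizing the AST file format. A minimal illustration of the idiom with a hypothetical record:

    struct GuardedRecord {
      void *AllocaDecl;
      void *InitFromTemporary;
    };
    // Data pointers share one size on every target clang supports, so
    // sizeof(int *) serves as the unit here, as in the asserts above.
    static_assert(sizeof(GuardedRecord) == 2 * sizeof(int *),
                  "update serialization when GuardedRecord changes");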
diff --git a/clang/lib/Testing/TestAST.cpp b/clang/lib/Testing/TestAST.cpp
index 9ad0de9..d333895 100644
--- a/clang/lib/Testing/TestAST.cpp
+++ b/clang/lib/Testing/TestAST.cpp
@@ -61,7 +61,7 @@ void createMissingComponents(CompilerInstance &Clang) {
if (!Clang.hasFileManager())
Clang.createFileManager();
if (!Clang.hasSourceManager())
- Clang.createSourceManager(Clang.getFileManager());
+ Clang.createSourceManager();
if (!Clang.hasTarget())
Clang.createTarget();
if (!Clang.hasPreprocessor())
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp
index 66cf2688..010380d 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp
@@ -415,7 +415,7 @@ bool DependencyScanningAction::runInvocation(
any(Service.getOptimizeArgs() & ScanningOptimizations::VFS);
// Create a new FileManager to match the invocation's FileSystemOptions.
- auto *FileMgr = ScanInstance.createFileManager();
+ ScanInstance.createFileManager();
// Use the dependency scanning optimized file system if requested to do so.
if (DepFS) {
@@ -423,16 +423,17 @@ bool DependencyScanningAction::runInvocation(
if (!ScanInstance.getHeaderSearchOpts().ModuleCachePath.empty()) {
SmallString<256> ModulesCachePath;
normalizeModuleCachePath(
- *FileMgr, ScanInstance.getHeaderSearchOpts().ModuleCachePath,
- ModulesCachePath);
+ ScanInstance.getFileManager(),
+ ScanInstance.getHeaderSearchOpts().ModuleCachePath, ModulesCachePath);
DepFS->setBypassedPathPrefix(ModulesCachePath);
}
ScanInstance.setDependencyDirectivesGetter(
- std::make_unique<ScanningDependencyDirectivesGetter>(*FileMgr));
+ std::make_unique<ScanningDependencyDirectivesGetter>(
+ ScanInstance.getFileManager()));
}
- ScanInstance.createSourceManager(*FileMgr);
+ ScanInstance.createSourceManager();
// Create a collection of stable directories derived from the ScanInstance
// for determining whether module dependencies would fully resolve from
diff --git a/clang/lib/Tooling/Tooling.cpp b/clang/lib/Tooling/Tooling.cpp
index 2d4790b..ea5a372 100644
--- a/clang/lib/Tooling/Tooling.cpp
+++ b/clang/lib/Tooling/Tooling.cpp
@@ -458,7 +458,7 @@ bool FrontendActionFactory::runInvocation(
if (!Compiler.hasDiagnostics())
return false;
- Compiler.createSourceManager(*Files);
+ Compiler.createSourceManager();
const bool Success = Compiler.ExecuteAction(*ScopedToolAction);
diff --git a/clang/test/AST/HLSL/StructuredBuffers-AST.hlsl b/clang/test/AST/HLSL/StructuredBuffers-AST.hlsl
index a490b22..6779abb 100644
--- a/clang/test/AST/HLSL/StructuredBuffers-AST.hlsl
+++ b/clang/test/AST/HLSL/StructuredBuffers-AST.hlsl
@@ -12,7 +12,7 @@
//
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump \
// RUN: -DRESOURCE=RWStructuredBuffer %s | FileCheck -DRESOURCE=RWStructuredBuffer \
-// RUN: -check-prefixes=CHECK,CHECK-UAV,CHECK-SUBSCRIPT,CHECK-SUBSCRIPT-UAV,CHECK-COUNTER,CHECK-LOAD %s
+// RUN: -check-prefixes=CHECK,CHECK-UAV,CHECK-SUBSCRIPT,CHECK-SUBSCRIPT-UAV,CHECK-COUNTER,CHECK-LOAD,CHECK-COUNTER-HANDLE %s
//
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump -DEMPTY \
// RUN: -DRESOURCE=AppendStructuredBuffer %s | FileCheck -DRESOURCE=AppendStructuredBuffer \
@@ -20,7 +20,7 @@
//
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump \
// RUN: -DRESOURCE=AppendStructuredBuffer %s | FileCheck -DRESOURCE=AppendStructuredBuffer \
-// RUN: -check-prefixes=CHECK,CHECK-UAV,CHECK-NOSUBSCRIPT,CHECK-APPEND %s
+// RUN: -check-prefixes=CHECK,CHECK-UAV,CHECK-NOSUBSCRIPT,CHECK-APPEND,CHECK-COUNTER-HANDLE %s
//
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump -DEMPTY \
// RUN: -DRESOURCE=ConsumeStructuredBuffer %s | FileCheck -DRESOURCE=ConsumeStructuredBuffer \
@@ -28,7 +28,7 @@
//
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump \
// RUN: -DRESOURCE=ConsumeStructuredBuffer %s | FileCheck -DRESOURCE=ConsumeStructuredBuffer \
-// RUN: -check-prefixes=CHECK,CHECK-UAV,CHECK-NOSUBSCRIPT,CHECK-CONSUME %s
+// RUN: -check-prefixes=CHECK,CHECK-UAV,CHECK-NOSUBSCRIPT,CHECK-CONSUME,CHECK-COUNTER-HANDLE %s
//
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump -DEMPTY \
// RUN: -DRESOURCE=RasterizerOrderedStructuredBuffer %s | FileCheck -DRESOURCE=RasterizerOrderedStructuredBuffer \
@@ -36,7 +36,7 @@
//
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump \
// RUN: -DRESOURCE=RasterizerOrderedStructuredBuffer %s | FileCheck -DRESOURCE=RasterizerOrderedStructuredBuffer \
-// RUN: -check-prefixes=CHECK,CHECK-UAV,CHECK-ROV,CHECK-SUBSCRIPT,CHECK-SUBSCRIPT-UAV,CHECK-LOAD %s
+// RUN: -check-prefixes=CHECK,CHECK-UAV,CHECK-ROV,CHECK-SUBSCRIPT,CHECK-SUBSCRIPT-UAV,CHECK-LOAD,CHECK-COUNTER-HANDLE %s
// This test tests two different AST generations for each structured buffer.
// The "EMPTY" test mode verifies the AST generated by forward declaration
@@ -113,6 +113,11 @@ RESOURCE<float> Buffer;
// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::[[RESOURCE]]<element_type>' lvalue implicit this
// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
// CHECK-NEXT: DeclRefExpr {{.*}} 'const hlsl::[[RESOURCE]]<element_type>' ParmVar {{.*}} 'other' 'const hlsl::[[RESOURCE]]<element_type> &'
+// CHECK-COUNTER-HANDLE-NEXT: BinaryOperator {{.*}} '='
+// CHECK-COUNTER-HANDLE-NEXT: MemberExpr {{.*}} lvalue .__counter_handle
+// CHECK-COUNTER-HANDLE-NEXT: CXXThisExpr {{.*}} 'hlsl::[[RESOURCE]]<element_type>' lvalue implicit this
+// CHECK-COUNTER-HANDLE-NEXT: MemberExpr {{.*}} lvalue .__counter_handle
+// CHECK-COUNTER-HANDLE-NEXT: DeclRefExpr {{.*}} 'const hlsl::[[RESOURCE]]<element_type>' ParmVar {{.*}} 'other' 'const hlsl::[[RESOURCE]]<element_type> &'
// CHECK-NEXT: AlwaysInlineAttr
// operator=
@@ -125,6 +130,11 @@ RESOURCE<float> Buffer;
// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::[[RESOURCE]]<element_type>' lvalue implicit this
// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle
// CHECK-NEXT: DeclRefExpr {{.*}} 'const hlsl::[[RESOURCE]]<element_type>' ParmVar {{.*}} 'other' 'const hlsl::[[RESOURCE]]<element_type> &'
+// CHECK-COUNTER-HANDLE: BinaryOperator {{.*}} '='
+// CHECK-COUNTER-HANDLE: MemberExpr {{.*}} lvalue .__counter_handle
+// CHECK-COUNTER-HANDLE: CXXThisExpr {{.*}} 'hlsl::[[RESOURCE]]<element_type>' lvalue implicit this
+// CHECK-COUNTER-HANDLE: MemberExpr {{.*}} lvalue .__counter_handle
+// CHECK-COUNTER-HANDLE: DeclRefExpr {{.*}} 'const hlsl::[[RESOURCE]]<element_type>' ParmVar {{.*}} 'other' 'const hlsl::[[RESOURCE]]<element_type> &'
// CHECK-NEXT: ReturnStmt
// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::[[RESOURCE]]<element_type>' lvalue implicit this
// CHECK-NEXT: AlwaysInlineAttr
@@ -334,3 +344,8 @@ RESOURCE<float> Buffer;
// CHECK-ROV-SAME{LITERAL}: [[hlsl::is_rov]]
// CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]]
// CHECK-SAME{LITERAL}: [[hlsl::contained_type(float)]]
+// CHECK-COUNTER-HANDLE: FieldDecl {{.*}} implicit referenced __counter_handle '__hlsl_resource_t
+// CHECK-COUNTER-HANDLE-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
+// CHECK-COUNTER-HANDLE-SAME{LITERAL}: [[hlsl::raw_buffer]]
+// CHECK-COUNTER-HANDLE-SAME{LITERAL}: [[hlsl::is_counter]]
+// CHECK-COUNTER-HANDLE-SAME{LITERAL}: [[hlsl::contained_type(float)]]
diff --git a/clang/test/CodeGen/X86/avx512ifma-builtins.c b/clang/test/CodeGen/X86/avx512ifma-builtins.c
index 7c7c492..eebefb0 100644
--- a/clang/test/CodeGen/X86/avx512ifma-builtins.c
+++ b/clang/test/CodeGen/X86/avx512ifma-builtins.c
@@ -3,6 +3,11 @@
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512ifma -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512ifma -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512ifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512ifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512ifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512ifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+
#include <immintrin.h>
diff --git a/clang/test/CodeGen/X86/avx512ifmavl-builtins.c b/clang/test/CodeGen/X86/avx512ifmavl-builtins.c
index c115b60..89108fc 100644
--- a/clang/test/CodeGen/X86/avx512ifmavl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512ifmavl-builtins.c
@@ -3,6 +3,12 @@
// RUN: %clang_cc1 -x c++ %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-apple-darwin -target-feature +avx512ifma -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c++ %s -flax-vector-conversions=none -ffreestanding -triple=i386-apple-darwin -target-feature +avx512ifma -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-apple-darwin -target-feature +avx512ifma -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c %s -flax-vector-conversions=none -ffreestanding -triple=i386-apple-darwin -target-feature +avx512ifma -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-apple-darwin -target-feature +avx512ifma -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ %s -flax-vector-conversions=none -ffreestanding -triple=i386-apple-darwin -target-feature +avx512ifma -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+
+
#include <immintrin.h>
__m128i test_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
diff --git a/clang/test/CodeGen/X86/avxifma-builtins.c b/clang/test/CodeGen/X86/avxifma-builtins.c
index dd0f220..aa15159 100644
--- a/clang/test/CodeGen/X86/avxifma-builtins.c
+++ b/clang/test/CodeGen/X86/avxifma-builtins.c
@@ -3,6 +3,12 @@
// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avxifma -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +avxifma -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avxifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-apple-darwin -target-feature +avxifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avxifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +avxifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+
+
#include <immintrin.h>
__m128i test_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
diff --git a/clang/test/CodeGenHLSL/resources/RWStructuredBuffer-elementtype.hlsl b/clang/test/CodeGenHLSL/resources/RWStructuredBuffer-elementtype.hlsl
index 472b9a8..9f0a5b7 100644
--- a/clang/test/CodeGenHLSL/resources/RWStructuredBuffer-elementtype.hlsl
+++ b/clang/test/CodeGenHLSL/resources/RWStructuredBuffer-elementtype.hlsl
@@ -1,23 +1,36 @@
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK
// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=SPV
-// CHECK: %"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", i16, 1, 0) }
-// CHECK: %"class.hlsl::RWStructuredBuffer.0" = type { target("dx.RawBuffer", i16, 1, 0) }
-// CHECK: %"class.hlsl::RWStructuredBuffer.1" = type { target("dx.RawBuffer", i32, 1, 0) }
-// CHECK: %"class.hlsl::RWStructuredBuffer.2" = type { target("dx.RawBuffer", i32, 1, 0) }
-// CHECK: %"class.hlsl::RWStructuredBuffer.3" = type { target("dx.RawBuffer", i64, 1, 0) }
-// CHECK: %"class.hlsl::RWStructuredBuffer.4" = type { target("dx.RawBuffer", i64, 1, 0) }
-// CHECK: %"class.hlsl::RWStructuredBuffer.5" = type { target("dx.RawBuffer", half, 1, 0) }
-// CHECK: %"class.hlsl::RWStructuredBuffer.6" = type { target("dx.RawBuffer", float, 1, 0) }
-// CHECK: %"class.hlsl::RWStructuredBuffer.7" = type { target("dx.RawBuffer", double, 1, 0) }
-// CHECK: %"class.hlsl::RWStructuredBuffer.8" = type { target("dx.RawBuffer", <4 x i16>, 1, 0) }
-// CHECK: %"class.hlsl::RWStructuredBuffer.9" = type { target("dx.RawBuffer", <3 x i32>, 1, 0) }
-// CHECK: %"class.hlsl::RWStructuredBuffer.10" = type { target("dx.RawBuffer", <2 x half>, 1, 0) }
-// CHECK: %"class.hlsl::RWStructuredBuffer.11" = type { target("dx.RawBuffer", <3 x float>, 1, 0) }
-// CHECK: %"class.hlsl::RWStructuredBuffer.12" = type { target("dx.RawBuffer", i32, 1, 0) }
-// SPV: %"class.hlsl::RWStructuredBuffer.12" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1)
-// CHECK: %"class.hlsl::RWStructuredBuffer.13" = type { target("dx.RawBuffer", <4 x i32>, 1, 0) }
-// SPV: %"class.hlsl::RWStructuredBuffer.13" = type { target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 1)
+// CHECK: %"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", i16, 1, 0), target("dx.RawBuffer", i16, 1, 0) }
+// SPV: %"class.hlsl::RWStructuredBuffer" = type { target("spirv.VulkanBuffer", [0 x i16], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// CHECK: %"class.hlsl::RWStructuredBuffer.0" = type { target("dx.RawBuffer", i16, 1, 0), target("dx.RawBuffer", i16, 1, 0) }
+// SPV: %"class.hlsl::RWStructuredBuffer.0" = type { target("spirv.VulkanBuffer", [0 x i16], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// CHECK: %"class.hlsl::RWStructuredBuffer.1" = type { target("dx.RawBuffer", i32, 1, 0), target("dx.RawBuffer", i32, 1, 0) }
+// SPV: %"class.hlsl::RWStructuredBuffer.1" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// CHECK: %"class.hlsl::RWStructuredBuffer.2" = type { target("dx.RawBuffer", i32, 1, 0), target("dx.RawBuffer", i32, 1, 0) }
+// SPV: %"class.hlsl::RWStructuredBuffer.2" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// CHECK: %"class.hlsl::RWStructuredBuffer.3" = type { target("dx.RawBuffer", i64, 1, 0), target("dx.RawBuffer", i64, 1, 0) }
+// SPV: %"class.hlsl::RWStructuredBuffer.3" = type { target("spirv.VulkanBuffer", [0 x i64], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// CHECK: %"class.hlsl::RWStructuredBuffer.4" = type { target("dx.RawBuffer", i64, 1, 0), target("dx.RawBuffer", i64, 1, 0) }
+// SPV: %"class.hlsl::RWStructuredBuffer.4" = type { target("spirv.VulkanBuffer", [0 x i64], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// CHECK: %"class.hlsl::RWStructuredBuffer.5" = type { target("dx.RawBuffer", half, 1, 0), target("dx.RawBuffer", half, 1, 0) }
+// SPV: %"class.hlsl::RWStructuredBuffer.5" = type { target("spirv.VulkanBuffer", [0 x half], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// CHECK: %"class.hlsl::RWStructuredBuffer.6" = type { target("dx.RawBuffer", float, 1, 0), target("dx.RawBuffer", float, 1, 0) }
+// SPV: %"class.hlsl::RWStructuredBuffer.6" = type { target("spirv.VulkanBuffer", [0 x float], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// CHECK: %"class.hlsl::RWStructuredBuffer.7" = type { target("dx.RawBuffer", double, 1, 0), target("dx.RawBuffer", double, 1, 0) }
+// SPV: %"class.hlsl::RWStructuredBuffer.7" = type { target("spirv.VulkanBuffer", [0 x double], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// CHECK: %"class.hlsl::RWStructuredBuffer.8" = type { target("dx.RawBuffer", <4 x i16>, 1, 0), target("dx.RawBuffer", <4 x i16>, 1, 0) }
+// SPV: %"class.hlsl::RWStructuredBuffer.8" = type { target("spirv.VulkanBuffer", [0 x <4 x i16>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// CHECK: %"class.hlsl::RWStructuredBuffer.9" = type { target("dx.RawBuffer", <3 x i32>, 1, 0), target("dx.RawBuffer", <3 x i32>, 1, 0) }
+// SPV: %"class.hlsl::RWStructuredBuffer.9" = type { target("spirv.VulkanBuffer", [0 x <3 x i32>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// CHECK: %"class.hlsl::RWStructuredBuffer.10" = type { target("dx.RawBuffer", <2 x half>, 1, 0), target("dx.RawBuffer", <2 x half>, 1, 0) }
+// SPV: %"class.hlsl::RWStructuredBuffer.10" = type { target("spirv.VulkanBuffer", [0 x <2 x half>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// CHECK: %"class.hlsl::RWStructuredBuffer.11" = type { target("dx.RawBuffer", <3 x float>, 1, 0), target("dx.RawBuffer", <3 x float>, 1, 0) }
+// SPV: %"class.hlsl::RWStructuredBuffer.11" = type { target("spirv.VulkanBuffer", [0 x <3 x float>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// CHECK: %"class.hlsl::RWStructuredBuffer.12" = type { target("dx.RawBuffer", i32, 1, 0), target("dx.RawBuffer", i32, 1, 0) }
+// SPV: %"class.hlsl::RWStructuredBuffer.12" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// CHECK: %"class.hlsl::RWStructuredBuffer.13" = type { target("dx.RawBuffer", <4 x i32>, 1, 0), target("dx.RawBuffer", <4 x i32>, 1, 0) }
+// SPV: %"class.hlsl::RWStructuredBuffer.13" = type { target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
RWStructuredBuffer<int16_t> BufI16;
RWStructuredBuffer<uint16_t> BufU16;
diff --git a/clang/test/CodeGenHLSL/resources/RasterizerOrderedStructuredBuffer-elementtype.hlsl b/clang/test/CodeGenHLSL/resources/RasterizerOrderedStructuredBuffer-elementtype.hlsl
index 6c5a705..c97ad42 100644
--- a/clang/test/CodeGenHLSL/resources/RasterizerOrderedStructuredBuffer-elementtype.hlsl
+++ b/clang/test/CodeGenHLSL/resources/RasterizerOrderedStructuredBuffer-elementtype.hlsl
@@ -5,19 +5,19 @@ struct MyStruct {
int2 b;
};
-// DXIL: %"class.hlsl::RasterizerOrderedStructuredBuffer" = type { target("dx.RawBuffer", i16, 1, 1) }
-// DXIL: %"class.hlsl::RasterizerOrderedStructuredBuffer.0" = type { target("dx.RawBuffer", i16, 1, 1) }
-// DXIL: %"class.hlsl::RasterizerOrderedStructuredBuffer.1" = type { target("dx.RawBuffer", i32, 1, 1) }
-// DXIL: %"class.hlsl::RasterizerOrderedStructuredBuffer.2" = type { target("dx.RawBuffer", i32, 1, 1) }
-// DXIL: %"class.hlsl::RasterizerOrderedStructuredBuffer.3" = type { target("dx.RawBuffer", i64, 1, 1) }
-// DXIL: %"class.hlsl::RasterizerOrderedStructuredBuffer.4" = type { target("dx.RawBuffer", i64, 1, 1) }
-// DXIL: %"class.hlsl::RasterizerOrderedStructuredBuffer.5" = type { target("dx.RawBuffer", half, 1, 1) }
-// DXIL: %"class.hlsl::RasterizerOrderedStructuredBuffer.6" = type { target("dx.RawBuffer", float, 1, 1) }
-// DXIL: %"class.hlsl::RasterizerOrderedStructuredBuffer.7" = type { target("dx.RawBuffer", double, 1, 1) }
-// DXIL: %"class.hlsl::RasterizerOrderedStructuredBuffer.8" = type { target("dx.RawBuffer", <4 x i16>, 1, 1) }
-// DXIL: %"class.hlsl::RasterizerOrderedStructuredBuffer.9" = type { target("dx.RawBuffer", <3 x i32>, 1, 1) }
-// DXIL: %"class.hlsl::RasterizerOrderedStructuredBuffer.10" = type { target("dx.RawBuffer", <2 x half>, 1, 1) }
-// DXIL: %"class.hlsl::RasterizerOrderedStructuredBuffer.11" = type { target("dx.RawBuffer", <3 x float>, 1, 1) }
+// DXIL: %"class.hlsl::RasterizerOrderedStructuredBuffer" = type { target("dx.RawBuffer", i16, 1, 1), target("dx.RawBuffer", i16, 1, 1) }
+// DXIL: %"class.hlsl::RasterizerOrderedStructuredBuffer.0" = type { target("dx.RawBuffer", i16, 1, 1), target("dx.RawBuffer", i16, 1, 1) }
+// DXIL: %"class.hlsl::RasterizerOrderedStructuredBuffer.1" = type { target("dx.RawBuffer", i32, 1, 1), target("dx.RawBuffer", i32, 1, 1) }
+// DXIL: %"class.hlsl::RasterizerOrderedStructuredBuffer.2" = type { target("dx.RawBuffer", i32, 1, 1), target("dx.RawBuffer", i32, 1, 1) }
+// DXIL: %"class.hlsl::RasterizerOrderedStructuredBuffer.3" = type { target("dx.RawBuffer", i64, 1, 1), target("dx.RawBuffer", i64, 1, 1) }
+// DXIL: %"class.hlsl::RasterizerOrderedStructuredBuffer.4" = type { target("dx.RawBuffer", i64, 1, 1), target("dx.RawBuffer", i64, 1, 1) }
+// DXIL: %"class.hlsl::RasterizerOrderedStructuredBuffer.5" = type { target("dx.RawBuffer", half, 1, 1), target("dx.RawBuffer", half, 1, 1) }
+// DXIL: %"class.hlsl::RasterizerOrderedStructuredBuffer.6" = type { target("dx.RawBuffer", float, 1, 1), target("dx.RawBuffer", float, 1, 1) }
+// DXIL: %"class.hlsl::RasterizerOrderedStructuredBuffer.7" = type { target("dx.RawBuffer", double, 1, 1), target("dx.RawBuffer", double, 1, 1) }
+// DXIL: %"class.hlsl::RasterizerOrderedStructuredBuffer.8" = type { target("dx.RawBuffer", <4 x i16>, 1, 1), target("dx.RawBuffer", <4 x i16>, 1, 1) }
+// DXIL: %"class.hlsl::RasterizerOrderedStructuredBuffer.9" = type { target("dx.RawBuffer", <3 x i32>, 1, 1), target("dx.RawBuffer", <3 x i32>, 1, 1) }
+// DXIL: %"class.hlsl::RasterizerOrderedStructuredBuffer.10" = type { target("dx.RawBuffer", <2 x half>, 1, 1), target("dx.RawBuffer", <2 x half>, 1, 1) }
+// DXIL: %"class.hlsl::RasterizerOrderedStructuredBuffer.11" = type { target("dx.RawBuffer", <3 x float>, 1, 1), target("dx.RawBuffer", <3 x float>, 1, 1) }
// DXIL: %struct.MyStruct = type <{ <4 x float>, <2 x i32> }>
RasterizerOrderedStructuredBuffer<int16_t> BufI16;
diff --git a/clang/test/CodeGenHLSL/resources/StructuredBuffers-constructors.hlsl b/clang/test/CodeGenHLSL/resources/StructuredBuffers-constructors.hlsl
index 4f005ea..89a66b0 100644
--- a/clang/test/CodeGenHLSL/resources/StructuredBuffers-constructors.hlsl
+++ b/clang/test/CodeGenHLSL/resources/StructuredBuffers-constructors.hlsl
@@ -21,8 +21,8 @@ export void foo() {
}
// CHECK-DXIL: %"class.hlsl::StructuredBuffer" = type { target("dx.RawBuffer", float, 0, 0) }
-// CHECK-DXIL: %"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", float, 1, 0) }
-// CHECK-DXIL: %"class.hlsl::AppendStructuredBuffer" = type { target("dx.RawBuffer", float, 1, 0) }
+// CHECK-DXIL: %"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", float, 1, 0), target("dx.RawBuffer", float, 1, 0) }
+// CHECK-DXIL: %"class.hlsl::AppendStructuredBuffer" = type { target("dx.RawBuffer", float, 1, 0), target("dx.RawBuffer", float, 1, 0) }
// CHECK: @Buf1 = internal global %"class.hlsl::StructuredBuffer" poison, align 4
// CHECK: @[[Buf1Str:.*]] = private unnamed_addr constant [5 x i8] c"Buf1\00", align 1
diff --git a/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-lib.hlsl b/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-lib.hlsl
index 93aa218..43ddd2e 100644
--- a/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-lib.hlsl
+++ b/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-lib.hlsl
@@ -10,9 +10,9 @@ AppendStructuredBuffer<float> ASB : register(u2);
ConsumeStructuredBuffer<float> CSB : register(u3);
// CHECK: %"class.hlsl::StructuredBuffer" = type { target("dx.RawBuffer", float, 0, 0) }
-// CHECK: %"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", float, 1, 0) }
-// CHECK: %"class.hlsl::AppendStructuredBuffer" = type { target("dx.RawBuffer", float, 1, 0) }
-// CHECK: %"class.hlsl::ConsumeStructuredBuffer" = type { target("dx.RawBuffer", float, 1, 0) }
+// CHECK: %"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", float, 1, 0), target("dx.RawBuffer", float, 1, 0) }
+// CHECK: %"class.hlsl::AppendStructuredBuffer" = type { target("dx.RawBuffer", float, 1, 0), target("dx.RawBuffer", float, 1, 0) }
+// CHECK: %"class.hlsl::ConsumeStructuredBuffer" = type { target("dx.RawBuffer", float, 1, 0), target("dx.RawBuffer", float, 1, 0) }
export int TestIncrementCounter() {
return RWSB1.IncrementCounter();
diff --git a/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-ps.hlsl b/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-ps.hlsl
index b513963..9e08a6d 100644
--- a/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-ps.hlsl
+++ b/clang/test/CodeGenHLSL/resources/StructuredBuffers-methods-ps.hlsl
@@ -6,7 +6,7 @@
RWStructuredBuffer<float> RWSB1, RWSB2;
RasterizerOrderedStructuredBuffer<float> ROSB1, ROSB2;
-// CHECK: %"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", float, 1, 0) }
+// CHECK: %"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", float, 1, 0), target("dx.RawBuffer", float, 1, 0) }
export void TestIncrementCounter() {
// CHECK: define void @_Z20TestIncrementCounterv()
diff --git a/clang/test/CodeGenHLSL/resources/resource-bindings.hlsl b/clang/test/CodeGenHLSL/resources/resource-bindings.hlsl
index 4ffa7cf..1d85048 100644
--- a/clang/test/CodeGenHLSL/resources/resource-bindings.hlsl
+++ b/clang/test/CodeGenHLSL/resources/resource-bindings.hlsl
@@ -4,7 +4,7 @@
// CHECK: %"class.hlsl::RWBuffer" = type { target("dx.TypedBuffer", <4 x float>, 1, 0, 0) }
// CHECK: %"class.hlsl::RWBuffer.0" = type { target("dx.TypedBuffer", float, 1, 0, 0) }
// CHECK: %"class.hlsl::StructuredBuffer" = type { target("dx.RawBuffer", i32, 0, 0) }
-// CHECK: %"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", %struct.S, 1, 0) }
+// CHECK: %"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", %struct.S, 1, 0), target("dx.RawBuffer", %struct.S, 1, 0) }
// CHECK: %"class.hlsl::RWBuffer.1" = type { target("dx.TypedBuffer", double, 1, 0, 0) }
// CHECK: @_ZL4U0S0 = internal global %"class.hlsl::RWBuffer" poison, align 4
diff --git a/clang/test/SemaCXX/builtin-assume-aligned.cpp b/clang/test/SemaCXX/builtin-assume-aligned.cpp
index afc11cc..48bd841 100644
--- a/clang/test/SemaCXX/builtin-assume-aligned.cpp
+++ b/clang/test/SemaCXX/builtin-assume-aligned.cpp
@@ -47,9 +47,3 @@ constexpr void *s1 = __builtin_assume_aligned(x, 32);
constexpr void *s2 = __builtin_assume_aligned(x, 32, 5);
constexpr void *s3 = __builtin_assume_aligned(x, 32, -1);
-
-constexpr int add(int a, int b) {
- return a+b;
-}
-constexpr void *c1 = __builtin_assume_aligned(p, add(1,1));
-constexpr void *c2 = __builtin_assume_aligned(p, add(2,1)); // expected-error {{not a power of 2}}
diff --git a/clang/tools/clang-import-test/clang-import-test.cpp b/clang/tools/clang-import-test/clang-import-test.cpp
index 910e08c..977cec1 100644
--- a/clang/tools/clang-import-test/clang-import-test.cpp
+++ b/clang/tools/clang-import-test/clang-import-test.cpp
@@ -216,7 +216,7 @@ std::unique_ptr<CompilerInstance> BuildCompilerInstance() {
Ins->getTarget().adjust(Ins->getDiagnostics(), Ins->getLangOpts(),
/*AuxTarget=*/nullptr);
Ins->createFileManager();
- Ins->createSourceManager(Ins->getFileManager());
+ Ins->createSourceManager();
Ins->createPreprocessor(TU_Complete);
return Ins;
diff --git a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
index 594c79a..de20e74 100644
--- a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
+++ b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
@@ -462,9 +462,10 @@ static Error runAOTCompile(StringRef InputFile, StringRef OutputFile,
// TODO: Consider using LLVM-IR metadata to identify globals of interest
bool isKernel(const Function &F) {
- const CallingConv::ID CC = F.getCallingConv();
- return CC == CallingConv::SPIR_KERNEL || CC == CallingConv::AMDGPU_KERNEL ||
- CC == CallingConv::PTX_Kernel;
+ const llvm::CallingConv::ID CC = F.getCallingConv();
+ return CC == llvm::CallingConv::SPIR_KERNEL ||
+ CC == llvm::CallingConv::AMDGPU_KERNEL ||
+ CC == llvm::CallingConv::PTX_Kernel;
}
/// Performs the following steps:
diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index 30e2be7..c39f337 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -2832,10 +2832,8 @@ void OpenACCClauseEnqueue::VisitTileClause(const OpenACCTileClause &C) {
void OpenACCClauseEnqueue::VisitPrivateClause(const OpenACCPrivateClause &C) {
VisitVarList(C);
- for (const OpenACCPrivateRecipe &R : C.getInitRecipes()) {
+ for (const OpenACCPrivateRecipe &R : C.getInitRecipes())
Visitor.AddDecl(R.AllocaDecl);
- Visitor.AddStmt(R.InitExpr);
- }
}
void OpenACCClauseEnqueue::VisitHostClause(const OpenACCHostClause &C) {
@@ -2851,7 +2849,6 @@ void OpenACCClauseEnqueue::VisitFirstPrivateClause(
VisitVarList(C);
for (const OpenACCFirstPrivateRecipe &R : C.getInitRecipes()) {
Visitor.AddDecl(R.AllocaDecl);
- Visitor.AddStmt(R.InitExpr);
Visitor.AddDecl(R.InitFromTemporary);
}
}
@@ -2927,10 +2924,8 @@ void OpenACCClauseEnqueue::VisitDeviceTypeClause(
void OpenACCClauseEnqueue::VisitReductionClause(
const OpenACCReductionClause &C) {
VisitVarList(C);
- for (const OpenACCReductionRecipe &R : C.getRecipes()) {
+ for (const OpenACCReductionRecipe &R : C.getRecipes())
Visitor.AddDecl(R.AllocaDecl);
- Visitor.AddStmt(R.InitExpr);
- }
}
void OpenACCClauseEnqueue::VisitAutoClause(const OpenACCAutoClause &C) {}
void OpenACCClauseEnqueue::VisitIndependentClause(
diff --git a/clang/unittests/Analysis/CFGTest.cpp b/clang/unittests/Analysis/CFGTest.cpp
index 46a6751..6aa09a8 100644
--- a/clang/unittests/Analysis/CFGTest.cpp
+++ b/clang/unittests/Analysis/CFGTest.cpp
@@ -93,6 +93,159 @@ TEST(CFG, DependantBaseAddImplicitDtors) {
.getStatus());
}
+TEST(CFG, SwitchCoveredEnumNoDefault) {
+ const char *Code = R"(
+ enum class E {E1, E2};
+ int f(E e) {
+ switch(e) {
+ case E::E1:
+ return 1;
+ case E::E2:
+ return 2;
+ }
+ return 0;
+ }
+ )";
+ CFG::BuildOptions Options;
+ Options.AssumeReachableDefaultInSwitchStatements = true;
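+ // With this option, the implicit default branch of the switch is kept
+ // reachable even though every enumerator of E is covered by a case.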
+ BuildResult B = BuildCFG(Code, Options);
+ ASSERT_EQ(BuildResult::BuiltCFG, B.getStatus());
+
+ // [B5 (ENTRY)]
+ // Succs (1): B2
+ //
+ // [B1]
+ // 1: 0
+ // 2: return [B1.1];
+ // Preds (1): B2
+ // Succs (1): B0
+ //
+ // [B2]
+ // 1: e (ImplicitCastExpr, LValueToRValue, E)
+ // T: switch [B2.1]
+ // Preds (1): B5
+ // Succs (3): B3 B4 B1
+ //
+ // [B3]
+ // case E::E2:
+ // 1: 2
+ // 2: return [B3.1];
+ // Preds (1): B2
+ // Succs (1): B0
+ //
+ // [B4]
+ // case E::E1:
+ // 1: 1
+ // 2: return [B4.1];
+ // Preds (1): B2
+ // Succs (1): B0
+ //
+ // [B0 (EXIT)]
+ // Preds (3): B1 B3 B4
+
+ auto *CFG = B.getCFG();
+ const auto &Entry = CFG->getEntry();
+ ASSERT_EQ(1u, Entry.succ_size());
+ // First successor of Entry is the switch
+ CFGBlock *SwitchBlock = *Entry.succ_begin();
+ ASSERT_EQ(3u, SwitchBlock->succ_size());
+ // Last successor of the switch is after the switch
+ auto NoCaseSucc = SwitchBlock->succ_rbegin();
+ EXPECT_TRUE(NoCaseSucc->isReachable());
+
+ // Check that the same node is unreachable without this setting.
+ Options.AssumeReachableDefaultInSwitchStatements = false;
+ B = BuildCFG(Code, Options);
+ ASSERT_EQ(BuildResult::BuiltCFG, B.getStatus());
+
+ const auto &Entry2 = B.getCFG()->getEntry();
+ ASSERT_EQ(1u, Entry2.succ_size());
+ CFGBlock *SwitchBlock2 = *Entry2.succ_begin();
+ ASSERT_EQ(3u, SwitchBlock2->succ_size());
+ auto NoCaseSucc2 = SwitchBlock2->succ_rbegin();
+ EXPECT_FALSE(NoCaseSucc2->isReachable());
+}
+
+TEST(CFG, SwitchCoveredEnumWithDefault) {
+ const char *Code = R"(
+ enum class E {E1, E2};
+ int f(E e) {
+ switch(e) {
+ case E::E1:
+ return 1;
+ case E::E2:
+ return 2;
+ default:
+ return 0;
+ }
+ return -1;
+ }
+ )";
+ CFG::BuildOptions Options;
+ Options.AssumeReachableDefaultInSwitchStatements = true;
+ BuildResult B = BuildCFG(Code, Options);
+ ASSERT_EQ(BuildResult::BuiltCFG, B.getStatus());
+
+ // [B6 (ENTRY)]
+ // Succs (1): B2
+ //
+ // [B1]
+ // 1: -1
+ // 2: return [B1.1];
+ // Succs (1): B0
+ //
+ // [B2]
+ // 1: e (ImplicitCastExpr, LValueToRValue, E)
+ // T: switch [B2.1]
+ // Preds (1): B6
+ // Succs (3): B4 B5 B3
+ //
+ // [B3]
+ // default:
+ // 1: 0
+ // 2: return [B3.1];
+ // Preds (1): B2
+ // Succs (1): B0
+ //
+ // [B4]
+ // case E::E2:
+ // 1: 2
+ // 2: return [B4.1];
+ // Preds (1): B2
+ // Succs (1): B0
+ //
+ // [B5]
+ // case E::E1:
+ // 1: 1
+ // 2: return [B5.1];
+ // Preds (1): B2
+ // Succs (1): B0
+ //
+ // [B0 (EXIT)]
+ // Preds (4): B1 B3 B4 B5
+
+ const auto &Entry = B.getCFG()->getEntry();
+ ASSERT_EQ(1u, Entry.succ_size());
+ // First successor of Entry is the switch
+ CFGBlock *SwitchBlock = *Entry.succ_begin();
+ ASSERT_EQ(3u, SwitchBlock->succ_size());
+ // Last successor of the switch is the default branch
+ auto defaultBlock = SwitchBlock->succ_rbegin();
+ EXPECT_TRUE(defaultBlock->isReachable());
+
+ // Check that the same node is unreachable without this setting.
+ Options.AssumeReachableDefaultInSwitchStatements = false;
+ B = BuildCFG(Code, Options);
+ ASSERT_EQ(BuildResult::BuiltCFG, B.getStatus());
+
+ const auto &Entry2 = B.getCFG()->getEntry();
+ ASSERT_EQ(1u, Entry2.succ_size());
+ CFGBlock *SwitchBlock2 = *Entry2.succ_begin();
+ ASSERT_EQ(3u, SwitchBlock2->succ_size());
+ auto defaultBlock2 = SwitchBlock2->succ_rbegin();
+ EXPECT_FALSE(defaultBlock2->isReachable());
+}
+
TEST(CFG, IsLinear) {
auto expectLinear = [](bool IsLinear, const char *Code) {
BuildResult B = BuildCFG(Code);
diff --git a/clang/unittests/CodeGen/TestCompiler.h b/clang/unittests/CodeGen/TestCompiler.h
index 57b5b07..9bd9060 100644
--- a/clang/unittests/CodeGen/TestCompiler.h
+++ b/clang/unittests/CodeGen/TestCompiler.h
@@ -52,7 +52,7 @@ struct TestCompiler {
PtrSize = TInfo.getPointerWidth(clang::LangAS::Default) / 8;
compiler.createFileManager();
- compiler.createSourceManager(compiler.getFileManager());
+ compiler.createSourceManager();
compiler.createPreprocessor(clang::TU_Prefix);
compiler.createASTContext();
diff --git a/clang/unittests/Serialization/ForceCheckFileInputTest.cpp b/clang/unittests/Serialization/ForceCheckFileInputTest.cpp
index 24e2fd6..edf33ae 100644
--- a/clang/unittests/Serialization/ForceCheckFileInputTest.cpp
+++ b/clang/unittests/Serialization/ForceCheckFileInputTest.cpp
@@ -122,8 +122,8 @@ export int aa = 43;
Clang.setDiagnostics(Diags);
Clang.createVirtualFileSystem(CIOpts.VFS);
- FileManager *FM = Clang.createFileManager();
- Clang.createSourceManager(*FM);
+ Clang.createFileManager();
+ Clang.createSourceManager();
EXPECT_TRUE(Clang.createTarget());
Clang.createPreprocessor(TU_Complete);
diff --git a/clang/unittests/Tooling/DependencyScanning/DependencyScannerTest.cpp b/clang/unittests/Tooling/DependencyScanning/DependencyScannerTest.cpp
index 80289ef..aa32bb3 100644
--- a/clang/unittests/Tooling/DependencyScanning/DependencyScannerTest.cpp
+++ b/clang/unittests/Tooling/DependencyScanning/DependencyScannerTest.cpp
@@ -65,7 +65,7 @@ public:
if (!Compiler.hasDiagnostics())
return false;
- Compiler.createSourceManager(*FileMgr);
+ Compiler.createSourceManager();
Compiler.addDependencyCollector(std::make_shared<TestFileCollector>(
Compiler.getInvocation().getDependencyOutputOpts(), Deps));
diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
index 57be863..e595e61 100644
--- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
@@ -41,7 +41,9 @@
#include "mlir/Pass/Pass.h"
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstddef>
#include <iterator>
@@ -75,6 +77,112 @@ class MapInfoFinalizationPass
/// | |
std::map<mlir::Operation *, mlir::Value> localBoxAllocas;
+ /// Return true if the given path exists in a list of paths.
+ static bool
+ containsPath(const llvm::SmallVectorImpl<llvm::SmallVector<int64_t>> &paths,
+ llvm::ArrayRef<int64_t> path) {
+ return llvm::any_of(paths, [&](const llvm::SmallVector<int64_t> &p) {
+ return p.size() == path.size() &&
+ std::equal(p.begin(), p.end(), path.begin());
+ });
+ }
+
+ /// Return true if the given path is already present in
+ /// op.getMembersIndexAttr().
+ static bool mappedIndexPathExists(mlir::omp::MapInfoOp op,
+ llvm::ArrayRef<int64_t> indexPath) {
+ if (mlir::ArrayAttr attr = op.getMembersIndexAttr()) {
+ for (mlir::Attribute list : attr) {
+ auto listAttr = mlir::cast<mlir::ArrayAttr>(list);
+ if (listAttr.size() != indexPath.size())
+ continue;
+ bool allEq = true;
+ for (auto [i, val] : llvm::enumerate(listAttr)) {
+ if (mlir::cast<mlir::IntegerAttr>(val).getInt() != indexPath[i]) {
+ allEq = false;
+ break;
+ }
+ }
+ if (allEq)
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /// Build a compact string key for an index path for set-based
+ /// deduplication. Format: "N:v0,v1,..." where N is the length.
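+ /// For example, the path {1, 0, 2} yields the key "3:1,0,2".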
+ static void buildPathKey(llvm::ArrayRef<int64_t> path,
+ llvm::SmallString<64> &outKey) {
+ outKey.clear();
+ llvm::raw_svector_ostream os(outKey);
+ os << path.size() << ':';
+ for (size_t i = 0; i < path.size(); ++i) {
+ if (i)
+ os << ',';
+ os << path[i];
+ }
+ }
+
+ /// Create the member map for coordRef and append it (and its index
+ /// path) to the provided new* vectors, if it is not already present.
+ void appendMemberMapIfNew(
+ mlir::omp::MapInfoOp op, fir::FirOpBuilder &builder, mlir::Location loc,
+ mlir::Value coordRef, llvm::ArrayRef<int64_t> indexPath,
+ llvm::StringRef memberName,
+ llvm::SmallVectorImpl<mlir::Value> &newMapOpsForFields,
+ llvm::SmallVectorImpl<llvm::SmallVector<int64_t>> &newMemberIndexPaths) {
+ // Local de-dup within this op invocation.
+ if (containsPath(newMemberIndexPaths, indexPath))
+ return;
+ // Global de-dup against already present member indices.
+ if (mappedIndexPathExists(op, indexPath))
+ return;
+
+ if (op.getMapperId()) {
+ mlir::omp::DeclareMapperOp symbol =
+ mlir::SymbolTable::lookupNearestSymbolFrom<
+ mlir::omp::DeclareMapperOp>(op, op.getMapperIdAttr());
+ assert(symbol && "missing symbol for declare mapper identifier");
+ mlir::omp::DeclareMapperInfoOp mapperInfo = symbol.getDeclareMapperInfo();
+ // TODO: There is probably a way to cache these keys so we don't constantly
+ // go through the process of rebuilding them on every check, to save some
+ // cycles, but it can wait for a subsequent patch.
+ for (auto v : mapperInfo.getMapVars()) {
+ mlir::omp::MapInfoOp map =
+ mlir::cast<mlir::omp::MapInfoOp>(v.getDefiningOp());
+ if (!map.getMembers().empty() && mappedIndexPathExists(map, indexPath))
+ return;
+ }
+ }
+
+ builder.setInsertionPoint(op);
+ fir::factory::AddrAndBoundsInfo info = fir::factory::getDataOperandBaseAddr(
+ builder, coordRef, /*isOptional=*/false, loc);
+ llvm::SmallVector<mlir::Value> bounds = fir::factory::genImplicitBoundsOps<
+ mlir::omp::MapBoundsOp, mlir::omp::MapBoundsType>(
+ builder, info,
+ hlfir::translateToExtendedValue(loc, builder, hlfir::Entity{coordRef})
+ .first,
+ /*dataExvIsAssumedSize=*/false, loc);
+
+ mlir::omp::MapInfoOp fieldMapOp = mlir::omp::MapInfoOp::create(
+ builder, loc, coordRef.getType(), coordRef,
+ mlir::TypeAttr::get(fir::unwrapRefType(coordRef.getType())),
+ op.getMapTypeAttr(),
+ builder.getAttr<mlir::omp::VariableCaptureKindAttr>(
+ mlir::omp::VariableCaptureKind::ByRef),
+ /*varPtrPtr=*/mlir::Value{}, /*members=*/mlir::ValueRange{},
+ /*members_index=*/mlir::ArrayAttr{}, bounds,
+ /*mapperId=*/mlir::FlatSymbolRefAttr(),
+ builder.getStringAttr(op.getNameAttr().strref() + "." + memberName +
+ ".implicit_map"),
+ /*partial_map=*/builder.getBoolAttr(false));
+
+ newMapOpsForFields.emplace_back(fieldMapOp);
+ newMemberIndexPaths.emplace_back(indexPath.begin(), indexPath.end());
+ }
+
/// getMemberUserList gathers all users of a particular MapInfoOp that are
/// other MapInfoOp's and places them into the mapMemberUsers list, which
/// records the map that the current argument MapInfoOp "op" is part of
@@ -363,7 +471,7 @@ class MapInfoFinalizationPass
mlir::ArrayAttr newMembersAttr;
mlir::SmallVector<mlir::Value> newMembers;
llvm::SmallVector<llvm::SmallVector<int64_t>> memberIndices;
- bool IsHasDeviceAddr = isHasDeviceAddr(op, target);
+ bool isHasDeviceAddrFlag = isHasDeviceAddr(op, target);
if (!mapMemberUsers.empty() || !op.getMembers().empty())
getMemberIndicesAsVectors(
@@ -406,7 +514,7 @@ class MapInfoFinalizationPass
mapUser.parent.getMembersMutable().assign(newMemberOps);
mapUser.parent.setMembersIndexAttr(
builder.create2DI64ArrayAttr(memberIndices));
- } else if (!IsHasDeviceAddr) {
+ } else if (!isHasDeviceAddrFlag) {
auto baseAddr =
genBaseAddrMap(descriptor, op.getBounds(), op.getMapType(), builder);
newMembers.push_back(baseAddr);
@@ -429,7 +537,7 @@ class MapInfoFinalizationPass
// The contents of the descriptor (the base address in particular) will
// remain unchanged though.
uint64_t mapType = op.getMapType();
- if (IsHasDeviceAddr) {
+ if (isHasDeviceAddrFlag) {
mapType |= llvm::to_underlying(
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS);
}
@@ -701,94 +809,134 @@ class MapInfoFinalizationPass
auto recordType = mlir::cast<fir::RecordType>(underlyingType);
llvm::SmallVector<mlir::Value> newMapOpsForFields;
- llvm::SmallVector<int64_t> fieldIndicies;
+ llvm::SmallVector<llvm::SmallVector<int64_t>> newMemberIndexPaths;
+ // 1) Handle direct top-level allocatable fields.
for (auto fieldMemTyPair : recordType.getTypeList()) {
auto &field = fieldMemTyPair.first;
auto memTy = fieldMemTyPair.second;
- bool shouldMapField =
- llvm::find_if(mapVarForwardSlice, [&](mlir::Operation *sliceOp) {
- if (!fir::isAllocatableType(memTy))
- return false;
-
- auto designateOp = mlir::dyn_cast<hlfir::DesignateOp>(sliceOp);
- if (!designateOp)
- return false;
-
- return designateOp.getComponent() &&
- designateOp.getComponent()->strref() == field;
- }) != mapVarForwardSlice.end();
-
- // TODO Handle recursive record types. Adapting
- // `createParentSymAndGenIntermediateMaps` to work direclty on MLIR
- // entities might be helpful here.
-
- if (!shouldMapField)
+ if (!fir::isAllocatableType(memTy))
continue;
- int32_t fieldIdx = recordType.getFieldIndex(field);
- bool alreadyMapped = [&]() {
- if (op.getMembersIndexAttr())
- for (auto indexList : op.getMembersIndexAttr()) {
- auto indexListAttr = mlir::cast<mlir::ArrayAttr>(indexList);
- if (indexListAttr.size() == 1 &&
- mlir::cast<mlir::IntegerAttr>(indexListAttr[0]).getInt() ==
- fieldIdx)
- return true;
- }
-
- return false;
- }();
-
- if (alreadyMapped)
+ bool referenced = llvm::any_of(mapVarForwardSlice, [&](auto *opv) {
+ auto designateOp = mlir::dyn_cast<hlfir::DesignateOp>(opv);
+ return designateOp && designateOp.getComponent() &&
+ designateOp.getComponent()->strref() == field;
+ });
+ if (!referenced)
continue;
+ int32_t fieldIdx = recordType.getFieldIndex(field);
builder.setInsertionPoint(op);
fir::IntOrValue idxConst =
mlir::IntegerAttr::get(builder.getI32Type(), fieldIdx);
auto fieldCoord = fir::CoordinateOp::create(
builder, op.getLoc(), builder.getRefType(memTy), op.getVarPtr(),
llvm::SmallVector<fir::IntOrValue, 1>{idxConst});
- fir::factory::AddrAndBoundsInfo info =
- fir::factory::getDataOperandBaseAddr(
- builder, fieldCoord, /*isOptional=*/false, op.getLoc());
- llvm::SmallVector<mlir::Value> bounds =
- fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
- mlir::omp::MapBoundsType>(
- builder, info,
- hlfir::translateToExtendedValue(op.getLoc(), builder,
- hlfir::Entity{fieldCoord})
- .first,
- /*dataExvIsAssumedSize=*/false, op.getLoc());
-
- mlir::omp::MapInfoOp fieldMapOp = mlir::omp::MapInfoOp::create(
- builder, op.getLoc(), fieldCoord.getResult().getType(),
- fieldCoord.getResult(),
- mlir::TypeAttr::get(
- fir::unwrapRefType(fieldCoord.getResult().getType())),
- op.getMapTypeAttr(),
- builder.getAttr<mlir::omp::VariableCaptureKindAttr>(
- mlir::omp::VariableCaptureKind::ByRef),
- /*varPtrPtr=*/mlir::Value{}, /*members=*/mlir::ValueRange{},
- /*members_index=*/mlir::ArrayAttr{}, bounds,
- /*mapperId=*/mlir::FlatSymbolRefAttr(),
- builder.getStringAttr(op.getNameAttr().strref() + "." + field +
- ".implicit_map"),
- /*partial_map=*/builder.getBoolAttr(false));
- newMapOpsForFields.emplace_back(fieldMapOp);
- fieldIndicies.emplace_back(fieldIdx);
+ int64_t fieldIdx64 = static_cast<int64_t>(fieldIdx);
+ llvm::SmallVector<int64_t, 1> idxPath{fieldIdx64};
+ appendMemberMapIfNew(op, builder, op.getLoc(), fieldCoord, idxPath,
+ field, newMapOpsForFields, newMemberIndexPaths);
+ }
+
+ // 2) Handle nested allocatable fields along any component chain
+ // referenced in the region via HLFIR designates.
+ llvm::SmallVector<llvm::SmallVector<int64_t>> seenIndexPaths;
+ for (mlir::Operation *sliceOp : mapVarForwardSlice) {
+ auto designateOp = mlir::dyn_cast<hlfir::DesignateOp>(sliceOp);
+ if (!designateOp || !designateOp.getComponent())
+ continue;
+ llvm::SmallVector<llvm::StringRef> compPathReversed;
+ compPathReversed.push_back(designateOp.getComponent()->strref());
+ mlir::Value curBase = designateOp.getMemref();
+ bool rootedAtMapArg = false;
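+ // Walk the designate chain towards its root, collecting component names
+ // from the leaf outward, and note whether the chain is rooted at this
+ // op's mapped block argument.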
+ while (true) {
+ if (auto parentDes = curBase.getDefiningOp<hlfir::DesignateOp>()) {
+ if (!parentDes.getComponent())
+ break;
+ compPathReversed.push_back(parentDes.getComponent()->strref());
+ curBase = parentDes.getMemref();
+ continue;
+ }
+ if (auto decl = curBase.getDefiningOp<hlfir::DeclareOp>()) {
+ if (auto barg =
+ mlir::dyn_cast<mlir::BlockArgument>(decl.getMemref()))
+ rootedAtMapArg = (barg == opBlockArg);
+ } else if (auto blockArg =
+ mlir::dyn_cast_or_null<mlir::BlockArgument>(
+ curBase)) {
+ rootedAtMapArg = (blockArg == opBlockArg);
+ }
+ break;
+ }
+ // Only process nested paths (2+ components). Single-component paths
+ // for direct fields are handled above.
+ if (!rootedAtMapArg || compPathReversed.size() < 2)
+ continue;
+ builder.setInsertionPoint(op);
+ llvm::SmallVector<int64_t> indexPath;
+ mlir::Type curTy = underlyingType;
+ mlir::Value coordRef = op.getVarPtr();
+ bool validPath = true;
+ for (llvm::StringRef compName : llvm::reverse(compPathReversed)) {
+ auto recTy = mlir::dyn_cast<fir::RecordType>(curTy);
+ if (!recTy) {
+ validPath = false;
+ break;
+ }
+ int32_t idx = recTy.getFieldIndex(compName);
+ if (idx < 0) {
+ validPath = false;
+ break;
+ }
+ indexPath.push_back(idx);
+ mlir::Type memTy = recTy.getType(idx);
+ fir::IntOrValue idxConst =
+ mlir::IntegerAttr::get(builder.getI32Type(), idx);
+ coordRef = fir::CoordinateOp::create(
+ builder, op.getLoc(), builder.getRefType(memTy), coordRef,
+ llvm::SmallVector<fir::IntOrValue, 1>{idxConst});
+ curTy = memTy;
+ }
+ if (!validPath)
+ continue;
+ if (auto finalRefTy =
+ mlir::dyn_cast<fir::ReferenceType>(coordRef.getType())) {
+ mlir::Type eleTy = finalRefTy.getElementType();
+ if (fir::isAllocatableType(eleTy)) {
+ if (!containsPath(seenIndexPaths, indexPath)) {
+ seenIndexPaths.emplace_back(indexPath.begin(), indexPath.end());
+ appendMemberMapIfNew(op, builder, op.getLoc(), coordRef,
+ indexPath, compPathReversed.front(),
+ newMapOpsForFields, newMemberIndexPaths);
+ }
+ }
+ }
}
if (newMapOpsForFields.empty())
return mlir::WalkResult::advance();
- op.getMembersMutable().append(newMapOpsForFields);
+ // Deduplicate by index path to avoid emitting duplicate members for
+ // the same component. Use a set-based key to keep this near O(n).
+ llvm::SmallVector<mlir::Value> dedupMapOps;
+ llvm::SmallVector<llvm::SmallVector<int64_t>> dedupIndexPaths;
+ llvm::StringSet<> seenKeys;
+ for (auto [i, mapOp] : llvm::enumerate(newMapOpsForFields)) {
+ const auto &path = newMemberIndexPaths[i];
+ llvm::SmallString<64> key;
+ buildPathKey(path, key);
+ if (seenKeys.contains(key))
+ continue;
+ seenKeys.insert(key);
+ dedupMapOps.push_back(mapOp);
+ dedupIndexPaths.emplace_back(path.begin(), path.end());
+ }
+ op.getMembersMutable().append(dedupMapOps);
llvm::SmallVector<llvm::SmallVector<int64_t>> newMemberIndices;
- mlir::ArrayAttr oldMembersIdxAttr = op.getMembersIndexAttr();
-
- if (oldMembersIdxAttr)
- for (mlir::Attribute indexList : oldMembersIdxAttr) {
+ if (mlir::ArrayAttr oldAttr = op.getMembersIndexAttr())
+ for (mlir::Attribute indexList : oldAttr) {
llvm::SmallVector<int64_t> listVec;
for (mlir::Attribute index : mlir::cast<mlir::ArrayAttr>(indexList))
@@ -796,10 +944,8 @@ class MapInfoFinalizationPass
newMemberIndices.emplace_back(std::move(listVec));
}
-
- for (int64_t newFieldIdx : fieldIndicies)
- newMemberIndices.emplace_back(
- llvm::SmallVector<int64_t>(1, newFieldIdx));
+ for (auto &path : dedupIndexPaths)
+ newMemberIndices.emplace_back(path);
op.setMembersIndexAttr(builder.create2DI64ArrayAttr(newMemberIndices));
op.setPartialMap(true);
diff --git a/flang/lib/Parser/parsing.cpp b/flang/lib/Parser/parsing.cpp
index 8a8c6ef..2df6881 100644
--- a/flang/lib/Parser/parsing.cpp
+++ b/flang/lib/Parser/parsing.cpp
@@ -85,6 +85,7 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) {
if (options.features.IsEnabled(LanguageFeature::OpenACC) ||
(options.prescanAndReformat && noneOfTheAbove)) {
prescanner.AddCompilerDirectiveSentinel("$acc");
+ prescanner.AddCompilerDirectiveSentinel("@acc");
}
if (options.features.IsEnabled(LanguageFeature::OpenMP) ||
(options.prescanAndReformat && noneOfTheAbove)) {
diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp
index 865c149..66e5b2c 100644
--- a/flang/lib/Parser/prescan.cpp
+++ b/flang/lib/Parser/prescan.cpp
@@ -147,6 +147,11 @@ void Prescanner::Statement() {
directiveSentinel_[4] == '\0') {
// CUDA conditional compilation line.
condOffset = 5;
+ } else if (directiveSentinel_[0] == '@' && directiveSentinel_[1] == 'a' &&
+ directiveSentinel_[2] == 'c' && directiveSentinel_[3] == 'c' &&
+ directiveSentinel_[4] == '\0') {
+ // OpenACC conditional compilation line.
+ condOffset = 5;
}
if (condOffset && !preprocessingOnly_) {
at_ += *condOffset, column_ += *condOffset;
diff --git a/flang/test/Lower/OpenMP/declare-mapper.f90 b/flang/test/Lower/OpenMP/declare-mapper.f90
index 8a98c68..3d4d0da 100644
--- a/flang/test/Lower/OpenMP/declare-mapper.f90
+++ b/flang/test/Lower/OpenMP/declare-mapper.f90
@@ -6,6 +6,7 @@
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 %t/omp-declare-mapper-3.f90 -o - | FileCheck %t/omp-declare-mapper-3.f90
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 %t/omp-declare-mapper-4.f90 -o - | FileCheck %t/omp-declare-mapper-4.f90
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 %t/omp-declare-mapper-5.f90 -o - | FileCheck %t/omp-declare-mapper-5.f90
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 %t/omp-declare-mapper-6.f90 -o - | FileCheck %t/omp-declare-mapper-6.f90
!--- omp-declare-mapper-1.f90
subroutine declare_mapper_1
@@ -262,3 +263,41 @@ contains
!$omp end target
end subroutine
end program declare_mapper_5
+
+!--- omp-declare-mapper-6.f90
+subroutine declare_mapper_nested_parent
+ type :: inner_t
+ real, allocatable :: deep_arr(:)
+ end type inner_t
+
+ type, abstract :: base_t
+ real, allocatable :: base_arr(:)
+ type(inner_t) :: inner
+ end type base_t
+
+ type, extends(base_t) :: real_t
+ real, allocatable :: real_arr(:)
+ end type real_t
+
+ !$omp declare mapper (custommapper : real_t :: t) map(tofrom: t%base_arr, t%real_arr)
+ ! CHECK: omp.declare_mapper @{{.*custommapper}}
+ ! CHECK-DAG: omp.map.info {{.*}} {name = "t%base_t%base_arr"}
+ ! CHECK-DAG: omp.map.info {{.*}} {name = "t%real_arr"}
+ ! CHECK: omp.declare_mapper.info
+
+ type(real_t) :: r
+
+ allocate(r%base_arr(10))
+ allocate(r%inner%deep_arr(10))
+ allocate(r%real_arr(10))
+ r%base_arr = 1.0
+ r%inner%deep_arr = 4.0
+ r%real_arr = 0.0
+
+ ! Check implicit maps for deeply nested allocatable payloads not covered by the mapper
+ ! CHECK-DAG: omp.map.info {{.*}} {name = "r.deep_arr.implicit_map"}
+ ! CHECK: omp.target
+ !$omp target map(mapper(custommapper), tofrom: r)
+ r%real_arr = r%base_arr(1) + r%inner%deep_arr(1)
+ !$omp end target
+end subroutine declare_mapper_nested_parent
diff --git a/flang/test/Semantics/OpenACC/acc-sentinel.f90 b/flang/test/Semantics/OpenACC/acc-sentinel.f90
new file mode 100644
index 0000000..d34d97e
--- /dev/null
+++ b/flang/test/Semantics/OpenACC/acc-sentinel.f90
@@ -0,0 +1,14 @@
+! RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenacc
+
+subroutine test_sentinel()
+! Test for error since we currently do not have an OpenACC module upstream.
+!ERROR: Cannot parse module file for module 'openacc': Source file 'openacc.mod' was not found
+ !@acc use openacc
+ integer :: i
+
+ !$acc parallel loop
+ do i = 1, 10
+ end do
+ !$acc end parallel
+
+end subroutine
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index ae8deab..cfb7791 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -325,6 +325,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.unistd.dup2
libc.src.unistd.dup3
libc.src.unistd.execve
+ libc.src.unistd.faccessat
libc.src.unistd.fchdir
libc.src.unistd.fpathconf
libc.src.unistd.fsync
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index bf2ad4a..87b78a33 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -331,6 +331,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.unistd.dup2
libc.src.unistd.dup3
libc.src.unistd.execve
+ libc.src.unistd.faccessat
libc.src.unistd.fchdir
libc.src.unistd.fpathconf
libc.src.unistd.fsync
diff --git a/libc/fuzzing/stdlib/strtointeger_differential_fuzz.cpp b/libc/fuzzing/stdlib/strtointeger_differential_fuzz.cpp
index 097e619..2fabbba 100644
--- a/libc/fuzzing/stdlib/strtointeger_differential_fuzz.cpp
+++ b/libc/fuzzing/stdlib/strtointeger_differential_fuzz.cpp
@@ -44,6 +44,10 @@
// greater than 50% chance for each character to end the string, making the odds
// of getting long numbers very low.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ if (size < 2) // Needs at least one byte for the base and one byte for the
+ // string.
+ return 0;
+
uint8_t *container = new uint8_t[size + 1];
if (!container)
__builtin_trap();
diff --git a/libc/include/llvm-libc-macros/linux/fcntl-macros.h b/libc/include/llvm-libc-macros/linux/fcntl-macros.h
index aec8a0d..74d406f 100644
--- a/libc/include/llvm-libc-macros/linux/fcntl-macros.h
+++ b/libc/include/llvm-libc-macros/linux/fcntl-macros.h
@@ -61,6 +61,9 @@
// Allow empty relative pathname.
#define AT_EMPTY_PATH 0x1000
+// Perform access checks using the effective user and group IDs.
+#define AT_EACCESS 0x200
+
// Values of SYS_fcntl commands.
#define F_DUPFD 0
#define F_GETFD 1
diff --git a/libc/include/sys/syscall.h.def b/libc/include/sys/syscall.h.def
index 6d74cc6..60e5024 100644
--- a/libc/include/sys/syscall.h.def
+++ b/libc/include/sys/syscall.h.def
@@ -309,6 +309,10 @@
#define SYS_faccessat __NR_faccessat
#endif
+#ifdef __NR_faccessat2
+#define SYS_faccessat2 __NR_faccessat2
+#endif
+
#ifdef __NR_fadvise64
#define SYS_fadvise64 __NR_fadvise64
#endif
diff --git a/libc/include/unistd.yaml b/libc/include/unistd.yaml
index 3ba3ec7..2ff86ea 100644
--- a/libc/include/unistd.yaml
+++ b/libc/include/unistd.yaml
@@ -96,6 +96,15 @@ functions:
- type: const char *
- type: __exec_argv_t
- type: __exec_envp_t
+ - name: faccessat
+ standards:
+ - POSIX
+ return_type: int
+ arguments:
+ - type: int
+ - type: const char *
+ - type: int
+ - type: int
- name: fchdir
standards:
- POSIX
diff --git a/libc/src/unistd/CMakeLists.txt b/libc/src/unistd/CMakeLists.txt
index c66a3a4..78c3bf8 100644
--- a/libc/src/unistd/CMakeLists.txt
+++ b/libc/src/unistd/CMakeLists.txt
@@ -56,6 +56,13 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ faccessat
+ ALIAS
+ DEPENDS
+ .${LIBC_TARGET_OS}.faccessat
+)
+
+add_entrypoint_object(
fchdir
ALIAS
DEPENDS
diff --git a/libc/src/unistd/faccessat.h b/libc/src/unistd/faccessat.h
new file mode 100644
index 0000000..0dc834d
--- /dev/null
+++ b/libc/src/unistd/faccessat.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for faccessat ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_UNISTD_FACCESSAT_H
+#define LLVM_LIBC_SRC_UNISTD_FACCESSAT_H
+
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+int faccessat(int fd, const char *path, int amode, int flag);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_UNISTD_FACCESSAT_H
diff --git a/libc/src/unistd/linux/CMakeLists.txt b/libc/src/unistd/linux/CMakeLists.txt
index 2d510f3..dff6ba2 100644
--- a/libc/src/unistd/linux/CMakeLists.txt
+++ b/libc/src/unistd/linux/CMakeLists.txt
@@ -81,6 +81,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ faccessat
+ SRCS
+ faccessat.cpp
+ HDRS
+ ../faccessat.h
+ DEPENDS
+ libc.hdr.fcntl_macros
+ libc.include.sys_syscall
+ libc.src.__support.OSUtil.osutil
+ libc.src.errno.errno
+)
+
+add_entrypoint_object(
fchdir
SRCS
fchdir.cpp
diff --git a/libc/src/unistd/linux/access.cpp b/libc/src/unistd/linux/access.cpp
index 55cd6ad..f06eec5 100644
--- a/libc/src/unistd/linux/access.cpp
+++ b/libc/src/unistd/linux/access.cpp
@@ -23,7 +23,7 @@ LLVM_LIBC_FUNCTION(int, access, (const char *path, int mode)) {
int ret = LIBC_NAMESPACE::syscall_impl<int>(SYS_access, path, mode);
#elif defined(SYS_faccessat)
int ret =
- LIBC_NAMESPACE::syscall_impl<int>(SYS_faccessat, AT_FDCWD, path, mode, 0);
+ LIBC_NAMESPACE::syscall_impl<int>(SYS_faccessat, AT_FDCWD, path, mode);
#else
#error "access and faccessat syscalls not available."
#endif
diff --git a/libc/src/unistd/linux/faccessat.cpp b/libc/src/unistd/linux/faccessat.cpp
new file mode 100644
index 0000000..7a2a29c
--- /dev/null
+++ b/libc/src/unistd/linux/faccessat.cpp
@@ -0,0 +1,37 @@
+//===-- Linux implementation of faccessat ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/unistd/faccessat.h"
+
+#include "src/__support/OSUtil/syscall.h" // For internal syscall function.
+#include "src/__support/common.h"
+
+#include "hdr/fcntl_macros.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/config.h"
+#include <sys/syscall.h> // For syscall numbers.
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(int, faccessat,
+ (int fd, const char *path, int amode, int flag)) {
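+ // The plain faccessat syscall has no flag argument, so the flag-aware
+ // faccessat2 syscall is used here.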
+#ifdef SYS_faccessat2
+ int ret =
+ LIBC_NAMESPACE::syscall_impl<int>(SYS_faccessat2, fd, path, amode, flag);
+#else
+#error "faccessat2 syscall is not available."
+#endif
+
+ if (ret < 0) {
+ libc_errno = -ret;
+ return -1;
+ }
+ return 0;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/src/unistd/CMakeLists.txt b/libc/test/src/unistd/CMakeLists.txt
index 6630a7e..44f28ff 100644
--- a/libc/test/src/unistd/CMakeLists.txt
+++ b/libc/test/src/unistd/CMakeLists.txt
@@ -94,6 +94,23 @@ add_libc_unittest(
)
add_libc_unittest(
+ faccessat_test
+ SUITE
+ libc_unistd_unittests
+ SRCS
+ faccessat_test.cpp
+ DEPENDS
+ libc.include.unistd
+ libc.src.errno.errno
+ libc.src.fcntl.open
+ libc.src.unistd.faccessat
+ libc.src.unistd.close
+ libc.src.unistd.unlink
+ libc.test.UnitTest.ErrnoCheckingTest
+ libc.test.UnitTest.ErrnoSetterMatcher
+)
+
+add_libc_unittest(
fchdir_test
SUITE
libc_unistd_unittests
diff --git a/libc/test/src/unistd/faccessat_test.cpp b/libc/test/src/unistd/faccessat_test.cpp
new file mode 100644
index 0000000..6280b14
--- /dev/null
+++ b/libc/test/src/unistd/faccessat_test.cpp
@@ -0,0 +1,115 @@
+//===-- Unittests for faccessat -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/fcntl/open.h"
+#include "src/unistd/close.h"
+#include "src/unistd/faccessat.h"
+#include "src/unistd/unlink.h"
+#include "test/UnitTest/ErrnoCheckingTest.h"
+#include "test/UnitTest/ErrnoSetterMatcher.h"
+#include "test/UnitTest/Test.h"
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+namespace {
+
+using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Fails;
+using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Succeeds;
+
+using LlvmLibcFaccessatTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
+
+TEST_F(LlvmLibcFaccessatTest, WithAtFdcwd) {
+ // Test access checks on a file with AT_FDCWD and no flags, equivalent to
+ // access().
+ constexpr const char *FILENAME = "faccessat_basic.test";
+ auto TEST_FILE = libc_make_test_file_path(FILENAME);
+
+ // Check permissions on a file with full permissions
+ int fd = LIBC_NAMESPACE::open(TEST_FILE, O_WRONLY | O_CREAT, S_IRWXU);
+ ASSERT_ERRNO_SUCCESS();
+ ASSERT_GT(fd, 0);
+ ASSERT_THAT(LIBC_NAMESPACE::close(fd), Succeeds(0));
+
+ ASSERT_THAT(LIBC_NAMESPACE::faccessat(AT_FDCWD, TEST_FILE, F_OK, 0),
+ Succeeds(0));
+ ASSERT_THAT(
+ LIBC_NAMESPACE::faccessat(AT_FDCWD, TEST_FILE, X_OK | W_OK | R_OK, 0),
+ Succeeds(0));
+ ASSERT_THAT(LIBC_NAMESPACE::unlink(TEST_FILE), Succeeds(0));
+
+ // Check permissions on a file with execute-only permission
+ fd = LIBC_NAMESPACE::open(TEST_FILE, O_WRONLY | O_CREAT, S_IXUSR);
+ ASSERT_ERRNO_SUCCESS();
+ ASSERT_GT(fd, 0);
+ ASSERT_THAT(LIBC_NAMESPACE::close(fd), Succeeds(0));
+
+ ASSERT_THAT(LIBC_NAMESPACE::faccessat(AT_FDCWD, TEST_FILE, F_OK, 0),
+ Succeeds(0));
+ ASSERT_THAT(LIBC_NAMESPACE::faccessat(AT_FDCWD, TEST_FILE, X_OK, 0),
+ Succeeds(0));
+ ASSERT_THAT(LIBC_NAMESPACE::faccessat(AT_FDCWD, TEST_FILE, R_OK, 0),
+ Fails(EACCES));
+ ASSERT_THAT(LIBC_NAMESPACE::faccessat(AT_FDCWD, TEST_FILE, W_OK, 0),
+ Fails(EACCES));
+ ASSERT_THAT(LIBC_NAMESPACE::unlink(TEST_FILE), Succeeds(0));
+}
+
+TEST_F(LlvmLibcFaccessatTest, NonExistentFile) {
+ ASSERT_THAT(LIBC_NAMESPACE::faccessat(AT_FDCWD, "faccessat_nonexistent.test",
+ F_OK, 0),
+ Fails(ENOENT));
+}
+
+TEST_F(LlvmLibcFaccessatTest, AtEaccess) {
+ // With AT_EACCESS, faccessat checks permissions using the effective user ID,
+ // but the effective and real user IDs will be the same here and changing that
+ // is not feasible in a test, so this is just a basic sanity check.
+ constexpr const char *FILENAME = "faccessat_eaccess.test";
+ auto TEST_FILE = libc_make_test_file_path(FILENAME);
+
+ int fd = LIBC_NAMESPACE::open(TEST_FILE, O_WRONLY | O_CREAT, S_IRWXU);
+ ASSERT_ERRNO_SUCCESS();
+ ASSERT_GT(fd, 0);
+ ASSERT_THAT(LIBC_NAMESPACE::close(fd), Succeeds(0));
+
+ ASSERT_THAT(LIBC_NAMESPACE::faccessat(AT_FDCWD, TEST_FILE, X_OK | W_OK | R_OK,
+ AT_EACCESS),
+ Succeeds(0));
+
+ ASSERT_THAT(LIBC_NAMESPACE::unlink(TEST_FILE), Succeeds(0));
+}
+
+TEST_F(LlvmLibcFaccessatTest, AtEmptyPath) {
+ constexpr const char *FILENAME = "faccessat_atemptypath.test";
+ auto TEST_FILE = libc_make_test_file_path(FILENAME);
+
+ int fd = LIBC_NAMESPACE::open(TEST_FILE, O_WRONLY | O_CREAT, S_IRWXU);
+ ASSERT_ERRNO_SUCCESS();
+ ASSERT_GT(fd, 0);
+
+ // Check permissions on the file referred to by fd
+ ASSERT_THAT(LIBC_NAMESPACE::faccessat(fd, "", F_OK, AT_EMPTY_PATH),
+ Succeeds(0));
+ ASSERT_THAT(
+ LIBC_NAMESPACE::faccessat(fd, "", X_OK | W_OK | R_OK, AT_EMPTY_PATH),
+ Succeeds(0));
+
+ ASSERT_THAT(LIBC_NAMESPACE::close(fd), Succeeds(0));
+ ASSERT_THAT(LIBC_NAMESPACE::unlink(TEST_FILE), Succeeds(0));
+
+ // Check permissions on the current working directory
+ ASSERT_THAT(LIBC_NAMESPACE::faccessat(AT_FDCWD, "", F_OK, AT_EMPTY_PATH),
+ Succeeds(0));
+ ASSERT_THAT(LIBC_NAMESPACE::faccessat(AT_FDCWD, "", X_OK | W_OK | R_OK,
+ AT_EMPTY_PATH),
+ Succeeds(0));
+}
+
+} // namespace
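
The tests above exercise AT_FDCWD, AT_EACCESS and AT_EMPTY_PATH handling through the libc wrapper. As a rough orientation only, a user-space wrapper along these lines could forward those flags to the kernel; my_faccessat, the fallback policy, and the flag handling below are invented for illustration and are not the LLVM-libc implementation under test.

// Hypothetical sketch (Linux assumed): forward to faccessat2 when available,
// since only that syscall accepts a flags argument; otherwise fall back to the
// legacy faccessat syscall, which has no flags parameter.
#include <cerrno>
#include <fcntl.h>      // AT_FDCWD and other AT_* constants
#include <sys/syscall.h>
#include <unistd.h>

static int my_faccessat(int dirfd, const char *path, int mode, int flags) {
#ifdef SYS_faccessat2
  long ret = ::syscall(SYS_faccessat2, dirfd, path, mode, flags);
  if (ret == 0 || errno != ENOSYS)
    return static_cast<int>(ret);
#endif
  // A real libc would emulate AT_EACCESS/AT_EMPTY_PATH here instead of
  // rejecting them outright.
  if (flags != 0) {
    errno = EINVAL;
    return -1;
  }
  return static_cast<int>(::syscall(SYS_faccessat, dirfd, path, mode));
}

int main() {
  // Equivalent to access("/tmp", F_OK).
  return my_faccessat(AT_FDCWD, "/tmp", F_OK, 0) == 0 ? 0 : 1;
}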
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index a59cc06..3676b88 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -2104,18 +2104,18 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
config->dtltoDistributor = args.getLastArgValue(OPT_thinlto_distributor);
// Handle /thinlto-distributor-arg:<arg>
- for (auto *arg : args.filtered(OPT_thinlto_distributor_arg))
- config->dtltoDistributorArgs.push_back(arg->getValue());
+ config->dtltoDistributorArgs =
+ args::getStrings(args, OPT_thinlto_distributor_arg);
// Handle /thinlto-remote-compiler:<path>
- config->dtltoCompiler = args.getLastArgValue(OPT_thinlto_compiler);
+ config->dtltoCompiler = args.getLastArgValue(OPT_thinlto_remote_compiler);
if (!config->dtltoDistributor.empty() && config->dtltoCompiler.empty())
Err(ctx) << "A value must be specified for /thinlto-remote-compiler if "
"/thinlto-distributor is specified.";
// Handle /thinlto-remote-compiler-arg:<arg>
- for (auto *arg : args.filtered(OPT_thinlto_compiler_arg))
- config->dtltoCompilerArgs.push_back(arg->getValue());
+ config->dtltoCompilerArgs =
+ args::getStrings(args, OPT_thinlto_remote_compiler_arg);
// Handle /dwodir
config->dwoDir = args.getLastArgValue(OPT_dwodir);
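
The changes above replace hand-written filtered loops with a single helper call that gathers every value of a repeatable option. A minimal stand-alone sketch of that pattern, with an invented collectValues helper and toy Arg type rather than lld's actual args::getStrings:

// Collect every occurrence of a repeatable option into a vector, preserving
// command-line order, instead of appending inside a hand-written loop.
#include <string>
#include <vector>

struct Arg { int id; std::string value; };

static std::vector<std::string> collectValues(const std::vector<Arg> &args,
                                              int id) {
  std::vector<std::string> values;
  for (const Arg &a : args)
    if (a.id == id)
      values.push_back(a.value);
  return values;
}

int main() {
  const int OPT_distributor_arg = 1;
  std::vector<Arg> args = {{1, "--jobs=8"}, {0, "other"}, {1, "--verbose"}};
  // Before: a loop pushing arg->getValue(); after: one helper call.
  auto distributorArgs = collectValues(args, OPT_distributor_arg);
  return distributorArgs.size() == 2 ? 0 : 1;
}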
diff --git a/lld/COFF/Options.td b/lld/COFF/Options.td
index 485db5a..f3d0eb3 100644
--- a/lld/COFF/Options.td
+++ b/lld/COFF/Options.td
@@ -289,10 +289,10 @@ def thinlto_distributor : P<"thinlto-distributor",
"backend compilations will be distributed">;
def thinlto_distributor_arg : P<"thinlto-distributor-arg",
"Arguments to pass to the ThinLTO distributor">;
-def thinlto_compiler : P<"thinlto-remote-compiler",
+def thinlto_remote_compiler : P<"thinlto-remote-compiler",
"Compiler for the ThinLTO distributor to invoke for ThinLTO backend "
"compilations">;
-def thinlto_compiler_arg : P<"thinlto-remote-compiler-arg",
+def thinlto_remote_compiler_arg : P<"thinlto-remote-compiler-arg",
"Compiler arguments for the ThinLTO distributor to pass for ThinLTO backend "
"compilations">;
def lto_obj_path : P<
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 1beab8d..62f7fff 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1399,8 +1399,9 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
ctx.arg.dtltoDistributor = args.getLastArgValue(OPT_thinlto_distributor_eq);
ctx.arg.dtltoDistributorArgs =
args::getStrings(args, OPT_thinlto_distributor_arg);
- ctx.arg.dtltoCompiler = args.getLastArgValue(OPT_thinlto_compiler_eq);
- ctx.arg.dtltoCompilerArgs = args::getStrings(args, OPT_thinlto_compiler_arg);
+ ctx.arg.dtltoCompiler = args.getLastArgValue(OPT_thinlto_remote_compiler_eq);
+ ctx.arg.dtltoCompilerArgs =
+ args::getStrings(args, OPT_thinlto_remote_compiler_arg);
ctx.arg.dwoDir = args.getLastArgValue(OPT_plugin_opt_dwo_dir_eq);
ctx.arg.dynamicLinker = getDynamicLinker(ctx, args);
ctx.arg.ehFrameHdr =
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index f052318..0d6dda4 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -722,11 +722,11 @@ def thinlto_distributor_eq: JJ<"thinlto-distributor=">,
"ThinLTO backend compilations will be distributed">;
defm thinlto_distributor_arg: EEq<"thinlto-distributor-arg", "Arguments to "
"pass to the ThinLTO distributor">;
-def thinlto_compiler_eq: JJ<"thinlto-remote-compiler=">,
+def thinlto_remote_compiler_eq: JJ<"thinlto-remote-compiler=">,
HelpText<"Compiler for the ThinLTO distributor to invoke for ThinLTO backend "
"compilations">;
-defm thinlto_compiler_arg: EEq<"thinlto-remote-compiler-arg", "Compiler "
- "arguments for the ThinLTO distributor to pass for ThinLTO backend "
+defm thinlto_remote_compiler_arg: EEq<"thinlto-remote-compiler-arg",
+ "Compiler arguments for the ThinLTO distributor to pass for ThinLTO backend "
"compilations">;
defm fat_lto_objects: BB<"fat-lto-objects",
"Use the .llvm.lto section, which contains LLVM bitcode, in fat LTO object files to perform LTO.",
diff --git a/lldb/source/API/SBTarget.cpp b/lldb/source/API/SBTarget.cpp
index 90ffe78..0d03250 100644
--- a/lldb/source/API/SBTarget.cpp
+++ b/lldb/source/API/SBTarget.cpp
@@ -255,7 +255,7 @@ SBProcess SBTarget::LoadCore(const char *core_file, lldb::SBError &error) {
ProcessSP process_sp(target_sp->CreateProcess(
target_sp->GetDebugger().GetListener(), "", &filespec, false));
if (process_sp) {
- ElapsedTime loadCoreTime(target_sp->GetStatistics().GetLoadCoreTime());
+ ElapsedTime load_core_time(target_sp->GetStatistics().GetLoadCoreTime());
error.SetError(process_sp->LoadCore());
if (error.Success())
sb_process.SetSP(process_sp);
diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp
index b5fc49d..c59d028 100644
--- a/lldb/source/Commands/CommandObjectTarget.cpp
+++ b/lldb/source/Commands/CommandObjectTarget.cpp
@@ -419,7 +419,7 @@ protected:
// Seems weird that we Launch a core file, but that is what we
// do!
{
- ElapsedTime loadCoreTime(
+ ElapsedTime load_core_time(
target_sp->GetStatistics().GetLoadCoreTime());
error = process_sp->LoadCore();
}
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp
index 924953c..3c49c91 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp
@@ -792,7 +792,7 @@ ClangExpressionParser::ClangExpressionParser(
// 6. Set up the source management objects inside the compiler
m_compiler->createFileManager();
if (!m_compiler->hasSourceManager())
- m_compiler->createSourceManager(m_compiler->getFileManager());
+ m_compiler->createSourceManager();
m_compiler->createPreprocessor(TU_Complete);
switch (expr.Language().AsLanguageType()) {
diff --git a/llvm/docs/TableGen/ProgRef.rst b/llvm/docs/TableGen/ProgRef.rst
index 2b1af05..0ff4cc7 100644
--- a/llvm/docs/TableGen/ProgRef.rst
+++ b/llvm/docs/TableGen/ProgRef.rst
@@ -64,7 +64,7 @@ Classes and concrete records have a unique *name*, either chosen by
the programmer or generated by TableGen. Associated with that name
is a list of *fields* with values and an optional list of *parent classes*
(sometimes called base or super classes). The fields are the primary data that
-backends will process. Note that TableGen assigns no meanings to fields; the
+backends will process. Note that TableGen assigns no meaning to fields; the
meanings are entirely up to the backends and the programs that incorporate
the output of those backends.
@@ -243,7 +243,7 @@ Include files
-------------
TableGen has an include mechanism. The content of the included file
-lexically replaces the ``include`` directive and is then parsed as if it was
+lexically replaces the ``include`` directive and is then parsed as if it were
originally in the main file.
.. productionlist::
@@ -670,17 +670,17 @@ name of a multiclass.
The argument values can be specified in two forms:
* Positional argument (``value``). The value is assigned to the argument in the
- corresponding position. For ``Foo<a0, a1>``, ``a0`` will be assigned to first
- argument and ``a1`` will be assigned to second argument.
+ corresponding position. For ``Foo<a0, a1>``, ``a0`` will be assigned to the first
+ argument and ``a1`` will be assigned to the second argument.
* Named argument (``name=value``). The value is assigned to the argument with
the specified name. For ``Foo<a=a0, b=a1>``, ``a0`` will be assigned to the
argument with name ``a`` and ``a1`` will be assigned to the argument with
name ``b``.
-Required arguments can also be specified as named argument.
+Required arguments can also be specified as named arguments.
Note that the argument can only be specified once regardless of the way (named
-or positional) to specify and positional arguments should be put before named
+or positional) it is specified, and positional arguments should precede named
arguments.
.. productionlist::
@@ -817,7 +817,7 @@ type. It provides a single field, ``Value``, which holds a 3-bit number. Its
template argument, ``val``, is used to set the ``Value`` field. Each of the
eight records is defined with ``FPFormat`` as its parent class. The
enumeration value is passed in angle brackets as the template argument. Each
-record will inherent the ``Value`` field with the appropriate enumeration
+record will inherit the ``Value`` field with the appropriate enumeration
value.
Here is a more complex example of classes with template arguments. First, we
@@ -1308,7 +1308,7 @@ with ``F0``, ``F1``, ``F2``, and ``F3``.
-------------------------------------
A ``dump`` statement prints the input string to standard error
-output. It is intended for debugging purpose.
+output. It is intended for debugging purposes.
* At top level, the message is printed immediately.
@@ -1727,7 +1727,7 @@ and non-0 as true.
``!div(``\ *a*\ ``,`` *b*\ ``)``
This operator performs signed division of *a* by *b*, and produces the quotient.
- Division by 0 produces an error. Division of INT64_MIN by -1 produces an error.
+ Division by 0 produces an error. Division of ``INT64_MIN`` by -1 produces an error.
``!empty(``\ *a*\ ``)``
This operator produces 1 if the string, list, or DAG *a* is empty; 0 otherwise.
@@ -1914,7 +1914,7 @@ and non-0 as true.
``!or(``\ *a*\ ``,`` *b*\ ``, ...)``
This operator does a bitwise OR on *a*, *b*, etc., and produces the
result. A logical OR can be performed if all the arguments are either
- 0 or 1. This operator is short-circuit to -1 (all ones) the left-most
+ 0 or 1. This operator short-circuits to -1 (all ones) when the left-most
operand is -1.
``!range([``\ *start*\ ``,]`` *end*\ ``[,``\ *step*\ ``])``
@@ -1937,7 +1937,7 @@ and non-0 as true.
Equivalent to ``!range(0, !size(list))``.
``!repr(``\ *value*\ ``)``
- Represents *value* as a string. String format for the value is not
+ Represents *value* as a string. The string format for the value is not
guaranteed to be stable. Intended for debugging purposes only.
``!setdagarg(``\ *dag*\ ``,``\ *key*\ ``,``\ *arg*\ ``)``
diff --git a/llvm/include/llvm/ADT/BitVector.h b/llvm/include/llvm/ADT/BitVector.h
index 83350e6..9e81a4b 100644
--- a/llvm/include/llvm/ADT/BitVector.h
+++ b/llvm/include/llvm/ADT/BitVector.h
@@ -570,10 +570,7 @@ public:
template <class F, class... ArgTys>
static BitVector &apply(F &&f, BitVector &Out, BitVector const &Arg,
ArgTys const &...Args) {
- assert(llvm::all_of(
- std::initializer_list<unsigned>{Args.size()...},
- [&Arg](auto const &BV) { return Arg.size() == BV; }) &&
- "consistent sizes");
+ assert(((Arg.size() == Args.size()) && ...) && "consistent sizes");
Out.resize(Arg.size());
for (size_type I = 0, E = Arg.Bits.size(); I != E; ++I)
Out.Bits[I] = f(Arg.Bits[I], Args.Bits[I]...);
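
The new assert relies on a C++17 fold expression to check every pack element against Arg without materializing an initializer_list. A self-contained sketch of the same pattern, using an invented allSameSize helper:

// "Every pack element satisfies a predicate" via a unary right fold over &&.
#include <cassert>
#include <vector>

template <class First, class... Rest>
bool allSameSize(const First &first, const Rest &...rest) {
  // An empty pack yields true, matching the fold's identity for &&.
  return ((first.size() == rest.size()) && ...);
}

int main() {
  std::vector<int> a(4), b(4), c(4);
  assert(allSameSize(a, b, c));  // all sizes match
  std::vector<int> d(3);
  assert(!allSameSize(a, b, d)); // mismatch detected
  return 0;
}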
diff --git a/llvm/include/llvm/ADT/ConcurrentHashtable.h b/llvm/include/llvm/ADT/ConcurrentHashtable.h
index 6de194d..6a943c5 100644
--- a/llvm/include/llvm/ADT/ConcurrentHashtable.h
+++ b/llvm/include/llvm/ADT/ConcurrentHashtable.h
@@ -253,9 +253,8 @@ public:
OS << "\nOverall number of entries = " << OverallNumberOfEntries;
OS << "\nOverall number of non empty buckets = " << NumberOfNonEmptyBuckets;
- for (auto &BucketSize : BucketSizesMap)
- OS << "\n Number of buckets with size " << BucketSize.first << ": "
- << BucketSize.second;
+ for (auto [Size, Count] : BucketSizesMap)
+ OS << "\n Number of buckets with size " << Size << ": " << Count;
std::stringstream stream;
stream << std::fixed << std::setprecision(2)
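
The cleanup above swaps .first/.second access for structured bindings when walking the bucket-size map. A minimal stand-alone equivalent, with illustrative names:

// Iterate a map of bucket-size -> count with structured bindings.
#include <cstdio>
#include <map>

int main() {
  std::map<unsigned, unsigned> bucketSizes = {{1, 10}, {2, 4}, {5, 1}};
  for (auto [size, count] : bucketSizes)
    std::printf("Number of buckets with size %u: %u\n", size, count);
  return 0;
}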
diff --git a/llvm/include/llvm/ADT/DirectedGraph.h b/llvm/include/llvm/ADT/DirectedGraph.h
index 83c0bea..fb6b180 100644
--- a/llvm/include/llvm/ADT/DirectedGraph.h
+++ b/llvm/include/llvm/ADT/DirectedGraph.h
@@ -181,16 +181,6 @@ public:
DirectedGraph() = default;
explicit DirectedGraph(NodeType &N) : Nodes() { addNode(N); }
- DirectedGraph(const DGraphType &G) : Nodes(G.Nodes) {}
- DirectedGraph(DGraphType &&RHS) : Nodes(std::move(RHS.Nodes)) {}
- DGraphType &operator=(const DGraphType &G) {
- Nodes = G.Nodes;
- return *this;
- }
- DGraphType &operator=(const DGraphType &&G) {
- Nodes = std::move(G.Nodes);
- return *this;
- }
const_iterator begin() const { return Nodes.begin(); }
const_iterator end() const { return Nodes.end(); }
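
The deleted members only copied or moved the Nodes container, so the implicitly generated special members do exactly the same job (the rule of zero). A small sketch of that reasoning with an invented Graph type standing in for DirectedGraph:

// When every special member would just copy/move a single container, omitting
// them lets the compiler generate correct versions automatically.
#include <cstddef>
#include <utility>
#include <vector>

template <class NodeT> class Graph {
  std::vector<NodeT *> Nodes;

public:
  Graph() = default;
  explicit Graph(NodeT &N) { Nodes.push_back(&N); }
  // No user-declared copy/move members: the implicit ones copy or move Nodes
  // element-wise, matching what the hand-written ones did.
  std::size_t size() const { return Nodes.size(); }
};

int main() {
  int a = 1;
  Graph<int> g(a);
  Graph<int> copy = g;             // implicit copy
  Graph<int> moved = std::move(g); // implicit move
  return copy.size() == 1 && moved.size() == 1 ? 0 : 1;
}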
diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
index be690a4..571caf9 100644
--- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h
+++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
@@ -59,14 +59,6 @@ LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type);
/// True if the AllocTypes bitmask contains just a single type.
LLVM_ABI bool hasSingleAllocType(uint8_t AllocTypes);
-/// Removes any existing "ambiguous" memprof attribute. Called before we apply a
-/// specific allocation type such as "cold", "notcold", or "hot".
-LLVM_ABI void removeAnyExistingAmbiguousAttribute(CallBase *CB);
-
-/// Adds an "ambiguous" memprof attribute to call with a matched allocation
-/// profile but that we haven't yet been able to disambiguate.
-LLVM_ABI void addAmbiguousAttribute(CallBase *CB);
-
/// Class to build a trie of call stack contexts for a particular profiled
/// allocation call, along with their associated allocation types.
/// The allocation will be at the root of the trie, which is then used to
diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index b37c677..50ce931 100644
--- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -501,8 +501,12 @@ public:
StackID == TargetStackID::ScalablePredicateVector;
}
- bool isScalableStackID(int ObjectIdx) const {
+ bool hasScalableStackID(int ObjectIdx) const {
uint8_t StackID = getStackID(ObjectIdx);
+ return isScalableStackID(StackID);
+ }
+
+ bool isScalableStackID(uint8_t StackID) const {
return StackID == TargetStackID::ScalableVector ||
StackID == TargetStackID::ScalablePredicateVector;
}
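
The rename splits the query into a predicate on a raw stack ID plus a convenience lookup by frame-object index. A self-contained sketch of that overload split, with an invented FrameInfo class and enum values rather than the real MachineFrameInfo API:

// One predicate on a raw stack ID, and a per-object query that looks the ID up.
#include <cstdint>
#include <utility>
#include <vector>

enum StackID : uint8_t { Default = 0, ScalableVector = 1, ScalablePredicateVector = 2 };

class FrameInfo {
  std::vector<uint8_t> StackIDs;

public:
  explicit FrameInfo(std::vector<uint8_t> ids) : StackIDs(std::move(ids)) {}

  static bool isScalableStackID(uint8_t ID) {
    return ID == ScalableVector || ID == ScalablePredicateVector;
  }
  bool hasScalableStackID(int ObjectIdx) const {
    return isScalableStackID(StackIDs[ObjectIdx]);
  }
};

int main() {
  FrameInfo MFI({Default, ScalableVector, ScalablePredicateVector});
  return (!MFI.hasScalableStackID(0) && MFI.hasScalableStackID(1) &&
          FrameInfo::isScalableStackID(ScalablePredicateVector))
             ? 0
             : 1;
}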
diff --git a/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h b/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h
index 52af205..ffe0b50 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h
@@ -179,6 +179,7 @@ public:
class DWARFDataExtractorSimple
: public DWARFDataExtractorBase<DWARFDataExtractorSimple> {
+public:
using DWARFDataExtractorBase::DWARFDataExtractorBase;
LLVM_ABI uint64_t getRelocatedValueImpl(uint32_t Size, uint64_t *Off,
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index eb0440f..0622bfa 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -810,6 +810,26 @@ public:
/// Whether the intrinsic is signed or unsigned.
bool isSigned() const { return isSigned(getIntrinsicID()); };
+ /// Whether the intrinsic is a smin or umin.
+ static bool isMin(Intrinsic::ID ID) {
+ switch (ID) {
+ case Intrinsic::umin:
+ case Intrinsic::smin:
+ return true;
+ case Intrinsic::umax:
+ case Intrinsic::smax:
+ return false;
+ default:
+ llvm_unreachable("Invalid intrinsic");
+ }
+ }
+
+ /// Whether the intrinsic is a smin or a umin.
+ bool isMin() const { return isMin(getIntrinsicID()); }
+
+ /// Whether the intrinsic is a smax or a umax.
+ bool isMax() const { return !isMin(getIntrinsicID()); }
+
/// Min/max intrinsics are monotonic, they operate on a fixed-bitwidth values,
/// so there is a certain threshold value, upon reaching which,
/// their value can no longer change. Return said threshold.
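
The new helpers classify the four min/max intrinsics once by ID and derive isMax as the negation of isMin. A stand-alone sketch of that classification, using an invented IntrinsicID enum rather than llvm::Intrinsic::ID:

// Min-ness decided by ID; max-ness is its negation for the four intrinsics.
#include <cstdlib>

enum class IntrinsicID { umin, smin, umax, smax };

static bool isMin(IntrinsicID ID) {
  switch (ID) {
  case IntrinsicID::umin:
  case IntrinsicID::smin:
    return true;
  case IntrinsicID::umax:
  case IntrinsicID::smax:
    return false;
  }
  std::abort(); // mirrors llvm_unreachable for invalid IDs
}

static bool isMax(IntrinsicID ID) { return !isMin(ID); }

int main() {
  return (isMin(IntrinsicID::smin) && isMax(IntrinsicID::umax)) ? 0 : 1;
}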
diff --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp
index 11602d2..0c1f8db 100644
--- a/llvm/lib/Analysis/MemoryProfileInfo.cpp
+++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp
@@ -125,24 +125,6 @@ bool llvm::memprof::hasSingleAllocType(uint8_t AllocTypes) {
return NumAllocTypes == 1;
}
-void llvm::memprof::removeAnyExistingAmbiguousAttribute(CallBase *CB) {
- if (!CB->hasFnAttr("memprof"))
- return;
- assert(CB->getFnAttr("memprof").getValueAsString() == "ambiguous");
- CB->removeFnAttr("memprof");
-}
-
-void llvm::memprof::addAmbiguousAttribute(CallBase *CB) {
- // We may have an existing ambiguous attribute if we are reanalyzing
- // after inlining.
- if (CB->hasFnAttr("memprof")) {
- assert(CB->getFnAttr("memprof").getValueAsString() == "ambiguous");
- } else {
- auto A = llvm::Attribute::get(CB->getContext(), "memprof", "ambiguous");
- CB->addFnAttr(A);
- }
-}
-
void CallStackTrie::addCallStack(
AllocationType AllocType, ArrayRef<uint64_t> StackIds,
std::vector<ContextTotalSize> ContextSizeInfo) {
@@ -488,9 +470,6 @@ void CallStackTrie::addSingleAllocTypeAttribute(CallBase *CI, AllocationType AT,
StringRef Descriptor) {
auto AllocTypeString = getAllocTypeAttributeString(AT);
auto A = llvm::Attribute::get(CI->getContext(), "memprof", AllocTypeString);
- // After inlining we may be able to convert an existing ambiguous allocation
- // to an unambiguous one.
- removeAnyExistingAmbiguousAttribute(CI);
CI->addFnAttr(A);
if (MemProfReportHintedSizes) {
std::vector<ContextTotalSize> ContextSizeInfo;
@@ -550,7 +529,6 @@ bool CallStackTrie::buildAndAttachMIBMetadata(CallBase *CI) {
assert(MIBCallStack.size() == 1 &&
"Should only be left with Alloc's location in stack");
CI->setMetadata(LLVMContext::MD_memprof, MDNode::get(Ctx, MIBNodes));
- addAmbiguousAttribute(CI);
return true;
}
// If there exists corner case that CallStackTrie has one chain to leaf
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 62fb5eb..3cfe7cc 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -1889,11 +1889,12 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
bool IsBitfield = DT->isBitField();
// Handle the size.
- if (auto *Var = dyn_cast_or_null<DIVariable>(DT->getRawSizeInBits())) {
+ if (DT->getRawSizeInBits() == nullptr) {
+ // No size, just ignore.
+ } else if (auto *Var = dyn_cast<DIVariable>(DT->getRawSizeInBits())) {
if (auto *VarDIE = getDIE(Var))
addDIEEntry(MemberDie, dwarf::DW_AT_bit_size, *VarDIE);
- } else if (auto *Exp =
- dyn_cast_or_null<DIExpression>(DT->getRawSizeInBits())) {
+ } else if (auto *Exp = dyn_cast<DIExpression>(DT->getRawSizeInBits())) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
DwarfExpr.setMemoryLocationKind();
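
The rewrite hoists the null case out into its own branch so the remaining casts no longer need the _or_null variants. A standard-C++ analogue of that control flow, with invented size-node classes in place of the DI metadata types:

// Handle null once up front, then cast unconditionally in the other branches.
#include <cstdio>

struct SizeNode { virtual ~SizeNode() = default; };
struct VariableSize : SizeNode {};
struct ExpressionSize : SizeNode {};

static void emitBitSize(const SizeNode *RawSize) {
  if (RawSize == nullptr) {
    // No size, just ignore.
  } else if (dynamic_cast<const VariableSize *>(RawSize)) {
    std::puts("emit DW_AT_bit_size as a DIE reference");
  } else if (dynamic_cast<const ExpressionSize *>(RawSize)) {
    std::puts("emit DW_AT_bit_size as a DWARF expression");
  }
}

int main() {
  VariableSize V;
  emitBitSize(nullptr); // explicitly ignored
  emitBitSize(&V);      // takes the variable branch
  return 0;
}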
diff --git a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
index ec75dc3..64e5cd5 100644
--- a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
+++ b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
@@ -72,7 +72,7 @@ struct StackFrameLayoutAnalysis {
: Slot(Idx), Size(MFI.getObjectSize(Idx)),
Align(MFI.getObjectAlign(Idx).value()), Offset(Offset),
SlotTy(Invalid), Scalable(false) {
- Scalable = MFI.isScalableStackID(Idx);
+ Scalable = MFI.hasScalableStackID(Idx);
if (MFI.isSpillSlotObjectIndex(Idx))
SlotTy = SlotType::Spill;
else if (MFI.isFixedObjectIndex(Idx))
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index d14abb4..8623c06 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -5857,7 +5857,7 @@ DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
// practice.
if (Exp == APFloat::IEK_NaN) {
DoubleAPFloat Quiet{Arg};
- Quiet.getFirst().makeQuiet();
+ Quiet.getFirst() = Quiet.getFirst().makeQuiet();
return Quiet;
}
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 79655e1..0f4bbfc3 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -1610,7 +1610,8 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
int BaseOffset = -AFI->getTaggedBasePointerOffset();
Register FrameReg;
StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
- MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
+ MF, BaseOffset, false /*isFixed*/, TargetStackID::Default /*StackID*/,
+ FrameReg,
/*PreferFP=*/false,
/*ForSimm=*/true);
Register SrcReg = FrameReg;
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 20b0d69..8d6eb91 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -56,15 +56,20 @@
// | async context if needed |
// | (a.k.a. "frame record") |
// |-----------------------------------| <- fp(=x29)
-// | <hazard padding> |
-// |-----------------------------------|
-// | |
-// | callee-saved fp/simd/SVE regs |
-// | |
-// |-----------------------------------|
-// | |
-// | SVE stack objects |
-// | |
+// Default SVE stack layout Split SVE objects
+// (aarch64-split-sve-objects=false) (aarch64-split-sve-objects=true)
+// |-----------------------------------| |-----------------------------------|
+// | <hazard padding> | | callee-saved PPR registers |
+// |-----------------------------------| |-----------------------------------|
+// | | | PPR stack objects |
+// | callee-saved fp/simd/SVE regs | |-----------------------------------|
+// | | | <hazard padding> |
+// |-----------------------------------| |-----------------------------------|
+// | | | callee-saved ZPR/FPR registers |
+// | SVE stack objects | |-----------------------------------|
+// | | | ZPR stack objects |
+// |-----------------------------------| |-----------------------------------|
+// ^ NB: FPR CSRs are promoted to ZPRs
// |-----------------------------------|
// |.empty.space.to.make.part.below....|
// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
@@ -274,6 +279,11 @@ static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects",
cl::desc("sort stack allocations"),
cl::init(true), cl::Hidden);
+static cl::opt<bool>
+ SplitSVEObjects("aarch64-split-sve-objects",
+ cl::desc("Split allocation of ZPR & PPR objects"),
+ cl::init(false), cl::Hidden);
+
cl::opt<bool> EnableHomogeneousPrologEpilog(
"homogeneous-prolog-epilog", cl::Hidden,
cl::desc("Emit homogeneous prologue and epilogue for the size "
@@ -324,6 +334,40 @@ AArch64FrameLowering::getArgumentStackToRestore(MachineFunction &MF,
static bool produceCompactUnwindFrame(const AArch64FrameLowering &,
MachineFunction &MF);
+enum class AssignObjectOffsets { No, Yes };
+/// Process all the SVE stack objects and determine the SVE stack size and
+/// offsets of each object. If AssignOffsets is "Yes", the offsets get assigned
+/// (and the SVE stack sizes set). Returns the sizes of the ZPR and PPR stacks.
+static SVEStackSizes determineSVEStackSizes(MachineFunction &MF,
+ AssignObjectOffsets AssignOffsets);
+
+static unsigned getStackHazardSize(const MachineFunction &MF) {
+ return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
+}
+
+/// Returns true if PPRs are spilled as ZPRs.
+static bool arePPRsSpilledAsZPR(const MachineFunction &MF) {
+ return MF.getSubtarget().getRegisterInfo()->getSpillSize(
+ AArch64::PPRRegClass) == 16;
+}
+
+StackOffset
+AArch64FrameLowering::getZPRStackSize(const MachineFunction &MF) const {
+ const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ return StackOffset::getScalable(AFI->getStackSizeZPR());
+}
+
+StackOffset
+AArch64FrameLowering::getPPRStackSize(const MachineFunction &MF) const {
+ // With split SVE objects, the hazard padding is added to the PPR region,
+ // which places it between the [GPR, PPR] area and the [ZPR, FPR] area. This
+ // avoids hazards both between GPRs and FPRs and between PPRs and ZPRs.
+ const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ return StackOffset::get(AFI->hasSplitSVEObjects() ? getStackHazardSize(MF)
+ : 0,
+ AFI->getStackSizePPR());
+}
+
// Conservatively, returns true if the function is likely to have SVE vectors
// on the stack. This function is safe to be called before callee-saves or
// object offsets have been determined.
@@ -338,7 +382,7 @@ static bool isLikelyToHaveSVEStack(const AArch64FrameLowering &AFL,
const MachineFrameInfo &MFI = MF.getFrameInfo();
for (int FI = MFI.getObjectIndexBegin(); FI < MFI.getObjectIndexEnd(); FI++) {
- if (MFI.isScalableStackID(FI))
+ if (MFI.hasScalableStackID(FI))
return true;
}
@@ -482,13 +526,6 @@ AArch64FrameLowering::getFixedObjectSize(const MachineFunction &MF,
}
}
-/// Returns the size of the entire SVE stackframe (calleesaves + spills).
-StackOffset
-AArch64FrameLowering::getSVEStackSize(const MachineFunction &MF) const {
- const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE());
-}
-
bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
if (!EnableRedZone)
return false;
@@ -514,7 +551,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
!Subtarget.hasSVE();
return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize ||
- getSVEStackSize(MF) || LowerQRegCopyThroughMem);
+ AFI->hasSVEStackSize() || LowerQRegCopyThroughMem);
}
/// hasFPImpl - Return true if the specified function should have a dedicated
@@ -557,7 +594,7 @@ bool AArch64FrameLowering::hasFPImpl(const MachineFunction &MF) const {
// CFA in either of these cases.
if (AFI.needsDwarfUnwindInfo(MF) &&
((requiresSaveVG(MF) || AFI.getSMEFnAttrs().hasStreamingBody()) &&
- (!AFI.hasCalculatedStackSizeSVE() || AFI.getStackSizeSVE() > 0)))
+ (!AFI.hasCalculatedStackSizeSVE() || AFI.hasSVEStackSize())))
return true;
// With large callframes around we may need to use FP to access the scavenging
// emergency spillslot.
@@ -1126,10 +1163,6 @@ static bool isTargetWindows(const MachineFunction &MF) {
return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
}
-static unsigned getStackHazardSize(const MachineFunction &MF) {
- return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
-}
-
void AArch64FrameLowering::emitPacRetPlusLeafHardening(
MachineFunction &MF) const {
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
@@ -1212,7 +1245,9 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
const auto &MFI = MF.getFrameInfo();
int64_t ObjectOffset = MFI.getObjectOffset(FI);
- StackOffset SVEStackSize = getSVEStackSize(MF);
+ StackOffset ZPRStackSize = getZPRStackSize(MF);
+ StackOffset PPRStackSize = getPPRStackSize(MF);
+ StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;
// For VLA-area objects, just emit an offset at the end of the stack frame.
// Whilst not quite correct, these objects do live at the end of the frame and
@@ -1228,11 +1263,21 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
bool FPAfterSVECalleeSaves =
isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize();
- if (MFI.isScalableStackID(FI)) {
+ if (MFI.hasScalableStackID(FI)) {
if (FPAfterSVECalleeSaves &&
- -ObjectOffset <= (int64_t)AFI->getSVECalleeSavedStackSize())
+ -ObjectOffset <= (int64_t)AFI->getSVECalleeSavedStackSize()) {
+ assert(!AFI->hasSplitSVEObjects() &&
+ "split-sve-objects not supported with FPAfterSVECalleeSaves");
return StackOffset::getScalable(ObjectOffset);
- return StackOffset::get(-((int64_t)AFI->getCalleeSavedStackSize()),
+ }
+ StackOffset AccessOffset{};
+ // The scalable vectors are below (lower address) the scalable predicates
+ // with split SVE objects, so we must subtract the size of the predicates.
+ if (AFI->hasSplitSVEObjects() &&
+ MFI.getStackID(FI) == TargetStackID::ScalableVector)
+ AccessOffset = -PPRStackSize;
+ return AccessOffset +
+ StackOffset::get(-((int64_t)AFI->getCalleeSavedStackSize()),
ObjectOffset);
}
@@ -1294,14 +1339,15 @@ StackOffset AArch64FrameLowering::resolveFrameIndexReference(
const auto &MFI = MF.getFrameInfo();
int64_t ObjectOffset = MFI.getObjectOffset(FI);
bool isFixed = MFI.isFixedObjectIndex(FI);
- bool isSVE = MFI.isScalableStackID(FI);
- return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, isSVE, FrameReg,
- PreferFP, ForSimm);
+ auto StackID = static_cast<TargetStackID::Value>(MFI.getStackID(FI));
+ return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, StackID,
+ FrameReg, PreferFP, ForSimm);
}
StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
- const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, bool isSVE,
- Register &FrameReg, bool PreferFP, bool ForSimm) const {
+ const MachineFunction &MF, int64_t ObjectOffset, bool isFixed,
+ TargetStackID::Value StackID, Register &FrameReg, bool PreferFP,
+ bool ForSimm) const {
const auto &MFI = MF.getFrameInfo();
const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
@@ -1312,8 +1358,11 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
int64_t Offset = getStackOffset(MF, ObjectOffset).getFixed();
bool isCSR =
!isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI));
+ bool isSVE = MFI.isScalableStackID(StackID);
- const StackOffset &SVEStackSize = getSVEStackSize(MF);
+ StackOffset ZPRStackSize = getZPRStackSize(MF);
+ StackOffset PPRStackSize = getPPRStackSize(MF);
+ StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;
// Use frame pointer to reference fixed objects. Use it for locals if
// there are VLAs or a dynamically realigned SP (and thus the SP isn't
@@ -1388,12 +1437,25 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize();
if (isSVE) {
- StackOffset FPOffset =
- StackOffset::get(-AFI->getCalleeSaveBaseToFrameRecordOffset(), ObjectOffset);
+ StackOffset FPOffset = StackOffset::get(
+ -AFI->getCalleeSaveBaseToFrameRecordOffset(), ObjectOffset);
StackOffset SPOffset =
SVEStackSize +
StackOffset::get(MFI.getStackSize() - AFI->getCalleeSavedStackSize(),
ObjectOffset);
+
+ // With split SVE objects the ObjectOffset is relative to the split area
+ // (i.e. the PPR area or ZPR area respectively).
+ if (AFI->hasSplitSVEObjects() && StackID == TargetStackID::ScalableVector) {
+ // If we're accessing an SVE vector with split SVE objects...
+ // - From the FP we need to move down past the PPR area:
+ FPOffset -= PPRStackSize;
+ // - From the SP we only need to move up to the ZPR area:
+ SPOffset -= PPRStackSize;
+ // Note: `SPOffset = SVEStackSize + ...`, so `-= PPRStackSize` results in
+ // `SPOffset = ZPRStackSize + ...`.
+ }
+
if (FPAfterSVECalleeSaves) {
FPOffset += StackOffset::getScalable(AFI->getSVECalleeSavedStackSize());
if (-ObjectOffset <= (int64_t)AFI->getSVECalleeSavedStackSize()) {
@@ -1401,6 +1463,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
SPOffset += StackOffset::getFixed(AFI->getCalleeSavedStackSize());
}
}
+
// Always use the FP for SVE spills if available and beneficial.
if (hasFP(MF) && (SPOffset.getFixed() ||
FPOffset.getScalable() < SPOffset.getScalable() ||
@@ -1408,13 +1471,13 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
FrameReg = RegInfo->getFrameRegister(MF);
return FPOffset;
}
-
FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister()
: (unsigned)AArch64::SP;
+
return SPOffset;
}
- StackOffset ScalableOffset = {};
+ StackOffset SVEAreaOffset = {};
if (FPAfterSVECalleeSaves) {
// In this stack layout, the FP is in between the callee saves and other
// SVE allocations.
@@ -1422,25 +1485,25 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
StackOffset::getScalable(AFI->getSVECalleeSavedStackSize());
if (UseFP) {
if (isFixed)
- ScalableOffset = SVECalleeSavedStack;
+ SVEAreaOffset = SVECalleeSavedStack;
else if (!isCSR)
- ScalableOffset = SVECalleeSavedStack - SVEStackSize;
+ SVEAreaOffset = SVECalleeSavedStack - SVEStackSize;
} else {
if (isFixed)
- ScalableOffset = SVEStackSize;
+ SVEAreaOffset = SVEStackSize;
else if (isCSR)
- ScalableOffset = SVEStackSize - SVECalleeSavedStack;
+ SVEAreaOffset = SVEStackSize - SVECalleeSavedStack;
}
} else {
if (UseFP && !(isFixed || isCSR))
- ScalableOffset = -SVEStackSize;
+ SVEAreaOffset = -SVEStackSize;
if (!UseFP && (isFixed || isCSR))
- ScalableOffset = SVEStackSize;
+ SVEAreaOffset = SVEStackSize;
}
if (UseFP) {
FrameReg = RegInfo->getFrameRegister(MF);
- return StackOffset::getFixed(FPOffset) + ScalableOffset;
+ return StackOffset::getFixed(FPOffset) + SVEAreaOffset;
}
// Use the base pointer if we have one.
@@ -1457,7 +1520,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
Offset -= AFI->getLocalStackSize();
}
- return StackOffset::getFixed(Offset) + ScalableOffset;
+ return StackOffset::getFixed(Offset) + SVEAreaOffset;
}
static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
@@ -1614,11 +1677,25 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
RegInc = -1;
FirstReg = Count - 1;
}
+
bool FPAfterSVECalleeSaves = IsWindows && AFI->getSVECalleeSavedStackSize();
- int ScalableByteOffset =
- FPAfterSVECalleeSaves ? 0 : AFI->getSVECalleeSavedStackSize();
+
+ int ZPRByteOffset = 0;
+ int PPRByteOffset = 0;
+ bool SplitPPRs = AFI->hasSplitSVEObjects();
+ if (SplitPPRs) {
+ ZPRByteOffset = AFI->getZPRCalleeSavedStackSize();
+ PPRByteOffset = AFI->getPPRCalleeSavedStackSize();
+ } else if (!FPAfterSVECalleeSaves) {
+ ZPRByteOffset =
+ AFI->getZPRCalleeSavedStackSize() + AFI->getPPRCalleeSavedStackSize();
+ // Unused: Everything goes in ZPR space.
+ PPRByteOffset = 0;
+ }
+
bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace();
Register LastReg = 0;
+ bool HasCSHazardPadding = AFI->hasStackHazardSlotIndex() && !SplitPPRs;
// When iterating backwards, the loop condition relies on unsigned wraparound.
for (unsigned i = FirstReg; i < Count; i += RegInc) {
@@ -1647,8 +1724,12 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
llvm_unreachable("Unsupported register class.");
}
+ int &ScalableByteOffset = RPI.Type == RegPairInfo::PPR && SplitPPRs
+ ? PPRByteOffset
+ : ZPRByteOffset;
+
// Add the stack hazard size as we transition from GPR->FPR CSRs.
- if (AFI->hasStackHazardSlotIndex() &&
+ if (HasCSHazardPadding &&
(!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) &&
AArch64InstrInfo::isFpOrNEON(RPI.Reg1))
ByteOffset += StackFillDir * StackHazardSize;
@@ -1656,7 +1737,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
int Scale = TRI->getSpillSize(*RPI.RC);
// Add the next reg to the pair if it is in the same register class.
- if (unsigned(i + RegInc) < Count && !AFI->hasStackHazardSlotIndex()) {
+ if (unsigned(i + RegInc) < Count && !HasCSHazardPadding) {
MCRegister NextReg = CSI[i + RegInc].getReg();
bool IsFirst = i == FirstReg;
switch (RPI.Type) {
@@ -2203,6 +2284,13 @@ static std::optional<int> getLdStFrameID(const MachineInstr &MI,
return getMMOFrameID(*MI.memoperands_begin(), MFI);
}
+// Returns true if the LDST MachineInstr \p MI is a PPR access.
+static bool isPPRAccess(const MachineInstr &MI) {
+ return MI.getOpcode() != AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO &&
+ MI.getOpcode() != AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO &&
+ AArch64::PPRRegClass.contains(MI.getOperand(0).getReg());
+}
+
// Check if a Hazard slot is needed for the current function, and if so create
// one for it. The index is stored in AArch64FunctionInfo->StackHazardSlotIndex,
// which can be used to determine if any hazard padding is needed.
@@ -2226,25 +2314,50 @@ void AArch64FrameLowering::determineStackHazardSlot(
bool HasFPRCSRs = any_of(SavedRegs.set_bits(), [](unsigned Reg) {
return AArch64::FPR64RegClass.contains(Reg) ||
AArch64::FPR128RegClass.contains(Reg) ||
- AArch64::ZPRRegClass.contains(Reg) ||
- AArch64::PPRRegClass.contains(Reg);
+ AArch64::ZPRRegClass.contains(Reg);
+ });
+ bool HasPPRCSRs = any_of(SavedRegs.set_bits(), [](unsigned Reg) {
+ return AArch64::PPRRegClass.contains(Reg);
});
bool HasFPRStackObjects = false;
- if (!HasFPRCSRs) {
- std::vector<unsigned> FrameObjects(MFI.getObjectIndexEnd());
+ bool HasPPRStackObjects = false;
+ if (!HasFPRCSRs || SplitSVEObjects) {
+ enum SlotType : uint8_t {
+ Unknown = 0,
+ ZPRorFPR = 1 << 0,
+ PPR = 1 << 1,
+ GPR = 1 << 2,
+ LLVM_MARK_AS_BITMASK_ENUM(GPR)
+ };
+
+ // Find stack slots solely used for one kind of register (ZPR, PPR, etc.),
+ // based on the kinds of accesses used in the function.
+ SmallVector<SlotType> SlotTypes(MFI.getObjectIndexEnd(), SlotType::Unknown);
for (auto &MBB : MF) {
for (auto &MI : MBB) {
std::optional<int> FI = getLdStFrameID(MI, MFI);
- if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) {
- if (MFI.isScalableStackID(*FI) || AArch64InstrInfo::isFpOrNEON(MI))
- FrameObjects[*FI] |= 2;
- else
- FrameObjects[*FI] |= 1;
+ if (!FI || *FI < 0 || *FI >= int(SlotTypes.size()))
+ continue;
+ if (MFI.hasScalableStackID(*FI)) {
+ SlotTypes[*FI] |=
+ isPPRAccess(MI) ? SlotType::PPR : SlotType::ZPRorFPR;
+ } else {
+ SlotTypes[*FI] |= AArch64InstrInfo::isFpOrNEON(MI)
+ ? SlotType::ZPRorFPR
+ : SlotType::GPR;
}
}
}
- HasFPRStackObjects =
- any_of(FrameObjects, [](unsigned B) { return (B & 3) == 2; });
+
+ for (int FI = 0; FI < int(SlotTypes.size()); ++FI) {
+ HasFPRStackObjects |= SlotTypes[FI] == SlotType::ZPRorFPR;
+ // For SplitSVEObjects, remember that this stack slot is a predicate; this
+ // will be needed later when determining the frame layout.
+ if (SlotTypes[FI] == SlotType::PPR) {
+ MFI.setStackID(FI, TargetStackID::ScalablePredicateVector);
+ HasPPRStackObjects = true;
+ }
+ }
}
if (HasFPRCSRs || HasFPRStackObjects) {
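
The loop above folds per-access flags into each slot and only treats a slot as purely ZPR/FPR or PPR when exactly one kind of access was seen. A rough stand-alone sketch of that classification, with invented accesses and without the LLVM bitmask-enum machinery:

// Accumulate access-kind bits per frame slot, then classify pure slots.
#include <cstdint>
#include <vector>

enum SlotType : uint8_t { Unknown = 0, ZPRorFPR = 1 << 0, PPR = 1 << 1, GPR = 1 << 2 };

int main() {
  std::vector<uint8_t> slotTypes(3, Unknown);
  // Pretend accesses seen while scanning the function:
  slotTypes[0] |= GPR;      // only GPR loads/stores
  slotTypes[1] |= PPR;      // only predicate spills/fills
  slotTypes[2] |= ZPRorFPR;
  slotTypes[2] |= GPR;      // mixed -> not treated as a pure FPR slot
  bool hasFPRStackObjects = false, hasPPRStackObjects = false;
  for (uint8_t type : slotTypes) {
    hasFPRStackObjects |= (type == ZPRorFPR);
    hasPPRStackObjects |= (type == PPR);
  }
  return (hasPPRStackObjects && !hasFPRStackObjects) ? 0 : 1;
}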
@@ -2253,6 +2366,78 @@ void AArch64FrameLowering::determineStackHazardSlot(
<< StackHazardSize << "\n");
AFI->setStackHazardSlotIndex(ID);
}
+
+ // Determine if we should use SplitSVEObjects. This should only be used if
+ // there's a possibility of a stack hazard between PPRs and ZPRs or FPRs.
+ if (SplitSVEObjects) {
+ if (!HasPPRCSRs && !HasPPRStackObjects) {
+ LLVM_DEBUG(
+ dbgs() << "Not using SplitSVEObjects as no PPRs are on the stack\n");
+ return;
+ }
+
+ if (!HasFPRCSRs && !HasFPRStackObjects) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Not using SplitSVEObjects as no FPRs or ZPRs are on the stack\n");
+ return;
+ }
+
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (MFI.hasVarSizedObjects() || TRI->hasStackRealignment(MF)) {
+ LLVM_DEBUG(dbgs() << "SplitSVEObjects is not supported with variable "
+ "sized objects or realignment\n");
+ return;
+ }
+
+ if (arePPRsSpilledAsZPR(MF)) {
+ LLVM_DEBUG(dbgs() << "SplitSVEObjects is not supported with "
+ "-aarch64-enable-zpr-predicate-spills");
+ return;
+ }
+
+ // If another calling convention is explicitly set, FPRs can't be promoted to
+ // ZPR callee-saves.
+ if (!is_contained({CallingConv::C, CallingConv::Fast,
+ CallingConv::AArch64_SVE_VectorCall},
+ MF.getFunction().getCallingConv())) {
+ LLVM_DEBUG(
+ dbgs() << "Calling convention is not supported with SplitSVEObjects");
+ return;
+ }
+
+ [[maybe_unused]] const AArch64Subtarget &Subtarget =
+ MF.getSubtarget<AArch64Subtarget>();
+ assert(Subtarget.isSVEorStreamingSVEAvailable() &&
+ "Expected SVE to be available for PPRs");
+
+ // With SplitSVEObjects the CS hazard padding is placed between the
+ // PPRs and ZPRs. If there are any FPR CSRs, there would be a hazard between
+ // them and the GPR CSRs. Avoid this by promoting all FPR CSRs to ZPRs.
+ BitVector FPRZRegs(SavedRegs.size());
+ for (size_t Reg = 0, E = SavedRegs.size(); HasFPRCSRs && Reg < E; ++Reg) {
+ BitVector::reference RegBit = SavedRegs[Reg];
+ if (!RegBit)
+ continue;
+ unsigned SubRegIdx = 0;
+ if (AArch64::FPR64RegClass.contains(Reg))
+ SubRegIdx = AArch64::dsub;
+ else if (AArch64::FPR128RegClass.contains(Reg))
+ SubRegIdx = AArch64::zsub;
+ else
+ continue;
+ // Clear the bit for the FPR save.
+ RegBit = false;
+ // Mark that we should save the corresponding ZPR.
+ Register ZReg =
+ TRI->getMatchingSuperReg(Reg, SubRegIdx, &AArch64::ZPRRegClass);
+ FPRZRegs.set(ZReg);
+ }
+ SavedRegs |= FPRZRegs;
+
+ AFI->setSplitSVEObjects(true);
+ LLVM_DEBUG(dbgs() << "SplitSVEObjects enabled!\n");
+ }
}
void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
@@ -2263,10 +2448,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
- const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
unsigned UnspilledCSGPR = AArch64::NoRegister;
unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
@@ -2385,17 +2571,26 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(AArch64::X18);
}
+ // Determine if a Hazard slot should be used and where it should go.
+ // If SplitSVEObjects is used, the hazard padding is placed between the PPRs
+ // and ZPRs. Otherwise, it goes in the callee save area.
+ determineStackHazardSlot(MF, SavedRegs);
+
// Calculates the callee saved stack size.
unsigned CSStackSize = 0;
- unsigned SVECSStackSize = 0;
+ unsigned ZPRCSStackSize = 0;
+ unsigned PPRCSStackSize = 0;
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
for (unsigned Reg : SavedRegs.set_bits()) {
auto *RC = TRI->getMinimalPhysRegClass(Reg);
assert(RC && "expected register class!");
auto SpillSize = TRI->getSpillSize(*RC);
- if (AArch64::PPRRegClass.contains(Reg) ||
- AArch64::ZPRRegClass.contains(Reg))
- SVECSStackSize += SpillSize;
+ bool IsZPR = AArch64::ZPRRegClass.contains(Reg);
+ bool IsPPR = !IsZPR && AArch64::PPRRegClass.contains(Reg);
+ if (IsZPR || (IsPPR && arePPRsSpilledAsZPR(MF)))
+ ZPRCSStackSize += SpillSize;
+ else if (IsPPR)
+ PPRCSStackSize += SpillSize;
else
CSStackSize += SpillSize;
}
@@ -2405,17 +2600,15 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// only 64-bit GPRs can be added to SavedRegs.
unsigned NumSavedRegs = SavedRegs.count();
+ // If we have hazard padding in the CS area add that to the size.
+ if (AFI->isStackHazardIncludedInCalleeSaveArea())
+ CSStackSize += getStackHazardSize(MF);
+
// Increase the callee-saved stack size if the function has streaming mode
// changes, as we will need to spill the value of the VG register.
if (requiresSaveVG(MF))
CSStackSize += 8;
- // Determine if a Hazard slot should be used, and increase the CSStackSize by
- // StackHazardSize if so.
- determineStackHazardSlot(MF, SavedRegs);
- if (AFI->hasStackHazardSlotIndex())
- CSStackSize += getStackHazardSize(MF);
-
// If we must call __arm_get_current_vg in the prologue preserve the LR.
if (requiresSaveVG(MF) && !Subtarget.hasSVE())
SavedRegs.set(AArch64::LR);
@@ -2436,8 +2629,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
});
// If any callee-saved registers are used, the frame cannot be eliminated.
- int64_t SVEStackSize =
- alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16);
+ auto [ZPRLocalStackSize, PPRLocalStackSize] =
+ determineSVEStackSizes(MF, AssignObjectOffsets::No);
+ uint64_t SVELocals = ZPRLocalStackSize + PPRLocalStackSize;
+ uint64_t SVEStackSize =
+ alignTo(ZPRCSStackSize + PPRCSStackSize + SVELocals, 16);
bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;
// The CSR spill slots have not been allocated yet, so estimateStackSize
@@ -2522,7 +2718,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// instructions.
AFI->setCalleeSavedStackSize(AlignedCSStackSize);
AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
- AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16));
+ AFI->setSVECalleeSavedStackSize(ZPRCSStackSize, alignTo(PPRCSStackSize, 16));
}
bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
@@ -2575,7 +2771,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
// Create a hazard slot as we switch between GPR and FPR CSRs.
- if (AFI->hasStackHazardSlotIndex() &&
+ if (AFI->isStackHazardIncludedInCalleeSaveArea() &&
(!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) &&
AArch64InstrInfo::isFpOrNEON(Reg)) {
assert(HazardSlotIndex == std::numeric_limits<int>::max() &&
@@ -2614,7 +2810,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
}
// Add hazard slot in the case where no FPR CSRs are present.
- if (AFI->hasStackHazardSlotIndex() &&
+ if (AFI->isStackHazardIncludedInCalleeSaveArea() &&
HazardSlotIndex == std::numeric_limits<int>::max()) {
HazardSlotIndex = MFI.CreateStackObject(StackHazardSize, Align(8), true);
LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex
@@ -2661,7 +2857,6 @@ static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI,
assert((Max == std::numeric_limits<int>::min() ||
Max + 1 == CS.getFrameIdx()) &&
"SVE CalleeSaves are not consecutive");
-
Min = std::min(Min, CS.getFrameIdx());
Max = std::max(Max, CS.getFrameIdx());
}
@@ -2669,43 +2864,64 @@ static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI,
return Min != std::numeric_limits<int>::max();
}
-// Process all the SVE stack objects and determine offsets for each
-// object. If AssignOffsets is true, the offsets get assigned.
-// Fills in the first and last callee-saved frame indices into
-// Min/MaxCSFrameIndex, respectively.
-// Returns the size of the stack.
-static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
- int &MinCSFrameIndex,
- int &MaxCSFrameIndex,
- bool AssignOffsets) {
+static SVEStackSizes determineSVEStackSizes(MachineFunction &MF,
+ AssignObjectOffsets AssignOffsets) {
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ auto *AFI = MF.getInfo<AArch64FunctionInfo>();
+
+ SVEStackSizes SVEStack{};
+
+ // With SplitSVEObjects we maintain separate stack offsets for predicates
+ // (PPRs) and SVE vectors (ZPRs). When SplitSVEObjects is disabled, predicates
+ // are included in the SVE vector area.
+ uint64_t &ZPRStackTop = SVEStack.ZPRStackSize;
+ uint64_t &PPRStackTop =
+ AFI->hasSplitSVEObjects() ? SVEStack.PPRStackSize : SVEStack.ZPRStackSize;
+
#ifndef NDEBUG
// First process all fixed stack objects.
for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
- assert(!MFI.isScalableStackID(I) &&
+ assert(!MFI.hasScalableStackID(I) &&
"SVE vectors should never be passed on the stack by value, only by "
"reference.");
#endif
- auto Assign = [&MFI](int FI, int64_t Offset) {
+ auto AllocateObject = [&](int FI) {
+ uint64_t &StackTop = MFI.getStackID(FI) == TargetStackID::ScalableVector
+ ? ZPRStackTop
+ : PPRStackTop;
+
+ // FIXME: Given that the length of SVE vectors is not necessarily a power of
+ // two, we'd need to align every object dynamically at runtime if the
+ // alignment is larger than 16. This is not yet supported.
+ Align Alignment = MFI.getObjectAlign(FI);
+ if (Alignment > Align(16))
+ report_fatal_error(
+ "Alignment of scalable vectors > 16 bytes is not yet supported");
+
+ StackTop += MFI.getObjectSize(FI);
+ StackTop = alignTo(StackTop, Alignment);
+
+ assert(StackTop < std::numeric_limits<int64_t>::max() &&
+ "SVE StackTop far too large?!");
+
+ int64_t Offset = -int64_t(StackTop);
+ if (AssignOffsets == AssignObjectOffsets::Yes)
+ MFI.setObjectOffset(FI, Offset);
+
LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n");
- MFI.setObjectOffset(FI, Offset);
};
- int64_t Offset = 0;
-
// Then process all callee saved slots.
+ int MinCSFrameIndex, MaxCSFrameIndex;
if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) {
- // Assign offsets to the callee save slots.
- for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {
- Offset += MFI.getObjectSize(I);
- Offset = alignTo(Offset, MFI.getObjectAlign(I));
- if (AssignOffsets)
- Assign(I, -Offset);
- }
+ for (int FI = MinCSFrameIndex; FI <= MaxCSFrameIndex; ++FI)
+ AllocateObject(FI);
}
- // Ensure that the Callee-save area is aligned to 16bytes.
- Offset = alignTo(Offset, Align(16U));
+ // Ensure the CS area is 16-byte aligned.
+ PPRStackTop = alignTo(PPRStackTop, Align(16U));
+ ZPRStackTop = alignTo(ZPRStackTop, Align(16U));
// Create a buffer of SVE objects to allocate and sort it.
SmallVector<int, 8> ObjectsToAllocate;
@@ -2715,50 +2931,34 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
int StackProtectorFI = -1;
if (MFI.hasStackProtectorIndex()) {
StackProtectorFI = MFI.getStackProtectorIndex();
- if (MFI.isScalableStackID(StackProtectorFI))
+ if (MFI.getStackID(StackProtectorFI) == TargetStackID::ScalableVector)
ObjectsToAllocate.push_back(StackProtectorFI);
}
- for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
- if (!MFI.isScalableStackID(I))
- continue;
- if (I == StackProtectorFI)
+
+ for (int FI = 0, E = MFI.getObjectIndexEnd(); FI != E; ++FI) {
+ if (FI == StackProtectorFI || MFI.isDeadObjectIndex(FI))
continue;
- if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex)
+ if (MaxCSFrameIndex >= FI && FI >= MinCSFrameIndex)
continue;
- if (MFI.isDeadObjectIndex(I))
+
+ if (MFI.getStackID(FI) != TargetStackID::ScalableVector &&
+ MFI.getStackID(FI) != TargetStackID::ScalablePredicateVector)
continue;
- ObjectsToAllocate.push_back(I);
+ ObjectsToAllocate.push_back(FI);
}
// Allocate all SVE locals and spills
- for (unsigned FI : ObjectsToAllocate) {
- Align Alignment = MFI.getObjectAlign(FI);
- // FIXME: Given that the length of SVE vectors is not necessarily a power of
- // two, we'd need to align every object dynamically at runtime if the
- // alignment is larger than 16. This is not yet supported.
- if (Alignment > Align(16))
- report_fatal_error(
- "Alignment of scalable vectors > 16 bytes is not yet supported");
-
- Offset = alignTo(Offset + MFI.getObjectSize(FI), Alignment);
- if (AssignOffsets)
- Assign(FI, -Offset);
- }
+ for (unsigned FI : ObjectsToAllocate)
+ AllocateObject(FI);
- return Offset;
-}
+ PPRStackTop = alignTo(PPRStackTop, Align(16U));
+ ZPRStackTop = alignTo(ZPRStackTop, Align(16U));
-int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets(
- MachineFrameInfo &MFI) const {
- int MinCSFrameIndex, MaxCSFrameIndex;
- return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex, false);
-}
+ if (AssignOffsets == AssignObjectOffsets::Yes)
+ AFI->setStackSizeSVE(SVEStack.ZPRStackSize, SVEStack.PPRStackSize);
-int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(
- MachineFrameInfo &MFI, int &MinCSFrameIndex, int &MaxCSFrameIndex) const {
- return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex,
- true);
+ return SVEStack;
}
/// Attempts to scavenge a register from \p ScavengeableRegs given the used
@@ -3072,12 +3272,7 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
"Upwards growing stack unsupported");
- int MinCSFrameIndex, MaxCSFrameIndex;
- int64_t SVEStackSize =
- assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex);
-
- AFI->setStackSizeSVE(alignTo(SVEStackSize, 16U));
- AFI->setMinMaxSVECSFrameIndex(MinCSFrameIndex, MaxCSFrameIndex);
+ (void)determineSVEStackSizes(MF, AssignObjectOffsets::Yes);
// If this function isn't doing Win64-style C++ EH, we don't need to do
// anything.
@@ -3361,7 +3556,8 @@ void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI,
Register Reg;
FrameRegOffset = TFI->resolveFrameOffsetReference(
- *MF, FirstTagStore.Offset, false /*isFixed*/, false /*isSVE*/, Reg,
+ *MF, FirstTagStore.Offset, false /*isFixed*/,
+ TargetStackID::Default /*StackID*/, Reg,
/*PreferFP=*/false, /*ForSimm=*/true);
FrameReg = Reg;
FrameRegUpdate = std::nullopt;
@@ -3599,7 +3795,7 @@ StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP(
// Go to common code if we cannot provide sp + offset.
if (MFI.hasVarSizedObjects() ||
- MF.getInfo<AArch64FunctionInfo>()->getStackSizeSVE() ||
+ MF.getInfo<AArch64FunctionInfo>()->hasSVEStackSize() ||
MF.getSubtarget().getRegisterInfo()->hasStackRealignment(MF))
return getFrameIndexReference(MF, FI, FrameReg);
@@ -3701,10 +3897,12 @@ bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) {
void AArch64FrameLowering::orderFrameObjects(
const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
- if (!OrderFrameObjects || ObjectsToAllocate.empty())
+ const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
+
+ if ((!OrderFrameObjects && !AFI.hasSplitSVEObjects()) ||
+ ObjectsToAllocate.empty())
return;
- const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
const MachineFrameInfo &MFI = MF.getFrameInfo();
std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd());
for (auto &Obj : ObjectsToAllocate) {
@@ -3723,7 +3921,8 @@ void AArch64FrameLowering::orderFrameObjects(
if (AFI.hasStackHazardSlotIndex()) {
std::optional<int> FI = getLdStFrameID(MI, MFI);
if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) {
- if (MFI.isScalableStackID(*FI) || AArch64InstrInfo::isFpOrNEON(MI))
+ if (MFI.getStackID(*FI) == TargetStackID::ScalableVector ||
+ AArch64InstrInfo::isFpOrNEON(MI))
FrameObjects[*FI].Accesses |= FrameObject::AccessFPR;
else
FrameObjects[*FI].Accesses |= FrameObject::AccessGPR;
@@ -4081,7 +4280,7 @@ void AArch64FrameLowering::emitRemarks(
}
unsigned RegTy = StackAccess::AccessType::GPR;
- if (MFI.isScalableStackID(FrameIdx)) {
+ if (MFI.hasScalableStackID(FrameIdx)) {
// SPILL_PPR_TO_ZPR_SLOT_PSEUDO and FILL_PPR_FROM_ZPR_SLOT_PSEUDO
// spill/fill the predicate as a data vector (so are an FPR access).
if (MI.getOpcode() != AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO &&
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 20d1d6a..32a9bd8 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -24,6 +24,11 @@ class AArch64FunctionInfo;
class AArch64PrologueEmitter;
class AArch64EpilogueEmitter;
+struct SVEStackSizes {
+ uint64_t ZPRStackSize{0};
+ uint64_t PPRStackSize{0};
+};
+
class AArch64FrameLowering : public TargetFrameLowering {
public:
explicit AArch64FrameLowering()
@@ -64,8 +69,9 @@ public:
bool ForSimm) const;
StackOffset resolveFrameOffsetReference(const MachineFunction &MF,
int64_t ObjectOffset, bool isFixed,
- bool isSVE, Register &FrameReg,
- bool PreferFP, bool ForSimm) const;
+ TargetStackID::Value StackID,
+ Register &FrameReg, bool PreferFP,
+ bool ForSimm) const;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
ArrayRef<CalleeSavedInfo> CSI,
@@ -147,7 +153,17 @@ public:
bool requiresSaveVG(const MachineFunction &MF) const;
- StackOffset getSVEStackSize(const MachineFunction &MF) const;
+ /// Returns the size of the entire ZPR stackframe (calleesaves + spills).
+ StackOffset getZPRStackSize(const MachineFunction &MF) const;
+
+ /// Returns the size of the entire PPR stackframe (calleesaves + spills +
+ /// hazard padding).
+ StackOffset getPPRStackSize(const MachineFunction &MF) const;
+
+ /// Returns the size of the entire SVE stackframe (PPRs + ZPRs).
+ StackOffset getSVEStackSize(const MachineFunction &MF) const {
+ return getZPRStackSize(MF) + getPPRStackSize(MF);
+ }
friend class AArch64PrologueEpilogueCommon;
friend class AArch64PrologueEmitter;
@@ -167,10 +183,6 @@ private:
/// Returns true if CSRs should be paired.
bool producePairRegisters(MachineFunction &MF) const;
- int64_t estimateSVEStackObjectOffsets(MachineFrameInfo &MF) const;
- int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF,
- int &MinCSFrameIndex,
- int &MaxCSFrameIndex) const;
/// Make a determination whether a Hazard slot is used and create it if
/// needed.
void determineStackHazardSlot(MachineFunction &MF,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 35bbb0c0..e7b2d20 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -7497,7 +7497,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
int FI = cast<FrameIndexSDNode>(N)->getIndex();
// We can only encode VL scaled offsets, so only fold in frame indexes
// referencing SVE objects.
- if (MFI.isScalableStackID(FI)) {
+ if (MFI.hasScalableStackID(FI)) {
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
return true;
@@ -7543,7 +7543,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
// We can only encode VL scaled offsets, so only fold in frame indexes
// referencing SVE objects.
- if (MFI.isScalableStackID(FI))
+ if (MFI.hasScalableStackID(FI))
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c2a482a..70d5ad7d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9256,7 +9256,7 @@ void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
(MI.getOpcode() == AArch64::ADDXri ||
MI.getOpcode() == AArch64::SUBXri)) {
const MachineOperand &MO = MI.getOperand(1);
- if (MO.isFI() && MF.getFrameInfo().isScalableStackID(MO.getIndex()))
+ if (MO.isFI() && MF.getFrameInfo().hasScalableStackID(MO.getIndex()))
MI.addOperand(MachineOperand::CreateReg(AArch64::VG, /*IsDef=*/false,
/*IsImplicit=*/true));
}
@@ -29608,7 +29608,7 @@ void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
// than doing it here in finalizeLowering.
if (MFI.hasStackProtectorIndex()) {
for (unsigned int i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
- if (MFI.isScalableStackID(i) &&
+ if (MFI.hasScalableStackID(i) &&
MFI.getObjectSSPLayout(i) != MachineFrameInfo::SSPLK_None) {
MFI.setStackID(MFI.getStackProtectorIndex(),
TargetStackID::ScalableVector);
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
index a81f5b3..b3c9656 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
@@ -23,12 +23,21 @@
using namespace llvm;
+static std::optional<uint64_t>
+getSVEStackSize(const AArch64FunctionInfo &MFI,
+ uint64_t (AArch64FunctionInfo::*GetStackSize)() const) {
+ if (!MFI.hasCalculatedStackSizeSVE())
+ return std::nullopt;
+ return (MFI.*GetStackSize)();
+}
+
yaml::AArch64FunctionInfo::AArch64FunctionInfo(
const llvm::AArch64FunctionInfo &MFI)
: HasRedZone(MFI.hasRedZone()),
- StackSizeSVE(MFI.hasCalculatedStackSizeSVE()
- ? std::optional<uint64_t>(MFI.getStackSizeSVE())
- : std::nullopt),
+ StackSizeZPR(
+ getSVEStackSize(MFI, &llvm::AArch64FunctionInfo::getStackSizeZPR)),
+ StackSizePPR(
+ getSVEStackSize(MFI, &llvm::AArch64FunctionInfo::getStackSizePPR)),
HasStackFrame(MFI.hasStackFrame()
? std::optional<bool>(MFI.hasStackFrame())
: std::nullopt) {}
@@ -41,8 +50,9 @@ void AArch64FunctionInfo::initializeBaseYamlFields(
const yaml::AArch64FunctionInfo &YamlMFI) {
if (YamlMFI.HasRedZone)
HasRedZone = YamlMFI.HasRedZone;
- if (YamlMFI.StackSizeSVE)
- setStackSizeSVE(*YamlMFI.StackSizeSVE);
+ if (YamlMFI.StackSizeZPR || YamlMFI.StackSizePPR)
+ setStackSizeSVE(YamlMFI.StackSizeZPR.value_or(0),
+ YamlMFI.StackSizePPR.value_or(0));
if (YamlMFI.HasStackFrame)
setHasStackFrame(*YamlMFI.HasStackFrame);
}
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 897c7e8..91e64e6 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -74,13 +74,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// Amount of stack frame size, not including callee-saved registers.
uint64_t LocalStackSize = 0;
- /// The start and end frame indices for the SVE callee saves.
- int MinSVECSFrameIndex = 0;
- int MaxSVECSFrameIndex = 0;
-
/// Amount of stack frame size used for saving callee-saved registers.
unsigned CalleeSavedStackSize = 0;
- unsigned SVECalleeSavedStackSize = 0;
+ unsigned ZPRCalleeSavedStackSize = 0;
+ unsigned PPRCalleeSavedStackSize = 0;
bool HasCalleeSavedStackSize = false;
bool HasSVECalleeSavedStackSize = false;
@@ -137,9 +134,14 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// SVE stack size (for predicates and data vectors) are maintained here
/// rather than in FrameInfo, as the placement and Stack IDs are target
/// specific.
- uint64_t StackSizeSVE = 0;
+ uint64_t StackSizeZPR = 0;
+ uint64_t StackSizePPR = 0;
+
+ /// Are SVE objects (vectors and predicates) split into separate regions on
+ /// the stack.
+ bool SplitSVEObjects = false;
- /// HasCalculatedStackSizeSVE indicates whether StackSizeSVE is valid.
+ /// HasCalculatedStackSizeSVE indicates whether StackSizeZPR/PPR is valid.
bool HasCalculatedStackSizeSVE = false;
/// Has a value when it is known whether or not the function uses a
@@ -312,16 +314,25 @@ public:
TailCallReservedStack = bytes;
}
- bool hasCalculatedStackSizeSVE() const { return HasCalculatedStackSizeSVE; }
-
- void setStackSizeSVE(uint64_t S) {
+ void setStackSizeSVE(uint64_t ZPR, uint64_t PPR) {
+ StackSizeZPR = ZPR;
+ StackSizePPR = PPR;
HasCalculatedStackSizeSVE = true;
- StackSizeSVE = S;
}
- uint64_t getStackSizeSVE() const {
+ uint64_t getStackSizeZPR() const {
+ assert(hasCalculatedStackSizeSVE());
+ return StackSizeZPR;
+ }
+ uint64_t getStackSizePPR() const {
assert(hasCalculatedStackSizeSVE());
- return StackSizeSVE;
+ return StackSizePPR;
+ }
+
+ bool hasCalculatedStackSizeSVE() const { return HasCalculatedStackSizeSVE; }
+
+ bool hasSVEStackSize() const {
+ return getStackSizeZPR() > 0 || getStackSizePPR() > 0;
}
bool hasStackFrame() const { return HasStackFrame; }
@@ -329,7 +340,6 @@ public:
bool isStackRealigned() const { return StackRealigned; }
void setStackRealigned(bool s) { StackRealigned = s; }
-
bool hasCalleeSaveStackFreeSpace() const {
return CalleeSaveStackHasFreeSpace;
}
@@ -414,29 +424,37 @@ public:
}
// Saves the CalleeSavedStackSize for SVE vectors in 'scalable bytes'
- void setSVECalleeSavedStackSize(unsigned Size) {
- SVECalleeSavedStackSize = Size;
+ void setSVECalleeSavedStackSize(unsigned ZPR, unsigned PPR) {
+ ZPRCalleeSavedStackSize = ZPR;
+ PPRCalleeSavedStackSize = PPR;
HasSVECalleeSavedStackSize = true;
}
- unsigned getSVECalleeSavedStackSize() const {
+ unsigned getZPRCalleeSavedStackSize() const {
assert(HasSVECalleeSavedStackSize &&
- "SVECalleeSavedStackSize has not been calculated");
- return SVECalleeSavedStackSize;
+ "ZPRCalleeSavedStackSize has not been calculated");
+ return ZPRCalleeSavedStackSize;
}
-
- void setMinMaxSVECSFrameIndex(int Min, int Max) {
- MinSVECSFrameIndex = Min;
- MaxSVECSFrameIndex = Max;
+ unsigned getPPRCalleeSavedStackSize() const {
+ assert(HasSVECalleeSavedStackSize &&
+ "PPRCalleeSavedStackSize has not been calculated");
+ return PPRCalleeSavedStackSize;
}
- int getMinSVECSFrameIndex() const { return MinSVECSFrameIndex; }
- int getMaxSVECSFrameIndex() const { return MaxSVECSFrameIndex; }
+ unsigned getSVECalleeSavedStackSize() const {
+ assert(!hasSplitSVEObjects() &&
+ "ZPRs and PPRs are split. Use get[ZPR|PPR]CalleeSavedStackSize()");
+ return getZPRCalleeSavedStackSize() + getPPRCalleeSavedStackSize();
+ }
void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamicTLSAccesses; }
unsigned getNumLocalDynamicTLSAccesses() const {
return NumLocalDynamicTLSAccesses;
}
+ bool isStackHazardIncludedInCalleeSaveArea() const {
+ return hasStackHazardSlotIndex() && !hasSplitSVEObjects();
+ }
+
std::optional<bool> hasRedZone() const { return HasRedZone; }
void setHasRedZone(bool s) { HasRedZone = s; }
@@ -472,6 +490,15 @@ public:
StackHazardCSRSlotIndex = Index;
}
+ bool hasSplitSVEObjects() const { return SplitSVEObjects; }
+ void setSplitSVEObjects(bool s) { SplitSVEObjects = s; }
+
+ bool hasSVE_AAPCS(const MachineFunction &MF) const {
+ return hasSplitSVEObjects() || isSVECC() ||
+ MF.getFunction().getCallingConv() ==
+ CallingConv::AArch64_SVE_VectorCall;
+ }
+
SMEAttrs getSMEFnAttrs() const { return SMEFnAttrs; }
unsigned getSRetReturnReg() const { return SRetReturnReg; }
@@ -611,7 +638,8 @@ private:
namespace yaml {
struct AArch64FunctionInfo final : public yaml::MachineFunctionInfo {
std::optional<bool> HasRedZone;
- std::optional<uint64_t> StackSizeSVE;
+ std::optional<uint64_t> StackSizeZPR;
+ std::optional<uint64_t> StackSizePPR;
std::optional<bool> HasStackFrame;
AArch64FunctionInfo() = default;
@@ -624,7 +652,8 @@ struct AArch64FunctionInfo final : public yaml::MachineFunctionInfo {
template <> struct MappingTraits<AArch64FunctionInfo> {
static void mapping(IO &YamlIO, AArch64FunctionInfo &MFI) {
YamlIO.mapOptional("hasRedZone", MFI.HasRedZone);
- YamlIO.mapOptional("stackSizeSVE", MFI.StackSizeSVE);
+ YamlIO.mapOptional("stackSizeZPR", MFI.StackSizeZPR);
+ YamlIO.mapOptional("stackSizePPR", MFI.StackSizePPR);
YamlIO.mapOptional("hasStackFrame", MFI.HasStackFrame);
}
};
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
index 5da16b9..aed137c 100644
--- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
@@ -48,21 +48,19 @@ bool AArch64PrologueEpilogueCommon::isVGInstruction(
return Opc == TargetOpcode::COPY;
}
-// Convenience function to determine whether I is an SVE callee save.
-static bool isSVECalleeSave(MachineBasicBlock::iterator I) {
+// Convenience function to determine whether I is part of the ZPR callee saves.
+static bool isPartOfZPRCalleeSaves(MachineBasicBlock::iterator I) {
switch (I->getOpcode()) {
default:
return false;
- case AArch64::PTRUE_C_B:
case AArch64::LD1B_2Z_IMM:
case AArch64::ST1B_2Z_IMM:
case AArch64::STR_ZXI:
- case AArch64::STR_PXI:
case AArch64::LDR_ZXI:
- case AArch64::LDR_PXI:
- case AArch64::PTRUE_B:
case AArch64::CPY_ZPzI_B:
case AArch64::CMPNE_PPzZI_B:
+ case AArch64::PTRUE_C_B:
+ case AArch64::PTRUE_B:
return I->getFlag(MachineInstr::FrameSetup) ||
I->getFlag(MachineInstr::FrameDestroy);
case AArch64::SEH_SavePReg:
@@ -71,6 +69,23 @@ static bool isSVECalleeSave(MachineBasicBlock::iterator I) {
}
}
+// Convenience function to determine whether I is part of the PPR callee saves.
+static bool isPartOfPPRCalleeSaves(MachineBasicBlock::iterator I) {
+ switch (I->getOpcode()) {
+ default:
+ return false;
+ case AArch64::STR_PXI:
+ case AArch64::LDR_PXI:
+ return I->getFlag(MachineInstr::FrameSetup) ||
+ I->getFlag(MachineInstr::FrameDestroy);
+ }
+}
+
+// Convenience function to determine whether I is part of the SVE callee saves.
+static bool isPartOfSVECalleeSaves(MachineBasicBlock::iterator I) {
+ return isPartOfZPRCalleeSaves(I) || isPartOfPPRCalleeSaves(I);
+}
+
AArch64PrologueEpilogueCommon::AArch64PrologueEpilogueCommon(
MachineFunction &MF, MachineBasicBlock &MBB,
const AArch64FrameLowering &AFL)
@@ -316,7 +331,7 @@ bool AArch64PrologueEpilogueCommon::shouldCombineCSRLocalStackBump(
// When there is an SVE area on the stack, always allocate the
// callee-saves and spills/locals separately.
- if (AFL.getSVEStackSize(MF))
+ if (AFI->hasSVEStackSize())
return false;
return true;
@@ -639,7 +654,7 @@ void AArch64PrologueEmitter::emitPrologue() {
// Now allocate space for the GPR callee saves.
MachineBasicBlock::iterator MBBI = PrologueBeginI;
- while (MBBI != EndI && isSVECalleeSave(MBBI))
+ while (MBBI != EndI && isPartOfSVECalleeSaves(MBBI))
++MBBI;
FirstGPRSaveI = convertCalleeSaveRestoreToSPPrePostIncDec(
MBBI, DL, -AFI->getCalleeSavedStackSize(), EmitAsyncCFI);
@@ -669,7 +684,7 @@ void AArch64PrologueEmitter::emitPrologue() {
MachineBasicBlock::iterator AfterGPRSavesI = FirstGPRSaveI;
while (AfterGPRSavesI != EndI &&
AfterGPRSavesI->getFlag(MachineInstr::FrameSetup) &&
- !isSVECalleeSave(AfterGPRSavesI)) {
+ !isPartOfSVECalleeSaves(AfterGPRSavesI)) {
if (CombineSPBump &&
// Only fix-up frame-setup load/store instructions.
(!AFL.requiresSaveVG(MF) || !isVGInstruction(AfterGPRSavesI, TLI)))
@@ -700,56 +715,105 @@ void AArch64PrologueEmitter::emitPrologue() {
if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);
- StackOffset SVEStackSize = AFL.getSVEStackSize(MF);
- StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
- MachineBasicBlock::iterator CalleeSavesEnd = AfterGPRSavesI;
+ StackOffset PPRCalleeSavesSize =
+ StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
+ StackOffset ZPRCalleeSavesSize =
+ StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize());
+ StackOffset SVECalleeSavesSize = PPRCalleeSavesSize + ZPRCalleeSavesSize;
+ StackOffset PPRLocalsSize = AFL.getPPRStackSize(MF) - PPRCalleeSavesSize;
+ StackOffset ZPRLocalsSize = AFL.getZPRStackSize(MF) - ZPRCalleeSavesSize;
+
+ std::optional<MachineBasicBlock::iterator> ZPRCalleeSavesBegin,
+ ZPRCalleeSavesEnd, PPRCalleeSavesBegin, PPRCalleeSavesEnd;
StackOffset CFAOffset =
StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
-
- // Process the SVE callee-saves to determine what space needs to be
- // allocated.
MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
- if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
- LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize
- << "\n");
- SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
- SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
- // Find callee save instructions in frame.
- // Note: With FPAfterSVECalleeSaves the callee saves have already been
- // allocated.
- if (!FPAfterSVECalleeSaves) {
- MachineBasicBlock::iterator CalleeSavesBegin = AfterGPRSavesI;
- assert(isSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
- while (isSVECalleeSave(AfterSVESavesI) &&
+ if (!FPAfterSVECalleeSaves) {
+ // Process the SVE callee-saves to find the starts/ends of the ZPR and PPR
+ // areas.
+ PPRCalleeSavesBegin = AfterGPRSavesI;
+ if (PPRCalleeSavesSize) {
+ LLVM_DEBUG(dbgs() << "PPRCalleeSavedStackSize = "
+ << PPRCalleeSavesSize.getScalable() << "\n");
+
+ assert(isPartOfPPRCalleeSaves(*PPRCalleeSavesBegin) &&
+ "Unexpected instruction");
+ while (isPartOfPPRCalleeSaves(AfterSVESavesI) &&
+ AfterSVESavesI != MBB.getFirstTerminator())
+ ++AfterSVESavesI;
+ }
+ PPRCalleeSavesEnd = ZPRCalleeSavesBegin = AfterSVESavesI;
+ if (ZPRCalleeSavesSize) {
+ LLVM_DEBUG(dbgs() << "ZPRCalleeSavedStackSize = "
+ << ZPRCalleeSavesSize.getScalable() << "\n");
+ assert(isPartOfZPRCalleeSaves(*ZPRCalleeSavesBegin) &&
+ "Unexpected instruction");
+ while (isPartOfZPRCalleeSaves(AfterSVESavesI) &&
AfterSVESavesI != MBB.getFirstTerminator())
++AfterSVESavesI;
- CalleeSavesEnd = AfterSVESavesI;
-
- StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes);
- // Allocate space for the callee saves (if any).
- allocateStackSpace(CalleeSavesBegin, 0, SVECalleeSavesSize,
- EmitAsyncCFI && !HasFP, CFAOffset,
- MFI.hasVarSizedObjects() || LocalsSize);
}
+ ZPRCalleeSavesEnd = AfterSVESavesI;
}
- CFAOffset += SVECalleeSavesSize;
if (EmitAsyncCFI)
- emitCalleeSavedSVELocations(CalleeSavesEnd);
-
- // Allocate space for the rest of the frame including SVE locals. Align the
- // stack as necessary.
- assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) &&
- "Cannot use redzone with stack realignment");
- if (!AFL.canUseRedZone(MF)) {
- // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
- // the correct value here, as NumBytes also includes padding bytes,
- // which shouldn't be counted here.
- allocateStackSpace(CalleeSavesEnd, RealignmentPadding,
- SVELocalsSize + StackOffset::getFixed(NumBytes),
+ emitCalleeSavedSVELocations(AfterSVESavesI);
+
+ if (AFI->hasSplitSVEObjects()) {
+ assert(!FPAfterSVECalleeSaves &&
+ "Cannot use FPAfterSVECalleeSaves with aarch64-split-sve-objects");
+ assert(!AFL.canUseRedZone(MF) &&
+ "Cannot use redzone with aarch64-split-sve-objects");
+ // TODO: Handle HasWinCFI/NeedsWinCFI?
+ assert(!NeedsWinCFI &&
+ "WinCFI with aarch64-split-sve-objects is not supported");
+
+ // Split ZPR and PPR allocation.
+ // Allocate PPR callee saves
+ allocateStackSpace(*PPRCalleeSavesBegin, 0, PPRCalleeSavesSize,
+ EmitAsyncCFI && !HasFP, CFAOffset,
+ MFI.hasVarSizedObjects() || ZPRCalleeSavesSize ||
+ ZPRLocalsSize || PPRLocalsSize);
+ CFAOffset += PPRCalleeSavesSize;
+
+ // Allocate PPR locals + ZPR callee saves
+ assert(PPRCalleeSavesEnd == ZPRCalleeSavesBegin &&
+ "Expected ZPR callee saves after PPR locals");
+ allocateStackSpace(*PPRCalleeSavesEnd, RealignmentPadding,
+ PPRLocalsSize + ZPRCalleeSavesSize,
+ EmitAsyncCFI && !HasFP, CFAOffset,
+ MFI.hasVarSizedObjects() || ZPRLocalsSize);
+ CFAOffset += PPRLocalsSize + ZPRCalleeSavesSize;
+
+ // Allocate ZPR locals
+ allocateStackSpace(*ZPRCalleeSavesEnd, RealignmentPadding,
+ ZPRLocalsSize + StackOffset::getFixed(NumBytes),
EmitAsyncCFI && !HasFP, CFAOffset,
MFI.hasVarSizedObjects());
+ } else {
+ // Allocate space for the callee saves (if any).
+ StackOffset LocalsSize =
+ PPRLocalsSize + ZPRLocalsSize + StackOffset::getFixed(NumBytes);
+ if (!FPAfterSVECalleeSaves)
+ allocateStackSpace(AfterGPRSavesI, 0, SVECalleeSavesSize,
+ EmitAsyncCFI && !HasFP, CFAOffset,
+ MFI.hasVarSizedObjects() || LocalsSize);
+ CFAOffset += SVECalleeSavesSize;
+
+ // Allocate space for the rest of the frame including SVE locals. Align the
+ // stack as necessary.
+ assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) &&
+ "Cannot use redzone with stack realignment");
+ if (!AFL.canUseRedZone(MF)) {
+ // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
+ // the correct value here, as NumBytes also includes padding bytes,
+ // which shouldn't be counted here.
+ StackOffset SVELocalsSize = PPRLocalsSize + ZPRLocalsSize;
+ allocateStackSpace(AfterSVESavesI, RealignmentPadding,
+ SVELocalsSize + StackOffset::getFixed(NumBytes),
+ EmitAsyncCFI && !HasFP, CFAOffset,
+ MFI.hasVarSizedObjects());
+ }
}
// If we need a base pointer, set it up here. It's whatever the value of the
@@ -796,7 +860,8 @@ void AArch64PrologueEmitter::emitPrologue() {
emitDefineCFAWithFP(AfterSVESavesI, FixedObject);
} else {
StackOffset TotalSize =
- SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
+ AFL.getSVEStackSize(MF) +
+ StackOffset::getFixed((int64_t)MFI.getStackSize());
CFIInstBuilder CFIBuilder(MBB, AfterSVESavesI, MachineInstr::FrameSetup);
CFIBuilder.insertCFIInst(
createDefCFA(RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,
@@ -1165,7 +1230,7 @@ void AArch64PrologueEmitter::emitCalleeSavedGPRLocations(
CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
for (const auto &Info : CSI) {
unsigned FrameIdx = Info.getFrameIdx();
- if (MFI.isScalableStackID(FrameIdx))
+ if (MFI.hasScalableStackID(FrameIdx))
continue;
assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
@@ -1191,8 +1256,10 @@ void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
AFL.getOffsetOfLocalArea();
}
+ StackOffset PPRStackSize = AFL.getPPRStackSize(MF);
for (const auto &Info : CSI) {
- if (!MFI.isScalableStackID(Info.getFrameIdx()))
+ int FI = Info.getFrameIdx();
+ if (!MFI.hasScalableStackID(FI))
continue;
// Not all unwinders may know about SVE registers, so assume the lowest
@@ -1203,9 +1270,13 @@ void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
continue;
StackOffset Offset =
- StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) -
+ StackOffset::getScalable(MFI.getObjectOffset(FI)) -
StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI));
+ if (AFI->hasSplitSVEObjects() &&
+ MFI.getStackID(FI) == TargetStackID::ScalableVector)
+ Offset -= PPRStackSize;
+
CFIBuilder.insertCFIInst(
createCFAOffset(RegInfo, Reg, Offset, IncomingVGOffsetFromDefCFA));
}
@@ -1322,7 +1393,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
while (FirstGPRRestoreI != Begin) {
--FirstGPRRestoreI;
if (!FirstGPRRestoreI->getFlag(MachineInstr::FrameDestroy) ||
- (!FPAfterSVECalleeSaves && isSVECalleeSave(FirstGPRRestoreI))) {
+ (!FPAfterSVECalleeSaves && isPartOfSVECalleeSaves(FirstGPRRestoreI))) {
++FirstGPRRestoreI;
break;
} else if (CombineSPBump)
@@ -1346,7 +1417,9 @@ void AArch64EpilogueEmitter::emitEpilogue() {
if (HasFP && AFI->hasSwiftAsyncContext())
emitSwiftAsyncContextFramePointer(EpilogueEndI, DL);
- const StackOffset &SVEStackSize = AFL.getSVEStackSize(MF);
+ StackOffset ZPRStackSize = AFL.getZPRStackSize(MF);
+ StackOffset PPRStackSize = AFL.getPPRStackSize(MF);
+ StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;
// If there is a single SP update, insert it before the ret and we're done.
if (CombineSPBump) {
@@ -1367,106 +1440,188 @@ void AArch64EpilogueEmitter::emitEpilogue() {
NumBytes -= PrologueSaveSize;
assert(NumBytes >= 0 && "Negative stack allocation size!?");
- // Process the SVE callee-saves to determine what space needs to be
- // deallocated.
- StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
- MachineBasicBlock::iterator RestoreBegin = FirstGPRRestoreI,
- RestoreEnd = FirstGPRRestoreI;
- if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
- if (FPAfterSVECalleeSaves)
- RestoreEnd = MBB.getFirstTerminator();
-
- RestoreBegin = std::prev(RestoreEnd);
- while (RestoreBegin != MBB.begin() &&
- isSVECalleeSave(std::prev(RestoreBegin)))
- --RestoreBegin;
-
- assert(isSVECalleeSave(RestoreBegin) &&
- isSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
-
- StackOffset CalleeSavedSizeAsOffset =
- StackOffset::getScalable(CalleeSavedSize);
- DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
- DeallocateAfter = CalleeSavedSizeAsOffset;
- }
-
- // Deallocate the SVE area.
- if (FPAfterSVECalleeSaves) {
- // If the callee-save area is before FP, restoring the FP implicitly
- // deallocates non-callee-save SVE allocations. Otherwise, deallocate
- // them explicitly.
- if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
- emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
- DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI);
+ if (!AFI->hasSplitSVEObjects()) {
+ // Process the SVE callee-saves to determine what space needs to be
+ // deallocated.
+ StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
+ MachineBasicBlock::iterator RestoreBegin = FirstGPRRestoreI,
+ RestoreEnd = FirstGPRRestoreI;
+ int64_t ZPRCalleeSavedSize = AFI->getZPRCalleeSavedStackSize();
+ int64_t PPRCalleeSavedSize = AFI->getPPRCalleeSavedStackSize();
+ int64_t SVECalleeSavedSize = ZPRCalleeSavedSize + PPRCalleeSavedSize;
+
+ if (SVECalleeSavedSize) {
+ if (FPAfterSVECalleeSaves)
+ RestoreEnd = MBB.getFirstTerminator();
+
+ RestoreBegin = std::prev(RestoreEnd);
+ while (RestoreBegin != MBB.begin() &&
+ isPartOfSVECalleeSaves(std::prev(RestoreBegin)))
+ --RestoreBegin;
+
+ assert(isPartOfSVECalleeSaves(RestoreBegin) &&
+ isPartOfSVECalleeSaves(std::prev(RestoreEnd)) &&
+ "Unexpected instruction");
+
+ StackOffset CalleeSavedSizeAsOffset =
+ StackOffset::getScalable(SVECalleeSavedSize);
+ DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
+ DeallocateAfter = CalleeSavedSizeAsOffset;
}
- // Deallocate callee-save non-SVE registers.
- emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(AFI->getCalleeSavedStackSize()), TII,
- MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
-
- // Deallocate fixed objects.
- emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(FixedObject), TII,
- MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
-
- // Deallocate callee-save SVE registers.
- emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
- DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI);
- } else if (SVEStackSize) {
- int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize();
- // If we have stack realignment or variable-sized objects we must use the
- // FP to restore SVE callee saves (as there is an unknown amount of
- // data/padding between the SP and SVE CS area).
- Register BaseForSVEDealloc =
- (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
- : AArch64::SP;
- if (SVECalleeSavedSize && BaseForSVEDealloc == AArch64::FP) {
- Register CalleeSaveBase = AArch64::FP;
- if (int64_t CalleeSaveBaseOffset =
- AFI->getCalleeSaveBaseToFrameRecordOffset()) {
- // If we have have an non-zero offset to the non-SVE CS base we need to
- // compute the base address by subtracting the offest in a temporary
- // register first (to avoid briefly deallocating the SVE CS).
- CalleeSaveBase =
- MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
- emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
- StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
- MachineInstr::FrameDestroy);
- }
- // The code below will deallocate the stack space space by moving the
- // SP to the start of the SVE callee-save area.
- emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
- StackOffset::getScalable(-SVECalleeSavedSize), TII,
- MachineInstr::FrameDestroy);
- } else if (BaseForSVEDealloc == AArch64::SP) {
- if (SVECalleeSavedSize) {
- // Deallocate the non-SVE locals first before we can deallocate (and
- // restore callee saves) from the SVE area.
- emitFrameOffset(
- MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(NumBytes), TII, MachineInstr::FrameDestroy,
- false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
- SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize));
- NumBytes = 0;
+ // Deallocate the SVE area.
+ if (FPAfterSVECalleeSaves) {
+ // If the callee-save area is before FP, restoring the FP implicitly
+ // deallocates non-callee-save SVE allocations. Otherwise, deallocate
+ // them explicitly.
+ if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
+ emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
+ DeallocateBefore, TII, MachineInstr::FrameDestroy,
+ false, NeedsWinCFI, &HasWinCFI);
}
+ // Deallocate callee-save non-SVE registers.
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
- DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
- SVEStackSize +
- StackOffset::getFixed(NumBytes + PrologueSaveSize));
+ StackOffset::getFixed(AFI->getCalleeSavedStackSize()),
+ TII, MachineInstr::FrameDestroy, false, NeedsWinCFI,
+ &HasWinCFI);
+
+ // Deallocate fixed objects.
+ emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(FixedObject), TII,
+ MachineInstr::FrameDestroy, false, NeedsWinCFI,
+ &HasWinCFI);
+ // Deallocate callee-save SVE registers.
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
- DeallocateAfter +
- StackOffset::getFixed(NumBytes + PrologueSaveSize));
+ NeedsWinCFI, &HasWinCFI);
+ } else if (SVEStackSize) {
+ int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize();
+ // If we have stack realignment or variable-sized objects we must use the
+ // FP to restore SVE callee saves (as there is an unknown amount of
+ // data/padding between the SP and SVE CS area).
+ Register BaseForSVEDealloc =
+ (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
+ : AArch64::SP;
+ if (SVECalleeSavedSize && BaseForSVEDealloc == AArch64::FP) {
+ Register CalleeSaveBase = AArch64::FP;
+ if (int64_t CalleeSaveBaseOffset =
+ AFI->getCalleeSaveBaseToFrameRecordOffset()) {
+ // If we have a non-zero offset to the non-SVE CS base we need
+ // to compute the base address by subtracting the offset in a
+ // temporary register first (to avoid briefly deallocating the SVE
+ // CS).
+ CalleeSaveBase = MBB.getParent()->getRegInfo().createVirtualRegister(
+ &AArch64::GPR64RegClass);
+ emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
+ StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
+ MachineInstr::FrameDestroy);
+ }
+ // The code below will deallocate the stack space by moving the
+ // SP to the start of the SVE callee-save area.
+ emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
+ StackOffset::getScalable(-SVECalleeSavedSize), TII,
+ MachineInstr::FrameDestroy);
+ } else if (BaseForSVEDealloc == AArch64::SP) {
+ if (SVECalleeSavedSize) {
+ // Deallocate the non-SVE locals first before we can deallocate (and
+ // restore callee saves) from the SVE area.
+ emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(NumBytes), TII,
+ MachineInstr::FrameDestroy, false, NeedsWinCFI,
+ &HasWinCFI, EmitCFI && !HasFP,
+ SVEStackSize + StackOffset::getFixed(
+ NumBytes + PrologueSaveSize));
+ NumBytes = 0;
+ }
+
+ emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
+ DeallocateBefore, TII, MachineInstr::FrameDestroy,
+ false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
+ SVEStackSize +
+ StackOffset::getFixed(NumBytes + PrologueSaveSize));
+
+ emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
+ DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
+ NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
+ DeallocateAfter +
+ StackOffset::getFixed(NumBytes + PrologueSaveSize));
+ }
+
+ if (EmitCFI)
+ emitCalleeSavedSVERestores(RestoreEnd);
+ }
+ } else if (AFI->hasSplitSVEObjects() && SVEStackSize) {
+ // TODO: Support stack realignment and variable-sized objects.
+ assert(!AFI->isStackRealigned() && !MFI.hasVarSizedObjects() &&
+ "unexpected stack realignment or variable sized objects with split "
+ "SVE stack objects");
+ // SplitSVEObjects. Determine the sizes and starts/ends of the ZPR and PPR
+ // areas.
+ auto ZPRCalleeSavedSize =
+ StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize());
+ auto PPRCalleeSavedSize =
+ StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
+ StackOffset PPRLocalsSize = PPRStackSize - PPRCalleeSavedSize;
+ StackOffset ZPRLocalsSize = ZPRStackSize - ZPRCalleeSavedSize;
+
+ MachineBasicBlock::iterator PPRRestoreBegin = FirstGPRRestoreI,
+ PPRRestoreEnd = FirstGPRRestoreI;
+ if (PPRCalleeSavedSize) {
+ PPRRestoreBegin = std::prev(PPRRestoreEnd);
+ while (PPRRestoreBegin != MBB.begin() &&
+ isPartOfPPRCalleeSaves(std::prev(PPRRestoreBegin)))
+ --PPRRestoreBegin;
+ }
+
+ MachineBasicBlock::iterator ZPRRestoreBegin = PPRRestoreBegin,
+ ZPRRestoreEnd = PPRRestoreBegin;
+ if (ZPRCalleeSavedSize) {
+ ZPRRestoreBegin = std::prev(ZPRRestoreEnd);
+ while (ZPRRestoreBegin != MBB.begin() &&
+ isPartOfZPRCalleeSaves(std::prev(ZPRRestoreBegin)))
+ --ZPRRestoreBegin;
}
+
+ auto CFAOffset =
+ SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize);
+ if (PPRCalleeSavedSize || ZPRCalleeSavedSize) {
+ // Deallocate the non-SVE locals first before we can deallocate (and
+ // restore callee saves) from the SVE area.
+ auto NonSVELocals = StackOffset::getFixed(NumBytes);
+ emitFrameOffset(MBB, ZPRRestoreBegin, DL, AArch64::SP, AArch64::SP,
+ NonSVELocals, TII, MachineInstr::FrameDestroy, false,
+ false, nullptr, EmitCFI && !HasFP, CFAOffset);
+ NumBytes = 0;
+ CFAOffset -= NonSVELocals;
+ }
+
+ if (ZPRLocalsSize) {
+ emitFrameOffset(MBB, ZPRRestoreBegin, DL, AArch64::SP, AArch64::SP,
+ ZPRLocalsSize, TII, MachineInstr::FrameDestroy, false,
+ false, nullptr, EmitCFI && !HasFP, CFAOffset);
+ CFAOffset -= ZPRLocalsSize;
+ }
+
+ if (PPRLocalsSize || ZPRCalleeSavedSize) {
+ assert(PPRRestoreBegin == ZPRRestoreEnd &&
+ "Expected PPR restores after ZPR");
+ emitFrameOffset(MBB, PPRRestoreBegin, DL, AArch64::SP, AArch64::SP,
+ PPRLocalsSize + ZPRCalleeSavedSize, TII,
+ MachineInstr::FrameDestroy, false, false, nullptr,
+ EmitCFI && !HasFP, CFAOffset);
+ CFAOffset -= PPRLocalsSize + ZPRCalleeSavedSize;
+ }
+ if (PPRCalleeSavedSize) {
+ emitFrameOffset(MBB, PPRRestoreEnd, DL, AArch64::SP, AArch64::SP,
+ PPRCalleeSavedSize, TII, MachineInstr::FrameDestroy,
+ false, false, nullptr, EmitCFI && !HasFP, CFAOffset);
+ }
+
+ // We only emit CFI information for ZPRs, so emit CFI after the ZPR restores.
if (EmitCFI)
- emitCalleeSavedSVERestores(RestoreEnd);
+ emitCalleeSavedSVERestores(ZPRRestoreEnd);
}
if (!HasFP) {
@@ -1624,7 +1779,7 @@ void AArch64EpilogueEmitter::emitCalleeSavedRestores(
CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameDestroy);
for (const auto &Info : CSI) {
- if (SVE != MFI.isScalableStackID(Info.getFrameIdx()))
+ if (SVE != MFI.hasScalableStackID(Info.getFrameIdx()))
continue;
MCRegister Reg = Info.getReg();
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 2b0c8ad..79975b0 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -71,6 +71,7 @@ bool AArch64RegisterInfo::regNeedsCFI(MCRegister Reg,
const MCPhysReg *
AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
assert(MF && "Invalid MachineFunction pointer.");
+ auto &AFI = *MF->getInfo<AArch64FunctionInfo>();
if (MF->getFunction().getCallingConv() == CallingConv::GHC)
// GHC set of callee saved regs is empty as all those regs are
@@ -101,10 +102,7 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_Win_AArch64_AAPCS_SwiftTail_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall)
return CSR_Win_AArch64_AAVPCS_SaveList;
- if (MF->getFunction().getCallingConv() ==
- CallingConv::AArch64_SVE_VectorCall)
- return CSR_Win_AArch64_SVE_AAPCS_SaveList;
- if (MF->getInfo<AArch64FunctionInfo>()->isSVECC())
+ if (AFI.hasSVE_AAPCS(*MF))
return CSR_Win_AArch64_SVE_AAPCS_SaveList;
return CSR_Win_AArch64_AAPCS_SaveList;
}
@@ -148,7 +146,7 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
// This is for OSes other than Windows; Windows is a separate case further
// above.
return CSR_AArch64_AAPCS_X18_SaveList;
- if (MF->getInfo<AArch64FunctionInfo>()->isSVECC())
+ if (AFI.hasSVE_AAPCS(*MF))
return CSR_AArch64_SVE_AAPCS_SaveList;
return CSR_AArch64_AAPCS_SaveList;
}
@@ -158,6 +156,7 @@ AArch64RegisterInfo::getDarwinCalleeSavedRegs(const MachineFunction *MF) const {
assert(MF && "Invalid MachineFunction pointer.");
assert(MF->getSubtarget<AArch64Subtarget>().isTargetDarwin() &&
"Invalid subtarget for getDarwinCalleeSavedRegs");
+ auto &AFI = *MF->getInfo<AArch64FunctionInfo>();
if (MF->getFunction().getCallingConv() == CallingConv::CFGuard_Check)
report_fatal_error(
@@ -205,7 +204,7 @@ AArch64RegisterInfo::getDarwinCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_Darwin_AArch64_RT_AllRegs_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::Win64)
return CSR_Darwin_AArch64_AAPCS_Win64_SaveList;
- if (MF->getInfo<AArch64FunctionInfo>()->isSVECC())
+ if (AFI.hasSVE_AAPCS(*MF))
return CSR_Darwin_AArch64_SVE_AAPCS_SaveList;
return CSR_Darwin_AArch64_AAPCS_SaveList;
}
@@ -643,7 +642,7 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
if (ST.hasSVE() || ST.isStreaming()) {
// Frames that have variable sized objects and scalable SVE objects,
// should always use a basepointer.
- if (!AFI->hasCalculatedStackSizeSVE() || AFI->getStackSizeSVE())
+ if (!AFI->hasCalculatedStackSizeSVE() || AFI->hasSVEStackSize())
return true;
}
@@ -783,7 +782,7 @@ AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
assert((!MF.getSubtarget<AArch64Subtarget>().hasSVE() ||
AFI->hasCalculatedStackSizeSVE()) &&
"Expected SVE area to be calculated by this point");
- return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->getStackSizeSVE() &&
+ return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->hasSVEStackSize() &&
!AFI->hasStackHazardSlotIndex();
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index f01d5f6..6efa78e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -608,6 +608,8 @@ public:
? LDSToKernelsThatNeedToAccessItIndirectly[HybridModuleRoot]
: EmptySet;
+ const size_t HybridModuleRootKernelsSize = HybridModuleRootKernels.size();
+
for (auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {
// Each iteration of this loop assigns exactly one global variable to
// exactly one of the implementation strategies.
@@ -647,7 +649,8 @@ public:
ModuleScopeVariables.insert(GV);
} else if (K.second.size() == 1) {
KernelAccessVariables.insert(GV);
- } else if (set_is_subset(K.second, HybridModuleRootKernels)) {
+ } else if (K.second.size() == HybridModuleRootKernelsSize &&
+ set_is_subset(K.second, HybridModuleRootKernels)) {
ModuleScopeVariables.insert(GV);
} else {
TableLookupVariables.insert(GV);
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index b3fd8c7..84287b6 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -352,10 +352,12 @@ def S_XNOR_SAVEEXEC_B64 : SOP1_64 <"s_xnor_saveexec_b64">;
} // End hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC, SCC]
+let Defs = [SCC] in {
def S_QUADMASK_B32 : SOP1_32 <"s_quadmask_b32",
[(set i32:$sdst, (int_amdgcn_s_quadmask i32:$src0))]>;
def S_QUADMASK_B64 : SOP1_64 <"s_quadmask_b64",
[(set i64:$sdst, (int_amdgcn_s_quadmask i64:$src0))]>;
+}
let Uses = [M0] in {
def S_MOVRELS_B32 : SOP1_32R <"s_movrels_b32">;
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 3ac7c28..8c21746 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -638,6 +638,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
// No support for these operations with v2f32/v2i32
setOperationAction(ISD::INSERT_VECTOR_ELT, {MVT::v2f32, MVT::v2i32}, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, {MVT::v2f32, MVT::v2i32}, Expand);
+
+ setOperationAction(ISD::TRUNCATE, MVT::v2i16, Expand);
+ setOperationAction({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND},
+ MVT::v2i32, Expand);
+
// Need custom lowering in case the index is dynamic.
if (STI.hasF32x2Instructions())
setOperationAction(ISD::EXTRACT_VECTOR_ELT, {MVT::v2f32, MVT::v2i32},
diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td
index 19d5aff..af1ceb6 100644
--- a/llvm/lib/Target/RISCV/RISCVGISel.td
+++ b/llvm/lib/Target/RISCV/RISCVGISel.td
@@ -118,7 +118,7 @@ let Predicates = [HasAtomicLdSt] in {
}
let Predicates = [HasAtomicLdSt, IsRV64] in {
- def : LdPat<atomic_load_nonext_32, LW, i32>;
+ // Load pattern is in RISCVInstrInfoA.td and shared with RV32.
def : StPat<atomic_store_32, SW, GPR, i32>;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index 99992d1..25accd9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -174,15 +174,14 @@ let Predicates = [HasAtomicLdSt] in {
def : StPat<relaxed_store<atomic_store_8>, SB, GPR, XLenVT>;
def : StPat<relaxed_store<atomic_store_16>, SH, GPR, XLenVT>;
def : StPat<relaxed_store<atomic_store_32>, SW, GPR, XLenVT>;
-}
-let Predicates = [HasAtomicLdSt, IsRV32] in {
- def : LdPat<relaxed_load<atomic_load_nonext_32>, LW>;
+ // Used by GISel for RV32 and RV64.
+ def : LdPat<relaxed_load<atomic_load_nonext_32>, LW, i32>;
}
let Predicates = [HasAtomicLdSt, IsRV64] in {
- def : LdPat<relaxed_load<atomic_load_asext_32>, LW>;
- def : LdPat<relaxed_load<atomic_load_zext_32>, LWU>;
+ def : LdPat<relaxed_load<atomic_load_asext_32>, LW, i64>;
+ def : LdPat<relaxed_load<atomic_load_zext_32>, LWU, i64>;
def : LdPat<relaxed_load<atomic_load_nonext_64>, LD, i64>;
def : StPat<relaxed_store<atomic_store_64>, SD, GPR, i64>;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index d998316..298d35a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -554,7 +554,8 @@ defset list<VTypeInfoToWide> AllWidenableBF16ToFloatVectors = {
// This represents the information we need in codegen for each pseudo.
// The definition should be consistent with `struct PseudoInfo` in
// RISCVInstrInfo.h.
-class RISCVVPseudo<dag outs, dag ins, list<dag> pattern = [], string opcodestr = "", string argstr = "">
+class RISCVVPseudo<dag outs, dag ins, list<dag> pattern = [],
+ string opcodestr = "", string argstr = "">
: Pseudo<outs, ins, pattern, opcodestr, argstr> {
Pseudo Pseudo = !cast<Pseudo>(NAME); // Used as a key.
Instruction BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
@@ -1010,8 +1011,7 @@ class VPseudoNullaryNoMask<VReg RegClass> :
class VPseudoNullaryMask<VReg RegClass> :
RISCVVPseudo<(outs GetVRegNoV0<RegClass>.R:$rd),
(ins GetVRegNoV0<RegClass>.R:$passthru,
- VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy),
- []> {
+ VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy)> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1190,8 +1190,7 @@ class VPseudoBinaryNoMask<VReg RetClass,
bits<2> TargetConstraintType = 1,
DAGOperand sewop = sew> :
RISCVVPseudo<(outs RetClass:$rd),
- (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, sewop:$sew),
- []> {
+ (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, sewop:$sew)> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1227,8 +1226,7 @@ class VPseudoBinaryNoMaskRoundingMode<VReg RetClass,
bits<2> TargetConstraintType = 1> :
RISCVVPseudo<(outs RetClass:$rd),
(ins RetClass:$passthru, Op1Class:$rs2, Op2Class:$rs1,
- vec_rm:$rm, AVL:$vl, sew:$sew, vec_policy:$policy),
- []> {
+ vec_rm:$rm, AVL:$vl, sew:$sew, vec_policy:$policy)> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1320,7 +1318,7 @@ class VPseudoIStoreNoMask<VReg StClass, VReg IdxClass, int EEW, bits<3> LMUL,
bit Ordered>:
RISCVVPseudo<(outs),
(ins StClass:$rd, GPRMemZeroOffset:$rs1, IdxClass:$rs2,
- AVL:$vl, sew:$sew),[]>,
+ AVL:$vl, sew:$sew)>,
RISCVVSX</*Masked*/0, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 0;
let mayStore = 1;
@@ -1333,7 +1331,7 @@ class VPseudoIStoreMask<VReg StClass, VReg IdxClass, int EEW, bits<3> LMUL,
bit Ordered>:
RISCVVPseudo<(outs),
(ins StClass:$rd, GPRMemZeroOffset:$rs1, IdxClass:$rs2,
- VMaskOp:$vm, AVL:$vl, sew:$sew),[]>,
+ VMaskOp:$vm, AVL:$vl, sew:$sew)>,
RISCVVSX</*Masked*/1, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 0;
let mayStore = 1;
@@ -1351,8 +1349,7 @@ class VPseudoBinaryMaskPolicy<VReg RetClass,
RISCVVPseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$passthru,
Op1Class:$rs2, Op2Class:$rs1,
- VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy),
- []> {
+ VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy)> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1371,8 +1368,7 @@ class VPseudoTernaryMaskPolicy<VReg RetClass,
RISCVVPseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$passthru,
Op1Class:$rs2, Op2Class:$rs1,
- VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy),
- []> {
+ VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy)> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1414,8 +1410,7 @@ class VPseudoBinaryMOutMask<VReg RetClass,
RISCVVPseudo<(outs RetClass:$rd),
(ins RetClass:$passthru,
Op1Class:$rs2, Op2Class:$rs1,
- VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy),
- []> {
+ VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy)> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1438,8 +1433,7 @@ class VPseudoTiedBinaryMask<VReg RetClass,
RISCVVPseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$passthru,
Op2Class:$rs1,
- VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy),
- []> {
+ VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy)> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1546,8 +1540,7 @@ class VPseudoTernaryNoMaskWithPolicyRoundingMode<VReg RetClass,
bits<2> TargetConstraintType = 1> :
RISCVVPseudo<(outs RetClass:$rd),
(ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2,
- vec_rm:$rm, AVL:$vl, sew:$sew, vec_policy:$policy),
- []> {
+ vec_rm:$rm, AVL:$vl, sew:$sew, vec_policy:$policy)> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1716,8 +1709,8 @@ class VPseudoUSSegStoreNoMask<VReg ValClass,
int EEW,
bits<4> NF> :
RISCVVPseudo<(outs),
- (ins ValClass:$rd, GPRMemZeroOffset:$rs1, AVL:$vl, sew:$sew),
- []>,
+ (ins ValClass:$rd, GPRMemZeroOffset:$rs1, AVL:$vl,
+ sew:$sew)>,
RISCVVSSEG<NF, /*Masked*/0, /*Strided*/0, !logtwo(EEW), VLMul> {
let mayLoad = 0;
let mayStore = 1;
@@ -6029,9 +6022,9 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1 in {
PseudoInstExpansion<(CSRRS GPR:$rd, SysRegVLENB.Encoding, X0)>,
Sched<[WriteRdVLENB]>;
let Defs = [VL, VTYPE] in {
- def PseudoReadVLENBViaVSETVLIX0 : Pseudo<(outs GPRNoX0:$rd), (ins uimm5:$shamt),
- []>,
- Sched<[WriteVSETVLI, ReadVSETVLI]>;
+ def PseudoReadVLENBViaVSETVLIX0 : Pseudo<(outs GPRNoX0:$rd),
+ (ins uimm5:$shamt), []>,
+ Sched<[WriteVSETVLI, ReadVSETVLI]>;
}
}
@@ -6694,14 +6687,14 @@ defm PseudoVID : VPseudoVID_V;
let Predicates = [HasVInstructions] in {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
let HasSEWOp = 1, BaseInstr = VMV_X_S in
- def PseudoVMV_X_S:
+ def PseudoVMV_X_S :
RISCVVPseudo<(outs GPR:$rd), (ins VR:$rs2, sew:$sew)>,
Sched<[WriteVMovXS, ReadVMovXS]>;
let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VMV_S_X, isReMaterializable = 1,
Constraints = "$rd = $passthru" in
- def PseudoVMV_S_X: RISCVVPseudo<(outs VR:$rd),
- (ins VR:$passthru, GPR:$rs1, AVL:$vl, sew:$sew),
- []>,
+ def PseudoVMV_S_X :
+ RISCVVPseudo<(outs VR:$rd),
+ (ins VR:$passthru, GPR:$rs1, AVL:$vl, sew:$sew)>,
Sched<[WriteVMovSX, ReadVMovSX_V, ReadVMovSX_X]>;
}
} // Predicates = [HasVInstructions]
@@ -6721,8 +6714,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
Constraints = "$rd = $passthru" in
def "PseudoVFMV_S_" # f.FX :
RISCVVPseudo<(outs VR:$rd),
- (ins VR:$passthru, f.fprclass:$rs1, AVL:$vl, sew:$sew),
- []>,
+ (ins VR:$passthru, f.fprclass:$rs1, AVL:$vl, sew:$sew)>,
Sched<[WriteVMovSF, ReadVMovSF_V, ReadVMovSF_F]>;
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td
index 5e013b4..680bca3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td
@@ -63,13 +63,14 @@ defm SD : SRL_r_aq_rl<0b011, "sd">;
//===----------------------------------------------------------------------===//
class PatLAQ<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
- : Pat<(vt (OpNode (vt GPRMemZeroOffset:$rs1))), (Inst GPRMemZeroOffset:$rs1)>;
+ : Pat<(vt (OpNode (XLenVT GPRMemZeroOffset:$rs1))),
+ (Inst GPRMemZeroOffset:$rs1)>;
// n.b. this switches order of arguments
// to deal with the fact that SRL has addr, data
// while atomic_store has data, addr
class PatSRL<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
- : Pat<(OpNode (vt GPR:$rs2), (vt GPRMemZeroOffset:$rs1)),
+ : Pat<(OpNode (vt GPR:$rs2), (XLenVT GPRMemZeroOffset:$rs1)),
(Inst GPRMemZeroOffset:$rs1, GPR:$rs2)>;
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index c4f1b68..ddb95a4 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -3981,7 +3981,6 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
void ModuleCallsiteContextGraph::updateAllocationCall(
CallInfo &Call, AllocationType AllocType) {
std::string AllocTypeString = getAllocTypeAttributeString(AllocType);
- removeAnyExistingAmbiguousAttribute(cast<CallBase>(Call.call()));
auto A = llvm::Attribute::get(Call.call()->getFunction()->getContext(),
"memprof", AllocTypeString);
cast<CallBase>(Call.call())->addFnAttr(A);
@@ -5643,7 +5642,6 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
// clone J-1 (J==0 is the original clone and does not have a VMaps
// entry).
CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
- removeAnyExistingAmbiguousAttribute(CBClone);
CBClone->addFnAttr(A);
ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", CBClone)
<< ore::NV("AllocationCall", CBClone) << " in clone "
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index e4cb4574..07ad65c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5780,6 +5780,45 @@ Instruction *InstCombinerImpl::foldICmpWithMinMax(Instruction &I,
return nullptr;
}
+/// Match and fold patterns like:
+/// icmp eq/ne X, min(max(X, Lo), Hi)
+/// which represents a range check and can be repsented as a ConstantRange.
+///
+/// For icmp eq, build ConstantRange [Lo, Hi + 1) and convert to:
+/// (X - Lo) u< (Hi + 1 - Lo)
+/// For icmp ne, build ConstantRange [Hi + 1, Lo) and convert to:
+/// (X - (Hi + 1)) u< (Lo - (Hi + 1))
+Instruction *InstCombinerImpl::foldICmpWithClamp(ICmpInst &I, Value *X,
+ MinMaxIntrinsic *Min) {
+ if (!I.isEquality() || !Min->hasOneUse() || !Min->isMin())
+ return nullptr;
+
+ const APInt *Lo = nullptr, *Hi = nullptr;
+ if (Min->isSigned()) {
+ if (!match(Min->getLHS(), m_OneUse(m_SMax(m_Specific(X), m_APInt(Lo)))) ||
+ !match(Min->getRHS(), m_APInt(Hi)) || !Lo->slt(*Hi))
+ return nullptr;
+ } else {
+ if (!match(Min->getLHS(), m_OneUse(m_UMax(m_Specific(X), m_APInt(Lo)))) ||
+ !match(Min->getRHS(), m_APInt(Hi)) || !Lo->ult(*Hi))
+ return nullptr;
+ }
+
+ ConstantRange CR = ConstantRange::getNonEmpty(*Lo, *Hi + 1);
+ ICmpInst::Predicate Pred;
+ APInt C, Offset;
+ if (I.getPredicate() == ICmpInst::ICMP_EQ)
+ CR.getEquivalentICmp(Pred, C, Offset);
+ else
+ CR.inverse().getEquivalentICmp(Pred, C, Offset);
+
+ if (!Offset.isZero())
+ X = Builder.CreateAdd(X, ConstantInt::get(X->getType(), Offset));
+
+ return replaceInstUsesWith(
+ I, Builder.CreateICmp(Pred, X, ConstantInt::get(X->getType(), C)));
+}
+
// Canonicalize checking for a power-of-2-or-zero value:
static Instruction *foldICmpPow2Test(ICmpInst &I,
InstCombiner::BuilderTy &Builder) {
@@ -7467,10 +7506,14 @@ Instruction *InstCombinerImpl::foldICmpCommutative(CmpPredicate Pred,
if (Instruction *NI = foldSelectICmp(Pred, SI, Op1, CxtI))
return NI;
- if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(Op0))
+ if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(Op0)) {
if (Instruction *Res = foldICmpWithMinMax(CxtI, MinMax, Op1, Pred))
return Res;
+ if (Instruction *Res = foldICmpWithClamp(CxtI, Op1, MinMax))
+ return Res;
+ }
+
{
Value *X;
const APInt *C;
@@ -8527,6 +8570,9 @@ static Instruction *foldFCmpFSubIntoFCmp(FCmpInst &I, Instruction *LHSI,
DenormalMode::getIEEE()) {
CI.replaceOperand(I, 0, X);
CI.replaceOperand(I, 1, Y);
+ I.setHasNoInfs(LHSI->hasNoInfs());
+ if (LHSI->hasNoNaNs())
+ I.setHasNoNaNs(true);
return &I;
}
break;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 4f94aa2..e01c145 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -725,6 +725,7 @@ public:
Instruction *foldICmpBinOp(ICmpInst &Cmp, const SimplifyQuery &SQ);
Instruction *foldICmpWithMinMax(Instruction &I, MinMaxIntrinsic *MinMax,
Value *Z, CmpPredicate Pred);
+ Instruction *foldICmpWithClamp(ICmpInst &Cmp, Value *X, MinMaxIntrinsic *Min);
Instruction *foldICmpEquality(ICmpInst &Cmp);
Instruction *foldIRemByPowerOfTwoToBitTest(ICmpInst &I);
Instruction *foldSignBitTest(ICmpInst &I);
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 735bad1..e1dcaa85 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -883,84 +883,6 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
}
}
-struct WeightInfo {
- // Weights for current iteration.
- SmallVector<uint32_t> Weights;
- // Weights to subtract after each iteration.
- const SmallVector<uint32_t> SubWeights;
-};
-
-/// Update the branch weights of an exiting block of a peeled-off loop
-/// iteration.
-/// Let F is a weight of the edge to continue (fallthrough) into the loop.
-/// Let E is a weight of the edge to an exit.
-/// F/(F+E) is a probability to go to loop and E/(F+E) is a probability to
-/// go to exit.
-/// Then, Estimated ExitCount = F / E.
-/// For I-th (counting from 0) peeled off iteration we set the weights for
-/// the peeled exit as (EC - I, 1). It gives us reasonable distribution,
-/// The probability to go to exit 1/(EC-I) increases. At the same time
-/// the estimated exit count in the remainder loop reduces by I.
-/// To avoid dealing with division rounding we can just multiple both part
-/// of weights to E and use weight as (F - I * E, E).
-static void updateBranchWeights(Instruction *Term, WeightInfo &Info) {
- setBranchWeights(*Term, Info.Weights, /*IsExpected=*/false);
- for (auto [Idx, SubWeight] : enumerate(Info.SubWeights))
- if (SubWeight != 0)
- // Don't set the probability of taking the edge from latch to loop header
- // to less than 1:1 ratio (meaning Weight should not be lower than
- // SubWeight), as this could significantly reduce the loop's hotness,
- // which would be incorrect in the case of underestimating the trip count.
- Info.Weights[Idx] =
- Info.Weights[Idx] > SubWeight
- ? std::max(Info.Weights[Idx] - SubWeight, SubWeight)
- : SubWeight;
-}
-
-/// Initialize the weights for all exiting blocks.
-static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos,
- Loop *L) {
- SmallVector<BasicBlock *> ExitingBlocks;
- L->getExitingBlocks(ExitingBlocks);
- for (BasicBlock *ExitingBlock : ExitingBlocks) {
- Instruction *Term = ExitingBlock->getTerminator();
- SmallVector<uint32_t> Weights;
- if (!extractBranchWeights(*Term, Weights))
- continue;
-
- // See the comment on updateBranchWeights() for an explanation of what we
- // do here.
- uint32_t FallThroughWeights = 0;
- uint32_t ExitWeights = 0;
- for (auto [Succ, Weight] : zip(successors(Term), Weights)) {
- if (L->contains(Succ))
- FallThroughWeights += Weight;
- else
- ExitWeights += Weight;
- }
-
- // Don't try to update weights for degenerate case.
- if (FallThroughWeights == 0)
- continue;
-
- SmallVector<uint32_t> SubWeights;
- for (auto [Succ, Weight] : zip(successors(Term), Weights)) {
- if (!L->contains(Succ)) {
- // Exit weights stay the same.
- SubWeights.push_back(0);
- continue;
- }
-
- // Subtract exit weights on each iteration, distributed across all
- // fallthrough edges.
- double W = (double)Weight / (double)FallThroughWeights;
- SubWeights.push_back((uint32_t)(ExitWeights * W));
- }
-
- WeightInfos.insert({Term, {std::move(Weights), std::move(SubWeights)}});
- }
-}
-
/// Clones the body of the loop L, putting it between \p InsertTop and \p
/// InsertBot.
/// \param IterNumber The serial number of the iteration currently being
@@ -1332,11 +1254,6 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
Instruction *LatchTerm =
cast<Instruction>(cast<BasicBlock>(Latch)->getTerminator());
- // If we have branch weight information, we'll want to update it for the
- // newly created branches.
- DenseMap<Instruction *, WeightInfo> Weights;
- initBranchWeights(Weights, L);
-
// Identify what noalias metadata is inside the loop: if it is inside the
// loop, the associated metadata must be cloned for each iteration.
SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
@@ -1382,11 +1299,6 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
assert(DT.verify(DominatorTree::VerificationLevel::Fast));
#endif
- for (auto &[Term, Info] : Weights) {
- auto *TermCopy = cast<Instruction>(VMap[Term]);
- updateBranchWeights(TermCopy, Info);
- }
-
// Remove Loop metadata from the latch branch instruction
// because it is not the Loop's latch branch anymore.
auto *LatchTermCopy = cast<Instruction>(VMap[LatchTerm]);
@@ -1426,15 +1338,38 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
}
}
- for (const auto &[Term, Info] : Weights) {
- setBranchWeights(*Term, Info.Weights, /*IsExpected=*/false);
- }
-
// Update Metadata for count of peeled off iterations.
unsigned AlreadyPeeled = 0;
if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
AlreadyPeeled = *Peeled;
- addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount);
+ unsigned TotalPeeled = AlreadyPeeled + PeelCount;
+ addStringMetadataToLoop(L, PeeledCountMetaData, TotalPeeled);
+
+ // Update metadata for the estimated trip count. The original branch weight
+ // metadata is already correct for both the remaining loop and the peeled loop
+ // iterations, so do not adjust it.
+ //
+ // For example, consider what happens when peeling 2 iterations from a loop
+ // with an estimated trip count of 10 and inserting them before the remaining
+ // loop. Each of the peeled iterations and each iteration in the remaining
+ // loop still has the same probability of exiting the *entire original* loop
+ // as it did when in the original loop, and thus it should still have the same
+ // branch weights. The peeled iterations' non-zero probabilities of exiting
+ // already appropriately reduce the probability of reaching the remaining
+ // iterations just as they did in the original loop. Trying to also adjust
+ // the remaining loop's branch weights to reflect its new trip count of 8 will
+ // erroneously further reduce its block frequencies. However, in case an
+ // analysis later needs to determine the trip count of the remaining loop
+ // while examining it in isolation without considering the probability of
+ // actually reaching it, we store the new trip count as separate metadata.
+ if (auto EstimatedTripCount = getLoopEstimatedTripCount(L)) {
+ unsigned EstimatedTripCountNew = *EstimatedTripCount;
+ if (EstimatedTripCountNew < TotalPeeled)
+ EstimatedTripCountNew = 0;
+ else
+ EstimatedTripCountNew -= TotalPeeled;
+ setLoopEstimatedTripCount(L, EstimatedTripCountNew);
+ }
if (Loop *ParentLoop = L->getParentLoop())
L = ParentLoop;
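To make the trip-count bookkeeping in the comment above concrete, here is a minimal standalone sketch (a hypothetical helper, not part of the patch) of the clamping arithmetic applied to the remaining loop's estimate:

// Illustrative only: mirrors the update above. E.g. an estimate of 10 with
// 2 peeled iterations yields 8; an estimate of 2 with 3 peeled iterations
// clamps to 0 instead of wrapping around.
static unsigned remainingEstimateAfterPeel(unsigned EstimatedTripCount,
                                           unsigned TotalPeeled) {
  return EstimatedTripCount < TotalPeeled ? 0
                                          : EstimatedTripCount - TotalPeeled;
}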
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index acdb379..f76777b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1110,8 +1110,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
// x && !x -> 0
if (match(&R, m_LogicalAnd(m_VPValue(X), m_Not(m_Deferred(X)))))
- return Def->replaceAllUsesWith(Plan->getOrAddLiveIn(
- ConstantInt::getFalse(VPTypeAnalysis(*Plan).inferScalarType(Def))));
+ return Def->replaceAllUsesWith(Plan->getFalse());
if (match(Def, m_Select(m_VPValue(), m_VPValue(X), m_Deferred(X))))
return Def->replaceAllUsesWith(X);
@@ -3346,12 +3345,7 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
}
- [[maybe_unused]] auto *ConstStep =
- ScalarStep->isLiveIn()
- ? dyn_cast<ConstantInt>(ScalarStep->getLiveInIRValue())
- : nullptr;
- assert(!ConstStep || ConstStep->getValue() != 1);
- (void)ConstStep;
+ assert(!match(ScalarStep, m_One()) && "Expected non-unit scalar-step");
if (TypeInfo.inferScalarType(ScalarStep) != IVTy) {
ScalarStep =
Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll b/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll
index a08f859..6d9aa8d 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll
@@ -756,3 +756,129 @@ e.1:
e.2:
ret void
}
+
+define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption_nofree_via_context(ptr %A, ptr %B) nosync {
+; CHECK-LABEL: 'all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption_nofree_via_context'
+; CHECK-NEXT: loop.header:
+; CHECK-NEXT: Memory dependences are safe with run-time checks
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group GRP0:
+; CHECK-NEXT: %gep.B = getelementptr inbounds i32, ptr %B, i64 %iv
+; CHECK-NEXT: Against group GRP1:
+; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
+; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group GRP0:
+; CHECK-NEXT: (Low: %B High: inttoptr (i64 -1 to ptr))
+; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
+; CHECK-NEXT: Group GRP1:
+; CHECK-NEXT: (Low: %A High: inttoptr (i64 -1 to ptr))
+; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header>
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %A, i64 2000) ]
+ call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %B, i64 2000) ]
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
+ %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
+ %gep.B = getelementptr inbounds i32, ptr %B, i64 %iv
+ %l = load i32, ptr %gep.A, align 4
+ store i32 0, ptr %gep.B, align 4
+ %cntable.c.1 = icmp ult i64 %iv, 1000
+ %iv.next = add nuw nsw i64 %iv, 1
+ br i1 %cntable.c.1, label %b2, label %e.1
+
+b2:
+ %uncntable.c.0 = icmp eq i32 %l, 0
+ br i1 %uncntable.c.0, label %e.2, label %b3
+
+b3:
+ %cntable.c.2 = icmp eq i64 %iv.next, 500
+ br i1 %cntable.c.2, label %cleanup4, label %latch
+
+latch:
+ br label %loop.header
+
+cleanup4:
+ ret void
+
+e.1:
+ ret void
+
+e.2:
+ ret void
+}
+
+define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption_missing_nofree_multiple_predecessors(ptr %A, ptr %B, i1 %c) nosync {
+; CHECK-LABEL: 'all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption_missing_nofree_multiple_predecessors'
+; CHECK-NEXT: loop.header:
+; CHECK-NEXT: Memory dependences are safe with run-time checks
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group GRP0:
+; CHECK-NEXT: %gep.B = getelementptr inbounds i32, ptr %B, i64 %iv
+; CHECK-NEXT: Against group GRP1:
+; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
+; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group GRP0:
+; CHECK-NEXT: (Low: %B High: inttoptr (i64 -1 to ptr))
+; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
+; CHECK-NEXT: Group GRP1:
+; CHECK-NEXT: (Low: %A High: inttoptr (i64 -1 to ptr))
+; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header>
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %A, i64 2000) ]
+ call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %B, i64 2000) ]
+ br i1 %c, label %then, label %else
+
+then:
+ br label %loop.header
+
+else:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ 0, %then ], [ 0, %else ], [ %iv.next, %latch ]
+ %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
+ %gep.B = getelementptr inbounds i32, ptr %B, i64 %iv
+ %l = load i32, ptr %gep.A, align 4
+ store i32 0, ptr %gep.B, align 4
+ %cntable.c.1 = icmp ult i64 %iv, 1000
+ %iv.next = add nuw nsw i64 %iv, 1
+ br i1 %cntable.c.1, label %b2, label %e.1
+
+b2:
+ %uncntable.c.0 = icmp eq i32 %l, 0
+ br i1 %uncntable.c.0, label %e.2, label %b3
+
+b3:
+ %cntable.c.2 = icmp eq i64 %iv.next, 500
+ br i1 %cntable.c.2, label %cleanup4, label %latch
+
+latch:
+ br label %loop.header
+
+cleanup4:
+ ret void
+
+e.1:
+ ret void
+
+e.2:
+ ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/framelayout-split-sve.mir b/llvm/test/CodeGen/AArch64/framelayout-split-sve.mir
new file mode 100644
index 0000000..35eafe8
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/framelayout-split-sve.mir
@@ -0,0 +1,587 @@
+# RUN: llc -mattr=+sve -aarch64-stack-hazard-in-non-streaming -aarch64-split-sve-objects -aarch64-streaming-hazard-size=1024 -mtriple=aarch64-none-linux-gnu -run-pass=prologepilog %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve -aarch64-stack-hazard-in-non-streaming -aarch64-split-sve-objects -aarch64-streaming-hazard-size=1024 -start-before=prologepilog %s -o - | FileCheck %s --check-prefix=ASM
+# RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve -aarch64-stack-hazard-in-non-streaming -aarch64-split-sve-objects -aarch64-streaming-hazard-size=1024 -start-before=prologepilog %s -filetype=obj -o %t
+# RUN: llvm-objdump --dwarf=frames %t | FileCheck %s --check-prefix=UNWINDINFO
+# RUN: rm -rf %t
+#
+# Test allocation and deallocation of SVE objects on the stack with
+# split-sve-objects (and hazard padding) enabled. This also tests using a
+# combination of scalable and non-scalable offsets to access the SVE on the
+# stack.
+#
+# With split-sve-objects (which implies hazard padding) the SVE area is split
+# into PPR and ZPR areas with (fixed-size) hazard padding between them. The PPR
+# area holds all scalable predicate callee saves and locals, and the ZPR area
+# holds all scalable vector callee saves and locals. Additionally, any FPR
+# callee save is promoted to a ZPR callee save (to avoid needing additional
+# hazard padding in the callee save area).
+#
+# +-------------+
+# | stack arg |
+# +-------------+ <- SP before call
+# | Callee Saves|
+# | Frame record| (if available)
+# |-------------| <- FP (if available)
+# | PPR area |
+# |-------------|
+# |/////////////| hazard padding
+# |-------------|
+# | ZPR area |
+# +-------------+
+# | : |
+# | Stack objs |
+# | : |
+# +-------------+ <- SP after call and frame-setup
+#
+--- |
+
+ define void @test_allocate_split_sve() uwtable { entry: unreachable }
+ define void @test_allocate_split_sve_realigned() uwtable { entry: unreachable }
+ define void @test_address_split_sve() uwtable { entry: unreachable }
+ define void @test_address_split_sve_fp() uwtable { entry: unreachable }
+ define aarch64_sve_vector_pcs void @save_restore_ppr_zpr() uwtable { entry: unreachable }
+
+...
+---
+# +----------+
+# |scratchreg| // x29 is used as scratch reg.
+# |----------|
+# | %stack.1 | // scalable predicate of n * 12 bytes, aligned to 16 bytes
+# | | // to be materialized with 1*ADDVL (<=> n * 16 bytes)
+# |----------|
+# |//////////| // hazard padding (1024 bytes) -- part of PPR locals area
+# |//////////| // Note: This is currently not included in the "stackSize"
+# +----------+
+# | %stack.0 | // scalable SVE object of n * 18 bytes, aligned to 16 bytes,
+# | | // to be materialized with 2*ADDVL (<=> 2 * n * 16 bytes)
+# +----------+
+# |//////////| // hazard padding (1024 bytes)
+# |----------|
+# | %stack.1 | // not scalable
+# +----------+ <- SP
+
+# CHECK-LABEL: name: test_allocate_split_sve
+# CHECK: stackSize: 1056
+
+# CHECK: bb.0.entry:
+# CHECK: liveins: $z0, $p0, $fp
+# CHECK: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.4)
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
+# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 1040
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22
+# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22
+#
+# CHECK-NEXT: $x8 = ADDXri $sp, 1040, 0
+# CHECK-NEXT: $x8 = ADDPL_XXI $x8, 7, implicit $vg
+# CHECK-NEXT: STR_ZXI $z0, killed $x8, 0 :: (store (<vscale x 1 x s128>) into %stack.0)
+# CHECK-NEXT: $x8 = ADDXri $sp, 2064, 0
+# CHECK-NEXT: STR_PXI $p0, killed $x8, 18 :: (store (<vscale x 1 x s16>) into %stack.1)
+#
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22
+# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 1056
+# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+# CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.4)
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29
+# CHECK-NEXT: RET_ReallyLR
+
+# ASM-LABEL: test_allocate_split_sve:
+# ASM: str x29, [sp, #-16]!
+# ASM-NEXT: .cfi_def_cfa_offset 16
+# ASM-NEXT: .cfi_offset w29, -16
+# ASM-NEXT: sub sp, sp, #1024
+# ASM-NEXT: .cfi_def_cfa_offset 1040
+# ASM-NEXT: addvl sp, sp, #-1
+# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 1040 + 8 * VG
+# ASM-NEXT: sub sp, sp, #1040
+# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG
+# ASM-NEXT: addvl sp, sp, #-2
+# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 2080 + 24 * VG
+#
+# ASM: addvl sp, sp, #2
+# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG
+# ASM-NEXT: add sp, sp, #1024
+# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 1056 + 8 * VG
+# ASM-NEXT: addvl sp, sp, #1
+# ASM-NEXT: .cfi_def_cfa wsp, 1056
+# ASM-NEXT: add sp, sp, #1040
+# ASM-NEXT: .cfi_def_cfa_offset 16
+# ASM-NEXT: ldr x29, [sp], #16
+# ASM-NEXT: .cfi_def_cfa_offset 0
+# ASM-NEXT: .cfi_restore w29
+
+# UNWINDINFO: DW_CFA_def_cfa_offset: +16
+# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
+# UNWINDINFO: DW_CFA_def_cfa_offset: +1040
+# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +1040, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus
+# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +2080, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus
+# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +2080, DW_OP_bregx 0x2e +0, DW_OP_lit24, DW_OP_mul, DW_OP_plus
+#
+# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +2080, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus
+# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +1056, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus
+# UNWINDINFO: DW_CFA_def_cfa: reg31 +1056
+# UNWINDINFO: DW_CFA_def_cfa_offset: +16
+# UNWINDINFO: DW_CFA_def_cfa_offset: +0
+# UNWINDINFO-NEXT: DW_CFA_restore: reg29
+
+name: test_allocate_split_sve
+stack:
+ - { id: 0, stack-id: scalable-vector, size: 18, alignment: 2 }
+ - { id: 1, stack-id: scalable-vector, size: 12, alignment: 2 }
+ - { id: 2, stack-id: default, size: 16, alignment: 8 }
+body: |
+ bb.0.entry:
+ liveins: $z0, $p0
+ STR_ZXI $z0, %stack.0, 0 :: (store (<vscale x 1 x s128>) into %stack.0)
+ STR_PXI $p0, %stack.1, 0 :: (store (<vscale x 1 x s16>) into %stack.1)
+ RET_ReallyLR
+...
+---
+
+# Stack realignment is not supported with split-sve-objects, so we fall back to
+# the default hazard padding implementation. This does not prevent hazards
+# between ZPRs and PPRs (TODO: support this case).
+#
+# +----------+
+# | lr, fp | // frame record
+# |----------|
+# |//////////| // hazard padding (1024 bytes)
+# |----------|
+# | %stack.1 | // scalable predicate of n * 12 bytes, aligned to 16 bytes
+# | | // to be materialized with 1*ADDVL (<=> n * 16 bytes)
+# +----------+
+# | %stack.0 | // scalable SVE object of n * 18 bytes, aligned to 16 bytes,
+# | | // to be materialized with 2*ADDVL (<=> 2 * n * 16 bytes)
+# +----------+
+# |//////////| // hazard padding (1024 bytes)
+# |----------|
+# | %stack.1 | // not scalable
+# +----------+ <- SP
+
+name: test_allocate_split_sve_realigned
+stack:
+ - { id: 0, stack-id: scalable-vector, size: 18, alignment: 2 }
+ - { id: 1, stack-id: scalable-vector, size: 12, alignment: 2 }
+ - { id: 2, stack-id: default, size: 16, alignment: 32 }
+body: |
+ bb.0.entry:
+ liveins: $z0, $p0
+ STR_ZXI $z0, %stack.0, 0 :: (store (<vscale x 1 x s128>) into %stack.0)
+ STR_PXI $p0, %stack.1, 0 :: (store (<vscale x 1 x s16>) into %stack.1)
+ RET_ReallyLR
+
+# CHECK-LABEL: name: test_allocate_split_sve_realigned
+# CHECK: stackSize: 2080
+
+# CHECK: bb.0.entry:
+# CHECK: liveins: $z0, $p0, $lr
+# CHECK: $sp = frame-setup SUBXri $sp, 1040, 0
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 1040
+# CHECK-NEXT: frame-setup STRXui killed $fp, $sp, 128 :: (store (s64) into %stack.5)
+# CHECK-NEXT: frame-setup STRXui killed $lr, $sp, 129 :: (store (s64) into %stack.4)
+# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 1024, 0
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 16
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
+# CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 1040, 0
+# CHECK-NEXT: $[[TMP]] = frame-setup ADDVL_XXI $[[TMP]], -2, implicit $vg
+# CHECK-NEXT: $sp = frame-setup ANDXri killed $x9, 7930
+#
+# CHECK-NEXT: $x8 = SUBXri $fp, 1024, 0
+# CHECK-NEXT: $x8 = ADDPL_XXI $x8, -1, implicit $vg
+# CHECK-NEXT: STR_ZXI $z0, killed $x8, -1 :: (store (<vscale x 1 x s128>) into %stack.0)
+# CHECK-NEXT: $x8 = SUBXri $fp, 1024, 0
+# CHECK-NEXT: STR_PXI $p0, killed $x8, -15 :: (store (<vscale x 1 x s16>) into %stack.1)
+#
+# CHECK-NEXT: $sp = frame-destroy SUBXri $fp, 1024, 0
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 1040
+# CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 129 :: (load (s64) from %stack.4)
+# CHECK-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.5)
+# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w30
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29
+# CHECK-NEXT: RET_ReallyLR
+
+# ASM-LABEL: test_allocate_split_sve_realigned
+# ASM: sub sp, sp, #1040
+# ASM-NEXT: .cfi_def_cfa_offset 1040
+# ASM-NEXT: str x29, [sp, #1024]
+# ASM-NEXT: str x30, [sp, #1032]
+# ASM-NEXT: add x29, sp, #1024
+# ASM-NEXT: .cfi_def_cfa w29, 16
+# ASM-NEXT: .cfi_offset w30, -8
+# ASM-NEXT: .cfi_offset w29, -16
+#
+# ASM: sub sp, x29, #1024
+# ASM-NEXT: .cfi_def_cfa wsp, 1040
+# ASM-NEXT: ldr x30, [sp, #1032]
+# ASM-NEXT: ldr x29, [sp, #1024]
+# ASM-NEXT: add sp, sp, #1040
+# ASM-NEXT: .cfi_def_cfa_offset 0
+# ASM-NEXT: .cfi_restore w30
+# ASM-NEXT: .cfi_restore w29
+
+# UNWINDINFO: DW_CFA_def_cfa_offset: +1040
+# UNWINDINFO: DW_CFA_def_cfa: reg29 +16
+# UNWINDINFO-NEXT: DW_CFA_offset: reg30 -8
+# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
+#
+# UNWINDINFO: DW_CFA_def_cfa: reg31 +1040
+# UNWINDINFO: DW_CFA_def_cfa_offset: +0
+# UNWINDINFO-NEXT: DW_CFA_restore: reg30
+# UNWINDINFO-NEXT: DW_CFA_restore: reg29
+...
+---
+
+# +----------+
+# |scratchreg| // x29 is used as scratch reg.
+# +----------+
+# | %stack.2 | // scalable predicate @ SP + 2064b + 46 scalable bytes
+# |----------|
+# |//////////| // hazard padding (1024 bytes) -- part of PPR locals area
+# |//////////| // Note: This is currently not included in the "stackSize"
+# |----------|
+# | %stack.0 | // scalable vector @ SP + 1040b + 16 scalable bytes
+# | %stack.1 | // scalable vector @ SP + 1040b
+# +----------+
+# |//////////| // hazard padding (1024 bytes)
+# |----------|
+# | %stack.3 | // not scalable
+# +----------+ <- SP
+
+# CHECK-LABEL: name: test_address_split_sve
+# CHECK: stackSize: 1056
+
+# CHECK: bb.0.entry:
+# CHECK-NEXT: liveins:
+# CHECK-NEXT: {{ $}}
+# CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.5)
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
+# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 1040
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22
+# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22
+#
+# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 1040, 0
+# CHECK-NEXT: STR_ZXI $z0, killed $[[TMP]], 1
+# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 1040, 0
+# CHECK-NEXT: STR_ZXI $z1, killed $[[TMP]], 0
+# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 2064, 0
+# CHECK-NEXT: STR_PXI $p0, killed $[[TMP]], 23
+#
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22
+# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 1056
+# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+# CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.5)
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29
+# CHECK-NEXT: RET_ReallyLR
+
+# ASM-LABEL: test_address_split_sve
+# ASM: str x29, [sp, #-16]!
+# ASM-NEXT: .cfi_def_cfa_offset 16
+# ASM-NEXT: .cfi_offset w29, -16
+# ASM-NEXT: sub sp, sp, #1024
+# ASM-NEXT: .cfi_def_cfa_offset 1040
+# ASM-NEXT: addvl sp, sp, #-1
+# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 1040 + 8 * VG
+# ASM-NEXT: sub sp, sp, #1040
+# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG
+# ASM-NEXT: addvl sp, sp, #-2
+# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 2080 + 24 * VG
+#
+# ASM: addvl sp, sp, #2
+# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG
+# ASM-NEXT: add sp, sp, #1024
+# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 1056 + 8 * VG
+# ASM-NEXT: addvl sp, sp, #1
+# ASM-NEXT: .cfi_def_cfa wsp, 1056
+# ASM-NEXT: add sp, sp, #1040
+# ASM-NEXT: .cfi_def_cfa_offset 16
+# ASM-NEXT: ldr x29, [sp], #16
+# ASM-NEXT: .cfi_def_cfa_offset 0
+# ASM-NEXT: .cfi_restore w29
+
+# UNWINDINFO: DW_CFA_def_cfa_offset: +16
+# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
+# UNWINDINFO: DW_CFA_def_cfa_offset: +1040
+# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +1040, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus
+# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +2080, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus
+# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +2080, DW_OP_bregx 0x2e +0, DW_OP_lit24, DW_OP_mul, DW_OP_plus
+#
+# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +2080, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus
+# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +1056, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus
+# UNWINDINFO: DW_CFA_def_cfa: reg31 +1056
+# UNWINDINFO: DW_CFA_def_cfa_offset: +16
+# UNWINDINFO: DW_CFA_def_cfa_offset: +0
+# UNWINDINFO-NEXT: DW_CFA_restore: reg29
+
+name: test_address_split_sve
+frameInfo:
+ maxAlignment: 16
+stack:
+ - { id: 0, stack-id: scalable-vector, size: 16, alignment: 8 }
+ - { id: 1, stack-id: scalable-vector, size: 16, alignment: 8 }
+ - { id: 2, stack-id: scalable-vector, size: 2, alignment: 2 }
+ - { id: 3, stack-id: default, size: 16, alignment: 8 }
+body: |
+ bb.0.entry:
+ liveins: $z0, $z1, $p0
+
+ STR_ZXI $z0, %stack.0, 0 :: (store (<vscale x 1 x s128>) into %stack.0)
+ STR_ZXI $z1, %stack.1, 0 :: (store (<vscale x 1 x s128>) into %stack.1)
+ STR_PXI $p0, %stack.2, 0 :: (store (<vscale x 1 x s16>) into %stack.2)
+
+ RET_ReallyLR
+...
+---
+# +----------+
+# | lr, fp | // frame record
+# +----------+ <- FP
+# | %stack.2 | // scalable predicate @ FP - 2 scalable bytes
+# |----------|
+# |//////////| // hazard padding (1024 bytes) -- part of PPR locals area
+# |//////////| // Note: This is currently not included in the "stackSize"
+# |----------|
+# | %stack.0 | // scalable vector @ FP - 1024b - 32 scalable bytes
+# | %stack.1 | // scalable vector @ FP - 1024b - 48 scalable bytes
+# +----------+
+# |//////////| // hazard padding (1024 bytes)
+# |----------|
+# | %stack.3 | // not scalable
+# +----------+ <- SP
+
+# CHECK-LABEL: name: test_address_split_sve_fp
+# CHECK: stackSize: 1056
+#
+# CHECK: bb.0.entry:
+# CHECK-NEXT: liveins:
+# CHECK-NEXT: {{ $}}
+# CHECK-NEXT: early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2 :: (store (s64) into %stack.6), (store (s64) into %stack.5)
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
+# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 16
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
+# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg
+# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg
+#
+# CHECK-NEXT: $[[TMP:x[0-9]+]] = SUBXri $fp, 1024, 0
+# CHECK-NEXT: STR_ZXI $z0, killed $[[TMP]], -2
+# CHECK-NEXT: $[[TMP:x[0-9]+]] = SUBXri $fp, 1024, 0
+# CHECK-NEXT: STR_ZXI $z1, killed $[[TMP]], -3
+# CHECK-NEXT: STR_PXI $p0, $fp, -1
+#
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg
+# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg
+# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16
+# CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.6), (load (s64) from %stack.5)
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w30
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29
+# CHECK-NEXT: RET_ReallyLR
+
+# ASM-LABEL: test_address_split_sve_fp
+# ASM: stp x29, x30, [sp, #-16]!
+# ASM-NEXT: .cfi_def_cfa_offset 16
+# ASM-NEXT: mov x29, sp
+# ASM-NEXT: .cfi_def_cfa w29, 16
+# ASM-NEXT: .cfi_offset w30, -8
+# ASM-NEXT: .cfi_offset w29, -16
+# ASM-NEXT: sub sp, sp, #1024
+# ASM-NEXT: addvl sp, sp, #-1
+# ASM-NEXT: sub sp, sp, #1040
+# ASM-NEXT: addvl sp, sp, #-2
+#
+# ASM: addvl sp, sp, #2
+# ASM-NEXT: add sp, sp, #1024
+# ASM-NEXT: addvl sp, sp, #1
+# ASM-NEXT: add sp, sp, #1040
+# ASM-NEXT: .cfi_def_cfa wsp, 16
+# ASM-NEXT: ldp x29, x30, [sp], #16
+# ASM-NEXT: .cfi_def_cfa_offset 0
+# ASM-NEXT: .cfi_restore w30
+# ASM-NEXT: .cfi_restore w29
+
+# UNWINDINFO: DW_CFA_def_cfa_offset: +16
+# UNWINDINFO: DW_CFA_def_cfa: reg29 +16
+# UNWINDINFO-NEXT: DW_CFA_offset: reg30 -8
+# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
+#
+# UNWINDINFO: DW_CFA_def_cfa: reg31 +16
+# UNWINDINFO: DW_CFA_def_cfa_offset: +0
+# UNWINDINFO-NEXT: DW_CFA_restore: reg30
+# UNWINDINFO-NEXT: DW_CFA_restore: reg29
+
+name: test_address_split_sve_fp
+frameInfo:
+ maxAlignment: 16
+ isFrameAddressTaken: true
+stack:
+ - { id: 0, stack-id: scalable-vector, size: 16, alignment: 8 }
+ - { id: 1, stack-id: scalable-vector, size: 16, alignment: 8 }
+ - { id: 2, stack-id: scalable-vector, size: 2, alignment: 2 }
+ - { id: 3, stack-id: default, size: 16, alignment: 8 }
+body: |
+ bb.0.entry:
+ liveins: $z0, $z1, $p0
+
+ STR_ZXI $z0, %stack.0, 0 :: (store (<vscale x 1 x s128>) into %stack.0)
+ STR_ZXI $z1, %stack.1, 0 :: (store (<vscale x 1 x s128>) into %stack.1)
+ STR_PXI $p0, %stack.2, 0 :: (store (<vscale x 1 x s16>) into %stack.2)
+
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: save_restore_ppr_zpr
+# CHECK: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.8)
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22
+# CHECK-NEXT: frame-setup STR_PXI killed $p6, $sp, 5 :: (store (s16) into %stack.7)
+# CHECK-NEXT: frame-setup STR_PXI killed $p5, $sp, 6 :: (store (s16) into %stack.6)
+# CHECK-NEXT: frame-setup STR_PXI killed $p4, $sp, 7 :: (store (s16) into %stack.5)
+#
+# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22
+#
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3, implicit $vg
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0a, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x11, 0x20, 0x1e, 0x22
+# CHECK-NEXT: frame-setup STR_ZXI killed $z10, $sp, 0 :: (store (s128) into %stack.4)
+# CHECK-NEXT: frame-setup STR_ZXI killed $z9, $sp, 1 :: (store (s128) into %stack.3)
+# CHECK-NEXT: frame-setup STR_ZXI killed $z8, $sp, 2 :: (store (s128) into %stack.2)
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22
+# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1056, 0
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0a, 0x8f, 0xb0, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x20, 0x1e, 0x22
+#
+#
+# CHECK: $sp = frame-destroy ADDXri $sp, 1056, 0
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x0a, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x11, 0x20, 0x1e, 0x22
+# CHECK-NEXT: $z10 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.4)
+# CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.3)
+# CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.2)
+#
+# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x20, 0x1e, 0x22
+#
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3, implicit $vg
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z8
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z9
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z10
+# CHECK-NEXT: $p6 = frame-destroy LDR_PXI $sp, 5 :: (load (s16) from %stack.7)
+# CHECK-NEXT: $p5 = frame-destroy LDR_PXI $sp, 6 :: (load (s16) from %stack.6)
+# CHECK-NEXT: $p4 = frame-destroy LDR_PXI $sp, 7 :: (load (s16) from %stack.5)
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16
+# CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.8)
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29
+# CHECK-NEXT: RET_ReallyLR
+
+# ASM-LABEL: save_restore_ppr_zpr:
+# ASM: str x29, [sp, #-16]!
+# ASM-NEXT: .cfi_def_cfa_offset 16
+# ASM-NEXT: .cfi_offset w29, -16
+# ASM-NEXT: addvl sp, sp, #-1
+# ASM-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG
+# ASM-NEXT: str p6, [sp, #5, mul vl]
+# ASM-NEXT: str p5, [sp, #6, mul vl]
+# ASM-NEXT: str p4, [sp, #7, mul vl]
+# ASM-NEXT: sub sp, sp, #1024
+# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 1040 + 8 * VG
+# ASM-NEXT: addvl sp, sp, #-3
+# ASM-NEXT: .cfi_escape 0x0f, 0x0a, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x11, 0x20, 0x1e, 0x22 // sp + 1040 + 32 * VG
+# ASM-NEXT: str z10, [sp]
+# ASM-NEXT: str z9, [sp, #1, mul vl]
+# ASM-NEXT: str z8, [sp, #2, mul vl]
+# ASM-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 16 * VG - 1040
+# ASM-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d9 @ cfa - 24 * VG - 1040
+# ASM-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d10 @ cfa - 32 * VG - 1040
+# ASM-NEXT: sub sp, sp, #1056
+# ASM-NEXT: .cfi_escape 0x0f, 0x0a, 0x8f, 0xb0, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x20, 0x1e, 0x22 // sp + 2096 + 32 * VG
+#
+# ASM: add sp, sp, #1056
+# ASM-NEXT: .cfi_escape 0x0f, 0x0a, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x11, 0x20, 0x1e, 0x22 // sp + 1040 + 32 * VG
+# ASM-NEXT: ldr z10, [sp]
+# ASM-NEXT: ldr z9, [sp, #1, mul vl]
+# ASM-NEXT: ldr z8, [sp, #2, mul vl]
+# ASM-NEXT: add sp, sp, #1024
+# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x20, 0x1e, 0x22 // sp + 16 + 32 * VG
+# ASM-NEXT: addvl sp, sp, #3
+# ASM-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG
+# ASM-NEXT: .cfi_restore z8
+# ASM-NEXT: .cfi_restore z9
+# ASM-NEXT: .cfi_restore z10
+# ASM-NEXT: ldr p6, [sp, #5, mul vl]
+# ASM-NEXT: ldr p5, [sp, #6, mul vl]
+# ASM-NEXT: ldr p4, [sp, #7, mul vl]
+# ASM-NEXT: addvl sp, sp, #1
+# ASM-NEXT: .cfi_def_cfa wsp, 16
+# ASM-NEXT: ldr x29, [sp], #16
+# ASM-NEXT: .cfi_def_cfa_offset 0
+# ASM-NEXT: .cfi_restore w29
+
+# UNWINDINFO: DW_CFA_def_cfa_offset: +16
+# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
+# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +16, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus
+# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +1040, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus
+# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +1040, DW_OP_bregx 0x2e +0, DW_OP_consts +32, DW_OP_mul, DW_OP_plus
+# UNWINDINFO: DW_CFA_expression: reg72 DW_OP_bregx 0x2e +0, DW_OP_consts -16, DW_OP_mul, DW_OP_plus, DW_OP_consts -1040, DW_OP_plus
+# UNWINDINFO: DW_CFA_expression: reg73 DW_OP_bregx 0x2e +0, DW_OP_consts -24, DW_OP_mul, DW_OP_plus, DW_OP_consts -1040, DW_OP_plus
+# UNWINDINFO: DW_CFA_expression: reg74 DW_OP_bregx 0x2e +0, DW_OP_consts -32, DW_OP_mul, DW_OP_plus, DW_OP_consts -1040, DW_OP_plus
+# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +2096, DW_OP_bregx 0x2e +0, DW_OP_consts +32, DW_OP_mul, DW_OP_plus
+#
+# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +1040, DW_OP_bregx 0x2e +0, DW_OP_consts +32, DW_OP_mul, DW_OP_plus
+# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +16, DW_OP_bregx 0x2e +0, DW_OP_consts +32, DW_OP_mul, DW_OP_plus
+# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +16, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus
+# UNWINDINFO-NEXT: DW_CFA_restore_extended: reg104
+# UNWINDINFO-NEXT: DW_CFA_restore_extended: reg105
+# UNWINDINFO-NEXT: DW_CFA_restore_extended: reg106
+# UNWINDINFO: DW_CFA_def_cfa: reg31 +16
+# UNWINDINFO: DW_CFA_def_cfa_offset: +0
+# UNWINDINFO-NEXT: DW_CFA_restore: reg29
+
+name: save_restore_ppr_zpr
+stack:
+ - { id: 0, stack-id: default, size: 32, alignment: 16 }
+body: |
+ bb.0.entry:
+
+ $p4 = IMPLICIT_DEF
+ $p5 = IMPLICIT_DEF
+ $p6 = IMPLICIT_DEF
+ $z8 = IMPLICIT_DEF
+ $z9 = IMPLICIT_DEF
+ $z10 = IMPLICIT_DEF
+
+ RET_ReallyLR
diff --git a/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir b/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir
index bff0cac..0298168 100644
--- a/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir
+++ b/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir
@@ -983,26 +983,22 @@ body: |
; EXPAND-LABEL: name: zpr_predicate_spill_p4_saved
; EXPAND: liveins: $p0, $p1, $p2, $p3, $fp, $p8, $p4
; EXPAND-NEXT: {{ $}}
- ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0
- ; EXPAND-NEXT: frame-setup STRXui killed $fp, $sp, 128 :: (store (s64) into %stack.3)
+ ; EXPAND-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2)
; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p8, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.2)
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.1)
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0
- ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.1)
- ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0
+ ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.0)
;
; EXPAND-NEXT: $p8 = IMPLICIT_DEF
;
- ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.2)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.1)
; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.1)
+ ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.0)
; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg
- ; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.3)
- ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0
+ ; EXPAND-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2)
; EXPAND-NEXT: RET undef $lr, implicit $p0, implicit $p1, implicit $p2, implicit $p3
; If we spill a register above p8, p4 must also be saved, so we can guarantee
diff --git a/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll b/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll
new file mode 100644
index 0000000..690a39d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll
@@ -0,0 +1,824 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-stack-hazard-in-non-streaming -aarch64-split-sve-objects -aarch64-streaming-hazard-size=1024 | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-stack-hazard-in-non-streaming -aarch64-split-sve-objects -aarch64-streaming-hazard-size=1024 -pass-remarks-analysis=stack-frame-layout 2>&1 >/dev/null | FileCheck %s --check-prefixes=CHECK-FRAMELAYOUT
+
+; CHECK-FRAMELAYOUT-LABEL: Function: zpr_and_ppr_local
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 16, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-2 x vscale], Type: Variable, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-32 x vscale], Type: Variable, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2064-32 x vscale], Type: Variable, Align: 16, Size: 1024
+
+; <GPRs>
+; %ppr_local sp+2048+30*vscale (= #15, mul vl for str/ldr PPR)
+; 14 x vscale bytes of padding sp+2048+16*vscale
+; <hazard padding> sp+1024+16*vscale
+; %zpr_local sp+1024
+; <hazard padding>
+; -> sp
+define void @zpr_and_ppr_local(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %vector) "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: zpr_and_ppr_local:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #1024
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: sub sp, sp, #1024
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 2064 + 16 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: add x8, sp, #2048
+; CHECK-NEXT: str p0, [x8, #15, mul vl]
+; CHECK-NEXT: add x8, sp, #1024
+; CHECK-NEXT: str z0, [x8]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #1024
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #1024
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %ppr_local = alloca <vscale x 16 x i1>
+ %zpr_local = alloca <vscale x 16 x i8>
+ store volatile <vscale x 16 x i1> %pred, ptr %ppr_local
+ store volatile <vscale x 16 x i8> %vector, ptr %zpr_local
+ ret void
+}
+
+; CHECK-FRAMELAYOUT-LABEL: Function: zpr_and_ppr_local_fp
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-2 x vscale], Type: Variable, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-32 x vscale], Type: Variable, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2064-32 x vscale], Type: Variable, Align: 16, Size: 1024
+
+; <GPRs>
+; -> fp
+; %ppr_local fp-2*vscale (= #-1, mul vl for str/ldr PPR)
+; 14 x vscale bytes of padding fp-16*vscale
+; <hazard padding> fp-1024-16*vscale
+; %zpr_local fp-1024-32*vscale (= #-2, mul vl for str/ldr ZPR)
+; <hazard padding>
+; -> sp
+define void @zpr_and_ppr_local_fp(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %vector) "aarch64_pstate_sm_compatible" "frame-pointer"="all" {
+; CHECK-LABEL: zpr_and_ppr_local_fp:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: sub sp, sp, #1024
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: sub sp, sp, #1024
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: sub x8, x29, #1024
+; CHECK-NEXT: str p0, [x29, #-1, mul vl]
+; CHECK-NEXT: str z0, [x8, #-2, mul vl]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #1024
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #1024
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ %ppr_local = alloca <vscale x 16 x i1>
+ %zpr_local = alloca <vscale x 16 x i8>
+ store volatile <vscale x 16 x i1> %pred, ptr %ppr_local
+ store volatile <vscale x 16 x i8> %vector, ptr %zpr_local
+ ret void
+}
+
+; CHECK-FRAMELAYOUT-LABEL: Function: fpr_and_ppr_local
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 16, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-2 x vscale], Type: Variable, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1048-16 x vscale], Type: Variable, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2080-16 x vscale], Type: Variable, Align: 16, Size: 1024
+
+; <GPRs>
+; %ppr_local sp+2064+14*vscale (= #7, mul vl for str/ldr PPR)
+; 14 x vscale bytes of padding sp+2064
+; <hazard padding> sp+1040
+; %fpr_local sp+1032
+; 8 bytes of padding sp+1024
+; <hazard padding>
+; -> sp
+define void @fpr_and_ppr_local(<vscale x 16 x i1> %pred, double %double) "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: fpr_and_ppr_local:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #1024
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: sub sp, sp, #1040
+; CHECK-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: add x8, sp, #2064
+; CHECK-NEXT: str p0, [x8, #7, mul vl]
+; CHECK-NEXT: str d0, [sp, #1032]
+; CHECK-NEXT: add sp, sp, #1024
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #1040
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %ppr_local = alloca <vscale x 16 x i1>
+ %fpr_local = alloca double
+ store volatile <vscale x 16 x i1> %pred, ptr %ppr_local
+ store volatile double %double, ptr %fpr_local
+ ret void
+}
+
+; CHECK-FRAMELAYOUT-LABEL: Function: fpr_and_ppr_local_fp
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-2 x vscale], Type: Variable, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1048-16 x vscale], Type: Variable, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2080-16 x vscale], Type: Variable, Align: 16, Size: 1024
+
+; <GPRs>
+; -> fp
+; %ppr_local fp-2*vscale (= #-1, mul vl for str/ldr PPR)
+; 14 x vscale bytes of padding
+; <hazard padding>
+; %fpr_local sp+1032
+; 8 bytes of padding sp+1024
+; <hazard padding>
+; -> sp
+define void @fpr_and_ppr_local_fp(<vscale x 16 x i1> %pred, double %double) "aarch64_pstate_sm_compatible" "frame-pointer"="all" {
+; CHECK-LABEL: fpr_and_ppr_local_fp:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: sub sp, sp, #1024
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: sub sp, sp, #1040
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: str p0, [x29, #-1, mul vl]
+; CHECK-NEXT: str d0, [sp, #1032]
+; CHECK-NEXT: add sp, sp, #1024
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #1040
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ %ppr_local = alloca <vscale x 16 x i1>
+ %fpr_local = alloca double
+ store volatile <vscale x 16 x i1> %pred, ptr %ppr_local
+ store volatile double %double, ptr %fpr_local
+ ret void
+}
+
+; CHECK-FRAMELAYOUT-LABEL: Function: gpr_and_ppr_local
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 16, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-2 x vscale], Type: Variable, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-32 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2064-32 x vscale], Type: Variable, Align: 16, Size: 1024
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2072-32 x vscale], Type: Variable, Align: 8, Size: 8
+
+; <CS GPRs>
+; %ppr_local sp+2064+30*vscale (= #15, mul vl for str/ldr PPR)
+; 14 x vscale bytes of padding
+; <hazard padding> sp+1040+16*vscale
+; <fpr callee save: z8> sp+1040
+; <hazard padding> sp+16
+; %gpr_local sp+8
+; 8 bytes of padding
+; -> sp
+define void @gpr_and_ppr_local(<vscale x 16 x i1> %pred, i64 %int) "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: gpr_and_ppr_local:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #1024
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: str z8, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #1040
+; CHECK-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 2080 + 16 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 16 * VG - 1040
+; CHECK-NEXT: add x8, sp, #2064
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: str p0, [x8, #15, mul vl]
+; CHECK-NEXT: str x0, [sp, #8]
+; CHECK-NEXT: add sp, sp, #1040
+; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #1024
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ tail call void asm sideeffect "", "~{d8}"() #1 ; Spill an FPR so hazard padding is needed
+ %ppr_local = alloca <vscale x 16 x i1>
+ %gpr_local = alloca i64
+ store volatile <vscale x 16 x i1> %pred, ptr %ppr_local
+ store volatile i64 %int, ptr %gpr_local
+ ret void
+}
+
+; CHECK-FRAMELAYOUT-LABEL: Function: gpr_and_ppr_local_fp
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-2 x vscale], Type: Variable, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-32 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2064-32 x vscale], Type: Variable, Align: 16, Size: 1024
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2072-32 x vscale], Type: Variable, Align: 8, Size: 8
+
+; <CS GPRs>
+; -> fp
+; %ppr_local fp-2*vscale (= #-1, mul vl for str/ldr PPR)
+; 14 x vscale bytes of padding
+; <hazard padding>
+; <fpr callee save: z8>
+; <hazard padding>
+; %gpr_local sp+8
+; 8 bytes of padding
+; -> sp
+define void @gpr_and_ppr_local_fp(<vscale x 16 x i1> %pred, i64 %int) "aarch64_pstate_sm_compatible" "frame-pointer"="all" {
+; CHECK-LABEL: gpr_and_ppr_local_fp:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: sub sp, sp, #1024
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: str z8, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #1040
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 16 * VG - 1040
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: str p0, [x29, #-1, mul vl]
+; CHECK-NEXT: str x0, [sp, #8]
+; CHECK-NEXT: add sp, sp, #1040
+; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #1024
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ tail call void asm sideeffect "", "~{d8}"() #1 ; Spill an FPR so hazard padding is needed
+ %ppr_local = alloca <vscale x 16 x i1>
+ %gpr_local = alloca i64
+ store volatile <vscale x 16 x i1> %pred, ptr %ppr_local
+ store volatile i64 %int, ptr %gpr_local
+ ret void
+}
+
+; CHECK-FRAMELAYOUT-LABEL: Function: all_stack_areas
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-2 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-4 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-6 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-8 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-10 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-12 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-14 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-16 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-18 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-20 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-22 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-24 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-34 x vscale], Type: Variable, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-64 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-80 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-96 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-112 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-128 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-144 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-160 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-176 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-192 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-208 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-224 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-240 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-256 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-272 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-288 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-304 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-320 x vscale], Type: Variable, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1048-320 x vscale], Type: Variable, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2080-320 x vscale], Type: Variable, Align: 16, Size: 1024
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2088-320 x vscale], Type: Variable, Align: 8, Size: 8
+
+; <CS GPRs>
+; <CS PPRs>
+; %ppr_local sp+2080+286*vscale (addvl #17, addpl #7)
+; 14 * vscale bytes of padding sp+2080+272*vscale
+; <hazard padding> sp+1056+272*vscale
+; <CS ZPRs> sp+1056+16*vscale
+; %zpr_local sp+1056
+; %fpr_local sp+1048
+; 8 bytes of padding sp+1040
+; <hazard padding> sp+16
+; %gpr_local sp+8
+; 8 bytes of padding sp
+; -> sp
+define void @all_stack_areas(<vscale x 16 x i1> %pred, double %fp) {
+; CHECK-LABEL: all_stack_areas:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #1024
+; CHECK-NEXT: addvl sp, sp, #-17
+; CHECK-NEXT: str z23, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: str z22, [sp, #1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z21, [sp, #2, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z20, [sp, #3, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z19, [sp, #4, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z18, [sp, #5, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z17, [sp, #6, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z16, [sp, #7, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z15, [sp, #8, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z14, [sp, #9, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z13, [sp, #10, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z12, [sp, #11, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z11, [sp, #12, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z10, [sp, #13, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z9, [sp, #14, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z8, [sp, #15, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #1056
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0b, 0x8f, 0xb0, 0x10, 0x92, 0x2e, 0x00, 0x11, 0xa0, 0x01, 0x1e, 0x22 // sp + 2096 + 160 * VG
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 32 * VG - 1040
+; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d9 @ cfa - 40 * VG - 1040
+; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d10 @ cfa - 48 * VG - 1040
+; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d11 @ cfa - 56 * VG - 1040
+; CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d12 @ cfa - 64 * VG - 1040
+; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0c, 0x92, 0x2e, 0x00, 0x11, 0xb8, 0x7f, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d13 @ cfa - 72 * VG - 1040
+; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0c, 0x92, 0x2e, 0x00, 0x11, 0xb0, 0x7f, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d14 @ cfa - 80 * VG - 1040
+; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0c, 0x92, 0x2e, 0x00, 0x11, 0xa8, 0x7f, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d15 @ cfa - 88 * VG - 1040
+; CHECK-NEXT: add x0, sp, #2080
+; CHECK-NEXT: add x8, sp, #2080
+; CHECK-NEXT: add x1, sp, #1056
+; CHECK-NEXT: addvl x0, x0, #17
+; CHECK-NEXT: add x2, sp, #1048
+; CHECK-NEXT: add x3, sp, #8
+; CHECK-NEXT: addpl x0, x0, #7
+; CHECK-NEXT: str d0, [sp, #1048]
+; CHECK-NEXT: str p0, [x8, #143, mul vl]
+; CHECK-NEXT: bl foo
+; CHECK-NEXT: add sp, sp, #1056
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr z23, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z22, [sp, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z21, [sp, #2, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z20, [sp, #3, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z19, [sp, #4, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z18, [sp, #5, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z17, [sp, #6, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z16, [sp, #7, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z15, [sp, #8, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z14, [sp, #9, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z13, [sp, #10, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z12, [sp, #11, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #1024
+; CHECK-NEXT: addvl sp, sp, #17
+; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ %ppr_local = alloca <vscale x 16 x i1>
+ %zpr_local = alloca <vscale x 16 x i8>
+ %fpr_local = alloca double
+ ; // Needed to sort %fpr_local into the FPR region
+ store double %fp, ptr %fpr_local
+ ; // Needed to sort %ppr_local into the PPR region
+ store <vscale x 16 x i1> %pred, ptr %ppr_local
+ %gpr_local = alloca i64
+ call void @foo(ptr %ppr_local, ptr %zpr_local, ptr %fpr_local, ptr %gpr_local)
+ ret void
+}
+declare void @foo(ptr, ptr, ptr, ptr)
+
+; CHECK-FRAMELAYOUT-LABEL: Function: all_stack_areas_fp
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 16, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-24], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-2 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-4 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-6 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-8 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-10 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-12 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-14 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-16 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-18 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-20 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-22 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-24 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-34 x vscale], Type: Variable, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-64 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-80 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-96 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-112 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-128 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-144 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-160 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-176 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-192 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-208 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-224 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-240 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-256 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-272 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-288 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-304 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-320 x vscale], Type: Variable, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1064-320 x vscale], Type: Variable, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2096-320 x vscale], Type: Variable, Align: 16, Size: 1024
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2104-320 x vscale], Type: Variable, Align: 8, Size: 8
+
+; <CS GPRs>
+; -> fp
+; <CS PPRs> fp-32*vscale
+; %ppr_local fp-34*vscale (addpl #-17)
+; 14 * vscale bytes of padding fp-48*vscale
+; <hazard padding> fp-1024-48*vscale
+; <CS ZPRs> fp-1024-304*vscale
+; %zpr_local fp-1024-320*vscale (addvl #-20)
+; %fpr_local sp+1048
+; 8 bytes of padding sp+1040
+; <hazard padding> sp+16
+; %gpr_local sp+8
+; 8 bytes of padding sp
+; -> sp
+define void @all_stack_areas_fp(<vscale x 16 x i1> %pred, double %fp) "frame-pointer"="all" {
+; CHECK-LABEL: all_stack_areas_fp:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT: str x28, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #1024
+; CHECK-NEXT: addvl sp, sp, #-17
+; CHECK-NEXT: str z23, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: str z22, [sp, #1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z21, [sp, #2, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z20, [sp, #3, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z19, [sp, #4, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z18, [sp, #5, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z17, [sp, #6, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z16, [sp, #7, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z15, [sp, #8, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z14, [sp, #9, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z13, [sp, #10, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z12, [sp, #11, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z11, [sp, #12, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z10, [sp, #13, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z9, [sp, #14, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z8, [sp, #15, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #1056
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: .cfi_def_cfa w29, 32
+; CHECK-NEXT: .cfi_offset w28, -16
+; CHECK-NEXT: .cfi_offset w30, -24
+; CHECK-NEXT: .cfi_offset w29, -32
+; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d8 @ cfa - 32 * VG - 1056
+; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d9 @ cfa - 40 * VG - 1056
+; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d10 @ cfa - 48 * VG - 1056
+; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d11 @ cfa - 56 * VG - 1056
+; CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d12 @ cfa - 64 * VG - 1056
+; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0c, 0x92, 0x2e, 0x00, 0x11, 0xb8, 0x7f, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d13 @ cfa - 72 * VG - 1056
+; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0c, 0x92, 0x2e, 0x00, 0x11, 0xb0, 0x7f, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d14 @ cfa - 80 * VG - 1056
+; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0c, 0x92, 0x2e, 0x00, 0x11, 0xa8, 0x7f, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d15 @ cfa - 88 * VG - 1056
+; CHECK-NEXT: sub x1, x29, #1024
+; CHECK-NEXT: addpl x0, x29, #-17
+; CHECK-NEXT: add x2, sp, #1048
+; CHECK-NEXT: addvl x1, x1, #-20
+; CHECK-NEXT: add x3, sp, #8
+; CHECK-NEXT: str d0, [sp, #1048]
+; CHECK-NEXT: str p0, [x29, #-17, mul vl]
+; CHECK-NEXT: bl foo
+; CHECK-NEXT: add sp, sp, #1056
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr z23, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z22, [sp, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z21, [sp, #2, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z20, [sp, #3, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z19, [sp, #4, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z18, [sp, #5, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z17, [sp, #6, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z16, [sp, #7, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z15, [sp, #8, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z14, [sp, #9, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z13, [sp, #10, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z12, [sp, #11, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #1024
+; CHECK-NEXT: addvl sp, sp, #17
+; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x28, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ %ppr_local = alloca <vscale x 16 x i1>
+ %zpr_local = alloca <vscale x 16 x i8>
+ %fpr_local = alloca double
+ ; // Needed to sort %fpr_local into the FPR region
+ store double %fp, ptr %fpr_local
+ ; // Needed to sort %ppr_local into the PPR region
+ store <vscale x 16 x i1> %pred, ptr %ppr_local
+ %gpr_local = alloca i64
+ call void @foo(ptr %ppr_local, ptr %zpr_local, ptr %fpr_local, ptr %gpr_local)
+ ret void
+}
+
+; CHECK-FRAMELAYOUT-LABEL: Function: svecc_call
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-24], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48], Type: Spill, Align: 16, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-56], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-2 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-4 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-6 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-8 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-10 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-12 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-14 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-16 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-18 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-20 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-22 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-24 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-48 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-64 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-80 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-96 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-112 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-128 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-144 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-160 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-176 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-192 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-208 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-224 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-240 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-256 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-272 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-288 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2112-288 x vscale], Type: Variable, Align: 16, Size: 1024
+
+define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8> %P3, i16 %P4) "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: svecc_call:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: stp x29, x30, [sp, #-64]! // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: cntd x9
+; CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: str x9, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x26, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: .cfi_def_cfa w29, 64
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w26, -16
+; CHECK-NEXT: .cfi_offset w27, -24
+; CHECK-NEXT: .cfi_offset w28, -32
+; CHECK-NEXT: .cfi_offset vg, -48
+; CHECK-NEXT: .cfi_offset w30, -56
+; CHECK-NEXT: .cfi_offset w29, -64
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #1024
+; CHECK-NEXT: addvl sp, sp, #-16
+; CHECK-NEXT: str z23, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: str z22, [sp, #1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z21, [sp, #2, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z20, [sp, #3, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z19, [sp, #4, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z18, [sp, #5, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z17, [sp, #6, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z16, [sp, #7, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z15, [sp, #8, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z14, [sp, #9, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z13, [sp, #10, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z12, [sp, #11, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z11, [sp, #12, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z10, [sp, #13, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z9, [sp, #14, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z8, [sp, #15, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d8 @ cfa - 24 * IncomingVG - 1088
+; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d9 @ cfa - 32 * IncomingVG - 1088
+; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d10 @ cfa - 40 * IncomingVG - 1088
+; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d11 @ cfa - 48 * IncomingVG - 1088
+; CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d12 @ cfa - 56 * IncomingVG - 1088
+; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d13 @ cfa - 64 * IncomingVG - 1088
+; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0e, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0xb8, 0x7f, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d14 @ cfa - 72 * IncomingVG - 1088
+; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0e, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0xb0, 0x7f, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d15 @ cfa - 80 * IncomingVG - 1088
+; CHECK-NEXT: sub sp, sp, #1024
+; CHECK-NEXT: mov x8, x0
+; CHECK-NEXT: bl __arm_sme_state
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: tbz w19, #0, .LBB8_2
+; CHECK-NEXT: // %bb.1: // %entry
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: .LBB8_2: // %entry
+; CHECK-NEXT: mov x0, x8
+; CHECK-NEXT: mov w1, #45 // =0x2d
+; CHECK-NEXT: mov w2, #37 // =0x25
+; CHECK-NEXT: bl memset
+; CHECK-NEXT: tbz w19, #0, .LBB8_4
+; CHECK-NEXT: // %bb.3: // %entry
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: .LBB8_4: // %entry
+; CHECK-NEXT: mov w0, #22647 // =0x5877
+; CHECK-NEXT: movk w0, #59491, lsl #16
+; CHECK-NEXT: add sp, sp, #1024
+; CHECK-NEXT: ldr z23, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z22, [sp, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z21, [sp, #2, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z20, [sp, #3, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z19, [sp, #4, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z18, [sp, #5, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z17, [sp, #6, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z16, [sp, #7, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z15, [sp, #8, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z14, [sp, #9, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z13, [sp, #10, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z12, [sp, #11, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #1024
+; CHECK-NEXT: addvl sp, sp, #16
+; CHECK-NEXT: .cfi_restore z8
+; CHECK-NEXT: .cfi_restore z9
+; CHECK-NEXT: .cfi_restore z10
+; CHECK-NEXT: .cfi_restore z11
+; CHECK-NEXT: .cfi_restore z12
+; CHECK-NEXT: .cfi_restore z13
+; CHECK-NEXT: .cfi_restore z14
+; CHECK-NEXT: .cfi_restore z15
+; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: .cfi_def_cfa wsp, 64
+; CHECK-NEXT: ldp x26, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: .cfi_restore w19
+; CHECK-NEXT: .cfi_restore w26
+; CHECK-NEXT: .cfi_restore w27
+; CHECK-NEXT: .cfi_restore w28
+; CHECK-NEXT: .cfi_restore vg
+; CHECK-NEXT: .cfi_restore w30
+; CHECK-NEXT: .cfi_restore w29
+; CHECK-NEXT: ret
+entry:
+ tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2
+ %call = call ptr @memset(ptr noundef nonnull %P1, i32 noundef 45, i32 noundef 37)
+ ret i32 -396142473
+}
+declare ptr @memset(ptr, i32, i32)
+
+; FIXME: aarch64-split-sve-objects is currently not supported in this function
+; as it requires stack realignment (for the 32-byte aligned alloca).
+; GPR CSRs
+; <hazard padding>
+; FPR CSRs
+; <hazard padding>
+; <SVE locals (PPRs and ZPRs)> <--- hazard between PPRs and ZPRs here!
+; <realignment padding>
+; -> sp
+define void @zpr_and_ppr_local_realignment(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %vector, i64 %gpr) "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: zpr_and_ppr_local_realignment:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #1040
+; CHECK-NEXT: sub x9, sp, #1040
+; CHECK-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
+; CHECK-NEXT: add x29, sp, #1024
+; CHECK-NEXT: addvl x9, x9, #-2
+; CHECK-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill
+; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: sub x8, x29, #1024
+; CHECK-NEXT: str p0, [x8, #-1, mul vl]
+; CHECK-NEXT: str z0, [x8, #-2, mul vl]
+; CHECK-NEXT: str x0, [sp]
+; CHECK-NEXT: sub sp, x29, #1024
+; CHECK-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #1040
+; CHECK-NEXT: ret
+ %ppr_local = alloca <vscale x 16 x i1>
+ %zpr_local = alloca <vscale x 16 x i8>
+ %gpr_local = alloca i64, align 32
+ store volatile <vscale x 16 x i1> %pred, ptr %ppr_local
+ store volatile <vscale x 16 x i8> %vector, ptr %zpr_local
+ store volatile i64 %gpr, ptr %gpr_local
+ ret void
+}
+
+define void @zpr_and_ppr_local_stack_probing(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %vector, i64 %gpr)
+; CHECK-LABEL: zpr_and_ppr_local_stack_probing:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #1024
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: sub sp, sp, #1824
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xb0, 0x16, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 2864 + 16 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: add x8, sp, #2848
+; CHECK-NEXT: str p0, [x8, #15, mul vl]
+; CHECK-NEXT: add x8, sp, #1824
+; CHECK-NEXT: str z0, [x8]
+; CHECK-NEXT: str x0, [sp]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #1024
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #1824
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ "probe-stack"="inline-asm" "stack-probe-size"="4096" "frame-pointer"="none" "aarch64_pstate_sm_compatible"
+{
+ %ppr_local = alloca <vscale x 16 x i1>
+ %zpr_local = alloca <vscale x 16 x i8>
+ %gpr_local = alloca i64, i64 100, align 8
+ store volatile <vscale x 16 x i1> %pred, ptr %ppr_local
+ store volatile <vscale x 16 x i8> %vector, ptr %zpr_local
+ store volatile i64 %gpr, ptr %gpr_local
+ ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/stack-hazard.ll b/llvm/test/CodeGen/AArch64/stack-hazard.ll
index 5f52280..333a8be 100644
--- a/llvm/test/CodeGen/AArch64/stack-hazard.ll
+++ b/llvm/test/CodeGen/AArch64/stack-hazard.ll
@@ -1,7 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-stack-hazard-size=0 | FileCheck %s --check-prefixes=CHECK,CHECK0
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-stack-hazard-size=64 | FileCheck %s --check-prefixes=CHECK,CHECK64
-; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-stack-hazard-size=1024 | FileCheck %s --check-prefixes=CHECK,CHECK1024
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-stack-hazard-size=1024 | FileCheck %s --check-prefixes=CHECK,CHECK1024,CHECK1024-NOSPLITSVE
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-split-sve-objects -aarch64-stack-hazard-size=1024 | FileCheck %s --check-prefixes=CHECK,CHECK1024,CHECK1024-SPLITSVE
define i32 @basic(i32 noundef %num) {
; CHECK-LABEL: basic:
@@ -1503,72 +1504,24 @@ define [2 x <vscale x 4 x i1>] @sve_signature_pred_2xv4i1([2 x <vscale x 4 x i1>
}
define [2 x <vscale x 4 x i1>] @sve_signature_pred_2xv4i1_caller([2 x <vscale x 4 x i1>] %arg1, [2 x <vscale x 4 x i1>] %arg2) nounwind "aarch64_pstate_sm_compatible" {
-; CHECK0-LABEL: sve_signature_pred_2xv4i1_caller:
-; CHECK0: // %bb.0:
-; CHECK0-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK0-NEXT: addvl sp, sp, #-1
-; CHECK0-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill
-; CHECK0-NEXT: mov p5.b, p0.b
-; CHECK0-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK0-NEXT: mov p4.b, p1.b
-; CHECK0-NEXT: mov p0.b, p2.b
-; CHECK0-NEXT: mov p1.b, p3.b
-; CHECK0-NEXT: mov p2.b, p5.b
-; CHECK0-NEXT: mov p3.b, p4.b
-; CHECK0-NEXT: bl sve_signature_pred_2xv4i1
-; CHECK0-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
-; CHECK0-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
-; CHECK0-NEXT: addvl sp, sp, #1
-; CHECK0-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
-; CHECK0-NEXT: ret
-;
-; CHECK64-LABEL: sve_signature_pred_2xv4i1_caller:
-; CHECK64: // %bb.0:
-; CHECK64-NEXT: sub sp, sp, #80
-; CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
-; CHECK64-NEXT: addvl sp, sp, #-1
-; CHECK64-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill
-; CHECK64-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK64-NEXT: sub sp, sp, #64
-; CHECK64-NEXT: mov p4.b, p1.b
-; CHECK64-NEXT: mov p5.b, p0.b
-; CHECK64-NEXT: mov p0.b, p2.b
-; CHECK64-NEXT: mov p1.b, p3.b
-; CHECK64-NEXT: mov p2.b, p5.b
-; CHECK64-NEXT: mov p3.b, p4.b
-; CHECK64-NEXT: bl sve_signature_pred_2xv4i1
-; CHECK64-NEXT: add sp, sp, #64
-; CHECK64-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
-; CHECK64-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
-; CHECK64-NEXT: addvl sp, sp, #1
-; CHECK64-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
-; CHECK64-NEXT: add sp, sp, #80
-; CHECK64-NEXT: ret
-;
-; CHECK1024-LABEL: sve_signature_pred_2xv4i1_caller:
-; CHECK1024: // %bb.0:
-; CHECK1024-NEXT: sub sp, sp, #1040
-; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
-; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill
-; CHECK1024-NEXT: addvl sp, sp, #-1
-; CHECK1024-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill
-; CHECK1024-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK1024-NEXT: sub sp, sp, #1024
-; CHECK1024-NEXT: mov p4.b, p1.b
-; CHECK1024-NEXT: mov p5.b, p0.b
-; CHECK1024-NEXT: mov p0.b, p2.b
-; CHECK1024-NEXT: mov p1.b, p3.b
-; CHECK1024-NEXT: mov p2.b, p5.b
-; CHECK1024-NEXT: mov p3.b, p4.b
-; CHECK1024-NEXT: bl sve_signature_pred_2xv4i1
-; CHECK1024-NEXT: add sp, sp, #1024
-; CHECK1024-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
-; CHECK1024-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
-; CHECK1024-NEXT: addvl sp, sp, #1
-; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload
-; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
-; CHECK1024-NEXT: add sp, sp, #1040
-; CHECK1024-NEXT: ret
+; CHECK-LABEL: sve_signature_pred_2xv4i1_caller:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: mov p5.b, p0.b
+; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: mov p4.b, p1.b
+; CHECK-NEXT: mov p0.b, p2.b
+; CHECK-NEXT: mov p1.b, p3.b
+; CHECK-NEXT: mov p2.b, p5.b
+; CHECK-NEXT: mov p3.b, p4.b
+; CHECK-NEXT: bl sve_signature_pred_2xv4i1
+; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ret
%res = call [2 x <vscale x 4 x i1>] @sve_signature_pred_2xv4i1([2 x <vscale x 4 x i1>] %arg2, [2 x <vscale x 4 x i1>] %arg1)
ret [2 x <vscale x 4 x i1>] %res
}
@@ -2113,139 +2066,269 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8> %P3,
; CHECK64-NEXT: .cfi_restore w29
; CHECK64-NEXT: ret
;
-; CHECK1024-LABEL: svecc_call:
-; CHECK1024: // %bb.0: // %entry
-; CHECK1024-NEXT: sub sp, sp, #1088
-; CHECK1024-NEXT: .cfi_def_cfa_offset 1088
-; CHECK1024-NEXT: cntd x9
-; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
-; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill
-; CHECK1024-NEXT: str x9, [sp, #1040] // 8-byte Folded Spill
-; CHECK1024-NEXT: str x28, [sp, #1048] // 8-byte Folded Spill
-; CHECK1024-NEXT: str x27, [sp, #1056] // 8-byte Folded Spill
-; CHECK1024-NEXT: str x26, [sp, #1064] // 8-byte Folded Spill
-; CHECK1024-NEXT: str x19, [sp, #1072] // 8-byte Folded Spill
-; CHECK1024-NEXT: add x29, sp, #1024
-; CHECK1024-NEXT: .cfi_def_cfa w29, 64
-; CHECK1024-NEXT: .cfi_offset w19, -16
-; CHECK1024-NEXT: .cfi_offset w26, -24
-; CHECK1024-NEXT: .cfi_offset w27, -32
-; CHECK1024-NEXT: .cfi_offset w28, -40
-; CHECK1024-NEXT: .cfi_offset vg, -48
-; CHECK1024-NEXT: .cfi_offset w30, -56
-; CHECK1024-NEXT: .cfi_offset w29, -64
-; CHECK1024-NEXT: addvl sp, sp, #-18
-; CHECK1024-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
-; CHECK1024-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
-; CHECK1024-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
-; CHECK1024-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK1024-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
-; CHECK1024-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
-; CHECK1024-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
-; CHECK1024-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
-; CHECK1024-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
-; CHECK1024-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
-; CHECK1024-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
-; CHECK1024-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
-; CHECK1024-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d8 @ cfa - 8 * IncomingVG - 1088
-; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d9 @ cfa - 16 * IncomingVG - 1088
-; CHECK1024-NEXT: .cfi_escape 0x10, 0x4a, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d10 @ cfa - 24 * IncomingVG - 1088
-; CHECK1024-NEXT: .cfi_escape 0x10, 0x4b, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d11 @ cfa - 32 * IncomingVG - 1088
-; CHECK1024-NEXT: .cfi_escape 0x10, 0x4c, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d12 @ cfa - 40 * IncomingVG - 1088
-; CHECK1024-NEXT: .cfi_escape 0x10, 0x4d, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d13 @ cfa - 48 * IncomingVG - 1088
-; CHECK1024-NEXT: .cfi_escape 0x10, 0x4e, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d14 @ cfa - 56 * IncomingVG - 1088
-; CHECK1024-NEXT: .cfi_escape 0x10, 0x4f, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d15 @ cfa - 64 * IncomingVG - 1088
-; CHECK1024-NEXT: sub sp, sp, #1024
-; CHECK1024-NEXT: mov x8, x0
-; CHECK1024-NEXT: bl __arm_sme_state
-; CHECK1024-NEXT: mov x19, x0
-; CHECK1024-NEXT: //APP
-; CHECK1024-NEXT: //NO_APP
-; CHECK1024-NEXT: tbz w19, #0, .LBB28_2
-; CHECK1024-NEXT: // %bb.1: // %entry
-; CHECK1024-NEXT: smstop sm
-; CHECK1024-NEXT: .LBB28_2: // %entry
-; CHECK1024-NEXT: mov x0, x8
-; CHECK1024-NEXT: mov w1, #45 // =0x2d
-; CHECK1024-NEXT: mov w2, #37 // =0x25
-; CHECK1024-NEXT: bl memset
-; CHECK1024-NEXT: tbz w19, #0, .LBB28_4
-; CHECK1024-NEXT: // %bb.3: // %entry
-; CHECK1024-NEXT: smstart sm
-; CHECK1024-NEXT: .LBB28_4: // %entry
-; CHECK1024-NEXT: mov w0, #22647 // =0x5877
-; CHECK1024-NEXT: movk w0, #59491, lsl #16
-; CHECK1024-NEXT: add sp, sp, #1024
-; CHECK1024-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
-; CHECK1024-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
-; CHECK1024-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
-; CHECK1024-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
-; CHECK1024-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
-; CHECK1024-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
-; CHECK1024-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
-; CHECK1024-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
-; CHECK1024-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
-; CHECK1024-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
-; CHECK1024-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
-; CHECK1024-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
-; CHECK1024-NEXT: addvl sp, sp, #18
-; CHECK1024-NEXT: .cfi_restore z8
-; CHECK1024-NEXT: .cfi_restore z9
-; CHECK1024-NEXT: .cfi_restore z10
-; CHECK1024-NEXT: .cfi_restore z11
-; CHECK1024-NEXT: .cfi_restore z12
-; CHECK1024-NEXT: .cfi_restore z13
-; CHECK1024-NEXT: .cfi_restore z14
-; CHECK1024-NEXT: .cfi_restore z15
-; CHECK1024-NEXT: .cfi_def_cfa wsp, 1088
-; CHECK1024-NEXT: ldr x19, [sp, #1072] // 8-byte Folded Reload
-; CHECK1024-NEXT: ldr x26, [sp, #1064] // 8-byte Folded Reload
-; CHECK1024-NEXT: ldr x27, [sp, #1056] // 8-byte Folded Reload
-; CHECK1024-NEXT: ldr x28, [sp, #1048] // 8-byte Folded Reload
-; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload
-; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
-; CHECK1024-NEXT: add sp, sp, #1088
-; CHECK1024-NEXT: .cfi_def_cfa_offset 0
-; CHECK1024-NEXT: .cfi_restore w19
-; CHECK1024-NEXT: .cfi_restore w26
-; CHECK1024-NEXT: .cfi_restore w27
-; CHECK1024-NEXT: .cfi_restore w28
-; CHECK1024-NEXT: .cfi_restore vg
-; CHECK1024-NEXT: .cfi_restore w30
-; CHECK1024-NEXT: .cfi_restore w29
-; CHECK1024-NEXT: ret
+; CHECK1024-NOSPLITSVE-LABEL: svecc_call:
+; CHECK1024-NOSPLITSVE: // %bb.0: // %entry
+; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1088
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_def_cfa_offset 1088
+; CHECK1024-NOSPLITSVE-NEXT: cntd x9
+; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str x9, [sp, #1040] // 8-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str x28, [sp, #1048] // 8-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str x27, [sp, #1056] // 8-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str x26, [sp, #1064] // 8-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str x19, [sp, #1072] // 8-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: add x29, sp, #1024
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_def_cfa w29, 64
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w19, -16
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w26, -24
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w27, -32
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w28, -40
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset vg, -48
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w30, -56
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w29, -64
+; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #-18
+; CHECK1024-NOSPLITSVE-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d8 @ cfa - 8 * IncomingVG - 1088
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x49, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d9 @ cfa - 16 * IncomingVG - 1088
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4a, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d10 @ cfa - 24 * IncomingVG - 1088
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4b, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d11 @ cfa - 32 * IncomingVG - 1088
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4c, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d12 @ cfa - 40 * IncomingVG - 1088
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4d, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d13 @ cfa - 48 * IncomingVG - 1088
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4e, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d14 @ cfa - 56 * IncomingVG - 1088
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4f, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d15 @ cfa - 64 * IncomingVG - 1088
+; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1024
+; CHECK1024-NOSPLITSVE-NEXT: mov x8, x0
+; CHECK1024-NOSPLITSVE-NEXT: bl __arm_sme_state
+; CHECK1024-NOSPLITSVE-NEXT: mov x19, x0
+; CHECK1024-NOSPLITSVE-NEXT: //APP
+; CHECK1024-NOSPLITSVE-NEXT: //NO_APP
+; CHECK1024-NOSPLITSVE-NEXT: tbz w19, #0, .LBB28_2
+; CHECK1024-NOSPLITSVE-NEXT: // %bb.1: // %entry
+; CHECK1024-NOSPLITSVE-NEXT: smstop sm
+; CHECK1024-NOSPLITSVE-NEXT: .LBB28_2: // %entry
+; CHECK1024-NOSPLITSVE-NEXT: mov x0, x8
+; CHECK1024-NOSPLITSVE-NEXT: mov w1, #45 // =0x2d
+; CHECK1024-NOSPLITSVE-NEXT: mov w2, #37 // =0x25
+; CHECK1024-NOSPLITSVE-NEXT: bl memset
+; CHECK1024-NOSPLITSVE-NEXT: tbz w19, #0, .LBB28_4
+; CHECK1024-NOSPLITSVE-NEXT: // %bb.3: // %entry
+; CHECK1024-NOSPLITSVE-NEXT: smstart sm
+; CHECK1024-NOSPLITSVE-NEXT: .LBB28_4: // %entry
+; CHECK1024-NOSPLITSVE-NEXT: mov w0, #22647 // =0x5877
+; CHECK1024-NOSPLITSVE-NEXT: movk w0, #59491, lsl #16
+; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1024
+; CHECK1024-NOSPLITSVE-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #18
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z8
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z9
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z10
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z11
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z12
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z13
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z14
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z15
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_def_cfa wsp, 1088
+; CHECK1024-NOSPLITSVE-NEXT: ldr x19, [sp, #1072] // 8-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr x26, [sp, #1064] // 8-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr x27, [sp, #1056] // 8-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr x28, [sp, #1048] // 8-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1088
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_def_cfa_offset 0
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w19
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w26
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w27
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w28
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore vg
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w30
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w29
+; CHECK1024-NOSPLITSVE-NEXT: ret
+;
+; CHECK1024-SPLITSVE-LABEL: svecc_call:
+; CHECK1024-SPLITSVE: // %bb.0: // %entry
+; CHECK1024-SPLITSVE-NEXT: stp x29, x30, [sp, #-64]! // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: .cfi_def_cfa_offset 64
+; CHECK1024-SPLITSVE-NEXT: cntd x9
+; CHECK1024-SPLITSVE-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str x9, [sp, #16] // 8-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: stp x26, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: mov x29, sp
+; CHECK1024-SPLITSVE-NEXT: .cfi_def_cfa w29, 64
+; CHECK1024-SPLITSVE-NEXT: .cfi_offset w19, -8
+; CHECK1024-SPLITSVE-NEXT: .cfi_offset w26, -16
+; CHECK1024-SPLITSVE-NEXT: .cfi_offset w27, -24
+; CHECK1024-SPLITSVE-NEXT: .cfi_offset w28, -32
+; CHECK1024-SPLITSVE-NEXT: .cfi_offset vg, -48
+; CHECK1024-SPLITSVE-NEXT: .cfi_offset w30, -56
+; CHECK1024-SPLITSVE-NEXT: .cfi_offset w29, -64
+; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #-2
+; CHECK1024-SPLITSVE-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1024
+; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #-16
+; CHECK1024-SPLITSVE-NEXT: str z23, [sp] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z22, [sp, #1, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z21, [sp, #2, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z20, [sp, #3, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z19, [sp, #4, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z18, [sp, #5, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z17, [sp, #6, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z16, [sp, #7, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z15, [sp, #8, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z14, [sp, #9, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z13, [sp, #10, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z12, [sp, #11, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z11, [sp, #12, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z10, [sp, #13, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z9, [sp, #14, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z8, [sp, #15, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d8 @ cfa - 24 * IncomingVG - 1088
+; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x49, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d9 @ cfa - 32 * IncomingVG - 1088
+; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4a, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d10 @ cfa - 40 * IncomingVG - 1088
+; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4b, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d11 @ cfa - 48 * IncomingVG - 1088
+; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4c, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d12 @ cfa - 56 * IncomingVG - 1088
+; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4d, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d13 @ cfa - 64 * IncomingVG - 1088
+; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4e, 0x0e, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0xb8, 0x7f, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d14 @ cfa - 72 * IncomingVG - 1088
+; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4f, 0x0e, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0xb0, 0x7f, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d15 @ cfa - 80 * IncomingVG - 1088
+; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1024
+; CHECK1024-SPLITSVE-NEXT: mov x8, x0
+; CHECK1024-SPLITSVE-NEXT: bl __arm_sme_state
+; CHECK1024-SPLITSVE-NEXT: mov x19, x0
+; CHECK1024-SPLITSVE-NEXT: //APP
+; CHECK1024-SPLITSVE-NEXT: //NO_APP
+; CHECK1024-SPLITSVE-NEXT: tbz w19, #0, .LBB28_2
+; CHECK1024-SPLITSVE-NEXT: // %bb.1: // %entry
+; CHECK1024-SPLITSVE-NEXT: smstop sm
+; CHECK1024-SPLITSVE-NEXT: .LBB28_2: // %entry
+; CHECK1024-SPLITSVE-NEXT: mov x0, x8
+; CHECK1024-SPLITSVE-NEXT: mov w1, #45 // =0x2d
+; CHECK1024-SPLITSVE-NEXT: mov w2, #37 // =0x25
+; CHECK1024-SPLITSVE-NEXT: bl memset
+; CHECK1024-SPLITSVE-NEXT: tbz w19, #0, .LBB28_4
+; CHECK1024-SPLITSVE-NEXT: // %bb.3: // %entry
+; CHECK1024-SPLITSVE-NEXT: smstart sm
+; CHECK1024-SPLITSVE-NEXT: .LBB28_4: // %entry
+; CHECK1024-SPLITSVE-NEXT: mov w0, #22647 // =0x5877
+; CHECK1024-SPLITSVE-NEXT: movk w0, #59491, lsl #16
+; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1024
+; CHECK1024-SPLITSVE-NEXT: ldr z23, [sp] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z22, [sp, #1, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z21, [sp, #2, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z20, [sp, #3, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z19, [sp, #4, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z18, [sp, #5, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z17, [sp, #6, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z16, [sp, #7, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z15, [sp, #8, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z14, [sp, #9, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z13, [sp, #10, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z12, [sp, #11, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1024
+; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #16
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore z8
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore z9
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore z10
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore z11
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore z12
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore z13
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore z14
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore z15
+; CHECK1024-SPLITSVE-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #2
+; CHECK1024-SPLITSVE-NEXT: .cfi_def_cfa wsp, 64
+; CHECK1024-SPLITSVE-NEXT: ldp x26, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: .cfi_def_cfa_offset 0
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore w19
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore w26
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore w27
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore w28
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore vg
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore w30
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore w29
+; CHECK1024-SPLITSVE-NEXT: ret
entry:
tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2
%call = call ptr @memset(ptr noundef nonnull %P1, i32 noundef 45, i32 noundef 37)
@@ -2505,138 +2588,267 @@ define i32 @svecc_alloca_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8
; CHECK64-NEXT: .cfi_restore w29
; CHECK64-NEXT: ret
;
-; CHECK1024-LABEL: svecc_alloca_call:
-; CHECK1024: // %bb.0: // %entry
-; CHECK1024-NEXT: sub sp, sp, #1088
-; CHECK1024-NEXT: .cfi_def_cfa_offset 1088
-; CHECK1024-NEXT: cntd x9
-; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
-; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill
-; CHECK1024-NEXT: str x9, [sp, #1040] // 8-byte Folded Spill
-; CHECK1024-NEXT: str x28, [sp, #1048] // 8-byte Folded Spill
-; CHECK1024-NEXT: str x27, [sp, #1056] // 8-byte Folded Spill
-; CHECK1024-NEXT: str x26, [sp, #1064] // 8-byte Folded Spill
-; CHECK1024-NEXT: str x19, [sp, #1072] // 8-byte Folded Spill
-; CHECK1024-NEXT: add x29, sp, #1024
-; CHECK1024-NEXT: .cfi_def_cfa w29, 64
-; CHECK1024-NEXT: .cfi_offset w19, -16
-; CHECK1024-NEXT: .cfi_offset w26, -24
-; CHECK1024-NEXT: .cfi_offset w27, -32
-; CHECK1024-NEXT: .cfi_offset w28, -40
-; CHECK1024-NEXT: .cfi_offset vg, -48
-; CHECK1024-NEXT: .cfi_offset w30, -56
-; CHECK1024-NEXT: .cfi_offset w29, -64
-; CHECK1024-NEXT: addvl sp, sp, #-18
-; CHECK1024-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
-; CHECK1024-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
-; CHECK1024-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
-; CHECK1024-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK1024-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
-; CHECK1024-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
-; CHECK1024-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
-; CHECK1024-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
-; CHECK1024-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
-; CHECK1024-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
-; CHECK1024-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
-; CHECK1024-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
-; CHECK1024-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
-; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d8 @ cfa - 8 * IncomingVG - 1088
-; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d9 @ cfa - 16 * IncomingVG - 1088
-; CHECK1024-NEXT: .cfi_escape 0x10, 0x4a, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d10 @ cfa - 24 * IncomingVG - 1088
-; CHECK1024-NEXT: .cfi_escape 0x10, 0x4b, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d11 @ cfa - 32 * IncomingVG - 1088
-; CHECK1024-NEXT: .cfi_escape 0x10, 0x4c, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d12 @ cfa - 40 * IncomingVG - 1088
-; CHECK1024-NEXT: .cfi_escape 0x10, 0x4d, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d13 @ cfa - 48 * IncomingVG - 1088
-; CHECK1024-NEXT: .cfi_escape 0x10, 0x4e, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d14 @ cfa - 56 * IncomingVG - 1088
-; CHECK1024-NEXT: .cfi_escape 0x10, 0x4f, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d15 @ cfa - 64 * IncomingVG - 1088
-; CHECK1024-NEXT: sub sp, sp, #1072
-; CHECK1024-NEXT: bl __arm_sme_state
-; CHECK1024-NEXT: mov x19, x0
-; CHECK1024-NEXT: //APP
-; CHECK1024-NEXT: //NO_APP
-; CHECK1024-NEXT: tbz w19, #0, .LBB29_2
-; CHECK1024-NEXT: // %bb.1: // %entry
-; CHECK1024-NEXT: smstop sm
-; CHECK1024-NEXT: .LBB29_2: // %entry
-; CHECK1024-NEXT: mov x0, sp
-; CHECK1024-NEXT: mov w1, #45 // =0x2d
-; CHECK1024-NEXT: mov w2, #37 // =0x25
-; CHECK1024-NEXT: bl memset
-; CHECK1024-NEXT: tbz w19, #0, .LBB29_4
-; CHECK1024-NEXT: // %bb.3: // %entry
-; CHECK1024-NEXT: smstart sm
-; CHECK1024-NEXT: .LBB29_4: // %entry
-; CHECK1024-NEXT: mov w0, #22647 // =0x5877
-; CHECK1024-NEXT: movk w0, #59491, lsl #16
-; CHECK1024-NEXT: add sp, sp, #1072
-; CHECK1024-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
-; CHECK1024-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
-; CHECK1024-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
-; CHECK1024-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
-; CHECK1024-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
-; CHECK1024-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
-; CHECK1024-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
-; CHECK1024-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
-; CHECK1024-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
-; CHECK1024-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
-; CHECK1024-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
-; CHECK1024-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
-; CHECK1024-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
-; CHECK1024-NEXT: addvl sp, sp, #18
-; CHECK1024-NEXT: .cfi_restore z8
-; CHECK1024-NEXT: .cfi_restore z9
-; CHECK1024-NEXT: .cfi_restore z10
-; CHECK1024-NEXT: .cfi_restore z11
-; CHECK1024-NEXT: .cfi_restore z12
-; CHECK1024-NEXT: .cfi_restore z13
-; CHECK1024-NEXT: .cfi_restore z14
-; CHECK1024-NEXT: .cfi_restore z15
-; CHECK1024-NEXT: .cfi_def_cfa wsp, 1088
-; CHECK1024-NEXT: ldr x19, [sp, #1072] // 8-byte Folded Reload
-; CHECK1024-NEXT: ldr x26, [sp, #1064] // 8-byte Folded Reload
-; CHECK1024-NEXT: ldr x27, [sp, #1056] // 8-byte Folded Reload
-; CHECK1024-NEXT: ldr x28, [sp, #1048] // 8-byte Folded Reload
-; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload
-; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
-; CHECK1024-NEXT: add sp, sp, #1088
-; CHECK1024-NEXT: .cfi_def_cfa_offset 0
-; CHECK1024-NEXT: .cfi_restore w19
-; CHECK1024-NEXT: .cfi_restore w26
-; CHECK1024-NEXT: .cfi_restore w27
-; CHECK1024-NEXT: .cfi_restore w28
-; CHECK1024-NEXT: .cfi_restore vg
-; CHECK1024-NEXT: .cfi_restore w30
-; CHECK1024-NEXT: .cfi_restore w29
-; CHECK1024-NEXT: ret
+; CHECK1024-NOSPLITSVE-LABEL: svecc_alloca_call:
+; CHECK1024-NOSPLITSVE: // %bb.0: // %entry
+; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1088
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_def_cfa_offset 1088
+; CHECK1024-NOSPLITSVE-NEXT: cntd x9
+; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str x9, [sp, #1040] // 8-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str x28, [sp, #1048] // 8-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str x27, [sp, #1056] // 8-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str x26, [sp, #1064] // 8-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str x19, [sp, #1072] // 8-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: add x29, sp, #1024
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_def_cfa w29, 64
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w19, -16
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w26, -24
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w27, -32
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w28, -40
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset vg, -48
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w30, -56
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w29, -64
+; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #-18
+; CHECK1024-NOSPLITSVE-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d8 @ cfa - 8 * IncomingVG - 1088
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x49, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d9 @ cfa - 16 * IncomingVG - 1088
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4a, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d10 @ cfa - 24 * IncomingVG - 1088
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4b, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d11 @ cfa - 32 * IncomingVG - 1088
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4c, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d12 @ cfa - 40 * IncomingVG - 1088
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4d, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d13 @ cfa - 48 * IncomingVG - 1088
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4e, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d14 @ cfa - 56 * IncomingVG - 1088
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4f, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d15 @ cfa - 64 * IncomingVG - 1088
+; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1072
+; CHECK1024-NOSPLITSVE-NEXT: bl __arm_sme_state
+; CHECK1024-NOSPLITSVE-NEXT: mov x19, x0
+; CHECK1024-NOSPLITSVE-NEXT: //APP
+; CHECK1024-NOSPLITSVE-NEXT: //NO_APP
+; CHECK1024-NOSPLITSVE-NEXT: tbz w19, #0, .LBB29_2
+; CHECK1024-NOSPLITSVE-NEXT: // %bb.1: // %entry
+; CHECK1024-NOSPLITSVE-NEXT: smstop sm
+; CHECK1024-NOSPLITSVE-NEXT: .LBB29_2: // %entry
+; CHECK1024-NOSPLITSVE-NEXT: mov x0, sp
+; CHECK1024-NOSPLITSVE-NEXT: mov w1, #45 // =0x2d
+; CHECK1024-NOSPLITSVE-NEXT: mov w2, #37 // =0x25
+; CHECK1024-NOSPLITSVE-NEXT: bl memset
+; CHECK1024-NOSPLITSVE-NEXT: tbz w19, #0, .LBB29_4
+; CHECK1024-NOSPLITSVE-NEXT: // %bb.3: // %entry
+; CHECK1024-NOSPLITSVE-NEXT: smstart sm
+; CHECK1024-NOSPLITSVE-NEXT: .LBB29_4: // %entry
+; CHECK1024-NOSPLITSVE-NEXT: mov w0, #22647 // =0x5877
+; CHECK1024-NOSPLITSVE-NEXT: movk w0, #59491, lsl #16
+; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1072
+; CHECK1024-NOSPLITSVE-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #18
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z8
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z9
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z10
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z11
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z12
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z13
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z14
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z15
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_def_cfa wsp, 1088
+; CHECK1024-NOSPLITSVE-NEXT: ldr x19, [sp, #1072] // 8-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr x26, [sp, #1064] // 8-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr x27, [sp, #1056] // 8-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr x28, [sp, #1048] // 8-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
+; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1088
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_def_cfa_offset 0
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w19
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w26
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w27
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w28
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore vg
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w30
+; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w29
+; CHECK1024-NOSPLITSVE-NEXT: ret
+;
+; CHECK1024-SPLITSVE-LABEL: svecc_alloca_call:
+; CHECK1024-SPLITSVE: // %bb.0: // %entry
+; CHECK1024-SPLITSVE-NEXT: stp x29, x30, [sp, #-64]! // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: .cfi_def_cfa_offset 64
+; CHECK1024-SPLITSVE-NEXT: cntd x9
+; CHECK1024-SPLITSVE-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str x9, [sp, #16] // 8-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: stp x26, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: mov x29, sp
+; CHECK1024-SPLITSVE-NEXT: .cfi_def_cfa w29, 64
+; CHECK1024-SPLITSVE-NEXT: .cfi_offset w19, -8
+; CHECK1024-SPLITSVE-NEXT: .cfi_offset w26, -16
+; CHECK1024-SPLITSVE-NEXT: .cfi_offset w27, -24
+; CHECK1024-SPLITSVE-NEXT: .cfi_offset w28, -32
+; CHECK1024-SPLITSVE-NEXT: .cfi_offset vg, -48
+; CHECK1024-SPLITSVE-NEXT: .cfi_offset w30, -56
+; CHECK1024-SPLITSVE-NEXT: .cfi_offset w29, -64
+; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #-2
+; CHECK1024-SPLITSVE-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1024
+; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #-16
+; CHECK1024-SPLITSVE-NEXT: str z23, [sp] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z22, [sp, #1, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z21, [sp, #2, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z20, [sp, #3, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z19, [sp, #4, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z18, [sp, #5, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z17, [sp, #6, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z16, [sp, #7, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z15, [sp, #8, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z14, [sp, #9, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z13, [sp, #10, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z12, [sp, #11, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z11, [sp, #12, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z10, [sp, #13, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z9, [sp, #14, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: str z8, [sp, #15, mul vl] // 16-byte Folded Spill
+; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d8 @ cfa - 24 * IncomingVG - 1088
+; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x49, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d9 @ cfa - 32 * IncomingVG - 1088
+; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4a, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d10 @ cfa - 40 * IncomingVG - 1088
+; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4b, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d11 @ cfa - 48 * IncomingVG - 1088
+; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4c, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d12 @ cfa - 56 * IncomingVG - 1088
+; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4d, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d13 @ cfa - 64 * IncomingVG - 1088
+; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4e, 0x0e, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0xb8, 0x7f, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d14 @ cfa - 72 * IncomingVG - 1088
+; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4f, 0x0e, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0xb0, 0x7f, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d15 @ cfa - 80 * IncomingVG - 1088
+; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1072
+; CHECK1024-SPLITSVE-NEXT: bl __arm_sme_state
+; CHECK1024-SPLITSVE-NEXT: mov x19, x0
+; CHECK1024-SPLITSVE-NEXT: //APP
+; CHECK1024-SPLITSVE-NEXT: //NO_APP
+; CHECK1024-SPLITSVE-NEXT: tbz w19, #0, .LBB29_2
+; CHECK1024-SPLITSVE-NEXT: // %bb.1: // %entry
+; CHECK1024-SPLITSVE-NEXT: smstop sm
+; CHECK1024-SPLITSVE-NEXT: .LBB29_2: // %entry
+; CHECK1024-SPLITSVE-NEXT: mov x0, sp
+; CHECK1024-SPLITSVE-NEXT: mov w1, #45 // =0x2d
+; CHECK1024-SPLITSVE-NEXT: mov w2, #37 // =0x25
+; CHECK1024-SPLITSVE-NEXT: bl memset
+; CHECK1024-SPLITSVE-NEXT: tbz w19, #0, .LBB29_4
+; CHECK1024-SPLITSVE-NEXT: // %bb.3: // %entry
+; CHECK1024-SPLITSVE-NEXT: smstart sm
+; CHECK1024-SPLITSVE-NEXT: .LBB29_4: // %entry
+; CHECK1024-SPLITSVE-NEXT: mov w0, #22647 // =0x5877
+; CHECK1024-SPLITSVE-NEXT: movk w0, #59491, lsl #16
+; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1072
+; CHECK1024-SPLITSVE-NEXT: ldr z23, [sp] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z22, [sp, #1, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z21, [sp, #2, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z20, [sp, #3, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z19, [sp, #4, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z18, [sp, #5, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z17, [sp, #6, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z16, [sp, #7, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z15, [sp, #8, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z14, [sp, #9, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z13, [sp, #10, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z12, [sp, #11, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1024
+; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #16
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore z8
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore z9
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore z10
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore z11
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore z12
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore z13
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore z14
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore z15
+; CHECK1024-SPLITSVE-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #2
+; CHECK1024-SPLITSVE-NEXT: .cfi_def_cfa wsp, 64
+; CHECK1024-SPLITSVE-NEXT: ldp x26, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload
+; CHECK1024-SPLITSVE-NEXT: .cfi_def_cfa_offset 0
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore w19
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore w26
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore w27
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore w28
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore vg
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore w30
+; CHECK1024-SPLITSVE-NEXT: .cfi_restore w29
+; CHECK1024-SPLITSVE-NEXT: ret
entry:
tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2
diff --git a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
index 2cbb29e..d8de12c 100644
--- a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
+++ b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
@@ -672,5 +672,3 @@ entry:
ret i32 %x
}
declare void @other()
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-FRAMELAYOUT: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll
index de7d234..b9bf76c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -global-isel=1 < %s | FileCheck -check-prefixes=GFX11 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -global-isel=0 < %s | FileCheck -check-prefixes=GFX11 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -global-isel=1 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -global-isel=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s
declare i32 @llvm.amdgcn.s.quadmask.i32(i32)
declare i64 @llvm.amdgcn.s.quadmask.i64(i64)
@@ -172,3 +172,91 @@ entry:
%qm = call i64 @llvm.amdgcn.s.quadmask.i64(i64 %mask)
ret i64 %qm
}
+
+;; Ensure that the AND/ICMP pair cannot be fused into a single SCC-setting AND, because s_quadmask_b32 implicitly defines SCC.
+define amdgpu_kernel void @test_scc_quadmask_32(i32 %val0, i32 %val1, ptr addrspace(1) %ptr) {
+; GFX11-GISEL-LABEL: test_scc_quadmask_32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_and_b32 s0, s0, 1
+; GFX11-GISEL-NEXT: s_quadmask_b32 s1, s1
+; GFX11-GISEL-NEXT: s_cmp_eq_u32 s0, 0
+; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, s1
+; GFX11-GISEL-NEXT: s_cselect_b32 s0, 1, 0
+; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, s0
+; GFX11-GISEL-NEXT: global_store_b32 v2, v3, s[2:3]
+; GFX11-GISEL-NEXT: global_store_b32 v[0:1], v4, off
+; GFX11-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: test_scc_quadmask_32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT: s_and_b32 s0, s0, 1
+; GFX11-SDAG-NEXT: s_quadmask_b32 s1, s1
+; GFX11-SDAG-NEXT: s_cmp_eq_u32 s0, 0
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, s1
+; GFX11-SDAG-NEXT: s_cselect_b32 s0, -1, 0
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0
+; GFX11-SDAG-NEXT: global_store_b32 v2, v3, s[2:3]
+; GFX11-SDAG-NEXT: global_store_b32 v[0:1], v4, off
+; GFX11-SDAG-NEXT: s_endpgm
+ %and = and i32 %val0, 1
+ %result = call i32 @llvm.amdgcn.s.quadmask.i32(i32 %val1) nounwind readnone
+ store i32 %result, ptr addrspace(1) %ptr
+ %cmp = icmp eq i32 %and, 0
+ %sel = select i1 %cmp, i32 1, i32 0
+ store i32 %sel, ptr addrspace(1) null, align 4
+ ret void
+}
+
+;; Ensure that the AND/ICMP pair cannot be fused into a single SCC-setting AND, because s_quadmask_b64 implicitly defines SCC.
+define amdgpu_kernel void @test_scc_quadmask_64(i32 %val0, i64 %val1, ptr addrspace(1) %ptr) {
+; GFX11-GISEL-LABEL: test_scc_quadmask_64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_clause 0x1
+; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x2c
+; GFX11-GISEL-NEXT: s_load_b32 s4, s[4:5], 0x24
+; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_quadmask_b64 s[0:1], s[0:1]
+; GFX11-GISEL-NEXT: s_and_b32 s4, s4, 1
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-GISEL-NEXT: s_cmp_eq_u32 s4, 0
+; GFX11-GISEL-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v1, s1
+; GFX11-GISEL-NEXT: s_cselect_b32 s0, 1, 0
+; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v5, s0
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX11-GISEL-NEXT: global_store_b64 v4, v[0:1], s[2:3]
+; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v5, off
+; GFX11-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: test_scc_quadmask_64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_clause 0x1
+; GFX11-SDAG-NEXT: s_load_b32 s6, s[4:5], 0x24
+; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x2c
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v4, 0
+; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT: s_and_b32 s4, s6, 1
+; GFX11-SDAG-NEXT: s_quadmask_b64 s[0:1], s[0:1]
+; GFX11-SDAG-NEXT: s_cmp_eq_u32 s4, 0
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, s1
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX11-SDAG-NEXT: s_cselect_b32 s0, -1, 0
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0
+; GFX11-SDAG-NEXT: global_store_b64 v4, v[2:3], s[2:3]
+; GFX11-SDAG-NEXT: global_store_b32 v[0:1], v5, off
+; GFX11-SDAG-NEXT: s_endpgm
+ %and = and i32 %val0, 1
+ %result = call i64 @llvm.amdgcn.s.quadmask.i64(i64 %val1) nounwind readnone
+ store i64 %result, ptr addrspace(1) %ptr
+ %cmp = icmp eq i32 %and, 0
+ %sel = select i1 %cmp, i32 1, i32 0
+ store i32 %sel, ptr addrspace(1) null, align 4
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-precise-allocate-to-module-struct.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-precise-allocate-to-module-struct.ll
index 0de7f8f..bd29e9e 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-precise-allocate-to-module-struct.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-precise-allocate-to-module-struct.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
-; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s
; Regression test for issue 160181
; One variable is chosen to be assigned at zero. Here, that's @both
@@ -22,12 +22,20 @@
;.
; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t poison, align 4, !absolute_symbol [[META0:![0-9]+]]
; CHECK: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(3) @llvm.amdgcn.module.lds to ptr)], section "llvm.metadata"
+; CHECK: @llvm.amdgcn.kernel.kern_one.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kern_one.lds.t poison, align 4, !absolute_symbol [[META1:![0-9]+]]
+; CHECK: @llvm.amdgcn.kernel.kern_two.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kern_two.lds.t poison, align 4, !absolute_symbol [[META1]]
+; CHECK: @llvm.amdgcn.kernel.kern_block_direct_allocation.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kern_block_direct_allocation.lds.t poison, align 4, !absolute_symbol [[META1]]
+
;.
define void @func_one() {
; CHECK-LABEL: define {{[^@]+}}@func_one() {
-; CHECK-NEXT: [[VAL0:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !noalias [[META1:![0-9]+]]
-; CHECK-NEXT: store i32 [[VAL0]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !noalias [[META18:![0-9]+]]
-; CHECK-NEXT: store i16 10, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 3), align 4, !noalias [[META23:![0-9]+]]
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; CHECK-NEXT: [[VAL0:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !noalias [[META2:![0-9]+]]
+; CHECK-NEXT: [[ONE:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ONE]], align 4
+; CHECK-NEXT: [[ONE1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
+; CHECK-NEXT: store i32 [[VAL0]], ptr addrspace(3) [[ONE1]], align 4
+; CHECK-NEXT: store i16 10, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !noalias [[META11:![0-9]+]]
; CHECK-NEXT: ret void
;
%val0 = load i32, ptr addrspace(3) @both
@@ -38,9 +46,10 @@ define void @func_one() {
define amdgpu_kernel void @kern_one() {
; CHECK-LABEL: define {{[^@]+}}@kern_one
-; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META16:![0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !noalias [[META24:![0-9]+]]
+; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kern_one.lds) ]
+; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !noalias [[META17:![0-9]+]]
; CHECK-NEXT: call void @func_one()
; CHECK-NEXT: ret void
;
@@ -51,9 +60,13 @@ entry:
define void @func_two() {
; CHECK-LABEL: define {{[^@]+}}@func_two() {
-; CHECK-NEXT: [[VAL0:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !noalias [[META1]]
-; CHECK-NEXT: store i32 [[VAL0]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 2), align 4, !noalias [[META25:![0-9]+]]
-; CHECK-NEXT: store i16 20, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 3), align 4, !noalias [[META23]]
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; CHECK-NEXT: [[VAL0:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !noalias [[META2]]
+; CHECK-NEXT: [[TWO:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TWO]], align 4
+; CHECK-NEXT: [[TWO1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
+; CHECK-NEXT: store i32 [[VAL0]], ptr addrspace(3) [[TWO1]], align 4
+; CHECK-NEXT: store i16 20, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !noalias [[META11]]
; CHECK-NEXT: ret void
;
%val0 = load i32, ptr addrspace(3) @both
@@ -64,9 +77,10 @@ define void @func_two() {
define amdgpu_kernel void @kern_two() {
; CHECK-LABEL: define {{[^@]+}}@kern_two
-; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-SAME: () #[[ATTR0]] !llvm.amdgcn.lds.kernel.id [[META18:![0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !alias.scope [[META26:![0-9]+]], !noalias [[META27:![0-9]+]]
+; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kern_two.lds) ]
+; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !alias.scope [[META19:![0-9]+]], !noalias [[META20:![0-9]+]]
; CHECK-NEXT: call void @func_two()
; CHECK-NEXT: ret void
;
@@ -82,11 +96,18 @@ entry:
; remains the best candidate for address zero allocation.
define void @func_block_direct_allocation() {
; CHECK-LABEL: define {{[^@]+}}@func_block_direct_allocation() {
-; CHECK-NEXT: [[VAL1:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !noalias [[META18]]
-; CHECK-NEXT: [[VAL2:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 2), align 4, !noalias [[META25]]
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; CHECK-NEXT: [[ONE:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ONE]], align 4
+; CHECK-NEXT: [[ONE1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
+; CHECK-NEXT: [[VAL1:%.*]] = load i32, ptr addrspace(3) [[ONE1]], align 4
+; CHECK-NEXT: [[TWO:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TWO]], align 4
+; CHECK-NEXT: [[TWO2:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
+; CHECK-NEXT: [[VAL2:%.*]] = load i32, ptr addrspace(3) [[TWO2]], align 4
; CHECK-NEXT: [[SUM:%.*]] = add i32 [[VAL1]], [[VAL2]]
-; CHECK-NEXT: store i32 [[SUM]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !noalias [[META1]]
-; CHECK-NEXT: store i16 30, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 3), align 4, !noalias [[META23]]
+; CHECK-NEXT: store i32 [[SUM]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !noalias [[META2]]
+; CHECK-NEXT: store i16 30, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !noalias [[META11]]
; CHECK-NEXT: ret void
;
%val1 = load i32, ptr addrspace(3) @one
@@ -99,7 +120,8 @@ define void @func_block_direct_allocation() {
define amdgpu_kernel void @kern_block_direct_allocation() {
; CHECK-LABEL: define {{[^@]+}}@kern_block_direct_allocation
-; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-SAME: () #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META21:![0-9]+]] {
+; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kern_block_direct_allocation.lds) ], !alias.scope [[META22:![0-9]+]], !noalias [[META25:![0-9]+]]
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
; CHECK-NEXT: call void @func_block_direct_allocation()
; CHECK-NEXT: call void @func_one()
@@ -112,35 +134,8 @@ define amdgpu_kernel void @kern_block_direct_allocation() {
ret void
}
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-lds-size"="16" }
-; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
-;.
-; CHECK: [[META0]] = !{i32 0, i32 1}
-; CHECK: [[META1]] = !{[[META2:![0-9]+]], [[META4:![0-9]+]], [[META5:![0-9]+]], [[META6:![0-9]+]], [[META8:![0-9]+]], [[META9:![0-9]+]], [[META10:![0-9]+]], [[META12:![0-9]+]], [[META13:![0-9]+]], [[META14:![0-9]+]], [[META16:![0-9]+]], [[META17:![0-9]+]]}
-; CHECK: [[META2]] = distinct !{[[META2]], [[META3:![0-9]+]]}
-; CHECK: [[META3]] = distinct !{[[META3]]}
-; CHECK: [[META4]] = distinct !{[[META4]], [[META3]]}
-; CHECK: [[META5]] = distinct !{[[META5]], [[META3]]}
-; CHECK: [[META6]] = distinct !{[[META6]], [[META7:![0-9]+]]}
-; CHECK: [[META7]] = distinct !{[[META7]]}
-; CHECK: [[META8]] = distinct !{[[META8]], [[META7]]}
-; CHECK: [[META9]] = distinct !{[[META9]], [[META7]]}
-; CHECK: [[META10]] = distinct !{[[META10]], [[META11:![0-9]+]]}
-; CHECK: [[META11]] = distinct !{[[META11]]}
-; CHECK: [[META12]] = distinct !{[[META12]], [[META11]]}
-; CHECK: [[META13]] = distinct !{[[META13]], [[META11]]}
-; CHECK: [[META14]] = distinct !{[[META14]], [[META15:![0-9]+]]}
-; CHECK: [[META15]] = distinct !{[[META15]]}
-; CHECK: [[META16]] = distinct !{[[META16]], [[META15]]}
-; CHECK: [[META17]] = distinct !{[[META17]], [[META15]]}
-; CHECK: [[META18]] = !{[[META19:![0-9]+]], [[META2]], [[META5]], [[META20:![0-9]+]], [[META6]], [[META9]], [[META21:![0-9]+]], [[META10]], [[META13]], [[META22:![0-9]+]], [[META14]], [[META17]]}
-; CHECK: [[META19]] = distinct !{[[META19]], [[META3]]}
-; CHECK: [[META20]] = distinct !{[[META20]], [[META7]]}
-; CHECK: [[META21]] = distinct !{[[META21]], [[META11]]}
-; CHECK: [[META22]] = distinct !{[[META22]], [[META15]]}
-; CHECK: [[META23]] = !{[[META19]], [[META4]], [[META5]], [[META20]], [[META8]], [[META9]], [[META21]], [[META12]], [[META13]], [[META22]], [[META16]], [[META17]]}
-; CHECK: [[META24]] = !{[[META10]], [[META12]], [[META13]], [[META14]], [[META16]], [[META17]]}
-; CHECK: [[META25]] = !{[[META19]], [[META2]], [[META4]], [[META20]], [[META6]], [[META8]], [[META21]], [[META10]], [[META12]], [[META22]], [[META14]], [[META16]]}
-; CHECK: [[META26]] = !{[[META22]]}
-; CHECK: [[META27]] = !{[[META14]], [[META16]], [[META17]]}
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-lds-size"="12" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-lds-size"="16" }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
;.
diff --git a/llvm/test/CodeGen/NVPTX/f32x2-convert-i32x2.ll b/llvm/test/CodeGen/NVPTX/f32x2-convert-i32x2.ll
index 18fb879..21ca041 100644
--- a/llvm/test/CodeGen/NVPTX/f32x2-convert-i32x2.ll
+++ b/llvm/test/CodeGen/NVPTX/f32x2-convert-i32x2.ll
@@ -115,5 +115,150 @@ define ptx_kernel void @inlineasm(ptr %p) {
store <2 x float> %mul, ptr %p, align 8
ret void
}
+
+define ptx_kernel void @trunc_v2i32(<2 x i32> %0) {
+; CHECK-SM90A-LABEL: trunc_v2i32(
+; CHECK-SM90A: {
+; CHECK-SM90A-NEXT: .reg .b32 %r<7>;
+; CHECK-SM90A-NEXT: .reg .b64 %rd<2>;
+; CHECK-SM90A-EMPTY:
+; CHECK-SM90A-NEXT: // %bb.0:
+; CHECK-SM90A-NEXT: ld.param.v2.b32 {%r1, %r2}, [trunc_v2i32_param_0];
+; CHECK-SM90A-NEXT: prmt.b32 %r3, %r1, %r2, 0x3340U;
+; CHECK-SM90A-NEXT: mov.b32 %r4, 0;
+; CHECK-SM90A-NEXT: prmt.b32 %r5, %r4, 0, 0x3340U;
+; CHECK-SM90A-NEXT: prmt.b32 %r6, %r5, %r3, 0x5410U;
+; CHECK-SM90A-NEXT: mov.b64 %rd1, 0;
+; CHECK-SM90A-NEXT: st.b32 [%rd1], %r6;
+; CHECK-SM90A-NEXT: ret;
+;
+; CHECK-SM100-LABEL: trunc_v2i32(
+; CHECK-SM100: {
+; CHECK-SM100-NEXT: .reg .b32 %r<7>;
+; CHECK-SM100-NEXT: .reg .b64 %rd<3>;
+; CHECK-SM100-EMPTY:
+; CHECK-SM100-NEXT: // %bb.0:
+; CHECK-SM100-NEXT: ld.param.b64 %rd1, [trunc_v2i32_param_0];
+; CHECK-SM100-NEXT: mov.b64 {%r1, %r2}, %rd1;
+; CHECK-SM100-NEXT: mov.b32 %r3, 0;
+; CHECK-SM100-NEXT: prmt.b32 %r4, %r3, 0, 0x3340U;
+; CHECK-SM100-NEXT: prmt.b32 %r5, %r1, %r2, 0x3340U;
+; CHECK-SM100-NEXT: prmt.b32 %r6, %r4, %r5, 0x5410U;
+; CHECK-SM100-NEXT: mov.b64 %rd2, 0;
+; CHECK-SM100-NEXT: st.b32 [%rd2], %r6;
+; CHECK-SM100-NEXT: ret;
+ %2 = trunc <2 x i32> %0 to <2 x i8>
+ %3 = shufflevector <2 x i8> zeroinitializer, <2 x i8> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ store <4 x i8> %3, ptr null, align 4
+ ret void
+}
+
+define ptx_kernel void @zextend_to_v2i32(<2 x i8> %0) {
+; CHECK-SM90A-LABEL: zextend_to_v2i32(
+; CHECK-SM90A: {
+; CHECK-SM90A-NEXT: .reg .b16 %rs<3>;
+; CHECK-SM90A-NEXT: .reg .b32 %r<4>;
+; CHECK-SM90A-NEXT: .reg .b64 %rd<5>;
+; CHECK-SM90A-EMPTY:
+; CHECK-SM90A-NEXT: // %bb.0:
+; CHECK-SM90A-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [zextend_to_v2i32_param_0];
+; CHECK-SM90A-NEXT: mov.b32 %r1, {%rs1, %rs2};
+; CHECK-SM90A-NEXT: cvt.u32.u16 %r2, %rs1;
+; CHECK-SM90A-NEXT: cvt.u32.u16 %r3, %rs2;
+; CHECK-SM90A-NEXT: mov.b64 %rd1, 12;
+; CHECK-SM90A-NEXT: st.b32 [%rd1], %r3;
+; CHECK-SM90A-NEXT: mov.b64 %rd2, 8;
+; CHECK-SM90A-NEXT: st.b32 [%rd2], %r2;
+; CHECK-SM90A-NEXT: mov.b64 %rd3, 4;
+; CHECK-SM90A-NEXT: st.b32 [%rd3], 0;
+; CHECK-SM90A-NEXT: mov.b64 %rd4, 0;
+; CHECK-SM90A-NEXT: st.b32 [%rd4], 0;
+; CHECK-SM90A-NEXT: ret;
+;
+; CHECK-SM100-LABEL: zextend_to_v2i32(
+; CHECK-SM100: {
+; CHECK-SM100-NEXT: .reg .b16 %rs<3>;
+; CHECK-SM100-NEXT: .reg .b32 %r<5>;
+; CHECK-SM100-NEXT: .reg .b64 %rd<8>;
+; CHECK-SM100-EMPTY:
+; CHECK-SM100-NEXT: // %bb.0:
+; CHECK-SM100-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [zextend_to_v2i32_param_0];
+; CHECK-SM100-NEXT: mov.b32 %r1, {%rs1, %rs2};
+; CHECK-SM100-NEXT: cvt.u32.u16 %r2, %rs2;
+; CHECK-SM100-NEXT: cvt.u32.u16 %r3, %rs1;
+; CHECK-SM100-NEXT: mov.b64 %rd1, {%r3, %r2};
+; CHECK-SM100-NEXT: mov.b32 %r4, 0;
+; CHECK-SM100-NEXT: mov.b64 %rd2, {%r4, %r4};
+; CHECK-SM100-NEXT: mov.b64 %rd3, 4;
+; CHECK-SM100-NEXT: st.b32 [%rd3], %rd2;
+; CHECK-SM100-NEXT: mov.b64 %rd4, 0;
+; CHECK-SM100-NEXT: st.b32 [%rd4], %rd2;
+; CHECK-SM100-NEXT: mov.b64 %rd5, 8;
+; CHECK-SM100-NEXT: st.b32 [%rd5], %rd1;
+; CHECK-SM100-NEXT: shr.u64 %rd6, %rd1, 32;
+; CHECK-SM100-NEXT: mov.b64 %rd7, 12;
+; CHECK-SM100-NEXT: st.b32 [%rd7], %rd6;
+; CHECK-SM100-NEXT: ret;
+ %2 = zext <2 x i8> %0 to <2 x i32>
+ %3 = shufflevector <2 x i32> zeroinitializer, <2 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ store <4 x i32> %3, ptr null, align 4
+ ret void
+}
+
+define ptx_kernel void @sextend_to_v2i32(<2 x i8> %0) {
+; CHECK-SM90A-LABEL: sextend_to_v2i32(
+; CHECK-SM90A: {
+; CHECK-SM90A-NEXT: .reg .b16 %rs<3>;
+; CHECK-SM90A-NEXT: .reg .b32 %r<6>;
+; CHECK-SM90A-NEXT: .reg .b64 %rd<5>;
+; CHECK-SM90A-EMPTY:
+; CHECK-SM90A-NEXT: // %bb.0:
+; CHECK-SM90A-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [sextend_to_v2i32_param_0];
+; CHECK-SM90A-NEXT: mov.b32 %r1, {%rs1, %rs2};
+; CHECK-SM90A-NEXT: cvt.u32.u16 %r2, %rs1;
+; CHECK-SM90A-NEXT: cvt.s32.s8 %r3, %r2;
+; CHECK-SM90A-NEXT: cvt.u32.u16 %r4, %rs2;
+; CHECK-SM90A-NEXT: cvt.s32.s8 %r5, %r4;
+; CHECK-SM90A-NEXT: mov.b64 %rd1, 12;
+; CHECK-SM90A-NEXT: st.b32 [%rd1], %r5;
+; CHECK-SM90A-NEXT: mov.b64 %rd2, 8;
+; CHECK-SM90A-NEXT: st.b32 [%rd2], %r3;
+; CHECK-SM90A-NEXT: mov.b64 %rd3, 4;
+; CHECK-SM90A-NEXT: st.b32 [%rd3], 0;
+; CHECK-SM90A-NEXT: mov.b64 %rd4, 0;
+; CHECK-SM90A-NEXT: st.b32 [%rd4], 0;
+; CHECK-SM90A-NEXT: ret;
+;
+; CHECK-SM100-LABEL: sextend_to_v2i32(
+; CHECK-SM100: {
+; CHECK-SM100-NEXT: .reg .b16 %rs<3>;
+; CHECK-SM100-NEXT: .reg .b32 %r<7>;
+; CHECK-SM100-NEXT: .reg .b64 %rd<8>;
+; CHECK-SM100-EMPTY:
+; CHECK-SM100-NEXT: // %bb.0:
+; CHECK-SM100-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [sextend_to_v2i32_param_0];
+; CHECK-SM100-NEXT: mov.b32 %r1, {%rs1, %rs2};
+; CHECK-SM100-NEXT: cvt.u32.u16 %r2, %rs2;
+; CHECK-SM100-NEXT: cvt.s32.s8 %r3, %r2;
+; CHECK-SM100-NEXT: cvt.u32.u16 %r4, %rs1;
+; CHECK-SM100-NEXT: cvt.s32.s8 %r5, %r4;
+; CHECK-SM100-NEXT: mov.b64 %rd1, {%r5, %r3};
+; CHECK-SM100-NEXT: mov.b32 %r6, 0;
+; CHECK-SM100-NEXT: mov.b64 %rd2, {%r6, %r6};
+; CHECK-SM100-NEXT: mov.b64 %rd3, 4;
+; CHECK-SM100-NEXT: st.b32 [%rd3], %rd2;
+; CHECK-SM100-NEXT: mov.b64 %rd4, 0;
+; CHECK-SM100-NEXT: st.b32 [%rd4], %rd2;
+; CHECK-SM100-NEXT: mov.b64 %rd5, 8;
+; CHECK-SM100-NEXT: st.b32 [%rd5], %rd1;
+; CHECK-SM100-NEXT: shr.u64 %rd6, %rd1, 32;
+; CHECK-SM100-NEXT: mov.b64 %rd7, 12;
+; CHECK-SM100-NEXT: st.b32 [%rd7], %rd6;
+; CHECK-SM100-NEXT: ret;
+ %2 = sext <2 x i8> %0 to <2 x i32>
+ %3 = shufflevector <2 x i32> zeroinitializer, <2 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ store <4 x i32> %3, ptr null, align 4
+ ret void
+}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/remat.ll b/llvm/test/CodeGen/RISCV/rvv/remat.ll
index 06d54fa..95bff27 100644
--- a/llvm/test/CodeGen/RISCV/rvv/remat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/remat.ll
@@ -301,3 +301,135 @@ define void @vfmv.s.f(ptr %p, double %x) {
store volatile double %x, ptr %p
ret void
}
+
+; This test is fairly fragile, but it's trying to cover the case that
+; caused the revert of bba9172 due to an interaction with how rematerialized
+; instructions are pruned from the original live interval. In the result
+; below, we remat the vmv.v.x into the loop, but fail to remat the vmv.v.x
+; a second time after further splitting its live range. We shouldn't need
+; to spill it to the stack at all.
+define i64 @dual_remat(i64 %0, <vscale x 16 x i64> %1, <vscale x 16 x i64> %2, ptr %p) #0 {
+; CHECK-LABEL: dual_remat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 5
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x21, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 33 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a1, a2, 3
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: vmv.v.i v0, 0
+; CHECK-NEXT: .LBB8_1: # %vector.body
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: mv a5, a4
+; CHECK-NEXT: slli a4, a4, 3
+; CHECK-NEXT: add a5, a5, a4
+; CHECK-NEXT: slli a4, a4, 1
+; CHECK-NEXT: add a4, a4, a5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vs8r.v v16, (a4) # vscale x 64-byte Folded Spill
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: slli a5, a4, 4
+; CHECK-NEXT: add a4, a5, a4
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vs8r.v v8, (a4) # vscale x 64-byte Folded Spill
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: mv a5, a4
+; CHECK-NEXT: slli a4, a4, 3
+; CHECK-NEXT: add a5, a5, a4
+; CHECK-NEXT: slli a4, a4, 1
+; CHECK-NEXT: add a4, a4, a5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vl8r.v v16, (a4) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vand.vv v16, v16, v8
+; CHECK-NEXT: vmsne.vi v24, v16, 0
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: slli a4, a4, 4
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vs1r.v v24, (a4) # vscale x 8-byte Folded Spill
+; CHECK-NEXT: vand.vv v16, v0, v8
+; CHECK-NEXT: vmsne.vi v8, v16, 0
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: mv a5, a4
+; CHECK-NEXT: slli a4, a4, 3
+; CHECK-NEXT: add a5, a5, a4
+; CHECK-NEXT: slli a4, a4, 1
+; CHECK-NEXT: add a4, a4, a5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vl8r.v v16, (a4) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: slli a4, a4, 4
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vl1r.v v9, (a4) # vscale x 8-byte Folded Reload
+; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vslideup.vx v9, v8, a1
+; CHECK-NEXT: vsetvli a4, zero, e8, m2, ta, ma
+; CHECK-NEXT: vcpop.m a4, v9
+; CHECK-NEXT: csrr a5, vlenb
+; CHECK-NEXT: slli a6, a5, 4
+; CHECK-NEXT: add a5, a6, a5
+; CHECK-NEXT: add a5, sp, a5
+; CHECK-NEXT: addi a5, a5, 16
+; CHECK-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vs8r.v v8, (a3)
+; CHECK-NEXT: vs8r.v v8, (a2)
+; CHECK-NEXT: addi a5, sp, 16
+; CHECK-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vsetvli a5, zero, e64, m8, ta, ma
+; CHECK-NEXT: vor.vv v16, v16, v8
+; CHECK-NEXT: csrr a5, vlenb
+; CHECK-NEXT: slli a5, a5, 3
+; CHECK-NEXT: add a5, sp, a5
+; CHECK-NEXT: addi a5, a5, 16
+; CHECK-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vor.vv v0, v0, v8
+; CHECK-NEXT: beqz a4, .LBB8_1
+; CHECK-NEXT: # %bb.2: # %middle.block
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 5
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add sp, sp, a1
+; CHECK-NEXT: .cfi_def_cfa sp, 16
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: ret
+entry:
+ %broadcast.splatinsert = insertelement <vscale x 16 x i64> zeroinitializer, i64 %0, i64 0
+ %broadcast.splat = shufflevector <vscale x 16 x i64> %broadcast.splatinsert, <vscale x 16 x i64> zeroinitializer, <vscale x 16 x i32> zeroinitializer
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %entry
+ %vec.ind = phi <vscale x 16 x i64> [ zeroinitializer, %entry ], [ %vec.ind.next, %vector.body ]
+ %3 = and <vscale x 16 x i64> %vec.ind, %broadcast.splat
+ %4 = icmp ne <vscale x 16 x i64> %3, zeroinitializer
+ store <vscale x 16 x i64> %broadcast.splat, ptr %p
+ %5 = tail call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> %4)
+ %vec.ind.next = or <vscale x 16 x i64> %vec.ind, %1
+ br i1 %5, label %middle.block, label %vector.body
+
+middle.block: ; preds = %vector.body
+ %and.i = and i64 1, %0
+ ret i64 %and.i
+}
diff --git a/llvm/test/DebugInfo/AArch64/asan-stack-vars.mir b/llvm/test/DebugInfo/AArch64/asan-stack-vars.mir
index 5d644c3..718fa6f 100644
--- a/llvm/test/DebugInfo/AArch64/asan-stack-vars.mir
+++ b/llvm/test/DebugInfo/AArch64/asan-stack-vars.mir
@@ -366,7 +366,8 @@ frameInfo:
maxCallFrameSize: 0
localFrameSize: 144
machineFunctionInfo:
- stackSizeSVE: 0
+ stackSizeZPR: 0
+ stackSizePPR: 0
stack:
- { id: 0, name: StackGuardSlot, offset: -40, size: 8, alignment: 8,
stack-id: default, local-offset: -8 }
diff --git a/llvm/test/DebugInfo/AArch64/compiler-gen-bbs-livedebugvalues.mir b/llvm/test/DebugInfo/AArch64/compiler-gen-bbs-livedebugvalues.mir
index 013d933..b7a9892 100644
--- a/llvm/test/DebugInfo/AArch64/compiler-gen-bbs-livedebugvalues.mir
+++ b/llvm/test/DebugInfo/AArch64/compiler-gen-bbs-livedebugvalues.mir
@@ -69,7 +69,8 @@ frameInfo:
hasCalls: true
maxCallFrameSize: 0
machineFunctionInfo:
- stackSizeSVE: 0
+ stackSizeZPR: 0
+ stackSizePPR: 0
stack:
- { id: 0, type: spill-slot, offset: -20, size: 4, alignment: 4, stack-id: default }
- { id: 1, type: spill-slot, offset: -8, size: 8, alignment: 8, stack-id: default,
diff --git a/llvm/test/DebugInfo/X86/dynamic-bitfield.ll b/llvm/test/DebugInfo/X86/dynamic-bitfield.ll
index c9148ca4..f893597 100644
--- a/llvm/test/DebugInfo/X86/dynamic-bitfield.ll
+++ b/llvm/test/DebugInfo/X86/dynamic-bitfield.ll
@@ -27,7 +27,7 @@ source_filename = "bitfield.c"
!6 = !{}
!7 = !{!0, !2}
!8 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "PackedBits", file: !5, line: 3, size: 40, elements: !9)
-!9 = !{!10, !12, !16}
+!9 = !{!10, !12, !16, !21}
!10 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !8, file: !5, line: 5, baseType: !11, size: 8)
; CHECK: DW_TAG_member
; CHECK-NEXT: DW_AT_name{{.*}}"a"
@@ -60,5 +60,14 @@ source_filename = "bitfield.c"
; CHECK: DW_AT_bit_size [DW_FORM_exprloc] (DW_OP_lit27)
; CHECK-NEXT: DW_AT_data_bit_offset [DW_FORM_exprloc] (DW_OP_lit13)
; CHECK-NOT: DW_AT_data_member_location
-; CHECK: DW_TAG
!20 = !{!"clang version 3.9.0 (trunk 267633)"}
+!21 = !DIDerivedType(tag: DW_TAG_member, name: "d", scope: !8, file: !5, line: 7, baseType: !13, offset: !DIExpression(DW_OP_constu, 15), flags: DIFlagBitField)
+; CHECK: DW_TAG_member
+; CHECK-NEXT: DW_AT_name{{.*}}"d"
+; CHECK-NOT: DW_TAG
+; CHECK-NOT: DW_AT_bit_offset
+; CHECK-NOT: DW_AT_byte_size
+; CHECK-NOT: DW_AT_bit_size
+; CHECK: DW_AT_data_bit_offset [DW_FORM_exprloc] (DW_OP_lit15)
+; CHECK-NOT: DW_AT_data_member_location
+; CHECK: DW_TAG
diff --git a/llvm/test/Transforms/InstCombine/fcmp.ll b/llvm/test/Transforms/InstCombine/fcmp.ll
index 119cffd..d94e78c 100644
--- a/llvm/test/Transforms/InstCombine/fcmp.ll
+++ b/llvm/test/Transforms/InstCombine/fcmp.ll
@@ -1812,6 +1812,46 @@ define i1 @fcmp_ule_fsub_const(float %x, float %y) {
ret i1 %cmp
}
+define i1 @fcmp_ninf_ule_fsub_const(float %x, float %y) {
+; CHECK-LABEL: @fcmp_ninf_ule_fsub_const(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ule float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %fs = fsub float %x, %y
+ %cmp = fcmp ninf ule float %fs, 0.000000e+00
+ ret i1 %cmp
+}
+
+define i1 @fcmp_nnan_ule_fsub_const(float %x, float %y) {
+; CHECK-LABEL: @fcmp_nnan_ule_fsub_const(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp nnan ule float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %fs = fsub float %x, %y
+ %cmp = fcmp nnan ule float %fs, 0.000000e+00
+ ret i1 %cmp
+}
+
+define i1 @fcmp_ule_fsub_ninf_const(float %x, float %y) {
+; CHECK-LABEL: @fcmp_ule_fsub_ninf_const(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ninf ule float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %fs = fsub ninf float %x, %y
+ %cmp = fcmp ule float %fs, 0.000000e+00
+ ret i1 %cmp
+}
+
+define i1 @fcmp_ule_fsub_nnan_const(float %x, float %y) {
+; CHECK-LABEL: @fcmp_ule_fsub_nnan_const(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp nnan ule float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %fs = fsub nnan float %x, %y
+ %cmp = fcmp ule float %fs, 0.000000e+00
+ ret i1 %cmp
+}
+
define i1 @fcmp_ugt_fsub_const(float %x, float %y) {
; CHECK-LABEL: @fcmp_ugt_fsub_const(
; CHECK-NEXT: [[FS:%.*]] = fsub float [[X:%.*]], [[Y:%.*]]
diff --git a/llvm/test/Transforms/InstCombine/icmp-clamp.ll b/llvm/test/Transforms/InstCombine/icmp-clamp.ll
new file mode 100644
index 0000000..4866dbf
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-clamp.ll
@@ -0,0 +1,295 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+declare void @use(i32)
+
+define i1 @test_i32_eq(i32 %x) {
+; CHECK-LABEL: define i1 @test_i32_eq(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], 95
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP1]], 256
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %v1 = tail call i32 @llvm.smax.i32(i32 %x, i32 -95)
+ %v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 160)
+ %cmp = icmp eq i32 %v2, %x
+ ret i1 %cmp
+}
+
+define i1 @test_i32_ne(i32 %x) {
+; CHECK-LABEL: define i1 @test_i32_ne(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -161
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP1]], -256
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %v1 = tail call i32 @llvm.smax.i32(i32 %x, i32 -95)
+ %v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 160)
+ %cmp = icmp ne i32 %v2, %x
+ ret i1 %cmp
+}
+
+define i1 @test_i32_eq_no_add(i32 %x) {
+; CHECK-LABEL: define i1 @test_i32_eq_no_add(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X]], 161
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %v1 = tail call i32 @llvm.smax.i32(i32 %x, i32 0)
+ %v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 160)
+ %cmp = icmp eq i32 %v2, %x
+ ret i1 %cmp
+}
+
+define i1 @test_i32_ne_no_add(i32 %x) {
+; CHECK-LABEL: define i1 @test_i32_ne_no_add(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X]], 160
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %v1 = tail call i32 @llvm.smax.i32(i32 %x, i32 0)
+ %v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 160)
+ %cmp = icmp ne i32 %v2, %x
+ ret i1 %cmp
+}
+
+define i1 @test_unsigned_eq(i32 %x) {
+; CHECK-LABEL: define i1 @test_unsigned_eq(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -10
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP1]], 91
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %v1 = tail call i32 @llvm.umax.i32(i32 %x, i32 10)
+ %v2 = tail call i32 @llvm.umin.i32(i32 %v1, i32 100)
+ %cmp = icmp eq i32 %v2, %x
+ ret i1 %cmp
+}
+
+define i1 @test_unsigned_ne(i32 %x) {
+; CHECK-LABEL: define i1 @test_unsigned_ne(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -101
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP1]], -91
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %v1 = tail call i32 @llvm.umax.i32(i32 %x, i32 10)
+ %v2 = tail call i32 @llvm.umin.i32(i32 %v1, i32 100)
+ %cmp = icmp ne i32 %v2, %x
+ ret i1 %cmp
+}
+
+
+; Different bit widths
+define i1 @test_i8_eq(i8 %x) {
+; CHECK-LABEL: define i1 @test_i8_eq(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X]], 50
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[TMP1]], 101
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %v1 = tail call i8 @llvm.smax.i8(i8 %x, i8 -50)
+ %v2 = tail call i8 @llvm.smin.i8(i8 %v1, i8 50)
+ %cmp = icmp eq i8 %v2, %x
+ ret i1 %cmp
+}
+
+define i1 @test_i16_eq(i16 %x) {
+; CHECK-LABEL: define i1 @test_i16_eq(
+; CHECK-SAME: i16 [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[X]], 1000
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[TMP1]], 2001
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %v1 = tail call i16 @llvm.smax.i16(i16 %x, i16 -1000)
+ %v2 = tail call i16 @llvm.smin.i16(i16 %v1, i16 1000)
+ %cmp = icmp eq i16 %v2, %x
+ ret i1 %cmp
+}
+
+define i1 @test_i64_eq(i64 %x) {
+; CHECK-LABEL: define i1 @test_i64_eq(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[X]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP1]], -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %v1 = tail call i64 @llvm.smax.i64(i64 %x, i64 -1)
+ %v2 = tail call i64 @llvm.smin.i64(i64 %v1, i64 9223372036854775806)
+ %cmp = icmp eq i64 %v2, %x
+ ret i1 %cmp
+}
+
+; Negative tests - wrong predicate
+define i1 @test_wrong_pred_slt(i32 %x) {
+; CHECK-LABEL: define i1 @test_wrong_pred_slt(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X]], 160
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %v1 = tail call i32 @llvm.smax.i32(i32 %x, i32 -95)
+ %v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 160)
+ %cmp = icmp slt i32 %v2, %x
+ ret i1 %cmp
+}
+
+
+; Negative tests - not a clamp pattern
+define i1 @test_not_clamp_pattern(i32 %x, i32 %y) {
+; CHECK-LABEL: define i1 @test_not_clamp_pattern(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[V1:%.*]] = tail call i32 @llvm.smax.i32(i32 [[Y]], i32 -95)
+; CHECK-NEXT: [[V2:%.*]] = tail call i32 @llvm.smin.i32(i32 [[V1]], i32 160)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V2]], [[X]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %v1 = tail call i32 @llvm.smax.i32(i32 %y, i32 -95)
+ %v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 160)
+ %cmp = icmp eq i32 %v2, %x
+ ret i1 %cmp
+}
+
+; Negative tests - Lo >= Hi
+define i1 @test_invalid_range(i32 %x) {
+; CHECK-LABEL: define i1 @test_invalid_range(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 50
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %v1 = tail call i32 @llvm.smax.i32(i32 %x, i32 100)
+ %v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 50)
+ %cmp = icmp eq i32 %v2, %x
+ ret i1 %cmp
+}
+
+; Negative tests - Lo is minimum signed value
+define i1 @test_lo_min_signed(i32 %x) {
+; CHECK-LABEL: define i1 @test_lo_min_signed(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X]], 161
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %v1 = tail call i32 @llvm.smax.i32(i32 %x, i32 -2147483648)
+ %v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 160)
+ %cmp = icmp eq i32 %v2, %x
+ ret i1 %cmp
+}
+
+; Negative tests - Hi is maximum signed value
+define i1 @test_hi_max_signed(i32 %x) {
+; CHECK-LABEL: define i1 @test_hi_max_signed(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X]], -96
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %v1 = tail call i32 @llvm.smax.i32(i32 %x, i32 -95)
+ %v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 2147483647)
+ %cmp = icmp eq i32 %v2, %x
+ ret i1 %cmp
+}
+
+; Negative tests - Hi is maximum unsigned value
+define i1 @test_hi_max_unsigned(i32 %x) {
+; CHECK-LABEL: define i1 @test_hi_max_unsigned(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X]], 9
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %v1 = tail call i32 @llvm.umax.i32(i32 %x, i32 10)
+ %v2 = tail call i32 @llvm.umin.i32(i32 %v1, i32 4294967295)
+ %cmp = icmp eq i32 %v2, %x
+ ret i1 %cmp
+}
+
+; Multi-use tests - multiple uses of max
+define i1 @test_multi_use_max(i32 %x) {
+; CHECK-LABEL: define i1 @test_multi_use_max(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[V1:%.*]] = tail call i32 @llvm.smax.i32(i32 [[X]], i32 -95)
+; CHECK-NEXT: call void @use(i32 [[V1]])
+; CHECK-NEXT: [[V2:%.*]] = tail call i32 @llvm.smin.i32(i32 [[V1]], i32 160)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V2]], [[X]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %v1 = tail call i32 @llvm.smax.i32(i32 %x, i32 -95)
+ call void @use(i32 %v1)
+ %v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 160)
+ %cmp = icmp eq i32 %v2, %x
+ ret i1 %cmp
+}
+
+; Multi-use tests - multiple uses of min
+define i1 @test_multi_use_min(i32 %x) {
+; CHECK-LABEL: define i1 @test_multi_use_min(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[V1:%.*]] = tail call i32 @llvm.smax.i32(i32 [[X]], i32 -95)
+; CHECK-NEXT: [[V2:%.*]] = tail call i32 @llvm.smin.i32(i32 [[V1]], i32 160)
+; CHECK-NEXT: call void @use(i32 [[V2]])
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V2]], [[X]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %v1 = tail call i32 @llvm.smax.i32(i32 %x, i32 -95)
+ %v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 160)
+ call void @use(i32 %v2)
+ %cmp = icmp eq i32 %v2, %x
+ ret i1 %cmp
+}
+
+; Commuted tests
+define i1 @test_commuted_eq(i32 %x) {
+; CHECK-LABEL: define i1 @test_commuted_eq(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], 95
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP1]], 256
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %v1 = tail call i32 @llvm.smax.i32(i32 %x, i32 -95)
+ %v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 160)
+ %cmp = icmp eq i32 %x, %v2
+ ret i1 %cmp
+}
+
+
+; Vector tests - splat constants
+define <2 x i1> @test_vec_splat_eq(<2 x i32> %x) {
+; CHECK-LABEL: define <2 x i1> @test_vec_splat_eq(
+; CHECK-SAME: <2 x i32> [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X]], splat (i32 50)
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 101)
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %v1 = tail call <2 x i32> @llvm.smax.v2i32(<2 x i32> %x, <2 x i32> <i32 -50, i32 -50>)
+ %v2 = tail call <2 x i32> @llvm.smin.v2i32(<2 x i32> %v1, <2 x i32> <i32 50, i32 50>)
+ %cmp = icmp eq <2 x i32> %v2, %x
+ ret <2 x i1> %cmp
+}
+
+; Vector tests - poison elements
+define <2 x i1> @test_vec_poison_eq(<2 x i32> %x) {
+; CHECK-LABEL: define <2 x i1> @test_vec_poison_eq(
+; CHECK-SAME: <2 x i32> [[X:%.*]]) {
+; CHECK-NEXT: [[V1:%.*]] = tail call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[X]], <2 x i32> <i32 -50, i32 poison>)
+; CHECK-NEXT: [[V2:%.*]] = tail call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[V1]], <2 x i32> <i32 50, i32 poison>)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[V2]], [[X]]
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %v1 = tail call <2 x i32> @llvm.smax.v2i32(<2 x i32> %x, <2 x i32> <i32 -50, i32 poison>)
+ %v2 = tail call <2 x i32> @llvm.smin.v2i32(<2 x i32> %v1, <2 x i32> <i32 50, i32 poison>)
+ %cmp = icmp eq <2 x i32> %v2, %x
+ ret <2 x i1> %cmp
+}
+
+; Vector tests - non-splat
+define <2 x i1> @test_vec_non_splat_eq(<2 x i32> %x) {
+; CHECK-LABEL: define <2 x i1> @test_vec_non_splat_eq(
+; CHECK-SAME: <2 x i32> [[X:%.*]]) {
+; CHECK-NEXT: [[V1:%.*]] = tail call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[X]], <2 x i32> <i32 -50, i32 -30>)
+; CHECK-NEXT: [[V2:%.*]] = tail call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[V1]], <2 x i32> <i32 50, i32 70>)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[V2]], [[X]]
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %v1 = tail call <2 x i32> @llvm.smax.v2i32(<2 x i32> %x, <2 x i32> <i32 -50, i32 -30>)
+ %v2 = tail call <2 x i32> @llvm.smin.v2i32(<2 x i32> %v1, <2 x i32> <i32 50, i32 70>)
+ %cmp = icmp eq <2 x i32> %v2, %x
+ ret <2 x i1> %cmp
+}
diff --git a/llvm/test/Transforms/LoopUnroll/peel-branch-weights-freq.ll b/llvm/test/Transforms/LoopUnroll/peel-branch-weights-freq.ll
new file mode 100644
index 0000000..1339afe
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/peel-branch-weights-freq.ll
@@ -0,0 +1,75 @@
+; Test branch weight metadata, estimated trip count metadata, and block
+; frequencies after loop peeling.
+
+; RUN: opt < %s -S -passes='print<block-freq>' 2>&1 | \
+; RUN: FileCheck -check-prefix=CHECK %s
+
+; The -implicit-check-not options make sure that no additional labels or calls
+; to @f show up.
+; RUN: opt < %s -S -passes='loop-unroll,print<block-freq>' \
+; RUN: -unroll-force-peel-count=2 2>&1 | \
+; RUN: FileCheck %s -check-prefix=CHECK-UR \
+; RUN: -implicit-check-not='{{^[^ ;]*:}}' \
+; RUN: -implicit-check-not='call void @f'
+
+; CHECK: block-frequency-info: test
+; CHECK: do.body: float = 10.0,
+
+; The sum should still be ~10.
+;
+; CHECK-UR: block-frequency-info: test
+; CHECK-UR: - [[DO_BODY_PEEL:.*]]: float = 1.0,
+; CHECK-UR: - [[DO_BODY_PEEL2:.*]]: float = 0.9,
+; CHECK-UR: - [[DO_BODY:.*]]: float = 8.1,
+
+declare void @f(i32)
+
+define void @test(i32 %n) {
+; CHECK-UR-LABEL: define void @test(
+; CHECK-UR: [[ENTRY:.*]]:
+; CHECK-UR: br label %[[DO_BODY_PEEL_BEGIN:.*]]
+; CHECK-UR: [[DO_BODY_PEEL_BEGIN]]:
+; CHECK-UR: br label %[[DO_BODY_PEEL:.*]]
+; CHECK-UR: [[DO_BODY_PEEL]]:
+; CHECK-UR: call void @f
+; CHECK-UR: br i1 %{{.*}}, label %[[DO_END:.*]], label %[[DO_BODY_PEEL_NEXT:.*]], !prof ![[#PROF:]]
+; CHECK-UR: [[DO_BODY_PEEL_NEXT]]:
+; CHECK-UR: br label %[[DO_BODY_PEEL2:.*]]
+; CHECK-UR: [[DO_BODY_PEEL2]]:
+; CHECK-UR: call void @f
+; CHECK-UR: br i1 %{{.*}}, label %[[DO_END]], label %[[DO_BODY_PEEL_NEXT1:.*]], !prof ![[#PROF]]
+; CHECK-UR: [[DO_BODY_PEEL_NEXT1]]:
+; CHECK-UR: br label %[[DO_BODY_PEEL_NEXT5:.*]]
+; CHECK-UR: [[DO_BODY_PEEL_NEXT5]]:
+; CHECK-UR: br label %[[ENTRY_PEEL_NEWPH:.*]]
+; CHECK-UR: [[ENTRY_PEEL_NEWPH]]:
+; CHECK-UR: br label %[[DO_BODY]]
+; CHECK-UR: [[DO_BODY]]:
+; CHECK-UR: call void @f
+; CHECK-UR: br i1 %{{.*}}, label %[[DO_END_LOOPEXIT:.*]], label %[[DO_BODY]], !prof ![[#PROF]], !llvm.loop ![[#LOOP_UR_LATCH:]]
+; CHECK-UR: [[DO_END_LOOPEXIT]]:
+; CHECK-UR: br label %[[DO_END]]
+; CHECK-UR: [[DO_END]]:
+; CHECK-UR: ret void
+
+entry:
+ br label %do.body
+
+do.body:
+ %i = phi i32 [ 0, %entry ], [ %inc, %do.body ]
+ %inc = add i32 %i, 1
+ call void @f(i32 %i)
+ %c = icmp sge i32 %inc, %n
+ br i1 %c, label %do.end, label %do.body, !prof !0
+
+do.end:
+ ret void
+}
+
+!0 = !{!"branch_weights", i32 1, i32 9}
+
+; CHECK-UR: ![[#PROF]] = !{!"branch_weights", i32 1, i32 9}
+; CHECK-UR: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_PC:]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]}
+; CHECK-UR: ![[#LOOP_UR_PC]] = !{!"llvm.loop.peeled.count", i32 2}
+; CHECK-UR: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 8}
+; CHECK-UR: ![[#DISABLE]] = !{!"llvm.loop.unroll.disable"}
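A quick sanity check of the frequencies asserted above, assuming each peeled copy keeps the original 1:9 exit weights (exit probability 0.1, estimated trip count 10):

    f(do.body.peel)  = 1.0
    f(do.body.peel2) = 1.0 * (1 - 0.1) = 0.9
    f(do.body)       = 10 - 1.0 - 0.9  = 8.1
    sum              = 1.0 + 0.9 + 8.1 = 10.0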
diff --git a/llvm/test/Transforms/LoopUnroll/peel-branch-weights.ll b/llvm/test/Transforms/LoopUnroll/peel-branch-weights.ll
index c58f8f1..63a0dd4 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-branch-weights.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-branch-weights.ll
@@ -15,9 +15,9 @@ define void @test() {
; CHECK: loop.peel:
; CHECK-NEXT: [[X_PEEL:%.*]] = call i32 @get.x()
; CHECK-NEXT: switch i32 [[X_PEEL]], label [[LOOP_LATCH_PEEL:%.*]] [
-; CHECK-NEXT: i32 0, label [[LOOP_LATCH_PEEL]]
-; CHECK-NEXT: i32 1, label [[LOOP_EXIT:%.*]]
-; CHECK-NEXT: i32 2, label [[LOOP_EXIT]]
+; CHECK-NEXT: i32 0, label [[LOOP_LATCH_PEEL]]
+; CHECK-NEXT: i32 1, label [[LOOP_EXIT:%.*]]
+; CHECK-NEXT: i32 2, label [[LOOP_EXIT]]
; CHECK-NEXT: ], !prof [[PROF0:![0-9]+]]
; CHECK: loop.latch.peel:
; CHECK-NEXT: br label [[LOOP_PEEL_NEXT:%.*]]
@@ -26,10 +26,10 @@ define void @test() {
; CHECK: loop.peel2:
; CHECK-NEXT: [[X_PEEL3:%.*]] = call i32 @get.x()
; CHECK-NEXT: switch i32 [[X_PEEL3]], label [[LOOP_LATCH_PEEL4:%.*]] [
-; CHECK-NEXT: i32 0, label [[LOOP_LATCH_PEEL4]]
-; CHECK-NEXT: i32 1, label [[LOOP_EXIT]]
-; CHECK-NEXT: i32 2, label [[LOOP_EXIT]]
-; CHECK-NEXT: ], !prof [[PROF1:![0-9]+]]
+; CHECK-NEXT: i32 0, label [[LOOP_LATCH_PEEL4]]
+; CHECK-NEXT: i32 1, label [[LOOP_EXIT]]
+; CHECK-NEXT: i32 2, label [[LOOP_EXIT]]
+; CHECK-NEXT: ], !prof [[PROF0]]
; CHECK: loop.latch.peel4:
; CHECK-NEXT: br label [[LOOP_PEEL_NEXT1:%.*]]
; CHECK: loop.peel.next1:
@@ -41,31 +41,33 @@ define void @test() {
; CHECK: loop:
; CHECK-NEXT: [[X:%.*]] = call i32 @get.x()
; CHECK-NEXT: switch i32 [[X]], label [[LOOP_LATCH:%.*]] [
-; CHECK-NEXT: i32 0, label [[LOOP_LATCH]]
-; CHECK-NEXT: i32 1, label [[LOOP_EXIT_LOOPEXIT:%.*]]
-; CHECK-NEXT: i32 2, label [[LOOP_EXIT_LOOPEXIT]]
-; CHECK-NEXT: ], !prof [[PROF2:![0-9]+]]
+; CHECK-NEXT: i32 0, label [[LOOP_LATCH]]
+; CHECK-NEXT: i32 1, label [[LOOP_EXIT_LOOPEXIT:%.*]]
+; CHECK-NEXT: i32 2, label [[LOOP_EXIT_LOOPEXIT]]
+; CHECK-NEXT: ], !prof [[PROF0]]
; CHECK: loop.latch:
-; CHECK-NEXT: br label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br label [[LOOP]], !llvm.loop [[LOOP1:![0-9]+]]
; CHECK: loop.exit.loopexit:
; CHECK-NEXT: br label [[LOOP_EXIT]]
; CHECK: loop.exit:
; CHECK-NEXT: ret void
+;
+; DISABLEADV-LABEL: @test(
+; DISABLEADV-NEXT: entry:
+; DISABLEADV-NEXT: br label [[LOOP:%.*]]
+; DISABLEADV: loop:
+; DISABLEADV-NEXT: [[X:%.*]] = call i32 @get.x()
+; DISABLEADV-NEXT: switch i32 [[X]], label [[LOOP_LATCH:%.*]] [
+; DISABLEADV-NEXT: i32 0, label [[LOOP_LATCH]]
+; DISABLEADV-NEXT: i32 1, label [[LOOP_EXIT:%.*]]
+; DISABLEADV-NEXT: i32 2, label [[LOOP_EXIT]]
+; DISABLEADV-NEXT: ], !prof [[PROF0:![0-9]+]]
+; DISABLEADV: loop.latch:
+; DISABLEADV-NEXT: br label [[LOOP]]
+; DISABLEADV: loop.exit:
+; DISABLEADV-NEXT: ret void
+;
-; DISABLEADV-LABEL: @test()
-; DISABLEADV-NEXT: entry:
-; DISABLEADV-NEXT: br label %loop
-; DISABLEADV: loop
-; DISABLEADV-NEXT: %x = call i32 @get.x()
-; DISABLEADV-NEXT: switch i32 %x, label %loop.latch [
-; DISABLEADV-NEXT: i32 0, label %loop.latch
-; DISABLEADV-NEXT: i32 1, label %loop.exit
-; DISABLEADV-NEXT: i32 2, label %loop.exit
-; DISABLEADV-NEXT: ], !prof !0
-; DISABLEADV: loop.latch:
-; DISABLEADV-NEXT: br label %loop
-; DISABLEADV: loop.exit:
-; DISABLEADV-NEXT: ret void
entry:
br label %loop
@@ -89,9 +91,9 @@ loop.exit:
;.
; CHECK: [[PROF0]] = !{!"branch_weights", i32 100, i32 200, i32 20, i32 10}
-; CHECK: [[PROF1]] = !{!"branch_weights", i32 90, i32 180, i32 20, i32 10}
-; CHECK: [[PROF2]] = !{!"branch_weights", i32 80, i32 160, i32 20, i32 10}
-; CHECK: [[LOOP3]] = distinct !{!3, !4, !5}
-; CHECK: [[META4:![0-9]+]] = !{!"llvm.loop.peeled.count", i32 2}
-; CHECK: [[META5:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
+; CHECK: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
+; CHECK: [[META2]] = !{!"llvm.loop.peeled.count", i32 2}
+; CHECK: [[META3]] = !{!"llvm.loop.unroll.disable"}
+;.
+; DISABLEADV: [[PROF0]] = !{!"branch_weights", i32 100, i32 200, i32 20, i32 10}
;.
diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll
index d91cb5b..e951215 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll
@@ -15,13 +15,13 @@
; CHECK: br i1 %{{.*}}, label %[[NEXT0:.*]], label %for.cond.for.end_crit_edge, !prof !16
; CHECK: [[NEXT0]]:
; CHECK: br i1 %c, label %{{.*}}, label %side_exit, !prof !15
-; CHECK: br i1 %{{.*}}, label %[[NEXT1:.*]], label %for.cond.for.end_crit_edge, !prof !17
+; CHECK: br i1 %{{.*}}, label %[[NEXT1:.*]], label %for.cond.for.end_crit_edge, !prof !16
; CHECK: [[NEXT1]]:
; CHECK: br i1 %c, label %{{.*}}, label %side_exit, !prof !15
-; CHECK: br i1 %{{.*}}, label %[[NEXT2:.*]], label %for.cond.for.end_crit_edge, !prof !18
+; CHECK: br i1 %{{.*}}, label %[[NEXT2:.*]], label %for.cond.for.end_crit_edge, !prof !16
; CHECK: [[NEXT2]]:
; CHECK: br i1 %c, label %{{.*}}, label %side_exit.loopexit, !prof !15
-; CHECK: br i1 %{{.*}}, label %for.body, label %{{.*}}, !prof !18
+; CHECK: br i1 %{{.*}}, label %for.body, label %{{.*}}, !prof !16, !llvm.loop !17
define i32 @basic(ptr %p, i32 %k, i1 %c) #0 !prof !15 {
entry:
@@ -84,6 +84,7 @@ attributes #1 = { nounwind optsize }
;CHECK: !15 = !{!"branch_weights", i32 1, i32 0}
; This is a weights of latch and its copies.
;CHECK: !16 = !{!"branch_weights", i32 3001, i32 1001}
-;CHECK: !17 = !{!"branch_weights", i32 2000, i32 1001}
-;CHECK: !18 = !{!"branch_weights", i32 1001, i32 1001}
+;CHECK: !17 = distinct !{!17, !18, !19, {{.*}}}
+;CHECK: !18 = !{!"llvm.loop.peeled.count", i32 4}
+;CHECK: !19 = !{!"llvm.loop.estimated_trip_count", i32 0}
diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll
index 15dce234..dec126f 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll
@@ -5,7 +5,7 @@
; RUN: opt < %s -S -profile-summary-huge-working-set-size-threshold=9 -debug-only=loop-unroll -passes='require<profile-summary>,function(require<opt-remark-emit>,loop-unroll)' 2>&1 | FileCheck %s --check-prefix=NOPEEL
; REQUIRES: asserts
-; Make sure we use the profile information correctly to peel-off 3 iterations
+; Make sure we use the profile information correctly to peel off 4 iterations
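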
; from the loop, and update the branch weights for the peeled loop properly.
; CHECK: Loop Unroll: F[basic]
@@ -20,11 +20,11 @@
; CHECK-LABEL: @basic
; CHECK: br i1 %{{.*}}, label %[[NEXT0:.*]], label %for.cond.for.end_crit_edge, !prof !15
; CHECK: [[NEXT0]]:
-; CHECK: br i1 %{{.*}}, label %[[NEXT1:.*]], label %for.cond.for.end_crit_edge, !prof !16
+; CHECK: br i1 %{{.*}}, label %[[NEXT1:.*]], label %for.cond.for.end_crit_edge, !prof !15
; CHECK: [[NEXT1]]:
-; CHECK: br i1 %{{.*}}, label %[[NEXT2:.*]], label %for.cond.for.end_crit_edge, !prof !17
+; CHECK: br i1 %{{.*}}, label %[[NEXT2:.*]], label %for.cond.for.end_crit_edge, !prof !15
; CHECK: [[NEXT2]]:
-; CHECK: br i1 %{{.*}}, label %for.body, label %{{.*}}, !prof !17
+; CHECK: br i1 %{{.*}}, label %for.body, label %{{.*}}, !prof !15, !llvm.loop !16
define void @basic(ptr %p, i32 %k) #0 !prof !15 {
entry:
@@ -104,6 +104,7 @@ attributes #1 = { nounwind optsize }
!16 = !{!"branch_weights", i32 3001, i32 1001}
;CHECK: !15 = !{!"branch_weights", i32 3001, i32 1001}
-;CHECK: !16 = !{!"branch_weights", i32 2000, i32 1001}
-;CHECK: !17 = !{!"branch_weights", i32 1001, i32 1001}
+;CHECK: !16 = distinct !{!16, !17, !18, {{.*}}}
+;CHECK: !17 = !{!"llvm.loop.peeled.count", i32 4}
+;CHECK: !18 = !{!"llvm.loop.estimated_trip_count", i32 0}
diff --git a/llvm/test/Transforms/LoopUnroll/scev-invalidation-lcssa.ll b/llvm/test/Transforms/LoopUnroll/scev-invalidation-lcssa.ll
index ec71c67..0a3d201 100644
--- a/llvm/test/Transforms/LoopUnroll/scev-invalidation-lcssa.ll
+++ b/llvm/test/Transforms/LoopUnroll/scev-invalidation-lcssa.ll
@@ -3,7 +3,7 @@
define i32 @f(i1 %cond1) #0 !prof !0 {
; CHECK-LABEL: define i32 @f
-; CHECK-SAME: (i1 [[COND1:%.*]]) !prof [[PROF0:![0-9]+]] {
+; CHECK-SAME: (i1 [[COND1:%.*]]) {{.*}}{
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP1_PEEL_BEGIN:%.*]]
; CHECK: loop1.peel.begin:
@@ -19,7 +19,7 @@ define i32 @f(i1 %cond1) #0 !prof !0 {
; CHECK-NEXT: br label [[LOOP1:%.*]]
; CHECK: loop1:
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr null, align 8
-; CHECK-NEXT: br i1 [[COND1]], label [[LOOP1]], label [[EXIT1_LOOPEXIT:%.*]], !prof [[PROF2:![0-9]+]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br i1 [[COND1]], label [[LOOP1]], label [[EXIT1_LOOPEXIT:%.*]], !prof [[PROF1]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK: exit1.loopexit:
; CHECK-NEXT: [[LD_LCSSA_PH:%.*]] = phi i64 [ [[LD]], [[LOOP1]] ]
; CHECK-NEXT: br label [[EXIT1]]
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-tail-folding.ll
new file mode 100644
index 0000000..e97d6e66d
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-tail-folding.ll
@@ -0,0 +1,244 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
+; RUN: opt -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S %s | FileCheck --check-prefix=VF2IC1 %s
+; RUN: opt -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=2 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S %s | FileCheck --check-prefix=VF2IC2 %s
+; RUN: opt -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=2 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S %s | FileCheck --check-prefix=VF1IC2 %s
+
+define i32 @FOR_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) {
+; VF2IC1-LABEL: define i32 @FOR_used_outside(
+; VF2IC1-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) {
+; VF2IC1-NEXT: [[ENTRY:.*]]:
+; VF2IC1-NEXT: br label %[[LOOP:.*]]
+; VF2IC1: [[LOOP]]:
+; VF2IC1-NEXT: [[TMP1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; VF2IC1-NEXT: [[FOR:%.*]] = phi i32 [ 33, %[[ENTRY]] ], [ [[TMP10:%.*]], %[[LOOP]] ]
+; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP1]]
+; VF2IC1-NEXT: [[TMP10]] = load i32, ptr [[TMP9]], align 4
+; VF2IC1-NEXT: [[TMP23:%.*]] = add nsw i32 [[FOR]], [[TMP10]]
+; VF2IC1-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP1]]
+; VF2IC1-NEXT: store i32 [[TMP23]], ptr [[TMP20]], align 4
+; VF2IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[TMP1]], 1
+; VF2IC1-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; VF2IC1-NEXT: br i1 [[EC]], label %[[FOR_END:.*]], label %[[LOOP]]
+; VF2IC1: [[FOR_END]]:
+; VF2IC1-NEXT: [[TMP32:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ]
+; VF2IC1-NEXT: ret i32 [[TMP32]]
+;
+; VF2IC2-LABEL: define i32 @FOR_used_outside(
+; VF2IC2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) {
+; VF2IC2-NEXT: [[ENTRY:.*]]:
+; VF2IC2-NEXT: br label %[[LOOP:.*]]
+; VF2IC2: [[LOOP]]:
+; VF2IC2-NEXT: [[TMP3:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; VF2IC2-NEXT: [[FOR:%.*]] = phi i32 [ 33, %[[ENTRY]] ], [ [[TMP23:%.*]], %[[LOOP]] ]
+; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP3]]
+; VF2IC2-NEXT: [[TMP23]] = load i32, ptr [[TMP22]], align 4
+; VF2IC2-NEXT: [[TMP47:%.*]] = add nsw i32 [[FOR]], [[TMP23]]
+; VF2IC2-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP3]]
+; VF2IC2-NEXT: store i32 [[TMP47]], ptr [[TMP44]], align 4
+; VF2IC2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[TMP3]], 1
+; VF2IC2-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; VF2IC2-NEXT: br i1 [[EC]], label %[[FOR_END:.*]], label %[[LOOP]]
+; VF2IC2: [[FOR_END]]:
+; VF2IC2-NEXT: [[TMP66:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ]
+; VF2IC2-NEXT: ret i32 [[TMP66]]
+;
+; VF1IC2-LABEL: define i32 @FOR_used_outside(
+; VF1IC2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) {
+; VF1IC2-NEXT: [[ENTRY:.*]]:
+; VF1IC2-NEXT: br label %[[LOOP:.*]]
+; VF1IC2: [[LOOP]]:
+; VF1IC2-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; VF1IC2-NEXT: [[FOR:%.*]] = phi i32 [ 33, %[[ENTRY]] ], [ [[TMP7:%.*]], %[[LOOP]] ]
+; VF1IC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP0]]
+; VF1IC2-NEXT: [[TMP7]] = load i32, ptr [[TMP6]], align 4
+; VF1IC2-NEXT: [[TMP12:%.*]] = add nsw i32 [[FOR]], [[TMP7]]
+; VF1IC2-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP0]]
+; VF1IC2-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4
+; VF1IC2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[TMP0]], 1
+; VF1IC2-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; VF1IC2-NEXT: br i1 [[EC]], label %[[FOR_END:.*]], label %[[LOOP]]
+; VF1IC2: [[FOR_END]]:
+; VF1IC2-NEXT: [[TMP30:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ]
+; VF1IC2-NEXT: ret i32 [[TMP30]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %for = phi i32 [ 33, %entry ], [ %for.next, %loop ]
+ %gep.A = getelementptr inbounds nuw i32, ptr %A, i64 %iv
+ %for.next = load i32, ptr %gep.A, align 4
+ %add = add nsw i32 %for, %for.next
+ %gep.B = getelementptr inbounds nuw i32, ptr %B, i64 %iv
+ store i32 %add, ptr %gep.B, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, %n
+ br i1 %ec, label %for.end, label %loop
+
+for.end:
+ ret i32 %for
+}
+
+define i32 @FOR_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) {
+; VF2IC1-LABEL: define i32 @FOR_next_used_outside(
+; VF2IC1-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) {
+; VF2IC1-NEXT: [[ENTRY:.*]]:
+; VF2IC1-NEXT: br label %[[LOOP:.*]]
+; VF2IC1: [[LOOP]]:
+; VF2IC1-NEXT: [[TMP1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; VF2IC1-NEXT: [[FOR:%.*]] = phi i32 [ 33, %[[ENTRY]] ], [ [[TMP10:%.*]], %[[LOOP]] ]
+; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP1]]
+; VF2IC1-NEXT: [[TMP10]] = load i32, ptr [[TMP9]], align 4
+; VF2IC1-NEXT: [[TMP23:%.*]] = add nsw i32 [[FOR]], [[TMP10]]
+; VF2IC1-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP1]]
+; VF2IC1-NEXT: store i32 [[TMP23]], ptr [[TMP20]], align 4
+; VF2IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[TMP1]], 1
+; VF2IC1-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; VF2IC1-NEXT: br i1 [[EC]], label %[[FOR_END:.*]], label %[[LOOP]]
+; VF2IC1: [[FOR_END]]:
+; VF2IC1-NEXT: [[TMP28:%.*]] = phi i32 [ [[TMP10]], %[[LOOP]] ]
+; VF2IC1-NEXT: ret i32 [[TMP28]]
+;
+; VF2IC2-LABEL: define i32 @FOR_next_used_outside(
+; VF2IC2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) {
+; VF2IC2-NEXT: [[ENTRY:.*]]:
+; VF2IC2-NEXT: br label %[[LOOP:.*]]
+; VF2IC2: [[LOOP]]:
+; VF2IC2-NEXT: [[TMP3:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; VF2IC2-NEXT: [[FOR:%.*]] = phi i32 [ 33, %[[ENTRY]] ], [ [[TMP23:%.*]], %[[LOOP]] ]
+; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP3]]
+; VF2IC2-NEXT: [[TMP23]] = load i32, ptr [[TMP22]], align 4
+; VF2IC2-NEXT: [[TMP47:%.*]] = add nsw i32 [[FOR]], [[TMP23]]
+; VF2IC2-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP3]]
+; VF2IC2-NEXT: store i32 [[TMP47]], ptr [[TMP44]], align 4
+; VF2IC2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[TMP3]], 1
+; VF2IC2-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; VF2IC2-NEXT: br i1 [[EC]], label %[[FOR_END:.*]], label %[[LOOP]]
+; VF2IC2: [[FOR_END]]:
+; VF2IC2-NEXT: [[TMP62:%.*]] = phi i32 [ [[TMP23]], %[[LOOP]] ]
+; VF2IC2-NEXT: ret i32 [[TMP62]]
+;
+; VF1IC2-LABEL: define i32 @FOR_next_used_outside(
+; VF1IC2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) {
+; VF1IC2-NEXT: [[ENTRY:.*]]:
+; VF1IC2-NEXT: br label %[[LOOP:.*]]
+; VF1IC2: [[LOOP]]:
+; VF1IC2-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; VF1IC2-NEXT: [[FOR:%.*]] = phi i32 [ 33, %[[ENTRY]] ], [ [[TMP7:%.*]], %[[LOOP]] ]
+; VF1IC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP0]]
+; VF1IC2-NEXT: [[TMP7]] = load i32, ptr [[TMP6]], align 4
+; VF1IC2-NEXT: [[TMP12:%.*]] = add nsw i32 [[FOR]], [[TMP7]]
+; VF1IC2-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP0]]
+; VF1IC2-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4
+; VF1IC2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[TMP0]], 1
+; VF1IC2-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; VF1IC2-NEXT: br i1 [[EC]], label %[[FOR_END:.*]], label %[[LOOP]]
+; VF1IC2: [[FOR_END]]:
+; VF1IC2-NEXT: [[TMP27:%.*]] = phi i32 [ [[TMP7]], %[[LOOP]] ]
+; VF1IC2-NEXT: ret i32 [[TMP27]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %for = phi i32 [ 33, %entry ], [ %for.next, %loop ]
+ %gep.A = getelementptr inbounds nuw i32, ptr %A, i64 %iv
+ %for.next = load i32, ptr %gep.A, align 4
+ %add = add nsw i32 %for, %for.next
+ %gep.B = getelementptr inbounds nuw i32, ptr %B, i64 %iv
+ store i32 %add, ptr %gep.B, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, %n
+ br i1 %ec, label %for.end, label %loop
+
+for.end:
+ ret i32 %for.next
+}
+
+define i32 @FOR_and_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) {
+; VF2IC1-LABEL: define i32 @FOR_and_next_used_outside(
+; VF2IC1-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) {
+; VF2IC1-NEXT: [[ENTRY:.*]]:
+; VF2IC1-NEXT: br label %[[LOOP:.*]]
+; VF2IC1: [[LOOP]]:
+; VF2IC1-NEXT: [[TMP1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; VF2IC1-NEXT: [[FOR:%.*]] = phi i32 [ 33, %[[ENTRY]] ], [ [[TMP10:%.*]], %[[LOOP]] ]
+; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP1]]
+; VF2IC1-NEXT: [[TMP10]] = load i32, ptr [[TMP9]], align 4
+; VF2IC1-NEXT: [[TMP23:%.*]] = add nsw i32 [[FOR]], [[TMP10]]
+; VF2IC1-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP1]]
+; VF2IC1-NEXT: store i32 [[TMP23]], ptr [[TMP20]], align 4
+; VF2IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[TMP1]], 1
+; VF2IC1-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; VF2IC1-NEXT: br i1 [[EC]], label %[[FOR_END:.*]], label %[[LOOP]]
+; VF2IC1: [[FOR_END]]:
+; VF2IC1-NEXT: [[TMP32:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ]
+; VF2IC1-NEXT: [[TMP33:%.*]] = phi i32 [ [[TMP10]], %[[LOOP]] ]
+; VF2IC1-NEXT: [[RES:%.*]] = add i32 [[TMP32]], [[TMP33]]
+; VF2IC1-NEXT: ret i32 [[RES]]
+;
+; VF2IC2-LABEL: define i32 @FOR_and_next_used_outside(
+; VF2IC2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) {
+; VF2IC2-NEXT: [[ENTRY:.*]]:
+; VF2IC2-NEXT: br label %[[LOOP:.*]]
+; VF2IC2: [[LOOP]]:
+; VF2IC2-NEXT: [[TMP3:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; VF2IC2-NEXT: [[FOR:%.*]] = phi i32 [ 33, %[[ENTRY]] ], [ [[TMP23:%.*]], %[[LOOP]] ]
+; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP3]]
+; VF2IC2-NEXT: [[TMP23]] = load i32, ptr [[TMP22]], align 4
+; VF2IC2-NEXT: [[TMP47:%.*]] = add nsw i32 [[FOR]], [[TMP23]]
+; VF2IC2-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP3]]
+; VF2IC2-NEXT: store i32 [[TMP47]], ptr [[TMP44]], align 4
+; VF2IC2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[TMP3]], 1
+; VF2IC2-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; VF2IC2-NEXT: br i1 [[EC]], label %[[FOR_END:.*]], label %[[LOOP]]
+; VF2IC2: [[FOR_END]]:
+; VF2IC2-NEXT: [[TMP66:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ]
+; VF2IC2-NEXT: [[TMP71:%.*]] = phi i32 [ [[TMP23]], %[[LOOP]] ]
+; VF2IC2-NEXT: [[RES:%.*]] = add i32 [[TMP66]], [[TMP71]]
+; VF2IC2-NEXT: ret i32 [[RES]]
+;
+; VF1IC2-LABEL: define i32 @FOR_and_next_used_outside(
+; VF1IC2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) {
+; VF1IC2-NEXT: [[ENTRY:.*]]:
+; VF1IC2-NEXT: br label %[[LOOP:.*]]
+; VF1IC2: [[LOOP]]:
+; VF1IC2-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; VF1IC2-NEXT: [[FOR:%.*]] = phi i32 [ 33, %[[ENTRY]] ], [ [[TMP7:%.*]], %[[LOOP]] ]
+; VF1IC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP0]]
+; VF1IC2-NEXT: [[TMP7]] = load i32, ptr [[TMP6]], align 4
+; VF1IC2-NEXT: [[TMP12:%.*]] = add nsw i32 [[FOR]], [[TMP7]]
+; VF1IC2-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP0]]
+; VF1IC2-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4
+; VF1IC2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[TMP0]], 1
+; VF1IC2-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; VF1IC2-NEXT: br i1 [[EC]], label %[[FOR_END:.*]], label %[[LOOP]]
+; VF1IC2: [[FOR_END]]:
+; VF1IC2-NEXT: [[TMP30:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ]
+; VF1IC2-NEXT: [[TMP33:%.*]] = phi i32 [ [[TMP7]], %[[LOOP]] ]
+; VF1IC2-NEXT: [[RES:%.*]] = add i32 [[TMP30]], [[TMP33]]
+; VF1IC2-NEXT: ret i32 [[RES]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %for = phi i32 [ 33, %entry ], [ %for.next, %loop ]
+ %gep.A = getelementptr inbounds nuw i32, ptr %A, i64 %iv
+ %for.next = load i32, ptr %gep.A, align 4
+ %add = add nsw i32 %for, %for.next
+ %gep.B = getelementptr inbounds nuw i32, ptr %B, i64 %iv
+ store i32 %add, ptr %gep.B, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, %n
+ br i1 %ec, label %for.end, label %loop
+
+for.end:
+ %res = add i32 %for, %for.next
+ ret i32 %res
+}
+
+
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-order.ll b/llvm/test/Transforms/LoopVectorize/reduction-order.ll
index b07c3833..b51db48 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-order.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-order.ll
@@ -1,63 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
; RUN: opt -passes='loop-vectorize' -force-vector-width=4 -force-vector-interleave=1 -S < %s 2>&1 | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
; Make sure the selects generated from reduction are always emitted
; in deterministic order.
-; CHECK-LABEL: @foo(
-; CHECK: vector.body:
-; CHECK: [[VEC_PHI_1:%.+]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[ADD_5:%.+]], %vector.body ]
-; CHECK: [[VEC_PHI_2:%.+]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[ADD_3:%.+]], %vector.body ]
-; CHECK: icmp ule <4 x i64>
-; CHECK-NEXT: [[ADD_3]] = add <4 x i32> splat (i32 3), [[VEC_PHI_2]]
-; CHECK-NEXT: [[ADD_5]] = add <4 x i32> [[VEC_PHI_1]], splat (i32 5)
-; CHECK: select <4 x i1> {{.*}}, <4 x i32> [[ADD_5]], <4 x i32>
-; CHECK-NEXT: select <4 x i1> {{.*}}, <4 x i32> [[ADD_3]], <4 x i32>
-; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
;
-define internal i64 @foo(ptr %t0) !prof !1 {
-t16:
- br label %t20
-
-t17: ; preds = %t20
- %t18 = phi i32 [ %t24, %t20 ]
- %t19 = phi i32 [ %t28, %t20 ]
- br label %t31
+define i32 @foo() !prof !1 {
+; CHECK-LABEL: define i32 @foo() {{.*}}{
+; CHECK-NEXT: [[T16:.*:]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI_1:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[ADD_5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI_2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[ADD_3:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IV]], splat (i64 9)
+; CHECK-NEXT: [[ADD_3]] = add <4 x i32> splat (i32 3), [[VEC_PHI_2]]
+; CHECK-NEXT: [[ADD_5]] = add <4 x i32> [[VEC_PHI_1]], splat (i32 5)
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[ADD_5]], <4 x i32> [[VEC_PHI_1]]
+; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[ADD_3]], <4 x i32> [[VEC_PHI_2]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 12
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3]])
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP7]], [[TMP6]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+entry:
+ br label %loop
-t20: ; preds = %t20, %t16
- %t21 = phi i64 [ 0, %t16 ], [ %t29, %t20 ]
- %t22 = phi i32 [ 0, %t16 ], [ %t28, %t20 ]
- %t23 = phi i32 [ 0, %t16 ], [ %t24, %t20 ]
- %t24 = add i32 3, %t23
- %t28 = add i32 %t22, 5
- %t29 = add nuw nsw i64 %t21, 1
- %t30 = icmp eq i64 %t29, 10
- br i1 %t30, label %t17, label %t20, !prof !2
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %red.1 = phi i32 [ 0, %entry ], [ %red.1.next, %loop ]
+ %red.2 = phi i32 [ 0, %entry ], [ %red.2.next, %loop ]
+ %red.2.next = add i32 3, %red.2
+ %red.1.next = add i32 %red.1, 5
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, 10
+ br i1 %ec, label %exit, label %loop, !prof !2
-t31:
- ret i64 undef
+exit:
+ %r.2 = phi i32 [ %red.2.next, %loop ]
+ %r.1 = phi i32 [ %red.1.next, %loop ]
+ %add = add i32 %r.2, %r.1
+ ret i32 %add
}
; Make sure we do not fail when checking for ordered reduction. This test just
; exercises the path and bails out without performing vectorization.
-; CHECK-LABEL: quux
-; CHECK-NOT: fadd <4 x
-define void @quux(i1 %arg) {
-bb:
+define double @quux(i1 %arg) {
+; CHECK-LABEL: define double @quux(
+; CHECK-SAME: i1 [[ARG:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[HEADER:.*]]
+; CHECK: [[HEADER]]:
+; CHECK-NEXT: [[TMP5:%.*]] = phi double [ 1.300000e+01, %[[ENTRY]] ], [ [[TMP:%.*]], %[[LATCH:.*]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = fadd double [[TMP5]], 1.000000e+00
+; CHECK-NEXT: br label %[[LATCH]]
+; CHECK: [[LATCH]]:
+; CHECK-NEXT: [[TMP]] = phi double [ [[TMP6]], %[[HEADER]] ]
+; CHECK-NEXT: br i1 [[ARG]], label %[[HEADER]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[R:%.*]] = phi double [ [[TMP]], %[[LATCH]] ]
+; CHECK-NEXT: ret double [[R]]
+;
+entry:
br label %header
-latch: ; preds = %header
- %tmp = phi double [ %tmp6, %header ]
- br i1 %arg, label %header, label %bb2
-
-bb2: ; preds = %latch
- %tmp3 = phi double [ %tmp, %latch ]
- ret void
-
-header: ; preds = %latch, %bb
- %tmp5 = phi double [ 1.300000e+01, %bb ], [ %tmp, %latch ]
+header:
+ %tmp5 = phi double [ 1.300000e+01, %entry ], [ %tmp, %latch ]
%tmp6 = fadd double %tmp5, 1.000000e+00
br label %latch
+
+latch:
+ %tmp = phi double [ %tmp6, %header ]
+ br i1 %arg, label %header, label %exit
+
+exit:
+ %r = phi double [ %tmp, %latch ]
+ ret double %r
}
!1 = !{!"function_entry_count", i64 801}
diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll
index 644900d..9620697 100644
--- a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll
+++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll
@@ -500,3 +500,50 @@ exit:
%first.addr.0.lcssa.i = phi ptr [ %first, %entry ], [ %iv, %loop.header ], [ %iv.next, %loop.latch ]
ret ptr %first.addr.0.lcssa.i
}
+
+define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_size_nofree_via_context(ptr noalias %p1, ptr noalias %p2) nosync {
+; CHECK-LABEL: define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_size_nofree_via_context(
+; CHECK-SAME: ptr noalias [[P1:%.*]], ptr noalias [[P2:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P1]], i64 4), "dereferenceable"(ptr [[P1]], i64 1024) ]
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i64 4), "dereferenceable"(ptr [[P2]], i64 1024) ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX1]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX1]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
+; CHECK: [[LOOP_INC]]:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX1]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
+; CHECK: [[LOOP_END]]:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX1]], %[[LOOP]] ], [ -1, %[[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ call void @llvm.assume(i1 true) [ "align"(ptr %p1, i64 4), "dereferenceable"(ptr %p1, i64 1024) ]
+ call void @llvm.assume(i1 true) [ "align"(ptr %p2, i64 4), "dereferenceable"(ptr %p2, i64 1024) ]
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 1024
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ -1, %loop.inc ]
+ ret i64 %retval
+}
diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp
index 141282e..30f0a8e5 100644
--- a/llvm/unittests/ADT/APFloatTest.cpp
+++ b/llvm/unittests/ADT/APFloatTest.cpp
@@ -10176,4 +10176,11 @@ TEST(APFloatTest, hasSignBitInMSB) {
EXPECT_FALSE(APFloat::hasSignBitInMSB(APFloat::Float8E8M0FNU()));
}
+TEST(APFloatTest, FrexpQuietSNaN) {
+ APFloat SNaN = APFloat::getSNaN(APFloat::PPCDoubleDouble());
+ int Exp;
+ APFloat Result = frexp(SNaN, Exp, APFloat::rmNearestTiesToEven);
+ EXPECT_FALSE(Result.isSignaling());
+}
+
} // namespace
diff --git a/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp b/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp
index d1c0f64..d8457a3 100644
--- a/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp
+++ b/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp
@@ -230,8 +230,7 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef)
CallBase *Call = findCall(*Func, "call");
Trie.buildAndAttachMIBMetadata(Call);
- EXPECT_TRUE(Call->hasFnAttr("memprof"));
- EXPECT_EQ(Call->getFnAttr("memprof").getValueAsString(), "ambiguous");
+ EXPECT_FALSE(Call->hasFnAttr("memprof"));
EXPECT_TRUE(Call->hasMetadata(LLVMContext::MD_memprof));
MDNode *MemProfMD = Call->getMetadata(LLVMContext::MD_memprof);
ASSERT_EQ(MemProfMD->getNumOperands(), 2u);
@@ -280,8 +279,7 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef)
CallBase *Call = findCall(*Func, "call");
Trie.buildAndAttachMIBMetadata(Call);
- EXPECT_TRUE(Call->hasFnAttr("memprof"));
- EXPECT_EQ(Call->getFnAttr("memprof").getValueAsString(), "ambiguous");
+ EXPECT_FALSE(Call->hasFnAttr("memprof"));
EXPECT_TRUE(Call->hasMetadata(LLVMContext::MD_memprof));
MDNode *MemProfMD = Call->getMetadata(LLVMContext::MD_memprof);
ASSERT_EQ(MemProfMD->getNumOperands(), 2u);
@@ -335,8 +333,7 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef)
CallBase *Call = findCall(*Func, "call");
Trie.buildAndAttachMIBMetadata(Call);
- EXPECT_TRUE(Call->hasFnAttr("memprof"));
- EXPECT_EQ(Call->getFnAttr("memprof").getValueAsString(), "ambiguous");
+ EXPECT_FALSE(Call->hasFnAttr("memprof"));
EXPECT_TRUE(Call->hasMetadata(LLVMContext::MD_memprof));
MDNode *MemProfMD = Call->getMetadata(LLVMContext::MD_memprof);
ASSERT_EQ(MemProfMD->getNumOperands(), 2u);
@@ -395,8 +392,7 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef)
CallBase *Call = findCall(*Func, "call");
Trie.buildAndAttachMIBMetadata(Call);
- EXPECT_TRUE(Call->hasFnAttr("memprof"));
- EXPECT_EQ(Call->getFnAttr("memprof").getValueAsString(), "ambiguous");
+ EXPECT_FALSE(Call->hasFnAttr("memprof"));
EXPECT_TRUE(Call->hasMetadata(LLVMContext::MD_memprof));
MDNode *MemProfMD = Call->getMetadata(LLVMContext::MD_memprof);
ASSERT_EQ(MemProfMD->getNumOperands(), 2u);
@@ -467,8 +463,7 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef)
ASSERT_NE(Call, nullptr);
Trie.buildAndAttachMIBMetadata(Call);
- EXPECT_TRUE(Call->hasFnAttr("memprof"));
- EXPECT_EQ(Call->getFnAttr("memprof").getValueAsString(), "ambiguous");
+ EXPECT_FALSE(Call->hasFnAttr("memprof"));
EXPECT_TRUE(Call->hasMetadata(LLVMContext::MD_memprof));
MDNode *MemProfMD = Call->getMetadata(LLVMContext::MD_memprof);
EXPECT_THAT(MemProfMD, MemprofMetadataEquals(ExpectedVals));
@@ -541,8 +536,7 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef)
// Restore original option value.
MemProfKeepAllNotColdContexts = OrigMemProfKeepAllNotColdContexts;
- EXPECT_TRUE(Call->hasFnAttr("memprof"));
- EXPECT_EQ(Call->getFnAttr("memprof").getValueAsString(), "ambiguous");
+ EXPECT_FALSE(Call->hasFnAttr("memprof"));
EXPECT_TRUE(Call->hasMetadata(LLVMContext::MD_memprof));
MDNode *MemProfMD = Call->getMetadata(LLVMContext::MD_memprof);
EXPECT_THAT(MemProfMD, MemprofMetadataEquals(ExpectedVals));
@@ -670,8 +664,7 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef)
// The hot allocations will be converted to NotCold and pruned as they
// are unnecessary to determine how to clone the cold allocation.
- EXPECT_TRUE(Call->hasFnAttr("memprof"));
- EXPECT_EQ(Call->getFnAttr("memprof").getValueAsString(), "ambiguous");
+ EXPECT_FALSE(Call->hasFnAttr("memprof"));
EXPECT_TRUE(Call->hasMetadata(LLVMContext::MD_memprof));
MemProfMD = Call->getMetadata(LLVMContext::MD_memprof);
ASSERT_EQ(MemProfMD->getNumOperands(), 2u);
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
index 1eda5e4..8e43c42 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
@@ -996,6 +996,35 @@ class OpenMP_NumTeamsClauseSkip<
def OpenMP_NumTeamsClause : OpenMP_NumTeamsClauseSkip<>;
//===----------------------------------------------------------------------===//
+// V5.1: [10.1.2] `sizes` clause
+//===----------------------------------------------------------------------===//
+
+class OpenMP_SizesClauseSkip<
+ bit traits = false, bit arguments = false, bit assemblyFormat = false,
+ bit description = false, bit extraClassDeclaration = false
+ > : OpenMP_Clause<traits, arguments, assemblyFormat, description,
+ extraClassDeclaration> {
+ let arguments = (ins
+ Variadic<IntLikeType>:$sizes
+ );
+
+ let optAssemblyFormat = [{
+ `sizes` `(` $sizes `:` type($sizes) `)`
+ }];
+
+ let description = [{
+ The `sizes` clause defines the size of a grid over a multi-dimensional
+ logical iteration space. This grid is used for loop transformations such as
+ `tile` and `strip`. The size per dimension can be a variable, but only
+ values that are at least 2 make sense. It is not specified what happens
+ when smaller values are used, but the result should still be a loop nest
+ that executes each logical iteration once.
+ }];
+}
+
+def OpenMP_SizesClause : OpenMP_SizesClauseSkip<>;
+
+//===----------------------------------------------------------------------===//
// V5.2: [10.1.2] `num_threads` clause
//===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td
index bbcfb87f..5ad4e4b 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td
@@ -38,6 +38,44 @@ def OpenMP_MapBoundsType : OpenMP_Type<"MapBounds", "map_bounds_ty"> {
let summary = "Type for representing omp map clause bounds information";
}
+//===---------------------------------------------------------------------===//
+// OpenMP Canonical Loop Info Type
+//===---------------------------------------------------------------------===//
+
+def CanonicalLoopInfoType : OpenMP_Type<"CanonicalLoopInfo", "cli"> {
+ let summary = "Type for representing a reference to a canonical loop";
+ let description = [{
+ A variable of type CanonicalLoopInfo refers to an OpenMP-compatible
+ canonical loop in the same function. Values of this type are not
+ available at runtime and therefore cannot be used by the program itself,
+ i.e. an opaque type. It is similar to the transform dialect's
+ `!transform.interface` type, but instead of implementing an interface
+ for each transformation, the OpenMP dialect itself defines possible
+ operations on this type.
+
+ A value of type CanonicalLoopInfoType (in the following: CLI) can be
+
+ 1. created by omp.new_cli.
+ 2. passed to omp.canonical_loop to associate the loop to that CLI. A CLI
+ can only be associated once.
+ 3. passed to an omp loop transformation operation that modifies the loop
+ associated with the CLI. The CLI is the "applyee" and the operation is
+ the consumer. A CLI can only be consumed once.
+ 4. passed to an omp loop transformation operation to associate the CLI with
+ a result of that transformation. The CLI is the "generatee" and the
+ operation is the generator.
+
+ A CLI cannot
+
+ 1. be returned from a function.
+ 2. be passed to operations that are not specifically designed to take a
+ CanonicalLoopInfoType, including AnyType.
+
+ A CLI directly corresponds to an object of
+ OpenMPIRBuilder's CanonicalLoopInfo struct when lowering to LLVM-IR.
+ }];
+}
+
//===----------------------------------------------------------------------===//
// Base classes for OpenMP dialect operations.
//===----------------------------------------------------------------------===//
@@ -211,8 +249,35 @@ class OpenMP_Op<string mnemonic, list<Trait> traits = [],
// Doesn't actually create a C++ base class (only defines default values for
// tablegen classes that derive from this). Use LoopTransformationInterface
// instead for common operations.
-class OpenMPTransform_Op<string mnemonic, list<Trait> traits = []> :
- OpenMP_Op<mnemonic, !listconcat([DeclareOpInterfaceMethods<LoopTransformationInterface>], traits) > {
+class OpenMPTransform_Op<string mnemonic,
+ list<Trait> traits = [],
+ list<OpenMP_Clause> clauses = []> :
+ OpenMP_Op<mnemonic,
+ traits = !listconcat([DeclareOpInterfaceMethods<LoopTransformationInterface>], traits),
+ clauses = clauses> {
+}
+
+// Base class for loop transformations using the standard syntax.
+//
+// omp.opname ($generatees) <- ($applyees) clause(...) clause(...) ... <attr-dict>
+// omp.opname ($applyees) clause(...) clause(...) ... <attr-dict>
+//
+// $generatees is optional and is assumed to be empty if omitted
+class OpenMPTransformBase_Op<string mnemonic,
+ list<Trait> traits = [],
+ list<OpenMP_Clause> clauses = []> :
+ OpenMPTransform_Op<mnemonic,
+ traits = !listconcat(traits, [AttrSizedOperandSegments]),
+ clauses = clauses> {
+
+ let arguments = !con(
+ (ins Variadic<CanonicalLoopInfoType>:$generatees,
+ Variadic<CanonicalLoopInfoType>:$applyees
+ ), clausesArgs);
+
+ let assemblyFormat = [{ custom<LoopTransformClis>($generatees, $applyees) }]
+ # clausesAssemblyFormat
+ # [{ attr-dict }];
}
#endif // OPENMP_OP_BASE
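Both surface forms accepted by OpenMPTransformBase_Op, sketched for omp.tile, the only user of the base class in this patch:

    omp.tile (%grid1, %intratile1) <- (%canonloop) sizes(%ts : i32)   // generatees present
    omp.tile <- (%canonloop) sizes(%ts : i32)                         // generatees omitted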
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 5c77e21..b73091e 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -358,44 +358,6 @@ def SingleOp : OpenMP_Op<"single", traits = [
}
//===---------------------------------------------------------------------===//
-// OpenMP Canonical Loop Info Type
-//===---------------------------------------------------------------------===//
-
-def CanonicalLoopInfoType : OpenMP_Type<"CanonicalLoopInfo", "cli"> {
- let summary = "Type for representing a reference to a canonical loop";
- let description = [{
- A variable of type CanonicalLoopInfo refers to an OpenMP-compatible
- canonical loop in the same function. Values of this type are not
- available at runtime and therefore cannot be used by the program itself,
- i.e. an opaque type. It is similar to the transform dialect's
- `!transform.interface` type, but instead of implementing an interface
- for each transformation, the OpenMP dialect itself defines possible
- operations on this type.
-
- A value of type CanonicalLoopInfoType (in the following: CLI) value can be
-
- 1. created by omp.new_cli.
- 2. passed to omp.canonical_loop to associate the loop to that CLI. A CLI
- can only be associated once.
- 3. passed to an omp loop transformation operation that modifies the loop
- associated with the CLI. The CLI is the "applyee" and the operation is
- the consumer. A CLI can only be consumed once.
- 4. passed to an omp loop transformation operation to associate the cli with
- a result of that transformation. The CLI is the "generatee" and the
- operation is the generator.
-
- A CLI cannot
-
- 1. be returned from a function.
- 2. be passed to operations that are not specifically designed to take a
- CanonicalLoopInfoType, including AnyType.
-
- A CLI directly corresponds to an object of
- OpenMPIRBuilder's CanonicalLoopInfo struct when lowering to LLVM-IR.
- }];
-}
-
-//===---------------------------------------------------------------------===//
// OpenMP Canonical Loop Info Creation
//===---------------------------------------------------------------------===//
@@ -564,6 +526,31 @@ def UnrollHeuristicOp : OpenMPTransform_Op<"unroll_heuristic", []> {
}
//===----------------------------------------------------------------------===//
+// OpenMP tile operation
+//===----------------------------------------------------------------------===//
+
+def TileOp : OpenMPTransformBase_Op<"tile",
+ clauses = [OpenMP_SizesClause]> {
+ let summary = "OpenMP tile operation";
+ let description = [{
+ Represents the OpenMP tile directive introduced in OpenMP 5.1.
+
+ The construct partitions the logical iteration space of the affected loops
+ into equally-sized tiles, then creates two sets of nested loops. The outer
+ loops, called the grid loops, iterate over all tiles. The inner loops,
+ called the intratile loops, iterate over the logical iterations of a tile.
+ The sizes clause determines the size of a tile.
+
+ Currently, the affected loops must be rectangular (the tripcount of the
+ inner loop must not depend on any iv of a surrounding affected loop) and
+ perfectly nested (except for the innermost affected loop, the loop body
+ contains no operations other than the nested loop and the terminator).
+ }] # clausesDescription;
+
+ let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
// 2.8.3 Workshare Construct
//===----------------------------------------------------------------------===//
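A rough worked example of the grid/intratile split (numbers invented for this sketch): for a trip count of 10 and a tile size of 4, the grid loop runs ceil(10/4) = 3 iterations and the intratile loop runs 4 iterations per tile, except 2 on the final partial tile; this matches the floor/tile loop nest the OpenMPIRBuilder emits in the LLVM-IR tests below. When generatees are given, the first half names the grid loops and the second half the intratile loops:

    %grid1 = omp.new_cli
    %intratile1 = omp.new_cli
    omp.tile (%grid1, %intratile1) <- (%canonloop) sizes(%ts : i32)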
diff --git a/mlir/lib/Dialect/Arith/Transforms/EmulateUnsupportedFloats.cpp b/mlir/lib/Dialect/Arith/Transforms/EmulateUnsupportedFloats.cpp
index 7626d35..c64e10f5 100644
--- a/mlir/lib/Dialect/Arith/Transforms/EmulateUnsupportedFloats.cpp
+++ b/mlir/lib/Dialect/Arith/Transforms/EmulateUnsupportedFloats.cpp
@@ -123,7 +123,8 @@ void mlir::arith::populateEmulateUnsupportedFloatsLegality(
vector::OuterProductOp, vector::ScanOp>(
[&](Operation *op) { return converter.isLegal(op); });
target.addLegalOp<arith::BitcastOp, arith::ExtFOp, arith::TruncFOp,
- arith::ConstantOp, vector::SplatOp, vector::BroadcastOp>();
+ arith::ConstantOp, arith::SelectOp, vector::SplatOp,
+ vector::BroadcastOp>();
}
void EmulateUnsupportedFloatsPass::runOnOperation() {
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 32ebe06..5672942 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -33,6 +33,7 @@
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/ADT/bit.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
+#include "llvm/Support/InterleavedRange.h"
#include <cstddef>
#include <iterator>
#include <optional>
@@ -3385,6 +3386,9 @@ void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) {
Value result = getResult();
auto [newCli, gen, cons] = decodeCli(result);
+ // Structured binding `gen` cannot be captured in lambdas before C++20
+ OpOperand *generator = gen;
+
// Derive the CLI variable name from its generator:
// * "canonloop" for omp.canonical_loop
// * custom name for loop transformation generatees
@@ -3403,6 +3407,24 @@ void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) {
.Case([&](UnrollHeuristicOp op) -> std::string {
llvm_unreachable("heuristic unrolling does not generate a loop");
})
+ .Case([&](TileOp op) -> std::string {
+ auto [generateesFirst, generateesCount] =
+ op.getGenerateesODSOperandIndexAndLength();
+ unsigned firstGrid = generateesFirst;
+ unsigned firstIntratile = generateesFirst + generateesCount / 2;
+ unsigned end = generateesFirst + generateesCount;
+ unsigned opnum = generator->getOperandNumber();
+ // In the OpenMP apply and looprange clauses, indices are 1-based
+ if (firstGrid <= opnum && opnum < firstIntratile) {
+ unsigned gridnum = opnum - firstGrid + 1;
+ return ("grid" + Twine(gridnum)).str();
+ }
+ if (firstIntratile <= opnum && opnum < end) {
+ unsigned intratilenum = opnum - firstIntratile + 1;
+ return ("intratile" + Twine(intratilenum)).str();
+ }
+ llvm_unreachable("Unexpected generatee argument");
+ })
.Default([&](Operation *op) {
assert(false && "TODO: Custom name for this operation");
return "transformed";
@@ -3632,6 +3654,138 @@ UnrollHeuristicOp::getGenerateesODSOperandIndexAndLength() {
}
//===----------------------------------------------------------------------===//
+// TileOp
+//===----------------------------------------------------------------------===//
+
+static void printLoopTransformClis(OpAsmPrinter &p, TileOp op,
+ OperandRange generatees,
+ OperandRange applyees) {
+ if (!generatees.empty())
+ p << '(' << llvm::interleaved(generatees) << ')';
+
+ if (!applyees.empty())
+ p << " <- (" << llvm::interleaved(applyees) << ')';
+}
+
+static ParseResult parseLoopTransformClis(
+ OpAsmParser &parser,
+ SmallVectorImpl<OpAsmParser::UnresolvedOperand> &generateesOperands,
+ SmallVectorImpl<OpAsmParser::UnresolvedOperand> &applyeesOperands) {
+ if (parser.parseOptionalLess()) {
+ // Syntax 1: generatees present
+
+ if (parser.parseOperandList(generateesOperands,
+ mlir::OpAsmParser::Delimiter::Paren))
+ return failure();
+
+ if (parser.parseLess())
+ return failure();
+ } else {
+ // Syntax 2: generatees omitted
+ }
+
+ // Parse `<-` (`<` has already been parsed)
+ if (parser.parseMinus())
+ return failure();
+
+ if (parser.parseOperandList(applyeesOperands,
+ mlir::OpAsmParser::Delimiter::Paren))
+ return failure();
+
+ return success();
+}
+
+LogicalResult TileOp::verify() {
+ if (getApplyees().empty())
+ return emitOpError() << "must apply to at least one loop";
+
+ if (getSizes().size() != getApplyees().size())
+ return emitOpError() << "there must be one tile size for each applyee";
+
+ if (!getGeneratees().empty() &&
+ 2 * getSizes().size() != getGeneratees().size())
+ return emitOpError()
+ << "expecting two times the number of generatees than applyees";
+
+ DenseSet<Value> parentIVs;
+
+ Value parent = getApplyees().front();
+ for (auto &&applyee : llvm::drop_begin(getApplyees())) {
+ auto [parentCreate, parentGen, parentCons] = decodeCli(parent);
+ auto [create, gen, cons] = decodeCli(applyee);
+
+ if (!parentGen)
+ return emitOpError() << "applyee CLI has no generator";
+
+ auto parentLoop = dyn_cast_or_null<CanonicalLoopOp>(parentGen->getOwner());
+ if (!parentLoop)
+ return emitOpError()
+ << "currently only supports omp.canonical_loop as applyee";
+
+ parentIVs.insert(parentLoop.getInductionVar());
+
+ if (!gen)
+ return emitOpError() << "applyee CLI has no generator";
+ auto loop = dyn_cast_or_null<CanonicalLoopOp>(gen->getOwner());
+ if (!loop)
+ return emitOpError()
+ << "currently only supports omp.canonical_loop as applyee";
+
+ // The canonical loops must be perfectly nested, i.e. the body of the
+ // parent may only contain the omp.canonical_loop of the nested loop and
+ // the omp.terminator.
+ bool isPerfectlyNested = [&]() {
+ auto &parentBody = parentLoop.getRegion();
+ if (!parentBody.hasOneBlock())
+ return false;
+ auto &parentBlock = parentBody.getBlocks().front();
+
+ auto nestedLoopIt = parentBlock.begin();
+ if (nestedLoopIt == parentBlock.end() ||
+ (&*nestedLoopIt != loop.getOperation()))
+ return false;
+
+ auto termIt = std::next(nestedLoopIt);
+ if (termIt == parentBlock.end() || !isa<TerminatorOp>(termIt))
+ return false;
+
+ if (std::next(termIt) != parentBlock.end())
+ return false;
+
+ return true;
+ }();
+ if (!isPerfectlyNested)
+ return emitOpError() << "tiled loop nest must be perfectly nested";
+
+ if (parentIVs.contains(loop.getTripCount()))
+ return emitOpError() << "tiled loop nest must be rectangular";
+
+ parent = applyee;
+ }
+
+ // TODO: The tile sizes must be computed before the loop, but checking this
+ // requires dominance analysis. For instance:
+ //
+ // %canonloop = omp.new_cli
+ // omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+ // // write to %x
+ // omp.terminator
+ // }
+ // %ts = llvm.load %x
+ // omp.tile <- (%canonloop) sizes(%ts : i32)
+
+ return success();
+}
+
+std::pair<unsigned, unsigned> TileOp::getApplyeesODSOperandIndexAndLength() {
+ return getODSOperandIndexAndLength(odsIndex_applyees);
+}
+
+std::pair<unsigned, unsigned> TileOp::getGenerateesODSOperandIndexAndLength() {
+ return getODSOperandIndexAndLength(odsIndex_generatees);
+}
+
+//===----------------------------------------------------------------------===//
// Critical construct (2.17.1)
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
index 9413a92..784e5d6 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
@@ -824,7 +824,7 @@ struct WgToSgStoreScatterOpWithOffset
return failure();
xegpu::DistributeLayoutAttr layout =
- xegpu::getDistributeLayoutAttr(op.getValue());
+ xegpu::getDistributeLayoutAttr(op.getOperand(0));
if (!layout || !layout.isForWorkgroup())
return failure();
@@ -844,12 +844,19 @@ struct WgToSgStoreScatterOpWithOffset
auto chunkSizeAttr = rewriter.getI64IntegerAttr(chunkSize);
for (auto [val, offs, mask] : llvm::zip(
adaptor.getValue(), adaptor.getOffsets(), adaptor.getMask())) {
- xegpu::StoreScatterOp::create(rewriter, loc, val, op.getDest(), offs,
- mask, chunkSizeAttr, op.getL1HintAttr(),
- op.getL2HintAttr(), op.getL3HintAttr());
+ auto store = xegpu::StoreScatterOp::create(
+ rewriter, loc, val, op.getDest(), offs, mask, chunkSizeAttr,
+ op.getL1HintAttr(), op.getL2HintAttr(), op.getL3HintAttr());
// Update the layout attribute to drop sg_layout and sg_data.
- if (auto newLayout = layout.dropSgLayoutAndData())
- op->setAttr("layout", newLayout);
+ if (!layout.getEffectiveLaneLayoutAsInt().empty() ||
+ !layout.getEffectiveInstDataAsInt().empty()) {
+ for (OpOperand &operand : store->getOpOperands()) {
+ // Skip operand one (the memref destination)
+ if (operand.getOperandNumber() == 1)
+ continue;
+ xegpu::setDistributeLayoutAttr(operand, layout.dropSgLayoutAndData());
+ }
+ }
}
rewriter.eraseOp(op);
return success();
@@ -1247,10 +1254,7 @@ void XeGPUWgToSgDistributePass::runOnOperation() {
target.addDynamicallyLegalOp<xegpu::StoreScatterOp>(
[=](xegpu::StoreScatterOp op) -> bool {
- // Check if the layout attribute is present on the result.
- auto layout = op->getAttrOfType<xegpu::LayoutAttr>("layout");
- if (!layout)
- return true;
+ auto layout = xegpu::getDistributeLayoutAttr(op.getOperand(0));
return isLegal(layout);
});
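With this change the distribute layout is taken from the stored value (operand 0) and, after dropping sg_layout/sg_data, re-attached per operand as layout_operand_<n> attributes instead of a single op-level "layout" attribute. A sketch mirroring the updated test further below:

    xegpu.store %val, %dest[%offset], %mask
        {layout_operand_0 = #xegpu.layout<inst_data = [8]>,
         layout_operand_2 = #xegpu.layout<inst_data = [8]>,
         layout_operand_3 = #xegpu.layout<inst_data = [8]>,
         l1_hint = #xegpu.cache_hint<cached>}
        : vector<8xf16>, memref<256xf16>, vector<8xindex>, vector<8xi1>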
diff --git a/mlir/lib/Target/IRDLToCpp/IRDLToCpp.cpp b/mlir/lib/Target/IRDLToCpp/IRDLToCpp.cpp
index d6b8a8a..e3f075f 100644
--- a/mlir/lib/Target/IRDLToCpp/IRDLToCpp.cpp
+++ b/mlir/lib/Target/IRDLToCpp/IRDLToCpp.cpp
@@ -54,6 +54,7 @@ struct OpStrings {
std::string opCppName;
SmallVector<std::string> opResultNames;
SmallVector<std::string> opOperandNames;
+ SmallVector<std::string> opRegionNames;
};
static std::string joinNameList(llvm::ArrayRef<std::string> names) {
@@ -87,8 +88,8 @@ static TypeStrings getStrings(irdl::TypeOp type) {
/// Generates OpStrings from an OperationOp
static OpStrings getStrings(irdl::OperationOp op) {
auto operandOp = op.getOp<irdl::OperandsOp>();
-
auto resultOp = op.getOp<irdl::ResultsOp>();
+ auto regionsOp = op.getOp<irdl::RegionsOp>();
OpStrings strings;
strings.opName = op.getSymName();
@@ -108,6 +109,13 @@ static OpStrings getStrings(irdl::OperationOp op) {
}));
}
+ if (regionsOp) {
+ strings.opRegionNames = SmallVector<std::string>(
+ llvm::map_range(regionsOp->getNames(), [](Attribute attr) {
+ return llvm::formatv("{0}", cast<StringAttr>(attr));
+ }));
+ }
+
return strings;
}
@@ -122,6 +130,7 @@ static void fillDict(irdl::detail::dictionary &dict,
static void fillDict(irdl::detail::dictionary &dict, const OpStrings &strings) {
const auto operandCount = strings.opOperandNames.size();
const auto resultCount = strings.opResultNames.size();
+ const auto regionCount = strings.opRegionNames.size();
dict["OP_NAME"] = strings.opName;
dict["OP_CPP_NAME"] = strings.opCppName;
@@ -131,6 +140,7 @@ static void fillDict(irdl::detail::dictionary &dict, const OpStrings &strings) {
operandCount ? joinNameList(strings.opOperandNames) : "{\"\"}";
dict["OP_RESULT_INITIALIZER_LIST"] =
resultCount ? joinNameList(strings.opResultNames) : "{\"\"}";
+ dict["OP_REGION_COUNT"] = std::to_string(regionCount);
}
/// Fills a dictionary with values from DialectStrings
@@ -179,6 +189,8 @@ static void generateOpGetterDeclarations(irdl::detail::dictionary &dict,
const OpStrings &opStrings) {
auto opGetters = std::string{};
auto resGetters = std::string{};
+ auto regionGetters = std::string{};
+ auto regionAdaptorGetters = std::string{};
for (size_t i = 0, end = opStrings.opOperandNames.size(); i < end; ++i) {
const auto op =
@@ -196,8 +208,23 @@ static void generateOpGetterDeclarations(irdl::detail::dictionary &dict,
op, i);
}
+ for (size_t i = 0, end = opStrings.opRegionNames.size(); i < end; ++i) {
+ const auto op =
+ llvm::convertToCamelFromSnakeCase(opStrings.opRegionNames[i], true);
+ regionAdaptorGetters += llvm::formatv(
+ R"(::mlir::Region &get{0}() { return *getRegions()[{1}]; }
+ )",
+ op, i);
+ regionGetters += llvm::formatv(
+ R"(::mlir::Region &get{0}() { return (*this)->getRegion({1}); }
+ )",
+ op, i);
+ }
+
dict["OP_OPERAND_GETTER_DECLS"] = opGetters;
dict["OP_RESULT_GETTER_DECLS"] = resGetters;
+ dict["OP_REGION_ADAPTER_GETTER_DECLS"] = regionAdaptorGetters;
+ dict["OP_REGION_GETTER_DECLS"] = regionGetters;
}
static void generateOpBuilderDeclarations(irdl::detail::dictionary &dict,
@@ -238,6 +265,22 @@ static void generateOpBuilderDeclarations(irdl::detail::dictionary &dict,
dict["OP_BUILD_DECLS"] = buildDecls;
}
+// Collect the C++ trait names to attach to the generated operation.
+static SmallVector<std::string> generateTraits(irdl::OperationOp op,
+ const OpStrings &strings) {
+ SmallVector<std::string> cppTraitNames;
+ if (!strings.opRegionNames.empty()) {
+ cppTraitNames.push_back(
+ llvm::formatv("::mlir::OpTrait::NRegions<{0}>::Impl",
+ strings.opRegionNames.size())
+ .str());
+
+ // Requires verifyInvariantsImpl to be implemented on the op.
+ cppTraitNames.emplace_back("::mlir::OpTrait::OpInvariants");
+ }
+ return cppTraitNames;
+}
+
static LogicalResult generateOperationInclude(irdl::OperationOp op,
raw_ostream &output,
irdl::detail::dictionary &dict) {
@@ -247,6 +290,13 @@ static LogicalResult generateOperationInclude(irdl::OperationOp op,
const auto opStrings = getStrings(op);
fillDict(dict, opStrings);
+ SmallVector<std::string> traitNames = generateTraits(op, opStrings);
+ if (traitNames.empty())
+ dict["OP_TEMPLATE_ARGS"] = opStrings.opCppName;
+ else
+ dict["OP_TEMPLATE_ARGS"] = llvm::formatv("{0}, {1}", opStrings.opCppName,
+ llvm::join(traitNames, ", "));
+
generateOpGetterDeclarations(dict, opStrings);
generateOpBuilderDeclarations(dict, opStrings);
@@ -301,6 +351,110 @@ static LogicalResult generateInclude(irdl::DialectOp dialect,
return success();
}
+static void generateRegionConstraintVerifiers(
+ irdl::detail::dictionary &dict, irdl::OperationOp op,
+ const OpStrings &strings, SmallVectorImpl<std::string> &verifierHelpers,
+ SmallVectorImpl<std::string> &verifierCalls) {
+ auto regionsOp = op.getOp<irdl::RegionsOp>();
+ if (strings.opRegionNames.empty() || !regionsOp)
+ return;
+
+ for (size_t i = 0; i < strings.opRegionNames.size(); ++i) {
+ std::string regionName = strings.opRegionNames[i];
+ std::string helperFnName =
+ llvm::formatv("__mlir_irdl_local_region_constraint_{0}_{1}",
+ strings.opCppName, regionName)
+ .str();
+
+ // Extract the actual region constraint from the IRDL RegionOp
+ std::string condition = "true";
+ std::string textualConditionName = "any region";
+
+ if (auto regionDefOp =
+ dyn_cast<irdl::RegionOp>(regionsOp->getArgs()[i].getDefiningOp())) {
+ // Generate constraint condition based on RegionOp attributes
+ SmallVector<std::string> conditionParts;
+ SmallVector<std::string> descriptionParts;
+
+ // Check number of blocks constraint
+ if (auto blockCount = regionDefOp.getNumberOfBlocks()) {
+ conditionParts.push_back(
+ llvm::formatv("region.getBlocks().size() == {0}",
+ blockCount.value())
+ .str());
+ descriptionParts.push_back(
+ llvm::formatv("exactly {0} block(s)", blockCount.value()).str());
+ }
+
+ // Check entry block arguments constraint
+ if (regionDefOp.getConstrainedArguments()) {
+ size_t expectedArgCount = regionDefOp.getEntryBlockArgs().size();
+ conditionParts.push_back(
+ llvm::formatv("region.getNumArguments() == {0}", expectedArgCount)
+ .str());
+ descriptionParts.push_back(
+ llvm::formatv("{0} entry block argument(s)", expectedArgCount)
+ .str());
+ }
+
+ // Combine conditions
+ if (!conditionParts.empty()) {
+ condition = llvm::join(conditionParts, " && ");
+ }
+
+ // Generate descriptive error message
+ if (!descriptionParts.empty()) {
+ textualConditionName =
+ llvm::formatv("region with {0}",
+ llvm::join(descriptionParts, " and "))
+ .str();
+ }
+ }
+
+ verifierHelpers.push_back(llvm::formatv(
+ R"(static ::llvm::LogicalResult {0}(::mlir::Operation *op, ::mlir::Region &region, ::llvm::StringRef regionName, unsigned regionIndex) {{
+ if (!({1})) {{
+ return op->emitOpError("region #") << regionIndex
+ << (regionName.empty() ? " " : " ('" + regionName + "') ")
+ << "failed to verify constraint: {2}";
+ }
+ return ::mlir::success();
+})",
+ helperFnName, condition, textualConditionName));
+
+ verifierCalls.push_back(llvm::formatv(R"(
+ if (::mlir::failed({0}(*this, (*this)->getRegion({1}), "{2}", {1})))
+ return ::mlir::failure();)",
+ helperFnName, i, regionName)
+ .str());
+ }
+}
+
+static void generateVerifiers(irdl::detail::dictionary &dict,
+ irdl::OperationOp op, const OpStrings &strings) {
+ SmallVector<std::string> verifierHelpers;
+ SmallVector<std::string> verifierCalls;
+
+ generateRegionConstraintVerifiers(dict, op, strings, verifierHelpers,
+ verifierCalls);
+
+ // Add an overall verifier that sequences the helper calls
+ std::string verifierDef =
+ llvm::formatv(R"(
+::llvm::LogicalResult {0}::verifyInvariantsImpl() {{
+ if(::mlir::failed(verify()))
+ return ::mlir::failure();
+
+ {1}
+
+ return ::mlir::success();
+})",
+ strings.opCppName, llvm::join(verifierCalls, "\n"));
+
+ dict["OP_VERIFIER_HELPERS"] = llvm::join(verifierHelpers, "\n");
+ dict["OP_VERIFIER"] = verifierDef;
+}
+
static std::string generateOpDefinition(irdl::detail::dictionary &dict,
irdl::OperationOp op) {
static const auto perOpDefTemplate = mlir::irdl::detail::Template{
@@ -370,6 +524,8 @@ void {0}::build(::mlir::OpBuilder &opBuilder, ::mlir::OperationState &opState, {
dict["OP_BUILD_DEFS"] = buildDefinition;
+ generateVerifiers(dict, op, opStrings);
+
std::string str;
llvm::raw_string_ostream stream{str};
perOpDefTemplate.render(stream, dict);
@@ -427,7 +583,7 @@ static LogicalResult generateLib(irdl::DialectOp dialect, raw_ostream &output,
dict["TYPE_PARSER"] = llvm::formatv(
R"(static ::mlir::OptionalParseResult generatedTypeParser(::mlir::AsmParser &parser, ::llvm::StringRef *mnemonic, ::mlir::Type &value) {
return ::mlir::AsmParser::KeywordSwitch<::mlir::OptionalParseResult>(parser)
- {0}
+ {0}
.Default([&](llvm::StringRef keyword, llvm::SMLoc) {{
*mnemonic = keyword;
return std::nullopt;
@@ -520,6 +676,8 @@ static LogicalResult verifySupported(irdl::DialectOp dialect) {
"IRDL C++ translation does not yet support variadic results");
}))
.Case<irdl::AnyOp>(([](irdl::AnyOp) { return success(); }))
+ .Case<irdl::RegionOp>(([](irdl::RegionOp) { return success(); }))
+ .Case<irdl::RegionsOp>(([](irdl::RegionsOp) { return success(); }))
.Default([](mlir::Operation *op) -> LogicalResult {
return op->emitError("IRDL C++ translation does not yet support "
"translation of ")
diff --git a/mlir/lib/Target/IRDLToCpp/Templates/PerOperationDecl.txt b/mlir/lib/Target/IRDLToCpp/Templates/PerOperationDecl.txt
index e9068e9..93ce0be 100644
--- a/mlir/lib/Target/IRDLToCpp/Templates/PerOperationDecl.txt
+++ b/mlir/lib/Target/IRDLToCpp/Templates/PerOperationDecl.txt
@@ -12,15 +12,15 @@ public:
struct Properties {
};
public:
- __OP_CPP_NAME__GenericAdaptorBase(::mlir::Operation *op)
- : odsAttrs(op->getRawDictionaryAttrs()), odsOpName(op->getName()),
- odsRegions(op->getRegions())
+ __OP_CPP_NAME__GenericAdaptorBase(::mlir::Operation *op)
+ : odsAttrs(op->getRawDictionaryAttrs()), odsOpName(op->getName()),
+ odsRegions(op->getRegions())
{}
/// Return the unstructured operand index of a structured operand along with
// the amount of unstructured operands it contains.
std::pair<unsigned, unsigned>
- getStructuredOperandIndexAndLength (unsigned index,
+ getStructuredOperandIndexAndLength (unsigned index,
unsigned odsOperandsSize) {
return {index, 1};
}
@@ -32,6 +32,12 @@ public:
::mlir::DictionaryAttr getAttributes() {
return odsAttrs;
}
+
+ __OP_REGION_ADAPTER_GETTER_DECLS__
+
+ ::mlir::RegionRange getRegions() {
+ return odsRegions;
+ }
protected:
::mlir::DictionaryAttr odsAttrs;
::std::optional<::mlir::OperationName> odsOpName;
@@ -42,28 +48,28 @@ protected:
} // namespace detail
template <typename RangeT>
-class __OP_CPP_NAME__GenericAdaptor
+class __OP_CPP_NAME__GenericAdaptor
: public detail::__OP_CPP_NAME__GenericAdaptorBase {
using ValueT = ::llvm::detail::ValueOfRange<RangeT>;
using Base = detail::__OP_CPP_NAME__GenericAdaptorBase;
public:
__OP_CPP_NAME__GenericAdaptor(RangeT values, ::mlir::DictionaryAttr attrs,
- ::mlir::OpaqueProperties properties,
- ::mlir::RegionRange regions = {})
- : __OP_CPP_NAME__GenericAdaptor(values, attrs,
- (properties ? *properties.as<::mlir::EmptyProperties *>()
+ ::mlir::OpaqueProperties properties,
+ ::mlir::RegionRange regions = {})
+ : __OP_CPP_NAME__GenericAdaptor(values, attrs,
+ (properties ? *properties.as<::mlir::EmptyProperties *>()
: ::mlir::EmptyProperties{}), regions) {}
- __OP_CPP_NAME__GenericAdaptor(RangeT values,
+ __OP_CPP_NAME__GenericAdaptor(RangeT values,
const __OP_CPP_NAME__GenericAdaptorBase &base)
: Base(base), odsOperands(values) {}
- // This template parameter allows using __OP_CPP_NAME__ which is declared
+ // This template parameter allows using __OP_CPP_NAME__ which is declared
// later.
template <typename LateInst = __OP_CPP_NAME__,
typename = std::enable_if_t<
std::is_same_v<LateInst, __OP_CPP_NAME__>>>
- __OP_CPP_NAME__GenericAdaptor(RangeT values, LateInst op)
+ __OP_CPP_NAME__GenericAdaptor(RangeT values, LateInst op)
: Base(op), odsOperands(values) {}
/// Return the unstructured operand index of a structured operand along with
@@ -77,7 +83,7 @@ public:
RangeT getStructuredOperands(unsigned index) {
auto valueRange = getStructuredOperandIndexAndLength(index);
return {std::next(odsOperands.begin(), valueRange.first),
- std::next(odsOperands.begin(),
+ std::next(odsOperands.begin(),
valueRange.first + valueRange.second)};
}
@@ -91,7 +97,7 @@ private:
RangeT odsOperands;
};
-class __OP_CPP_NAME__Adaptor
+class __OP_CPP_NAME__Adaptor
: public __OP_CPP_NAME__GenericAdaptor<::mlir::ValueRange> {
public:
using __OP_CPP_NAME__GenericAdaptor::__OP_CPP_NAME__GenericAdaptor;
@@ -100,7 +106,7 @@ public:
::llvm::LogicalResult verify(::mlir::Location loc);
};
-class __OP_CPP_NAME__ : public ::mlir::Op<__OP_CPP_NAME__> {
+class __OP_CPP_NAME__ : public ::mlir::Op<__OP_TEMPLATE_ARGS__> {
public:
using Op::Op;
using Op::print;
@@ -112,6 +118,8 @@ public:
return {};
}
+ ::llvm::LogicalResult verifyInvariantsImpl();
+
static constexpr ::llvm::StringLiteral getOperationName() {
return ::llvm::StringLiteral("__DIALECT_NAME__.__OP_NAME__");
}
@@ -147,7 +155,7 @@ public:
::mlir::Operation::operand_range getStructuredOperands(unsigned index) {
auto valueRange = getStructuredOperandIndexAndLength(index);
return {std::next(getOperation()->operand_begin(), valueRange.first),
- std::next(getOperation()->operand_begin(),
+ std::next(getOperation()->operand_begin(),
valueRange.first + valueRange.second)};
}
@@ -162,18 +170,19 @@ public:
::mlir::Operation::result_range getStructuredResults(unsigned index) {
auto valueRange = getStructuredResultIndexAndLength(index);
return {std::next(getOperation()->result_begin(), valueRange.first),
- std::next(getOperation()->result_begin(),
+ std::next(getOperation()->result_begin(),
valueRange.first + valueRange.second)};
}
__OP_OPERAND_GETTER_DECLS__
__OP_RESULT_GETTER_DECLS__
-
+ __OP_REGION_GETTER_DECLS__
+
__OP_BUILD_DECLS__
- static void build(::mlir::OpBuilder &odsBuilder,
- ::mlir::OperationState &odsState,
- ::mlir::TypeRange resultTypes,
- ::mlir::ValueRange operands,
+ static void build(::mlir::OpBuilder &odsBuilder,
+ ::mlir::OperationState &odsState,
+ ::mlir::TypeRange resultTypes,
+ ::mlir::ValueRange operands,
::llvm::ArrayRef<::mlir::NamedAttribute> attributes = {});
static __OP_CPP_NAME__ create(::mlir::OpBuilder &odsBuilder,
diff --git a/mlir/lib/Target/IRDLToCpp/Templates/PerOperationDef.txt b/mlir/lib/Target/IRDLToCpp/Templates/PerOperationDef.txt
index 30ca420..f4a1b7a 100644
--- a/mlir/lib/Target/IRDLToCpp/Templates/PerOperationDef.txt
+++ b/mlir/lib/Target/IRDLToCpp/Templates/PerOperationDef.txt
@@ -6,12 +6,14 @@ R"(
__NAMESPACE_OPEN__
+__OP_VERIFIER_HELPERS__
+
__OP_BUILD_DEFS__
-void __OP_CPP_NAME__::build(::mlir::OpBuilder &odsBuilder,
- ::mlir::OperationState &odsState,
- ::mlir::TypeRange resultTypes,
- ::mlir::ValueRange operands,
+void __OP_CPP_NAME__::build(::mlir::OpBuilder &odsBuilder,
+ ::mlir::OperationState &odsState,
+ ::mlir::TypeRange resultTypes,
+ ::mlir::ValueRange operands,
::llvm::ArrayRef<::mlir::NamedAttribute> attributes)
{
assert(operands.size() == __OP_OPERAND_COUNT__);
@@ -19,6 +21,9 @@ void __OP_CPP_NAME__::build(::mlir::OpBuilder &odsBuilder,
odsState.addOperands(operands);
odsState.addAttributes(attributes);
odsState.addTypes(resultTypes);
+ for (unsigned i = 0; i != __OP_REGION_COUNT__; ++i) {
+ (void)odsState.addRegion();
+ }
}
__OP_CPP_NAME__
@@ -44,6 +49,7 @@ __OP_CPP_NAME__::create(::mlir::ImplicitLocOpBuilder &odsBuilder,
return create(odsBuilder, odsBuilder.getLoc(), resultTypes, operands, attributes);
}
+__OP_VERIFIER__
__NAMESPACE_CLOSE__
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 53209a4..9fcb02e 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -3175,6 +3175,45 @@ applyUnrollHeuristic(omp::UnrollHeuristicOp op, llvm::IRBuilderBase &builder,
return success();
}
+/// Apply a `#pragma omp tile` / `!$omp tile` transformation using the
+/// OpenMPIRBuilder.
+static LogicalResult applyTile(omp::TileOp op, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation) {
+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+ llvm::OpenMPIRBuilder::LocationDescription loc(builder);
+
+ SmallVector<llvm::CanonicalLoopInfo *> translatedLoops;
+ SmallVector<llvm::Value *> translatedSizes;
+
+ for (Value size : op.getSizes()) {
+ llvm::Value *translatedSize = moduleTranslation.lookupValue(size);
+ assert(translatedSize &&
+ "sizes clause arguments must already be translated");
+ translatedSizes.push_back(translatedSize);
+ }
+
+ for (Value applyee : op.getApplyees()) {
+ llvm::CanonicalLoopInfo *consBuilderCLI =
+ moduleTranslation.lookupOMPLoop(applyee);
+ assert(consBuilderCLI && "Canonical loop must have been translated already");
+ translatedLoops.push_back(consBuilderCLI);
+ }
+
+ auto generatedLoops =
+ ompBuilder->tileLoops(loc.DL, translatedLoops, translatedSizes);
+ if (!op.getGeneratees().empty()) {
+ for (auto [mlirLoop, genLoop] :
+ zip_equal(op.getGeneratees(), generatedLoops))
+ moduleTranslation.mapOmpLoop(mlirLoop, genLoop);
+ }
+
+ // CLIs can only be consumed once
+ for (Value applyee : op.getApplyees())
+ moduleTranslation.invalidateOmpLoop(applyee);
+
+ return success();
+}
+
/// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
static llvm::AtomicOrdering
convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) {
@@ -6227,6 +6266,9 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
// the omp.canonical_loop.
return applyUnrollHeuristic(op, builder, moduleTranslation);
})
+ .Case([&](omp::TileOp op) {
+ return applyTile(op, builder, moduleTranslation);
+ })
.Case([&](omp::TargetAllocMemOp) {
return convertTargetAllocMemOp(*op, builder, moduleTranslation);
})
diff --git a/mlir/test/Dialect/Arith/emulate-unsupported-floats.mlir b/mlir/test/Dialect/Arith/emulate-unsupported-floats.mlir
index 99790cc..fcd004a 100644
--- a/mlir/test/Dialect/Arith/emulate-unsupported-floats.mlir
+++ b/mlir/test/Dialect/Arith/emulate-unsupported-floats.mlir
@@ -85,3 +85,14 @@ func.func @no_expansion(%x: f32) -> f32 {
%y = arith.addf %x, %c : f32
func.return %y : f32
}
+
+// -----
+
+func.func @no_promote_select(%c: i1, %x: bf16, %y: bf16) -> bf16 {
+// CHECK-LABEL: @no_promote_select
+// CHECK-SAME: (%[[C:.+]]: i1, %[[X:.+]]: bf16, %[[Y:.+]]: bf16)
+// CHECK: %[[Z:.+]] = arith.select %[[C]], %[[X]], %[[Y]] : bf16
+// CHECK: return %[[Z]]
+ %z = arith.select %c, %x, %y : bf16
+ func.return %z : bf16
+}
diff --git a/mlir/test/Dialect/OpenMP/cli-tile.mlir b/mlir/test/Dialect/OpenMP/cli-tile.mlir
new file mode 100644
index 0000000..73d5478
--- /dev/null
+++ b/mlir/test/Dialect/OpenMP/cli-tile.mlir
@@ -0,0 +1,138 @@
+// RUN: mlir-opt %s | FileCheck %s --enable-var-scope
+// RUN: mlir-opt %s | mlir-opt | FileCheck %s --enable-var-scope
+
+
+// Raw syntax check (MLIR output is always pretty-printed)
+// CHECK-LABEL: @omp_tile_raw(
+// CHECK-SAME: %[[tc:.+]]: i32, %[[ts:.+]]: i32) {
+func.func @omp_tile_raw(%tc : i32, %ts : i32) -> () {
+ // CHECK-NEXT: %canonloop = omp.new_cli
+ %canonloop = "omp.new_cli" () : () -> (!omp.cli)
+ // CHECK-NEXT: %grid1 = omp.new_cli
+ %grid = "omp.new_cli" () : () -> (!omp.cli)
+ // CHECK-NEXT: %intratile1 = omp.new_cli
+ %intratile = "omp.new_cli" () : () -> (!omp.cli)
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
+ "omp.canonical_loop" (%tc, %canonloop) ({
+ ^bb0(%iv: i32):
+ // CHECK: omp.terminator
+ omp.terminator
+ }) : (i32, !omp.cli) -> ()
+ // CHECK: omp.tile (%grid1, %intratile1) <- (%canonloop) sizes(%[[ts]] : i32)
+ "omp.tile"(%grid, %intratile, %canonloop, %ts) <{operandSegmentSizes = array<i32: 2, 1, 1>}> : (!omp.cli, !omp.cli, !omp.cli, i32) -> ()
+ //"omp.tile" (%canonloop) : (!omp.cli) -> ()
+ return
+}
+
+
+// Pretty syntax check
+// CHECK-LABEL: @omp_tile_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32, %[[ts:.+]]: i32) {
+func.func @omp_tile_pretty(%tc : i32, %ts : i32) -> () {
+ // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
+ %canonloop = omp.new_cli
+ // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
+ %grid = omp.new_cli
+ // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
+ %intratile = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+ // CHECK: omp.tile (%grid1, %intratile1) <- (%canonloop) sizes(%[[ts]] : i32)
+ omp.tile(%grid, %intratile) <- (%canonloop) sizes(%ts : i32)
+ return
+}
+
+
+// Specifying the generatees for omp.tile is optional
+// CHECK-LABEL: @omp_tile_optionalgen_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32, %[[ts:.+]]: i32) {
+func.func @omp_tile_optionalgen_pretty(%tc : i32, %ts : i32) -> () {
+ // CHECK-NEXT: %canonloop = omp.new_cli
+ %canonloop = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+ // CHECK: omp.tile <- (%canonloop) sizes(%[[ts]] : i32)
+ omp.tile <- (%canonloop) sizes(%ts : i32)
+ return
+}
+
+
+// Two-dimensional tiling
+// CHECK-LABEL: @omp_tile_2d_pretty(
+// CHECK-SAME: %[[tc1:.+]]: i32, %[[tc2:.+]]: i32, %[[ts1:.+]]: i32, %[[ts2:.+]]: i32) {
+func.func @omp_tile_2d_pretty(%tc1 : i32, %tc2 : i32, %ts1 : i32, %ts2 : i32) -> () {
+ // CHECK-NEXT: %canonloop = omp.new_cli
+ %cli_outer = omp.new_cli
+ // CHECK-NEXT: %canonloop_d1 = omp.new_cli
+ %cli_inner = omp.new_cli
+ // CHECK-NEXT: %grid1 = omp.new_cli
+ %grid1 = omp.new_cli
+ // CHECK-NEXT: %grid2 = omp.new_cli
+ %grid2 = omp.new_cli
+ // CHECK-NEXT: %intratile1 = omp.new_cli
+ %intratile1 = omp.new_cli
+ // CHECK-NEXT: %intratile2 = omp.new_cli
+ %intratile2 = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc1]]) {
+ omp.canonical_loop(%cli_outer) %iv_outer : i32 in range(%tc1) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc2]]) {
+ omp.canonical_loop(%cli_inner) %iv_inner : i32 in range(%tc2) {
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+ // CHECK: omp.tile (%grid1, %grid2, %intratile1, %intratile2) <- (%canonloop, %canonloop_d1) sizes(%[[ts1]], %[[ts2]] : i32, i32)
+ omp.tile (%grid1, %grid2, %intratile1, %intratile2) <- (%cli_outer, %cli_inner) sizes(%ts1, %ts2 : i32, i32)
+ return
+}
+
+
+// Three-dimensional tiling
+// CHECK-LABEL: @omp_tile_3d_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32, %[[ts:.+]]: i32) {
+func.func @omp_tile_3d_pretty(%tc : i32, %ts : i32) -> () {
+ // CHECK-NEXT: %canonloop = omp.new_cli
+ %cli_outer = omp.new_cli
+ // CHECK-NEXT: %canonloop_d1 = omp.new_cli
+ %cli_middle = omp.new_cli
+ // CHECK-NEXT: %canonloop_d2 = omp.new_cli
+ %cli_inner = omp.new_cli
+ // CHECK-NEXT: %grid1 = omp.new_cli
+ %grid1 = omp.new_cli
+ // CHECK-NEXT: %grid2 = omp.new_cli
+ %grid2 = omp.new_cli
+ // CHECK-NEXT: %grid3 = omp.new_cli
+ %grid3 = omp.new_cli
+ // CHECK-NEXT: %intratile1 = omp.new_cli
+ %intratile1 = omp.new_cli
+ // CHECK-NEXT: %intratile2 = omp.new_cli
+ %intratile2 = omp.new_cli
+ // CHECK-NEXT: %intratile3 = omp.new_cli
+ %intratile3 = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%cli_outer) %iv_outer : i32 in range(%tc) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%cli_middle) %iv_middle : i32 in range(%tc) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_d2) %iv_d2 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%cli_inner) %iv_inner : i32 in range(%tc) {
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+ // CHECK: omp.tile (%grid1, %grid2, %grid3, %intratile1, %intratile2, %intratile3) <- (%canonloop, %canonloop_d1, %canonloop_d2) sizes(%[[ts]], %[[ts]], %[[ts]] : i32, i32, i32)
+ omp.tile (%grid1, %grid2, %grid3, %intratile1, %intratile2, %intratile3) <- (%cli_outer, %cli_middle, %cli_inner) sizes(%ts, %ts, %ts: i32, i32, i32)
+ return
+}
diff --git a/mlir/test/Dialect/OpenMP/invalid-tile.mlir b/mlir/test/Dialect/OpenMP/invalid-tile.mlir
new file mode 100644
index 0000000..e63a062
--- /dev/null
+++ b/mlir/test/Dialect/OpenMP/invalid-tile.mlir
@@ -0,0 +1,119 @@
+// RUN: mlir-opt -split-input-file -verify-diagnostics %s
+
+
+func.func @missing_sizes(%tc : i32, %ts : i32) {
+ %canonloop = omp.new_cli
+ omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+ omp.terminator
+ }
+
+ // expected-error@+1 {{'omp.tile' op there must be one tile size for each applyee}}
+ omp.tile <-(%canonloop)
+
+ llvm.return
+}
+
+// -----
+
+func.func @no_loop(%tc : i32, %ts : i32) {
+ // expected-error@+1 {{'omp.tile' op must apply to at least one loop}}
+ omp.tile <-()
+
+ return
+}
+
+// -----
+
+func.func @missing_generator(%tc : i32, %ts : i32) {
+ // expected-error@+1 {{'omp.new_cli' op CLI has no generator}}
+ %canonloop = omp.new_cli
+
+ // expected-note@+1 {{see consumer here: "omp.tile"(%0, %arg1) <{operandSegmentSizes = array<i32: 0, 1, 1>}> : (!omp.cli, i32) -> ()}}
+ omp.tile <-(%canonloop) sizes(%ts : i32)
+
+ return
+}
+
+// -----
+
+func.func @insufficient_sizes(%tc : i32, %ts : i32) {
+ %canonloop1 = omp.new_cli
+ %canonloop2 = omp.new_cli
+ omp.canonical_loop(%canonloop1) %iv : i32 in range(%tc) {
+ omp.terminator
+ }
+ omp.canonical_loop(%canonloop2) %iv : i32 in range(%tc) {
+ omp.terminator
+ }
+
+ // expected-error@+1 {{'omp.tile' op there must be one tile size for each applyee}}
+ omp.tile <-(%canonloop1, %canonloop2) sizes(%ts : i32)
+
+ llvm.return
+}
+
+// -----
+
+func.func @insufficient_applyees(%tc : i32, %ts : i32) {
+ %canonloop = omp.new_cli
+ omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+ omp.terminator
+ }
+
+ // expected-error@+1 {{'omp.tile' op there must be one tile size for each applyee}}
+ omp.tile <- (%canonloop) sizes(%ts, %ts : i32, i32)
+
+ return
+}
+
+// -----
+
+func.func @insufficient_generatees(%tc : i32, %ts : i32) {
+ %canonloop = omp.new_cli
+ %grid = omp.new_cli
+ omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+ omp.terminator
+ }
+
+ // expected-error@+1 {{'omp.tile' op expecting two times the number of generatees than applyees}}
+ omp.tile (%grid) <- (%canonloop) sizes(%ts : i32)
+
+ return
+}
+
+// -----
+
+func.func @not_perfectly_nested(%tc : i32, %ts : i32) {
+ %canonloop1 = omp.new_cli
+ %canonloop2 = omp.new_cli
+ omp.canonical_loop(%canonloop1) %iv1 : i32 in range(%tc) {
+ %v = arith.constant 42 : i32
+ omp.canonical_loop(%canonloop2) %iv2 : i32 in range(%tc) {
+ omp.terminator
+ }
+ omp.terminator
+ }
+
+ // expected-error@+1 {{'omp.tile' op tiled loop nest must be perfectly nested}}
+ omp.tile <-(%canonloop1, %canonloop2) sizes(%ts, %ts : i32, i32)
+
+ llvm.return
+}
+
+// -----
+
+func.func @non_rectangular(%tc : i32, %ts : i32) {
+ %canonloop1 = omp.new_cli
+ %canonloop2 = omp.new_cli
+ omp.canonical_loop(%canonloop1) %iv1 : i32 in range(%tc) {
+ omp.canonical_loop(%canonloop2) %iv2 : i32 in range(%iv1) {
+ omp.terminator
+ }
+ omp.terminator
+ }
+
+ // expected-error@+1 {{'omp.tile' op tiled loop nest must be rectangular}}
+ omp.tile <-(%canonloop1, %canonloop2) sizes(%ts, %ts : i32, i32)
+
+ llvm.return
+}
diff --git a/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir b/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir
index 03c6386..38392fd 100644
--- a/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir
+++ b/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir
@@ -282,15 +282,20 @@ gpu.module @test_distribution {
// CHECK-LABEL: @store_scatter
// CHECK-SAME: %[[ARG0:.*]]: memref<256xf16>
gpu.func @store_scatter(%dest : memref<256xf16>) {
- // CHECK: %[[VAL:.*]] = arith.constant dense<2.550000e+01> : vector<8xf16>
- // CHECK: %[[CST:.*]] = arith.constant dense<0> : vector<8xindex>
- // CHECK: %[[MASK:.*]] = arith.constant dense<true> : vector<8xi1>
+ // CHECK: %[[VAL:.*]] = arith.constant {layout_result_0 = #xegpu.layout<inst_data = [8]>} dense<2.550000e+01> : vector<8xf16>
+ // CHECK: %[[CST:.*]] = arith.constant {layout_result_0 = #xegpu.layout<inst_data = [8]>} dense<0> : vector<8xindex>
+ // CHECK: %[[MASK:.*]] = arith.constant {layout_result_0 = #xegpu.layout<inst_data = [8]>} dense<true> : vector<8xi1>
// CHECK: xegpu.store %[[VAL]], %[[ARG0]][%[[CST]]], %[[MASK]] <{chunk_size = 1 : i64, l1_hint = #xegpu.cache_hint<cached>}>
+ // CHECK-SAME: {layout_operand_0 = #xegpu.layout<inst_data = [8]>, layout_operand_2 = #xegpu.layout<inst_data = [8]>,
+ // CHECK-SAME: layout_operand_3 = #xegpu.layout<inst_data = [8]>}
// CHECK-SAME: : vector<8xf16>, memref<256xf16>, vector<8xindex>, vector<8xi1>
- %val = arith.constant {layout_result_0 = #xegpu.layout<sg_layout = [32], sg_data = [8]>} dense<25.5> : vector<256xf16>
- %offset = arith.constant {layout_result_0 = #xegpu.layout<sg_layout = [32], sg_data = [8]>} dense<0> : vector<256xindex>
- %mask = arith.constant {layout_result_0 = #xegpu.layout<sg_layout = [32], sg_data = [8]>} dense<1> : vector<256xi1>
- xegpu.store %val, %dest[%offset], %mask {chunk_size = 1, layout = #xegpu.layout<sg_layout = [32], sg_data = [8]>, l1_hint = #xegpu.cache_hint<cached>}
+ %val = arith.constant {layout_result_0 = #xegpu.layout<sg_layout = [32], sg_data = [8], inst_data = [8]>} dense<25.5> : vector<256xf16>
+ %offset = arith.constant {layout_result_0 = #xegpu.layout<sg_layout = [32], sg_data = [8], inst_data = [8]>} dense<0> : vector<256xindex>
+ %mask = arith.constant {layout_result_0 = #xegpu.layout<sg_layout = [32], sg_data = [8], inst_data = [8]>} dense<1> : vector<256xi1>
+ xegpu.store %val, %dest[%offset], %mask {chunk_size = 1, layout_operand_0 = #xegpu.layout<sg_layout = [32], sg_data = [8], inst_data = [8]>,
+ layout_operand_2 = #xegpu.layout<sg_layout = [32], sg_data = [8], inst_data = [8]>,
+ layout_operand_3 = #xegpu.layout<sg_layout = [32], sg_data = [8], inst_data = [8]>,
+ l1_hint = #xegpu.cache_hint<cached>}
: vector<256xf16>, memref<256xf16>, vector<256xindex>, vector<256xi1>
gpu.return
}
diff --git a/mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir b/mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir
new file mode 100644
index 0000000..4ac4f02
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir
@@ -0,0 +1,101 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+
+
+llvm.func @tile_trivial_loop(%baseptr: !llvm.ptr, %tc: i32, %ts: i32) -> () {
+ %literal_cli = omp.new_cli
+ omp.canonical_loop(%literal_cli) %iv : i32 in range(%tc) {
+ %ptr = llvm.getelementptr inbounds %baseptr[%iv] : (!llvm.ptr, i32) -> !llvm.ptr, f32
+ %val = llvm.mlir.constant(42.0 : f32) : f32
+ llvm.store %val, %ptr : f32, !llvm.ptr
+ omp.terminator
+ }
+ omp.tile <- (%literal_cli) sizes(%ts : i32)
+ llvm.return
+}
+
+
+// CHECK: ; ModuleID = 'LLVMDialectModule'
+// CHECK-NEXT: source_filename = "LLVMDialectModule"
+// CHECK-EMPTY:
+// CHECK-NEXT: define void @tile_trivial_loop(ptr %0, i32 %1, i32 %2) {
+// CHECK-NEXT: br label %omp_omp.loop.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.preheader: ; preds = %3
+// CHECK-NEXT: %4 = udiv i32 %1, %2
+// CHECK-NEXT: %5 = urem i32 %1, %2
+// CHECK-NEXT: %6 = icmp ne i32 %5, 0
+// CHECK-NEXT: %7 = zext i1 %6 to i32
+// CHECK-NEXT: %omp_floor0.tripcount = add nuw i32 %4, %7
+// CHECK-NEXT: br label %omp_floor0.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.preheader: ; preds = %omp_omp.loop.preheader
+// CHECK-NEXT: br label %omp_floor0.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.header: ; preds = %omp_floor0.inc, %omp_floor0.preheader
+// CHECK-NEXT: %omp_floor0.iv = phi i32 [ 0, %omp_floor0.preheader ], [ %omp_floor0.next, %omp_floor0.inc ]
+// CHECK-NEXT: br label %omp_floor0.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.cond: ; preds = %omp_floor0.header
+// CHECK-NEXT: %omp_floor0.cmp = icmp ult i32 %omp_floor0.iv, %omp_floor0.tripcount
+// CHECK-NEXT: br i1 %omp_floor0.cmp, label %omp_floor0.body, label %omp_floor0.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.body: ; preds = %omp_floor0.cond
+// CHECK-NEXT: %8 = icmp eq i32 %omp_floor0.iv, %4
+// CHECK-NEXT: %9 = select i1 %8, i32 %5, i32 %2
+// CHECK-NEXT: br label %omp_tile0.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.preheader: ; preds = %omp_floor0.body
+// CHECK-NEXT: br label %omp_tile0.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.header: ; preds = %omp_tile0.inc, %omp_tile0.preheader
+// CHECK-NEXT: %omp_tile0.iv = phi i32 [ 0, %omp_tile0.preheader ], [ %omp_tile0.next, %omp_tile0.inc ]
+// CHECK-NEXT: br label %omp_tile0.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.cond: ; preds = %omp_tile0.header
+// CHECK-NEXT: %omp_tile0.cmp = icmp ult i32 %omp_tile0.iv, %9
+// CHECK-NEXT: br i1 %omp_tile0.cmp, label %omp_tile0.body, label %omp_tile0.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.body: ; preds = %omp_tile0.cond
+// CHECK-NEXT: %10 = mul nuw i32 %2, %omp_floor0.iv
+// CHECK-NEXT: %11 = add nuw i32 %10, %omp_tile0.iv
+// CHECK-NEXT: br label %omp_omp.loop.body
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.body: ; preds = %omp_tile0.body
+// CHECK-NEXT: br label %omp.loop.region
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.loop.region: ; preds = %omp_omp.loop.body
+// CHECK-NEXT: %12 = getelementptr inbounds float, ptr %0, i32 %11
+// CHECK-NEXT: store float 4.200000e+01, ptr %12, align 4
+// CHECK-NEXT: br label %omp.region.cont
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.region.cont: ; preds = %omp.loop.region
+// CHECK-NEXT: br label %omp_tile0.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.inc: ; preds = %omp.region.cont
+// CHECK-NEXT: %omp_tile0.next = add nuw i32 %omp_tile0.iv, 1
+// CHECK-NEXT: br label %omp_tile0.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.exit: ; preds = %omp_tile0.cond
+// CHECK-NEXT: br label %omp_tile0.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.after: ; preds = %omp_tile0.exit
+// CHECK-NEXT: br label %omp_floor0.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.inc: ; preds = %omp_tile0.after
+// CHECK-NEXT: %omp_floor0.next = add nuw i32 %omp_floor0.iv, 1
+// CHECK-NEXT: br label %omp_floor0.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.exit: ; preds = %omp_floor0.cond
+// CHECK-NEXT: br label %omp_floor0.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.after: ; preds = %omp_floor0.exit
+// CHECK-NEXT: br label %omp_omp.loop.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.after: ; preds = %omp_floor0.after
+// CHECK-NEXT: ret void
+// CHECK-NEXT: }
+// CHECK-EMPTY:
+// CHECK-NEXT: !llvm.module.flags = !{!0}
+// CHECK-EMPTY:
+// CHECK-NEXT: !0 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir b/mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir
new file mode 100644
index 0000000..6fad81c
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir
@@ -0,0 +1,190 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+
+llvm.func @tile_2d_loop(%baseptr: !llvm.ptr, %tc1: i32, %tc2: i32, %ts1: i32, %ts2: i32) -> () {
+ %literal_outer = omp.new_cli
+ %literal_inner = omp.new_cli
+ omp.canonical_loop(%literal_outer) %iv1 : i32 in range(%tc1) {
+ omp.canonical_loop(%literal_inner) %iv2 : i32 in range(%tc2) {
+ %idx = llvm.add %iv1, %iv2 : i32
+ %ptr = llvm.getelementptr inbounds %baseptr[%idx] : (!llvm.ptr, i32) -> !llvm.ptr, f32
+ %val = llvm.mlir.constant(42.0 : f32) : f32
+ llvm.store %val, %ptr : f32, !llvm.ptr
+ omp.terminator
+ }
+ omp.terminator
+ }
+ omp.tile <- (%literal_outer, %literal_inner) sizes(%ts1, %ts2 : i32,i32)
+ llvm.return
+}
+
+
+// CHECK: ; ModuleID = 'LLVMDialectModule'
+// CHECK-NEXT: source_filename = "LLVMDialectModule"
+// CHECK-EMPTY:
+// CHECK-NEXT: define void @tile_2d_loop(ptr %0, i32 %1, i32 %2, i32 %3, i32 %4) {
+// CHECK-NEXT: br label %omp_omp.loop.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.preheader: ; preds = %5
+// CHECK-NEXT: %6 = udiv i32 %1, %3
+// CHECK-NEXT: %7 = urem i32 %1, %3
+// CHECK-NEXT: %8 = icmp ne i32 %7, 0
+// CHECK-NEXT: %9 = zext i1 %8 to i32
+// CHECK-NEXT: %omp_floor0.tripcount = add nuw i32 %6, %9
+// CHECK-NEXT: %10 = udiv i32 %2, %4
+// CHECK-NEXT: %11 = urem i32 %2, %4
+// CHECK-NEXT: %12 = icmp ne i32 %11, 0
+// CHECK-NEXT: %13 = zext i1 %12 to i32
+// CHECK-NEXT: %omp_floor1.tripcount = add nuw i32 %10, %13
+// CHECK-NEXT: br label %omp_floor0.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.header: ; preds = %omp_omp.loop.inc
+// CHECK-NEXT: %omp_omp.loop.iv = phi i32 [ %omp_omp.loop.next, %omp_omp.loop.inc ]
+// CHECK-NEXT: br label %omp_omp.loop.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.cond: ; preds = %omp_omp.loop.header
+// CHECK-NEXT: %omp_omp.loop.cmp = icmp ult i32 %19, %1
+// CHECK-NEXT: br i1 %omp_omp.loop.cmp, label %omp_omp.loop.body, label %omp_omp.loop.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.body: ; preds = %omp_tile1.body, %omp_omp.loop.cond
+// CHECK-NEXT: br label %omp.loop.region
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.loop.region: ; preds = %omp_omp.loop.body
+// CHECK-NEXT: br label %omp_omp.loop.preheader1
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.preheader1: ; preds = %omp.loop.region
+// CHECK-NEXT: br label %omp_omp.loop.body4
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.preheader: ; preds = %omp_omp.loop.preheader
+// CHECK-NEXT: br label %omp_floor0.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.header: ; preds = %omp_floor0.inc, %omp_floor0.preheader
+// CHECK-NEXT: %omp_floor0.iv = phi i32 [ 0, %omp_floor0.preheader ], [ %omp_floor0.next, %omp_floor0.inc ]
+// CHECK-NEXT: br label %omp_floor0.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.cond: ; preds = %omp_floor0.header
+// CHECK-NEXT: %omp_floor0.cmp = icmp ult i32 %omp_floor0.iv, %omp_floor0.tripcount
+// CHECK-NEXT: br i1 %omp_floor0.cmp, label %omp_floor0.body, label %omp_floor0.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.body: ; preds = %omp_floor0.cond
+// CHECK-NEXT: br label %omp_floor1.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor1.preheader: ; preds = %omp_floor0.body
+// CHECK-NEXT: br label %omp_floor1.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor1.header: ; preds = %omp_floor1.inc, %omp_floor1.preheader
+// CHECK-NEXT: %omp_floor1.iv = phi i32 [ 0, %omp_floor1.preheader ], [ %omp_floor1.next, %omp_floor1.inc ]
+// CHECK-NEXT: br label %omp_floor1.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor1.cond: ; preds = %omp_floor1.header
+// CHECK-NEXT: %omp_floor1.cmp = icmp ult i32 %omp_floor1.iv, %omp_floor1.tripcount
+// CHECK-NEXT: br i1 %omp_floor1.cmp, label %omp_floor1.body, label %omp_floor1.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor1.body: ; preds = %omp_floor1.cond
+// CHECK-NEXT: %14 = icmp eq i32 %omp_floor0.iv, %6
+// CHECK-NEXT: %15 = select i1 %14, i32 %7, i32 %3
+// CHECK-NEXT: %16 = icmp eq i32 %omp_floor1.iv, %10
+// CHECK-NEXT: %17 = select i1 %16, i32 %11, i32 %4
+// CHECK-NEXT: br label %omp_tile0.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.preheader: ; preds = %omp_floor1.body
+// CHECK-NEXT: br label %omp_tile0.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.header: ; preds = %omp_tile0.inc, %omp_tile0.preheader
+// CHECK-NEXT: %omp_tile0.iv = phi i32 [ 0, %omp_tile0.preheader ], [ %omp_tile0.next, %omp_tile0.inc ]
+// CHECK-NEXT: br label %omp_tile0.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.cond: ; preds = %omp_tile0.header
+// CHECK-NEXT: %omp_tile0.cmp = icmp ult i32 %omp_tile0.iv, %15
+// CHECK-NEXT: br i1 %omp_tile0.cmp, label %omp_tile0.body, label %omp_tile0.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.body: ; preds = %omp_tile0.cond
+// CHECK-NEXT: br label %omp_tile1.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile1.preheader: ; preds = %omp_tile0.body
+// CHECK-NEXT: br label %omp_tile1.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile1.header: ; preds = %omp_tile1.inc, %omp_tile1.preheader
+// CHECK-NEXT: %omp_tile1.iv = phi i32 [ 0, %omp_tile1.preheader ], [ %omp_tile1.next, %omp_tile1.inc ]
+// CHECK-NEXT: br label %omp_tile1.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile1.cond: ; preds = %omp_tile1.header
+// CHECK-NEXT: %omp_tile1.cmp = icmp ult i32 %omp_tile1.iv, %17
+// CHECK-NEXT: br i1 %omp_tile1.cmp, label %omp_tile1.body, label %omp_tile1.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile1.body: ; preds = %omp_tile1.cond
+// CHECK-NEXT: %18 = mul nuw i32 %3, %omp_floor0.iv
+// CHECK-NEXT: %19 = add nuw i32 %18, %omp_tile0.iv
+// CHECK-NEXT: %20 = mul nuw i32 %4, %omp_floor1.iv
+// CHECK-NEXT: %21 = add nuw i32 %20, %omp_tile1.iv
+// CHECK-NEXT: br label %omp_omp.loop.body
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.body4: ; preds = %omp_omp.loop.preheader1
+// CHECK-NEXT: br label %omp.loop.region12
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.loop.region12: ; preds = %omp_omp.loop.body4
+// CHECK-NEXT: %22 = add i32 %19, %21
+// CHECK-NEXT: %23 = getelementptr inbounds float, ptr %0, i32 %22
+// CHECK-NEXT: store float 4.200000e+01, ptr %23, align 4
+// CHECK-NEXT: br label %omp.region.cont11
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.region.cont11: ; preds = %omp.loop.region12
+// CHECK-NEXT: br label %omp_tile1.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile1.inc: ; preds = %omp.region.cont11
+// CHECK-NEXT: %omp_tile1.next = add nuw i32 %omp_tile1.iv, 1
+// CHECK-NEXT: br label %omp_tile1.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile1.exit: ; preds = %omp_tile1.cond
+// CHECK-NEXT: br label %omp_tile1.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile1.after: ; preds = %omp_tile1.exit
+// CHECK-NEXT: br label %omp_tile0.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.inc: ; preds = %omp_tile1.after
+// CHECK-NEXT: %omp_tile0.next = add nuw i32 %omp_tile0.iv, 1
+// CHECK-NEXT: br label %omp_tile0.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.exit: ; preds = %omp_tile0.cond
+// CHECK-NEXT: br label %omp_tile0.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.after: ; preds = %omp_tile0.exit
+// CHECK-NEXT: br label %omp_floor1.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor1.inc: ; preds = %omp_tile0.after
+// CHECK-NEXT: %omp_floor1.next = add nuw i32 %omp_floor1.iv, 1
+// CHECK-NEXT: br label %omp_floor1.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor1.exit: ; preds = %omp_floor1.cond
+// CHECK-NEXT: br label %omp_floor1.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor1.after: ; preds = %omp_floor1.exit
+// CHECK-NEXT: br label %omp_floor0.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.inc: ; preds = %omp_floor1.after
+// CHECK-NEXT: %omp_floor0.next = add nuw i32 %omp_floor0.iv, 1
+// CHECK-NEXT: br label %omp_floor0.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.exit: ; preds = %omp_floor0.cond
+// CHECK-NEXT: br label %omp_floor0.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.after: ; preds = %omp_floor0.exit
+// CHECK-NEXT: br label %omp_omp.loop.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.region.cont: ; No predecessors!
+// CHECK-NEXT: br label %omp_omp.loop.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.inc: ; preds = %omp.region.cont
+// CHECK-NEXT: %omp_omp.loop.next = add nuw i32 %19, 1
+// CHECK-NEXT: br label %omp_omp.loop.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.exit: ; preds = %omp_omp.loop.cond
+// CHECK-NEXT: br label %omp_omp.loop.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.after: ; preds = %omp_floor0.after, %omp_omp.loop.exit
+// CHECK-NEXT: ret void
+// CHECK-NEXT: }
+// CHECK-EMPTY:
+// CHECK-NEXT: !llvm.module.flags = !{!0}
+// CHECK-EMPTY:
+// CHECK-NEXT: !0 = !{i32 2, !"Debug Info Version", i32 3}
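For orientation, the block labels checked above encode a two-level tiling of a 2-D loop nest: the floor loops step over tiles, the tile loops step within a tile, the icmp/select pairs clamp the inner trip count on the last (possibly partial) tile, and the mul/add nuw pairs recover the original induction values. A minimal C++ sketch of that control structure follows; the names and the flat index are illustrative, not taken from the generated IR.

    void tiled_nest(float *data, unsigned tripCount0, unsigned tripCount1,
                    unsigned tileSize0, unsigned tileSize1) {
      // Number of tiles per dimension (the "floor" trip counts).
      unsigned floorCount0 = (tripCount0 + tileSize0 - 1) / tileSize0;
      unsigned floorCount1 = (tripCount1 + tileSize1 - 1) / tileSize1;
      for (unsigned f0 = 0; f0 < floorCount0; ++f0) {     // omp_floor0
        for (unsigned f1 = 0; f1 < floorCount1; ++f1) {   // omp_floor1
          // Clamp the tile trip counts on the last, possibly partial, tile
          // (mirrors the icmp eq / select pairs on the floor IVs).
          unsigned bound0 = (f0 == floorCount0 - 1) ? tripCount0 - f0 * tileSize0
                                                    : tileSize0;
          unsigned bound1 = (f1 == floorCount1 - 1) ? tripCount1 - f1 * tileSize1
                                                    : tileSize1;
          for (unsigned t0 = 0; t0 < bound0; ++t0) {      // omp_tile0
            for (unsigned t1 = 0; t1 < bound1; ++t1) {    // omp_tile1
              unsigned i = f0 * tileSize0 + t0;           // recovered IV, dim 0
              unsigned j = f1 * tileSize1 + t1;           // recovered IV, dim 1
              data[i + j] = 42.0f;                        // the store of 4.200000e+01
            }
          }
        }
      }
    }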
diff --git a/mlir/test/lib/Dialect/TestIRDLToCpp/CMakeLists.txt b/mlir/test/lib/Dialect/TestIRDLToCpp/CMakeLists.txt
index 103bc94..7d32577 100644
--- a/mlir/test/lib/Dialect/TestIRDLToCpp/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/TestIRDLToCpp/CMakeLists.txt
@@ -12,5 +12,7 @@ add_mlir_library(MLIRTestIRDLToCppDialect
mlir_target_link_libraries(MLIRTestIRDLToCppDialect PUBLIC
MLIRIR
MLIRPass
+ MLIRSCFDialect
MLIRTransforms
+ MLIRTestDialect
)
diff --git a/mlir/test/lib/Dialect/TestIRDLToCpp/TestIRDLToCppDialect.cpp b/mlir/test/lib/Dialect/TestIRDLToCpp/TestIRDLToCppDialect.cpp
index 9550e4c..421db7e 100644
--- a/mlir/test/lib/Dialect/TestIRDLToCpp/TestIRDLToCppDialect.cpp
+++ b/mlir/test/lib/Dialect/TestIRDLToCpp/TestIRDLToCppDialect.cpp
@@ -13,6 +13,7 @@
// #include "mlir/IR/Dialect.h"
#include "mlir/IR/Region.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/DialectImplementation.h"
#include "mlir/Interfaces/InferTypeOpInterface.h"
@@ -54,16 +55,34 @@ struct TestOpConversion : public OpConversionPattern<test_irdl_to_cpp::BeefOp> {
}
};
+struct TestRegionConversion
+ : public OpConversionPattern<test_irdl_to_cpp::ConditionalOp> {
+ using OpConversionPattern::OpConversionPattern;
+
+ LogicalResult
+ matchAndRewrite(mlir::test_irdl_to_cpp::ConditionalOp op, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+    // Just exercising the generated C++ API; these block counts are not
+    // enforced by the dialect definition.
+ assert(op.getThen().getBlocks().size() == 1);
+ assert(adaptor.getElse().getBlocks().size() == 1);
+ auto ifOp = scf::IfOp::create(rewriter, op.getLoc(), op.getInput());
+ rewriter.replaceOp(op, ifOp);
+ return success();
+ }
+};
+
struct ConvertTestDialectToSomethingPass
: PassWrapper<ConvertTestDialectToSomethingPass, OperationPass<ModuleOp>> {
void runOnOperation() override {
MLIRContext *ctx = &getContext();
RewritePatternSet patterns(ctx);
- patterns.add<TestOpConversion>(ctx);
+ patterns.add<TestOpConversion, TestRegionConversion>(ctx);
ConversionTarget target(getContext());
- target.addIllegalOp<test_irdl_to_cpp::BeefOp>();
- target.addLegalOp<test_irdl_to_cpp::BarOp>();
- target.addLegalOp<test_irdl_to_cpp::HashOp>();
+ target.addIllegalOp<test_irdl_to_cpp::BeefOp,
+ test_irdl_to_cpp::ConditionalOp>();
+ target.addLegalOp<test_irdl_to_cpp::BarOp, test_irdl_to_cpp::HashOp,
+ scf::IfOp, scf::YieldOp>();
if (failed(applyPartialConversion(getOperation(), target,
std::move(patterns))))
signalPassFailure();
@@ -73,6 +92,10 @@ struct ConvertTestDialectToSomethingPass
StringRef getDescription() const final {
return "Checks the convertability of an irdl dialect";
}
+
+ void getDependentDialects(DialectRegistry &registry) const override {
+ registry.insert<scf::SCFDialect>();
+ }
};
void registerIrdlTestDialect(mlir::DialectRegistry &registry) {
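The new TestRegionConversion pattern materializes scf.if during the conversion, which is why the pass now marks scf::IfOp/scf::YieldOp legal, registers the SCF dialect as a dependency, and links MLIRSCFDialect in the CMake change above. A minimal, hypothetical sketch of the general shape, assuming a pass that creates ops from a dialect the input module may not have loaded yet:

    #include "mlir/Dialect/SCF/IR/SCF.h"
    #include "mlir/IR/BuiltinOps.h"
    #include "mlir/IR/DialectRegistry.h"
    #include "mlir/Pass/Pass.h"

    namespace {
    // Hypothetical pass, not part of this patch: a pass that builds SCF ops must
    // declare the dialect as dependent so it is loaded before runOnOperation(),
    // even when the incoming IR contains no SCF ops of its own.
    struct BuildsScfOpsPass
        : mlir::PassWrapper<BuildsScfOpsPass,
                            mlir::OperationPass<mlir::ModuleOp>> {
      void getDependentDialects(mlir::DialectRegistry &registry) const override {
        registry.insert<mlir::scf::SCFDialect>();
      }
      void runOnOperation() override {
        // ... create scf::IfOp / scf::YieldOp through a rewriter here ...
      }
    };
    } // namespace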
diff --git a/mlir/test/lib/Dialect/TestIRDLToCpp/test_conversion.testd.mlir b/mlir/test/lib/Dialect/TestIRDLToCpp/test_conversion.testd.mlir
index f6233ee..1915324 100644
--- a/mlir/test/lib/Dialect/TestIRDLToCpp/test_conversion.testd.mlir
+++ b/mlir/test/lib/Dialect/TestIRDLToCpp/test_conversion.testd.mlir
@@ -1,15 +1,29 @@
// RUN: mlir-opt %s --pass-pipeline="builtin.module(test-irdl-conversion-check)" | FileCheck %s
// CHECK-LABEL: module {
module {
- // CHECK: func.func @test() {
+ // CHECK: func.func @test(%[[test_arg:[^ ]*]]: i1) {
// CHECK: %[[v0:[^ ]*]] = "test_irdl_to_cpp.bar"() : () -> i32
// CHECK: %[[v1:[^ ]*]] = "test_irdl_to_cpp.bar"() : () -> i32
// CHECK: %[[v2:[^ ]*]] = "test_irdl_to_cpp.hash"(%[[v0]], %[[v0]]) : (i32, i32) -> i32
+ // CHECK: scf.if %[[test_arg]]
// CHECK: return
// CHECK: }
- func.func @test() {
+ func.func @test(%test_arg: i1) {
%0 = "test_irdl_to_cpp.bar"() : () -> i32
%1 = "test_irdl_to_cpp.beef"(%0, %0) : (i32, i32) -> i32
+ "test_irdl_to_cpp.conditional"(%test_arg) ({
+ ^cond(%test: i1):
+ %3 = "test_irdl_to_cpp.bar"() : () -> i32
+ "test.terminator"() : ()->()
+ }, {
+ ^then(%what: i1, %ever: i32):
+ %4 = "test_irdl_to_cpp.bar"() : () -> i32
+ "test.terminator"() : ()->()
+ }, {
+ ^else():
+ %5 = "test_irdl_to_cpp.bar"() : () -> i32
+ "test.terminator"() : ()->()
+ }) : (i1) -> ()
return
}
diff --git a/mlir/test/lib/Dialect/TestIRDLToCpp/test_irdl_to_cpp.irdl.mlir b/mlir/test/lib/Dialect/TestIRDLToCpp/test_irdl_to_cpp.irdl.mlir
index 42e713e..85fb8cb 100644
--- a/mlir/test/lib/Dialect/TestIRDLToCpp/test_irdl_to_cpp.irdl.mlir
+++ b/mlir/test/lib/Dialect/TestIRDLToCpp/test_irdl_to_cpp.irdl.mlir
@@ -2,7 +2,7 @@
// CHECK: class TestIrdlToCpp
irdl.dialect @test_irdl_to_cpp {
-
+
// CHECK: class FooType
irdl.type @foo
@@ -32,4 +32,53 @@ irdl.dialect @test_irdl_to_cpp {
irdl.operands(lhs: %0, rhs: %0)
irdl.results(res: %0)
}
+
+ // CHECK: ConditionalOp declarations
+ // CHECK: ConditionalOpGenericAdaptorBase
+ // CHECK: ::mlir::Region &getCond() { return *getRegions()[0]; }
+ // CHECK: ::mlir::Region &getThen() { return *getRegions()[1]; }
+ // CHECK: ::mlir::Region &getElse() { return *getRegions()[2]; }
+ //
+ // CHECK: class ConditionalOp : public ::mlir::Op<ConditionalOp, ::mlir::OpTrait::NRegions<3>::Impl, ::mlir::OpTrait::OpInvariants>
+ // CHECK: ::mlir::Region &getCond() { return (*this)->getRegion(0); }
+ // CHECK: ::mlir::Region &getThen() { return (*this)->getRegion(1); }
+ // CHECK: ::mlir::Region &getElse() { return (*this)->getRegion(2); }
+
+ // CHECK: ConditionalOp definitions
+ // CHECK: __mlir_irdl_local_region_constraint_ConditionalOp_cond
+ // CHECK: if (!(region.getNumArguments() == 1)) {
+ // CHECK: failed to verify constraint: region with 1 entry block argument(s)
+
+ // CHECK: __mlir_irdl_local_region_constraint_ConditionalOp_then
+ // CHECK: if (!(true)) {
+
+ // CHECK: __mlir_irdl_local_region_constraint_ConditionalOp_else
+ // CHECK: if (!(region.getNumArguments() == 0)) {
+ // CHECK: failed to verify constraint: region with 0 entry block argument(s)
+
+ // CHECK: ConditionalOp::build
+ // CHECK: for (unsigned i = 0; i != 3; ++i)
+ // CHECK-NEXT: (void)odsState.addRegion();
+
+ // CHECK: ConditionalOp::verifyInvariantsImpl
+ // CHECK: __mlir_irdl_local_region_constraint_ConditionalOp_cond
+ // CHECK: failure
+ // CHECK: __mlir_irdl_local_region_constraint_ConditionalOp_then
+ // CHECK: failure
+ // CHECK: __mlir_irdl_local_region_constraint_ConditionalOp_else
+ // CHECK: failure
+ // CHECK: success
+ irdl.operation @conditional {
+ %r0 = irdl.region // Unconstrained region
+ %r1 = irdl.region() // Region with no entry block arguments
+
+ // TODO(#161018): support irdl.is in irdl-to-cpp
+ // %v0 = irdl.is i1 // Type constraint: i1 (boolean)
+ %v0 = irdl.any
+ %r2 = irdl.region(%v0) // Region with one i1 entry block argument
+ irdl.regions(cond: %r2, then: %r0, else: %r1)
+
+ %0 = irdl.any
+ irdl.operands(input: %0)
+ }
}
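For reference, a small illustrative sketch (not generated code) of how the accessors verified by the CHECK lines above are consumed from C++ once irdl-to-cpp has emitted the ConditionalOp class; only the cond and else regions carry entry-argument constraints:

    // Illustrative only; assumes the generated TestIrdlToCpp header is available,
    // as in TestIRDLToCppDialect.cpp above.
    static void inspectConditional(mlir::test_irdl_to_cpp::ConditionalOp op) {
      mlir::Region &cond = op.getCond(); // region 0: exactly one entry argument
      mlir::Region &then = op.getThen(); // region 1: unconstrained
      mlir::Region &els = op.getElse();  // region 2: zero entry arguments
      (void)cond.getNumArguments();
      (void)then;
      (void)els;
    }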
diff --git a/mlir/test/lib/Dialect/TestIRDLToCpp/test_irdl_to_cpp_invalid_unsupported_types.irdl.mlir b/mlir/test/lib/Dialect/TestIRDLToCpp/test_irdl_to_cpp_invalid_unsupported_types.irdl.mlir
index 403b492..cc27456 100644
--- a/mlir/test/lib/Dialect/TestIRDLToCpp/test_irdl_to_cpp_invalid_unsupported_types.irdl.mlir
+++ b/mlir/test/lib/Dialect/TestIRDLToCpp/test_irdl_to_cpp_invalid_unsupported_types.irdl.mlir
@@ -7,7 +7,7 @@ irdl.dialect @test_irdl_to_cpp {
irdl.results(res: %1)
}
}
-// -----
+// -----
irdl.dialect @test_irdl_to_cpp {
irdl.operation @operands_no_any_of {
@@ -42,7 +42,7 @@ irdl.dialect @test_irdl_to_cpp {
irdl.dialect @test_irdl_to_cpp {
irdl.type @ty {
- %0 = irdl.any
+ %0 = irdl.any
// expected-error@+1 {{IRDL C++ translation does not yet support translation of irdl.parameters operation}}
irdl.parameters(ty: %0)
}
@@ -51,29 +51,8 @@ irdl.dialect @test_irdl_to_cpp {
// -----
irdl.dialect @test_irdl_to_cpp {
- irdl.operation @test_op {
- // expected-error@+1 {{IRDL C++ translation does not yet support translation of irdl.region operation}}
- %0 = irdl.region()
- irdl.regions(reg: %0)
- }
-
-}
-
-// -----
-
-irdl.dialect @test_irdl_to_cpp {
- irdl.operation @test_op {
- // expected-error@+1 {{IRDL C++ translation does not yet support translation of irdl.regions operation}}
- irdl.regions()
- }
-
-}
-
-// -----
-
-irdl.dialect @test_irdl_to_cpp {
irdl.type @test_derived {
// expected-error@+1 {{IRDL C++ translation does not yet support translation of irdl.base operation}}
%0 = irdl.base "!builtin.integer"
- }
+ }
}
diff --git a/mlir/test/mlir-tblgen/op-format-invalid.td b/mlir/test/mlir-tblgen/op-format-invalid.td
index 2f29543..0a022ad 100644
--- a/mlir/test/mlir-tblgen/op-format-invalid.td
+++ b/mlir/test/mlir-tblgen/op-format-invalid.td
@@ -307,7 +307,7 @@ def DirectiveTypeZOperandInvalidI : TestFormat_Op<[{
def LiteralInvalidA : TestFormat_Op<[{
`a:`
}]>;
-// CHECK: error: expected valid literal but got '1': single character literal must be a letter or one of '_:,=<>()[]{}?+*'
+// CHECK: error: expected valid literal but got '1': single character literal must be a letter or one of '_:,=<>()[]{}?+-*'
def LiteralInvalidB : TestFormat_Op<[{
`1`
}]>;
diff --git a/mlir/test/mlir-tblgen/op-format-spec.td b/mlir/test/mlir-tblgen/op-format-spec.td
index 1541cd0..1ac2311 100644
--- a/mlir/test/mlir-tblgen/op-format-spec.td
+++ b/mlir/test/mlir-tblgen/op-format-spec.td
@@ -123,7 +123,7 @@ def DirectiveTypeValid : TestFormat_Op<[{
// CHECK-NOT: error
def LiteralValid : TestFormat_Op<[{
- `_` `:` `,` `=` `<` `>` `(` `)` `[` `]` `?` `+` `*` ` ` `` `->` `\n` `abc$._`
+ `_` `:` `,` `=` `<` `>` `(` `)` `[` `]` `?` `+` `-` `*` ` ` `` `->` `\n` `abc$._`
attr-dict
}]>;
diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp
index a1899a8..8dd9713 100644
--- a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp
+++ b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp
@@ -403,6 +403,7 @@ void DefFormat::genLiteralParser(StringRef value, FmtContext &ctx,
.Case("]", "RSquare")
.Case("?", "Question")
.Case("+", "Plus")
+ .Case("-", "Minus")
.Case("*", "Star")
.Case("...", "Ellipsis")
<< "()";
diff --git a/mlir/tools/mlir-tblgen/FormatGen.cpp b/mlir/tools/mlir-tblgen/FormatGen.cpp
index 4dfdde2..04d3ed1 100644
--- a/mlir/tools/mlir-tblgen/FormatGen.cpp
+++ b/mlir/tools/mlir-tblgen/FormatGen.cpp
@@ -518,7 +518,7 @@ bool mlir::tblgen::isValidLiteral(StringRef value,
// If there is only one character, this must either be punctuation or a
// single character bare identifier.
if (value.size() == 1) {
- StringRef bare = "_:,=<>()[]{}?+*";
+ StringRef bare = "_:,=<>()[]{}?+-*";
if (isalpha(front) || bare.contains(front))
return true;
if (emitError)
diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp
index 0d113b3..ccf21d1 100644
--- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp
@@ -852,6 +852,7 @@ static void genLiteralParser(StringRef value, MethodBody &body) {
.Case("]", "RSquare()")
.Case("?", "Question()")
.Case("+", "Plus()")
+ .Case("-", "Minus()")
.Case("*", "Star()")
.Case("...", "Ellipsis()");
}
diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp
index eb5425e..c55ef2c 100644
--- a/offload/libomptarget/OpenMP/InteropAPI.cpp
+++ b/offload/libomptarget/OpenMP/InteropAPI.cpp
@@ -124,7 +124,7 @@ void *getProperty<void *>(omp_interop_val_t &InteropVal,
case omp_ipr_device_context:
return InteropVal.device_info.Context;
case omp_ipr_targetsync:
- return InteropVal.async_info->Queue;
+ return InteropVal.async_info ? InteropVal.async_info->Queue : nullptr;
default:;
}
getTypeMismatch(Property, Err);
@@ -167,7 +167,6 @@ bool getPropertyCheck(omp_interop_val_t **InteropPtr,
omp_interop_property_t property_id, \
int *err) { \
omp_interop_val_t *interop_val = (omp_interop_val_t *)interop; \
- assert((interop_val)->interop_type == kmp_interop_type_targetsync); \
if (!getPropertyCheck(&interop_val, property_id, err)) { \
return (RETURN_TYPE)(0); \
} \
@@ -275,8 +274,8 @@ omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType,
return Interop;
}
-int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop,
- interop_ctx_t *Ctx, dep_pack_t *Deps) {
+int __tgt_interop_use60(ident_t *LocRef, omp_interop_val_t *Interop,
+ interop_ctx_t *Ctx, dep_pack_t *Deps) {
bool Nowait = Ctx->flags.nowait;
DP("Call to %s with interop " DPxMOD ", nowait %" PRId32 "\n", __func__,
DPxPTR(Interop), Nowait);
@@ -359,6 +358,40 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop,
return omp_irc_success;
}
+// Backwards compatibility wrappers
+void __tgt_interop_init(ident_t *LocRef, int32_t Gtid,
+ omp_interop_val_t *&InteropPtr, int32_t InteropType,
+ int32_t DeviceId, int32_t Ndeps,
+ kmp_depend_info_t *DepList, int32_t HaveNowait) {
+ constexpr int32_t old_kmp_interop_type_targetsync = 2;
+ interop_ctx_t Ctx = {0, {false, (bool)HaveNowait, 0}, Gtid};
+ dep_pack_t Deps = {Ndeps, 0, DepList, nullptr};
+ InteropPtr =
+ __tgt_interop_get(LocRef,
+ InteropType == old_kmp_interop_type_targetsync
+ ? kmp_interop_type_targetsync
+ : kmp_interop_type_target,
+ DeviceId, 0, nullptr, &Ctx, Ndeps ? &Deps : nullptr);
+}
+
+void __tgt_interop_use(ident_t *LocRef, int32_t Gtid,
+ omp_interop_val_t *&InteropPtr, int32_t DeviceId,
+ int32_t Ndeps, kmp_depend_info_t *DepList,
+ int32_t HaveNowait) {
+ interop_ctx_t Ctx = {0, {false, (bool)HaveNowait, 0}, Gtid};
+ dep_pack_t Deps = {Ndeps, 0, DepList, nullptr};
+ __tgt_interop_use60(LocRef, InteropPtr, &Ctx, Ndeps ? &Deps : nullptr);
+}
+
+void __tgt_interop_destroy(ident_t *LocRef, int32_t Gtid,
+ omp_interop_val_t *&InteropPtr, int32_t DeviceId,
+ int32_t Ndeps, kmp_depend_info_t *DepList,
+ int32_t HaveNowait) {
+ interop_ctx_t Ctx = {0, {false, (bool)HaveNowait, 0}, Gtid};
+ dep_pack_t Deps = {Ndeps, 0, DepList, nullptr};
+ __tgt_interop_release(LocRef, InteropPtr, &Ctx, Ndeps ? &Deps : nullptr);
+}
+
} // extern "C"
llvm::Expected<DeviceTy &> omp_interop_val_t::getDevice() const {
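Taken together, the changes above keep the pre-6.0 entry points (__tgt_interop_init/use/destroy) alive as thin wrappers over the 6.0-style API and make the targetsync property query return a null queue instead of dereferencing a missing async_info. A minimal, hypothetical user-side sketch of the defensive query, assuming an OpenMP 5.1+ compiler; nothing here is taken from the patch itself:

    // Hypothetical example: query the targetsync queue and tolerate a
    // device/plugin that provides none (the getProperty change above now
    // returns NULL in that case rather than crashing).
    #include <omp.h>
    #include <cstdio>

    int main() {
      omp_interop_t obj = omp_interop_none;
      #pragma omp interop init(targetsync : obj)
      int rc = omp_irc_success;
      void *queue = omp_get_interop_ptr(obj, omp_ipr_targetsync, &rc);
      if (!queue)
        std::printf("no targetsync queue available on this device\n");
      #pragma omp interop destroy(obj)
      return 0;
    }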
diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports
index 8e2db6b..1374bfe 100644
--- a/offload/libomptarget/exports
+++ b/offload/libomptarget/exports
@@ -68,8 +68,11 @@ VERS1.0 {
omp_get_interop_int;
omp_get_interop_name;
omp_get_interop_type_desc;
- __tgt_interop_get;
+ __tgt_interop_init;
__tgt_interop_use;
+ __tgt_interop_destroy;
+ __tgt_interop_get;
+ __tgt_interop_use60;
__tgt_interop_release;
__tgt_target_sync;
__llvmPushCallConfiguration;
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 7b834ee..f73fa047 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2712,6 +2712,37 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return Plugin::success();
}
+ interop_spec_t selectInteropPreference(int32_t InteropType,
+ int32_t NumPrefers,
+ interop_spec_t *Prefers) override {
+ // TODO: update once targetsync is supported
+ if (InteropType == kmp_interop_type_target)
+ return interop_spec_t{tgt_fr_hsa, {false, 0}, 0};
+ return interop_spec_t{tgt_fr_none, {false, 0}, 0};
+ }
+
+ Expected<omp_interop_val_t *>
+ createInterop(int32_t InteropType, interop_spec_t &InteropSpec) override {
+ auto *Ret = new omp_interop_val_t(
+ DeviceId, static_cast<kmp_interop_type_t>(InteropType));
+ Ret->fr_id = tgt_fr_hsa;
+ Ret->vendor_id = omp_vendor_amd;
+
+ // TODO: implement targetsync support
+
+ Ret->device_info.Platform = nullptr;
+ Ret->device_info.Device = reinterpret_cast<void *>(Agent.handle);
+ Ret->device_info.Context = nullptr;
+
+ return Ret;
+ }
+
+ Error releaseInterop(omp_interop_val_t *Interop) override {
+ if (Interop)
+ delete Interop;
+ return Plugin::success();
+ }
+
Error enqueueHostCallImpl(void (*Callback)(void *), void *UserData,
AsyncInfoWrapperTy &AsyncInfo) override {
AMDGPUStreamTy *Stream = nullptr;
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index b30c651..e5c4a1b 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -917,6 +917,50 @@ struct CUDADeviceTy : public GenericDeviceTy {
return Plugin::success();
}
+ interop_spec_t selectInteropPreference(int32_t InteropType,
+ int32_t NumPrefers,
+ interop_spec_t *Prefers) override {
+ return interop_spec_t{tgt_fr_cuda, {true, 0}, 0};
+ }
+
+ Expected<omp_interop_val_t *>
+ createInterop(int32_t InteropType, interop_spec_t &InteropSpec) override {
+ auto *Ret = new omp_interop_val_t(
+ DeviceId, static_cast<kmp_interop_type_t>(InteropType));
+ Ret->fr_id = tgt_fr_cuda;
+ Ret->vendor_id = omp_vendor_nvidia;
+
+ if (InteropType == kmp_interop_type_target ||
+ InteropType == kmp_interop_type_targetsync) {
+ Ret->device_info.Platform = nullptr;
+ Ret->device_info.Device = reinterpret_cast<void *>(Device);
+ Ret->device_info.Context = Context;
+ }
+
+ if (InteropType == kmp_interop_type_targetsync) {
+ Ret->async_info = new __tgt_async_info();
+ if (auto Err = setContext())
+ return Err;
+ CUstream Stream;
+ if (auto Err = CUDAStreamManager.getResource(Stream))
+ return Err;
+
+ Ret->async_info->Queue = Stream;
+ }
+ return Ret;
+ }
+
+ Error releaseInterop(omp_interop_val_t *Interop) override {
+ if (!Interop)
+ return Plugin::success();
+
+ if (Interop->async_info)
+ delete Interop->async_info;
+
+ delete Interop;
+ return Plugin::success();
+ }
+
Error enqueueHostCallImpl(void (*Callback)(void *), void *UserData,
AsyncInfoWrapperTy &AsyncInfo) override {
if (auto Err = setContext())
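With the CUDA plugin now allocating a per-interop CUstream for targetsync objects, the queue surfaced through omp_ipr_targetsync can be handed to CUDA driver APIs. A hypothetical usage sketch, assuming a CUDA offload device and the driver API headers; names are illustrative:

    #include <cuda.h> // driver API; the plugin stores a CUstream in the queue slot
    #include <omp.h>

    void enqueue_on_omp_stream() {
      omp_interop_t obj = omp_interop_none;
      #pragma omp interop init(targetsync : obj)
      int rc = omp_irc_no_value;
      void *queue = omp_get_interop_ptr(obj, omp_ipr_targetsync, &rc);
      if (rc == omp_irc_success && queue) {
        CUstream stream = static_cast<CUstream>(queue);
        // ... cuMemcpyHtoDAsync / cuLaunchKernel on `stream` here ...
        (void)stream;
      }
      #pragma omp interop destroy(obj)
    }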
diff --git a/offload/test/offloading/fortran/target-declare-mapper-parent-allocatable.f90 b/offload/test/offloading/fortran/target-declare-mapper-parent-allocatable.f90
new file mode 100644
index 0000000..65e04af
--- /dev/null
+++ b/offload/test/offloading/fortran/target-declare-mapper-parent-allocatable.f90
@@ -0,0 +1,43 @@
+! This test validates that a declare mapper for a derived type that extends
+! a parent type with an allocatable component correctly maps the nested
+! allocatable payload via the mapper when the whole object is mapped in a
+! target region.
+
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+
+program target_declare_mapper_parent_allocatable
+ implicit none
+
+ type, abstract :: base_t
+ real, allocatable :: base_arr(:)
+ end type base_t
+
+ type, extends(base_t) :: real_t
+ real, allocatable :: real_arr(:)
+ end type real_t
+ !$omp declare mapper(custommapper: real_t :: t) map(t%base_arr, t%real_arr)
+
+ type(real_t) :: r
+ integer :: i
+ allocate(r%base_arr(10), source=1.0)
+ allocate(r%real_arr(10), source=1.0)
+
+ !$omp target map(mapper(custommapper), tofrom: r)
+ do i = 1, size(r%base_arr)
+ r%base_arr(i) = 2.0
+ r%real_arr(i) = 3.0
+ r%real_arr(i) = r%base_arr(1)
+ end do
+ !$omp end target
+
+
+ !CHECK: base_arr: 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
+ print*, "base_arr: ", r%base_arr
+ !CHECK: real_arr: 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
+ print*, "real_arr: ", r%real_arr
+
+ deallocate(r%real_arr)
+ deallocate(r%base_arr)
+end program target_declare_mapper_parent_allocatable