-rw-r--r--  .clang-format | 1
-rw-r--r--  clang/.clang-format | 1
-rw-r--r--  clang/docs/ReleaseNotes.rst | 2
-rw-r--r--  clang/lib/Analysis/ThreadSafety.cpp | 36
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp | 41
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h | 4
-rw-r--r--  clang/lib/Parse/ParseDecl.cpp | 3
-rw-r--r--  clang/lib/Sema/SemaDeclCXX.cpp | 2
-rw-r--r--  clang/lib/Sema/SemaExpr.cpp | 7
-rw-r--r--  clang/lib/Sema/SemaTemplateDeductionGuide.cpp | 9
-rw-r--r--  clang/lib/StaticAnalyzer/Core/RegionStore.cpp | 10
-rw-r--r--  clang/test/Analysis/initializer.cpp | 48
-rw-r--r--  clang/test/CIR/CodeGenOpenACC/combined-private-clause.cpp | 32
-rw-r--r--  clang/test/CIR/CodeGenOpenACC/compute-private-clause.c | 3
-rw-r--r--  clang/test/CIR/CodeGenOpenACC/compute-private-clause.cpp | 32
-rw-r--r--  clang/test/CIR/CodeGenOpenACC/loop-private-clause.cpp | 32
-rw-r--r--  clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-CtorDtor.cpp | 223
-rw-r--r--  clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-NoOps.cpp | 224
-rw-r--r--  clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-int.cpp | 4
-rw-r--r--  clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-CtorDtor.cpp | 539
-rw-r--r--  clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-NoOps.cpp | 544
-rw-r--r--  clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-int.cpp | 12
-rw-r--r--  clang/test/CIR/CodeGenOpenACC/private-clause-pointer-recipes-CtorDtor.cpp | 159
-rw-r--r--  clang/test/CIR/CodeGenOpenACC/private-clause-pointer-recipes-NoOps.cpp | 161
-rw-r--r--  clang/test/CIR/CodeGenOpenACC/private-clause-pointer-recipes-int.cpp | 6
-rw-r--r--  clang/test/CIR/IR/alloca.cir | 2
-rw-r--r--  clang/test/CIR/IR/array-ctor.cir | 2
-rw-r--r--  clang/test/CIR/IR/array-dtor.cir | 2
-rw-r--r--  clang/test/CIR/IR/array.cir | 2
-rw-r--r--  clang/test/CIR/IR/atomic.cir | 2
-rw-r--r--  clang/test/CIR/IR/binassign.cir | 2
-rw-r--r--  clang/test/CIR/IR/bitfield_info.cir | 2
-rw-r--r--  clang/test/CIR/IR/call.cir | 2
-rw-r--r--  clang/test/CIR/IR/cast.cir | 2
-rw-r--r--  clang/test/CIR/IR/cmp.cir | 2
-rw-r--r--  clang/test/CIR/IR/complex.cir | 2
-rw-r--r--  clang/test/CIR/IR/copy.cir | 2
-rw-r--r--  clang/test/CIR/IR/func.cir | 2
-rw-r--r--  clang/test/CIR/IR/global-init.cir | 2
-rw-r--r--  clang/test/CIR/IR/global-var-linkage.cir | 3
-rw-r--r--  clang/test/CIR/IR/global.cir | 2
-rw-r--r--  clang/test/CIR/IR/label.cir | 2
-rw-r--r--  clang/test/CIR/IR/module.cir | 3
-rw-r--r--  clang/test/CIR/IR/stack-save-restore.cir | 2
-rw-r--r--  clang/test/CIR/IR/struct.cir | 2
-rw-r--r--  clang/test/CIR/IR/switch-flat.cir | 2
-rw-r--r--  clang/test/CIR/IR/switch.cir | 2
-rw-r--r--  clang/test/CIR/IR/ternary.cir | 2
-rw-r--r--  clang/test/CIR/IR/throw.cir | 2
-rw-r--r--  clang/test/CIR/IR/unary.cir | 2
-rw-r--r--  clang/test/CIR/IR/vector.cir | 2
-rw-r--r--  clang/test/CIR/IR/vtable-addrpt.cir | 2
-rw-r--r--  clang/test/CIR/IR/vtable-attr.cir | 2
-rw-r--r--  clang/test/CIR/IR/vtt-addrpoint.cir | 2
-rw-r--r--  clang/test/Parser/recovery-after-expected-unqualified-id.cpp | 9
-rw-r--r--  clang/test/SemaCXX/cxx20-ctad-type-alias.cpp | 15
-rw-r--r--  clang/test/SemaCXX/cxx98-compat.cpp | 19
-rw-r--r--  flang/include/flang/Lower/OpenACC.h | 3
-rw-r--r--  flang/include/flang/Lower/SymbolMap.h | 4
-rw-r--r--  flang/include/flang/Semantics/symbol.h | 2
-rw-r--r--  flang/lib/Lower/Bridge.cpp | 2
-rw-r--r--  flang/lib/Lower/OpenACC.cpp | 34
-rw-r--r--  flang/lib/Lower/SymbolMap.cpp | 10
-rw-r--r--  flang/lib/Semantics/check-declarations.cpp | 3
-rw-r--r--  flang/lib/Semantics/resolve-directives.cpp | 5
-rw-r--r--  flang/lib/Semantics/resolve-names.cpp | 65
-rw-r--r--  flang/test/Lower/OpenACC/acc-host-data-cuda-device.f90 | 43
-rw-r--r--  lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/unordered_map-iterator/TestDataFormatterStdUnorderedMap.py | 5
-rw-r--r--  lldb/test/API/lang/cpp/abi_tag_structors/TestAbiTagStructors.py | 33
-rw-r--r--  lldb/test/API/lang/cpp/expr-definition-in-dylib/TestExprDefinitionInDylib.py | 20
-rw-r--r--  lldb/test/API/lang/cpp/structured-binding/TestStructuredBinding.py | 31
-rw-r--r--  lldb/test/Shell/Expr/TestGlobalSymbolObjCConflict.c | 4
-rw-r--r--  llvm/.clang-format | 2
-rw-r--r--  llvm/include/llvm/CodeGen/MIRYamlMapping.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/MachineFrameInfo.h | 9
-rw-r--r--  llvm/include/llvm/CodeGen/TargetFrameLowering.h | 1
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsAArch64.td | 30
-rw-r--r--  llvm/lib/CodeGen/RegAllocGreedy.cpp | 71
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 50
-rw-r--r--  llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp | 2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 243
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FrameLowering.h | 24
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 4
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 19
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrFormats.td | 22
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 8
-rw-r--r--  llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp | 20
-rw-r--r--  llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h | 63
-rw-r--r--  llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp | 135
-rw-r--r--  llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp | 4
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 1
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp | 1
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 1
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVLegalizeImplicitBinding.cpp | 43
-rw-r--r--  llvm/lib/Target/VE/VEISelLowering.cpp | 2
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 41
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 29
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 7
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.cpp | 10
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanHelpers.h | 4
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 8
-rw-r--r--  llvm/test/CMakeLists.txt | 4
-rw-r--r--  llvm/test/CodeGen/AArch64/debug-info-sve-dbg-declare.mir | 8
-rw-r--r--  llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir | 4
-rw-r--r--  llvm/test/CodeGen/AArch64/framelayout-sve.mir | 12
-rw-r--r--  llvm/test/CodeGen/AArch64/spillfill-sve.mir | 10
-rw-r--r--  llvm/test/CodeGen/AArch64/sve-calling-convention-byref.ll | 10
-rw-r--r--  llvm/test/CodeGen/AArch64/sve-load-store-legalisation.ll | 2854
-rw-r--r--  llvm/test/CodeGen/AMDGPU/limit-coalesce.mir | 33
-rw-r--r--  llvm/test/CodeGen/Hexagon/unaligned-vec-store.ll | 23
-rw-r--r--  llvm/test/CodeGen/SPIRV/hlsl-resources/ImplicitBinding.ll | 53
-rw-r--r--  llvm/test/CodeGen/SPIRV/hlsl-resources/UniqueImplicitBindingNumber.ll | 19
-rw-r--r--  llvm/test/CodeGen/SystemZ/fp-cmp-04.ll | 4
-rw-r--r--  llvm/test/CodeGen/VE/Vector/vec_divrem.ll | 56
-rw-r--r--  llvm/test/CodeGen/X86/fshl.ll | 81
-rw-r--r--  llvm/test/CodeGen/X86/fshr.ll | 90
-rw-r--r--  llvm/test/CodeGen/X86/sbb.ll | 29
-rw-r--r--  llvm/test/CodeGen/X86/shift-i128.ll | 3
-rw-r--r--  llvm/test/DebugInfo/AArch64/asan-stack-vars.mir | 3
-rw-r--r--  llvm/test/DebugInfo/AArch64/compiler-gen-bbs-livedebugvalues.mir | 3
-rw-r--r--  llvm/test/Transforms/GVN/condprop.ll | 60
-rw-r--r--  llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-constant-ops.ll | 34
-rw-r--r--  mlir/.clang-format | 1
-rw-r--r--  mlir/include/mlir/Bindings/Python/NanobindAdaptors.h | 38
-rw-r--r--  mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 22
-rw-r--r--  mlir/lib/Bindings/Python/IRCore.cpp | 58
-rw-r--r--  mlir/lib/Bindings/Python/IRModule.h | 6
-rw-r--r--  mlir/lib/Bindings/Python/IRTypes.cpp | 4
-rw-r--r--  mlir/lib/Bindings/Python/MainModule.cpp | 6
-rw-r--r--  mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 291
-rw-r--r--  mlir/lib/Dialect/Transform/IR/TransformOps.cpp | 7
-rw-r--r--  mlir/test/Dialect/LLVMIR/rocdl.mlir | 32
-rw-r--r--  mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir | 198
-rw-r--r--  mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir | 28
-rw-r--r--  mlir/test/Target/LLVMIR/rocdl.mlir | 28
-rw-r--r--  orc-rt/include/orc-rt/Error.h | 24
-rw-r--r--  orc-rt/unittests/ErrorTest.cpp | 23
137 files changed, 6604 insertions, 913 deletions
diff --git a/.clang-format b/.clang-format
index 9b3aa8b..ecb44bf 100644
--- a/.clang-format
+++ b/.clang-format
@@ -1 +1,2 @@
BasedOnStyle: LLVM
+LineEnding: LF
diff --git a/clang/.clang-format b/clang/.clang-format
index 9b3aa8b..ecb44bf 100644
--- a/clang/.clang-format
+++ b/clang/.clang-format
@@ -1 +1,2 @@
BasedOnStyle: LLVM
+LineEnding: LF
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index b1ddfa0..74b0647 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -437,6 +437,7 @@ Bug Fixes to C++ Support
- Fix the result of `__builtin_is_implicit_lifetime` for types with a user-provided constructor. (#GH160610)
- Correctly deduce return types in ``decltype`` expressions. (#GH160497) (#GH56652) (#GH116319) (#GH161196)
- Fixed a crash in the pre-C++23 warning for attributes before a lambda declarator (#GH161070).
+- Fix a crash when attempting to deduce a deduction guide from a non deducible template template parameter. (#130604)
Bug Fixes to AST Handling
^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -445,6 +446,7 @@ Bug Fixes to AST Handling
legal representation. This is fixed because ElaboratedTypes don't exist anymore. (#GH43179) (#GH68670) (#GH92757)
- Fix unrecognized html tag causing undesirable comment lexing (#GH152944)
- Fix comment lexing of special command names (#GH152943)
+- Use `extern` as a hint to continue parsing when recovering from a malformed declaration.
Miscellaneous Bug Fixes
^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/lib/Analysis/ThreadSafety.cpp b/clang/lib/Analysis/ThreadSafety.cpp
index d19f86a..a56fdb1 100644
--- a/clang/lib/Analysis/ThreadSafety.cpp
+++ b/clang/lib/Analysis/ThreadSafety.cpp
@@ -419,22 +419,28 @@ public:
// The expression for this variable, OR
const Expr *Exp = nullptr;
- // Reference to another VarDefinition
- unsigned Ref = 0;
+ // Direct reference to another VarDefinition
+ unsigned DirectRef = 0;
+
+ // Reference to underlying canonical non-reference VarDefinition.
+ unsigned CanonicalRef = 0;
// The map with which Exp should be interpreted.
Context Ctx;
bool isReference() const { return !Exp; }
+ void invalidateRef() { DirectRef = CanonicalRef = 0; }
+
private:
// Create ordinary variable definition
VarDefinition(const NamedDecl *D, const Expr *E, Context C)
: Dec(D), Exp(E), Ctx(C) {}
// Create reference to previous definition
- VarDefinition(const NamedDecl *D, unsigned R, Context C)
- : Dec(D), Ref(R), Ctx(C) {}
+ VarDefinition(const NamedDecl *D, unsigned DirectRef, unsigned CanonicalRef,
+ Context C)
+ : Dec(D), DirectRef(DirectRef), CanonicalRef(CanonicalRef), Ctx(C) {}
};
private:
@@ -445,7 +451,7 @@ private:
public:
LocalVariableMap() {
// index 0 is a placeholder for undefined variables (aka phi-nodes).
- VarDefinitions.push_back(VarDefinition(nullptr, 0u, getEmptyContext()));
+ VarDefinitions.push_back(VarDefinition(nullptr, 0, 0, getEmptyContext()));
}
/// Look up a definition, within the given context.
@@ -471,7 +477,7 @@ public:
Ctx = VarDefinitions[i].Ctx;
return VarDefinitions[i].Exp;
}
- i = VarDefinitions[i].Ref;
+ i = VarDefinitions[i].DirectRef;
}
return nullptr;
}
@@ -508,7 +514,7 @@ public:
void dump() {
for (unsigned i = 1, e = VarDefinitions.size(); i < e; ++i) {
const Expr *Exp = VarDefinitions[i].Exp;
- unsigned Ref = VarDefinitions[i].Ref;
+ unsigned Ref = VarDefinitions[i].DirectRef;
dumpVarDefinitionName(i);
llvm::errs() << " = ";
@@ -539,9 +545,9 @@ protected:
friend class VarMapBuilder;
// Resolve any definition ID down to its non-reference base ID.
- unsigned getCanonicalDefinitionID(unsigned ID) {
+ unsigned getCanonicalDefinitionID(unsigned ID) const {
while (ID > 0 && VarDefinitions[ID].isReference())
- ID = VarDefinitions[ID].Ref;
+ ID = VarDefinitions[ID].CanonicalRef;
return ID;
}
@@ -564,10 +570,11 @@ protected:
}
// Add a new reference to an existing definition.
- Context addReference(const NamedDecl *D, unsigned i, Context Ctx) {
+ Context addReference(const NamedDecl *D, unsigned Ref, Context Ctx) {
unsigned newID = VarDefinitions.size();
Context NewCtx = ContextFactory.add(Ctx, D, newID);
- VarDefinitions.push_back(VarDefinition(D, i, Ctx));
+ VarDefinitions.push_back(
+ VarDefinition(D, Ref, getCanonicalDefinitionID(Ref), Ctx));
return NewCtx;
}
@@ -769,15 +776,14 @@ void LocalVariableMap::intersectBackEdge(Context C1, Context C2) {
const unsigned *I2 = C2.lookup(P.first);
if (!I2) {
// Variable does not exist at the end of the loop, invalidate.
- VDef->Ref = 0;
+ VDef->invalidateRef();
continue;
}
// Compare the canonical IDs. This correctly handles chains of references
// and determines if the variable is truly loop-invariant.
- if (getCanonicalDefinitionID(VDef->Ref) != getCanonicalDefinitionID(*I2)) {
- VDef->Ref = 0; // Mark this variable as undefined
- }
+ if (VDef->CanonicalRef != getCanonicalDefinitionID(*I2))
+ VDef->invalidateRef(); // Mark this variable as undefined
}
}
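The DirectRef/CanonicalRef split above matters when one local variable is defined in terms of another: DirectRef keeps the immediate definition for expression lookup, while CanonicalRef lets the back-edge intersection compare the underlying non-reference definitions without re-walking the chain each time. A minimal C++ sketch of the pattern this handles (an illustrative example, not taken from the patch or its tests):

    void f(bool cond, int a) {
      int b = a;   // b's VarDefinition is a reference to a's definition
      int c = b;   // c refers to b directly, and to a canonically
      while (cond) {
        // Nothing here redefines a, b, or c, so on the loop back edge their
        // canonical definition IDs still match and the definitions survive
        // intersectBackEdge instead of being invalidated.
      }
    }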
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp
index e603884..f8e511e 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp
@@ -400,6 +400,32 @@ void OpenACCRecipeBuilderBase::createRecipeDestroySection(
mlir::acc::YieldOp::create(builder, locEnd);
}
+void OpenACCRecipeBuilderBase::makeBoundsInit(
+ mlir::Value alloca, mlir::Location loc, mlir::Block *block,
+ const VarDecl *allocaDecl, QualType origType, bool isInitSection) {
+ mlir::OpBuilder::InsertionGuard guardCase(builder);
+ builder.setInsertionPointToEnd(block);
+ CIRGenFunction::LexicalScope ls(cgf, loc, block);
+
+ CIRGenFunction::AutoVarEmission tempDeclEmission{*allocaDecl};
+ tempDeclEmission.EmittedAsOffload = true;
+
+ // The init section is the only one of the handful that only has a single
+ // argument for the 'type', so we have to drop 1 for init, and future calls
+ // to this will need to drop 2.
+ llvm::MutableArrayRef<mlir::BlockArgument> boundsRange =
+ block->getArguments().drop_front(isInitSection ? 1 : 2);
+
+ mlir::Value subscriptedValue = alloca;
+ for (mlir::BlockArgument boundArg : llvm::reverse(boundsRange))
+ subscriptedValue = createBoundsLoop(subscriptedValue, boundArg, loc,
+ /*inverse=*/false);
+
+ tempDeclEmission.setAllocatedAddress(
+ Address{subscriptedValue, cgf.convertType(origType),
+ cgf.getContext().getDeclAlign(allocaDecl)});
+ cgf.emitAutoVarInit(tempDeclEmission);
+}
// TODO: OpenACC: When we get this implemented for the reduction/firstprivate,
// this might end up re-merging with createRecipeInitCopy. For now, keep it
@@ -442,11 +468,16 @@ void OpenACCRecipeBuilderBase::createPrivateInitRecipe(
cgf.emitAutoVarAlloca(*allocaDecl, builder.saveInsertionPoint());
cgf.emitAutoVarInit(tempDeclEmission);
} else {
- makeBoundsAlloca(block, exprRange, loc, "openacc.private.init", numBounds,
- boundTypes);
-
- if (initExpr)
- cgf.cgm.errorNYI(exprRange, "private-init with bounds initialization");
+ mlir::Value alloca = makeBoundsAlloca(
+ block, exprRange, loc, "openacc.private.init", numBounds, boundTypes);
+
+ // If the initializer is trivial, there is nothing to do here, so save
+ // ourselves some effort.
+ if (initExpr && (!cgf.isTrivialInitializer(initExpr) ||
+ cgf.getContext().getLangOpts().getTrivialAutoVarInit() !=
+ LangOptions::TrivialAutoVarInitKind::Uninitialized))
+ makeBoundsInit(alloca, loc, block, allocaDecl, origType,
+ /*isInitSection=*/true);
}
mlir::acc::YieldOp::create(builder, locEnd);
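In C++ terms, the init recipe that createPrivateInitRecipe now builds for a bounds-based private variable with a non-trivial initializer is a loop over the bound range that constructs every element of the freshly allocated private copy. A rough editorial analogue of the CIR checked in the tests below (not generated code; the function and parameter names are illustrative):

    #include <cstdint>
    #include <new>
    struct NonDefaultCtor { NonDefaultCtor(); };
    // Analogue of the init region for `private(arr[lb:ub])` on NonDefaultCtor.
    void init_private_copy(NonDefaultCtor *alloca_, uint64_t lb, uint64_t ub) {
      for (uint64_t i = lb; i < ub; ++i)
        new (&alloca_[i]) NonDefaultCtor(); // one cir.call @_ZN14NonDefaultCtorC1Ev per element
    }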
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h
index d802ccb..203eaff 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h
@@ -35,6 +35,10 @@ class OpenACCRecipeBuilderBase {
size_t numBounds,
llvm::ArrayRef<QualType> boundTypes);
+ void makeBoundsInit(mlir::Value alloca, mlir::Location loc,
+ mlir::Block *block, const VarDecl *allocaDecl,
+ QualType origType, bool isInitSection);
+
protected:
CIRGen::CIRGenFunction &cgf;
CIRGen::CIRGenBuilderTy &builder;
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index 22c01c4..d6cd7eb 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -2083,6 +2083,9 @@ void Parser::SkipMalformedDecl() {
return;
break;
+ case tok::kw_extern:
+ // 'extern' at the start of a line is almost certainly a good
+ // place to pick back up parsing
case tok::kw_namespace:
// 'namespace' at the start of a line is almost certainly a good
// place to pick back up parsing, except in an Objective-C
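The new kw_extern case means SkipMalformedDecl treats a line beginning with 'extern' the same way it already treats 'namespace': as a likely declaration boundary where parsing can resume. A hypothetical illustration of the recovery (not the actual test in recovery-after-expected-unqualified-id.cpp):

    void 123;             // malformed declaration (expected unqualified-id); the parser skips ahead
    extern int recovered; // 'extern' at the start of a line now stops the skipping,
                          // so this declaration is parsed instead of being swallowed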
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 1131e1f..16d42d2 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -13660,7 +13660,7 @@ bool Sema::CheckUsingDeclQualifier(SourceLocation UsingLoc, bool HasTypename,
if (Cxx20Enumerator) {
Diag(NameLoc, diag::warn_cxx17_compat_using_decl_non_member_enumerator)
- << SS.getRange();
+ << SS.getScopeRep() << SS.getRange();
return false;
}
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 06b2529..4d3c7d6 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -20107,9 +20107,10 @@ static void DoMarkVarDeclReferenced(
bool NeededForConstantEvaluation =
isPotentiallyConstantEvaluatedContext(SemaRef) && UsableInConstantExpr;
- bool NeedDefinition = OdrUse == OdrUseContext::Used ||
- NeededForConstantEvaluation ||
- Var->getType()->isUndeducedType();
+ bool NeedDefinition =
+ OdrUse == OdrUseContext::Used || NeededForConstantEvaluation ||
+ (TSK != clang::TSK_Undeclared && !UsableInConstantExpr &&
+ Var->getType()->isUndeducedType());
assert(!isa<VarTemplatePartialSpecializationDecl>(Var) &&
"Can't instantiate a partial template specialization.");
diff --git a/clang/lib/Sema/SemaTemplateDeductionGuide.cpp b/clang/lib/Sema/SemaTemplateDeductionGuide.cpp
index 3d54d1e..fe673ea 100644
--- a/clang/lib/Sema/SemaTemplateDeductionGuide.cpp
+++ b/clang/lib/Sema/SemaTemplateDeductionGuide.cpp
@@ -1428,10 +1428,13 @@ void Sema::DeclareImplicitDeductionGuides(TemplateDecl *Template,
DeclareImplicitDeductionGuidesForTypeAlias(*this, AliasTemplate, Loc);
return;
}
- if (CXXRecordDecl *DefRecord =
- cast<CXXRecordDecl>(Template->getTemplatedDecl())->getDefinition()) {
+ CXXRecordDecl *DefRecord =
+ dyn_cast_or_null<CXXRecordDecl>(Template->getTemplatedDecl());
+ if (!DefRecord)
+ return;
+ if (const CXXRecordDecl *Definition = DefRecord->getDefinition()) {
if (TemplateDecl *DescribedTemplate =
- DefRecord->getDescribedClassTemplate())
+ Definition->getDescribedClassTemplate())
Template = DescribedTemplate;
}
diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
index 8e9d6fe..af0ef52 100644
--- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
+++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -2658,14 +2658,20 @@ RegionStoreManager::bindArray(LimitedRegionBindingsConstRef B,
return bindAggregate(B, R, V);
}
- // Handle lazy compound values.
+ // FIXME Single value constant should have been handled before this call to
+ // bindArray. This is only a hotfix to not crash.
+ if (Init.isConstant())
+ return bindAggregate(B, R, Init);
+
if (std::optional LCV = Init.getAs<nonloc::LazyCompoundVal>()) {
if (std::optional NewB = tryBindSmallArray(B, R, AT, *LCV))
return *NewB;
-
return bindAggregate(B, R, Init);
}
+ if (isa<nonloc::SymbolVal>(Init))
+ return bindAggregate(B, R, Init);
+
if (Init.isUnknown())
return bindAggregate(B, R, UnknownVal());
diff --git a/clang/test/Analysis/initializer.cpp b/clang/test/Analysis/initializer.cpp
index 713e121..88758f7 100644
--- a/clang/test/Analysis/initializer.cpp
+++ b/clang/test/Analysis/initializer.cpp
@@ -610,3 +610,51 @@ void top() {
consume(parseMatchComponent());
}
} // namespace elementwise_copy_small_array_from_post_initializer_of_cctor
+
+namespace gh147686 {
+// The problem reported in https://github.com/llvm/llvm-project/issues/147686
+// is sensitive to the initializer form: using parenthesis to initialize m_ptr
+// resulted in crashes when analyzing *m_ptr = '\0'; but using braces is fine.
+
+struct A {
+ A() : m_ptr(m_buf) { *m_ptr = '\0'; } // no-crash
+ A(int overload) : m_ptr{m_buf} { *m_ptr = '\0'; }
+ A(char src) : m_ptr(m_buf) { *m_ptr = src; } // no-crash
+ A(char src, int overload) : m_ptr{m_buf} { *m_ptr = src; }
+ char m_buf[64] = {0};
+ char * m_ptr;
+};
+
+void test1() {
+ A a;
+ clang_analyzer_eval(a.m_buf[0] == 0); // expected-warning{{TRUE}}
+ // FIXME The next eval should result in TRUE.
+ clang_analyzer_eval(*a.m_ptr == 0); // expected-warning{{UNKNOWN}}
+}
+
+void test2() {
+ A a(314);
+ clang_analyzer_eval(a.m_buf[0] == 0); // expected-warning{{TRUE}}
+ clang_analyzer_eval(*a.m_ptr == 0); // expected-warning{{TRUE}}
+}
+
+void test3() {
+ A a(0);
+ clang_analyzer_eval(a.m_buf[0] == 0); // expected-warning{{TRUE}}
+ clang_analyzer_eval(*a.m_ptr == 0); // expected-warning{{TRUE}}
+}
+
+void test3Bis(char arg) {
+ A a(arg);
+ // FIXME This test should behave like test3.
+ clang_analyzer_eval(a.m_buf[0] == arg); // expected-warning{{FALSE}} // expected-warning{{TRUE}}
+ clang_analyzer_eval(*a.m_ptr == arg); // expected-warning{{UNKNOWN}}
+}
+
+void test4(char arg) {
+ A a(arg, 314);
+ clang_analyzer_eval(a.m_buf[0] == arg); // expected-warning{{TRUE}}
+ clang_analyzer_eval(*a.m_ptr == arg); // expected-warning{{TRUE}}
+}
+
+} // namespace gh147686
diff --git a/clang/test/CIR/CodeGenOpenACC/combined-private-clause.cpp b/clang/test/CIR/CodeGenOpenACC/combined-private-clause.cpp
index 10f4482..f636a0f 100644
--- a/clang/test/CIR/CodeGenOpenACC/combined-private-clause.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/combined-private-clause.cpp
@@ -1,4 +1,4 @@
-// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
struct NoCopyConstruct {};
@@ -66,7 +66,6 @@ struct HasDtor {
// CHECK-NEXT: acc.private.recipe @privatization__Bcnt1__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.private.init"]
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
@@ -74,7 +73,6 @@ struct HasDtor {
// CHECK-NEXT: acc.private.recipe @privatization__Bcnt1__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.private.init"]
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
@@ -82,7 +80,6 @@ struct HasDtor {
// CHECK-NEXT: acc.private.recipe @privatization__Bcnt1__ZTSA5_15NoCopyConstruct : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_NoCopyConstruct x 5>, !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, ["openacc.private.init"]
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
@@ -90,7 +87,6 @@ struct HasDtor {
// CHECK-NEXT: acc.private.recipe @privatization__Bcnt1__ZTSA5_13CopyConstruct : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_CopyConstruct x 5>, !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>, ["openacc.private.init"]
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
@@ -98,7 +94,30 @@ struct HasDtor {
// CHECK-NEXT: acc.private.recipe @privatization__Bcnt1__ZTSA5_14NonDefaultCtor : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_NonDefaultCtor x 5>, !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, ["openacc.private.init"]
-// TODO: Add Init here.
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_NonDefaultCtor>
+// CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1Ev(%[[STRIDE]]) : (!cir.ptr<!rec_NonDefaultCtor>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
@@ -106,7 +125,6 @@ struct HasDtor {
// CHECK-NEXT: acc.private.recipe @privatization__Bcnt1__ZTSA5_7HasDtor : !cir.ptr<!cir.array<!rec_HasDtor x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasDtor x 5>, !cir.ptr<!cir.array<!rec_HasDtor x 5>>, ["openacc.private.init"]
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
diff --git a/clang/test/CIR/CodeGenOpenACC/compute-private-clause.c b/clang/test/CIR/CodeGenOpenACC/compute-private-clause.c
index 097005e..34b8b69 100644
--- a/clang/test/CIR/CodeGenOpenACC/compute-private-clause.c
+++ b/clang/test/CIR/CodeGenOpenACC/compute-private-clause.c
@@ -27,7 +27,6 @@ struct NoCopyConstruct {};
// CHECK-NEXT: acc.private.recipe @privatization__Bcnt1__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.private.init"]
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
@@ -35,7 +34,6 @@ struct NoCopyConstruct {};
// CHECK-NEXT: acc.private.recipe @privatization__Bcnt1__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.private.init"]
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
@@ -43,7 +41,6 @@ struct NoCopyConstruct {};
// CHECK-NEXT: acc.private.recipe @privatization__Bcnt1__ZTSA5_15NoCopyConstruct : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_NoCopyConstruct x 5>, !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, ["openacc.private.init"]
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
diff --git a/clang/test/CIR/CodeGenOpenACC/compute-private-clause.cpp b/clang/test/CIR/CodeGenOpenACC/compute-private-clause.cpp
index d854222..af84684 100644
--- a/clang/test/CIR/CodeGenOpenACC/compute-private-clause.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/compute-private-clause.cpp
@@ -1,4 +1,4 @@
-// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
struct NoCopyConstruct {};
@@ -59,42 +59,60 @@ struct HasDtor {
// CHECK-NEXT: acc.private.recipe @privatization__Bcnt1__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.private.init"]
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
// CHECK-NEXT: acc.private.recipe @privatization__Bcnt1__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.private.init"]
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
// CHECK-NEXT: acc.private.recipe @privatization__Bcnt1__ZTSA5_15NoCopyConstruct : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_NoCopyConstruct x 5>, !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, ["openacc.private.init"]
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
// CHECK-NEXT: acc.private.recipe @privatization__Bcnt1__ZTSA5_13CopyConstruct : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_CopyConstruct x 5>, !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>, ["openacc.private.init"]
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
// CHECK-NEXT: acc.private.recipe @privatization__Bcnt1__ZTSA5_14NonDefaultCtor : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_NonDefaultCtor x 5>, !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, ["openacc.private.init"]
-// TODO: Add Init here.
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_NonDefaultCtor>
+// CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1Ev(%[[STRIDE]]) : (!cir.ptr<!rec_NonDefaultCtor>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
// CHECK: acc.private.recipe @privatization__Bcnt1__ZTSA5_7HasDtor : !cir.ptr<!cir.array<!rec_HasDtor x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasDtor x 5>, !cir.ptr<!cir.array<!rec_HasDtor x 5>>, ["openacc.private.init"]
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
diff --git a/clang/test/CIR/CodeGenOpenACC/loop-private-clause.cpp b/clang/test/CIR/CodeGenOpenACC/loop-private-clause.cpp
index b356f0f..6824f77 100644
--- a/clang/test/CIR/CodeGenOpenACC/loop-private-clause.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/loop-private-clause.cpp
@@ -1,4 +1,4 @@
-// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
struct NoCopyConstruct {};
@@ -66,7 +66,6 @@ struct HasDtor {
// CHECK-NEXT: acc.private.recipe @privatization__Bcnt1__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.private.init"]
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
@@ -74,7 +73,6 @@ struct HasDtor {
// CHECK-NEXT: acc.private.recipe @privatization__Bcnt1__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.private.init"]
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
@@ -82,7 +80,6 @@ struct HasDtor {
// CHECK-NEXT: acc.private.recipe @privatization__Bcnt1__ZTSA5_15NoCopyConstruct : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_NoCopyConstruct x 5>, !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, ["openacc.private.init"]
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
@@ -90,7 +87,6 @@ struct HasDtor {
// CHECK-NEXT: acc.private.recipe @privatization__Bcnt1__ZTSA5_13CopyConstruct : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_CopyConstruct x 5>, !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>, ["openacc.private.init"]
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
@@ -98,7 +94,30 @@ struct HasDtor {
// CHECK-NEXT: acc.private.recipe @privatization__Bcnt1__ZTSA5_14NonDefaultCtor : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_NonDefaultCtor x 5>, !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, ["openacc.private.init"]
-// TODO: Add Init here.
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_NonDefaultCtor>
+// CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1Ev(%[[STRIDE]]) : (!cir.ptr<!rec_NonDefaultCtor>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+ // CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
@@ -106,7 +125,6 @@ struct HasDtor {
// CHECK-NEXT: acc.private.recipe @privatization__Bcnt1__ZTSA5_7HasDtor : !cir.ptr<!cir.array<!rec_HasDtor x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasDtor x 5>, !cir.ptr<!cir.array<!rec_HasDtor x 5>>, ["openacc.private.init"]
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
diff --git a/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-CtorDtor.cpp b/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-CtorDtor.cpp
index 30a14ac..101f18e8 100644
--- a/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-CtorDtor.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-CtorDtor.cpp
@@ -1,4 +1,4 @@
-// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
struct CtorDtor {
int i;
@@ -14,7 +14,33 @@ void do_things(unsigned A, unsigned B) {
// CHECK: acc.private.recipe @privatization__Bcnt1__ZTSA5_8CtorDtor : !cir.ptr<!cir.array<!rec_CtorDtor x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_CtorDtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca !cir.array<!rec_CtorDtor x 5>, !cir.ptr<!cir.array<!rec_CtorDtor x 5>>, ["openacc.private.init"] {alignment = 4 : i64}
-// TODO: Add Init here.
+//
+// Init Section.
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+//
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_DECAY:.*]] = cir.cast array_to_ptrdecay %[[TL_ALLOCA]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>> -> !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[TLA_DECAY]] : !cir.ptr<!rec_CtorDtor>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: cir.call @_ZN8CtorDtorC1Ev(%[[STRIDE]]) : (!cir.ptr<!rec_CtorDtor>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.array<!rec_CtorDtor x 5>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.array<!rec_CtorDtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
@@ -100,7 +126,57 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: acc.private.recipe @privatization__Bcnt2__ZTSA5_A5_8CtorDtor : !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca !cir.array<!cir.array<!rec_CtorDtor x 5> x 5>, !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>>, ["openacc.private.init"] {alignment = 4 : i64}
-// TODO: Add Init here.
+// Init Section:
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+//
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_DECAY:.*]] = cir.cast array_to_ptrdecay %[[TL_ALLOCA]] : !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>> -> !cir.ptr<!cir.array<!rec_CtorDtor x 5>>
+// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[TLA_DECAY]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.array<!rec_CtorDtor x 5>>
+//
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+//
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[BOUND2_STRIDE]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>> -> !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_DECAY]] : !cir.ptr<!rec_CtorDtor>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: cir.call @_ZN8CtorDtorC1Ev(%[[STRIDE]]) : (!cir.ptr<!rec_CtorDtor>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT:} destroy {
// CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}):
@@ -217,7 +293,78 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT:acc.private.recipe @privatization__Bcnt3__ZTSA5_A5_A5_8CtorDtor : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca !cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>, !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>>, ["openacc.private.init"] {alignment = 4 : i64}
-// TODO: Add Init here.
+//
+// Init Section:
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i
+// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i
+// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR3_LOAD]], %[[UB3_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_DECAY:.*]] = cir.cast array_to_ptrdecay %[[TL_ALLOCA]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>> -> !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>>
+// CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride(%[[TLA_DECAY]] : !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND3_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[BOUND3_STRIDE]] : !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>> -> !cir.ptr<!cir.array<!rec_CtorDtor x 5>>
+// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_DECAY]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.array<!rec_CtorDtor x 5>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[BOUND2_STRIDE]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>> -> !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[BOUND1_STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_DECAY]] : !cir.ptr<!rec_CtorDtor>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: cir.call @_ZN8CtorDtorC1Ev(%[[BOUND1_STRIDE]]) : (!cir.ptr<!rec_CtorDtor>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR3_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT:} destroy {
// CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}):
@@ -310,7 +457,73 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: acc.private.recipe @privatization__Bcnt2__ZTSA5_A5_A5_8CtorDtor : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca !cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>, !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>>, ["openacc.private.init"] {alignment = 4 : i64}
-// TODO: Add Init here.
+// Init Section:
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+//
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_DECAY:.*]] = cir.cast array_to_ptrdecay %[[TL_ALLOCA]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>> -> !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>>
+// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[TLA_DECAY]] : !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>>
+//
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+//
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[BOUND2_STRIDE]] : !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>> -> !cir.ptr<!cir.array<!rec_CtorDtor x 5>>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_DECAY]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!cir.array<!rec_CtorDtor x 5>>
+// CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<5> : !u64i
+// CHECK-NEXT: %[[ARR_DECAY:.*]] = cir.cast array_to_ptrdecay %[[STRIDE]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>> -> !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[ARR_DECAY]] : !cir.ptr<!rec_CtorDtor>, %[[ARR_SIZE]] : !u64i), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[ARR_IDX:.*]] = cir.alloca !cir.ptr<!rec_CtorDtor>, !cir.ptr<!cir.ptr<!rec_CtorDtor>>, ["__array_idx"] {alignment = 1 : i64}
+// CHECK-NEXT: cir.store %[[ARR_DECAY]], %[[ARR_IDX]] : !cir.ptr<!rec_CtorDtor>, !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: cir.do {
+// CHECK-NEXT: %[[IDX_LOAD:.*]] = cir.load %[[ARR_IDX]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: cir.call @_ZN8CtorDtorC1Ev(%[[IDX_LOAD]]) : (!cir.ptr<!rec_CtorDtor>) -> ()
+// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i
+// CHECK-NEXT: %[[INC_STRIDE:.*]] = cir.ptr_stride(%[[IDX_LOAD]] : !cir.ptr<!rec_CtorDtor>, %[[ONE]] : !u64i), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: cir.store %[[INC_STRIDE]], %[[ARR_IDX]] : !cir.ptr<!rec_CtorDtor>, !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } while {
+// CHECK-NEXT: %[[IDX_LOAD:.*]] = cir.load %[[ARR_IDX]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(ne, %[[IDX_LOAD]], %[[LAST_ELT]]) : !cir.ptr<!rec_CtorDtor>, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT:} destroy {
// CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}):
diff --git a/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-NoOps.cpp b/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-NoOps.cpp
index 753389f..7e2b8b8 100644
--- a/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-NoOps.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-NoOps.cpp
@@ -1,4 +1,4 @@
-// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
struct NoOps { int i = 0; };
@@ -9,7 +9,33 @@ void do_things(unsigned A, unsigned B) {
// CHECK: acc.private.recipe @privatization__Bcnt1__ZTSA5_5NoOps : !cir.ptr<!cir.array<!rec_NoOps x 5>> init {
// CHECK-NEXT: ^bb0(%arg0: !cir.ptr<!cir.array<!rec_NoOps x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca !cir.array<!rec_NoOps x 5>, !cir.ptr<!cir.array<!rec_NoOps x 5>>, ["openacc.private.init"] {alignment = 4 : i64}
-// TODO: Add Init here.
+//
+// Init Section:
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+//
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_DECAY:.*]] = cir.cast array_to_ptrdecay %[[TL_ALLOCA]] : !cir.ptr<!cir.array<!rec_NoOps x 5>> -> !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[TLA_DECAY]] : !cir.ptr<!rec_NoOps>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: cir.call @_ZN5NoOpsC1Ev(%[[STRIDE]]) nothrow : (!cir.ptr<!rec_NoOps>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -45,7 +71,58 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: acc.private.recipe @privatization__Bcnt2__ZTSA5_A5_5NoOps : !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> init {
// CHECK-NEXT: ^bb0(%arg0: !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca !cir.array<!cir.array<!rec_NoOps x 5> x 5>, !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>, ["openacc.private.init"] {alignment = 4 : i64}
-// TODO: Add Init here.
+//
+// Init Section:
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+//
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_DECAY:.*]] = cir.cast array_to_ptrdecay %[[TL_ALLOCA]] : !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> -> !cir.ptr<!cir.array<!rec_NoOps x 5>>
+// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[TLA_DECAY]] : !cir.ptr<!cir.array<!rec_NoOps x 5>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.array<!rec_NoOps x 5>>
+//
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+//
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[BOUND2_STRIDE]] : !cir.ptr<!cir.array<!rec_NoOps x 5>> -> !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_DECAY]] : !cir.ptr<!rec_NoOps>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: cir.call @_ZN5NoOpsC1Ev(%[[STRIDE]]) nothrow : (!cir.ptr<!rec_NoOps>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT:}
;
@@ -84,7 +161,78 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT:acc.private.recipe @privatization__Bcnt3__ZTSA5_A5_A5_5NoOps : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca !cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>, !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>>, ["openacc.private.init"] {alignment = 4 : i64}
-// TODO: Add Init here.
+//
+// Init Section:
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i
+// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i
+// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR3_LOAD]], %[[UB3_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_DECAY:.*]] = cir.cast array_to_ptrdecay %[[TL_ALLOCA]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> -> !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>
+// CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride(%[[TLA_DECAY]] : !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND3_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[BOUND3_STRIDE]] : !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> -> !cir.ptr<!cir.array<!rec_NoOps x 5>>
+// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_DECAY]] : !cir.ptr<!cir.array<!rec_NoOps x 5>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.array<!rec_NoOps x 5>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[BOUND2_STRIDE]] : !cir.ptr<!cir.array<!rec_NoOps x 5>> -> !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[BOUND1_STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_DECAY]] : !cir.ptr<!rec_NoOps>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: cir.call @_ZN5NoOpsC1Ev(%[[BOUND1_STRIDE]]) nothrow : (!cir.ptr<!rec_NoOps>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR3_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT:}
;
@@ -98,7 +246,73 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: acc.private.recipe @privatization__Bcnt2__ZTSA5_A5_A5_5NoOps : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca !cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>, !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>>, ["openacc.private.init"] {alignment = 4 : i64}
-// TODO: Add Init here.
+// Init Section:
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+//
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_DECAY:.*]] = cir.cast array_to_ptrdecay %[[TL_ALLOCA]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> -> !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>
+// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[TLA_DECAY]] : !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>
+//
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+//
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[BOUND2_STRIDE]] : !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> -> !cir.ptr<!cir.array<!rec_NoOps x 5>>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_DECAY]] : !cir.ptr<!cir.array<!rec_NoOps x 5>>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!cir.array<!rec_NoOps x 5>>
+// CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<5> : !u64i
+// CHECK-NEXT: %[[ARR_DECAY:.*]] = cir.cast array_to_ptrdecay %[[STRIDE]] : !cir.ptr<!cir.array<!rec_NoOps x 5>> -> !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[ARR_DECAY]] : !cir.ptr<!rec_NoOps>, %[[ARR_SIZE]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[ARR_IDX:.*]] = cir.alloca !cir.ptr<!rec_NoOps>, !cir.ptr<!cir.ptr<!rec_NoOps>>, ["__array_idx"] {alignment = 1 : i64}
+// CHECK-NEXT: cir.store %[[ARR_DECAY]], %[[ARR_IDX]] : !cir.ptr<!rec_NoOps>, !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: cir.do {
+// CHECK-NEXT: %[[IDX_LOAD:.*]] = cir.load %[[ARR_IDX]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: cir.call @_ZN5NoOpsC1Ev(%[[IDX_LOAD]]) nothrow : (!cir.ptr<!rec_NoOps>) -> ()
+// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i
+// CHECK-NEXT: %[[INC_STRIDE:.*]] = cir.ptr_stride(%[[IDX_LOAD]] : !cir.ptr<!rec_NoOps>, %[[ONE]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: cir.store %[[INC_STRIDE]], %[[ARR_IDX]] : !cir.ptr<!rec_NoOps>, !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } while {
+// CHECK-NEXT: %[[IDX_LOAD:.*]] = cir.load %[[ARR_IDX]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(ne, %[[IDX_LOAD]], %[[LAST_ELT]]) : !cir.ptr<!rec_NoOps>, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT:}
;
diff --git a/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-int.cpp b/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-int.cpp
index 3d4aaa0..e83e548 100644
--- a/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-int.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-int.cpp
@@ -7,7 +7,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK: acc.private.recipe @privatization__Bcnt1__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> init {
// CHECK-NEXT: ^bb0(%arg0: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.private.init"] {alignment = 4 : i64}
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -26,7 +25,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: acc.private.recipe @privatization__Bcnt2__ZTSA5_A5_i : !cir.ptr<!cir.array<!cir.array<!s32i x 5> x 5>> init {
// CHECK-NEXT: ^bb0(%arg0: !cir.ptr<!cir.array<!cir.array<!s32i x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca !cir.array<!cir.array<!s32i x 5> x 5>, !cir.ptr<!cir.array<!cir.array<!s32i x 5> x 5>>, ["openacc.private.init"] {alignment = 4 : i64}
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT:}
;
@@ -47,7 +45,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT:acc.private.recipe @privatization__Bcnt3__ZTSA5_A5_A5_i : !cir.ptr<!cir.array<!cir.array<!cir.array<!s32i x 5> x 5> x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!s32i x 5> x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca !cir.array<!cir.array<!cir.array<!s32i x 5> x 5> x 5>, !cir.ptr<!cir.array<!cir.array<!cir.array<!s32i x 5> x 5> x 5>>, ["openacc.private.init"] {alignment = 4 : i64}
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT:}
;
@@ -61,7 +58,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: acc.private.recipe @privatization__Bcnt2__ZTSA5_A5_A5_i : !cir.ptr<!cir.array<!cir.array<!cir.array<!s32i x 5> x 5> x 5>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!s32i x 5> x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}):
// CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca !cir.array<!cir.array<!cir.array<!s32i x 5> x 5> x 5>, !cir.ptr<!cir.array<!cir.array<!cir.array<!s32i x 5> x 5> x 5>>, ["openacc.private.init"] {alignment = 4 : i64}
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT:}
;
diff --git a/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-CtorDtor.cpp b/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-CtorDtor.cpp
index 4d0e481..3149493 100644
--- a/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-CtorDtor.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-CtorDtor.cpp
@@ -1,4 +1,4 @@
-// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
struct CtorDtor {
int i;
@@ -44,7 +44,33 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
+//
+// Init Section:
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_LOAD:.*]] = cir.load %[[TL_ALLOCA]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[ELT_STRIDE:.*]] = cir.ptr_stride(%[[TLA_LOAD]] : !cir.ptr<!rec_CtorDtor>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: cir.call @_ZN8CtorDtorC1Ev(%[[ELT_STRIDE]]) : (!cir.ptr<!rec_CtorDtor>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+//
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.ptr<!rec_CtorDtor>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.ptr<!rec_CtorDtor>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
@@ -154,7 +180,55 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
+//
+// Init Section:
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_LOAD:.*]] = cir.load %[[TL_ALLOCA]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>>, !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[TLA_LOAD]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_LOAD:.*]] = cir.load %[[BOUND2_STRIDE]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_LOAD]] : !cir.ptr<!rec_CtorDtor>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: cir.call @_ZN8CtorDtorC1Ev(%[[STRIDE]]) : (!cir.ptr<!rec_CtorDtor>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}):
@@ -321,7 +395,77 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: }
//
//
-// TODO: Add Init here.
+// Init Section:
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i
+// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i
+// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR3_LOAD]], %[[UB3_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_LOAD:.*]] = cir.load %[[TL_ALLOCA]] : !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>>>, !cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>>
+// CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride(%[[TLA_LOAD]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND3_LOAD:.*]] = cir.load %[[BOUND3_STRIDE]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>>, !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[BOUND3_LOAD]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_LOAD:.*]] = cir.load %[[BOUND2_STRIDE]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_LOAD]] : !cir.ptr<!rec_CtorDtor>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: cir.call @_ZN8CtorDtorC1Ev(%[[STRIDE]]) : (!cir.ptr<!rec_CtorDtor>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR3_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}):
@@ -475,7 +619,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -535,7 +678,56 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: }
// CHECK-NEXT: }
//
-// TODO: Add Init here.
+//
+// Init Section:
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_DECAY:.*]] = cir.cast array_to_ptrdecay %[[TL_ALLOCA]] : !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>> -> !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[TLA_DECAY]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_LOAD:.*]] = cir.load %[[BOUND2_STRIDE]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[ELT_STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_LOAD]] : !cir.ptr<!rec_CtorDtor>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: cir.call @_ZN8CtorDtorC1Ev(%[[ELT_STRIDE]]) : (!cir.ptr<!rec_CtorDtor>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+//
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}):
@@ -641,7 +833,55 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
+//
+// Init Section:
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_LOAD:.*]] = cir.load %[[TL_ALLOCA]] : !cir.ptr<!cir.ptr<!cir.array<!rec_CtorDtor x 5>>>, !cir.ptr<!cir.array<!rec_CtorDtor x 5>>
+// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[TLA_LOAD]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.array<!rec_CtorDtor x 5>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[BOUND2_STRIDE]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>> -> !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[ELT_STRIDE:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_CtorDtor>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: cir.call @_ZN8CtorDtorC1Ev(%[[ELT_STRIDE]]) : (!cir.ptr<!rec_CtorDtor>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.ptr<!cir.array<!rec_CtorDtor x 5>>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.ptr<!cir.array<!rec_CtorDtor x 5>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}):
@@ -783,7 +1023,78 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
+//
+// Init Section:
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i
+// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i
+// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR3_LOAD]], %[[UB3_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_DECAY:.*]] = cir.cast array_to_ptrdecay %[[TL_ALLOCA]] : !cir.ptr<!cir.array<!cir.ptr<!cir.ptr<!rec_CtorDtor>> x 5>> -> !cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>>
+// CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride(%[[TLA_DECAY]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND3_STRIDE_LOAD:.*]] = cir.load %[[BOUND3_STRIDE]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>>, !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_LOAD]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_LOAD:.*]] = cir.load %[[BOUND2_STRIDE]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_LOAD]] : !cir.ptr<!rec_CtorDtor>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: cir.call @_ZN8CtorDtorC1Ev(%[[STRIDE]]) : (!cir.ptr<!rec_CtorDtor>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR3_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.array<!cir.ptr<!cir.ptr<!rec_CtorDtor>> x 5>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.array<!cir.ptr<!cir.ptr<!rec_CtorDtor>> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}):
@@ -914,7 +1225,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -996,7 +1306,77 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: }
// CHECK-NEXT: }
//
-// TODO: Add Init here.
+// Init Section:
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i
+// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i
+// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR3_LOAD]], %[[UB3_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_LOAD:.*]] = cir.load %[[TL_ALLOCA]] : !cir.ptr<!cir.ptr<!cir.ptr<!cir.array<!rec_CtorDtor x 5>>>>, !cir.ptr<!cir.ptr<!cir.array<!rec_CtorDtor x 5>>>
+// CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride(%[[TLA_LOAD]] : !cir.ptr<!cir.ptr<!cir.array<!rec_CtorDtor x 5>>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!cir.array<!rec_CtorDtor x 5>>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND3_LOAD:.*]] = cir.load %[[BOUND3_STRIDE]] : !cir.ptr<!cir.ptr<!cir.array<!rec_CtorDtor x 5>>>, !cir.ptr<!cir.array<!rec_CtorDtor x 5>>
+// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[BOUND3_LOAD]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.array<!rec_CtorDtor x 5>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[BOUND2_STRIDE]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>> -> !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_DECAY]] : !cir.ptr<!rec_CtorDtor>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: cir.call @_ZN8CtorDtorC1Ev(%[[STRIDE]]) : (!cir.ptr<!rec_CtorDtor>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR3_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.array<!rec_CtorDtor x 5>>>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.array<!rec_CtorDtor x 5>>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}):
@@ -1149,7 +1529,71 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
+//
+// Init Section:
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_LOAD:.*]] = cir.load %[[TL_ALLOCA]] : !cir.ptr<!cir.ptr<!cir.ptr<!cir.array<!rec_CtorDtor x 5>>>>, !cir.ptr<!cir.ptr<!cir.array<!rec_CtorDtor x 5>>>
+// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[TLA_LOAD]] : !cir.ptr<!cir.ptr<!cir.array<!rec_CtorDtor x 5>>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!cir.array<!rec_CtorDtor x 5>>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_LOAD:.*]] = cir.load %[[BOUND2_STRIDE]] : !cir.ptr<!cir.ptr<!cir.array<!rec_CtorDtor x 5>>>, !cir.ptr<!cir.array<!rec_CtorDtor x 5>>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_LOAD]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!cir.array<!rec_CtorDtor x 5>>
+// CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<5> : !u64i
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[STRIDE]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>> -> !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_CtorDtor>, %[[ARR_SIZE]] : !u64i), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_CtorDtor>, !cir.ptr<!cir.ptr<!rec_CtorDtor>>, ["__array_idx"] {alignment = 1 : i64}
+// CHECK-NEXT: cir.store %[[DECAY]], %[[IDX]] : !cir.ptr<!rec_CtorDtor>, !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: cir.do {
+// CHECK-NEXT: %[[IDX_LOAD:.*]] = cir.load %[[IDX]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: cir.call @_ZN8CtorDtorC1Ev(%[[IDX_LOAD]]) : (!cir.ptr<!rec_CtorDtor>) -> ()
+// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.ptr_stride(%[[IDX_LOAD]] : !cir.ptr<!rec_CtorDtor>, %[[ONE]] : !u64i), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: cir.store %[[INC]], %[[IDX]] : !cir.ptr<!rec_CtorDtor>, !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } while {
+// CHECK-NEXT: %[[IDX_LOAD:.*]] = cir.load %[[IDX]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[IDX_LOAD]], %[[ELT]]) : !cir.ptr<!rec_CtorDtor>, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.array<!rec_CtorDtor x 5>>>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.array<!rec_CtorDtor x 5>>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}):
@@ -1313,7 +1757,77 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
+// Init Section:
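+// One cir.for per bound, outermost (BOUND3) first; each body resolves the next level via cir.load or array_to_ptrdecay plus cir.ptr_stride, and the innermost loop calls the CtorDtor constructor on every element.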
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i
+// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i
+// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR3_LOAD]], %[[UB3_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_LOAD:.*]] = cir.load %[[TL_ALLOCA]] : !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>>, !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>
+// CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride(%[[TLA_LOAD]] : !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND3_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[BOUND3_STRIDE]] : !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>> -> !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_DECAY]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_LOAD:.*]] = cir.load %[[BOUND2_STRIDE]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_LOAD]] : !cir.ptr<!rec_CtorDtor>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: cir.call @_ZN8CtorDtorC1Ev(%[[STRIDE]]) : (!cir.ptr<!rec_CtorDtor>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR3_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}):
@@ -1437,7 +1951,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
diff --git a/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-NoOps.cpp b/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-NoOps.cpp
index 4687320..ed8c380 100644
--- a/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-NoOps.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-NoOps.cpp
@@ -1,4 +1,4 @@
-// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
struct NoOps { int i = 0; };
@@ -39,14 +39,40 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
+//
+// Init Section:
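+// Single-dimension case: one cir.for over BOUND1 strides through the pointee and calls the NoOps constructor on each element.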
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_LOAD:.*]] = cir.load %[[TL_ALLOCA]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[ELT_STRIDE:.*]] = cir.ptr_stride(%[[TLA_LOAD]] : !cir.ptr<!rec_NoOps>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: cir.call @_ZN5NoOpsC1Ev(%[[ELT_STRIDE]]) nothrow : (!cir.ptr<!rec_NoOps>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+//
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
#pragma acc parallel private(OnePtr[B])
;
#pragma acc parallel private(OnePtr)
-// CHECK-NEXT: acc.private.recipe @privatization__ZTSP5NoOps : !cir.ptr<!cir.ptr<!rec_NoOps>> init {
+// CHECK: acc.private.recipe @privatization__ZTSP5NoOps : !cir.ptr<!cir.ptr<!rec_NoOps>> init {
// CHECK-NEXT: ^bb0(%arg0: !cir.ptr<!cir.ptr<!rec_NoOps>> {{.*}}):
// CHECK-NEXT: cir.alloca !cir.ptr<!rec_NoOps>, !cir.ptr<!cir.ptr<!rec_NoOps>>, ["openacc.private.init"] {alignment = 8 : i64}
// CHECK-NEXT: acc.yield
@@ -118,7 +144,54 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
+// Init Section:
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_LOAD:.*]] = cir.load %[[TL_ALLOCA]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[TLA_LOAD]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_LOAD:.*]] = cir.load %[[BOUND2_STRIDE]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_LOAD]] : !cir.ptr<!rec_NoOps>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: cir.call @_ZN5NoOpsC1Ev(%[[STRIDE]]) nothrow : (!cir.ptr<!rec_NoOps>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -187,7 +260,7 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.condition(%[[CMP]])
// CHECK-NEXT: } body {
// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
-// CHECK-NEXT: %[[SRC_IDX:.*]] = cir.binop(mul, %[[UB1_CAST]], %[[ITR_LOAD]]) : !u64i
+// CHECK-NEXT: %[[SRC_IDX:.*]] = cir.binop(mul, %[[UB2_CAST]], %[[ITR_LOAD]]) : !u64i
// CHECK-NEXT: %[[SRC:.*]] = cir.ptr_stride(%[[ARR_ALLOCA2]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, %[[SRC_IDX]] : !u64i), !cir.ptr<!cir.ptr<!rec_NoOps>>
// CHECK-NEXT: %[[DEST:.*]] = cir.ptr_stride(%[[ARR_ALLOCA]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>
// CHECK-NEXT: cir.store %[[SRC]], %[[DEST]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>
@@ -229,7 +302,78 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
+//
+// Init Section:
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i
+// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i
+// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR3_LOAD]], %[[UB3_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_LOAD:.*]] = cir.load %[[TL_ALLOCA]] : !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>>, !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>
+// CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride(%[[TLA_LOAD]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND3_LOAD:.*]] = cir.load %[[BOUND3_STRIDE]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[BOUND3_LOAD]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_LOAD:.*]] = cir.load %[[BOUND2_STRIDE]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_LOAD]] : !cir.ptr<!rec_NoOps>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: cir.call @_ZN5NoOpsC1Ev(%[[STRIDE]]) nothrow : (!cir.ptr<!rec_NoOps>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR3_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -303,7 +447,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -362,7 +505,56 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
+//
+// Init Section:
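+// Here the outer level is the private array itself, so it is resolved with cir.cast array_to_ptrdecay rather than a load before the per-bound striding.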
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_LOAD:.*]] = cir.cast array_to_ptrdecay %[[TL_ALLOCA]] : !cir.ptr<!cir.array<!cir.ptr<!rec_NoOps> x 5>> -> !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[TLA_LOAD]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_LOAD:.*]] = cir.load %[[BOUND2_STRIDE]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[ELT_STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_LOAD]] : !cir.ptr<!rec_NoOps>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: cir.call @_ZN5NoOpsC1Ev(%[[ELT_STRIDE]]) nothrow : (!cir.ptr<!rec_NoOps>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+//
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -415,7 +607,54 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
+// Init Section:
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_LOAD:.*]] = cir.load %[[TL_ALLOCA]] : !cir.ptr<!cir.ptr<!cir.array<!rec_NoOps x 5>>>, !cir.ptr<!cir.array<!rec_NoOps x 5>>
+// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[TLA_LOAD]] : !cir.ptr<!cir.array<!rec_NoOps x 5>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.array<!rec_NoOps x 5>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[BOUND2_STRIDE]] : !cir.ptr<!cir.array<!rec_NoOps x 5>> -> !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[ELT_STRIDE:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_NoOps>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: cir.call @_ZN5NoOpsC1Ev(%[[ELT_STRIDE]]) nothrow : (!cir.ptr<!rec_NoOps>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
#pragma acc parallel private(PtrToArrays[B][A:B])
@@ -502,7 +741,77 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
+// Init Section:
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i
+// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i
+// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR3_LOAD]], %[[UB3_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_DECAY:.*]] = cir.cast array_to_ptrdecay %[[TL_ALLOCA]] : !cir.ptr<!cir.array<!cir.ptr<!cir.ptr<!rec_NoOps>> x 5>> -> !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>
+// CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride(%[[TLA_DECAY]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND3_STRIDE_LOAD:.*]] = cir.load %[[BOUND3_STRIDE]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_LOAD]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_LOAD:.*]] = cir.load %[[BOUND2_STRIDE]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_LOAD]] : !cir.ptr<!rec_NoOps>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: cir.call @_ZN5NoOpsC1Ev(%[[STRIDE]]) nothrow : (!cir.ptr<!rec_NoOps>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR3_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -553,7 +862,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -634,7 +942,77 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
+// Init Section:
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i
+// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i
+// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR3_LOAD]], %[[UB3_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_LOAD:.*]] = cir.load %[[TL_ALLOCA]] : !cir.ptr<!cir.ptr<!cir.ptr<!cir.array<!rec_NoOps x 5>>>>, !cir.ptr<!cir.ptr<!cir.array<!rec_NoOps x 5>>>
+// CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride(%[[TLA_LOAD]] : !cir.ptr<!cir.ptr<!cir.array<!rec_NoOps x 5>>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!cir.array<!rec_NoOps x 5>>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND3_LOAD:.*]] = cir.load %[[BOUND3_STRIDE]] : !cir.ptr<!cir.ptr<!cir.array<!rec_NoOps x 5>>>, !cir.ptr<!cir.array<!rec_NoOps x 5>>
+// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[BOUND3_LOAD]] : !cir.ptr<!cir.array<!rec_NoOps x 5>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.array<!rec_NoOps x 5>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[BOUND2_STRIDE]] : !cir.ptr<!cir.array<!rec_NoOps x 5>> -> !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_DECAY]] : !cir.ptr<!rec_NoOps>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: cir.call @_ZN5NoOpsC1Ev(%[[STRIDE]]) nothrow : (!cir.ptr<!rec_NoOps>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR3_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -708,7 +1086,70 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
+// Init Section:
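+// The innermost element is a complete constant-size array, so no third bounds loop is emitted; a cir.do/while walks all five elements and constructs each one.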
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_LOAD:.*]] = cir.load %[[TL_ALLOCA]] : !cir.ptr<!cir.ptr<!cir.ptr<!cir.array<!rec_NoOps x 5>>>>, !cir.ptr<!cir.ptr<!cir.array<!rec_NoOps x 5>>>
+// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[TLA_LOAD]] : !cir.ptr<!cir.ptr<!cir.array<!rec_NoOps x 5>>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!cir.array<!rec_NoOps x 5>>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_LOAD:.*]] = cir.load %[[BOUND2_STRIDE]] : !cir.ptr<!cir.ptr<!cir.array<!rec_NoOps x 5>>>, !cir.ptr<!cir.array<!rec_NoOps x 5>>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_LOAD]] : !cir.ptr<!cir.array<!rec_NoOps x 5>>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!cir.array<!rec_NoOps x 5>>
+// CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<5> : !u64i
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[STRIDE]] : !cir.ptr<!cir.array<!rec_NoOps x 5>> -> !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_NoOps>, %[[ARR_SIZE]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_NoOps>, !cir.ptr<!cir.ptr<!rec_NoOps>>, ["__array_idx"] {alignment = 1 : i64}
+// CHECK-NEXT: cir.store %[[DECAY]], %[[IDX]] : !cir.ptr<!rec_NoOps>, !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: cir.do {
+// CHECK-NEXT: %[[IDX_LOAD:.*]] = cir.load %[[IDX]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: cir.call @_ZN5NoOpsC1Ev(%[[IDX_LOAD]]) nothrow : (!cir.ptr<!rec_NoOps>) -> ()
+// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.ptr_stride(%[[IDX_LOAD]] : !cir.ptr<!rec_NoOps>, %[[ONE]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: cir.store %[[INC]], %[[IDX]] : !cir.ptr<!rec_NoOps>, !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } while {
+// CHECK-NEXT: %[[IDX_LOAD:.*]] = cir.load %[[IDX]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[IDX_LOAD]], %[[ELT]]) : !cir.ptr<!rec_NoOps>, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -735,7 +1176,7 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i
// CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<40> : !u64i
-// CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.binop(mul, %[[UB2_CAST]], %[[ARR_SIZE]]) : !u64i
+// CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.binop(mul, %[[UB3_CAST]], %[[ARR_SIZE]]) : !u64i
// CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca !cir.array<!cir.ptr<!rec_NoOps> x 5>, !cir.ptr<!cir.array<!cir.ptr<!rec_NoOps> x 5>>, %[[ALLOCA_SIZE]] : !u64i, ["openacc.init.bounds"] {alignment = 8 : i64}
//
// CHECK-NEXT: cir.scope {
@@ -749,7 +1190,7 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.condition(%[[CMP]])
// CHECK-NEXT: } body {
// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
-// CHECK-NEXT: %[[SRC_IDX:.*]] = cir.binop(mul, %[[UB2_CAST]], %[[ITR_LOAD]]) : !u64i
+// CHECK-NEXT: %[[SRC_IDX:.*]] = cir.binop(mul, %[[UB3_CAST]], %[[ITR_LOAD]]) : !u64i
// CHECK-NEXT: %[[SRC:.*]] = cir.ptr_stride(%[[ARR_ALLOCA]] : !cir.ptr<!cir.array<!cir.ptr<!rec_NoOps> x 5>>, %[[SRC_IDX]] : !u64i), !cir.ptr<!cir.array<!cir.ptr<!rec_NoOps> x 5>>
// CHECK-NEXT: %[[DEST:.*]] = cir.ptr_stride(%[[TL_ALLOCA]] : !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_NoOps> x 5>>>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_NoOps> x 5>>>
// CHECK-NEXT: cir.store %[[SRC]], %[[DEST]] : !cir.ptr<!cir.array<!cir.ptr<!rec_NoOps> x 5>>, !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_NoOps> x 5>>>
@@ -799,7 +1240,77 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
+// Init Section:
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i
+// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i
+// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR3_LOAD]], %[[UB3_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_LOAD:.*]] = cir.load %[[TL_ALLOCA]] : !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_NoOps> x 5>>>, !cir.ptr<!cir.array<!cir.ptr<!rec_NoOps> x 5>>
+// CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride(%[[TLA_LOAD]] : !cir.ptr<!cir.array<!cir.ptr<!rec_NoOps> x 5>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.array<!cir.ptr<!rec_NoOps> x 5>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND3_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[BOUND3_STRIDE]] : !cir.ptr<!cir.array<!cir.ptr<!rec_NoOps> x 5>> -> !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_DECAY]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_LOAD:.*]] = cir.load %[[BOUND2_STRIDE]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_LOAD]] : !cir.ptr<!rec_NoOps>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: cir.call @_ZN5NoOpsC1Ev(%[[STRIDE]]) nothrow : (!cir.ptr<!rec_NoOps>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR3_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -843,7 +1354,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
diff --git a/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-int.cpp b/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-int.cpp
index db5d578..aac7573 100644
--- a/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-int.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-int.cpp
@@ -38,7 +38,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: }
// CHECK-NEXT: }
//
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -117,7 +116,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -227,7 +225,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -301,7 +298,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -359,7 +355,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -411,7 +406,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -499,7 +493,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -550,7 +543,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -632,7 +624,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT:}
;
@@ -706,7 +697,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -797,7 +787,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -842,7 +831,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
diff --git a/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-recipes-CtorDtor.cpp b/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-recipes-CtorDtor.cpp
index 65b0365..77b7143 100644
--- a/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-recipes-CtorDtor.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-recipes-CtorDtor.cpp
@@ -1,4 +1,4 @@
-// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
struct CtorDtor {
int i;
@@ -57,7 +57,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: }
// CHECK-NEXT: }
//
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -130,7 +129,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -243,7 +241,79 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
+// Initialization Section
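+// Three nested cir.for loops (BOUNDS3 outermost); each level loads the next pointer and applies cir.ptr_stride, and the innermost loop constructs each CtorDtor element.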
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUNDS3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i
+// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUNDS3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i
+// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR3_LOAD]], %[[UB3_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_LOAD:.*]] = cir.load %[[TOP_LEVEL_ALLOCA]] : !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>>>, !cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>>
+// CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride(%[[TLA_LOAD]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUNDS2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUNDS2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND3_STRIDE_LOAD:.*]] = cir.load %[[BOUND3_STRIDE]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>>, !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_LOAD]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUNDS1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUNDS1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_LOAD:.*]] = cir.load %[[BOUND2_STRIDE]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_LOAD]] : !cir.ptr<!rec_CtorDtor>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: cir.call @_ZN8CtorDtorC1Ev(%[[STRIDE]]) : (!cir.ptr<!rec_CtorDtor>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR3_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+//
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>>> {{.*}}, %[[BOUNDS1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUNDS2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUNDS3:.*]]: !acc.data_bounds_ty {{.*}}):
@@ -379,7 +449,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -452,7 +521,58 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
+//
+// Initialization Section
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUNDS2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUNDS2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+//
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_LOAD:.*]] = cir.load %[[TOP_LEVEL_ALLOCA]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>>, !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: %[[TLA_STRIDE:.*]] = cir.ptr_stride(%[[TLA_LOAD]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+//
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUNDS1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUNDS1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+//
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_STRIDE_LOAD:.*]] = cir.load %[[TLA_STRIDE]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[TLA_STRIDE_LOAD]] : !cir.ptr<!rec_CtorDtor>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: cir.call @_ZN8CtorDtorC1Ev(%[[STRIDE]]) : (!cir.ptr<!rec_CtorDtor>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>> {{.*}}, %[[BOUNDS1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUNDS2:.*]]: !acc.data_bounds_ty {{.*}}):
@@ -565,7 +685,32 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: }
// CHECK-NEXT: }
//
-// TODO: Add Init here.
+// Initialization Section
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_LOAD:.*]] = cir.load %[[TOP_LEVEL_ALLOCA]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[TLA_LOAD]] : !cir.ptr<!rec_CtorDtor>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: cir.call @_ZN8CtorDtorC1Ev(%[[STRIDE]]) : (!cir.ptr<!rec_CtorDtor>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+//
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } destroy {
// CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.ptr<!rec_CtorDtor>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.ptr<!rec_CtorDtor>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}):
diff --git a/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-recipes-NoOps.cpp b/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-recipes-NoOps.cpp
index 07e06f8..b988fc4 100644
--- a/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-recipes-NoOps.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-recipes-NoOps.cpp
@@ -1,4 +1,4 @@
-// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
struct NoOps { int i = 0; };
@@ -50,7 +50,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -124,7 +123,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -237,7 +235,79 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: }
// CHECK-NEXT: }
//
-// TODO: Add Init here.
+// Init Section.
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i
+// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i
+// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR3_LOAD]], %[[UB3_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_LOAD:.*]] = cir.load %[[TOP_LEVEL_ALLOCA]] : !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>>, !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>
+// CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride(%[[TLA_LOAD]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND3_STRIDE_LOAD:.*]] = cir.load %[[BOUND3_STRIDE]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_LOAD]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_LOAD:.*]] = cir.load %[[BOUND2_STRIDE]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_LOAD]] : !cir.ptr<!rec_NoOps>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: cir.call @_ZN5NoOpsC1Ev(%[[STRIDE]]) nothrow : (!cir.ptr<!rec_NoOps>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR3_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+//
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -250,7 +320,7 @@ void do_things(unsigned A, unsigned B) {
T **TwoPtr;
#pragma acc parallel private(TwoPtr)
-// CHECK-NEXT: acc.private.recipe @privatization__ZTSPP5NoOps : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>> init {
+// CHECK: acc.private.recipe @privatization__ZTSPP5NoOps : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>> init {
// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>> {{.*}}):
// CHECK-NEXT: cir.alloca !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, ["openacc.private.init"] {alignment = 8 : i64}
// CHECK-NEXT: acc.yield
@@ -294,7 +364,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -367,7 +436,58 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
+//
+// Initialization Section.
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+//
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_LOAD:.*]] = cir.load %[[TOP_LEVEL_ALLOCA]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: %[[TLA_STRIDE:.*]] = cir.ptr_stride(%[[TLA_LOAD]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_NoOps>>
+//
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+//
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_STRIDE_LOAD:.*]] = cir.load %[[TLA_STRIDE]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[TLA_STRIDE_LOAD]] : !cir.ptr<!rec_NoOps>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: cir.call @_ZN5NoOpsC1Ev(%[[STRIDE]]) nothrow : (!cir.ptr<!rec_NoOps>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -423,7 +543,32 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: }
// CHECK-NEXT: }
//
-// TODO: Add Init here.
+// Init Section.
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_LOAD:.*]] = cir.load %[[TOP_LEVEL_ALLOCA]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[TLA_LOAD]] : !cir.ptr<!rec_NoOps>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: cir.call @_ZN5NoOpsC1Ev(%[[STRIDE]]) nothrow : (!cir.ptr<!rec_NoOps>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+//
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
diff --git a/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-recipes-int.cpp b/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-recipes-int.cpp
index a3b7dca..c87e1a6 100644
--- a/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-recipes-int.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-recipes-int.cpp
@@ -48,7 +48,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -122,7 +121,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: }
// CHECK-NEXT: }
//
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -233,7 +231,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: }
// CHECK-NEXT: }
//
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -291,7 +288,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: }
// CHECK-NEXT: }
//
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -365,7 +361,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: }
// CHECK-NEXT: }
//
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
@@ -420,7 +415,6 @@ void do_things(unsigned A, unsigned B) {
// CHECK-NEXT: cir.yield
// CHECK-NEXT: }
// CHECK-NEXT: }
-// TODO: Add Init here.
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
;
diff --git a/clang/test/CIR/IR/alloca.cir b/clang/test/CIR/IR/alloca.cir
index 4a13c44..d94da81 100644
--- a/clang/test/CIR/IR/alloca.cir
+++ b/clang/test/CIR/IR/alloca.cir
@@ -1,5 +1,5 @@
-// RUN: cir-opt %s | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
!u64i = !cir.int<u, 64>
!u8i = !cir.int<u, 8>
diff --git a/clang/test/CIR/IR/array-ctor.cir b/clang/test/CIR/IR/array-ctor.cir
index 2378992..fd2ec7e 100644
--- a/clang/test/CIR/IR/array-ctor.cir
+++ b/clang/test/CIR/IR/array-ctor.cir
@@ -1,5 +1,5 @@
-// RUN: cir-opt %s | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
!u8i = !cir.int<u, 8>
!rec_S = !cir.record<struct "S" padded {!u8i}>
diff --git a/clang/test/CIR/IR/array-dtor.cir b/clang/test/CIR/IR/array-dtor.cir
index 6d08d16..1bb9ff9 100644
--- a/clang/test/CIR/IR/array-dtor.cir
+++ b/clang/test/CIR/IR/array-dtor.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
!u8i = !cir.int<u, 8>
!rec_S = !cir.record<struct "S" padded {!u8i}>
diff --git a/clang/test/CIR/IR/array.cir b/clang/test/CIR/IR/array.cir
index bba5360..ddc6b92 100644
--- a/clang/test/CIR/IR/array.cir
+++ b/clang/test/CIR/IR/array.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
!s32i = !cir.int<s, 32>
diff --git a/clang/test/CIR/IR/atomic.cir b/clang/test/CIR/IR/atomic.cir
index 6ca5af2..8520763 100644
--- a/clang/test/CIR/IR/atomic.cir
+++ b/clang/test/CIR/IR/atomic.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
!s32i = !cir.int<s, 32>
!u32i = !cir.int<u, 32>
diff --git a/clang/test/CIR/IR/binassign.cir b/clang/test/CIR/IR/binassign.cir
index 6d2c5c8..0247126 100644
--- a/clang/test/CIR/IR/binassign.cir
+++ b/clang/test/CIR/IR/binassign.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | cir-opt | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
!s32i = !cir.int<s, 32>
!s8i = !cir.int<s, 8>
diff --git a/clang/test/CIR/IR/bitfield_info.cir b/clang/test/CIR/IR/bitfield_info.cir
index 682e090..2d743fb 100644
--- a/clang/test/CIR/IR/bitfield_info.cir
+++ b/clang/test/CIR/IR/bitfield_info.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
!s32i = !cir.int<s, 32>
!u32i = !cir.int<u, 32>
diff --git a/clang/test/CIR/IR/call.cir b/clang/test/CIR/IR/call.cir
index 9607df7..59f28be 100644
--- a/clang/test/CIR/IR/call.cir
+++ b/clang/test/CIR/IR/call.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
!s32i = !cir.int<s, 32>
diff --git a/clang/test/CIR/IR/cast.cir b/clang/test/CIR/IR/cast.cir
index 11b1664..3f2fca9 100644
--- a/clang/test/CIR/IR/cast.cir
+++ b/clang/test/CIR/IR/cast.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | cir-opt | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
!s32i = !cir.int<s, 32>
module {
diff --git a/clang/test/CIR/IR/cmp.cir b/clang/test/CIR/IR/cmp.cir
index fdf538d..0d47398 100644
--- a/clang/test/CIR/IR/cmp.cir
+++ b/clang/test/CIR/IR/cmp.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | cir-opt | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
!s32i = !cir.int<s, 32>
!u32i = !cir.int<u, 32>
diff --git a/clang/test/CIR/IR/complex.cir b/clang/test/CIR/IR/complex.cir
index a73a865..a7e0c77 100644
--- a/clang/test/CIR/IR/complex.cir
+++ b/clang/test/CIR/IR/complex.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
!s32i = !cir.int<s, 32>
diff --git a/clang/test/CIR/IR/copy.cir b/clang/test/CIR/IR/copy.cir
index 2cfb25d..f9db29a 100644
--- a/clang/test/CIR/IR/copy.cir
+++ b/clang/test/CIR/IR/copy.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
!s32i = !cir.int<s, 32>
module {
diff --git a/clang/test/CIR/IR/func.cir b/clang/test/CIR/IR/func.cir
index 0e9a92f..9532859 100644
--- a/clang/test/CIR/IR/func.cir
+++ b/clang/test/CIR/IR/func.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
!s32i = !cir.int<s, 32>
!s64i = !cir.int<s, 64>
diff --git a/clang/test/CIR/IR/global-init.cir b/clang/test/CIR/IR/global-init.cir
index 727c067..2fd25df 100644
--- a/clang/test/CIR/IR/global-init.cir
+++ b/clang/test/CIR/IR/global-init.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt --verify-roundtrip %s -o - | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
!u8i = !cir.int<u, 8>
diff --git a/clang/test/CIR/IR/global-var-linkage.cir b/clang/test/CIR/IR/global-var-linkage.cir
index e1b7de4..df74e38 100644
--- a/clang/test/CIR/IR/global-var-linkage.cir
+++ b/clang/test/CIR/IR/global-var-linkage.cir
@@ -1,5 +1,4 @@
-// RUN: cir-opt %s -o %t.cir
-// RUN: FileCheck --input-file=%t.cir %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
!s32i = !cir.int<s, 32>
diff --git a/clang/test/CIR/IR/global.cir b/clang/test/CIR/IR/global.cir
index 28fad6b..0464db8 100644
--- a/clang/test/CIR/IR/global.cir
+++ b/clang/test/CIR/IR/global.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s -o - | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
!s8i = !cir.int<s, 8>
!s16i = !cir.int<s, 16>
diff --git a/clang/test/CIR/IR/label.cir b/clang/test/CIR/IR/label.cir
index 2211a4e..1049766 100644
--- a/clang/test/CIR/IR/label.cir
+++ b/clang/test/CIR/IR/label.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
!s32i = !cir.int<s, 32>
diff --git a/clang/test/CIR/IR/module.cir b/clang/test/CIR/IR/module.cir
index 7ce2c0b..8c782fd 100644
--- a/clang/test/CIR/IR/module.cir
+++ b/clang/test/CIR/IR/module.cir
@@ -1,5 +1,4 @@
-// RUN: cir-opt %s -split-input-file -o %t.cir
-// RUN: FileCheck --input-file=%t.cir %s
+// RUN: cir-opt %s -split-input-file --verify-roundtrip | FileCheck %s
// Should parse and print C source language attribute.
module attributes {cir.lang = #cir.lang<c>} { }
diff --git a/clang/test/CIR/IR/stack-save-restore.cir b/clang/test/CIR/IR/stack-save-restore.cir
index f98889ac..476f212 100644
--- a/clang/test/CIR/IR/stack-save-restore.cir
+++ b/clang/test/CIR/IR/stack-save-restore.cir
@@ -1,6 +1,6 @@
// Test the CIR operations can parse and print correctly (roundtrip)
-// RUN: cir-opt %s | cir-opt | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
!u8i = !cir.int<u, 8>
diff --git a/clang/test/CIR/IR/struct.cir b/clang/test/CIR/IR/struct.cir
index 33f2e98..2e011fb 100644
--- a/clang/test/CIR/IR/struct.cir
+++ b/clang/test/CIR/IR/struct.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
!u8i = !cir.int<u, 8>
!u16i = !cir.int<u, 16>
diff --git a/clang/test/CIR/IR/switch-flat.cir b/clang/test/CIR/IR/switch-flat.cir
index 8c11a74..d39c3e7 100644
--- a/clang/test/CIR/IR/switch-flat.cir
+++ b/clang/test/CIR/IR/switch-flat.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
!s32i = !cir.int<s, 32>
cir.func @FlatSwitchWithoutDefault(%arg0: !s32i) {
diff --git a/clang/test/CIR/IR/switch.cir b/clang/test/CIR/IR/switch.cir
index 0bdc9c1..87d45bf 100644
--- a/clang/test/CIR/IR/switch.cir
+++ b/clang/test/CIR/IR/switch.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
!s32i = !cir.int<s, 32>
cir.func @s0() {
diff --git a/clang/test/CIR/IR/ternary.cir b/clang/test/CIR/IR/ternary.cir
index e419c7f..78e1de4 100644
--- a/clang/test/CIR/IR/ternary.cir
+++ b/clang/test/CIR/IR/ternary.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | cir-opt | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
!u32i = !cir.int<u, 32>
module {
diff --git a/clang/test/CIR/IR/throw.cir b/clang/test/CIR/IR/throw.cir
index 8b24b48..e7a1bf4 100644
--- a/clang/test/CIR/IR/throw.cir
+++ b/clang/test/CIR/IR/throw.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
!s32i = !cir.int<s, 32>
diff --git a/clang/test/CIR/IR/unary.cir b/clang/test/CIR/IR/unary.cir
index ba3bc20d..d01d4eb 100644
--- a/clang/test/CIR/IR/unary.cir
+++ b/clang/test/CIR/IR/unary.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
!s32i = !cir.int<s, 32>
!s64i = !cir.int<s, 64>
diff --git a/clang/test/CIR/IR/vector.cir b/clang/test/CIR/IR/vector.cir
index 6d8e5be..d274c35 100644
--- a/clang/test/CIR/IR/vector.cir
+++ b/clang/test/CIR/IR/vector.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
!s32i = !cir.int<s, 32>
diff --git a/clang/test/CIR/IR/vtable-addrpt.cir b/clang/test/CIR/IR/vtable-addrpt.cir
index 106e748..7c8fa8d 100644
--- a/clang/test/CIR/IR/vtable-addrpt.cir
+++ b/clang/test/CIR/IR/vtable-addrpt.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
// Test the parsing and printing of a constructor that uses a vtable address_point op.
diff --git a/clang/test/CIR/IR/vtable-attr.cir b/clang/test/CIR/IR/vtable-attr.cir
index 3854208..70e3296 100644
--- a/clang/test/CIR/IR/vtable-attr.cir
+++ b/clang/test/CIR/IR/vtable-attr.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
!rec_Q = !cir.record<struct "Q" {!cir.vptr}>
!rec_S = !cir.record<struct "S" {!cir.vptr}>
diff --git a/clang/test/CIR/IR/vtt-addrpoint.cir b/clang/test/CIR/IR/vtt-addrpoint.cir
index 11e5f4d..823ddd2 100644
--- a/clang/test/CIR/IR/vtt-addrpoint.cir
+++ b/clang/test/CIR/IR/vtt-addrpoint.cir
@@ -1,4 +1,4 @@
-// RUN: cir-opt %s | FileCheck %s
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
// Test the parsing and printing of the two forms of vtt.address_point op, as
// they will appear in constructors.
diff --git a/clang/test/Parser/recovery-after-expected-unqualified-id.cpp b/clang/test/Parser/recovery-after-expected-unqualified-id.cpp
new file mode 100644
index 0000000..8019b46d
--- /dev/null
+++ b/clang/test/Parser/recovery-after-expected-unqualified-id.cpp
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -verify %s
+
+3.2 // expected-error {{expected unqualified-id}}
+
+extern "C" {
+ typedef int Int;
+}
+
+Int foo(); // Ok
diff --git a/clang/test/SemaCXX/cxx20-ctad-type-alias.cpp b/clang/test/SemaCXX/cxx20-ctad-type-alias.cpp
index 2f1817d..fd1a5c0 100644
--- a/clang/test/SemaCXX/cxx20-ctad-type-alias.cpp
+++ b/clang/test/SemaCXX/cxx20-ctad-type-alias.cpp
@@ -586,3 +586,18 @@ Baz a{};
static_assert(__is_same(decltype(a), A<A<int>>));
} // namespace GH133132
+
+namespace GH130604 {
+template <typename T> struct A {
+ A(T);
+};
+
+template <typename T, template <typename> class TT = A> using Alias = TT<T>; // #gh130604-alias
+template <typename T> using Alias2 = Alias<T>;
+
+Alias2 a(42);
+// expected-error@-1 {{no viable constructor or deduction guide for deduction of template arguments of 'Alias2'}}
+Alias b(42);
+// expected-error@-1 {{alias template 'Alias' requires template arguments; argument deduction only allowed for class templates or alias template}}
+// expected-note@#gh130604-alias {{template is declared here}}
+}
diff --git a/clang/test/SemaCXX/cxx98-compat.cpp b/clang/test/SemaCXX/cxx98-compat.cpp
index 8e7acf7..587c242 100644
--- a/clang/test/SemaCXX/cxx98-compat.cpp
+++ b/clang/test/SemaCXX/cxx98-compat.cpp
@@ -1,6 +1,7 @@
-// RUN: %clang_cc1 -fsyntax-only -std=c++11 -Wc++98-compat -verify %s
-// RUN: %clang_cc1 -fsyntax-only -std=c++14 -Wc++98-compat -verify %s -DCXX14COMPAT
-// RUN: %clang_cc1 -fsyntax-only -std=c++17 -Wc++98-compat -verify %s -DCXX14COMPAT -DCXX17COMPAT
+// RUN: %clang_cc1 -fsyntax-only -std=c++11 -Wc++98-compat -verify=expected,not-cpp20 %s
+// RUN: %clang_cc1 -fsyntax-only -std=c++14 -Wc++98-compat -verify=expected,not-cpp20 %s -DCXX14COMPAT
+// RUN: %clang_cc1 -fsyntax-only -std=c++17 -Wc++98-compat -verify=expected,not-cpp20 %s -DCXX14COMPAT -DCXX17COMPAT
+// RUN: %clang_cc1 -fsyntax-only -std=c++20 -Wc++98-compat -verify=expected,cpp20 %s -DCXX14COMPAT -DCXX17COMPAT
namespace std {
struct type_info;
@@ -226,7 +227,8 @@ void TrivialButNonPODThroughEllipsis() {
}
struct HasExplicitConversion {
- explicit operator bool(); // expected-warning {{explicit conversion functions are incompatible with C++98}}
+ // FIXME I think we should generate this diagnostic in C++20
+ explicit operator bool(); // not-cpp20-warning {{explicit conversion functions are incompatible with C++98}}
};
struct Struct {};
@@ -430,3 +432,12 @@ void ctad_test() {
CTAD t = s; // expected-warning {{class template argument deduction is incompatible with C++ standards before C++17}}
}
#endif
+
+namespace GH161702 {
+struct S {
+ enum E { A };
+ using E::A; // expected-warning {{enumeration type in nested name specifier is incompatible with C++98}}
+ // not-cpp20-error@-1 {{using declaration refers to its own class}}
+ // cpp20-warning@-2 {{member using declaration naming non-class ''E'' enumerator is incompatible with C++ standards before C++20}}
+};
+}
diff --git a/flang/include/flang/Lower/OpenACC.h b/flang/include/flang/Lower/OpenACC.h
index 19d7594..4622dbc 100644
--- a/flang/include/flang/Lower/OpenACC.h
+++ b/flang/include/flang/Lower/OpenACC.h
@@ -77,7 +77,8 @@ static constexpr llvm::StringRef privatizationRecipePrefix = "privatization";
mlir::Value genOpenACCConstruct(AbstractConverter &,
Fortran::semantics::SemanticsContext &,
pft::Evaluation &,
- const parser::OpenACCConstruct &);
+ const parser::OpenACCConstruct &,
+ Fortran::lower::SymMap &localSymbols);
void genOpenACCDeclarativeConstruct(
AbstractConverter &, Fortran::semantics::SemanticsContext &,
StatementContext &, const parser::OpenACCDeclarativeConstruct &);
diff --git a/flang/include/flang/Lower/SymbolMap.h b/flang/include/flang/Lower/SymbolMap.h
index 813df77..e57b6a4 100644
--- a/flang/include/flang/Lower/SymbolMap.h
+++ b/flang/include/flang/Lower/SymbolMap.h
@@ -260,6 +260,10 @@ public:
return lookupSymbol(*sym);
}
+ /// Find a symbol by name and return its value if it appears in the current
+ /// mappings. This lookup is more expensive as it iterates over the map.
+ const semantics::Symbol *lookupSymbolByName(llvm::StringRef symName);
+
/// Find `symbol` and return its value if it appears in the inner-most level
/// map.
SymbolBox shallowLookupSymbol(semantics::SymbolRef sym);
diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h
index e90e9c6..a0d5ae7 100644
--- a/flang/include/flang/Semantics/symbol.h
+++ b/flang/include/flang/Semantics/symbol.h
@@ -801,7 +801,7 @@ public:
AccPrivate, AccFirstPrivate, AccShared,
// OpenACC data-mapping attribute
AccCopy, AccCopyIn, AccCopyInReadOnly, AccCopyOut, AccCreate, AccDelete,
- AccPresent, AccLink, AccDeviceResident, AccDevicePtr,
+ AccPresent, AccLink, AccDeviceResident, AccDevicePtr, AccUseDevice,
// OpenACC declare
AccDeclare,
// OpenACC data-movement attribute
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 149e51b..780d56f 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -3182,7 +3182,7 @@ private:
mlir::OpBuilder::InsertPoint insertPt = builder->saveInsertionPoint();
localSymbols.pushScope();
mlir::Value exitCond = genOpenACCConstruct(
- *this, bridge.getSemanticsContext(), getEval(), acc);
+ *this, bridge.getSemanticsContext(), getEval(), acc, localSymbols);
const Fortran::parser::OpenACCLoopConstruct *accLoop =
std::get_if<Fortran::parser::OpenACCLoopConstruct>(&acc.u);
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 95d0ada..f9b9b850 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -3184,7 +3184,8 @@ genACCHostDataOp(Fortran::lower::AbstractConverter &converter,
Fortran::lower::pft::Evaluation &eval,
Fortran::semantics::SemanticsContext &semanticsContext,
Fortran::lower::StatementContext &stmtCtx,
- const Fortran::parser::AccClauseList &accClauseList) {
+ const Fortran::parser::AccClauseList &accClauseList,
+ Fortran::lower::SymMap &localSymbols) {
mlir::Value ifCond;
llvm::SmallVector<mlir::Value> dataOperands;
bool addIfPresentAttr = false;
@@ -3199,6 +3200,19 @@ genACCHostDataOp(Fortran::lower::AbstractConverter &converter,
} else if (const auto *useDevice =
std::get_if<Fortran::parser::AccClause::UseDevice>(
&clause.u)) {
+      // When CUDA Fortran is enabled, extra symbols are used in the host_data
+ // region. Look for them and bind their values with the symbols in the
+ // outer scope.
+ if (semanticsContext.IsEnabled(Fortran::common::LanguageFeature::CUDA)) {
+ const Fortran::parser::AccObjectList &objectList{useDevice->v};
+ for (const auto &accObject : objectList.v) {
+ Fortran::semantics::Symbol &symbol =
+ getSymbolFromAccObject(accObject);
+ const Fortran::semantics::Symbol *baseSym =
+ localSymbols.lookupSymbolByName(symbol.name().ToString());
+ localSymbols.copySymbolBinding(*baseSym, symbol);
+ }
+ }
genDataOperandOperations<mlir::acc::UseDeviceOp>(
useDevice->v, converter, semanticsContext, stmtCtx, dataOperands,
mlir::acc::DataClause::acc_use_device,
@@ -3239,11 +3253,11 @@ genACCHostDataOp(Fortran::lower::AbstractConverter &converter,
hostDataOp.setIfPresentAttr(builder.getUnitAttr());
}
-static void
-genACC(Fortran::lower::AbstractConverter &converter,
- Fortran::semantics::SemanticsContext &semanticsContext,
- Fortran::lower::pft::Evaluation &eval,
- const Fortran::parser::OpenACCBlockConstruct &blockConstruct) {
+static void genACC(Fortran::lower::AbstractConverter &converter,
+ Fortran::semantics::SemanticsContext &semanticsContext,
+ Fortran::lower::pft::Evaluation &eval,
+ const Fortran::parser::OpenACCBlockConstruct &blockConstruct,
+ Fortran::lower::SymMap &localSymbols) {
const auto &beginBlockDirective =
std::get<Fortran::parser::AccBeginBlockDirective>(blockConstruct.t);
const auto &blockDirective =
@@ -3273,7 +3287,7 @@ genACC(Fortran::lower::AbstractConverter &converter,
accClauseList);
} else if (blockDirective.v == llvm::acc::ACCD_host_data) {
genACCHostDataOp(converter, currentLocation, eval, semanticsContext,
- stmtCtx, accClauseList);
+ stmtCtx, accClauseList, localSymbols);
}
}
@@ -4647,13 +4661,15 @@ mlir::Value Fortran::lower::genOpenACCConstruct(
Fortran::lower::AbstractConverter &converter,
Fortran::semantics::SemanticsContext &semanticsContext,
Fortran::lower::pft::Evaluation &eval,
- const Fortran::parser::OpenACCConstruct &accConstruct) {
+ const Fortran::parser::OpenACCConstruct &accConstruct,
+ Fortran::lower::SymMap &localSymbols) {
mlir::Value exitCond;
Fortran::common::visit(
common::visitors{
[&](const Fortran::parser::OpenACCBlockConstruct &blockConstruct) {
- genACC(converter, semanticsContext, eval, blockConstruct);
+ genACC(converter, semanticsContext, eval, blockConstruct,
+ localSymbols);
},
[&](const Fortran::parser::OpenACCCombinedConstruct
&combinedConstruct) {
diff --git a/flang/lib/Lower/SymbolMap.cpp b/flang/lib/Lower/SymbolMap.cpp
index 080f21e..78529e0 100644
--- a/flang/lib/Lower/SymbolMap.cpp
+++ b/flang/lib/Lower/SymbolMap.cpp
@@ -45,6 +45,16 @@ Fortran::lower::SymMap::lookupSymbol(Fortran::semantics::SymbolRef symRef) {
return SymbolBox::None{};
}
+const Fortran::semantics::Symbol *
+Fortran::lower::SymMap::lookupSymbolByName(llvm::StringRef symName) {
+ for (auto jmap = symbolMapStack.rbegin(), jend = symbolMapStack.rend();
+ jmap != jend; ++jmap)
+ for (auto const &[sym, symBox] : *jmap)
+ if (sym->name().ToString() == symName)
+ return sym;
+ return nullptr;
+}
+
Fortran::lower::SymbolBox Fortran::lower::SymMap::shallowLookupSymbol(
Fortran::semantics::SymbolRef symRef) {
auto *sym = symRef->HasLocalLocality() ? &*symRef : &symRef->GetUltimate();
diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp
index 1049a6d2..7b88100 100644
--- a/flang/lib/Semantics/check-declarations.cpp
+++ b/flang/lib/Semantics/check-declarations.cpp
@@ -1189,7 +1189,8 @@ void CheckHelper::CheckObjectEntity(
}
} else if (!subpDetails && symbol.owner().kind() != Scope::Kind::Module &&
symbol.owner().kind() != Scope::Kind::MainProgram &&
- symbol.owner().kind() != Scope::Kind::BlockConstruct) {
+ symbol.owner().kind() != Scope::Kind::BlockConstruct &&
+ symbol.owner().kind() != Scope::Kind::OpenACCConstruct) {
messages_.Say(
"ATTRIBUTES(%s) may apply only to module, host subprogram, block, or device subprogram data"_err_en_US,
parser::ToUpperCaseLetters(common::EnumToString(attr)));
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index b1eaaa8..624b890 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -328,6 +328,11 @@ public:
return false;
}
+ bool Pre(const parser::AccClause::UseDevice &x) {
+ ResolveAccObjectList(x.v, Symbol::Flag::AccUseDevice);
+ return false;
+ }
+
void Post(const parser::Name &);
private:
diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index d1150a9..5041a6a 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -1387,6 +1387,8 @@ private:
// Create scopes for OpenACC constructs
class AccVisitor : public virtual DeclarationVisitor {
public:
+ explicit AccVisitor(SemanticsContext &context) : context_{context} {}
+
void AddAccSourceRange(const parser::CharBlock &);
static bool NeedsScope(const parser::OpenACCBlockConstruct &);
@@ -1395,6 +1397,7 @@ public:
void Post(const parser::OpenACCBlockConstruct &);
bool Pre(const parser::OpenACCCombinedConstruct &);
void Post(const parser::OpenACCCombinedConstruct &);
+ bool Pre(const parser::AccClause::UseDevice &x);
bool Pre(const parser::AccBeginBlockDirective &x) {
AddAccSourceRange(x.source);
return true;
@@ -1430,6 +1433,11 @@ public:
void Post(const parser::AccBeginLoopDirective &x) {
messageHandler().set_currStmtSource(std::nullopt);
}
+
+ void CopySymbolWithDevice(const parser::Name *name);
+
+private:
+ SemanticsContext &context_;
};
bool AccVisitor::NeedsScope(const parser::OpenACCBlockConstruct &x) {
@@ -1459,6 +1467,60 @@ bool AccVisitor::Pre(const parser::OpenACCBlockConstruct &x) {
return true;
}
+void AccVisitor::CopySymbolWithDevice(const parser::Name *name) {
+ // When CUDA Fortran is enabled together with OpenACC, new
+  // symbols are created for the ones appearing in the use_device
+ // clause. These new symbols have the CUDA Fortran device
+ // attribute.
+ if (context_.languageFeatures().IsEnabled(common::LanguageFeature::CUDA)) {
+ name->symbol = currScope().CopySymbol(*name->symbol);
+ if (auto *object{name->symbol->detailsIf<ObjectEntityDetails>()}) {
+ object->set_cudaDataAttr(common::CUDADataAttr::Device);
+ }
+ }
+}
+
+bool AccVisitor::Pre(const parser::AccClause::UseDevice &x) {
+ for (const auto &accObject : x.v.v) {
+ common::visit(
+ common::visitors{
+ [&](const parser::Designator &designator) {
+ if (const auto *name{
+ semantics::getDesignatorNameIfDataRef(designator)}) {
+ Symbol *prev{currScope().FindSymbol(name->source)};
+ if (prev != name->symbol) {
+ name->symbol = prev;
+ }
+ CopySymbolWithDevice(name);
+ } else {
+ if (const auto *dataRef{
+ std::get_if<parser::DataRef>(&designator.u)}) {
+ using ElementIndirection =
+ common::Indirection<parser::ArrayElement>;
+ if (auto *ind{std::get_if<ElementIndirection>(&dataRef->u)}) {
+ const parser::ArrayElement &arrayElement{ind->value()};
+ Walk(arrayElement.subscripts);
+ const parser::DataRef &base{arrayElement.base};
+ if (auto *name{std::get_if<parser::Name>(&base.u)}) {
+ Symbol *prev{currScope().FindSymbol(name->source)};
+ if (prev != name->symbol) {
+ name->symbol = prev;
+ }
+ CopySymbolWithDevice(name);
+ }
+ }
+ }
+ }
+ },
+ [&](const parser::Name &name) {
+ // TODO: common block in use_device?
+ },
+ },
+ accObject.u);
+ }
+ return false;
+}
+
void AccVisitor::Post(const parser::OpenACCBlockConstruct &x) {
if (NeedsScope(x)) {
PopScope();
@@ -2038,7 +2100,8 @@ public:
ResolveNamesVisitor(
SemanticsContext &context, ImplicitRulesMap &rules, Scope &top)
- : BaseVisitor{context, *this, rules}, topScope_{top} {
+ : BaseVisitor{context, *this, rules}, AccVisitor(context),
+ topScope_{top} {
PushScope(top);
}
diff --git a/flang/test/Lower/OpenACC/acc-host-data-cuda-device.f90 b/flang/test/Lower/OpenACC/acc-host-data-cuda-device.f90
new file mode 100644
index 0000000..da034ad
--- /dev/null
+++ b/flang/test/Lower/OpenACC/acc-host-data-cuda-device.f90
@@ -0,0 +1,43 @@
+
+! RUN: bbc -fopenacc -fcuda -emit-hlfir %s -o - | FileCheck %s
+
+module m
+
+interface doit
+subroutine __device_sub(a)
+ real(4), device, intent(in) :: a(:,:,:)
+ !dir$ ignore_tkr(c) a
+end
+subroutine __host_sub(a)
+ real(4), intent(in) :: a(:,:,:)
+ !dir$ ignore_tkr(c) a
+end
+end interface
+end module
+
+program testex1
+integer, parameter :: ntimes = 10
+integer, parameter :: ni=128
+integer, parameter :: nj=256
+integer, parameter :: nk=64
+real(4), dimension(ni,nj,nk) :: a
+
+!$acc enter data copyin(a)
+
+block; use m
+!$acc host_data use_device(a)
+do nt = 1, ntimes
+ call doit(a)
+end do
+!$acc end host_data
+end block
+
+block; use m
+do nt = 1, ntimes
+ call doit(a)
+end do
+end block
+end
+
+! CHECK: fir.call @_QP__device_sub
+! CHECK: fir.call @_QP__host_sub
diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/unordered_map-iterator/TestDataFormatterStdUnorderedMap.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/unordered_map-iterator/TestDataFormatterStdUnorderedMap.py
index 1e920fa..45f7b5b 100644
--- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/unordered_map-iterator/TestDataFormatterStdUnorderedMap.py
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/unordered_map-iterator/TestDataFormatterStdUnorderedMap.py
@@ -124,11 +124,6 @@ class StdUnorderedMapDataFormatterTestCase(TestBase):
self.check_ptr_ptr("ptr5")
self.check_ptr_ptr("ptr6")
- @expectedFailureAll(
- bugnumber="https://github.com/llvm/llvm-project/issues/146040",
- compiler="clang",
- compiler_version=["<", "21"],
- )
@add_test_categories(["libc++"])
def test_ptr_libcxx(self):
self.build(dictionary={"USE_LIBCPP": 1})
diff --git a/lldb/test/API/lang/cpp/abi_tag_structors/TestAbiTagStructors.py b/lldb/test/API/lang/cpp/abi_tag_structors/TestAbiTagStructors.py
index 87d8adb..2d3e4f7 100644
--- a/lldb/test/API/lang/cpp/abi_tag_structors/TestAbiTagStructors.py
+++ b/lldb/test/API/lang/cpp/abi_tag_structors/TestAbiTagStructors.py
@@ -10,6 +10,11 @@ from lldbsuite.test import lldbutil
class AbiTagStructorsTestCase(TestBase):
+ @skipIf(
+ compiler="clang",
+ compiler_version=["<", "22"],
+ bugnumber="Required Clang flag not supported",
+ )
@expectedFailureAll(oslist=["windows"])
def test_with_structor_linkage_names(self):
self.build(dictionary={"CXXFLAGS_EXTRAS": "-gstructor-decl-linkage-names"})
@@ -73,7 +78,16 @@ class AbiTagStructorsTestCase(TestBase):
Test that without linkage names on structor declarations we can't call
ABI-tagged structors.
"""
- self.build(dictionary={"CXXFLAGS_EXTRAS": "-gno-structor-decl-linkage-names"})
+ # In older versions of Clang the -gno-structor-decl-linkage-names
+ # behaviour was the default.
+ if self.expectedCompiler(["clang"]) and self.expectedCompilerVersion(
+ [">=", "22.0"]
+ ):
+ self.build(
+ dictionary={"CXXFLAGS_EXTRAS": "-gno-structor-decl-linkage-names"}
+ )
+ else:
+ self.build()
lldbutil.run_to_source_breakpoint(
self, "Break here", lldb.SBFileSpec("main.cpp", False)
@@ -105,12 +119,23 @@ class AbiTagStructorsTestCase(TestBase):
"expression TaggedLocal()", error=True, substrs=["Couldn't look up symbols"]
)
+ @skipIf(compiler="clang", compiler_version=["<", "22"])
@expectedFailureAll(oslist=["windows"])
- def test_nested_no_structor_linkage_names(self):
+ def test_nested_with_structor_linkage_names(self):
self.build(dictionary={"CXXFLAGS_EXTRAS": "-gstructor-decl-linkage-names"})
self.do_nested_structor_test()
@expectedFailureAll(oslist=["windows"])
- def test_nested_with_structor_linkage_names(self):
- self.build(dictionary={"CXXFLAGS_EXTRAS": "-gno-structor-decl-linkage-names"})
+ def test_nested_no_structor_linkage_names(self):
+ # In older versions of Clang the -gno-structor-decl-linkage-names
+ # behaviour was the default.
+ if self.expectedCompiler(["clang"]) and self.expectedCompilerVersion(
+ [">=", "22.0"]
+ ):
+ self.build(
+ dictionary={"CXXFLAGS_EXTRAS": "-gno-structor-decl-linkage-names"}
+ )
+ else:
+ self.build()
+
self.do_nested_structor_test()
diff --git a/lldb/test/API/lang/cpp/expr-definition-in-dylib/TestExprDefinitionInDylib.py b/lldb/test/API/lang/cpp/expr-definition-in-dylib/TestExprDefinitionInDylib.py
index c0545c70..b3bed43 100644
--- a/lldb/test/API/lang/cpp/expr-definition-in-dylib/TestExprDefinitionInDylib.py
+++ b/lldb/test/API/lang/cpp/expr-definition-in-dylib/TestExprDefinitionInDylib.py
@@ -6,6 +6,11 @@ from lldbsuite.test import lldbutil
class ExprDefinitionInDylibTestCase(TestBase):
+ @skipIf(
+ compiler="clang",
+ compiler_version=["<", "22"],
+ bugnumber="Required Clang flag not supported",
+ )
@skipIfWindows
def test_with_structor_linkage_names(self):
"""
@@ -74,7 +79,16 @@ class ExprDefinitionInDylibTestCase(TestBase):
Tests that if structor declarations don't have linkage names, we can't
call ABI-tagged constructors. But non-tagged ones are fine.
"""
- self.build(dictionary={"CXXFLAGS_EXTRAS": "-gno-structor-decl-linkage-names"})
+ # In older versions of Clang the -gno-structor-decl-linkage-names
+ # behaviour was the default.
+ if self.expectedCompiler(["clang"]) and self.expectedCompilerVersion(
+ [">=", "22.0"]
+ ):
+ self.build(
+ dictionary={"CXXFLAGS_EXTRAS": "-gno-structor-decl-linkage-names"}
+ )
+ else:
+ self.build()
target = self.dbg.CreateTarget(self.getBuildArtifact("a.out"))
self.assertTrue(target, VALID_TARGET)
@@ -95,6 +109,6 @@ class ExprDefinitionInDylibTestCase(TestBase):
self.expect_expr("Foo(10)", result_type="Foo")
- self.expect("Base()", error=True)
+ self.expect("expr Base()", error=True)
- self.expect("Bar()", error=True)
+ self.expect("expr Bar()", error=True)
diff --git a/lldb/test/API/lang/cpp/structured-binding/TestStructuredBinding.py b/lldb/test/API/lang/cpp/structured-binding/TestStructuredBinding.py
index 5f939ec..882c91d 100644
--- a/lldb/test/API/lang/cpp/structured-binding/TestStructuredBinding.py
+++ b/lldb/test/API/lang/cpp/structured-binding/TestStructuredBinding.py
@@ -99,16 +99,21 @@ class TestStructuredBinding(TestBase):
self.expect_expr("ty2", result_value="'z'")
self.expect_expr("tz2", result_value="10")
- self.expect(
- "frame variable",
- substrs=[
- "tx1 =",
- "ty1 =",
- "tz1 =",
- "tx2 =",
- "ty2 =",
- "tz2 =",
- "mp1 =",
- "mp2 =",
- ],
- )
+ # Older versions of Clang marked structured binding variables
+ # as artificial, and thus LLDB wouldn't display them.
+ if self.expectedCompiler(["clang"]) and self.expectedCompilerVersion(
+ [">=", "22.0"]
+ ):
+ self.expect(
+ "frame variable",
+ substrs=[
+ "tx1 =",
+ "ty1 =",
+ "tz1 =",
+ "tx2 =",
+ "ty2 =",
+ "tz2 =",
+ "mp1 =",
+ "mp2 =",
+ ],
+ )
diff --git a/lldb/test/Shell/Expr/TestGlobalSymbolObjCConflict.c b/lldb/test/Shell/Expr/TestGlobalSymbolObjCConflict.c
index 62c0162..8f1bb62 100644
--- a/lldb/test/Shell/Expr/TestGlobalSymbolObjCConflict.c
+++ b/lldb/test/Shell/Expr/TestGlobalSymbolObjCConflict.c
@@ -1,3 +1,5 @@
+// XFAIL: target-windows
+
// Tests that LLDB correctly parses global symbols
// starting with 'O'. On some platforms (e.g., Darwin)
// C-symbols are prefixed with a '_'. The LLDB Macho-O
@@ -9,7 +11,7 @@
// RUN: %clang_host -c -g -fno-common %s -o %t.o
// RUN: %clang_host %t.o -o %t.out
// RUN: %lldb -b -x %t.out \
-// RUN: -o "b 27" \
+// RUN: -o "b 29" \
// RUN: -o "run" \
// RUN: -o "p OglobalVar" \
// RUN: -o "p Oabc" | FileCheck %s
diff --git a/llvm/.clang-format b/llvm/.clang-format
index 5bead5f..ecb44bf 100644
--- a/llvm/.clang-format
+++ b/llvm/.clang-format
@@ -1,2 +1,2 @@
BasedOnStyle: LLVM
-
+LineEnding: LF
diff --git a/llvm/include/llvm/CodeGen/MIRYamlMapping.h b/llvm/include/llvm/CodeGen/MIRYamlMapping.h
index c7304e3..e80c138 100644
--- a/llvm/include/llvm/CodeGen/MIRYamlMapping.h
+++ b/llvm/include/llvm/CodeGen/MIRYamlMapping.h
@@ -378,6 +378,8 @@ struct ScalarEnumerationTraits<TargetStackID::Value> {
IO.enumCase(ID, "default", TargetStackID::Default);
IO.enumCase(ID, "sgpr-spill", TargetStackID::SGPRSpill);
IO.enumCase(ID, "scalable-vector", TargetStackID::ScalableVector);
+ IO.enumCase(ID, "scalable-predicate-vector",
+ TargetStackID::ScalablePredicateVector);
IO.enumCase(ID, "wasm-local", TargetStackID::WasmLocal);
IO.enumCase(ID, "noalloc", TargetStackID::NoAlloc);
}
diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index 00c7343..b37c677 100644
--- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -497,7 +497,14 @@ public:
/// Should this stack ID be considered in MaxAlignment.
bool contributesToMaxAlignment(uint8_t StackID) {
return StackID == TargetStackID::Default ||
- StackID == TargetStackID::ScalableVector;
+ StackID == TargetStackID::ScalableVector ||
+ StackID == TargetStackID::ScalablePredicateVector;
+ }
+
+ bool isScalableStackID(int ObjectIdx) const {
+ uint8_t StackID = getStackID(ObjectIdx);
+ return StackID == TargetStackID::ScalableVector ||
+ StackID == TargetStackID::ScalablePredicateVector;
}
/// setObjectAlignment - Change the alignment of the specified stack object.
diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index 0e29e45..75696faf 100644
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -32,6 +32,7 @@ enum Value {
SGPRSpill = 1,
ScalableVector = 2,
WasmLocal = 3,
+ ScalablePredicateVector = 4,
NoAlloc = 255
};
}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 7c9aef5..fbc92d7 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -130,8 +130,6 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
: DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
class AdvSIMD_1VectorArg_Expand_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
- class AdvSIMD_1VectorArg_Long_Intrinsic
- : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMTruncatedType<0>], [IntrNoMem]>;
class AdvSIMD_1IntArg_Narrow_Intrinsic
: DefaultAttrsIntrinsic<[llvm_any_ty], [llvm_any_ty], [IntrNoMem]>;
class AdvSIMD_1VectorArg_Narrow_Intrinsic
@@ -150,9 +148,6 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
class AdvSIMD_2VectorArg_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem]>;
- class AdvSIMD_2VectorArg_Compare_Intrinsic
- : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>],
- [IntrNoMem]>;
class AdvSIMD_2Arg_FloatCompare_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, LLVMMatchType<1>],
[IntrNoMem]>;
@@ -160,10 +155,6 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMTruncatedType<0>, LLVMTruncatedType<0>],
[IntrNoMem]>;
- class AdvSIMD_2VectorArg_Wide_Intrinsic
- : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
- [LLVMMatchType<0>, LLVMTruncatedType<0>],
- [IntrNoMem]>;
class AdvSIMD_2VectorArg_Narrow_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMExtendedType<0>, LLVMExtendedType<0>],
@@ -172,10 +163,6 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
: DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMExtendedType<0>, llvm_i32_ty],
[IntrNoMem]>;
- class AdvSIMD_2VectorArg_Scalar_Expand_BySize_Intrinsic
- : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
- [llvm_anyvector_ty],
- [IntrNoMem]>;
class AdvSIMD_2VectorArg_Scalar_Wide_BySize_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMTruncatedType<0>],
@@ -184,10 +171,6 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMTruncatedType<0>, llvm_i32_ty],
[IntrNoMem]>;
- class AdvSIMD_2VectorArg_Tied_Narrow_Intrinsic
- : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
- [LLVMOneNthElementsVectorType<0, 2>, llvm_anyvector_ty],
- [IntrNoMem]>;
class AdvSIMD_2VectorArg_Lane_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, llvm_anyint_ty, llvm_i32_ty],
@@ -205,14 +188,6 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
[IntrNoMem]>;
- class AdvSIMD_3VectorArg_Tied_Narrow_Intrinsic
- : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
- [LLVMOneNthElementsVectorType<0, 2>, llvm_anyvector_ty,
- LLVMMatchType<1>], [IntrNoMem]>;
- class AdvSIMD_3VectorArg_Scalar_Tied_Narrow_Intrinsic
- : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
- [LLVMOneNthElementsVectorType<0, 2>, llvm_anyvector_ty, llvm_i32_ty],
- [IntrNoMem]>;
class AdvSIMD_CvtFxToFP_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty],
[IntrNoMem]>;
@@ -238,11 +213,6 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
[LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>],
[IntrNoMem]>;
- class AdvSIMD_FML_Intrinsic
- : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
- [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>],
- [IntrNoMem]>;
-
class AdvSIMD_BF16FML_Intrinsic
: DefaultAttrsIntrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v8bf16_ty],
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 8e6cf3e..7fe13a3 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -1406,8 +1406,24 @@ bool RAGreedy::trySplitAroundHintReg(MCPhysReg Hint,
continue;
// Check if VirtReg interferes with OtherReg after this COPY instruction.
- if (!IsDef && VirtReg.liveAt(LIS->getInstructionIndex(Instr).getRegSlot()))
- continue;
+ if (Opnd.readsReg()) {
+ SlotIndex Index = LIS->getInstructionIndex(Instr).getRegSlot();
+
+ if (SubReg) {
+ LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg);
+ if (IsDef)
+ Mask = ~Mask;
+
+ if (any_of(VirtReg.subranges(), [=](const LiveInterval::SubRange &S) {
+ return (S.LaneMask & Mask).any() && S.liveAt(Index);
+ })) {
+ continue;
+ }
+ } else {
+ if (VirtReg.liveAt(Index))
+ continue;
+ }
+ }
MCRegister OtherPhysReg =
OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
@@ -2419,25 +2435,28 @@ void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
unsigned SubReg = Opnd.getSubReg();
// Get the current assignment.
- MCRegister OtherPhysReg =
- OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
- if (OtherSubReg) {
- if (OtherReg.isPhysical()) {
- MCRegister Tuple =
- TRI->getMatchingSuperReg(OtherPhysReg, OtherSubReg, RC);
- if (!Tuple)
- continue;
- OtherPhysReg = Tuple;
- } else {
- // TODO: There should be a hinting mechanism for subregisters
- if (SubReg != OtherSubReg)
- continue;
- }
+ MCRegister OtherPhysReg;
+ if (OtherReg.isPhysical()) {
+ if (OtherSubReg)
+ OtherPhysReg = TRI->getMatchingSuperReg(OtherReg, OtherSubReg, RC);
+ else if (SubReg)
+ OtherPhysReg = TRI->getMatchingSuperReg(OtherReg, SubReg, RC);
+ else
+ OtherPhysReg = OtherReg;
+ } else {
+ OtherPhysReg = VRM->getPhys(OtherReg);
+ // TODO: Should find matching superregister, but applying this in the
+ // non-hint case currently causes regressions
+
+ if (SubReg && OtherSubReg && SubReg != OtherSubReg)
+ continue;
}
// Push the collected information.
- Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
- OtherPhysReg));
+ if (OtherPhysReg) {
+ Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
+ OtherPhysReg));
+ }
}
}
@@ -2466,15 +2485,13 @@ void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
// We have a broken hint, check if it is possible to fix it by
// reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
// some register and PhysReg may be available for the other live-ranges.
- SmallSet<Register, 4> Visited;
- SmallVector<Register, 2> RecoloringCandidates;
HintsInfo Info;
Register Reg = VirtReg.reg();
MCRegister PhysReg = VRM->getPhys(Reg);
// Start the recoloring algorithm from the input live-interval, then
// it will propagate to the ones that are copy-related with it.
- Visited.insert(Reg);
- RecoloringCandidates.push_back(Reg);
+ SmallSet<Register, 4> Visited = {Reg};
+ SmallVector<Register, 2> RecoloringCandidates = {Reg};
LLVM_DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI)
<< '(' << printReg(PhysReg, TRI) << ")\n");
@@ -2482,12 +2499,10 @@ void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
do {
Reg = RecoloringCandidates.pop_back_val();
- // We cannot recolor physical register.
- if (Reg.isPhysical())
- continue;
+ MCRegister CurrPhys = VRM->getPhys(Reg);
// This may be a skipped register.
- if (!VRM->hasPhys(Reg)) {
+ if (!CurrPhys) {
assert(!shouldAllocateRegister(Reg) &&
"We have an unallocated variable which should have been handled");
continue;
@@ -2496,7 +2511,6 @@ void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
// Get the live interval mapped with this virtual register to be able
// to check for the interference with the new color.
LiveInterval &LI = LIS->getInterval(Reg);
- MCRegister CurrPhys = VRM->getPhys(Reg);
// Check that the new color matches the register class constraints and
// that it is free for this live range.
if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
@@ -2533,7 +2547,8 @@ void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
// Push all copy-related live-ranges to keep reconciling the broken
// hints.
for (const HintInfo &HI : Info) {
- if (Visited.insert(HI.Reg).second)
+ // We cannot recolor physical registers.
+ if (HI.Reg.isVirtual() && Visited.insert(HI.Reg).second)
RecoloringCandidates.push_back(HI.Reg);
}
} while (!RecoloringCandidates.empty());
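
The trySplitAroundHintReg change above only treats the hint as interfering when the lanes actually read — or, for defs, the lanes not written — are live at the copy. A rough standalone model of that lane-mask filter, with plain integers standing in for LaneBitmask and LiveInterval subranges (assumed, simplified types, not LLVM's API):

#include <cstdint>
#include <vector>

// Stand-in for a LiveInterval subrange: the lanes it covers and whether it
// is live at the slot index of the copy under consideration.
struct SubRange {
  uint64_t LaneMask;
  bool LiveAtCopy;
};

// Models the check added above: for a read through a subregister the copied
// lanes are tested; for a def the complement is tested, so only lanes that
// survive the def can interfere.
bool hintInterferes(const std::vector<SubRange> &Subranges,
                    uint64_t SubRegMask, bool IsDef) {
  uint64_t Mask = IsDef ? ~SubRegMask : SubRegMask;
  for (const SubRange &S : Subranges)
    if ((S.LaneMask & Mask) != 0 && S.LiveAtCopy)
      return true;
  return false;
}
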
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index ff7cd66..87d5453 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -6256,17 +6256,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
// FIXME: Not all targets may support EVL in VP_LOAD. These will have been
// removed from the IR by the ExpandVectorPredication pass but we're
// reintroducing them here.
- EVT LdVT = LD->getMemoryVT();
- EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), LdVT);
- EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- WideVT.getVectorElementCount());
+ EVT VT = LD->getValueType(0);
+ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ EVT WideMaskVT = getSetCCResultType(WideVT);
+
if (ExtType == ISD::NON_EXTLOAD &&
TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WideVT) &&
TLI.isTypeLegal(WideMaskVT)) {
SDLoc DL(N);
SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);
SDValue EVL = DAG.getElementCount(DL, TLI.getVPExplicitVectorLengthTy(),
- LdVT.getVectorElementCount());
+ VT.getVectorElementCount());
SDValue NewLoad =
DAG.getLoadVP(LD->getAddressingMode(), ISD::NON_EXTLOAD, WideVT, DL,
LD->getChain(), LD->getBasePtr(), LD->getOffset(), Mask,
@@ -6303,6 +6303,24 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
return Result;
}
+ if (VT.isVector()) {
+ // If all else fails, replace the load with a wide masked load.
+ SDLoc DL(N);
+ EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+
+ SDValue Len = DAG.getElementCount(DL, IdxVT, VT.getVectorElementCount());
+ SDValue Mask = DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, WideMaskVT,
+ DAG.getConstant(0, DL, IdxVT), Len);
+
+ SDValue NewLoad = DAG.getMaskedLoad(
+ WideVT, DL, LD->getChain(), LD->getBasePtr(), LD->getOffset(), Mask,
+ DAG.getPOISON(WideVT), LD->getMemoryVT(), LD->getMemOperand(),
+ LD->getAddressingMode(), LD->getExtensionType());
+
+ ReplaceValueWith(SDValue(N, 1), NewLoad.getValue(1));
+ return NewLoad;
+ }
+
report_fatal_error("Unable to widen vector load");
}
@@ -7516,8 +7534,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
SDValue StVal = ST->getValue();
EVT StVT = StVal.getValueType();
EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StVT);
- EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- WideVT.getVectorElementCount());
+ EVT WideMaskVT = getSetCCResultType(WideVT);
if (TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) &&
TLI.isTypeLegal(WideMaskVT)) {
@@ -7540,6 +7557,22 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
}
+ if (StVT.isVector()) {
+ // If all else fails, replace the store with a wide masked store.
+ SDLoc DL(N);
+ EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+
+ SDValue WideStVal = GetWidenedVector(StVal);
+ SDValue Len = DAG.getElementCount(DL, IdxVT, StVT.getVectorElementCount());
+ SDValue Mask = DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, WideMaskVT,
+ DAG.getConstant(0, DL, IdxVT), Len);
+
+ return DAG.getMaskedStore(ST->getChain(), DL, WideStVal, ST->getBasePtr(),
+ ST->getOffset(), Mask, ST->getMemoryVT(),
+ ST->getMemOperand(), ST->getAddressingMode(),
+ ST->isTruncatingStore());
+ }
+
report_fatal_error("Unable to widen vector store");
}
@@ -8298,8 +8331,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
AAMDNodes AAInfo = LD->getAAInfo();
if (LdVT.isScalableVector())
- report_fatal_error("Generating widen scalable extending vector loads is "
- "not yet supported");
+ return SDValue();
EVT EltVT = WidenVT.getVectorElementType();
EVT LdEltVT = LdVT.getVectorElementType();
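
Both widening fallbacks above (WidenVecRes_LOAD and WidenVecOp_STORE) rely on the same trick: build a mask that enables exactly the original element count via GET_ACTIVE_LANE_MASK, then issue a masked load or store of the wider type. A scalar sketch of the mask semantics, using fixed element counts purely for illustration:

#include <cstddef>
#include <vector>

// Lane mask equivalent to get.active.lane.mask(0, N) for a widened vector of
// WideN lanes: lanes [0, N) are active, the padding lanes are inactive.
std::vector<bool> activeLaneMask(std::size_t N, std::size_t WideN) {
  std::vector<bool> Mask(WideN, false);
  for (std::size_t Lane = 0; Lane < WideN && Lane < N; ++Lane)
    Mask[Lane] = true;
  return Mask;
}

// Masked load of the widened type: inactive lanes keep the passthru value
// (the diff uses poison there), so only the original N elements are read.
std::vector<int> maskedWidenLoad(const int *Src, std::size_t N,
                                 std::size_t WideN, int Passthru) {
  std::vector<bool> Mask = activeLaneMask(N, WideN);
  std::vector<int> Result(WideN, Passthru);
  for (std::size_t Lane = 0; Lane < WideN; ++Lane)
    if (Mask[Lane])
      Result[Lane] = Src[Lane];
  return Result;
}
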
diff --git a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
index 096a33c..ec75dc3 100644
--- a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
+++ b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
@@ -72,7 +72,7 @@ struct StackFrameLayoutAnalysis {
: Slot(Idx), Size(MFI.getObjectSize(Idx)),
Align(MFI.getObjectAlign(Idx).value()), Offset(Offset),
SlotTy(Invalid), Scalable(false) {
- Scalable = MFI.getStackID(Idx) == TargetStackID::ScalableVector;
+ Scalable = MFI.isScalableStackID(Idx);
if (MFI.isSpillSlotObjectIndex(Idx))
SlotTy = SlotType::Spill;
else if (MFI.isFixedObjectIndex(Idx))
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index ab5c6f3..f5f7b65 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -324,7 +324,37 @@ AArch64FrameLowering::getArgumentStackToRestore(MachineFunction &MF,
static bool produceCompactUnwindFrame(const AArch64FrameLowering &,
MachineFunction &MF);
-// Conservatively, returns true if the function is likely to have an SVE vectors
+enum class AssignObjectOffsets { No, Yes };
+/// Process all the SVE stack objects, determining the SVE stack sizes and
+/// offsets for each object. If AssignOffsets is "Yes", the offsets get
+/// assigned (and the SVE stack sizes set). Returns the SVE stack sizes.
+static SVEStackSizes determineSVEStackSizes(MachineFunction &MF,
+ AssignObjectOffsets AssignOffsets,
+ bool SplitSVEObjects = false);
+
+static unsigned getStackHazardSize(const MachineFunction &MF) {
+ return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
+}
+
+/// Returns true if PPRs are spilled as ZPRs.
+static bool arePPRsSpilledAsZPR(const MachineFunction &MF) {
+ return MF.getSubtarget().getRegisterInfo()->getSpillSize(
+ AArch64::PPRRegClass) == 16;
+}
+
+StackOffset
+AArch64FrameLowering::getZPRStackSize(const MachineFunction &MF) const {
+ const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ return StackOffset::getScalable(AFI->getStackSizeZPR());
+}
+
+StackOffset
+AArch64FrameLowering::getPPRStackSize(const MachineFunction &MF) const {
+ const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ return StackOffset::getScalable(AFI->getStackSizePPR());
+}
+
+// Conservatively, returns true if the function is likely to have SVE vectors
// on the stack. This function is safe to be called before callee-saves or
// object offsets have been determined.
static bool isLikelyToHaveSVEStack(const AArch64FrameLowering &AFL,
@@ -338,7 +368,7 @@ static bool isLikelyToHaveSVEStack(const AArch64FrameLowering &AFL,
const MachineFrameInfo &MFI = MF.getFrameInfo();
for (int FI = MFI.getObjectIndexBegin(); FI < MFI.getObjectIndexEnd(); FI++) {
- if (MFI.getStackID(FI) == TargetStackID::ScalableVector)
+ if (MFI.isScalableStackID(FI))
return true;
}
@@ -482,13 +512,6 @@ AArch64FrameLowering::getFixedObjectSize(const MachineFunction &MF,
}
}
-/// Returns the size of the entire SVE stackframe (calleesaves + spills).
-StackOffset
-AArch64FrameLowering::getSVEStackSize(const MachineFunction &MF) const {
- const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE());
-}
-
bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
if (!EnableRedZone)
return false;
@@ -514,7 +537,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
!Subtarget.hasSVE();
return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize ||
- getSVEStackSize(MF) || LowerQRegCopyThroughMem);
+ AFI->hasSVEStackSize() || LowerQRegCopyThroughMem);
}
/// hasFPImpl - Return true if the specified function should have a dedicated
@@ -557,7 +580,7 @@ bool AArch64FrameLowering::hasFPImpl(const MachineFunction &MF) const {
// CFA in either of these cases.
if (AFI.needsDwarfUnwindInfo(MF) &&
((requiresSaveVG(MF) || AFI.getSMEFnAttrs().hasStreamingBody()) &&
- (!AFI.hasCalculatedStackSizeSVE() || AFI.getStackSizeSVE() > 0)))
+ (!AFI.hasCalculatedStackSizeSVE() || AFI.hasSVEStackSize())))
return true;
// With large callframes around we may need to use FP to access the scavenging
// emergency spillslot.
@@ -1126,10 +1149,6 @@ static bool isTargetWindows(const MachineFunction &MF) {
return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
}
-static unsigned getStackHazardSize(const MachineFunction &MF) {
- return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
-}
-
void AArch64FrameLowering::emitPacRetPlusLeafHardening(
MachineFunction &MF) const {
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
@@ -1212,7 +1231,9 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
const auto &MFI = MF.getFrameInfo();
int64_t ObjectOffset = MFI.getObjectOffset(FI);
- StackOffset SVEStackSize = getSVEStackSize(MF);
+ StackOffset ZPRStackSize = getZPRStackSize(MF);
+ StackOffset PPRStackSize = getPPRStackSize(MF);
+ StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;
// For VLA-area objects, just emit an offset at the end of the stack frame.
// Whilst not quite correct, these objects do live at the end of the frame and
@@ -1228,7 +1249,7 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
bool FPAfterSVECalleeSaves =
isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize();
- if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
+ if (MFI.isScalableStackID(FI)) {
if (FPAfterSVECalleeSaves &&
-ObjectOffset <= (int64_t)AFI->getSVECalleeSavedStackSize())
return StackOffset::getScalable(ObjectOffset);
@@ -1294,7 +1315,7 @@ StackOffset AArch64FrameLowering::resolveFrameIndexReference(
const auto &MFI = MF.getFrameInfo();
int64_t ObjectOffset = MFI.getObjectOffset(FI);
bool isFixed = MFI.isFixedObjectIndex(FI);
- bool isSVE = MFI.getStackID(FI) == TargetStackID::ScalableVector;
+ bool isSVE = MFI.isScalableStackID(FI);
return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, isSVE, FrameReg,
PreferFP, ForSimm);
}
@@ -1313,7 +1334,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
bool isCSR =
!isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI));
- const StackOffset &SVEStackSize = getSVEStackSize(MF);
+ const StackOffset SVEStackSize = getSVEStackSize(MF);
// Use frame pointer to reference fixed objects. Use it for locals if
// there are VLAs or a dynamically realigned SP (and thus the SP isn't
@@ -1615,10 +1636,13 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
FirstReg = Count - 1;
}
bool FPAfterSVECalleeSaves = IsWindows && AFI->getSVECalleeSavedStackSize();
- int ScalableByteOffset =
- FPAfterSVECalleeSaves ? 0 : AFI->getSVECalleeSavedStackSize();
+ int ScalableByteOffset = FPAfterSVECalleeSaves
+ ? 0
+ : AFI->getZPRCalleeSavedStackSize() +
+ AFI->getPPRCalleeSavedStackSize();
bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace();
Register LastReg = 0;
+ bool HasCSHazardPadding = AFI->hasStackHazardSlotIndex();
// When iterating backwards, the loop condition relies on unsigned wraparound.
for (unsigned i = FirstReg; i < Count; i += RegInc) {
@@ -1648,7 +1672,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
}
// Add the stack hazard size as we transition from GPR->FPR CSRs.
- if (AFI->hasStackHazardSlotIndex() &&
+ if (HasCSHazardPadding &&
(!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) &&
AArch64InstrInfo::isFpOrNEON(RPI.Reg1))
ByteOffset += StackFillDir * StackHazardSize;
@@ -1656,7 +1680,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
int Scale = TRI->getSpillSize(*RPI.RC);
// Add the next reg to the pair if it is in the same register class.
- if (unsigned(i + RegInc) < Count && !AFI->hasStackHazardSlotIndex()) {
+ if (unsigned(i + RegInc) < Count && !HasCSHazardPadding) {
MCRegister NextReg = CSI[i + RegInc].getReg();
bool IsFirst = i == FirstReg;
switch (RPI.Type) {
@@ -2021,10 +2045,14 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
}
// Update the StackIDs of the SVE stack slots.
MachineFrameInfo &MFI = MF.getFrameInfo();
- if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR) {
+ if (RPI.Type == RegPairInfo::ZPR) {
MFI.setStackID(FrameIdxReg1, TargetStackID::ScalableVector);
if (RPI.isPaired())
MFI.setStackID(FrameIdxReg2, TargetStackID::ScalableVector);
+ } else if (RPI.Type == RegPairInfo::PPR) {
+ MFI.setStackID(FrameIdxReg1, TargetStackID::ScalablePredicateVector);
+ if (RPI.isPaired())
+ MFI.setStackID(FrameIdxReg2, TargetStackID::ScalablePredicateVector);
}
}
return true;
@@ -2232,8 +2260,7 @@ void AArch64FrameLowering::determineStackHazardSlot(
for (auto &MI : MBB) {
std::optional<int> FI = getLdStFrameID(MI, MFI);
if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) {
- if (MFI.getStackID(*FI) == TargetStackID::ScalableVector ||
- AArch64InstrInfo::isFpOrNEON(MI))
+ if (MFI.isScalableStackID(*FI) || AArch64InstrInfo::isFpOrNEON(MI))
FrameObjects[*FI] |= 2;
else
FrameObjects[*FI] |= 1;
@@ -2260,10 +2287,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
- const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
unsigned UnspilledCSGPR = AArch64::NoRegister;
unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
@@ -2384,15 +2412,19 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// Calculates the callee saved stack size.
unsigned CSStackSize = 0;
- unsigned SVECSStackSize = 0;
+ unsigned ZPRCSStackSize = 0;
+ unsigned PPRCSStackSize = 0;
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
for (unsigned Reg : SavedRegs.set_bits()) {
auto *RC = TRI->getMinimalPhysRegClass(Reg);
assert(RC && "expected register class!");
auto SpillSize = TRI->getSpillSize(*RC);
- if (AArch64::PPRRegClass.contains(Reg) ||
- AArch64::ZPRRegClass.contains(Reg))
- SVECSStackSize += SpillSize;
+ bool IsZPR = AArch64::ZPRRegClass.contains(Reg);
+ bool IsPPR = !IsZPR && AArch64::PPRRegClass.contains(Reg);
+ if (IsZPR || (IsPPR && arePPRsSpilledAsZPR(MF)))
+ ZPRCSStackSize += SpillSize;
+ else if (IsPPR)
+ PPRCSStackSize += SpillSize;
else
CSStackSize += SpillSize;
}
@@ -2402,17 +2434,17 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// only 64-bit GPRs can be added to SavedRegs.
unsigned NumSavedRegs = SavedRegs.count();
- // Increase the callee-saved stack size if the function has streaming mode
- // changes, as we will need to spill the value of the VG register.
- if (requiresSaveVG(MF))
- CSStackSize += 8;
-
// Determine if a Hazard slot should be used, and increase the CSStackSize by
// StackHazardSize if so.
determineStackHazardSlot(MF, SavedRegs);
if (AFI->hasStackHazardSlotIndex())
CSStackSize += getStackHazardSize(MF);
+ // Increase the callee-saved stack size if the function has streaming mode
+ // changes, as we will need to spill the value of the VG register.
+ if (requiresSaveVG(MF))
+ CSStackSize += 8;
+
// If we must call __arm_get_current_vg in the prologue preserve the LR.
if (requiresSaveVG(MF) && !Subtarget.hasSVE())
SavedRegs.set(AArch64::LR);
@@ -2433,8 +2465,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
});
// If any callee-saved registers are used, the frame cannot be eliminated.
- int64_t SVEStackSize =
- alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16);
+ auto [ZPRLocalStackSize, PPRLocalStackSize] =
+ determineSVEStackSizes(MF, AssignObjectOffsets::No);
+ uint64_t SVELocals = ZPRLocalStackSize + PPRLocalStackSize;
+ uint64_t SVEStackSize =
+ alignTo(ZPRCSStackSize + PPRCSStackSize + SVELocals, 16);
bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;
// The CSR spill slots have not been allocated yet, so estimateStackSize
@@ -2519,7 +2554,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// instructions.
AFI->setCalleeSavedStackSize(AlignedCSStackSize);
AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
- AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16));
+ AFI->setSVECalleeSavedStackSize(ZPRCSStackSize, alignTo(PPRCSStackSize, 16));
}
bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
@@ -2658,7 +2693,6 @@ static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI,
assert((Max == std::numeric_limits<int>::min() ||
Max + 1 == CS.getFrameIdx()) &&
"SVE CalleeSaves are not consecutive");
-
Min = std::min(Min, CS.getFrameIdx());
Max = std::max(Max, CS.getFrameIdx());
}
@@ -2666,43 +2700,65 @@ static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI,
return Min != std::numeric_limits<int>::max();
}
-// Process all the SVE stack objects and determine offsets for each
-// object. If AssignOffsets is true, the offsets get assigned.
-// Fills in the first and last callee-saved frame indices into
-// Min/MaxCSFrameIndex, respectively.
-// Returns the size of the stack.
-static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
- int &MinCSFrameIndex,
- int &MaxCSFrameIndex,
- bool AssignOffsets) {
+static SVEStackSizes determineSVEStackSizes(MachineFunction &MF,
+ AssignObjectOffsets AssignOffsets,
+ bool SplitSVEObjects) {
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ auto *AFI = MF.getInfo<AArch64FunctionInfo>();
+
+ SVEStackSizes SVEStack{};
+
+ // With SplitSVEObjects we maintain separate stack offsets for predicates
+ // (PPRs) and SVE vectors (ZPRs). When SplitSVEObjects is disabled,
+ // predicates are included in the SVE vector area.
+ uint64_t &ZPRStackTop = SVEStack.ZPRStackSize;
+ uint64_t &PPRStackTop =
+ SplitSVEObjects ? SVEStack.PPRStackSize : SVEStack.ZPRStackSize;
+
#ifndef NDEBUG
// First process all fixed stack objects.
for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
- assert(MFI.getStackID(I) != TargetStackID::ScalableVector &&
+ assert(!MFI.isScalableStackID(I) &&
"SVE vectors should never be passed on the stack by value, only by "
"reference.");
#endif
- auto Assign = [&MFI](int FI, int64_t Offset) {
+ auto AllocateObject = [&](int FI) {
+ uint64_t &StackTop = MFI.getStackID(FI) == TargetStackID::ScalableVector
+ ? ZPRStackTop
+ : PPRStackTop;
+
+ // FIXME: Given that the length of SVE vectors is not necessarily a power of
+ // two, we'd need to align every object dynamically at runtime if the
+ // alignment is larger than 16. This is not yet supported.
+ Align Alignment = MFI.getObjectAlign(FI);
+ if (Alignment > Align(16))
+ report_fatal_error(
+ "Alignment of scalable vectors > 16 bytes is not yet supported");
+
+ StackTop += MFI.getObjectSize(FI);
+ StackTop = alignTo(StackTop, Alignment);
+
+ assert(StackTop < std::numeric_limits<int64_t>::max() &&
+ "SVE StackTop far too large?!");
+
+ int64_t Offset = -int64_t(StackTop);
+ if (AssignOffsets == AssignObjectOffsets::Yes)
+ MFI.setObjectOffset(FI, Offset);
+
LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n");
- MFI.setObjectOffset(FI, Offset);
};
- int64_t Offset = 0;
-
// Then process all callee saved slots.
+ int MinCSFrameIndex, MaxCSFrameIndex;
if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) {
- // Assign offsets to the callee save slots.
- for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {
- Offset += MFI.getObjectSize(I);
- Offset = alignTo(Offset, MFI.getObjectAlign(I));
- if (AssignOffsets)
- Assign(I, -Offset);
- }
+ for (int FI = MinCSFrameIndex; FI <= MaxCSFrameIndex; ++FI)
+ AllocateObject(FI);
}
- // Ensure that the Callee-save area is aligned to 16bytes.
- Offset = alignTo(Offset, Align(16U));
+ // Ensure the CS area is 16-byte aligned.
+ PPRStackTop = alignTo(PPRStackTop, Align(16U));
+ ZPRStackTop = alignTo(ZPRStackTop, Align(16U));
// Create a buffer of SVE objects to allocate and sort it.
SmallVector<int, 8> ObjectsToAllocate;
@@ -2715,48 +2771,31 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
if (MFI.getStackID(StackProtectorFI) == TargetStackID::ScalableVector)
ObjectsToAllocate.push_back(StackProtectorFI);
}
- for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
- unsigned StackID = MFI.getStackID(I);
- if (StackID != TargetStackID::ScalableVector)
- continue;
- if (I == StackProtectorFI)
+
+ for (int FI = 0, E = MFI.getObjectIndexEnd(); FI != E; ++FI) {
+ if (FI == StackProtectorFI || MFI.isDeadObjectIndex(FI))
continue;
- if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex)
+ if (MaxCSFrameIndex >= FI && FI >= MinCSFrameIndex)
continue;
- if (MFI.isDeadObjectIndex(I))
+
+ if (MFI.getStackID(FI) != TargetStackID::ScalableVector &&
+ MFI.getStackID(FI) != TargetStackID::ScalablePredicateVector)
continue;
- ObjectsToAllocate.push_back(I);
+ ObjectsToAllocate.push_back(FI);
}
// Allocate all SVE locals and spills
- for (unsigned FI : ObjectsToAllocate) {
- Align Alignment = MFI.getObjectAlign(FI);
- // FIXME: Given that the length of SVE vectors is not necessarily a power of
- // two, we'd need to align every object dynamically at runtime if the
- // alignment is larger than 16. This is not yet supported.
- if (Alignment > Align(16))
- report_fatal_error(
- "Alignment of scalable vectors > 16 bytes is not yet supported");
+ for (unsigned FI : ObjectsToAllocate)
+ AllocateObject(FI);
- Offset = alignTo(Offset + MFI.getObjectSize(FI), Alignment);
- if (AssignOffsets)
- Assign(FI, -Offset);
- }
+ PPRStackTop = alignTo(PPRStackTop, Align(16U));
+ ZPRStackTop = alignTo(ZPRStackTop, Align(16U));
- return Offset;
-}
+ if (AssignOffsets == AssignObjectOffsets::Yes)
+ AFI->setStackSizeSVE(SVEStack.ZPRStackSize, SVEStack.PPRStackSize);
-int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets(
- MachineFrameInfo &MFI) const {
- int MinCSFrameIndex, MaxCSFrameIndex;
- return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex, false);
-}
-
-int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(
- MachineFrameInfo &MFI, int &MinCSFrameIndex, int &MaxCSFrameIndex) const {
- return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex,
- true);
+ return SVEStack;
}
/// Attempts to scavenge a register from \p ScavengeableRegs given the used
@@ -3070,12 +3109,7 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
"Upwards growing stack unsupported");
- int MinCSFrameIndex, MaxCSFrameIndex;
- int64_t SVEStackSize =
- assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex);
-
- AFI->setStackSizeSVE(alignTo(SVEStackSize, 16U));
- AFI->setMinMaxSVECSFrameIndex(MinCSFrameIndex, MaxCSFrameIndex);
+ (void)determineSVEStackSizes(MF, AssignObjectOffsets::Yes);
// If this function isn't doing Win64-style C++ EH, we don't need to do
// anything.
@@ -3597,7 +3631,7 @@ StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP(
// Go to common code if we cannot provide sp + offset.
if (MFI.hasVarSizedObjects() ||
- MF.getInfo<AArch64FunctionInfo>()->getStackSizeSVE() ||
+ MF.getInfo<AArch64FunctionInfo>()->hasSVEStackSize() ||
MF.getSubtarget().getRegisterInfo()->hasStackRealignment(MF))
return getFrameIndexReference(MF, FI, FrameReg);
@@ -3721,8 +3755,7 @@ void AArch64FrameLowering::orderFrameObjects(
if (AFI.hasStackHazardSlotIndex()) {
std::optional<int> FI = getLdStFrameID(MI, MFI);
if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) {
- if (MFI.getStackID(*FI) == TargetStackID::ScalableVector ||
- AArch64InstrInfo::isFpOrNEON(MI))
+ if (MFI.isScalableStackID(*FI) || AArch64InstrInfo::isFpOrNEON(MI))
FrameObjects[*FI].Accesses |= FrameObject::AccessFPR;
else
FrameObjects[*FI].Accesses |= FrameObject::AccessGPR;
@@ -4080,7 +4113,7 @@ void AArch64FrameLowering::emitRemarks(
}
unsigned RegTy = StackAccess::AccessType::GPR;
- if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) {
+ if (MFI.isScalableStackID(FrameIdx)) {
// SPILL_PPR_TO_ZPR_SLOT_PSEUDO and FILL_PPR_FROM_ZPR_SLOT_PSEUDO
// spill/fill the predicate as a data vector (so are an FPR access).
if (MI.getOpcode() != AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO &&
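
determineSVEStackSizes(), as rewritten above, walks the scalable objects and grows either the ZPR or the PPR area, assigning each object a negative offset from that area's current top and 16-byte aligning both areas at the end. A simplified standalone model of that bookkeeping — not the pass itself; SplitSVEObjects and the 16-byte alignment cap are taken from the diff, everything else is a stand-in:

#include <cstdint>
#include <vector>

struct SVEObject {
  uint64_t Size = 0;
  uint64_t Alignment = 16;   // must be <= 16, as enforced in the diff
  bool IsPredicate = false;  // ScalablePredicateVector vs. ScalableVector
  int64_t Offset = 0;        // assigned offset, negative from the area top
};

struct SVEStackSizes {
  uint64_t ZPRStackSize = 0;
  uint64_t PPRStackSize = 0;
};

uint64_t alignTo(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

SVEStackSizes assignSVEOffsets(std::vector<SVEObject> &Objects,
                               bool SplitSVEObjects) {
  SVEStackSizes Sizes;
  for (SVEObject &Obj : Objects) {
    // Without SplitSVEObjects, predicates share the ZPR (data vector) area.
    uint64_t &Top = (SplitSVEObjects && Obj.IsPredicate) ? Sizes.PPRStackSize
                                                         : Sizes.ZPRStackSize;
    Top = alignTo(Top + Obj.Size, Obj.Alignment);
    Obj.Offset = -static_cast<int64_t>(Top);
  }
  // Both areas end up 16-byte aligned, mirroring the CS-area alignment above.
  Sizes.ZPRStackSize = alignTo(Sizes.ZPRStackSize, 16);
  Sizes.PPRStackSize = alignTo(Sizes.PPRStackSize, 16);
  return Sizes;
}
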
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 7bba053..38aa28b1 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -24,6 +24,11 @@ class AArch64FunctionInfo;
class AArch64PrologueEmitter;
class AArch64EpilogueEmitter;
+struct SVEStackSizes {
+ uint64_t ZPRStackSize{0};
+ uint64_t PPRStackSize{0};
+};
+
class AArch64FrameLowering : public TargetFrameLowering {
public:
explicit AArch64FrameLowering()
@@ -124,6 +129,7 @@ public:
return false;
case TargetStackID::Default:
case TargetStackID::ScalableVector:
+ case TargetStackID::ScalablePredicateVector:
case TargetStackID::NoAlloc:
return true;
}
@@ -132,7 +138,8 @@ public:
bool isStackIdSafeForLocalArea(unsigned StackId) const override {
// We don't support putting SVE objects into the pre-allocated local
// frame block at the moment.
- return StackId != TargetStackID::ScalableVector;
+ return (StackId != TargetStackID::ScalableVector &&
+ StackId != TargetStackID::ScalablePredicateVector);
}
void
@@ -145,7 +152,16 @@ public:
bool requiresSaveVG(const MachineFunction &MF) const;
- StackOffset getSVEStackSize(const MachineFunction &MF) const;
+ /// Returns the size of the entire ZPR stackframe (calleesaves + spills).
+ StackOffset getZPRStackSize(const MachineFunction &MF) const;
+
+ /// Returns the size of the entire PPR stackframe (calleesaves + spills).
+ StackOffset getPPRStackSize(const MachineFunction &MF) const;
+
+ /// Returns the size of the entire SVE stackframe (PPRs + ZPRs).
+ StackOffset getSVEStackSize(const MachineFunction &MF) const {
+ return getZPRStackSize(MF) + getPPRStackSize(MF);
+ }
friend class AArch64PrologueEpilogueCommon;
friend class AArch64PrologueEmitter;
@@ -165,10 +181,6 @@ private:
/// Returns true if CSRs should be paired.
bool producePairRegisters(MachineFunction &MF) const;
- int64_t estimateSVEStackObjectOffsets(MachineFrameInfo &MF) const;
- int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF,
- int &MinCSFrameIndex,
- int &MaxCSFrameIndex) const;
/// Make a determination whether a Hazard slot is used and create it if
/// needed.
void determineStackHazardSlot(MachineFunction &MF,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 177b4b0..35bbb0c0 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -7497,7 +7497,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
int FI = cast<FrameIndexSDNode>(N)->getIndex();
// We can only encode VL scaled offsets, so only fold in frame indexes
// referencing SVE objects.
- if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
+ if (MFI.isScalableStackID(FI)) {
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
return true;
@@ -7543,7 +7543,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
// We can only encode VL scaled offsets, so only fold in frame indexes
// referencing SVE objects.
- if (MFI.getStackID(FI) == TargetStackID::ScalableVector)
+ if (MFI.isScalableStackID(FI))
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a1f4734..c2a482a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1537,6 +1537,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
+ setOperationAction(ISD::MSTORE, VT, Legal);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::MULHS, VT, Custom);
setOperationAction(ISD::MULHU, VT, Custom);
@@ -6617,7 +6618,6 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
"llvm.eh.recoverfp must take a function as the first argument");
return IncomingFPOp;
}
-
case Intrinsic::aarch64_neon_vsri:
case Intrinsic::aarch64_neon_vsli:
case Intrinsic::aarch64_sve_sri:
@@ -9256,8 +9256,7 @@ void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
(MI.getOpcode() == AArch64::ADDXri ||
MI.getOpcode() == AArch64::SUBXri)) {
const MachineOperand &MO = MI.getOperand(1);
- if (MO.isFI() && MF.getFrameInfo().getStackID(MO.getIndex()) ==
- TargetStackID::ScalableVector)
+ if (MO.isFI() && MF.getFrameInfo().isScalableStackID(MO.getIndex()))
MI.addOperand(MachineOperand::CreateReg(AArch64::VG, /*IsDef=*/false,
/*IsImplicit=*/true));
}
@@ -9704,8 +9703,12 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty);
MachineFrameInfo &MFI = MF.getFrameInfo();
int FI = MFI.CreateStackObject(StoreSize, Alignment, false);
- if (isScalable)
- MFI.setStackID(FI, TargetStackID::ScalableVector);
+ if (isScalable) {
+ bool IsPred = VA.getValVT() == MVT::aarch64svcount ||
+ VA.getValVT().getVectorElementType() == MVT::i1;
+ MFI.setStackID(FI, IsPred ? TargetStackID::ScalablePredicateVector
+ : TargetStackID::ScalableVector);
+ }
MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
SDValue Ptr = DAG.getFrameIndex(
@@ -15154,9 +15157,7 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
: Shift.getOperand(1);
unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
- SDValue ResultSLI = DAG.getNode(Inst, DL, VT, X, Y, Imm);
-
- return ResultSLI;
+ return DAG.getNode(Inst, DL, VT, X, Y, Imm);
}
static SDValue tryLowerToBSL(SDValue N, SelectionDAG &DAG) {
@@ -29607,7 +29608,7 @@ void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
// than doing it here in finalizeLowering.
if (MFI.hasStackProtectorIndex()) {
for (unsigned int i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
- if (MFI.getStackID(i) == TargetStackID::ScalableVector &&
+ if (MFI.isScalableStackID(i) &&
MFI.getObjectSSPLayout(i) != MachineFrameInfo::SSPLK_None) {
MFI.setStackID(MFI.getStackProtectorIndex(),
TargetStackID::ScalableVector);
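
LowerCall above now gives indirectly passed SVE arguments the predicate stack ID when the value is an aarch64svcount or a scalable vector of i1. A tiny sketch of that classification, with a hypothetical, simplified value-type description in place of MVT:

// Simplified stand-in for the pieces of MVT that the IsPred check needs.
struct ValueType {
  bool IsSVCount = false;    // true for aarch64svcount
  unsigned ElementBits = 0;  // element width in bits for vector types
};

enum class StackID { ScalableVector, ScalablePredicateVector };

// Mirrors the classification in LowerCall above: svcount values and scalable
// vectors of i1 elements are given the predicate stack ID.
StackID classifyIndirectSVEArg(const ValueType &VT) {
  bool IsPred = VT.IsSVCount || VT.ElementBits == 1;
  return IsPred ? StackID::ScalablePredicateVector : StackID::ScalableVector;
}
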
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index f07d351..6ef0a95 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -10176,28 +10176,6 @@ multiclass SIMDScalarLShiftBHSD<bit U, bits<5> opc, string asm,
(!cast<Instruction>(NAME # "d") FPR64:$Rn, vecshiftL64:$imm)>;
}
-multiclass SIMDScalarRShiftBHSD<bit U, bits<5> opc, string asm> {
- def b : BaseSIMDScalarShift<U, opc, {0,0,0,1,?,?,?},
- FPR8, FPR8, vecshiftR8, asm, []> {
- let Inst{18-16} = imm{2-0};
- }
-
- def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?},
- FPR16, FPR16, vecshiftR16, asm, []> {
- let Inst{19-16} = imm{3-0};
- }
-
- def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},
- FPR32, FPR32, vecshiftR32, asm, []> {
- let Inst{20-16} = imm{4-0};
- }
-
- def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},
- FPR64, FPR64, vecshiftR64, asm, []> {
- let Inst{21-16} = imm{5-0};
- }
-}
-
//----------------------------------------------------------------------------
// AdvSIMD vector x indexed element
//----------------------------------------------------------------------------
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 35b27ea..5a90da1 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -5599,7 +5599,7 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register store without SVE store instructions");
Opc = AArch64::STR_PXI;
- StackID = TargetStackID::ScalableVector;
+ StackID = TargetStackID::ScalablePredicateVector;
}
break;
}
@@ -5614,7 +5614,7 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
Opc = AArch64::STRSui;
else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
Opc = AArch64::STR_PPXI;
- StackID = TargetStackID::ScalableVector;
+ StackID = TargetStackID::ScalablePredicateVector;
}
break;
case 8:
@@ -5784,7 +5784,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
if (IsPNR)
PNRReg = DestReg;
Opc = AArch64::LDR_PXI;
- StackID = TargetStackID::ScalableVector;
+ StackID = TargetStackID::ScalablePredicateVector;
}
break;
}
@@ -5799,7 +5799,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
Opc = AArch64::LDRSui;
else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
Opc = AArch64::LDR_PPXI;
- StackID = TargetStackID::ScalableVector;
+ StackID = TargetStackID::ScalablePredicateVector;
}
break;
case 8:
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
index a81f5b3..b3c9656 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
@@ -23,12 +23,21 @@
using namespace llvm;
+static std::optional<uint64_t>
+getSVEStackSize(const AArch64FunctionInfo &MFI,
+ uint64_t (AArch64FunctionInfo::*GetStackSize)() const) {
+ if (!MFI.hasCalculatedStackSizeSVE())
+ return std::nullopt;
+ return (MFI.*GetStackSize)();
+}
+
yaml::AArch64FunctionInfo::AArch64FunctionInfo(
const llvm::AArch64FunctionInfo &MFI)
: HasRedZone(MFI.hasRedZone()),
- StackSizeSVE(MFI.hasCalculatedStackSizeSVE()
- ? std::optional<uint64_t>(MFI.getStackSizeSVE())
- : std::nullopt),
+ StackSizeZPR(
+ getSVEStackSize(MFI, &llvm::AArch64FunctionInfo::getStackSizeZPR)),
+ StackSizePPR(
+ getSVEStackSize(MFI, &llvm::AArch64FunctionInfo::getStackSizePPR)),
HasStackFrame(MFI.hasStackFrame()
? std::optional<bool>(MFI.hasStackFrame())
: std::nullopt) {}
@@ -41,8 +50,9 @@ void AArch64FunctionInfo::initializeBaseYamlFields(
const yaml::AArch64FunctionInfo &YamlMFI) {
if (YamlMFI.HasRedZone)
HasRedZone = YamlMFI.HasRedZone;
- if (YamlMFI.StackSizeSVE)
- setStackSizeSVE(*YamlMFI.StackSizeSVE);
+ if (YamlMFI.StackSizeZPR || YamlMFI.StackSizePPR)
+ setStackSizeSVE(YamlMFI.StackSizeZPR.value_or(0),
+ YamlMFI.StackSizePPR.value_or(0));
if (YamlMFI.HasStackFrame)
setHasStackFrame(*YamlMFI.HasStackFrame);
}
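
The getSVEStackSize() helper above uses a pointer-to-member-function so the same "return the value only once it has been calculated" logic serves both the ZPR and PPR getters. A minimal sketch of the idiom outside LLVM, with hypothetical names:

#include <cstdint>
#include <optional>

struct FunctionInfo {
  bool Calculated = false;
  uint64_t ZPR = 0, PPR = 0;
  bool hasCalculatedStackSizeSVE() const { return Calculated; }
  uint64_t getStackSizeZPR() const { return ZPR; }
  uint64_t getStackSizePPR() const { return PPR; }
};

// Returns std::nullopt until the sizes have been calculated, otherwise calls
// whichever getter was passed in (the same shape as the helper in the diff).
std::optional<uint64_t>
getIfCalculated(const FunctionInfo &MFI,
                uint64_t (FunctionInfo::*GetStackSize)() const) {
  if (!MFI.hasCalculatedStackSizeSVE())
    return std::nullopt;
  return (MFI.*GetStackSize)();
}

// Usage mirroring the YAML constructor above:
//   auto ZPR = getIfCalculated(MFI, &FunctionInfo::getStackSizeZPR);
//   auto PPR = getIfCalculated(MFI, &FunctionInfo::getStackSizePPR);
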
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 897c7e8..4a79d9c 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -74,13 +74,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// Amount of stack frame size, not including callee-saved registers.
uint64_t LocalStackSize = 0;
- /// The start and end frame indices for the SVE callee saves.
- int MinSVECSFrameIndex = 0;
- int MaxSVECSFrameIndex = 0;
-
/// Amount of stack frame size used for saving callee-saved registers.
unsigned CalleeSavedStackSize = 0;
- unsigned SVECalleeSavedStackSize = 0;
+ unsigned ZPRCalleeSavedStackSize = 0;
+ unsigned PPRCalleeSavedStackSize = 0;
bool HasCalleeSavedStackSize = false;
bool HasSVECalleeSavedStackSize = false;
@@ -137,9 +134,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// SVE stack size (for predicates and data vectors) are maintained here
/// rather than in FrameInfo, as the placement and Stack IDs are target
/// specific.
- uint64_t StackSizeSVE = 0;
+ uint64_t StackSizeZPR = 0;
+ uint64_t StackSizePPR = 0;
- /// HasCalculatedStackSizeSVE indicates whether StackSizeSVE is valid.
+ /// HasCalculatedStackSizeSVE indicates whether StackSizeZPR/PPR is valid.
bool HasCalculatedStackSizeSVE = false;
/// Has a value when it is known whether or not the function uses a
@@ -312,16 +310,25 @@ public:
TailCallReservedStack = bytes;
}
- bool hasCalculatedStackSizeSVE() const { return HasCalculatedStackSizeSVE; }
-
- void setStackSizeSVE(uint64_t S) {
+ void setStackSizeSVE(uint64_t ZPR, uint64_t PPR) {
+ StackSizeZPR = ZPR;
+ StackSizePPR = PPR;
HasCalculatedStackSizeSVE = true;
- StackSizeSVE = S;
}
- uint64_t getStackSizeSVE() const {
+ uint64_t getStackSizeZPR() const {
assert(hasCalculatedStackSizeSVE());
- return StackSizeSVE;
+ return StackSizeZPR;
+ }
+ uint64_t getStackSizePPR() const {
+ assert(hasCalculatedStackSizeSVE());
+ return StackSizePPR;
+ }
+
+ bool hasCalculatedStackSizeSVE() const { return HasCalculatedStackSizeSVE; }
+
+ bool hasSVEStackSize() const {
+ return getStackSizeZPR() > 0 || getStackSizePPR() > 0;
}
bool hasStackFrame() const { return HasStackFrame; }
@@ -414,23 +421,25 @@ public:
}
// Saves the CalleeSavedStackSize for SVE vectors in 'scalable bytes'
- void setSVECalleeSavedStackSize(unsigned Size) {
- SVECalleeSavedStackSize = Size;
+ void setSVECalleeSavedStackSize(unsigned ZPR, unsigned PPR) {
+ ZPRCalleeSavedStackSize = ZPR;
+ PPRCalleeSavedStackSize = PPR;
HasSVECalleeSavedStackSize = true;
}
- unsigned getSVECalleeSavedStackSize() const {
+ unsigned getZPRCalleeSavedStackSize() const {
assert(HasSVECalleeSavedStackSize &&
- "SVECalleeSavedStackSize has not been calculated");
- return SVECalleeSavedStackSize;
+ "ZPRCalleeSavedStackSize has not been calculated");
+ return ZPRCalleeSavedStackSize;
}
-
- void setMinMaxSVECSFrameIndex(int Min, int Max) {
- MinSVECSFrameIndex = Min;
- MaxSVECSFrameIndex = Max;
+ unsigned getPPRCalleeSavedStackSize() const {
+ assert(HasSVECalleeSavedStackSize &&
+ "PPRCalleeSavedStackSize has not been calculated");
+ return PPRCalleeSavedStackSize;
}
- int getMinSVECSFrameIndex() const { return MinSVECSFrameIndex; }
- int getMaxSVECSFrameIndex() const { return MaxSVECSFrameIndex; }
+ unsigned getSVECalleeSavedStackSize() const {
+ return getZPRCalleeSavedStackSize() + getPPRCalleeSavedStackSize();
+ }
void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamicTLSAccesses; }
unsigned getNumLocalDynamicTLSAccesses() const {
@@ -611,7 +620,8 @@ private:
namespace yaml {
struct AArch64FunctionInfo final : public yaml::MachineFunctionInfo {
std::optional<bool> HasRedZone;
- std::optional<uint64_t> StackSizeSVE;
+ std::optional<uint64_t> StackSizeZPR;
+ std::optional<uint64_t> StackSizePPR;
std::optional<bool> HasStackFrame;
AArch64FunctionInfo() = default;
@@ -624,7 +634,8 @@ struct AArch64FunctionInfo final : public yaml::MachineFunctionInfo {
template <> struct MappingTraits<AArch64FunctionInfo> {
static void mapping(IO &YamlIO, AArch64FunctionInfo &MFI) {
YamlIO.mapOptional("hasRedZone", MFI.HasRedZone);
- YamlIO.mapOptional("stackSizeSVE", MFI.StackSizeSVE);
+ YamlIO.mapOptional("stackSizeZPR", MFI.StackSizeZPR);
+ YamlIO.mapOptional("stackSizePPR", MFI.StackSizePPR);
YamlIO.mapOptional("hasStackFrame", MFI.HasStackFrame);
}
};
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
index 09b3643..aad6579 100644
--- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
@@ -48,21 +48,19 @@ bool AArch64PrologueEpilogueCommon::isVGInstruction(
return Opc == TargetOpcode::COPY;
}
-// Convenience function to determine whether I is an SVE callee save.
-static bool isSVECalleeSave(MachineBasicBlock::iterator I) {
+// Convenience function to determine whether I is part of the ZPR callee saves.
+static bool isPartOfZPRCalleeSaves(MachineBasicBlock::iterator I) {
switch (I->getOpcode()) {
default:
return false;
- case AArch64::PTRUE_C_B:
case AArch64::LD1B_2Z_IMM:
case AArch64::ST1B_2Z_IMM:
case AArch64::STR_ZXI:
- case AArch64::STR_PXI:
case AArch64::LDR_ZXI:
- case AArch64::LDR_PXI:
- case AArch64::PTRUE_B:
case AArch64::CPY_ZPzI_B:
case AArch64::CMPNE_PPzZI_B:
+ case AArch64::PTRUE_C_B:
+ case AArch64::PTRUE_B:
return I->getFlag(MachineInstr::FrameSetup) ||
I->getFlag(MachineInstr::FrameDestroy);
case AArch64::SEH_SavePReg:
@@ -71,6 +69,23 @@ static bool isSVECalleeSave(MachineBasicBlock::iterator I) {
}
}
+// Convenience function to determine whether I is part of the PPR callee saves.
+static bool isPartOfPPRCalleeSaves(MachineBasicBlock::iterator I) {
+ switch (I->getOpcode()) {
+ default:
+ return false;
+ case AArch64::STR_PXI:
+ case AArch64::LDR_PXI:
+ return I->getFlag(MachineInstr::FrameSetup) ||
+ I->getFlag(MachineInstr::FrameDestroy);
+ }
+}
+
+// Convenience function to determine whether I is part of the SVE callee saves.
+static bool isPartOfSVECalleeSaves(MachineBasicBlock::iterator I) {
+ return isPartOfZPRCalleeSaves(I) || isPartOfPPRCalleeSaves(I);
+}
+
AArch64PrologueEpilogueCommon::AArch64PrologueEpilogueCommon(
MachineFunction &MF, MachineBasicBlock &MBB,
const AArch64FrameLowering &AFL)
@@ -316,7 +331,7 @@ bool AArch64PrologueEpilogueCommon::shouldCombineCSRLocalStackBump(
// When there is an SVE area on the stack, always allocate the
// callee-saves and spills/locals separately.
- if (AFL.getSVEStackSize(MF))
+ if (AFI->hasSVEStackSize())
return false;
return true;
@@ -639,7 +654,7 @@ void AArch64PrologueEmitter::emitPrologue() {
// Now allocate space for the GPR callee saves.
MachineBasicBlock::iterator MBBI = PrologueBeginI;
- while (MBBI != EndI && isSVECalleeSave(MBBI))
+ while (MBBI != EndI && isPartOfSVECalleeSaves(MBBI))
++MBBI;
FirstGPRSaveI = convertCalleeSaveRestoreToSPPrePostIncDec(
MBBI, DL, -AFI->getCalleeSavedStackSize(), EmitAsyncCFI);
@@ -669,7 +684,7 @@ void AArch64PrologueEmitter::emitPrologue() {
MachineBasicBlock::iterator AfterGPRSavesI = FirstGPRSaveI;
while (AfterGPRSavesI != EndI &&
AfterGPRSavesI->getFlag(MachineInstr::FrameSetup) &&
- !isSVECalleeSave(AfterGPRSavesI)) {
+ !isPartOfSVECalleeSaves(AfterGPRSavesI)) {
if (CombineSPBump &&
// Only fix-up frame-setup load/store instructions.
(!AFL.requiresSaveVG(MF) || !isVGInstruction(AfterGPRSavesI, TLI)))
@@ -700,38 +715,66 @@ void AArch64PrologueEmitter::emitPrologue() {
if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);
- StackOffset SVEStackSize = AFL.getSVEStackSize(MF);
- StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
MachineBasicBlock::iterator CalleeSavesEnd = AfterGPRSavesI;
+ StackOffset PPRCalleeSavesSize =
+ StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
+ StackOffset ZPRCalleeSavesSize =
+ StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize());
+ StackOffset SVECalleeSavesSize = PPRCalleeSavesSize + ZPRCalleeSavesSize;
+ StackOffset PPRLocalsSize = AFL.getPPRStackSize(MF) - PPRCalleeSavesSize;
+ StackOffset ZPRLocalsSize = AFL.getZPRStackSize(MF) - ZPRCalleeSavesSize;
+
StackOffset CFAOffset =
StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
-
- // Process the SVE callee-saves to determine what space needs to be
- // allocated.
MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
- if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
- LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize
- << "\n");
- SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
- SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
- // Find callee save instructions in frame.
- // Note: With FPAfterSVECalleeSaves the callee saves have already been
+
+ if (!FPAfterSVECalleeSaves) {
+ MachineBasicBlock::iterator ZPRCalleeSavesBegin = AfterGPRSavesI,
+ ZPRCalleeSavesEnd = AfterGPRSavesI;
+ MachineBasicBlock::iterator PPRCalleeSavesBegin = AfterGPRSavesI,
+ PPRCalleeSavesEnd = AfterGPRSavesI;
+
+ // Process the SVE callee-saves to determine what space needs to be
// allocated.
- if (!FPAfterSVECalleeSaves) {
- MachineBasicBlock::iterator CalleeSavesBegin = AfterGPRSavesI;
- assert(isSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
- while (isSVECalleeSave(AfterSVESavesI) &&
+
+ if (PPRCalleeSavesSize) {
+ LLVM_DEBUG(dbgs() << "PPRCalleeSavedStackSize = "
+ << PPRCalleeSavesSize.getScalable() << "\n");
+
+ PPRCalleeSavesBegin = AfterSVESavesI;
+ assert(isPartOfPPRCalleeSaves(PPRCalleeSavesBegin) &&
+ "Unexpected instruction");
+ while (isPartOfPPRCalleeSaves(AfterSVESavesI) &&
AfterSVESavesI != MBB.getFirstTerminator())
++AfterSVESavesI;
- CalleeSavesEnd = AfterSVESavesI;
+ PPRCalleeSavesEnd = AfterSVESavesI;
+ }
- StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes);
- // Allocate space for the callee saves (if any).
- allocateStackSpace(CalleeSavesBegin, 0, SVECalleeSavesSize,
- EmitAsyncCFI && !HasFP, CFAOffset,
- MFI.hasVarSizedObjects() || LocalsSize);
+ if (ZPRCalleeSavesSize) {
+ LLVM_DEBUG(dbgs() << "ZPRCalleeSavedStackSize = "
+ << ZPRCalleeSavesSize.getScalable() << "\n");
+ ZPRCalleeSavesBegin = AfterSVESavesI;
+ assert(isPartOfZPRCalleeSaves(ZPRCalleeSavesBegin) &&
+ "Unexpected instruction");
+ while (isPartOfZPRCalleeSaves(AfterSVESavesI) &&
+ AfterSVESavesI != MBB.getFirstTerminator())
+ ++AfterSVESavesI;
+ ZPRCalleeSavesEnd = AfterSVESavesI;
}
+
+ // Allocate space for the callee saves (if any).
+ StackOffset LocalsSize =
+ PPRLocalsSize + ZPRLocalsSize + StackOffset::getFixed(NumBytes);
+ MachineBasicBlock::iterator CalleeSavesBegin =
+ AFI->getPPRCalleeSavedStackSize() ? PPRCalleeSavesBegin
+ : ZPRCalleeSavesBegin;
+ allocateStackSpace(CalleeSavesBegin, 0, SVECalleeSavesSize,
+ EmitAsyncCFI && !HasFP, CFAOffset,
+ MFI.hasVarSizedObjects() || LocalsSize);
+
+ CalleeSavesEnd = AFI->getZPRCalleeSavedStackSize() ? ZPRCalleeSavesEnd
+ : PPRCalleeSavesEnd;
}
CFAOffset += SVECalleeSavesSize;
@@ -746,6 +789,7 @@ void AArch64PrologueEmitter::emitPrologue() {
// FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
// the correct value here, as NumBytes also includes padding bytes,
// which shouldn't be counted here.
+ StackOffset SVELocalsSize = PPRLocalsSize + ZPRLocalsSize;
allocateStackSpace(CalleeSavesEnd, RealignmentPadding,
SVELocalsSize + StackOffset::getFixed(NumBytes),
EmitAsyncCFI && !HasFP, CFAOffset,
@@ -796,7 +840,8 @@ void AArch64PrologueEmitter::emitPrologue() {
emitDefineCFAWithFP(AfterSVESavesI, FixedObject);
} else {
StackOffset TotalSize =
- SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
+ AFL.getSVEStackSize(MF) +
+ StackOffset::getFixed((int64_t)MFI.getStackSize());
CFIInstBuilder CFIBuilder(MBB, AfterSVESavesI, MachineInstr::FrameSetup);
CFIBuilder.insertCFIInst(
createDefCFA(RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,
@@ -1165,7 +1210,7 @@ void AArch64PrologueEmitter::emitCalleeSavedGPRLocations(
CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
for (const auto &Info : CSI) {
unsigned FrameIdx = Info.getFrameIdx();
- if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector)
+ if (MFI.isScalableStackID(FrameIdx))
continue;
assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
@@ -1192,7 +1237,7 @@ void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
}
for (const auto &Info : CSI) {
- if (MFI.getStackID(Info.getFrameIdx()) != TargetStackID::ScalableVector)
+ if (!MFI.isScalableStackID(Info.getFrameIdx()))
continue;
// Not all unwinders may know about SVE registers, so assume the lowest
@@ -1322,7 +1367,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
while (FirstGPRRestoreI != Begin) {
--FirstGPRRestoreI;
if (!FirstGPRRestoreI->getFlag(MachineInstr::FrameDestroy) ||
- (!FPAfterSVECalleeSaves && isSVECalleeSave(FirstGPRRestoreI))) {
+ (!FPAfterSVECalleeSaves && isPartOfSVECalleeSaves(FirstGPRRestoreI))) {
++FirstGPRRestoreI;
break;
} else if (CombineSPBump)
@@ -1346,7 +1391,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
if (HasFP && AFI->hasSwiftAsyncContext())
emitSwiftAsyncContextFramePointer(EpilogueEndI, DL);
- const StackOffset &SVEStackSize = AFL.getSVEStackSize(MF);
+ StackOffset SVEStackSize = AFL.getSVEStackSize(MF);
// If there is a single SP update, insert it before the ret and we're done.
if (CombineSPBump) {
@@ -1372,20 +1417,25 @@ void AArch64EpilogueEmitter::emitEpilogue() {
StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
MachineBasicBlock::iterator RestoreBegin = FirstGPRRestoreI,
RestoreEnd = FirstGPRRestoreI;
- if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
+ int64_t ZPRCalleeSavedSize = AFI->getZPRCalleeSavedStackSize();
+ int64_t PPRCalleeSavedSize = AFI->getPPRCalleeSavedStackSize();
+ int64_t SVECalleeSavedSize = ZPRCalleeSavedSize + PPRCalleeSavedSize;
+
+ if (SVECalleeSavedSize) {
if (FPAfterSVECalleeSaves)
RestoreEnd = MBB.getFirstTerminator();
RestoreBegin = std::prev(RestoreEnd);
while (RestoreBegin != MBB.begin() &&
- isSVECalleeSave(std::prev(RestoreBegin)))
+ isPartOfSVECalleeSaves(std::prev(RestoreBegin)))
--RestoreBegin;
- assert(isSVECalleeSave(RestoreBegin) &&
- isSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
+ assert(isPartOfSVECalleeSaves(RestoreBegin) &&
+ isPartOfSVECalleeSaves(std::prev(RestoreEnd)) &&
+ "Unexpected instruction");
StackOffset CalleeSavedSizeAsOffset =
- StackOffset::getScalable(CalleeSavedSize);
+ StackOffset::getScalable(SVECalleeSavedSize);
DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
DeallocateAfter = CalleeSavedSizeAsOffset;
}
@@ -1624,8 +1674,7 @@ void AArch64EpilogueEmitter::emitCalleeSavedRestores(
CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameDestroy);
for (const auto &Info : CSI) {
- if (SVE !=
- (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector))
+ if (SVE != MFI.isScalableStackID(Info.getFrameIdx()))
continue;
MCRegister Reg = Info.getReg();
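
In the prologue path above, the SVE callee-saves (PPR then ZPR) are allocated in one step and the remaining SVE locals in a second step, and the two pieces must still add up to the full ZPR+PPR stack. A back-of-the-envelope model of that split, using only the sizes AArch64FunctionInfo now tracks separately (names and types are simplified stand-ins):

#include <cassert>
#include <cstdint>

struct ScalableSizes {
  int64_t ZPRStack = 0, PPRStack = 0;             // full areas (CS + locals)
  int64_t ZPRCalleeSaves = 0, PPRCalleeSaves = 0; // callee-save portions
};

struct PrologueAllocation {
  int64_t CalleeSaves = 0; // allocated before the SVE callee-save stores
  int64_t Locals = 0;      // allocated after them, together with NumBytes
};

PrologueAllocation splitSVEAllocation(const ScalableSizes &S) {
  PrologueAllocation A;
  A.CalleeSaves = S.PPRCalleeSaves + S.ZPRCalleeSaves;
  A.Locals = (S.PPRStack - S.PPRCalleeSaves) + (S.ZPRStack - S.ZPRCalleeSaves);
  // Invariant mirrored from the epilogue hunks: the callee-save and locals
  // portions together cover the entire SVE stack.
  assert(A.CalleeSaves + A.Locals == S.PPRStack + S.ZPRStack);
  return A;
}
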
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 2b0c8ad..3f43b70 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -643,7 +643,7 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
if (ST.hasSVE() || ST.isStreaming()) {
// Frames that have variable sized objects and scalable SVE objects,
// should always use a basepointer.
- if (!AFI->hasCalculatedStackSizeSVE() || AFI->getStackSizeSVE())
+ if (!AFI->hasCalculatedStackSizeSVE() || AFI->hasSVEStackSize())
return true;
}
@@ -783,7 +783,7 @@ AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
assert((!MF.getSubtarget<AArch64Subtarget>().hasSVE() ||
AFI->hasCalculatedStackSizeSVE()) &&
"Expected SVE area to be calculated by this point");
- return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->getStackSizeSVE() &&
+ return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->hasSVEStackSize() &&
!AFI->hasStackHazardSlotIndex();
}
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 7c5d4fc..e4b3528 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -924,6 +924,7 @@ bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
case TargetStackID::SGPRSpill:
return true;
case TargetStackID::ScalableVector:
+ case TargetStackID::ScalablePredicateVector:
case TargetStackID::WasmLocal:
return false;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 45d194e..939841a 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -2804,6 +2804,7 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
case Hexagon::V6_vL32b_nt_cur_npred_ai:
case Hexagon::V6_vL32b_nt_tmp_pred_ai:
case Hexagon::V6_vL32b_nt_tmp_npred_ai:
+ case Hexagon::V6_vS32Ub_npred_ai:
case Hexagon::V6_vgathermh_pseudo:
case Hexagon::V6_vgathermw_pseudo:
case Hexagon::V6_vgathermhw_pseudo:
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 06ce917..7d4535a 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -2395,6 +2395,7 @@ bool RISCVFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
case TargetStackID::NoAlloc:
case TargetStackID::SGPRSpill:
case TargetStackID::WasmLocal:
+ case TargetStackID::ScalablePredicateVector:
return false;
}
llvm_unreachable("Invalid TargetStackID::Value");
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizeImplicitBinding.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizeImplicitBinding.cpp
index aea3397..205895e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVLegalizeImplicitBinding.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizeImplicitBinding.cpp
@@ -39,6 +39,7 @@ private:
void collectBindingInfo(Module &M);
uint32_t getAndReserveFirstUnusedBinding(uint32_t DescSet);
void replaceImplicitBindingCalls(Module &M);
+ void verifyUniqueOrderIdPerResource(SmallVectorImpl<CallInst *> &Calls);
// A map from descriptor set to a bit vector of used binding numbers.
std::vector<BitVector> UsedBindings;
@@ -94,6 +95,33 @@ void SPIRVLegalizeImplicitBinding::collectBindingInfo(Module &M) {
});
}
+void SPIRVLegalizeImplicitBinding::verifyUniqueOrderIdPerResource(
+ SmallVectorImpl<CallInst *> &Calls) {
+ // Check that the order Id is unique per resource.
+ for (uint32_t i = 1; i < Calls.size(); ++i) {
+ const uint32_t OrderIdArgIdx = 0;
+ const uint32_t DescSetArgIdx = 1;
+ const uint32_t OrderA =
+ cast<ConstantInt>(Calls[i - 1]->getArgOperand(OrderIdArgIdx))
+ ->getZExtValue();
+ const uint32_t OrderB =
+ cast<ConstantInt>(Calls[i]->getArgOperand(OrderIdArgIdx))
+ ->getZExtValue();
+ if (OrderA == OrderB) {
+ const uint32_t DescSetA =
+ cast<ConstantInt>(Calls[i - 1]->getArgOperand(DescSetArgIdx))
+ ->getZExtValue();
+ const uint32_t DescSetB =
+ cast<ConstantInt>(Calls[i]->getArgOperand(DescSetArgIdx))
+ ->getZExtValue();
+ if (DescSetA != DescSetB) {
+ report_fatal_error("Implicit binding calls with the same order ID must "
+ "have the same descriptor set");
+ }
+ }
+ }
+}
+
uint32_t SPIRVLegalizeImplicitBinding::getAndReserveFirstUnusedBinding(
uint32_t DescSet) {
if (UsedBindings.size() <= DescSet) {
@@ -112,11 +140,23 @@ uint32_t SPIRVLegalizeImplicitBinding::getAndReserveFirstUnusedBinding(
}
void SPIRVLegalizeImplicitBinding::replaceImplicitBindingCalls(Module &M) {
+ uint32_t lastOrderId = -1;
+ uint32_t lastBindingNumber = -1;
+
for (CallInst *OldCI : ImplicitBindingCalls) {
IRBuilder<> Builder(OldCI);
+ const uint32_t OrderId =
+ cast<ConstantInt>(OldCI->getArgOperand(0))->getZExtValue();
const uint32_t DescSet =
cast<ConstantInt>(OldCI->getArgOperand(1))->getZExtValue();
- const uint32_t NewBinding = getAndReserveFirstUnusedBinding(DescSet);
+
+ // Reuse an existing binding for this order ID, if one was already assigned.
+ // Otherwise, assign a new binding.
+ const uint32_t NewBinding = (lastOrderId == OrderId)
+ ? lastBindingNumber
+ : getAndReserveFirstUnusedBinding(DescSet);
+ lastOrderId = OrderId;
+ lastBindingNumber = NewBinding;
SmallVector<Value *, 8> Args;
Args.push_back(Builder.getInt32(DescSet));
@@ -142,6 +182,7 @@ bool SPIRVLegalizeImplicitBinding::runOnModule(Module &M) {
if (ImplicitBindingCalls.empty()) {
return false;
}
+ verifyUniqueOrderIdPerResource(ImplicitBindingCalls);
replaceImplicitBindingCalls(M);
return true;
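
The new verifyUniqueOrderIdPerResource check and the binding-reuse logic above rely on ImplicitBindingCalls being sorted by order ID: calls that share an order ID describe the same resource, so they must agree on the descriptor set, and the second and later calls reuse the binding assigned to the first. The standalone C++ sketch below models that invariant and the reuse step with a plain (order ID, descriptor set) pair list and a per-set counter in place of the pass's BitVector bookkeeping; the pair representation, helper names, and counter are illustrative assumptions, not the pass's own data structures.

// Standalone sketch, not the LLVM pass itself: models how the patch above
// treats implicit binding calls that share an order ID.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <map>
#include <utility>
#include <vector>

// Each entry is (order ID, descriptor set), already sorted by order ID,
// mirroring the sorted ImplicitBindingCalls list.
using BindingCall = std::pair<uint32_t, uint32_t>;

// Calls with the same order ID refer to the same resource, so they must
// agree on the descriptor set (the check added in
// verifyUniqueOrderIdPerResource).
bool ordersAreConsistent(const std::vector<BindingCall> &Calls) {
  for (std::size_t I = 1; I < Calls.size(); ++I)
    if (Calls[I - 1].first == Calls[I].first &&
        Calls[I - 1].second != Calls[I].second)
      return false;
  return true;
}

// Assign bindings per descriptor set, reusing the previous binding when the
// order ID repeats (the reuse logic added in replaceImplicitBindingCalls).
// A simple counter stands in for getAndReserveFirstUnusedBinding.
std::vector<uint32_t> assignBindings(const std::vector<BindingCall> &Calls) {
  std::map<uint32_t, uint32_t> NextBinding; // descriptor set -> next free slot
  std::vector<uint32_t> Result;
  uint32_t LastOrderId = ~0u, LastBinding = ~0u;
  for (const auto &[OrderId, DescSet] : Calls) {
    uint32_t Binding =
        (OrderId == LastOrderId) ? LastBinding : NextBinding[DescSet]++;
    LastOrderId = OrderId;
    LastBinding = Binding;
    Result.push_back(Binding);
  }
  return Result;
}

int main() {
  std::vector<BindingCall> Calls = {{0, 0}, {0, 0}, {1, 0}, {2, 1}};
  assert(ordersAreConsistent(Calls));
  std::vector<uint32_t> B = assignBindings(Calls);
  assert(B[0] == 0 && B[1] == 0 && B[2] == 1 && B[3] == 0);
  return 0;
}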
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index 2cfdc75..a068138 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -957,6 +957,8 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
EVT VT) const {
+ if (VT.isVector())
+ return VT.changeVectorElementType(MVT::i1);
return MVT::i32;
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 34854e4..cda5568 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -52388,16 +52388,41 @@ static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT,
// Do not flip "e > c", where "c" is a constant, because Cmp instruction
// cannot take an immediate as its first operand.
//
- if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() &&
- EFLAGS.getValueType().isInteger() &&
- !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
- SDValue NewSub =
- DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
- EFLAGS.getOperand(1), EFLAGS.getOperand(0));
- SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
+ // If EFLAGS is from a CMP that compares the same operands as the earlier
+ // SUB producing X (i.e. CMP X, Y), we can directly use the carry flag with
+ // SBB/ADC without creating a flipped SUB.
+ if (EFLAGS.getOpcode() == X86ISD::CMP &&
+ EFLAGS.getValueType().isInteger() && X == EFLAGS.getOperand(0)) {
return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
DAG.getVTList(VT, MVT::i32), X,
- DAG.getConstant(0, DL, VT), NewEFLAGS);
+ DAG.getConstant(0, DL, VT), EFLAGS);
+ }
+
+ if (EFLAGS.getOpcode() == X86ISD::SUB &&
+ EFLAGS.getValueType().isInteger() &&
+ !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
+ // Only create NewSub if we know one of the folds will succeed to avoid
+ // introducing a temporary node that may persist and affect one-use checks
+ // below.
+ if (EFLAGS.getNode()->hasOneUse()) {
+ SDValue NewSub = DAG.getNode(
+ X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
+ EFLAGS.getOperand(1), EFLAGS.getOperand(0));
+ SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
+ return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
+ DAG.getVTList(VT, MVT::i32), X,
+ DAG.getConstant(0, DL, VT), NewEFLAGS);
+ }
+
+ if (IsSub && X == EFLAGS.getValue(0)) {
+ SDValue NewSub = DAG.getNode(
+ X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
+ EFLAGS.getOperand(1), EFLAGS.getOperand(0));
+ SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
+ return DAG.getNode(X86ISD::SBB, DL, DAG.getVTList(VT, MVT::i32),
+ EFLAGS.getOperand(0), EFLAGS.getOperand(1),
+ NewEFLAGS);
+ }
}
}
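
The CMP branch added above lets combineAddOrSubToADCOrSBB feed the carry flag straight into SBB/ADC when the compared operands already match the subtraction producing X, instead of materialising a flipped SUB. As a rough illustration of the underlying carry-flag idea (ordinary C++, not the DAG combine itself): subtracting an unsigned less-than result is a subtract-with-borrow, which an x86-64 backend can emit as a cmp followed by an sbb.

// Rough illustration of the carry-flag idea behind the SBB/ADC combine above.
// On x86-64 a backend can lower the expression below to "cmp x, y" followed
// by "sbb rax, 0": CMP sets the carry flag to the unsigned borrow (x < y),
// and SBB subtracts that borrow.
#include <cstdint>

uint64_t subBorrow(uint64_t x, uint64_t y) {
  return x - static_cast<uint64_t>(x < y); // candidate for cmp + sbb
}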
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index b6b3a95..87000a1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -2934,32 +2934,6 @@ static Instruction *foldSelectWithSRem(SelectInst &SI, InstCombinerImpl &IC,
return nullptr;
}
-static Value *foldSelectWithFrozenICmp(SelectInst &Sel, InstCombiner::BuilderTy &Builder) {
- FreezeInst *FI = dyn_cast<FreezeInst>(Sel.getCondition());
- if (!FI)
- return nullptr;
-
- Value *Cond = FI->getOperand(0);
- Value *TrueVal = Sel.getTrueValue(), *FalseVal = Sel.getFalseValue();
-
- // select (freeze(x == y)), x, y --> y
- // select (freeze(x != y)), x, y --> x
- // The freeze should be only used by this select. Otherwise, remaining uses of
- // the freeze can observe a contradictory value.
- // c = freeze(x == y) ; Let's assume that y = poison & x = 42; c is 0 or 1
- // a = select c, x, y ;
- // f(a, c) ; f(poison, 1) cannot happen, but if a is folded
- // ; to y, this can happen.
- CmpPredicate Pred;
- if (FI->hasOneUse() &&
- match(Cond, m_c_ICmp(Pred, m_Specific(TrueVal), m_Specific(FalseVal))) &&
- (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE)) {
- return Pred == ICmpInst::ICMP_EQ ? FalseVal : TrueVal;
- }
-
- return nullptr;
-}
-
/// Given that \p CondVal is known to be \p CondIsTrue, try to simplify \p SI.
static Value *simplifyNestedSelectsUsingImpliedCond(SelectInst &SI,
Value *CondVal,
@@ -4446,9 +4420,6 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
if (Instruction *PN = foldSelectToPhi(SI, DT, Builder))
return replaceInstUsesWith(SI, PN);
- if (Value *Fr = foldSelectWithFrozenICmp(SI, Builder))
- return replaceInstUsesWith(SI, Fr);
-
if (Value *V = foldRoundUpIntegerWithPow2Alignment(SI, Builder))
return replaceInstUsesWith(SI, V);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e5d6c81..7fa787b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7954,6 +7954,13 @@ bool VPRecipeBuilder::getScaledReductions(
auto CollectExtInfo = [this, &Exts, &ExtOpTypes,
&ExtKinds](SmallVectorImpl<Value *> &Ops) -> bool {
for (const auto &[I, OpI] : enumerate(Ops)) {
+ auto *CI = dyn_cast<ConstantInt>(OpI);
+ if (I > 0 && CI &&
+ canConstantBeExtended(CI, ExtOpTypes[0], ExtKinds[0])) {
+ ExtOpTypes[I] = ExtOpTypes[0];
+ ExtKinds[I] = ExtKinds[0];
+ continue;
+ }
Value *ExtOp;
if (!match(OpI, m_ZExtOrSExt(m_Value(ExtOp))))
return false;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 02eb637..07b191a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1753,6 +1753,16 @@ void LoopVectorizationPlanner::printPlans(raw_ostream &O) {
}
#endif
+bool llvm::canConstantBeExtended(const ConstantInt *CI, Type *NarrowType,
+ TTI::PartialReductionExtendKind ExtKind) {
+ APInt TruncatedVal = CI->getValue().trunc(NarrowType->getScalarSizeInBits());
+ unsigned WideSize = CI->getType()->getScalarSizeInBits();
+ APInt ExtendedVal = ExtKind == TTI::PR_SignExtend
+ ? TruncatedVal.sext(WideSize)
+ : TruncatedVal.zext(WideSize);
+ return ExtendedVal == CI->getValue();
+}
+
TargetTransformInfo::OperandValueInfo
VPCostContext::getOperandInfo(VPValue *V) const {
if (!V->isLiveIn())
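
canConstantBeExtended, added above, decides whether a wide constant behaves as if it had been sign- or zero-extended from the narrow type: truncate to the narrow width, re-extend with the requested kind, and check that the original value comes back. Below is a minimal sketch of the same round-trip test using fixed 8-to-32-bit widths and plain integers instead of APInt; those widths and the enum name are illustrative assumptions.

// Minimal sketch of the trunc-then-extend round-trip test performed by
// canConstantBeExtended above, written with fixed 8->32 bit widths. A
// constant qualifies if extending its truncation reproduces the original.
#include <cassert>
#include <cstdint>

enum class ExtKind { SignExtend, ZeroExtend };

bool fitsAsExtendedNarrowConstant(uint32_t Value, ExtKind Kind) {
  uint32_t Truncated = Value & 0xFFu; // truncate to the 8-bit narrow type
  uint32_t Extended = (Kind == ExtKind::SignExtend && (Truncated & 0x80u))
                          ? (0xFFFFFF00u | Truncated) // sign bit set: fill high bits
                          : Truncated;                // zero-extend, or positive sext
  return Extended == Value;
}

int main() {
  assert(fitsAsExtendedNarrowConstant(100u, ExtKind::SignExtend));  // 100 == sext(i8 100)
  assert(!fitsAsExtendedNarrowConstant(200u, ExtKind::SignExtend)); // sext(i8 -56) != 200
  assert(fitsAsExtendedNarrowConstant(200u, ExtKind::ZeroExtend));  // 200 == zext(i8 200)
  assert(!fitsAsExtendedNarrowConstant(0xFFFFFFFFu, ExtKind::ZeroExtend)); // zext gives 255
  return 0;
}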
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
index fe59774..fc1a09e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
@@ -468,6 +468,10 @@ public:
};
#endif
+/// Check if a constant \p CI can be safely treated as having been extended
+/// from a narrower type with the given extension kind.
+bool canConstantBeExtended(const ConstantInt *CI, Type *NarrowType,
+ TTI::PartialReductionExtendKind ExtKind);
} // end namespace llvm
#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 46909a5..67b9244 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -340,6 +340,14 @@ VPPartialReductionRecipe::computeCost(ElementCount VF,
: Widen->getOperand(1));
ExtAType = GetExtendKind(ExtAR);
ExtBType = GetExtendKind(ExtBR);
+
+ if (!ExtBR && Widen->getOperand(1)->isLiveIn()) {
+ auto *CI = cast<ConstantInt>(Widen->getOperand(1)->getLiveInIRValue());
+ if (canConstantBeExtended(CI, InputTypeA, ExtAType)) {
+ InputTypeB = InputTypeA;
+ ExtBType = ExtAType;
+ }
+ }
};
if (isa<VPWidenCastRecipe>(OpR)) {
diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt
index 32c7c64..e810fcb6 100644
--- a/llvm/test/CMakeLists.txt
+++ b/llvm/test/CMakeLists.txt
@@ -247,7 +247,7 @@ if (LLVM_INCLUDE_SPIRV_TOOLS_TESTS)
list(APPEND LLVM_TEST_DEPENDS spirv-link)
endif()
-add_custom_target(llvm-test-depends DEPENDS ${LLVM_TEST_DEPENDS})
+add_custom_target(llvm-test-depends DEPENDS ${LLVM_TEST_DEPENDS} UnitTests)
set_target_properties(llvm-test-depends PROPERTIES FOLDER "LLVM/Tests")
if(LLVM_BUILD_TOOLS)
@@ -259,7 +259,7 @@ endif()
add_lit_testsuite(check-llvm "Running the LLVM regression tests"
${CMAKE_CURRENT_BINARY_DIR}
${exclude_from_check_all}
- DEPENDS ${LLVM_TEST_DEPENDS}
+ DEPENDS ${LLVM_TEST_DEPENDS} UnitTests
)
set_target_properties(check-llvm PROPERTIES FOLDER "LLVM/Tests")
diff --git a/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-declare.mir b/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-declare.mir
index aca2816..7fd0cee 100644
--- a/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-declare.mir
+++ b/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-declare.mir
@@ -164,10 +164,10 @@ stack:
- { id: 1, name: z1.addr, size: 16, alignment: 16, stack-id: scalable-vector,
debug-info-variable: '!31', debug-info-expression: '!DIExpression()',
debug-info-location: '!32' }
- - { id: 2, name: p0.addr, size: 2, alignment: 2, stack-id: scalable-vector,
+ - { id: 2, name: p0.addr, size: 2, alignment: 2, stack-id: scalable-predicate-vector,
debug-info-variable: '!33', debug-info-expression: '!DIExpression()',
debug-info-location: '!34' }
- - { id: 3, name: p1.addr, size: 2, alignment: 2, stack-id: scalable-vector,
+ - { id: 3, name: p1.addr, size: 2, alignment: 2, stack-id: scalable-predicate-vector,
debug-info-variable: '!35', debug-info-expression: '!DIExpression()',
debug-info-location: '!36' }
- { id: 4, name: w0.addr, size: 4, alignment: 4, local-offset: -4, debug-info-variable: '!37',
@@ -181,10 +181,10 @@ stack:
- { id: 7, name: localv1, size: 16, alignment: 16, stack-id: scalable-vector,
debug-info-variable: '!45', debug-info-expression: '!DIExpression()',
debug-info-location: '!46' }
- - { id: 8, name: localp0, size: 2, alignment: 2, stack-id: scalable-vector,
+ - { id: 8, name: localp0, size: 2, alignment: 2, stack-id: scalable-predicate-vector,
debug-info-variable: '!48', debug-info-expression: '!DIExpression()',
debug-info-location: '!49' }
- - { id: 9, name: localp1, size: 2, alignment: 2, stack-id: scalable-vector,
+ - { id: 9, name: localp1, size: 2, alignment: 2, stack-id: scalable-predicate-vector,
debug-info-variable: '!51', debug-info-expression: '!DIExpression()',
debug-info-location: '!52' }
machineFunctionInfo: {}
diff --git a/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir b/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir
index 0ea180b..41ba554 100644
--- a/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir
+++ b/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir
@@ -96,8 +96,8 @@ stack:
- { id: 1, size: 8, alignment: 8 }
- { id: 2, size: 16, alignment: 16, stack-id: scalable-vector }
- { id: 3, size: 16, alignment: 16, stack-id: scalable-vector }
- - { id: 4, size: 2, alignment: 2, stack-id: scalable-vector }
- - { id: 5, size: 2, alignment: 2, stack-id: scalable-vector }
+ - { id: 4, size: 2, alignment: 2, stack-id: scalable-predicate-vector }
+ - { id: 5, size: 2, alignment: 2, stack-id: scalable-predicate-vector }
machineFunctionInfo: {}
body: |
bb.0.entry:
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve.mir b/llvm/test/CodeGen/AArch64/framelayout-sve.mir
index 03a6aab..1101416 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-sve.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve.mir
@@ -1215,19 +1215,19 @@ body: |
# CHECK: - { id: 2, name: '', type: default, offset: -112, size: 16, alignment: 16,
# CHECK-NEXT: stack-id: scalable-vector,
# CHECK: - { id: 3, name: '', type: default, offset: -114, size: 2, alignment: 2,
-# CHECK-NEXT: stack-id: scalable-vector,
+# CHECK-NEXT: stack-id: scalable-predicate-vector,
# CHECK: - { id: 4, name: '', type: spill-slot, offset: -144, size: 16, alignment: 16,
# CHECK-NEXT: stack-id: scalable-vector,
# CHECK: - { id: 5, name: '', type: spill-slot, offset: -146, size: 2, alignment: 2,
-# CHECK-NEXT: stack-id: scalable-vector,
+# CHECK-NEXT: stack-id: scalable-predicate-vector,
# CHECK: - { id: 6, name: '', type: spill-slot, offset: -16, size: 16, alignment: 16,
# CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: '$z8',
# CHECK: - { id: 7, name: '', type: spill-slot, offset: -32, size: 16, alignment: 16,
# CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: '$z23',
# CHECK: - { id: 8, name: '', type: spill-slot, offset: -34, size: 2, alignment: 2,
-# CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: '$p4',
+# CHECK-NEXT: stack-id: scalable-predicate-vector, callee-saved-register: '$p4',
# CHECK: - { id: 9, name: '', type: spill-slot, offset: -36, size: 2, alignment: 2,
-# CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: '$p15',
+# CHECK-NEXT: stack-id: scalable-predicate-vector, callee-saved-register: '$p15',
# CHECK: - { id: 10, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16,
# CHECK-NEXT: stack-id: default, callee-saved-register: '$fp',
#
@@ -1295,9 +1295,9 @@ stack:
- { id: 0, type: default, size: 32, alignment: 16, stack-id: scalable-vector }
- { id: 1, type: default, size: 4, alignment: 2, stack-id: scalable-vector }
- { id: 2, type: default, size: 16, alignment: 16, stack-id: scalable-vector }
- - { id: 3, type: default, size: 2, alignment: 2, stack-id: scalable-vector }
+ - { id: 3, type: default, size: 2, alignment: 2, stack-id: scalable-predicate-vector }
- { id: 4, type: spill-slot, size: 16, alignment: 16, stack-id: scalable-vector }
- - { id: 5, type: spill-slot, size: 2, alignment: 2, stack-id: scalable-vector }
+ - { id: 5, type: spill-slot, size: 2, alignment: 2, stack-id: scalable-predicate-vector }
body: |
bb.0.entry:
diff --git a/llvm/test/CodeGen/AArch64/spillfill-sve.mir b/llvm/test/CodeGen/AArch64/spillfill-sve.mir
index 2b16dd0f..5569175 100644
--- a/llvm/test/CodeGen/AArch64/spillfill-sve.mir
+++ b/llvm/test/CodeGen/AArch64/spillfill-sve.mir
@@ -39,7 +39,7 @@ body: |
; CHECK-LABEL: name: spills_fills_stack_id_ppr
; CHECK: stack:
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 2, alignment: 2
- ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: ''
+ ; CHECK-NEXT: stack-id: scalable-predicate-vector, callee-saved-register: ''
; EXPAND-LABEL: name: spills_fills_stack_id_ppr
; EXPAND: STR_PXI $p0, $sp, 7
@@ -82,7 +82,7 @@ body: |
; CHECK-LABEL: name: spills_fills_stack_id_ppr2
; CHECK: stack:
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 2
- ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: ''
+ ; CHECK-NEXT: stack-id: scalable-predicate-vector, callee-saved-register: ''
; EXPAND-LABEL: name: spills_fills_stack_id_ppr2
; EXPAND: STR_PXI $p0, $sp, 6
@@ -127,7 +127,7 @@ body: |
; CHECK-LABEL: name: spills_fills_stack_id_ppr2
; CHECK: stack:
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 2
- ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: ''
+ ; CHECK-NEXT: stack-id: scalable-predicate-vector, callee-saved-register: ''
; EXPAND-LABEL: name: spills_fills_stack_id_ppr2mul2
; EXPAND: STR_PXI $p0, $sp, 6
@@ -172,7 +172,7 @@ body: |
; CHECK-LABEL: name: spills_fills_stack_id_pnr
; CHECK: stack:
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 2, alignment: 2
- ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: ''
+ ; CHECK-NEXT: stack-id: scalable-predicate-vector, callee-saved-register: ''
; EXPAND-LABEL: name: spills_fills_stack_id_pnr
; EXPAND: STR_PXI $pn0, $sp, 7
@@ -211,7 +211,7 @@ body: |
; CHECK-LABEL: name: spills_fills_stack_id_virtreg_pnr
; CHECK: stack:
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 2, alignment: 2
- ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: ''
+ ; CHECK-NEXT: stack-id: scalable-predicate-vector, callee-saved-register: ''
; EXPAND-LABEL: name: spills_fills_stack_id_virtreg_pnr
; EXPAND: renamable $pn8 = WHILEGE_CXX_B
diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-byref.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-byref.ll
index 7bddd1d..cc63c7f 100644
--- a/llvm/test/CodeGen/AArch64/sve-calling-convention-byref.ll
+++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-byref.ll
@@ -56,9 +56,9 @@ define aarch64_sve_vector_pcs <vscale x 16 x i1> @caller_with_many_svepred_arg(<
; CHECK: name: caller_with_many_svepred_arg
; CHECK: stack:
; CHECK: - { id: 0, name: '', type: default, offset: 0, size: 2, alignment: 2,
-; CHECK-NEXT: stack-id: scalable-vector
+; CHECK-NEXT: stack-id: scalable-predicate-vector
; CHECK: - { id: 1, name: '', type: default, offset: 0, size: 2, alignment: 2,
-; CHECK-NEXT: stack-id: scalable-vector
+; CHECK-NEXT: stack-id: scalable-predicate-vector
; CHECK-DAG: STR_PXI %{{[0-9]+}}, %stack.0, 0
; CHECK-DAG: STR_PXI %{{[0-9]+}}, %stack.1, 0
; CHECK-DAG: [[BASE1:%[0-9]+]]:gpr64sp = ADDXri %stack.0, 0
@@ -90,7 +90,7 @@ define aarch64_sve_vector_pcs <vscale x 16 x i1> @caller_with_svepred_arg_1xv16i
; CHECK: name: caller_with_svepred_arg_1xv16i1_4xv16i1
; CHECK: stack:
; CHECK: - { id: 0, name: '', type: default, offset: 0, size: 2, alignment: 2,
-; CHECK-NEXT: stack-id: scalable-vector,
+; CHECK-NEXT: stack-id: scalable-predicate-vector,
; CHECK: [[PRED0:%[0-9]+]]:ppr = COPY $p0
; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
; CHECK: STR_PXI [[PRED0]], %stack.0, 0 :: (store (<vscale x 1 x s16>) into %stack.0)
@@ -139,7 +139,7 @@ define [4 x <vscale x 16 x i1>] @caller_with_svepred_arg_4xv16i1_4xv16i1([4 x <v
; CHECK: name: caller_with_svepred_arg_4xv16i1_4xv16i1
; CHECK: stack:
; CHECK: - { id: 0, name: '', type: default, offset: 0, size: 8, alignment: 2,
-; CHECK-NEXT: stack-id: scalable-vector,
+; CHECK-NEXT: stack-id: scalable-predicate-vector,
; CHECK: [[PRED3:%[0-9]+]]:ppr = COPY $p3
; CHECK: [[PRED2:%[0-9]+]]:ppr = COPY $p2
; CHECK: [[PRED1:%[0-9]+]]:ppr = COPY $p1
@@ -200,7 +200,7 @@ define [2 x <vscale x 32 x i1>] @caller_with_svepred_arg_2xv32i1_1xv16i1([2 x <v
; CHECK: name: caller_with_svepred_arg_2xv32i1_1xv16i1
; CHECK: stack:
; CHECK: - { id: 0, name: '', type: default, offset: 0, size: 8, alignment: 2,
-; CHECK-NEXT: stack-id: scalable-vector,
+; CHECK-NEXT: stack-id: scalable-predicate-vector,
; CHECK: [[PRED3:%[0-9]+]]:ppr = COPY $p3
; CHECK: [[PRED2:%[0-9]+]]:ppr = COPY $p2
; CHECK: [[PRED1:%[0-9]+]]:ppr = COPY $p1
diff --git a/llvm/test/CodeGen/AArch64/sve-load-store-legalisation.ll b/llvm/test/CodeGen/AArch64/sve-load-store-legalisation.ll
new file mode 100644
index 0000000..584753b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-load-store-legalisation.ll
@@ -0,0 +1,2854 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mattr=+sve < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @sve_load_store_nxv1i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv1i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT: st1b { z0.b }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 1 x i8>, ptr %a
+ store <vscale x 1 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv2i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv2i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0]
+; CHECK-NEXT: st1b { z0.d }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 2 x i8>, ptr %a
+ store <vscale x 2 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv3i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv3i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #3 // =0x3
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0]
+; CHECK-NEXT: st1b { z0.s }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 3 x i8>, ptr %a
+ store <vscale x 3 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv4i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv4i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0]
+; CHECK-NEXT: st1b { z0.s }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 4 x i8>, ptr %a
+ store <vscale x 4 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv5i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv5i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #5 // =0x5
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT: st1b { z0.h }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 5 x i8>, ptr %a
+ store <vscale x 5 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv6i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv6i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT: ld1b { z1.s }, p1/z, [x0]
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT: st1b { z1.s }, p1, [x1]
+; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
+; CHECK-NEXT: uunpkhi z0.s, z0.h
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: st1b { z0.d }, p0, [x1, #2, mul vl]
+; CHECK-NEXT: ret
+ %c = load <vscale x 6 x i8>, ptr %a
+ store <vscale x 6 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv7i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv7i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #7 // =0x7
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT: st1b { z0.h }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 7 x i8>, ptr %a
+ store <vscale x 7 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv8i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT: st1b { z0.h }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 8 x i8>, ptr %a
+ store <vscale x 8 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv9i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv9i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #9 // =0x9
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT: st1b { z0.b }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 9 x i8>, ptr %a
+ store <vscale x 9 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv10i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv10i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ptrue p1.h
+; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, #4, mul vl]
+; CHECK-NEXT: ld1b { z1.h }, p1/z, [x0]
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z0.b, z1.b, z0.b
+; CHECK-NEXT: uunpkhi z1.h, z0.b
+; CHECK-NEXT: uunpklo z0.h, z0.b
+; CHECK-NEXT: uunpklo z1.s, z1.h
+; CHECK-NEXT: st1b { z0.h }, p1, [x1]
+; CHECK-NEXT: uunpklo z1.d, z1.s
+; CHECK-NEXT: st1b { z1.d }, p0, [x1, #4, mul vl]
+; CHECK-NEXT: ret
+ %c = load <vscale x 10 x i8>, ptr %a
+ store <vscale x 10 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv11i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv11i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #11 // =0xb
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT: st1b { z0.b }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 11 x i8>, ptr %a
+ store <vscale x 11 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv12i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv12i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ptrue p1.h
+; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT: ld1b { z1.h }, p1/z, [x0]
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z0.b, z1.b, z0.b
+; CHECK-NEXT: uunpkhi z1.h, z0.b
+; CHECK-NEXT: uunpklo z0.h, z0.b
+; CHECK-NEXT: uunpklo z1.s, z1.h
+; CHECK-NEXT: st1b { z0.h }, p1, [x1]
+; CHECK-NEXT: st1b { z1.s }, p0, [x1, #2, mul vl]
+; CHECK-NEXT: ret
+ %c = load <vscale x 12 x i8>, ptr %a
+ store <vscale x 12 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv13i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv13i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #13 // =0xd
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT: st1b { z0.b }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 13 x i8>, ptr %a
+ store <vscale x 13 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv14i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv14i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, #6, mul vl]
+; CHECK-NEXT: ptrue p2.h
+; CHECK-NEXT: ld1b { z1.s }, p1/z, [x0, #2, mul vl]
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
+; CHECK-NEXT: ld1b { z1.h }, p2/z, [x0]
+; CHECK-NEXT: uzp1 z0.b, z1.b, z0.b
+; CHECK-NEXT: uunpkhi z1.h, z0.b
+; CHECK-NEXT: uunpklo z0.h, z0.b
+; CHECK-NEXT: uunpkhi z2.s, z1.h
+; CHECK-NEXT: uunpklo z1.s, z1.h
+; CHECK-NEXT: st1b { z0.h }, p2, [x1]
+; CHECK-NEXT: uunpklo z2.d, z2.s
+; CHECK-NEXT: st1b { z1.s }, p1, [x1, #2, mul vl]
+; CHECK-NEXT: st1b { z2.d }, p0, [x1, #6, mul vl]
+; CHECK-NEXT: ret
+ %c = load <vscale x 14 x i8>, ptr %a
+ store <vscale x 14 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv15i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv15i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #15 // =0xf
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT: st1b { z0.b }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 15 x i8>, ptr %a
+ store <vscale x 15 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv16i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr z0, [x0]
+; CHECK-NEXT: str z0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 16 x i8>, ptr %a
+ store <vscale x 16 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv17i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv17i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w10, #17 // =0x11
+; CHECK-NEXT: lsr x9, x8, #4
+; CHECK-NEXT: mul x9, x9, x10
+; CHECK-NEXT: whilelo p0.b, x8, x9
+; CHECK-NEXT: whilelo p1.b, xzr, x9
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1b { z1.b }, p1/z, [x0]
+; CHECK-NEXT: st1b { z0.b }, p0, [x1, #1, mul vl]
+; CHECK-NEXT: st1b { z1.b }, p1, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 17 x i8>, ptr %a
+ store <vscale x 17 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv18i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv18i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, x8]
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT: uunpklo z0.h, z0.b
+; CHECK-NEXT: uunpklo z1.s, z0.h
+; CHECK-NEXT: uunpkhi z0.s, z0.h
+; CHECK-NEXT: uunpklo z1.d, z1.s
+; CHECK-NEXT: uzp1 z1.s, z1.s, z0.s
+; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
+; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT: uunpklo z0.h, z0.b
+; CHECK-NEXT: uunpkhi z1.s, z0.h
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: uunpkhi z1.d, z1.s
+; CHECK-NEXT: uzp1 z1.s, z0.s, z1.s
+; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT: uunpklo z0.h, z0.b
+; CHECK-NEXT: uunpkhi z1.s, z0.h
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: uunpklo z1.d, z1.s
+; CHECK-NEXT: uzp1 z1.s, z1.s, z0.s
+; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT: uzp1 z1.b, z0.b, z0.b
+; CHECK-NEXT: uunpkhi z1.h, z1.b
+; CHECK-NEXT: uunpklo z2.s, z1.h
+; CHECK-NEXT: uunpkhi z1.s, z1.h
+; CHECK-NEXT: uunpkhi z2.d, z2.s
+; CHECK-NEXT: uzp1 z2.s, z0.s, z2.s
+; CHECK-NEXT: uzp1 z1.h, z2.h, z1.h
+; CHECK-NEXT: uzp1 z1.b, z0.b, z1.b
+; CHECK-NEXT: uunpkhi z1.h, z1.b
+; CHECK-NEXT: uunpklo z2.s, z1.h
+; CHECK-NEXT: uunpkhi z1.s, z1.h
+; CHECK-NEXT: uunpklo z2.d, z2.s
+; CHECK-NEXT: uzp1 z2.s, z2.s, z0.s
+; CHECK-NEXT: uzp1 z1.h, z2.h, z1.h
+; CHECK-NEXT: uzp1 z1.b, z0.b, z1.b
+; CHECK-NEXT: uunpkhi z1.h, z1.b
+; CHECK-NEXT: uunpkhi z2.s, z1.h
+; CHECK-NEXT: uunpklo z1.s, z1.h
+; CHECK-NEXT: uunpkhi z2.d, z2.s
+; CHECK-NEXT: uzp1 z2.s, z0.s, z2.s
+; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h
+; CHECK-NEXT: uzp1 z1.b, z0.b, z1.b
+; CHECK-NEXT: uunpkhi z1.h, z1.b
+; CHECK-NEXT: uunpkhi z2.s, z1.h
+; CHECK-NEXT: uunpklo z1.s, z1.h
+; CHECK-NEXT: uunpklo z2.d, z2.s
+; CHECK-NEXT: uzp1 z2.s, z2.s, z0.s
+; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h
+; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT: ldr z1, [x0]
+; CHECK-NEXT: uunpklo z0.h, z0.b
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: st1b { z0.d }, p0, [x1, x8]
+; CHECK-NEXT: str z1, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 18 x i8>, ptr %a
+ store <vscale x 18 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv19i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv19i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w10, #19 // =0x13
+; CHECK-NEXT: lsr x9, x8, #4
+; CHECK-NEXT: mul x9, x9, x10
+; CHECK-NEXT: whilelo p0.b, x8, x9
+; CHECK-NEXT: whilelo p1.b, xzr, x9
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1b { z1.b }, p1/z, [x0]
+; CHECK-NEXT: st1b { z0.b }, p0, [x1, #1, mul vl]
+; CHECK-NEXT: st1b { z1.b }, p1, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 19 x i8>, ptr %a
+ store <vscale x 19 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv20i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv20i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldr z0, [x0]
+; CHECK-NEXT: ld1b { z1.s }, p0/z, [x0, #4, mul vl]
+; CHECK-NEXT: str z0, [x1]
+; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
+; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT: uunpklo z0.h, z0.b
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z1.b, z0.b, z0.b
+; CHECK-NEXT: uunpkhi z1.h, z1.b
+; CHECK-NEXT: uunpkhi z1.s, z1.h
+; CHECK-NEXT: uzp1 z1.h, z0.h, z1.h
+; CHECK-NEXT: uzp1 z1.b, z0.b, z1.b
+; CHECK-NEXT: uunpkhi z1.h, z1.b
+; CHECK-NEXT: uunpklo z1.s, z1.h
+; CHECK-NEXT: uzp1 z1.h, z1.h, z0.h
+; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT: uunpklo z0.h, z0.b
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: st1b { z0.s }, p0, [x1, #4, mul vl]
+; CHECK-NEXT: ret
+ %c = load <vscale x 20 x i8>, ptr %a
+ store <vscale x 20 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv21i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv21i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w10, #21 // =0x15
+; CHECK-NEXT: lsr x9, x8, #4
+; CHECK-NEXT: mul x9, x9, x10
+; CHECK-NEXT: whilelo p0.b, x8, x9
+; CHECK-NEXT: whilelo p1.b, xzr, x9
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1b { z1.b }, p1/z, [x0]
+; CHECK-NEXT: st1b { z0.b }, p0, [x1, #1, mul vl]
+; CHECK-NEXT: st1b { z1.b }, p1, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 21 x i8>, ptr %a
+ store <vscale x 21 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv22i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv22i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cntw x8, all, mul #5
+; CHECK-NEXT: ldr z2, [x0]
+; CHECK-NEXT: ptrue p1.d
+; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0, #4, mul vl]
+; CHECK-NEXT: ld1b { z1.d }, p1/z, [x0, x8]
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT: uunpklo z0.h, z0.b
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: uzp1 z1.s, z1.s, z0.s
+; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT: uzp1 z1.b, z0.b, z0.b
+; CHECK-NEXT: uunpkhi z1.h, z1.b
+; CHECK-NEXT: uunpkhi z1.s, z1.h
+; CHECK-NEXT: uzp1 z1.h, z0.h, z1.h
+; CHECK-NEXT: uzp1 z1.b, z0.b, z1.b
+; CHECK-NEXT: uunpkhi z1.h, z1.b
+; CHECK-NEXT: uunpklo z1.s, z1.h
+; CHECK-NEXT: uzp1 z1.h, z1.h, z0.h
+; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT: uunpklo z0.h, z0.b
+; CHECK-NEXT: uunpkhi z1.s, z0.h
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: uunpklo z1.d, z1.s
+; CHECK-NEXT: st1b { z1.d }, p1, [x1, x8]
+; CHECK-NEXT: st1b { z0.s }, p0, [x1, #4, mul vl]
+; CHECK-NEXT: str z2, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 22 x i8>, ptr %a
+ store <vscale x 22 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv23i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv23i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w10, #23 // =0x17
+; CHECK-NEXT: lsr x9, x8, #4
+; CHECK-NEXT: mul x9, x9, x10
+; CHECK-NEXT: whilelo p0.b, x8, x9
+; CHECK-NEXT: whilelo p1.b, xzr, x9
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1b { z1.b }, p1/z, [x0]
+; CHECK-NEXT: st1b { z0.b }, p0, [x1, #1, mul vl]
+; CHECK-NEXT: st1b { z1.b }, p1, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 23 x i8>, ptr %a
+ store <vscale x 23 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv24i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv24i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: ldr z0, [x0]
+; CHECK-NEXT: ld1b { z1.h }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT: str z0, [x1]
+; CHECK-NEXT: uzp1 z0.b, z1.b, z0.b
+; CHECK-NEXT: uunpklo z0.h, z0.b
+; CHECK-NEXT: st1b { z0.h }, p0, [x1, #2, mul vl]
+; CHECK-NEXT: ret
+ %c = load <vscale x 24 x i8>, ptr %a
+ store <vscale x 24 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv25i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv25i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w10, #25 // =0x19
+; CHECK-NEXT: lsr x9, x8, #4
+; CHECK-NEXT: mul x9, x9, x10
+; CHECK-NEXT: whilelo p0.b, x8, x9
+; CHECK-NEXT: whilelo p1.b, xzr, x9
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1b { z1.b }, p1/z, [x0]
+; CHECK-NEXT: st1b { z0.b }, p0, [x1, #1, mul vl]
+; CHECK-NEXT: st1b { z1.b }, p1, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 25 x i8>, ptr %a
+ store <vscale x 25 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv26i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv26i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: cnth x8, all, mul #3
+; CHECK-NEXT: ldr z2, [x0]
+; CHECK-NEXT: ptrue p1.h
+; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, x8]
+; CHECK-NEXT: ld1b { z1.h }, p1/z, [x0, #2, mul vl]
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z0.b, z1.b, z0.b
+; CHECK-NEXT: uunpkhi z1.h, z0.b
+; CHECK-NEXT: uunpklo z0.h, z0.b
+; CHECK-NEXT: uunpklo z1.s, z1.h
+; CHECK-NEXT: uunpklo z1.d, z1.s
+; CHECK-NEXT: st1b { z1.d }, p0, [x1, x8]
+; CHECK-NEXT: st1b { z0.h }, p1, [x1, #2, mul vl]
+; CHECK-NEXT: str z2, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 26 x i8>, ptr %a
+ store <vscale x 26 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv27i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv27i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w10, #27 // =0x1b
+; CHECK-NEXT: lsr x9, x8, #4
+; CHECK-NEXT: mul x9, x9, x10
+; CHECK-NEXT: whilelo p0.b, x8, x9
+; CHECK-NEXT: whilelo p1.b, xzr, x9
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1b { z1.b }, p1/z, [x0]
+; CHECK-NEXT: st1b { z0.b }, p0, [x1, #1, mul vl]
+; CHECK-NEXT: st1b { z1.b }, p1, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 27 x i8>, ptr %a
+ store <vscale x 27 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv28i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv28i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldr z2, [x0]
+; CHECK-NEXT: ptrue p1.h
+; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0, #6, mul vl]
+; CHECK-NEXT: ld1b { z1.h }, p1/z, [x0, #2, mul vl]
+; CHECK-NEXT: str z2, [x1]
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z0.b, z1.b, z0.b
+; CHECK-NEXT: uunpkhi z1.h, z0.b
+; CHECK-NEXT: uunpklo z0.h, z0.b
+; CHECK-NEXT: uunpklo z1.s, z1.h
+; CHECK-NEXT: st1b { z0.h }, p1, [x1, #2, mul vl]
+; CHECK-NEXT: st1b { z1.s }, p0, [x1, #6, mul vl]
+; CHECK-NEXT: ret
+ %c = load <vscale x 28 x i8>, ptr %a
+ store <vscale x 28 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv29i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv29i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w10, #29 // =0x1d
+; CHECK-NEXT: lsr x9, x8, #4
+; CHECK-NEXT: mul x9, x9, x10
+; CHECK-NEXT: whilelo p0.b, x8, x9
+; CHECK-NEXT: whilelo p1.b, xzr, x9
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1b { z1.b }, p1/z, [x0]
+; CHECK-NEXT: st1b { z0.b }, p0, [x1, #1, mul vl]
+; CHECK-NEXT: st1b { z1.b }, p1, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 29 x i8>, ptr %a
+ store <vscale x 29 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv30i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv30i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: cntw x8, all, mul #7
+; CHECK-NEXT: ldr z3, [x0]
+; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, x8]
+; CHECK-NEXT: ptrue p2.h
+; CHECK-NEXT: ld1b { z1.s }, p1/z, [x0, #6, mul vl]
+; CHECK-NEXT: ld1b { z2.h }, p2/z, [x0, #2, mul vl]
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
+; CHECK-NEXT: uzp1 z0.b, z2.b, z0.b
+; CHECK-NEXT: uunpkhi z1.h, z0.b
+; CHECK-NEXT: uunpklo z0.h, z0.b
+; CHECK-NEXT: uunpkhi z2.s, z1.h
+; CHECK-NEXT: uunpklo z1.s, z1.h
+; CHECK-NEXT: uunpklo z2.d, z2.s
+; CHECK-NEXT: st1b { z2.d }, p0, [x1, x8]
+; CHECK-NEXT: st1b { z0.h }, p2, [x1, #2, mul vl]
+; CHECK-NEXT: st1b { z1.s }, p1, [x1, #6, mul vl]
+; CHECK-NEXT: str z3, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 30 x i8>, ptr %a
+ store <vscale x 30 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv31i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv31i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w10, #31 // =0x1f
+; CHECK-NEXT: lsr x9, x8, #4
+; CHECK-NEXT: mul x9, x9, x10
+; CHECK-NEXT: whilelo p0.b, x8, x9
+; CHECK-NEXT: whilelo p1.b, xzr, x9
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1b { z1.b }, p1/z, [x0]
+; CHECK-NEXT: st1b { z0.b }, p0, [x1, #1, mul vl]
+; CHECK-NEXT: st1b { z1.b }, p1, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 31 x i8>, ptr %a
+ store <vscale x 31 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv32i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv32i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr z0, [x0, #1, mul vl]
+; CHECK-NEXT: ldr z1, [x0]
+; CHECK-NEXT: str z0, [x1, #1, mul vl]
+; CHECK-NEXT: str z1, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 32 x i8>, ptr %a
+ store <vscale x 32 x i8> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv1i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv1i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 1 x i16>, ptr %a
+ store <vscale x 1 x i16> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv2i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv2i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.d }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 2 x i16>, ptr %a
+ store <vscale x 2 x i16> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv3i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv3i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #3 // =0x3
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.s }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 3 x i16>, ptr %a
+ store <vscale x 3 x i16> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv4i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.s }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 4 x i16>, ptr %a
+ store <vscale x 4 x i16> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv5i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv5i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #5 // =0x5
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 5 x i16>, ptr %a
+ store <vscale x 5 x i16> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv6i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv6i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT: ld1h { z1.s }, p1/z, [x0]
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
+; CHECK-NEXT: uunpkhi z1.s, z0.h
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: uunpklo z1.d, z1.s
+; CHECK-NEXT: st1h { z0.s }, p1, [x1]
+; CHECK-NEXT: st1h { z1.d }, p0, [x1, #2, mul vl]
+; CHECK-NEXT: ret
+ %c = load <vscale x 6 x i16>, ptr %a
+ store <vscale x 6 x i16> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv7i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv7i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #7 // =0x7
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 7 x i16>, ptr %a
+ store <vscale x 7 x i16> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv8i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr z0, [x0]
+; CHECK-NEXT: str z0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 8 x i16>, ptr %a
+ store <vscale x 8 x i16> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv9i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv9i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #9 // =0x9
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p1, [x1, #1, mul vl]
+; CHECK-NEXT: st1h { z1.h }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 9 x i16>, ptr %a
+ store <vscale x 9 x i16> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv10i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv10i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr z0, [x0]
+; CHECK-NEXT: ld1h { z1.d }, p0/z, [x0, #4, mul vl]
+; CHECK-NEXT: str z0, [x1]
+; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT: uzp1 z1.h, z0.h, z0.h
+; CHECK-NEXT: uunpkhi z1.s, z1.h
+; CHECK-NEXT: uunpkhi z1.d, z1.s
+; CHECK-NEXT: uzp1 z1.s, z0.s, z1.s
+; CHECK-NEXT: uzp1 z1.h, z0.h, z1.h
+; CHECK-NEXT: uunpkhi z1.s, z1.h
+; CHECK-NEXT: uunpklo z1.d, z1.s
+; CHECK-NEXT: uzp1 z1.s, z1.s, z0.s
+; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: st1h { z0.d }, p0, [x1, #4, mul vl]
+; CHECK-NEXT: ret
+ %c = load <vscale x 10 x i16>, ptr %a
+ store <vscale x 10 x i16> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv11i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv11i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #11 // =0xb
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p1, [x1, #1, mul vl]
+; CHECK-NEXT: st1h { z1.h }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 11 x i16>, ptr %a
+ store <vscale x 11 x i16> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv12i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv12i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldr z0, [x0]
+; CHECK-NEXT: ld1h { z1.s }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT: str z0, [x1]
+; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: st1h { z0.s }, p0, [x1, #2, mul vl]
+; CHECK-NEXT: ret
+ %c = load <vscale x 12 x i16>, ptr %a
+ store <vscale x 12 x i16> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv13i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv13i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #13 // =0xd
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p1, [x1, #1, mul vl]
+; CHECK-NEXT: st1h { z1.h }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 13 x i16>, ptr %a
+ store <vscale x 13 x i16> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv14i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv14i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr z2, [x0]
+; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, #6, mul vl]
+; CHECK-NEXT: ld1h { z1.s }, p1/z, [x0, #2, mul vl]
+; CHECK-NEXT: str z2, [x1]
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
+; CHECK-NEXT: uunpkhi z1.s, z0.h
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: uunpklo z1.d, z1.s
+; CHECK-NEXT: st1h { z0.s }, p1, [x1, #2, mul vl]
+; CHECK-NEXT: st1h { z1.d }, p0, [x1, #6, mul vl]
+; CHECK-NEXT: ret
+ %c = load <vscale x 14 x i16>, ptr %a
+ store <vscale x 14 x i16> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv15i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv15i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #15 // =0xf
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p1, [x1, #1, mul vl]
+; CHECK-NEXT: st1h { z1.h }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 15 x i16>, ptr %a
+ store <vscale x 15 x i16> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv16i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv16i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr z0, [x0, #1, mul vl]
+; CHECK-NEXT: ldr z1, [x0]
+; CHECK-NEXT: str z0, [x1, #1, mul vl]
+; CHECK-NEXT: str z1, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 16 x i16>, ptr %a
+ store <vscale x 16 x i16> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv1i32(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv1i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: st1w { z0.s }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 1 x i32>, ptr %a
+ store <vscale x 1 x i32> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv2i32(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
+; CHECK-NEXT: st1w { z0.d }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 2 x i32>, ptr %a
+ store <vscale x 2 x i32> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv3i32(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv3i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #3 // =0x3
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: st1w { z0.s }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 3 x i32>, ptr %a
+ store <vscale x 3 x i32> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv4i32(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr z0, [x0]
+; CHECK-NEXT: str z0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 4 x i32>, ptr %a
+ store <vscale x 4 x i32> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv5i32(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv5i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #5 // =0x5
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
+; CHECK-NEXT: st1w { z0.s }, p1, [x1, #1, mul vl]
+; CHECK-NEXT: st1w { z1.s }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 5 x i32>, ptr %a
+ store <vscale x 5 x i32> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv6i32(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv6i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr z0, [x0]
+; CHECK-NEXT: ld1w { z1.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT: str z0, [x1]
+; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: st1w { z0.d }, p0, [x1, #2, mul vl]
+; CHECK-NEXT: ret
+ %c = load <vscale x 6 x i32>, ptr %a
+ store <vscale x 6 x i32> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv7i32(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv7i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #7 // =0x7
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
+; CHECK-NEXT: st1w { z0.s }, p1, [x1, #1, mul vl]
+; CHECK-NEXT: st1w { z1.s }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 7 x i32>, ptr %a
+ store <vscale x 7 x i32> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv8i32(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr z0, [x0, #1, mul vl]
+; CHECK-NEXT: ldr z1, [x0]
+; CHECK-NEXT: str z0, [x1, #1, mul vl]
+; CHECK-NEXT: str z1, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 8 x i32>, ptr %a
+ store <vscale x 8 x i32> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv1i64(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv1i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: whilelo p0.d, xzr, x8
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: st1d { z0.d }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 1 x i64>, ptr %a
+ store <vscale x 1 x i64> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv2i64(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr z0, [x0]
+; CHECK-NEXT: str z0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 2 x i64>, ptr %a
+ store <vscale x 2 x i64> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv3i64(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv3i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #3 // =0x3
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
+; CHECK-NEXT: st1d { z0.d }, p1, [x1, #1, mul vl]
+; CHECK-NEXT: st1d { z1.d }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 3 x i64>, ptr %a
+ store <vscale x 3 x i64> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv4i64(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr z0, [x0, #1, mul vl]
+; CHECK-NEXT: ldr z1, [x0]
+; CHECK-NEXT: str z0, [x1, #1, mul vl]
+; CHECK-NEXT: str z1, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 4 x i64>, ptr %a
+ store <vscale x 4 x i64> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv1f16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv1f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: whilelo p0.d, xzr, x8
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.d }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 1 x half>, ptr %a
+ store <vscale x 1 x half> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv2f16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.d }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 2 x half>, ptr %a
+ store <vscale x 2 x half> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv3f16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv3f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #3 // =0x3
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.s }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 3 x half>, ptr %a
+ store <vscale x 3 x half> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv4f16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.s }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 4 x half>, ptr %a
+ store <vscale x 4 x half> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv5f16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv5f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #5 // =0x5
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 5 x half>, ptr %a
+ store <vscale x 5 x half> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv6f16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv6f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT: ld1h { z1.s }, p1/z, [x0]
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT: st1h { z1.s }, p1, [x1]
+; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
+; CHECK-NEXT: uunpkhi z0.s, z0.h
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: st1h { z0.d }, p0, [x1, #2, mul vl]
+; CHECK-NEXT: ret
+ %c = load <vscale x 6 x half>, ptr %a
+ store <vscale x 6 x half> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv7f16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv7f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #7 // =0x7
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 7 x half>, ptr %a
+ store <vscale x 7 x half> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv8f16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr z0, [x0]
+; CHECK-NEXT: str z0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 8 x half>, ptr %a
+ store <vscale x 8 x half> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv9f16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv9f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #9 // =0x9
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p1, [x1, #1, mul vl]
+; CHECK-NEXT: st1h { z1.h }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 9 x half>, ptr %a
+ store <vscale x 9 x half> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv10f16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv10f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr z0, [x0]
+; CHECK-NEXT: ld1h { z1.d }, p0/z, [x0, #4, mul vl]
+; CHECK-NEXT: str z0, [x1]
+; CHECK-NEXT: st1h { z1.d }, p0, [x1, #4, mul vl]
+; CHECK-NEXT: ret
+ %c = load <vscale x 10 x half>, ptr %a
+ store <vscale x 10 x half> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv11f16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv11f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #11 // =0xb
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p1, [x1, #1, mul vl]
+; CHECK-NEXT: st1h { z1.h }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 11 x half>, ptr %a
+ store <vscale x 11 x half> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv12f16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv12f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldr z0, [x0]
+; CHECK-NEXT: ld1h { z1.s }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT: str z0, [x1]
+; CHECK-NEXT: st1h { z1.s }, p0, [x1, #2, mul vl]
+; CHECK-NEXT: ret
+ %c = load <vscale x 12 x half>, ptr %a
+ store <vscale x 12 x half> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv13f16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv13f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #13 // =0xd
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p1, [x1, #1, mul vl]
+; CHECK-NEXT: st1h { z1.h }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 13 x half>, ptr %a
+ store <vscale x 13 x half> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv14f16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv14f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr z2, [x0]
+; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, #6, mul vl]
+; CHECK-NEXT: ld1h { z1.s }, p1/z, [x0, #2, mul vl]
+; CHECK-NEXT: str z2, [x1]
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT: st1h { z1.s }, p1, [x1, #2, mul vl]
+; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
+; CHECK-NEXT: uunpkhi z0.s, z0.h
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: st1h { z0.d }, p0, [x1, #6, mul vl]
+; CHECK-NEXT: ret
+ %c = load <vscale x 14 x half>, ptr %a
+ store <vscale x 14 x half> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv15f16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv15f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #15 // =0xf
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p1, [x1, #1, mul vl]
+; CHECK-NEXT: st1h { z1.h }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 15 x half>, ptr %a
+ store <vscale x 15 x half> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv16f16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv16f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr z0, [x0, #1, mul vl]
+; CHECK-NEXT: ldr z1, [x0]
+; CHECK-NEXT: str z0, [x1, #1, mul vl]
+; CHECK-NEXT: str z1, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 16 x half>, ptr %a
+ store <vscale x 16 x half> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv1f32(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv1f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: whilelo p0.d, xzr, x8
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
+; CHECK-NEXT: st1w { z0.d }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 1 x float>, ptr %a
+ store <vscale x 1 x float> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv2f32(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
+; CHECK-NEXT: st1w { z0.d }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 2 x float>, ptr %a
+ store <vscale x 2 x float> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv3f32(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv3f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #3 // =0x3
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: st1w { z0.s }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 3 x float>, ptr %a
+ store <vscale x 3 x float> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv4f32(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr z0, [x0]
+; CHECK-NEXT: str z0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 4 x float>, ptr %a
+ store <vscale x 4 x float> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv5f32(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv5f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #5 // =0x5
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
+; CHECK-NEXT: st1w { z0.s }, p1, [x1, #1, mul vl]
+; CHECK-NEXT: st1w { z1.s }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 5 x float>, ptr %a
+ store <vscale x 5 x float> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv6f32(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv6f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr z0, [x0]
+; CHECK-NEXT: ld1w { z1.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT: str z0, [x1]
+; CHECK-NEXT: st1w { z1.d }, p0, [x1, #2, mul vl]
+; CHECK-NEXT: ret
+ %c = load <vscale x 6 x float>, ptr %a
+ store <vscale x 6 x float> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv7f32(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv7f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #7 // =0x7
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
+; CHECK-NEXT: st1w { z0.s }, p1, [x1, #1, mul vl]
+; CHECK-NEXT: st1w { z1.s }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 7 x float>, ptr %a
+ store <vscale x 7 x float> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv8f32(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv8f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr z0, [x0, #1, mul vl]
+; CHECK-NEXT: ldr z1, [x0]
+; CHECK-NEXT: str z0, [x1, #1, mul vl]
+; CHECK-NEXT: str z1, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 8 x float>, ptr %a
+ store <vscale x 8 x float> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv1f64(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: whilelo p0.d, xzr, x8
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: st1d { z0.d }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 1 x double>, ptr %a
+ store <vscale x 1 x double> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv2f64(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr z0, [x0]
+; CHECK-NEXT: str z0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 2 x double>, ptr %a
+ store <vscale x 2 x double> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv3f64(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv3f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #3 // =0x3
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
+; CHECK-NEXT: st1d { z0.d }, p1, [x1, #1, mul vl]
+; CHECK-NEXT: st1d { z1.d }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 3 x double>, ptr %a
+ store <vscale x 3 x double> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv4f64(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv4f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr z0, [x0, #1, mul vl]
+; CHECK-NEXT: ldr z1, [x0]
+; CHECK-NEXT: str z0, [x1, #1, mul vl]
+; CHECK-NEXT: str z1, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 4 x double>, ptr %a
+ store <vscale x 4 x double> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv1bf16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv1bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: whilelo p0.d, xzr, x8
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.d }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 1 x bfloat>, ptr %a
+ store <vscale x 1 x bfloat> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv2bf16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv2bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.d }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 2 x bfloat>, ptr %a
+ store <vscale x 2 x bfloat> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv3bf16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv3bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #3 // =0x3
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.s }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 3 x bfloat>, ptr %a
+ store <vscale x 3 x bfloat> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv4bf16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv4bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.s }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 4 x bfloat>, ptr %a
+ store <vscale x 4 x bfloat> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv5bf16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv5bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #5 // =0x5
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 5 x bfloat>, ptr %a
+ store <vscale x 5 x bfloat> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv6bf16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv6bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT: ld1h { z1.s }, p1/z, [x0]
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT: st1h { z1.s }, p1, [x1]
+; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
+; CHECK-NEXT: uunpkhi z0.s, z0.h
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: st1h { z0.d }, p0, [x1, #2, mul vl]
+; CHECK-NEXT: ret
+ %c = load <vscale x 6 x bfloat>, ptr %a
+ store <vscale x 6 x bfloat> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv7bf16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv7bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #7 // =0x7
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 7 x bfloat>, ptr %a
+ store <vscale x 7 x bfloat> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv8bf16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv8bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr z0, [x0]
+; CHECK-NEXT: str z0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 8 x bfloat>, ptr %a
+ store <vscale x 8 x bfloat> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv9bf16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv9bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #9 // =0x9
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p1, [x1, #1, mul vl]
+; CHECK-NEXT: st1h { z1.h }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 9 x bfloat>, ptr %a
+ store <vscale x 9 x bfloat> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv10bf16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv10bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr z0, [x0]
+; CHECK-NEXT: ld1h { z1.d }, p0/z, [x0, #4, mul vl]
+; CHECK-NEXT: str z0, [x1]
+; CHECK-NEXT: st1h { z1.d }, p0, [x1, #4, mul vl]
+; CHECK-NEXT: ret
+ %c = load <vscale x 10 x bfloat>, ptr %a
+ store <vscale x 10 x bfloat> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv11bf16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv11bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #11 // =0xb
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p1, [x1, #1, mul vl]
+; CHECK-NEXT: st1h { z1.h }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 11 x bfloat>, ptr %a
+ store <vscale x 11 x bfloat> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv12bf16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv12bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldr z0, [x0]
+; CHECK-NEXT: ld1h { z1.s }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT: str z0, [x1]
+; CHECK-NEXT: st1h { z1.s }, p0, [x1, #2, mul vl]
+; CHECK-NEXT: ret
+ %c = load <vscale x 12 x bfloat>, ptr %a
+ store <vscale x 12 x bfloat> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv13bf16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv13bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #13 // =0xd
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p1, [x1, #1, mul vl]
+; CHECK-NEXT: st1h { z1.h }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 13 x bfloat>, ptr %a
+ store <vscale x 13 x bfloat> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv14bf16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv14bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr z2, [x0]
+; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, #6, mul vl]
+; CHECK-NEXT: ld1h { z1.s }, p1/z, [x0, #2, mul vl]
+; CHECK-NEXT: str z2, [x1]
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT: st1h { z1.s }, p1, [x1, #2, mul vl]
+; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
+; CHECK-NEXT: uunpkhi z0.s, z0.h
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: st1h { z0.d }, p0, [x1, #6, mul vl]
+; CHECK-NEXT: ret
+ %c = load <vscale x 14 x bfloat>, ptr %a
+ store <vscale x 14 x bfloat> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv15bf16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv15bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #15 // =0xf
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p1, [x1, #1, mul vl]
+; CHECK-NEXT: st1h { z1.h }, p0, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 15 x bfloat>, ptr %a
+ store <vscale x 15 x bfloat> %c, ptr %b
+ ret void
+}
+
+define void @sve_load_store_nxv16bf16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_load_store_nxv16bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr z0, [x0, #1, mul vl]
+; CHECK-NEXT: ldr z1, [x0]
+; CHECK-NEXT: str z0, [x1, #1, mul vl]
+; CHECK-NEXT: str z1, [x1]
+; CHECK-NEXT: ret
+ %c = load <vscale x 16 x bfloat>, ptr %a
+ store <vscale x 16 x bfloat> %c, ptr %b
+ ret void
+}
+
+define <vscale x 1 x i16> @sve_sextload_nxv1i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv1i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: ld1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 1 x i8>, ptr %a
+ %c.sext = sext <vscale x 1 x i8> %c to <vscale x 1 x i16>
+ ret <vscale x 1 x i16> %c.sext
+}
+
+define <vscale x 2 x i16> @sve_sextload_nxv2i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv2i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 2 x i8>, ptr %a
+ %c.sext = sext <vscale x 2 x i8> %c to <vscale x 2 x i16>
+ ret <vscale x 2 x i16> %c.sext
+}
+
+define <vscale x 3 x i16> @sve_sextload_nxv3i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv3i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #3 // =0x3
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 3 x i8>, ptr %a
+ %c.sext = sext <vscale x 3 x i8> %c to <vscale x 3 x i16>
+ ret <vscale x 3 x i16> %c.sext
+}
+
+define <vscale x 4 x i16> @sve_sextload_nxv4i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv4i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 4 x i8>, ptr %a
+ %c.sext = sext <vscale x 4 x i8> %c to <vscale x 4 x i16>
+ ret <vscale x 4 x i16> %c.sext
+}
+
+define <vscale x 5 x i16> @sve_sextload_nxv5i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv5i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #5 // =0x5
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: ld1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 5 x i8>, ptr %a
+ %c.sext = sext <vscale x 5 x i8> %c to <vscale x 5 x i16>
+ ret <vscale x 5 x i16> %c.sext
+}
+
+define <vscale x 6 x i16> @sve_sextload_nxv6i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv6i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntd x8, all, mul #3
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: ld1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 6 x i8>, ptr %a
+ %c.sext = sext <vscale x 6 x i8> %c to <vscale x 6 x i16>
+ ret <vscale x 6 x i16> %c.sext
+}
+
+define <vscale x 7 x i16> @sve_sextload_nxv7i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv7i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #7 // =0x7
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: ld1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 7 x i8>, ptr %a
+ %c.sext = sext <vscale x 7 x i8> %c to <vscale x 7 x i16>
+ ret <vscale x 7 x i16> %c.sext
+}
+
+define <vscale x 8 x i16> @sve_sextload_nxv8i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: ld1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 8 x i8>, ptr %a
+ %c.sext = sext <vscale x 8 x i8> %c to <vscale x 8 x i16>
+ ret <vscale x 8 x i16> %c.sext
+}
+
+define <vscale x 9 x i16> @sve_sextload_nxv9i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv9i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #9 // =0x9
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1sb { z0.h }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1sb { z1.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p1, [sp, #1, mul vl]
+; CHECK-NEXT: st1h { z1.h }, p0, [sp]
+; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
+; CHECK-NEXT: ldr z0, [sp]
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %c = load <vscale x 9 x i8>, ptr %a
+ %c.sext = sext <vscale x 9 x i8> %c to <vscale x 9 x i16>
+ ret <vscale x 9 x i16> %c.sext
+}
+
+define <vscale x 10 x i16> @sve_sextload_nxv10i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv10i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: cntd x8, all, mul #5
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1sb { z0.h }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1sb { z1.h }, p0/z, [x0]
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: str z1, [sp]
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: st1h { z0.d }, p0, [sp, #4, mul vl]
+; CHECK-NEXT: ldr z0, [sp]
+; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %c = load <vscale x 10 x i8>, ptr %a
+ %c.sext = sext <vscale x 10 x i8> %c to <vscale x 10 x i16>
+ ret <vscale x 10 x i16> %c.sext
+}
+
+define <vscale x 11 x i16> @sve_sextload_nxv11i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv11i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #11 // =0xb
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1sb { z0.h }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1sb { z1.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p1, [sp, #1, mul vl]
+; CHECK-NEXT: st1h { z1.h }, p0, [sp]
+; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
+; CHECK-NEXT: ldr z0, [sp]
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %c = load <vscale x 11 x i8>, ptr %a
+ %c.sext = sext <vscale x 11 x i8> %c to <vscale x 11 x i16>
+ ret <vscale x 11 x i16> %c.sext
+}
+
+define <vscale x 12 x i16> @sve_sextload_nxv12i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv12i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: cntw x8, all, mul #3
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1sb { z0.h }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: ld1sb { z1.h }, p0/z, [x0]
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: str z1, [sp]
+; CHECK-NEXT: st1h { z0.s }, p1, [sp, #2, mul vl]
+; CHECK-NEXT: ldr z0, [sp]
+; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %c = load <vscale x 12 x i8>, ptr %a
+ %c.sext = sext <vscale x 12 x i8> %c to <vscale x 12 x i16>
+ ret <vscale x 12 x i16> %c.sext
+}
+
+define <vscale x 13 x i16> @sve_sextload_nxv13i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv13i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #13 // =0xd
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1sb { z0.h }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1sb { z1.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p1, [sp, #1, mul vl]
+; CHECK-NEXT: st1h { z1.h }, p0, [sp]
+; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
+; CHECK-NEXT: ldr z0, [sp]
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %c = load <vscale x 13 x i8>, ptr %a
+ %c.sext = sext <vscale x 13 x i8> %c to <vscale x 13 x i16>
+ ret <vscale x 13 x i16> %c.sext
+}
+
+define <vscale x 14 x i16> @sve_sextload_nxv14i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv14i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: cntd x8, all, mul #7
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1sb { z0.h }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: ld1sb { z2.h }, p0/z, [x0]
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpkhi z1.s, z0.h
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: str z2, [sp]
+; CHECK-NEXT: uunpklo z1.d, z1.s
+; CHECK-NEXT: st1h { z0.s }, p1, [sp, #2, mul vl]
+; CHECK-NEXT: ldr z0, [sp]
+; CHECK-NEXT: st1h { z1.d }, p0, [sp, #6, mul vl]
+; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %c = load <vscale x 14 x i8>, ptr %a
+ %c.sext = sext <vscale x 14 x i8> %c to <vscale x 14 x i16>
+ ret <vscale x 14 x i16> %c.sext
+}
+
+define <vscale x 15 x i16> @sve_sextload_nxv15i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv15i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #15 // =0xf
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1sb { z0.h }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1sb { z1.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p1, [sp, #1, mul vl]
+; CHECK-NEXT: st1h { z1.h }, p0, [sp]
+; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
+; CHECK-NEXT: ldr z0, [sp]
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %c = load <vscale x 15 x i8>, ptr %a
+ %c.sext = sext <vscale x 15 x i8> %c to <vscale x 15 x i16>
+ ret <vscale x 15 x i16> %c.sext
+}
+
+define <vscale x 16 x i16> @sve_sextload_nxv16i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: ld1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ld1sb { z1.h }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ret
+ %c = load <vscale x 16 x i8>, ptr %a
+ %c.sext = sext <vscale x 16 x i8> %c to <vscale x 16 x i16>
+ ret <vscale x 16 x i16> %c.sext
+}
+
+define <vscale x 1 x i32> @sve_sextload_nxv1i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv1i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 1 x i16>, ptr %a
+ %c.sext = sext <vscale x 1 x i16> %c to <vscale x 1 x i32>
+ ret <vscale x 1 x i32> %c.sext
+}
+
+define <vscale x 2 x i32> @sve_sextload_nxv2i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv2i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 2 x i16>, ptr %a
+ %c.sext = sext <vscale x 2 x i16> %c to <vscale x 2 x i32>
+ ret <vscale x 2 x i32> %c.sext
+}
+
+define <vscale x 3 x i32> @sve_sextload_nxv3i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv3i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #3 // =0x3
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 3 x i16>, ptr %a
+ %c.sext = sext <vscale x 3 x i16> %c to <vscale x 3 x i32>
+ ret <vscale x 3 x i32> %c.sext
+}
+
+define <vscale x 4 x i32> @sve_sextload_nxv4i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 4 x i16>, ptr %a
+ %c.sext = sext <vscale x 4 x i16> %c to <vscale x 4 x i32>
+ ret <vscale x 4 x i32> %c.sext
+}
+
+define <vscale x 5 x i32> @sve_sextload_nxv5i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv5i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #5 // =0x5
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1sh { z0.s }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1sh { z1.s }, p0/z, [x0]
+; CHECK-NEXT: st1w { z0.s }, p1, [sp, #1, mul vl]
+; CHECK-NEXT: st1w { z1.s }, p0, [sp]
+; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
+; CHECK-NEXT: ldr z0, [sp]
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %c = load <vscale x 5 x i16>, ptr %a
+ %c.sext = sext <vscale x 5 x i16> %c to <vscale x 5 x i32>
+ ret <vscale x 5 x i32> %c.sext
+}
+
+define <vscale x 6 x i32> @sve_sextload_nxv6i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv6i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: cntd x8, all, mul #3
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1sh { z0.s }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ptrue p1.d
+; CHECK-NEXT: ld1sh { z1.s }, p0/z, [x0]
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: str z1, [sp]
+; CHECK-NEXT: st1w { z0.d }, p1, [sp, #2, mul vl]
+; CHECK-NEXT: ldr z0, [sp]
+; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %c = load <vscale x 6 x i16>, ptr %a
+ %c.sext = sext <vscale x 6 x i16> %c to <vscale x 6 x i32>
+ ret <vscale x 6 x i32> %c.sext
+}
+
+define <vscale x 7 x i32> @sve_sextload_nxv7i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv7i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #7 // =0x7
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1sh { z0.s }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1sh { z1.s }, p0/z, [x0]
+; CHECK-NEXT: st1w { z0.s }, p1, [sp, #1, mul vl]
+; CHECK-NEXT: st1w { z1.s }, p0, [sp]
+; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
+; CHECK-NEXT: ldr z0, [sp]
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %c = load <vscale x 7 x i16>, ptr %a
+ %c.sext = sext <vscale x 7 x i16> %c to <vscale x 7 x i32>
+ ret <vscale x 7 x i32> %c.sext
+}
+
+define <vscale x 8 x i32> @sve_sextload_nxv8i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ld1sh { z1.s }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ret
+ %c = load <vscale x 8 x i16>, ptr %a
+ %c.sext = sext <vscale x 8 x i16> %c to <vscale x 8 x i32>
+ ret <vscale x 8 x i32> %c.sext
+}
+
+define <vscale x 1 x i64> @sve_sextload_nxv1i32(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv1i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: whilelo p0.d, xzr, x8
+; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 1 x i32>, ptr %a
+ %c.sext = sext <vscale x 1 x i32> %c to <vscale x 1 x i64>
+ ret <vscale x 1 x i64> %c.sext
+}
+
+define <vscale x 2 x i64> @sve_sextload_nxv2i32(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 2 x i32>, ptr %a
+ %c.sext = sext <vscale x 2 x i32> %c to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %c.sext
+}
+
+define <vscale x 3 x i64> @sve_sextload_nxv3i32(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv3i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #3 // =0x3
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1sw { z0.d }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1sw { z1.d }, p0/z, [x0]
+; CHECK-NEXT: st1d { z0.d }, p1, [sp, #1, mul vl]
+; CHECK-NEXT: st1d { z1.d }, p0, [sp]
+; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
+; CHECK-NEXT: ldr z0, [sp]
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %c = load <vscale x 3 x i32>, ptr %a
+ %c.sext = sext <vscale x 3 x i32> %c to <vscale x 3 x i64>
+ ret <vscale x 3 x i64> %c.sext
+}
+
+define <vscale x 4 x i64> @sve_sextload_nxv4i32(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_sextload_nxv4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ld1sw { z1.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ret
+ %c = load <vscale x 4 x i32>, ptr %a
+ %c.sext = sext <vscale x 4 x i32> %c to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %c.sext
+}
+
+define <vscale x 1 x i16> @sve_zextload_nxv1i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv1i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: ld1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 1 x i8>, ptr %a
+ %c.zext = sext <vscale x 1 x i8> %c to <vscale x 1 x i16>
+ ret <vscale x 1 x i16> %c.zext
+}
+
+define <vscale x 2 x i16> @sve_zextload_nxv2i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv2i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 2 x i8>, ptr %a
+ %c.zext = sext <vscale x 2 x i8> %c to <vscale x 2 x i16>
+ ret <vscale x 2 x i16> %c.zext
+}
+
+define <vscale x 3 x i16> @sve_zextload_nxv3i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv3i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #3 // =0x3
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 3 x i8>, ptr %a
+ %c.zext = sext <vscale x 3 x i8> %c to <vscale x 3 x i16>
+ ret <vscale x 3 x i16> %c.zext
+}
+
+define <vscale x 4 x i16> @sve_zextload_nxv4i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv4i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 4 x i8>, ptr %a
+ %c.zext = sext <vscale x 4 x i8> %c to <vscale x 4 x i16>
+ ret <vscale x 4 x i16> %c.zext
+}
+
+define <vscale x 5 x i16> @sve_zextload_nxv5i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv5i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #5 // =0x5
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: ld1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 5 x i8>, ptr %a
+ %c.zext = sext <vscale x 5 x i8> %c to <vscale x 5 x i16>
+ ret <vscale x 5 x i16> %c.zext
+}
+
+define <vscale x 6 x i16> @sve_zextload_nxv6i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv6i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntd x8, all, mul #3
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: ld1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 6 x i8>, ptr %a
+ %c.zext = sext <vscale x 6 x i8> %c to <vscale x 6 x i16>
+ ret <vscale x 6 x i16> %c.zext
+}
+
+define <vscale x 7 x i16> @sve_zextload_nxv7i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv7i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #7 // =0x7
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: ld1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 7 x i8>, ptr %a
+ %c.zext = sext <vscale x 7 x i8> %c to <vscale x 7 x i16>
+ ret <vscale x 7 x i16> %c.zext
+}
+
+define <vscale x 8 x i16> @sve_zextload_nxv8i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: ld1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 8 x i8>, ptr %a
+ %c.zext = sext <vscale x 8 x i8> %c to <vscale x 8 x i16>
+ ret <vscale x 8 x i16> %c.zext
+}
+
+define <vscale x 9 x i16> @sve_zextload_nxv9i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv9i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #9 // =0x9
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1sb { z0.h }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1sb { z1.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p1, [sp, #1, mul vl]
+; CHECK-NEXT: st1h { z1.h }, p0, [sp]
+; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
+; CHECK-NEXT: ldr z0, [sp]
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %c = load <vscale x 9 x i8>, ptr %a
+ %c.zext = sext <vscale x 9 x i8> %c to <vscale x 9 x i16>
+ ret <vscale x 9 x i16> %c.zext
+}
+
+define <vscale x 10 x i16> @sve_zextload_nxv10i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv10i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: cntd x8, all, mul #5
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1sb { z0.h }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1sb { z1.h }, p0/z, [x0]
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: str z1, [sp]
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: st1h { z0.d }, p0, [sp, #4, mul vl]
+; CHECK-NEXT: ldr z0, [sp]
+; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %c = load <vscale x 10 x i8>, ptr %a
+ %c.zext = sext <vscale x 10 x i8> %c to <vscale x 10 x i16>
+ ret <vscale x 10 x i16> %c.zext
+}
+
+define <vscale x 11 x i16> @sve_zextload_nxv11i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv11i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #11 // =0xb
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1sb { z0.h }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1sb { z1.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p1, [sp, #1, mul vl]
+; CHECK-NEXT: st1h { z1.h }, p0, [sp]
+; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
+; CHECK-NEXT: ldr z0, [sp]
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %c = load <vscale x 11 x i8>, ptr %a
+ %c.zext = sext <vscale x 11 x i8> %c to <vscale x 11 x i16>
+ ret <vscale x 11 x i16> %c.zext
+}
+
+define <vscale x 12 x i16> @sve_zextload_nxv12i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv12i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: cntw x8, all, mul #3
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1sb { z0.h }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: ld1sb { z1.h }, p0/z, [x0]
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: str z1, [sp]
+; CHECK-NEXT: st1h { z0.s }, p1, [sp, #2, mul vl]
+; CHECK-NEXT: ldr z0, [sp]
+; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %c = load <vscale x 12 x i8>, ptr %a
+ %c.zext = sext <vscale x 12 x i8> %c to <vscale x 12 x i16>
+ ret <vscale x 12 x i16> %c.zext
+}
+
+define <vscale x 13 x i16> @sve_zextload_nxv13i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv13i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #13 // =0xd
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1sb { z0.h }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1sb { z1.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p1, [sp, #1, mul vl]
+; CHECK-NEXT: st1h { z1.h }, p0, [sp]
+; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
+; CHECK-NEXT: ldr z0, [sp]
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %c = load <vscale x 13 x i8>, ptr %a
+ %c.zext = sext <vscale x 13 x i8> %c to <vscale x 13 x i16>
+ ret <vscale x 13 x i16> %c.zext
+}
+
+define <vscale x 14 x i16> @sve_zextload_nxv14i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv14i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: cntd x8, all, mul #7
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1sb { z0.h }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: ld1sb { z2.h }, p0/z, [x0]
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpkhi z1.s, z0.h
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: str z2, [sp]
+; CHECK-NEXT: uunpklo z1.d, z1.s
+; CHECK-NEXT: st1h { z0.s }, p1, [sp, #2, mul vl]
+; CHECK-NEXT: ldr z0, [sp]
+; CHECK-NEXT: st1h { z1.d }, p0, [sp, #6, mul vl]
+; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %c = load <vscale x 14 x i8>, ptr %a
+ %c.zext = sext <vscale x 14 x i8> %c to <vscale x 14 x i16>
+ ret <vscale x 14 x i16> %c.zext
+}
+
+define <vscale x 15 x i16> @sve_zextload_nxv15i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv15i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #15 // =0xf
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1sb { z0.h }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1sb { z1.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p1, [sp, #1, mul vl]
+; CHECK-NEXT: st1h { z1.h }, p0, [sp]
+; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
+; CHECK-NEXT: ldr z0, [sp]
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %c = load <vscale x 15 x i8>, ptr %a
+ %c.zext = sext <vscale x 15 x i8> %c to <vscale x 15 x i16>
+ ret <vscale x 15 x i16> %c.zext
+}
+
+define <vscale x 16 x i16> @sve_zextload_nxv16i8(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: ld1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ld1sb { z1.h }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ret
+ %c = load <vscale x 16 x i8>, ptr %a
+ %c.zext = sext <vscale x 16 x i8> %c to <vscale x 16 x i16>
+ ret <vscale x 16 x i16> %c.zext
+}
+
+define <vscale x 1 x i32> @sve_zextload_nxv1i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv1i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 1 x i16>, ptr %a
+ %c.zext = sext <vscale x 1 x i16> %c to <vscale x 1 x i32>
+ ret <vscale x 1 x i32> %c.zext
+}
+
+define <vscale x 2 x i32> @sve_zextload_nxv2i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv2i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 2 x i16>, ptr %a
+ %c.zext = sext <vscale x 2 x i16> %c to <vscale x 2 x i32>
+ ret <vscale x 2 x i32> %c.zext
+}
+
+define <vscale x 3 x i32> @sve_zextload_nxv3i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv3i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #3 // =0x3
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 3 x i16>, ptr %a
+ %c.zext = sext <vscale x 3 x i16> %c to <vscale x 3 x i32>
+ ret <vscale x 3 x i32> %c.zext
+}
+
+define <vscale x 4 x i32> @sve_zextload_nxv4i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 4 x i16>, ptr %a
+ %c.zext = sext <vscale x 4 x i16> %c to <vscale x 4 x i32>
+ ret <vscale x 4 x i32> %c.zext
+}
+
+define <vscale x 5 x i32> @sve_zextload_nxv5i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv5i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #5 // =0x5
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1sh { z0.s }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1sh { z1.s }, p0/z, [x0]
+; CHECK-NEXT: st1w { z0.s }, p1, [sp, #1, mul vl]
+; CHECK-NEXT: st1w { z1.s }, p0, [sp]
+; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
+; CHECK-NEXT: ldr z0, [sp]
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %c = load <vscale x 5 x i16>, ptr %a
+ %c.zext = sext <vscale x 5 x i16> %c to <vscale x 5 x i32>
+ ret <vscale x 5 x i32> %c.zext
+}
+
+define <vscale x 6 x i32> @sve_zextload_nxv6i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv6i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: cntd x8, all, mul #3
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1sh { z0.s }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ptrue p1.d
+; CHECK-NEXT: ld1sh { z1.s }, p0/z, [x0]
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: str z1, [sp]
+; CHECK-NEXT: st1w { z0.d }, p1, [sp, #2, mul vl]
+; CHECK-NEXT: ldr z0, [sp]
+; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %c = load <vscale x 6 x i16>, ptr %a
+ %c.zext = sext <vscale x 6 x i16> %c to <vscale x 6 x i32>
+ ret <vscale x 6 x i32> %c.zext
+}
+
+define <vscale x 7 x i32> @sve_zextload_nxv7i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv7i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #7 // =0x7
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1sh { z0.s }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1sh { z1.s }, p0/z, [x0]
+; CHECK-NEXT: st1w { z0.s }, p1, [sp, #1, mul vl]
+; CHECK-NEXT: st1w { z1.s }, p0, [sp]
+; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
+; CHECK-NEXT: ldr z0, [sp]
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %c = load <vscale x 7 x i16>, ptr %a
+ %c.zext = sext <vscale x 7 x i16> %c to <vscale x 7 x i32>
+ ret <vscale x 7 x i32> %c.zext
+}
+
+define <vscale x 8 x i32> @sve_zextload_nxv8i16(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ld1sh { z1.s }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ret
+ %c = load <vscale x 8 x i16>, ptr %a
+ %c.zext = sext <vscale x 8 x i16> %c to <vscale x 8 x i32>
+ ret <vscale x 8 x i32> %c.zext
+}
+
+define <vscale x 1 x i64> @sve_zextload_nxv1i32(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv1i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: whilelo p0.d, xzr, x8
+; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 1 x i32>, ptr %a
+ %c.zext = sext <vscale x 1 x i32> %c to <vscale x 1 x i64>
+ ret <vscale x 1 x i64> %c.zext
+}
+
+define <vscale x 2 x i64> @sve_zextload_nxv2i32(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %c = load <vscale x 2 x i32>, ptr %a
+ %c.zext = sext <vscale x 2 x i32> %c to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %c.zext
+}
+
+define <vscale x 3 x i64> @sve_zextload_nxv3i32(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv3i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov w9, #3 // =0x3
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1sw { z0.d }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1sw { z1.d }, p0/z, [x0]
+; CHECK-NEXT: st1d { z0.d }, p1, [sp, #1, mul vl]
+; CHECK-NEXT: st1d { z1.d }, p0, [sp]
+; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
+; CHECK-NEXT: ldr z0, [sp]
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %c = load <vscale x 3 x i32>, ptr %a
+ %c.zext = sext <vscale x 3 x i32> %c to <vscale x 3 x i64>
+ ret <vscale x 3 x i64> %c.zext
+}
+
+define <vscale x 4 x i64> @sve_zextload_nxv4i32(ptr %a, ptr %b) {
+; CHECK-LABEL: sve_zextload_nxv4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ld1sw { z1.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ret
+ %c = load <vscale x 4 x i32>, ptr %a
+ %c.zext = sext <vscale x 4 x i32> %c to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %c.zext
+}
diff --git a/llvm/test/CodeGen/AMDGPU/limit-coalesce.mir b/llvm/test/CodeGen/AMDGPU/limit-coalesce.mir
index ca77482..fa52b96 100644
--- a/llvm/test/CodeGen/AMDGPU/limit-coalesce.mir
+++ b/llvm/test/CodeGen/AMDGPU/limit-coalesce.mir
@@ -1,19 +1,9 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
# RUN: llc -mtriple=amdgcn -run-pass register-coalescer -o - %s | FileCheck %s
-# Check that coalescer does not create wider register tuple than in source
-
-# CHECK: - { id: 2, class: vreg_64, preferred-register: '', flags: [ ] }
-# CHECK: - { id: 3, class: vreg_64, preferred-register: '', flags: [ ] }
-# CHECK: - { id: 4, class: vreg_64, preferred-register: '', flags: [ ] }
-# CHECK: - { id: 5, class: vreg_96, preferred-register: '', flags: [ ] }
-# CHECK: - { id: 6, class: vreg_96, preferred-register: '', flags: [ ] }
-# CHECK: - { id: 7, class: vreg_128, preferred-register: '', flags: [ ] }
-# CHECK: - { id: 8, class: vreg_128, preferred-register: '', flags: [ ] }
+# Check that the coalescer does not create a wider register tuple than in the
+# source.
# No more registers shall be defined
-# CHECK-NEXT: liveins:
-# CHECK: FLAT_STORE_DWORDX2 $vgpr0_vgpr1, %4,
-# CHECK: FLAT_STORE_DWORDX3 $vgpr0_vgpr1, %6,
-
---
name: main
alignment: 1
@@ -52,6 +42,23 @@ body: |
bb.0.entry:
liveins: $sgpr0, $vgpr0_vgpr1
+ ; CHECK-LABEL: name: main
+ ; CHECK: liveins: $sgpr0, $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:vreg_64 = COPY [[DEF]].sub0
+ ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:vreg_64 = COPY [[COPY]].sub1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:vreg_64 = COPY [[COPY]].sub0
+ ; CHECK-NEXT: FLAT_STORE_DWORDX2 $vgpr0_vgpr1, [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_96 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub0_sub1:vreg_96 = COPY [[DEF1]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]].sub2:vreg_96 = COPY [[DEF]].sub0
+ ; CHECK-NEXT: FLAT_STORE_DWORDX3 $vgpr0_vgpr1, [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0_sub1_sub2:vreg_128 = COPY [[DEF2]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]].sub3:vreg_128 = COPY [[DEF]].sub0
+ ; CHECK-NEXT: FLAT_STORE_DWORDX4 $vgpr0_vgpr1, [[COPY3]], 0, 0, implicit $exec, implicit $flat_scr
%3 = IMPLICIT_DEF
undef %4.sub0 = COPY $sgpr0
%4.sub1 = COPY %3.sub0
diff --git a/llvm/test/CodeGen/Hexagon/unaligned-vec-store.ll b/llvm/test/CodeGen/Hexagon/unaligned-vec-store.ll
new file mode 100644
index 0000000..267e365
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/unaligned-vec-store.ll
@@ -0,0 +1,23 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv68 -mattr=+hvxv68,+hvx-length128B < %s | FileCheck %s
+; REQUIRES: asserts
+
+; Check that the test does not assert when the unaligned vector store V6_vS32Ub_npred_ai is generated.
+; CHECK: if (!p{{[0-3]}}) vmemu
+
+target triple = "hexagon-unknown-unknown-elf"
+
+define fastcc void @test(i1 %cmp.i.i) {
+entry:
+ %call.i.i.i172 = load ptr, ptr null, align 4
+ %add.ptr = getelementptr i8, ptr %call.i.i.i172, i32 1
+ store <32 x i32> zeroinitializer, ptr %add.ptr, align 128
+ %add.ptr4.i4 = getelementptr i8, ptr %call.i.i.i172, i32 129
+ br i1 %cmp.i.i, label %common.ret, label %if.end.i.i
+
+common.ret: ; preds = %if.end.i.i, %entry
+ ret void
+
+if.end.i.i: ; preds = %entry
+ store <32 x i32> zeroinitializer, ptr %add.ptr4.i4, align 1
+ br label %common.ret
+}
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/ImplicitBinding.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/ImplicitBinding.ll
index cd52498..2964da9 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-resources/ImplicitBinding.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/ImplicitBinding.ll
@@ -32,6 +32,7 @@
; CHECK-DAG: OpDecorate [[g]] Binding 0
; CHECK-DAG: OpDecorate [[h]] DescriptorSet 10
; CHECK-DAG: OpDecorate [[h]] Binding 3
+; CHECK-NOT: OpDecorate [[h]] Binding 4
; CHECK-DAG: OpDecorate [[i]] DescriptorSet 10
; CHECK-DAG: OpDecorate [[i]] Binding 2
@@ -44,30 +45,34 @@ entry:
%3 = tail call target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) @llvm.spv.resource.handlefrombinding.tspirv.SignedImage_i32_5_2_0_0_2_0t(i32 0, i32 2, i32 1, i32 0, ptr nonnull @.str.6)
%4 = tail call target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) @llvm.spv.resource.handlefrombinding.tspirv.SignedImage_i32_5_2_0_0_2_0t(i32 10, i32 1, i32 1, i32 0, ptr nonnull @.str.8)
%5 = tail call target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.SignedImage_i32_5_2_0_0_2_0t(i32 2, i32 10, i32 1, i32 0, ptr nonnull @.str.10)
- %6 = tail call target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.SignedImage_i32_5_2_0_0_2_0t(i32 3, i32 10, i32 1, i32 0, ptr nonnull @.str.12)
- %7 = tail call target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) @llvm.spv.resource.handlefrombinding.tspirv.SignedImage_i32_5_2_0_0_2_0t(i32 10, i32 2, i32 1, i32 0, ptr nonnull @.str.14)
- %8 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %1, i32 0)
- %9 = load i32, ptr addrspace(11) %8, align 4
- %10 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %2, i32 0)
- %11 = load i32, ptr addrspace(11) %10, align 4
- %add.i = add nsw i32 %11, %9
- %12 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %3, i32 0)
- %13 = load i32, ptr addrspace(11) %12, align 4
- %add4.i = add nsw i32 %add.i, %13
- %14 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %4, i32 0)
- %15 = load i32, ptr addrspace(11) %14, align 4
- %add6.i = add nsw i32 %add4.i, %15
- %16 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %5, i32 0)
- %17 = load i32, ptr addrspace(11) %16, align 4
- %add8.i = add nsw i32 %add6.i, %17
- %18 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %6, i32 0)
- %19 = load i32, ptr addrspace(11) %18, align 4
- %add10.i = add nsw i32 %add8.i, %19
- %20 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %7, i32 0)
- %21 = load i32, ptr addrspace(11) %20, align 4
- %add12.i = add nsw i32 %add10.i, %21
- %22 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %0, i32 0)
- store i32 %add12.i, ptr addrspace(11) %22, align 4
+ %6 = tail call target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.SignedImage_i32_5_2_0_0_2_0t(i32 3, i32 10, i32 2, i32 0, ptr nonnull @.str.12)
+ %7 = tail call target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.SignedImage_i32_5_2_0_0_2_0t(i32 3, i32 10, i32 2, i32 1, ptr nonnull @.str.12)
+ %8 = tail call target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) @llvm.spv.resource.handlefrombinding.tspirv.SignedImage_i32_5_2_0_0_2_0t(i32 10, i32 2, i32 1, i32 0, ptr nonnull @.str.14)
+ %9 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %1, i32 0)
+ %10 = load i32, ptr addrspace(11) %9, align 4
+ %11 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %2, i32 0)
+ %12 = load i32, ptr addrspace(11) %11, align 4
+ %add.i = add nsw i32 %12, %10
+ %13 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %3, i32 0)
+ %14 = load i32, ptr addrspace(11) %13, align 4
+ %add4.i = add nsw i32 %add.i, %14
+ %15 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %4, i32 0)
+ %16 = load i32, ptr addrspace(11) %15, align 4
+ %add6.i = add nsw i32 %add4.i, %16
+ %17 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %5, i32 0)
+ %18 = load i32, ptr addrspace(11) %17, align 4
+ %add8.i = add nsw i32 %add6.i, %18
+ %19 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %6, i32 0)
+ %20 = load i32, ptr addrspace(11) %19, align 4
+ %add10.i = add nsw i32 %add8.i, %20
+ %21 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %7, i32 0)
+ %22 = load i32, ptr addrspace(11) %21, align 4
+ %add12.i = add nsw i32 %add10.i, %22
+ %23 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %8, i32 0)
+ %24 = load i32, ptr addrspace(11) %23, align 4
+ %add14.i = add nsw i32 %add12.i, %24
+ %25 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %0, i32 0)
+ store i32 %add14.i, ptr addrspace(11) %25, align 4
ret void
}
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/UniqueImplicitBindingNumber.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/UniqueImplicitBindingNumber.ll
new file mode 100644
index 0000000..c968c99
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/UniqueImplicitBindingNumber.ll
@@ -0,0 +1,19 @@
+; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
+; CHECK-ERROR: LLVM ERROR: Implicit binding calls with the same order ID must have the same descriptor set
+
+@.str = private unnamed_addr constant [2 x i8] c"b\00", align 1
+@.str.2 = private unnamed_addr constant [2 x i8] c"c\00", align 1
+
+define void @main() local_unnamed_addr #0 {
+entry:
+ %0 = tail call target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.SignedImage_i32_5_2_0_0_2_0t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str)
+ %1 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %0, i32 0)
+ %2 = load i32, ptr addrspace(11) %1, align 4
+ %3 = tail call target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.SignedImage_i32_5_2_0_0_2_0t(i32 0, i32 1, i32 1, i32 0, ptr nonnull @.str.2)
+ %4 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %3, i32 0)
+ store i32 %2, ptr addrspace(11) %4, align 4
+ ret void
+}
+
+
+attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" }
diff --git a/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll b/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll
index d3d6413..eb7c1b6 100644
--- a/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll
@@ -235,7 +235,7 @@ define half @f12_half(half %dummy, half %val, ptr %dest) {
; CHECK-NEXT: blah %f0
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
-; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: ltebr %f1, %f0
; CHECK-NEXT: jl .LBB11_2
; CHECK-NEXT:# %bb.1:
; CHECK-NEXT: lgdr %r0, %f8
@@ -344,7 +344,7 @@ define half @f15_half(half %val, half %dummy, ptr %dest) {
; CHECK-NEXT: blah %f2
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
-; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: ltebr %f1, %f0
; CHECK-NEXT: jl .LBB15_2
; CHECK-NEXT:# %bb.1:
; CHECK-NEXT: lgdr %r0, %f8
diff --git a/llvm/test/CodeGen/VE/Vector/vec_divrem.ll b/llvm/test/CodeGen/VE/Vector/vec_divrem.ll
index 3bc0aba..93e2889 100644
--- a/llvm/test/CodeGen/VE/Vector/vec_divrem.ll
+++ b/llvm/test/CodeGen/VE/Vector/vec_divrem.ll
@@ -7,19 +7,22 @@
define <4 x i8> @udiv_by_minus_one(<4 x i8> %x) {
; CHECK-LABEL: udiv_by_minus_one:
; CHECK: # %bb.0:
-; CHECK-NEXT: and %s0, %s0, (56)0
-; CHECK-NEXT: lea %s4, 16843010
-; CHECK-NEXT: muls.l %s0, %s0, %s4
-; CHECK-NEXT: srl %s0, %s0, 32
+; CHECK-NEXT: and %s4, %s0, (56)0
; CHECK-NEXT: and %s1, %s1, (56)0
-; CHECK-NEXT: muls.l %s1, %s1, %s4
-; CHECK-NEXT: srl %s1, %s1, 32
; CHECK-NEXT: and %s2, %s2, (56)0
-; CHECK-NEXT: muls.l %s2, %s2, %s4
-; CHECK-NEXT: srl %s2, %s2, 32
; CHECK-NEXT: and %s3, %s3, (56)0
-; CHECK-NEXT: muls.l %s3, %s3, %s4
-; CHECK-NEXT: srl %s3, %s3, 32
+; CHECK-NEXT: or %s0, 0, (0)1
+; CHECK-NEXT: cmpu.w %s5, %s3, (56)0
+; CHECK-NEXT: or %s3, 0, (0)1
+; CHECK-NEXT: cmov.w.eq %s3, (63)0, %s5
+; CHECK-NEXT: cmpu.w %s5, %s2, (56)0
+; CHECK-NEXT: or %s2, 0, (0)1
+; CHECK-NEXT: cmov.w.eq %s2, (63)0, %s5
+; CHECK-NEXT: cmpu.w %s5, %s1, (56)0
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.w.eq %s1, (63)0, %s5
+; CHECK-NEXT: cmpu.w %s4, %s4, (56)0
+; CHECK-NEXT: cmov.w.eq %s0, (63)0, %s4
; CHECK-NEXT: b.l.t (, %s10)
%r = udiv <4 x i8> %x, <i8 255, i8 255, i8 255, i8 255>
ret <4 x i8> %r
@@ -28,27 +31,18 @@ define <4 x i8> @udiv_by_minus_one(<4 x i8> %x) {
define <4 x i8> @urem_by_minus_one(<4 x i8> %x) {
; CHECK-LABEL: urem_by_minus_one:
; CHECK: # %bb.0:
-; CHECK-NEXT: and %s0, %s0, (56)0
-; CHECK-NEXT: and %s1, %s1, (56)0
-; CHECK-NEXT: and %s2, %s2, (56)0
-; CHECK-NEXT: and %s3, %s3, (56)0
-; CHECK-NEXT: lea %s4, 16843010
-; CHECK-NEXT: muls.l %s5, %s3, %s4
-; CHECK-NEXT: srl %s5, %s5, 32
-; CHECK-NEXT: muls.w.sx %s5, %s5, (56)0
-; CHECK-NEXT: subs.w.sx %s3, %s3, %s5
-; CHECK-NEXT: muls.l %s5, %s2, %s4
-; CHECK-NEXT: srl %s5, %s5, 32
-; CHECK-NEXT: muls.w.sx %s5, %s5, (56)0
-; CHECK-NEXT: subs.w.sx %s2, %s2, %s5
-; CHECK-NEXT: muls.l %s5, %s1, %s4
-; CHECK-NEXT: srl %s5, %s5, 32
-; CHECK-NEXT: muls.w.sx %s5, %s5, (56)0
-; CHECK-NEXT: subs.w.sx %s1, %s1, %s5
-; CHECK-NEXT: muls.l %s4, %s0, %s4
-; CHECK-NEXT: srl %s4, %s4, 32
-; CHECK-NEXT: muls.w.sx %s4, %s4, (56)0
-; CHECK-NEXT: subs.w.sx %s0, %s0, %s4
+; CHECK-NEXT: and %s4, %s0, (56)0
+; CHECK-NEXT: and %s5, %s1, (56)0
+; CHECK-NEXT: and %s6, %s2, (56)0
+; CHECK-NEXT: and %s7, %s3, (56)0
+; CHECK-NEXT: cmpu.w %s7, %s7, (56)0
+; CHECK-NEXT: cmov.w.eq %s3, (0)1, %s7
+; CHECK-NEXT: cmpu.w %s6, %s6, (56)0
+; CHECK-NEXT: cmov.w.eq %s2, (0)1, %s6
+; CHECK-NEXT: cmpu.w %s5, %s5, (56)0
+; CHECK-NEXT: cmov.w.eq %s1, (0)1, %s5
+; CHECK-NEXT: cmpu.w %s4, %s4, (56)0
+; CHECK-NEXT: cmov.w.eq %s0, (0)1, %s4
; CHECK-NEXT: b.l.t (, %s10)
%r = urem <4 x i8> %x, <i8 255, i8 255, i8 255, i8 255>
ret <4 x i8> %r
diff --git a/llvm/test/CodeGen/X86/fshl.ll b/llvm/test/CodeGen/X86/fshl.ll
index ec1b8a3..f998128 100644
--- a/llvm/test/CodeGen/X86/fshl.ll
+++ b/llvm/test/CodeGen/X86/fshl.ll
@@ -335,84 +335,83 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-SLOW-NEXT: pushl %esi
; X86-SLOW-NEXT: andl $-16, %esp
; X86-SLOW-NEXT: subl $32, %esp
-; X86-SLOW-NEXT: movl 24(%ebp), %esi
+; X86-SLOW-NEXT: movl 24(%ebp), %edi
; X86-SLOW-NEXT: movl 28(%ebp), %eax
; X86-SLOW-NEXT: movl 48(%ebp), %edx
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: testb $64, %cl
-; X86-SLOW-NEXT: movl 52(%ebp), %edi
+; X86-SLOW-NEXT: movl 52(%ebp), %ebx
; X86-SLOW-NEXT: jne .LBB6_1
; X86-SLOW-NEXT: # %bb.2:
; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %esi, %edx
-; X86-SLOW-NEXT: movl 32(%ebp), %esi
-; X86-SLOW-NEXT: movl %edi, %ecx
-; X86-SLOW-NEXT: movl %eax, %edi
+; X86-SLOW-NEXT: movl %edi, %edx
+; X86-SLOW-NEXT: movl 32(%ebp), %edi
+; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %eax, %ebx
; X86-SLOW-NEXT: movl 36(%ebp), %eax
; X86-SLOW-NEXT: jmp .LBB6_3
; X86-SLOW-NEXT: .LBB6_1:
; X86-SLOW-NEXT: movl 40(%ebp), %ecx
; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: movl 44(%ebp), %ecx
+; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: .LBB6_3:
-; X86-SLOW-NEXT: movl 56(%ebp), %ebx
-; X86-SLOW-NEXT: testb $32, %bl
+; X86-SLOW-NEXT: movl 56(%ebp), %ecx
+; X86-SLOW-NEXT: testb $32, %cl
; X86-SLOW-NEXT: jne .LBB6_4
; X86-SLOW-NEXT: # %bb.5:
-; X86-SLOW-NEXT: movl %ecx, %ebx
; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %edx, %edi
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: jmp .LBB6_6
; X86-SLOW-NEXT: .LBB6_4:
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ecx, %edx
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %edx, %ebx
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-SLOW-NEXT: .LBB6_6:
-; X86-SLOW-NEXT: movl %edx, %esi
+; X86-SLOW-NEXT: movl %edi, %eax
+; X86-SLOW-NEXT: shll %cl, %eax
+; X86-SLOW-NEXT: shrl %esi
+; X86-SLOW-NEXT: movl %ecx, %edx
+; X86-SLOW-NEXT: notb %dl
+; X86-SLOW-NEXT: movl %edx, %ecx
+; X86-SLOW-NEXT: shrl %cl, %esi
+; X86-SLOW-NEXT: orl %eax, %esi
+; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %ebx, %eax
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: movl %ebx, %edi
+; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: shll %cl, %eax
; X86-SLOW-NEXT: shrl %edi
-; X86-SLOW-NEXT: movl %ecx, %ebx
-; X86-SLOW-NEXT: notb %bl
-; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-SLOW-NEXT: movl %edx, %ecx
; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: orl %esi, %edi
+; X86-SLOW-NEXT: orl %eax, %edi
; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-SLOW-NEXT: movl %esi, %eax
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-SLOW-NEXT: shll %cl, %eax
-; X86-SLOW-NEXT: shrl %edx
-; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: shrl %cl, %edx
-; X86-SLOW-NEXT: orl %eax, %edx
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-SLOW-NEXT: movl %ebx, %eax
+; X86-SLOW-NEXT: shrl %ebx
+; X86-SLOW-NEXT: movl %edx, %ecx
+; X86-SLOW-NEXT: shrl %cl, %ebx
+; X86-SLOW-NEXT: orl %eax, %ebx
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-SLOW-NEXT: shll %cl, %eax
; X86-SLOW-NEXT: shrl %esi
-; X86-SLOW-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-SLOW-NEXT: movl %edx, %ecx
; X86-SLOW-NEXT: shrl %cl, %esi
; X86-SLOW-NEXT: orl %eax, %esi
-; X86-SLOW-NEXT: movl 56(%ebp), %ecx
-; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-SLOW-NEXT: shll %cl, %eax
-; X86-SLOW-NEXT: shrl %ebx
-; X86-SLOW-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X86-SLOW-NEXT: shrl %cl, %ebx
-; X86-SLOW-NEXT: orl %eax, %ebx
; X86-SLOW-NEXT: movl 8(%ebp), %eax
-; X86-SLOW-NEXT: movl %ebx, 12(%eax)
-; X86-SLOW-NEXT: movl %esi, 8(%eax)
-; X86-SLOW-NEXT: movl %edx, 4(%eax)
-; X86-SLOW-NEXT: movl %edi, (%eax)
+; X86-SLOW-NEXT: movl %esi, 12(%eax)
+; X86-SLOW-NEXT: movl %ebx, 8(%eax)
+; X86-SLOW-NEXT: movl %edi, 4(%eax)
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-SLOW-NEXT: movl %ecx, (%eax)
; X86-SLOW-NEXT: leal -12(%ebp), %esp
; X86-SLOW-NEXT: popl %esi
; X86-SLOW-NEXT: popl %edi
diff --git a/llvm/test/CodeGen/X86/fshr.ll b/llvm/test/CodeGen/X86/fshr.ll
index 544ab7f..c307833 100644
--- a/llvm/test/CodeGen/X86/fshr.ll
+++ b/llvm/test/CodeGen/X86/fshr.ll
@@ -322,79 +322,79 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-SLOW-NEXT: subl $16, %esp
; X86-SLOW-NEXT: movl 24(%ebp), %edx
; X86-SLOW-NEXT: movl 28(%ebp), %esi
-; X86-SLOW-NEXT: movl 48(%ebp), %ebx
+; X86-SLOW-NEXT: movl 48(%ebp), %edi
; X86-SLOW-NEXT: movl 56(%ebp), %eax
; X86-SLOW-NEXT: testb $64, %al
-; X86-SLOW-NEXT: movl 52(%ebp), %edi
+; X86-SLOW-NEXT: movl 52(%ebp), %eax
; X86-SLOW-NEXT: je .LBB6_1
; X86-SLOW-NEXT: # %bb.2:
-; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: movl %edx, %ebx
+; X86-SLOW-NEXT: movl %edi, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: movl %edx, %edi
; X86-SLOW-NEXT: movl 32(%ebp), %edx
-; X86-SLOW-NEXT: movl %edi, %eax
-; X86-SLOW-NEXT: movl %esi, %edi
+; X86-SLOW-NEXT: movl %eax, %ecx
+; X86-SLOW-NEXT: movl %esi, %eax
; X86-SLOW-NEXT: movl 36(%ebp), %esi
; X86-SLOW-NEXT: jmp .LBB6_3
; X86-SLOW-NEXT: .LBB6_1:
-; X86-SLOW-NEXT: movl 40(%ebp), %eax
-; X86-SLOW-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: movl 44(%ebp), %eax
+; X86-SLOW-NEXT: movl 40(%ebp), %ecx
+; X86-SLOW-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: movl 44(%ebp), %ecx
; X86-SLOW-NEXT: .LBB6_3:
-; X86-SLOW-NEXT: movl 56(%ebp), %ecx
-; X86-SLOW-NEXT: testb $32, %cl
+; X86-SLOW-NEXT: movl 56(%ebp), %ebx
+; X86-SLOW-NEXT: testb $32, %bl
; X86-SLOW-NEXT: je .LBB6_4
; X86-SLOW-NEXT: # %bb.5:
-; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %ecx, %ebx
; X86-SLOW-NEXT: jmp .LBB6_6
; X86-SLOW-NEXT: .LBB6_4:
; X86-SLOW-NEXT: movl %edx, %esi
+; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %eax, %ebx
-; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-SLOW-NEXT: movl %ecx, %edi
+; X86-SLOW-NEXT: movl (%esp), %ebx # 4-byte Reload
; X86-SLOW-NEXT: .LBB6_6:
-; X86-SLOW-NEXT: shrl %cl, %eax
-; X86-SLOW-NEXT: movl %eax, %edx
-; X86-SLOW-NEXT: movl %ecx, %eax
-; X86-SLOW-NEXT: notb %al
-; X86-SLOW-NEXT: movl %ebx, %edi
-; X86-SLOW-NEXT: addl %ebx, %ebx
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shll %cl, %ebx
-; X86-SLOW-NEXT: orl %edx, %ebx
-; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
-; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-SLOW-NEXT: leal (%ebx,%ebx), %edx
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shll %cl, %edx
-; X86-SLOW-NEXT: orl %edi, %edx
+; X86-SLOW-NEXT: shrl %cl, %ebx
+; X86-SLOW-NEXT: movl %ecx, %edx
+; X86-SLOW-NEXT: notb %dl
+; X86-SLOW-NEXT: movl %edi, %eax
+; X86-SLOW-NEXT: addl %edi, %edi
+; X86-SLOW-NEXT: movl %edx, %ecx
+; X86-SLOW-NEXT: shll %cl, %edi
+; X86-SLOW-NEXT: orl %ebx, %edi
+; X86-SLOW-NEXT: movl %edi, (%esp) # 4-byte Spill
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-SLOW-NEXT: shrl %cl, %ebx
-; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: shrl %cl, %eax
; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-SLOW-NEXT: leal (%edi,%edi), %ebx
-; X86-SLOW-NEXT: movl %eax, %ecx
+; X86-SLOW-NEXT: movl %edx, %ecx
; X86-SLOW-NEXT: shll %cl, %ebx
-; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-SLOW-NEXT: orl %eax, %ebx
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-SLOW-NEXT: shrl %cl, %edi
+; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-SLOW-NEXT: leal (%eax,%eax), %edi
+; X86-SLOW-NEXT: movl %edx, %ecx
+; X86-SLOW-NEXT: shll %cl, %edi
+; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-SLOW-NEXT: movl 56(%ebp), %ecx
+; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: shrl %cl, %eax
; X86-SLOW-NEXT: addl %esi, %esi
-; X86-SLOW-NEXT: movl %eax, %ecx
+; X86-SLOW-NEXT: movl %edx, %ecx
; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: orl %edi, %esi
-; X86-SLOW-NEXT: movl 8(%ebp), %ecx
-; X86-SLOW-NEXT: movl %esi, 12(%ecx)
-; X86-SLOW-NEXT: movl %ebx, 8(%ecx)
-; X86-SLOW-NEXT: movl %edx, 4(%ecx)
-; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload
-; X86-SLOW-NEXT: movl %eax, (%ecx)
-; X86-SLOW-NEXT: movl %ecx, %eax
+; X86-SLOW-NEXT: orl %eax, %esi
+; X86-SLOW-NEXT: movl 8(%ebp), %eax
+; X86-SLOW-NEXT: movl %esi, 12(%eax)
+; X86-SLOW-NEXT: movl %edi, 8(%eax)
+; X86-SLOW-NEXT: movl %ebx, 4(%eax)
+; X86-SLOW-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X86-SLOW-NEXT: movl %ecx, (%eax)
; X86-SLOW-NEXT: leal -12(%ebp), %esp
; X86-SLOW-NEXT: popl %esi
; X86-SLOW-NEXT: popl %edi
diff --git a/llvm/test/CodeGen/X86/sbb.ll b/llvm/test/CodeGen/X86/sbb.ll
index 78d609d..f5a3468 100644
--- a/llvm/test/CodeGen/X86/sbb.ll
+++ b/llvm/test/CodeGen/X86/sbb.ll
@@ -365,3 +365,32 @@ define i32 @uge_sext_add(i32 %0, i32 %1, i32 %2) {
%6 = add nsw i32 %5, %0
ret i32 %6
}
+
+define i32 @sub_sub_ugt(i32 %a, i32 %b) {
+; CHECK-LABEL: sub_sub_ugt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: cmpl %edi, %esi
+; CHECK-NEXT: sbbl %esi, %eax
+; CHECK-NEXT: retq
+ %cmp = icmp ugt i32 %a, %b
+ %conv = zext i1 %cmp to i32
+ %sub = sub i32 %a, %b
+ %res = sub i32 %sub, %conv
+ ret i32 %res
+}
+
+define i32 @sub_sub_ult(i32 %a, i32 %b) {
+; CHECK-LABEL: sub_sub_ult:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: cmpl %edi, %esi
+; CHECK-NEXT: sbbl %esi, %eax
+; CHECK-NEXT: retq
+ %cmp = icmp ult i32 %b, %a
+ %conv = zext i1 %cmp to i32
+ %sub = sub i32 %a, %b
+ %res = sub i32 %sub, %conv
+ ret i32 %res
+}
+
diff --git a/llvm/test/CodeGen/X86/shift-i128.ll b/llvm/test/CodeGen/X86/shift-i128.ll
index 7462c77..049ee47 100644
--- a/llvm/test/CodeGen/X86/shift-i128.ll
+++ b/llvm/test/CodeGen/X86/shift-i128.ll
@@ -613,8 +613,7 @@ define void @test_shl_v2i128(<2 x i128> %x, <2 x i128> %a, ptr nocapture %r) nou
; i686-NEXT: shldl %cl, %esi, %ebx
; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; i686-NEXT: movl %edi, %esi
-; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; i686-NEXT: movl %eax, %ecx
+; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; i686-NEXT: shll %cl, %esi
; i686-NEXT: shldl %cl, %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; i686-NEXT: negl %edx
diff --git a/llvm/test/DebugInfo/AArch64/asan-stack-vars.mir b/llvm/test/DebugInfo/AArch64/asan-stack-vars.mir
index 5d644c3..718fa6f 100644
--- a/llvm/test/DebugInfo/AArch64/asan-stack-vars.mir
+++ b/llvm/test/DebugInfo/AArch64/asan-stack-vars.mir
@@ -366,7 +366,8 @@ frameInfo:
maxCallFrameSize: 0
localFrameSize: 144
machineFunctionInfo:
- stackSizeSVE: 0
+ stackSizeZPR: 0
+ stackSizePPR: 0
stack:
- { id: 0, name: StackGuardSlot, offset: -40, size: 8, alignment: 8,
stack-id: default, local-offset: -8 }
diff --git a/llvm/test/DebugInfo/AArch64/compiler-gen-bbs-livedebugvalues.mir b/llvm/test/DebugInfo/AArch64/compiler-gen-bbs-livedebugvalues.mir
index 013d933..b7a9892 100644
--- a/llvm/test/DebugInfo/AArch64/compiler-gen-bbs-livedebugvalues.mir
+++ b/llvm/test/DebugInfo/AArch64/compiler-gen-bbs-livedebugvalues.mir
@@ -69,7 +69,8 @@ frameInfo:
hasCalls: true
maxCallFrameSize: 0
machineFunctionInfo:
- stackSizeSVE: 0
+ stackSizeZPR: 0
+ stackSizePPR: 0
stack:
- { id: 0, type: spill-slot, offset: -20, size: 4, alignment: 4, stack-id: default }
- { id: 1, type: spill-slot, offset: -8, size: 8, alignment: 8, stack-id: default,
diff --git a/llvm/test/Transforms/GVN/condprop.ll b/llvm/test/Transforms/GVN/condprop.ll
index 15ffcbf..eb2a9f1 100644
--- a/llvm/test/Transforms/GVN/condprop.ll
+++ b/llvm/test/Transforms/GVN/condprop.ll
@@ -321,6 +321,66 @@ different:
ret i1 %cmp3
}
+define i1 @test6_phi1(i1 %c, i32 %x, i32 %y) {
+; CHECK-LABEL: @test6_phi1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], [[Y]]
+; CHECK-NEXT: br i1 [[CMP]], label [[BB2]], label [[BB3:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: [[PHI:%.*]] = phi i1 [ false, [[BB1]] ], [ true, [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret i1 [[PHI]]
+; CHECK: bb3:
+; CHECK-NEXT: ret i1 false
+;
+entry:
+ %cmp.not = icmp ne i32 %x, %y
+ br i1 %c, label %bb1, label %bb2
+
+bb1:
+ %cmp = icmp eq i32 %x, %y
+ br i1 %cmp, label %bb2, label %bb3
+
+bb2:
+ %phi = phi i1 [ %cmp.not, %bb1 ], [ true, %entry ]
+ ret i1 %phi
+
+bb3:
+ ret i1 false
+}
+
+define i1 @test6_phi2(i1 %c, i32 %x, i32 %y) {
+; CHECK-LABEL: @test6_phi2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], [[Y]]
+; CHECK-NEXT: br i1 [[CMP]], label [[BB2]], label [[BB3:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: [[PHI:%.*]] = phi i1 [ [[CMP_NOT]], [[BB1]] ], [ true, [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret i1 [[PHI]]
+; CHECK: bb3:
+; CHECK-NEXT: ret i1 false
+;
+entry:
+ br i1 %c, label %bb1, label %bb2
+
+bb1:
+ %cmp.not = icmp ne i32 %x, %y
+ %cmp = icmp eq i32 %x, %y
+ br i1 %cmp, label %bb2, label %bb3
+
+bb2:
+ %phi = phi i1 [ %cmp.not, %bb1 ], [ true, %entry ]
+ ret i1 %phi
+
+bb3:
+ ret i1 false
+}
+
define i1 @test7(i32 %x, i32 %y) {
; CHECK-LABEL: @test7(
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-constant-ops.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-constant-ops.ll
index 0086f6e..b033f60 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-constant-ops.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-constant-ops.ll
@@ -20,22 +20,22 @@ define i32 @red_zext_mul_by_63(ptr %start, ptr %end) {
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i32> [[TMP3]], splat (i32 63)
-; CHECK-NEXT: [[TMP5]] = add <16 x i32> [[VEC_PHI]], [[TMP4]]
+; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP4]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP5]])
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[GEP_IV_NEXT:%.*]], %[[LOOP]] ]
@@ -48,7 +48,7 @@ define i32 @red_zext_mul_by_63(ptr %start, ptr %end) {
; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV]], [[END]]
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[RED_NEXT_LCSSA]]
;
entry:
@@ -86,17 +86,17 @@ define i32 @red_zext_mul_by_255(ptr %start, ptr %end) {
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i32> [[TMP3]], splat (i32 255)
-; CHECK-NEXT: [[TMP5]] = add <16 x i32> [[VEC_PHI]], [[TMP4]]
+; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP4]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP5]])
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
@@ -218,22 +218,22 @@ define i32 @red_sext_mul_by_63(ptr %start, ptr %end) {
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i32> [[TMP3]], splat (i32 63)
-; CHECK-NEXT: [[TMP5]] = add <16 x i32> [[VEC_PHI]], [[TMP4]]
+; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP4]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP5]])
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[GEP_IV_NEXT:%.*]], %[[LOOP]] ]
@@ -246,7 +246,7 @@ define i32 @red_sext_mul_by_63(ptr %start, ptr %end) {
; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV]], [[END]]
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[RED_NEXT_LCSSA]]
;
entry:
diff --git a/mlir/.clang-format b/mlir/.clang-format
index a74fda4..76cc928 100644
--- a/mlir/.clang-format
+++ b/mlir/.clang-format
@@ -1,2 +1,3 @@
BasedOnStyle: LLVM
AlwaysBreakTemplateDeclarations: Yes
+LineEnding: LF
diff --git a/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h b/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h
index b5f985f..847951a 100644
--- a/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h
+++ b/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h
@@ -116,7 +116,8 @@ mlirApiObjectToCapsule(nanobind::handle apiObject) {
/// Casts object <-> MlirAffineMap.
template <>
struct type_caster<MlirAffineMap> {
- NB_TYPE_CASTER(MlirAffineMap, const_name("MlirAffineMap"))
+ NB_TYPE_CASTER(MlirAffineMap,
+ const_name(MAKE_MLIR_PYTHON_QUALNAME("ir.AffineMap")))
bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) noexcept {
if (auto capsule = mlirApiObjectToCapsule(src)) {
value = mlirPythonCapsuleToAffineMap(capsule->ptr());
@@ -138,7 +139,8 @@ struct type_caster<MlirAffineMap> {
/// Casts object <-> MlirAttribute.
template <>
struct type_caster<MlirAttribute> {
- NB_TYPE_CASTER(MlirAttribute, const_name("MlirAttribute"))
+ NB_TYPE_CASTER(MlirAttribute,
+ const_name(MAKE_MLIR_PYTHON_QUALNAME("ir.Attribute")))
bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) noexcept {
if (auto capsule = mlirApiObjectToCapsule(src)) {
value = mlirPythonCapsuleToAttribute(capsule->ptr());
@@ -161,7 +163,7 @@ struct type_caster<MlirAttribute> {
/// Casts object -> MlirBlock.
template <>
struct type_caster<MlirBlock> {
- NB_TYPE_CASTER(MlirBlock, const_name("MlirBlock"))
+ NB_TYPE_CASTER(MlirBlock, const_name(MAKE_MLIR_PYTHON_QUALNAME("ir.Block")))
bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) noexcept {
if (auto capsule = mlirApiObjectToCapsule(src)) {
value = mlirPythonCapsuleToBlock(capsule->ptr());
@@ -174,7 +176,8 @@ struct type_caster<MlirBlock> {
/// Casts object -> MlirContext.
template <>
struct type_caster<MlirContext> {
- NB_TYPE_CASTER(MlirContext, const_name("MlirContext"))
+ NB_TYPE_CASTER(MlirContext,
+ const_name(MAKE_MLIR_PYTHON_QUALNAME("ir.Context")))
bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) noexcept {
if (src.is_none()) {
// Gets the current thread-bound context.
@@ -192,7 +195,8 @@ struct type_caster<MlirContext> {
/// Casts object <-> MlirDialectRegistry.
template <>
struct type_caster<MlirDialectRegistry> {
- NB_TYPE_CASTER(MlirDialectRegistry, const_name("MlirDialectRegistry"))
+ NB_TYPE_CASTER(MlirDialectRegistry,
+ const_name(MAKE_MLIR_PYTHON_QUALNAME("ir.DialectRegistry")))
bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) noexcept {
if (auto capsule = mlirApiObjectToCapsule(src)) {
value = mlirPythonCapsuleToDialectRegistry(capsule->ptr());
@@ -214,7 +218,8 @@ struct type_caster<MlirDialectRegistry> {
/// Casts object <-> MlirLocation.
template <>
struct type_caster<MlirLocation> {
- NB_TYPE_CASTER(MlirLocation, const_name("MlirLocation"))
+ NB_TYPE_CASTER(MlirLocation,
+ const_name(MAKE_MLIR_PYTHON_QUALNAME("ir.Location")))
bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) noexcept {
if (src.is_none()) {
// Gets the current thread-bound context.
@@ -240,7 +245,7 @@ struct type_caster<MlirLocation> {
/// Casts object <-> MlirModule.
template <>
struct type_caster<MlirModule> {
- NB_TYPE_CASTER(MlirModule, const_name("MlirModule"))
+ NB_TYPE_CASTER(MlirModule, const_name(MAKE_MLIR_PYTHON_QUALNAME("ir.Module")))
bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) noexcept {
if (auto capsule = mlirApiObjectToCapsule(src)) {
value = mlirPythonCapsuleToModule(capsule->ptr());
@@ -262,8 +267,9 @@ struct type_caster<MlirModule> {
/// Casts object <-> MlirFrozenRewritePatternSet.
template <>
struct type_caster<MlirFrozenRewritePatternSet> {
- NB_TYPE_CASTER(MlirFrozenRewritePatternSet,
- const_name("MlirFrozenRewritePatternSet"))
+ NB_TYPE_CASTER(
+ MlirFrozenRewritePatternSet,
+ const_name(MAKE_MLIR_PYTHON_QUALNAME("rewrite.FrozenRewritePatternSet")))
bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) noexcept {
if (auto capsule = mlirApiObjectToCapsule(src)) {
value = mlirPythonCapsuleToFrozenRewritePatternSet(capsule->ptr());
@@ -285,7 +291,8 @@ struct type_caster<MlirFrozenRewritePatternSet> {
/// Casts object <-> MlirOperation.
template <>
struct type_caster<MlirOperation> {
- NB_TYPE_CASTER(MlirOperation, const_name("MlirOperation"))
+ NB_TYPE_CASTER(MlirOperation,
+ const_name(MAKE_MLIR_PYTHON_QUALNAME("ir.Operation")))
bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) noexcept {
if (auto capsule = mlirApiObjectToCapsule(src)) {
value = mlirPythonCapsuleToOperation(capsule->ptr());
@@ -309,7 +316,7 @@ struct type_caster<MlirOperation> {
/// Casts object <-> MlirValue.
template <>
struct type_caster<MlirValue> {
- NB_TYPE_CASTER(MlirValue, const_name("MlirValue"))
+ NB_TYPE_CASTER(MlirValue, const_name(MAKE_MLIR_PYTHON_QUALNAME("ir.Value")))
bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) noexcept {
if (auto capsule = mlirApiObjectToCapsule(src)) {
value = mlirPythonCapsuleToValue(capsule->ptr());
@@ -334,7 +341,8 @@ struct type_caster<MlirValue> {
/// Casts object -> MlirPassManager.
template <>
struct type_caster<MlirPassManager> {
- NB_TYPE_CASTER(MlirPassManager, const_name("MlirPassManager"))
+ NB_TYPE_CASTER(MlirPassManager, const_name(MAKE_MLIR_PYTHON_QUALNAME(
+ "passmanager.PassManager")))
bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) noexcept {
if (auto capsule = mlirApiObjectToCapsule(src)) {
value = mlirPythonCapsuleToPassManager(capsule->ptr());
@@ -347,7 +355,7 @@ struct type_caster<MlirPassManager> {
/// Casts object <-> MlirTypeID.
template <>
struct type_caster<MlirTypeID> {
- NB_TYPE_CASTER(MlirTypeID, const_name("MlirTypeID"))
+ NB_TYPE_CASTER(MlirTypeID, const_name(MAKE_MLIR_PYTHON_QUALNAME("ir.TypeID")))
bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) noexcept {
if (auto capsule = mlirApiObjectToCapsule(src)) {
value = mlirPythonCapsuleToTypeID(capsule->ptr());
@@ -371,7 +379,7 @@ struct type_caster<MlirTypeID> {
/// Casts object <-> MlirType.
template <>
struct type_caster<MlirType> {
- NB_TYPE_CASTER(MlirType, const_name("MlirType"))
+ NB_TYPE_CASTER(MlirType, const_name(MAKE_MLIR_PYTHON_QUALNAME("ir.Type")))
bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) noexcept {
if (auto capsule = mlirApiObjectToCapsule(src)) {
value = mlirPythonCapsuleToType(capsule->ptr());
@@ -394,7 +402,7 @@ struct type_caster<MlirType> {
/// Casts MlirStringRef -> object.
template <>
struct type_caster<MlirStringRef> {
- NB_TYPE_CASTER(MlirStringRef, const_name("MlirStringRef"))
+ NB_TYPE_CASTER(MlirStringRef, const_name("str"))
static handle from_cpp(MlirStringRef s, rv_policy,
cleanup_list *cleanup) noexcept {
return nanobind::str(s.data, s.length).release();
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index 8b687a7..29001e2 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -985,7 +985,6 @@ class ScaleArgInfo<TypeConstraint argTyVal, string typeName> {
//===---------------------------------------------------------------------===//
// Scaled {fp4,bf8,fp8} to {bf16,f16,f32} conversion intrinsics
//===---------------------------------------------------------------------===//
-
foreach smallT = [
ScaleArgInfo<I32, "Fp4">,
ScaleArgInfo<ROCDL_V2I32Type, "Fp8">,
@@ -996,6 +995,8 @@ foreach smallT = [
ScaleArgInfo<ROCDL_V8BF16Type, "Bf16">,
ScaleArgInfo<ROCDL_V8F32Type, "F32">,
] in {
+
+ // Up-scaling
def ROCDL_CvtPkScalePk8 # largeT.nameForOp # smallT.nameForOp # Op :
ROCDL_ConcreteNonMemIntrOp<"cvt.scale.pk8." # largeT.name # "." # smallT.name,
[Pure], 1, [2], ["scaleSel"]>,
@@ -1010,13 +1011,30 @@ foreach smallT = [
attr-dict $src `,` $scale `[` $scaleSel `]` `:` type($res)
}];
}
+
+ // Down-scaling
+ def ROCDL_CvtScaleF32Pk8 # smallT.nameForOp # largeT.nameForOp # Op :
+ ROCDL_ConcreteNonMemIntrOp<"cvt.scalef32.pk8." # smallT.name # "." # largeT.name,
+ [Pure], 1>,
+ Arguments<(ins largeT.type:$src, F32:$scale)> {
+ let results = (outs smallT.type:$res);
+ let summary = "Scale and convert packed "
+ # largeT.name # " to packed " # smallT.name ;
+ let description = [{
+ Convert 8 packed }] # largeT.name # [{ values to packed }]
+ # smallT.name # [{, multiplying by the exponent part of `scale`
+ before doing so. This op is for gfx1250+ arch.
+ }];
+ let assemblyFormat = [{
+ attr-dict $src `,` $scale `:` type($res)
+ }];
+ }
} // foreach largeT
} // foreach smallTOp
//===---------------------------------------------------------------------===//
// Scaled {bf6,fp6} to {bf16,f16,f32} conversion intrinsics
//===---------------------------------------------------------------------===//
-
foreach smallT = [
ScaleArgInfo<ROCDL_V3I32Type, "Fp6">,
ScaleArgInfo<ROCDL_V3I32Type, "Bf6">
diff --git a/mlir/lib/Bindings/Python/IRCore.cpp b/mlir/lib/Bindings/Python/IRCore.cpp
index c20b211..32b2b0c 100644
--- a/mlir/lib/Bindings/Python/IRCore.cpp
+++ b/mlir/lib/Bindings/Python/IRCore.cpp
@@ -3219,13 +3219,11 @@ void mlir::python::populateIRCore(nb::module_ &m) {
nb::arg("end_line"), nb::arg("end_col"),
nb::arg("context") = nb::none(), kContextGetFileRangeDocstring)
.def("is_a_file", mlirLocationIsAFileLineColRange)
- .def_prop_ro(
- "filename",
- [](MlirLocation loc) {
- return mlirIdentifierStr(
- mlirLocationFileLineColRangeGetFilename(loc));
- },
- nb::sig("def filename(self) -> str"))
+ .def_prop_ro("filename",
+ [](MlirLocation loc) {
+ return mlirIdentifierStr(
+ mlirLocationFileLineColRangeGetFilename(loc));
+ })
.def_prop_ro("start_line", mlirLocationFileLineColRangeGetStartLine)
.def_prop_ro("start_col", mlirLocationFileLineColRangeGetStartColumn)
.def_prop_ro("end_line", mlirLocationFileLineColRangeGetEndLine)
@@ -3274,12 +3272,10 @@ void mlir::python::populateIRCore(nb::module_ &m) {
nb::arg("name"), nb::arg("childLoc") = nb::none(),
nb::arg("context") = nb::none(), kContextGetNameLocationDocString)
.def("is_a_name", mlirLocationIsAName)
- .def_prop_ro(
- "name_str",
- [](MlirLocation loc) {
- return mlirIdentifierStr(mlirLocationNameGetName(loc));
- },
- nb::sig("def name_str(self) -> str"))
+ .def_prop_ro("name_str",
+ [](MlirLocation loc) {
+ return mlirIdentifierStr(mlirLocationNameGetName(loc));
+ })
.def_prop_ro("child_loc",
[](PyLocation &self) {
return PyLocation(self.getContext(),
@@ -3453,15 +3449,13 @@ void mlir::python::populateIRCore(nb::module_ &m) {
return concreteOperation.getContext().getObject();
},
"Context that owns the Operation")
- .def_prop_ro(
- "name",
- [](PyOperationBase &self) {
- auto &concreteOperation = self.getOperation();
- concreteOperation.checkValid();
- MlirOperation operation = concreteOperation.get();
- return mlirIdentifierStr(mlirOperationGetName(operation));
- },
- nb::sig("def name(self) -> str"))
+ .def_prop_ro("name",
+ [](PyOperationBase &self) {
+ auto &concreteOperation = self.getOperation();
+ concreteOperation.checkValid();
+ MlirOperation operation = concreteOperation.get();
+ return mlirIdentifierStr(mlirOperationGetName(operation));
+ })
.def_prop_ro("operands",
[](PyOperationBase &self) {
return PyOpOperandList(self.getOperation().getRef());
@@ -3603,12 +3597,11 @@ void mlir::python::populateIRCore(nb::module_ &m) {
},
"Reports if the operation is attached to its parent block.")
.def("erase", [](PyOperationBase &self) { self.getOperation().erase(); })
- .def(
- "walk", &PyOperationBase::walk, nb::arg("callback"),
- nb::arg("walk_order") = MlirWalkPostOrder,
- // clang-format off
- nb::sig("def walk(self, callback: Callable[[Operation], WalkResult], walk_order: WalkOrder = " MAKE_MLIR_PYTHON_QUALNAME("ir.WalkOrder.POST_ORDER") ") -> None")
- // clang-format on
+ .def("walk", &PyOperationBase::walk, nb::arg("callback"),
+ nb::arg("walk_order") = MlirWalkPostOrder,
+ // clang-format off
+ nb::sig("def walk(self, callback: Callable[[Operation], WalkResult], walk_order: WalkOrder) -> None")
+ // clang-format on
);
nb::class_<PyOperation, PyOperationBase>(m, "Operation")
@@ -4124,7 +4117,6 @@ void mlir::python::populateIRCore(nb::module_ &m) {
[](PyNamedAttribute &self) {
return mlirIdentifierStr(self.namedAttr.name);
},
- nb::sig("def name(self) -> str"),
"The name of the NamedAttribute binding")
.def_prop_ro(
"attr",
@@ -4342,17 +4334,15 @@ void mlir::python::populateIRCore(nb::module_ &m) {
kValueReplaceAllUsesWithDocstring)
.def(
"replace_all_uses_except",
- [](MlirValue self, MlirValue with, PyOperation &exception) {
+ [](PyValue &self, PyValue &with, PyOperation &exception) {
MlirOperation exceptedUser = exception.get();
mlirValueReplaceAllUsesExcept(self, with, 1, &exceptedUser);
},
nb::arg("with_"), nb::arg("exceptions"),
- nb::sig("def replace_all_uses_except(self, with_: Value, exceptions: "
- "Operation) -> None"),
kValueReplaceAllUsesExceptDocstring)
.def(
"replace_all_uses_except",
- [](MlirValue self, MlirValue with, nb::list exceptions) {
+ [](PyValue &self, PyValue &with, const nb::list &exceptions) {
// Convert Python list to a SmallVector of MlirOperations
llvm::SmallVector<MlirOperation> exceptionOps;
for (nb::handle exception : exceptions) {
@@ -4364,8 +4354,6 @@ void mlir::python::populateIRCore(nb::module_ &m) {
exceptionOps.data());
},
nb::arg("with_"), nb::arg("exceptions"),
- nb::sig("def replace_all_uses_except(self, with_: Value, exceptions: "
- "Sequence[Operation]) -> None"),
kValueReplaceAllUsesExceptDocstring)
.def(
"replace_all_uses_except",
diff --git a/mlir/lib/Bindings/Python/IRModule.h b/mlir/lib/Bindings/Python/IRModule.h
index 598ae01..edbd73e 100644
--- a/mlir/lib/Bindings/Python/IRModule.h
+++ b/mlir/lib/Bindings/Python/IRModule.h
@@ -273,8 +273,7 @@ class DefaultingPyMlirContext
: public Defaulting<DefaultingPyMlirContext, PyMlirContext> {
public:
using Defaulting::Defaulting;
- static constexpr const char kTypeDescription[] =
- MAKE_MLIR_PYTHON_QUALNAME("ir.Context");
+ static constexpr const char kTypeDescription[] = "Context";
static PyMlirContext &resolve();
};
@@ -500,8 +499,7 @@ class DefaultingPyLocation
: public Defaulting<DefaultingPyLocation, PyLocation> {
public:
using Defaulting::Defaulting;
- static constexpr const char kTypeDescription[] =
- MAKE_MLIR_PYTHON_QUALNAME("ir.Location");
+ static constexpr const char kTypeDescription[] = "Location";
static PyLocation &resolve();
operator MlirLocation() const { return *get(); }
diff --git a/mlir/lib/Bindings/Python/IRTypes.cpp b/mlir/lib/Bindings/Python/IRTypes.cpp
index 3488d92..34c5b8d 100644
--- a/mlir/lib/Bindings/Python/IRTypes.cpp
+++ b/mlir/lib/Bindings/Python/IRTypes.cpp
@@ -1010,7 +1010,7 @@ public:
},
nb::arg("elements"), nb::arg("context") = nb::none(),
// clang-format off
- nb::sig("def get_tuple(elements: Sequence[Type], context: mlir.ir.Context | None = None) -> TupleType"),
+ nb::sig("def get_tuple(elements: Sequence[Type], context: Context | None = None) -> TupleType"),
// clang-format on
"Create a tuple type");
c.def(
@@ -1070,7 +1070,7 @@ public:
},
nb::arg("inputs"), nb::arg("results"), nb::arg("context") = nb::none(),
// clang-format off
- nb::sig("def get(inputs: Sequence[Type], results: Sequence[Type], context: mlir.ir.Context | None = None) -> FunctionType"),
+ nb::sig("def get(inputs: Sequence[Type], results: Sequence[Type], context: Context | None = None) -> FunctionType"),
// clang-format on
"Gets a FunctionType from a list of input and result types");
c.def_prop_ro(
diff --git a/mlir/lib/Bindings/Python/MainModule.cpp b/mlir/lib/Bindings/Python/MainModule.cpp
index 52656138..a14f09f 100644
--- a/mlir/lib/Bindings/Python/MainModule.cpp
+++ b/mlir/lib/Bindings/Python/MainModule.cpp
@@ -115,9 +115,6 @@ NB_MODULE(_mlir, m) {
});
},
"typeid"_a, nb::kw_only(), "replace"_a = false,
- // clang-format off
- nb::sig("def register_type_caster(typeid: " MAKE_MLIR_PYTHON_QUALNAME("ir.TypeID") ", *, replace: bool = False) -> object"),
- // clang-format on
"Register a type caster for casting MLIR types to custom user types.");
m.def(
MLIR_PYTHON_CAPI_VALUE_CASTER_REGISTER_ATTR,
@@ -130,9 +127,6 @@ NB_MODULE(_mlir, m) {
});
},
"typeid"_a, nb::kw_only(), "replace"_a = false,
- // clang-format off
- nb::sig("def register_value_caster(typeid: " MAKE_MLIR_PYTHON_QUALNAME("ir.TypeID") ", *, replace: bool = False) -> object"),
- // clang-format on
"Register a value caster for casting MLIR values to custom user values.");
// Define and populate IR submodule.
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index a173cf1..32ebe06 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -77,6 +77,232 @@ struct LLVMPointerPointerLikeModel
};
} // namespace
+/// Generate a name for a canonical loop nest of the format
+/// `<prefix>(_r<idx>_s<idx>)*`. Here, `_r<idx>` identifies the index of the
+/// region within its parent operation; it is omitted if that operation has
+/// only a single region.
+/// `_s<idx>` identifies the position of an operation within a region, where
+/// only operations that may potentially contain loops ("container operations",
+/// i.e. operations that themselves have regions) are counted. Again, it is
+/// omitted if there is only one such operation in a region. If canonical loops
+/// are nested directly inside each other, the name may also use the suffix
+/// `_d<num>`, where <num> is the nesting depth of the loop.
+///
+/// The generated name is a best effort to make the canonical loop name unique
+/// within an SSA namespace. This also means that regions with the
+/// IsolatedFromAbove property do not consider any parents or siblings.
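+///
+/// For example (cf. the cli-canonical_loop.mlir tests updated below), two
+/// sibling loops in the second region of an scf.if are named
+/// `canonloop_r1_s0` and `canonloop_r1_s1`, while a loop nested directly
+/// inside another loop is named `canonloop_d1`.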
+static std::string generateLoopNestingName(StringRef prefix,
+ CanonicalLoopOp op) {
+ struct Component {
+ /// If true, this component describes a region operand of an operation (the
+ /// operand's owner). If false, this component describes an operation
+ /// located in a parent region.
+ bool isRegionArgOfOp;
+ bool skip = false;
+ bool isUnique = false;
+
+ size_t idx;
+ Operation *op;
+ Region *parentRegion;
+ size_t loopDepth;
+
+ Operation *&getOwnerOp() {
+ assert(isRegionArgOfOp && "Must describe a region operand");
+ return op;
+ }
+ size_t &getArgIdx() {
+ assert(isRegionArgOfOp && "Must describe a region operand");
+ return idx;
+ }
+
+ Operation *&getContainerOp() {
+ assert(!isRegionArgOfOp && "Must describe an operation of a region");
+ return op;
+ }
+ size_t &getOpPos() {
+ assert(!isRegionArgOfOp && "Must describe an operation of a region");
+ return idx;
+ }
+ bool isLoopOp() const {
+ assert(!isRegionArgOfOp && "Must describe an operation of a region");
+ return isa<CanonicalLoopOp>(op);
+ }
+ Region *&getParentRegion() {
+ assert(!isRegionArgOfOp && "Must describe an operation of a region");
+ return parentRegion;
+ }
+ size_t &getLoopDepth() {
+ assert(!isRegionArgOfOp && "Must describe an operation of a region");
+ return loopDepth;
+ }
+
+ void skipIf(bool v = true) { skip = skip || v; }
+ };
+
+ // List of ancestors, from inner to outer.
+ // Alternates between
+ // * region argument of an operation
+ // * operation within a region
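+ // E.g. for a canonical loop in the second region of an scf.if (as in the
+ // cli-canonical_loop.mlir multiregion test below), the list is: the loop
+ // within the else-region, region #1 of the scf.if, the scf.if within the
+ // function body, and region #0 of the func.func.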
+ SmallVector<Component> components;
+
+ // Gather a list of parent regions and operations, and the position within
+ // their parent
+ Operation *o = op.getOperation();
+ while (o) {
+ // Operation within a region
+ Region *r = o->getParentRegion();
+ if (!r)
+ break;
+
+ llvm::ReversePostOrderTraversal<Block *> traversal(&r->getBlocks().front());
+ size_t idx = 0;
+ bool found = false;
+ size_t sequentialIdx = -1;
+ bool isOnlyContainerOp = true;
+ for (Block *b : traversal) {
+ for (Operation &op : *b) {
+ if (&op == o && !found) {
+ sequentialIdx = idx;
+ found = true;
+ }
+ if (op.getNumRegions()) {
+ idx += 1;
+ if (idx > 1)
+ isOnlyContainerOp = false;
+ }
+ if (found && !isOnlyContainerOp)
+ break;
+ }
+ }
+
+ Component &containerOpInRegion = components.emplace_back();
+ containerOpInRegion.isRegionArgOfOp = false;
+ containerOpInRegion.isUnique = isOnlyContainerOp;
+ containerOpInRegion.getContainerOp() = o;
+ containerOpInRegion.getOpPos() = sequentialIdx;
+ containerOpInRegion.getParentRegion() = r;
+
+ Operation *parent = r->getParentOp();
+
+ // Region argument of an operation
+ Component &regionArgOfOperation = components.emplace_back();
+ regionArgOfOperation.isRegionArgOfOp = true;
+ regionArgOfOperation.isUnique = true;
+ regionArgOfOperation.getArgIdx() = 0;
+ regionArgOfOperation.getOwnerOp() = parent;
+
+ // The IsolatedFromAbove trait of the parent operation implies that each
+ // individual region argument has its own separate namespace, so no
+ // ambiguity.
+ if (!parent || parent->hasTrait<mlir::OpTrait::IsIsolatedFromAbove>())
+ break;
+
+ // Component only needed if operation has multiple region operands. Region
+ // arguments may be optional, but we currently do not consider this.
+ if (parent->getRegions().size() > 1) {
+ auto getRegionIndex = [](Operation *o, Region *r) {
+ for (auto [idx, region] : llvm::enumerate(o->getRegions())) {
+ if (&region == r)
+ return idx;
+ }
+ llvm_unreachable("Region not child of its parent operation");
+ };
+ regionArgOfOperation.isUnique = false;
+ regionArgOfOperation.getArgIdx() = getRegionIndex(parent, r);
+ }
+
+ // next parent
+ o = parent;
+ }
+
+ // Determine whether a region-argument component is not needed
+ for (Component &c : components)
+ c.skipIf(c.isRegionArgOfOp && c.isUnique);
+
+ // Find runs of nested loops and determine each loop's depth in the loop nest
+ size_t numSurroundingLoops = 0;
+ for (Component &c : llvm::reverse(components)) {
+ if (c.skip)
+ continue;
+
+ // Non-skipped region arguments (i.e. those of multi-region operations)
+ // interrupt the loop nest.
+ if (c.isRegionArgOfOp) {
+ numSurroundingLoops = 0;
+ continue;
+ }
+
+ // If a region contains multiple loops, each of them is the outermost loop
+ // of a new loop nest.
+ if (!c.isUnique)
+ numSurroundingLoops = 0;
+
+ c.getLoopDepth() = numSurroundingLoops;
+
+ // Next loop is surrounded by one more loop
+ if (isa<CanonicalLoopOp>(c.getContainerOp()))
+ numSurroundingLoops += 1;
+ }
+
+ // In loop nests, skip all but the innermost loop, whose name carries the
+ // nesting depth.
+ bool isLoopNest = false;
+ for (Component &c : components) {
+ if (c.skip || c.isRegionArgOfOp)
+ continue;
+
+ if (!isLoopNest && c.getLoopDepth() >= 1) {
+ // Innermost loop of a loop nest of at least two loops
+ isLoopNest = true;
+ } else if (isLoopNest) {
+ // Non-innermost loop of a loop nest
+ c.skipIf(c.isUnique);
+
+ // If there is no surrounding loop left, this must have been the outermost
+ // loop; leave loop-nest mode for the next iteration
+ if (c.getLoopDepth() == 0)
+ isLoopNest = false;
+ }
+ }
+
+ // Skip unambiguous non-loop container operations (but they should interrupt
+ // loop nests, so we mark them as skipped only after computing the loop
+ // nests).
+ for (Component &c : components)
+ c.skipIf(!c.isRegionArgOfOp && c.isUnique &&
+ !isa<CanonicalLoopOp>(c.getContainerOp()));
+
+ // Components can be skipped if they are already disambiguated by their
+ // parent (or do not have a parent).
+ bool newRegion = true;
+ for (Component &c : llvm::reverse(components)) {
+ c.skipIf(newRegion && c.isUnique);
+
+ // non-skipped components disambiguate unique children
+ if (!c.skip)
+ newRegion = true;
+
+ // ...except canonical loops that need a suffix for each nest
+ if (!c.isRegionArgOfOp && c.getContainerOp())
+ newRegion = false;
+ }
+
+ // Compile the nesting name string
+ SmallString<64> Name{prefix};
+ llvm::raw_svector_ostream NameOS(Name);
+ for (auto &c : llvm::reverse(components)) {
+ if (c.skip)
+ continue;
+
+ if (c.isRegionArgOfOp)
+ NameOS << "_r" << c.getArgIdx();
+ else if (c.getLoopDepth() >= 1)
+ NameOS << "_d" << c.getLoopDepth();
+ else
+ NameOS << "_s" << c.getOpPos();
+ }
+
+ return NameOS.str().str();
+}
+
void OpenMPDialect::initialize() {
addOperations<
#define GET_OP_LIST
@@ -3172,67 +3398,7 @@ void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) {
cliName =
TypeSwitch<Operation *, std::string>(gen->getOwner())
.Case([&](CanonicalLoopOp op) {
- // Find the canonical loop nesting: For each ancestor add a
- // "+_r<idx>" suffix (in reverse order)
- SmallVector<std::string> components;
- Operation *o = op.getOperation();
- while (o) {
- if (o->hasTrait<mlir::OpTrait::IsIsolatedFromAbove>())
- break;
-
- Region *r = o->getParentRegion();
- if (!r)
- break;
-
- auto getSequentialIndex = [](Region *r, Operation *o) {
- llvm::ReversePostOrderTraversal<Block *> traversal(
- &r->getBlocks().front());
- size_t idx = 0;
- for (Block *b : traversal) {
- for (Operation &op : *b) {
- if (&op == o)
- return idx;
- // Only consider operations that are containers as
- // possible children
- if (!op.getRegions().empty())
- idx += 1;
- }
- }
- llvm_unreachable("Operation not part of the region");
- };
- size_t sequentialIdx = getSequentialIndex(r, o);
- components.push_back(("s" + Twine(sequentialIdx)).str());
-
- Operation *parent = r->getParentOp();
- if (!parent)
- break;
-
- // If the operation has more than one region, also count in
- // which of the regions
- if (parent->getRegions().size() > 1) {
- auto getRegionIndex = [](Operation *o, Region *r) {
- for (auto [idx, region] :
- llvm::enumerate(o->getRegions())) {
- if (&region == r)
- return idx;
- }
- llvm_unreachable("Region not child its parent operation");
- };
- size_t regionIdx = getRegionIndex(parent, r);
- components.push_back(("r" + Twine(regionIdx)).str());
- }
-
- // next parent
- o = parent;
- }
-
- SmallString<64> Name("canonloop");
- for (const std::string &s : reverse(components)) {
- Name += '_';
- Name += s;
- }
-
- return Name;
+ return generateLoopNestingName("canonloop", op);
})
.Case([&](UnrollHeuristicOp op) -> std::string {
llvm_unreachable("heuristic unrolling does not generate a loop");
@@ -3323,7 +3489,8 @@ void CanonicalLoopOp::getAsmBlockNames(OpAsmSetBlockNameFn setNameFn) {
void CanonicalLoopOp::getAsmBlockArgumentNames(Region &region,
OpAsmSetValueNameFn setNameFn) {
- setNameFn(region.getArgument(0), "iv");
+ std::string ivName = generateLoopNestingName("iv", *this);
+ setNameFn(region.getArgument(0), ivName);
}
void CanonicalLoopOp::print(OpAsmPrinter &p) {
diff --git a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp
index 132ed81..3385b2a 100644
--- a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp
+++ b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp
@@ -616,11 +616,10 @@ DiagnosedSilenceableFailure transform::ApplyConversionPatternsOp::apply(
if (diag.succeeded()) {
// Tracking failure is the only failure.
return trackingFailure;
- } else {
- diag.attachNote() << "tracking listener also failed: "
- << trackingFailure.getMessage();
- (void)trackingFailure.silence();
}
+ diag.attachNote() << "tracking listener also failed: "
+ << trackingFailure.getMessage();
+ (void)trackingFailure.silence();
}
if (!diag.succeeded())
diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir
index 0bad151..6134695 100644
--- a/mlir/test/Dialect/LLVMIR/rocdl.mlir
+++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir
@@ -1068,6 +1068,38 @@ llvm.func @rocdl.cvt.scale.pk8(%i32: i32, %v2xi32: vector<2xi32>, %scale: i32) {
// -----
+// CHECK-LABEL: rocdl.cvt.scalef32.pk8
+llvm.func @rocdl.cvt.scalef32.pk8(%v8xf32: vector<8xf32>,
+ %v8xf16: vector<8xf16>,
+ %v8xbf16: vector<8xbf16>,
+ %scale: f32) {
+
+ // CHECK: rocdl.cvt.scalef32.pk8.fp8.f32
+ %0 = rocdl.cvt.scalef32.pk8.fp8.f32 %v8xf32, %scale : vector<2xi32>
+ // CHECK: rocdl.cvt.scalef32.pk8.bf8.f32
+ %1 = rocdl.cvt.scalef32.pk8.bf8.f32 %v8xf32, %scale : vector<2xi32>
+ // CHECK: rocdl.cvt.scalef32.pk8.fp4.f32
+ %2 = rocdl.cvt.scalef32.pk8.fp4.f32 %v8xf32, %scale : i32
+
+ // CHECK: rocdl.cvt.scalef32.pk8.fp8.f16
+ %3 = rocdl.cvt.scalef32.pk8.fp8.f16 %v8xf16, %scale : vector<2xi32>
+ // CHECK: rocdl.cvt.scalef32.pk8.bf8.f16
+ %4 = rocdl.cvt.scalef32.pk8.bf8.f16 %v8xf16, %scale : vector<2xi32>
+ // CHECK: rocdl.cvt.scalef32.pk8.fp4.f16
+ %5 = rocdl.cvt.scalef32.pk8.fp4.f16 %v8xf16, %scale : i32
+
+ // CHECK: rocdl.cvt.scalef32.pk8.fp8.bf16
+ %6 = rocdl.cvt.scalef32.pk8.fp8.bf16 %v8xbf16, %scale : vector<2xi32>
+ // CHECK: rocdl.cvt.scalef32.pk8.bf8.bf16
+ %7 = rocdl.cvt.scalef32.pk8.bf8.bf16 %v8xbf16, %scale : vector<2xi32>
+ // CHECK: rocdl.cvt.scalef32.pk8.fp4.bf16
+ %8 = rocdl.cvt.scalef32.pk8.fp4.bf16 %v8xbf16, %scale : i32
+
+ llvm.return
+}
+
+// -----
+
// CHECK-LABEL: rocdl.cvt.scale.pk16
llvm.func @rocdl.cvt.scale.pk16(%v3xi32: vector<3xi32>, %scale:i32) {
diff --git a/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir b/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir
index adadb8b..0e9385e 100644
--- a/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir
+++ b/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s | FileCheck %s
-// RUN: mlir-opt %s | mlir-opt | FileCheck %s
+// RUN: mlir-opt %s | FileCheck %s --enable-var-scope
+// RUN: mlir-opt %s | mlir-opt | FileCheck %s --enable-var-scope
// CHECK-LABEL: @omp_canonloop_raw(
@@ -24,10 +24,10 @@ func.func @omp_canonloop_raw(%tc : i32) -> () {
func.func @omp_canonloop_sequential_raw(%tc : i32) -> () {
// CHECK-NEXT: %canonloop_s0 = omp.new_cli
%canonloop_s0 = "omp.new_cli" () : () -> (!omp.cli)
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc]]) {
"omp.canonical_loop" (%tc, %canonloop_s0) ({
^bb_first(%iv_first: i32):
- // CHECK-NEXT: = llvm.add %iv, %iv : i32
+ // CHECK-NEXT: = llvm.add %iv_s0, %iv_s0 : i32
%newval = llvm.add %iv_first, %iv_first : i32
// CHECK-NEXT: omp.terminator
omp.terminator
@@ -36,7 +36,7 @@ func.func @omp_canonloop_sequential_raw(%tc : i32) -> () {
// CHECK-NEXT: %canonloop_s1 = omp.new_cli
%canonloop_s1 = "omp.new_cli" () : () -> (!omp.cli)
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv : i32 in range(%[[tc]]) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc]]) {
"omp.canonical_loop" (%tc, %canonloop_s1) ({
^bb_second(%iv_second: i32):
// CHECK: omp.terminator
@@ -52,17 +52,17 @@ func.func @omp_canonloop_sequential_raw(%tc : i32) -> () {
// CHECK-LABEL: @omp_nested_canonloop_raw(
// CHECK-SAME: %[[tc_outer:.+]]: i32, %[[tc_inner:.+]]: i32)
func.func @omp_nested_canonloop_raw(%tc_outer : i32, %tc_inner : i32) -> () {
- // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+ // CHECK-NEXT: %canonloop = omp.new_cli
%outer = "omp.new_cli" () : () -> (!omp.cli)
- // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli
+ // CHECK-NEXT: %canonloop_d1 = omp.new_cli
%inner = "omp.new_cli" () : () -> (!omp.cli)
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc_outer]]) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc_outer]]) {
"omp.canonical_loop" (%tc_outer, %outer) ({
^bb_outer(%iv_outer: i32):
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc_inner]]) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc_inner]]) {
"omp.canonical_loop" (%tc_inner, %inner) ({
^bb_inner(%iv_inner: i32):
- // CHECK-NEXT: = llvm.add %iv, %iv_0 : i32
+ // CHECK-NEXT: = llvm.add %iv, %iv_d1 : i32
%newval = llvm.add %iv_outer, %iv_inner: i32
// CHECK-NEXT: omp.terminator
omp.terminator
@@ -108,16 +108,24 @@ func.func @omp_canonloop_constant_pretty() -> () {
func.func @omp_canonloop_sequential_pretty(%tc : i32) -> () {
// CHECK-NEXT: %canonloop_s0 = omp.new_cli
%canonloop_s0 = omp.new_cli
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
- omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%tc) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%tc) {
// CHECK-NEXT: omp.terminator
omp.terminator
}
// CHECK: %canonloop_s1 = omp.new_cli
%canonloop_s1 = omp.new_cli
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv : i32 in range(%[[tc]]) {
- omp.canonical_loop(%canonloop_s1) %iv_0 : i32 in range(%tc) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%tc) {
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ }
+
+ // CHECK: %canonloop_s2 = omp.new_cli
+ %canonloop_s2 = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s2) %iv_s2 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_s2) %iv_s2 : i32 in range(%tc) {
// CHECK-NEXT: omp.terminator
omp.terminator
}
@@ -126,17 +134,17 @@ func.func @omp_canonloop_sequential_pretty(%tc : i32) -> () {
}
-// CHECK-LABEL: @omp_canonloop_nested_pretty(
+// CHECK-LABEL: @omp_canonloop_2d_nested_pretty(
// CHECK-SAME: %[[tc:.+]]: i32)
-func.func @omp_canonloop_nested_pretty(%tc : i32) -> () {
- // CHECK-NEXT: %canonloop_s0 = omp.new_cli
- %canonloop_s0 = omp.new_cli
- // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli
- %canonloop_s0_s0 = omp.new_cli
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
- omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%tc) {
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc]]) {
- omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%tc) {
+func.func @omp_canonloop_2d_nested_pretty(%tc : i32) -> () {
+ // CHECK-NEXT: %canonloop = omp.new_cli
+ %canonloop = omp.new_cli
+ // CHECK-NEXT: %canonloop_d1 = omp.new_cli
+ %canonloop_d1 = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%tc) {
// CHECK: omp.terminator
omp.terminator
}
@@ -147,6 +155,77 @@ func.func @omp_canonloop_nested_pretty(%tc : i32) -> () {
}
+// CHECK-LABEL: @omp_canonloop_3d_nested_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32)
+func.func @omp_canonloop_3d_nested_pretty(%tc : i32) -> () {
+ // CHECK: %canonloop = omp.new_cli
+ %canonloop = omp.new_cli
+ // CHECK: %canonloop_d1 = omp.new_cli
+ %canonloop_d1 = omp.new_cli
+ // CHECK: %canonloop_d2 = omp.new_cli
+ %canonloop_d2 = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_d1) %iv_1d : i32 in range(%tc) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_d2) %iv_d2 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_d2) %iv_d2 : i32 in range(%tc) {
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ // CHECK-NEXT: }
+ }
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ // CHECK-NEXT: }
+ }
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ }
+
+ return
+}
+
+
+// CHECK-LABEL: @omp_canonloop_sequential_nested_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32)
+func.func @omp_canonloop_sequential_nested_pretty(%tc : i32) -> () {
+ // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+ %canonloop_s0 = omp.new_cli
+ // CHECK-NEXT: %canonloop_s0_d1 = omp.new_cli
+ %canonloop_s0_d1 = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%tc) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_d1) %iv_s0_d1 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_s0_d1) %iv_s0_d1 : i32 in range(%tc) {
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ // CHECK-NEXT: }
+ }
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ // CHECK-NEXT: }
+ }
+
+ // CHECK-NEXT: %canonloop_s1 = omp.new_cli
+ %canonloop_s1 = omp.new_cli
+ // CHECK-NEXT: %canonloop_s1_d1 = omp.new_cli
+ %canonloop_s1_d1 = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%tc) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s1_d1) %iv_s1_d1 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_s1_d1) %iv_s1d1 : i32 in range(%tc) {
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ // CHECK-NEXT: }
+ }
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ }
+
+ return
+}
+
+
// CHECK-LABEL: @omp_newcli_unused(
// CHECK-SAME: )
func.func @omp_newcli_unused() -> () {
@@ -155,3 +234,74 @@ func.func @omp_newcli_unused() -> () {
// CHECK-NEXT: return
return
}
+
+
+// CHECK-LABEL: @omp_canonloop_multiregion_isolatedfromabove(
+func.func @omp_canonloop_multiregion_isolatedfromabove() -> () {
+ omp.private {type = firstprivate} @x.privatizer : !llvm.ptr init {
+ ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+ %c42_i32 = arith.constant 42: i32
+ // CHECK: omp.canonical_loop %iv : i32 in range(%c42_i32) {
+ omp.canonical_loop %iv1 : i32 in range(%c42_i32) {
+ omp.terminator
+ }
+ // CHECK: omp.yield
+ omp.yield(%arg0 : !llvm.ptr)
+ } copy {
+ ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+ %c42_i32 = arith.constant 42: i32
+ // CHECK: omp.canonical_loop %iv : i32 in range(%c42_i32) {
+ omp.canonical_loop %iv : i32 in range(%c42_i32) {
+ // CHECK: omp.canonical_loop %iv_d1 : i32 in range(%c42_i32) {
+ omp.canonical_loop %iv_d1 : i32 in range(%c42_i32) {
+ omp.terminator
+ }
+ omp.terminator
+ }
+ // CHECK: omp.yield
+ omp.yield(%arg0 : !llvm.ptr)
+ } dealloc {
+ ^bb0(%arg0: !llvm.ptr):
+ %c42_i32 = arith.constant 42: i32
+ // CHECK: omp.canonical_loop %iv_s0 : i32 in range(%c42_i32) {
+ omp.canonical_loop %iv_s0 : i32 in range(%c42_i32) {
+ omp.terminator
+ }
+ // CHECK: omp.canonical_loop %iv_s1 : i32 in range(%c42_i32) {
+ omp.canonical_loop %iv_s1 : i32 in range(%c42_i32) {
+ omp.terminator
+ }
+ // CHECK: omp.yield
+ omp.yield
+ }
+
+ // CHECK: return
+ return
+}
+
+
+// CHECK-LABEL: @omp_canonloop_multiregion(
+func.func @omp_canonloop_multiregion(%c : i1) -> () {
+ %c42_i32 = arith.constant 42: i32
+ %canonloop1 = omp.new_cli
+ %canonloop2 = omp.new_cli
+ %canonloop3 = omp.new_cli
+ scf.if %c {
+ // CHECK: omp.canonical_loop(%canonloop_r0) %iv_r0 : i32 in range(%c42_i32) {
+ omp.canonical_loop(%canonloop1) %iv1 : i32 in range(%c42_i32) {
+ omp.terminator
+ }
+ } else {
+ // CHECK: omp.canonical_loop(%canonloop_r1_s0) %iv_r1_s0 : i32 in range(%c42_i32) {
+ omp.canonical_loop(%canonloop2) %iv2 : i32 in range(%c42_i32) {
+ omp.terminator
+ }
+ // CHECK: omp.canonical_loop(%canonloop_r1_s1) %iv_r1_s1 : i32 in range(%c42_i32) {
+ omp.canonical_loop(%canonloop3) %iv3 : i32 in range(%c42_i32) {
+ omp.terminator
+ }
+ }
+
+ // CHECK: return
+ return
+}
diff --git a/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir b/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir
index cda7d0b..16884f4 100644
--- a/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir
+++ b/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir
@@ -1,18 +1,18 @@
-// RUN: mlir-opt %s | FileCheck %s
-// RUN: mlir-opt %s | mlir-opt | FileCheck %s
+// RUN: mlir-opt %s | FileCheck %s --enable-var-scope
+// RUN: mlir-opt %s | mlir-opt | FileCheck %s --enable-var-scope
// CHECK-LABEL: @omp_unroll_heuristic_raw(
// CHECK-SAME: %[[tc:.+]]: i32) {
func.func @omp_unroll_heuristic_raw(%tc : i32) -> () {
- // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+ // CHECK-NEXT: %canonloop = omp.new_cli
%canonloop = "omp.new_cli" () : () -> (!omp.cli)
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
"omp.canonical_loop" (%tc, %canonloop) ({
^bb0(%iv: i32):
omp.terminator
}) : (i32, !omp.cli) -> ()
- // CHECK: omp.unroll_heuristic(%canonloop_s0)
+ // CHECK: omp.unroll_heuristic(%canonloop)
"omp.unroll_heuristic" (%canonloop) : (!omp.cli) -> ()
return
}
@@ -22,12 +22,12 @@ func.func @omp_unroll_heuristic_raw(%tc : i32) -> () {
// CHECK-SAME: %[[tc:.+]]: i32) {
func.func @omp_unroll_heuristic_pretty(%tc : i32) -> () {
// CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
- %canonloop = "omp.new_cli" () : () -> (!omp.cli)
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+ %canonloop = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
omp.terminator
}
- // CHECK: omp.unroll_heuristic(%canonloop_s0)
+ // CHECK: omp.unroll_heuristic(%canonloop)
omp.unroll_heuristic(%canonloop)
return
}
@@ -36,13 +36,13 @@ func.func @omp_unroll_heuristic_pretty(%tc : i32) -> () {
// CHECK-LABEL: @omp_unroll_heuristic_nested_pretty(
// CHECK-SAME: %[[tc:.+]]: i32) {
func.func @omp_unroll_heuristic_nested_pretty(%tc : i32) -> () {
- // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+ // CHECK-NEXT: %canonloop = omp.new_cli
%cli_outer = omp.new_cli
- // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli
+ // CHECK-NEXT: %canonloop_d1 = omp.new_cli
%cli_inner = omp.new_cli
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
omp.canonical_loop(%cli_outer) %iv_outer : i32 in range(%tc) {
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc]]) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc]]) {
omp.canonical_loop(%cli_inner) %iv_inner : i32 in range(%tc) {
// CHECK: omp.terminator
omp.terminator
@@ -51,9 +51,9 @@ func.func @omp_unroll_heuristic_nested_pretty(%tc : i32) -> () {
omp.terminator
}
- // CHECK: omp.unroll_heuristic(%canonloop_s0)
+ // CHECK: omp.unroll_heuristic(%canonloop)
omp.unroll_heuristic(%cli_outer)
- // CHECK-NEXT: omp.unroll_heuristic(%canonloop_s0_s0)
+ // CHECK-NEXT: omp.unroll_heuristic(%canonloop_d1)
omp.unroll_heuristic(%cli_inner)
return
}
diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir
index e043a8c..00ee6b7 100644
--- a/mlir/test/Target/LLVMIR/rocdl.mlir
+++ b/mlir/test/Target/LLVMIR/rocdl.mlir
@@ -1340,6 +1340,34 @@ llvm.func @rocdl.cvt.scale.pk8(%i32: i32, %v2xi32: vector<2xi32>, %scale: i32) {
llvm.return
}
+// CHECK-LABEL: rocdl.cvt.scalef32.pk8
+// CHECK-SAME:(<8 x float> %[[V8F32:.+]], <8 x half> %[[V8F16:.+]], <8 x bfloat> %[[V8BF16:.+]], float %[[SCALE:.+]])
+llvm.func @rocdl.cvt.scalef32.pk8(%v8xf32: vector<8xf32>, %v8xf16: vector<8xf16>, %v8xbf16: vector<8xbf16>, %scale: f32) {
+
+ // CHECK: call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.fp8.f32(<8 x float> %[[V8F32]], float %[[SCALE]])
+ %0 = rocdl.cvt.scalef32.pk8.fp8.f32 %v8xf32, %scale : vector<2xi32>
+ // CHECK: call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.bf8.f32(<8 x float> %[[V8F32]], float %[[SCALE]])
+ %1 = rocdl.cvt.scalef32.pk8.bf8.f32 %v8xf32, %scale : vector<2xi32>
+ // CHECK: call i32 @llvm.amdgcn.cvt.scalef32.pk8.fp4.f32(<8 x float> %[[V8F32]], float %[[SCALE]])
+ %2 = rocdl.cvt.scalef32.pk8.fp4.f32 %v8xf32, %scale : i32
+
+ // CHECK: call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.fp8.f16(<8 x half> %[[V8F16]], float %[[SCALE]])
+ %3 = rocdl.cvt.scalef32.pk8.fp8.f16 %v8xf16, %scale : vector<2xi32>
+ // CHECK: call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.bf8.f16(<8 x half> %[[V8F16]], float %[[SCALE]])
+ %4 = rocdl.cvt.scalef32.pk8.bf8.f16 %v8xf16, %scale : vector<2xi32>
+ // CHECK: call i32 @llvm.amdgcn.cvt.scalef32.pk8.fp4.f16(<8 x half> %[[V8F16]], float %[[SCALE]])
+ %5 = rocdl.cvt.scalef32.pk8.fp4.f16 %v8xf16, %scale : i32
+
+ // CHECK: call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.fp8.bf16(<8 x bfloat> %[[V8BF16]], float %[[SCALE]])
+ %6 = rocdl.cvt.scalef32.pk8.fp8.bf16 %v8xbf16, %scale : vector<2xi32>
+ // CHECK: call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.bf8.bf16(<8 x bfloat> %[[V8BF16]], float %[[SCALE]])
+ %7 = rocdl.cvt.scalef32.pk8.bf8.bf16 %v8xbf16, %scale : vector<2xi32>
+ // CHECK: call i32 @llvm.amdgcn.cvt.scalef32.pk8.fp4.bf16(<8 x bfloat> %[[V8BF16]], float %[[SCALE]])
+ %8 = rocdl.cvt.scalef32.pk8.fp4.bf16 %v8xbf16, %scale : i32
+
+ llvm.return
+}
+
// CHECK-LABEL: @rocdl.cvt.scale.pk16
// CHECK-SAME:(<3 x i32> %[[SRC0:.+]], i32 %[[SCALE:.+]])
llvm.func @rocdl.cvt.scale.pk16(%v3xi32: vector<3xi32>, %scale:i32) {
diff --git a/orc-rt/include/orc-rt/Error.h b/orc-rt/include/orc-rt/Error.h
index fe0754b..48d9064 100644
--- a/orc-rt/include/orc-rt/Error.h
+++ b/orc-rt/include/orc-rt/Error.h
@@ -114,7 +114,7 @@ private:
void setChecked(bool Checked) { ErrPtr = (ErrPtr & ~uintptr_t(1)) | Checked; }
template <typename ErrT = ErrorInfoBase> std::unique_ptr<ErrT> takePayload() {
- static_assert(std::is_base_of<ErrorInfoBase, ErrT>::value,
+ static_assert(std::is_base_of_v<ErrorInfoBase, ErrT>,
"ErrT is not an ErrorInfoBase subclass");
std::unique_ptr<ErrT> Tmp(getPtr<ErrT>());
setPtr(nullptr);
@@ -288,11 +288,15 @@ private:
Error *Err;
};
+/// Tag to force construction of an Expected value in the success state. See
+/// Expected constructor for details.
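+///
+/// Illustrative usage (mirrors the ExpectedError unit test):
+///   Expected<Error> E(Error::success(), ForceExpectedSuccessValue());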
+struct ForceExpectedSuccessValue {};
+
template <typename T> class ORC_RT_NODISCARD Expected {
template <class OtherT> friend class Expected;
- static constexpr bool IsRef = std::is_reference<T>::value;
+ static constexpr bool IsRef = std::is_reference_v<T>;
using wrap = std::reference_wrapper<std::remove_reference_t<T>>;
using error_type = std::unique_ptr<ErrorInfoBase>;
using storage_type = std::conditional_t<IsRef, wrap, T>;
@@ -310,10 +314,17 @@ public:
new (getErrorStorage()) error_type(Err.takePayload());
}
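+  /// Create an Expected in the success state from a value, even when the
+  /// value type is itself an error type (e.g. Error). See
+  /// ForceExpectedSuccessValue and the ExpectedError unit test.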
+ template <typename OtherT>
+ Expected(OtherT &&Val, ForceExpectedSuccessValue _,
+ std::enable_if_t<std::is_convertible_v<OtherT, T>> * = nullptr)
+ : HasError(false), Unchecked(true) {
+ new (getStorage()) storage_type(std::forward<OtherT>(Val));
+ }
+
/// Create an Expected from a T value.
template <typename OtherT>
Expected(OtherT &&Val,
- std::enable_if_t<std::is_convertible<OtherT, T>::value> * = nullptr)
+ std::enable_if_t<std::is_convertible_v<OtherT, T>> * = nullptr)
: HasError(false), Unchecked(true) {
new (getStorage()) storage_type(std::forward<OtherT>(Val));
}
@@ -324,9 +335,8 @@ public:
/// Move construct an Expected<T> value from an Expected<OtherT>, where OtherT
/// must be convertible to T.
template <class OtherT>
- Expected(
- Expected<OtherT> &&Other,
- std::enable_if_t<std::is_convertible<OtherT, T>::value> * = nullptr) {
+ Expected(Expected<OtherT> &&Other,
+ std::enable_if_t<std::is_convertible_v<OtherT, T>> * = nullptr) {
moveConstruct(std::move(Other));
}
@@ -335,7 +345,7 @@ public:
template <class OtherT>
explicit Expected(
Expected<OtherT> &&Other,
- std::enable_if_t<!std::is_convertible<OtherT, T>::value> * = nullptr) {
+ std::enable_if_t<!std::is_convertible_v<OtherT, T>> * = nullptr) {
moveConstruct(std::move(Other));
}
diff --git a/orc-rt/unittests/ErrorTest.cpp b/orc-rt/unittests/ErrorTest.cpp
index 3fd8279..6b1fc16 100644
--- a/orc-rt/unittests/ErrorTest.cpp
+++ b/orc-rt/unittests/ErrorTest.cpp
@@ -386,6 +386,29 @@ TEST(ErrorTest, ExpectedCovariance) {
(void)!!A2;
}
+// Test that Expected<Error> works as expected.
+TEST(ErrorTest, ExpectedError) {
+ {
+ // Test success-success case.
+ Expected<Error> E(Error::success(), ForceExpectedSuccessValue());
+ EXPECT_TRUE(!!E);
+ cantFail(E.takeError());
+ auto Err = std::move(*E);
+ EXPECT_FALSE(!!Err);
+ }
+
+ {
+ // Test "failure" success case.
+ Expected<Error> E(make_error<StringError>("foo"),
+ ForceExpectedSuccessValue());
+ EXPECT_TRUE(!!E);
+ cantFail(E.takeError());
+ auto Err = std::move(*E);
+ EXPECT_TRUE(!!Err);
+ EXPECT_EQ(toString(std::move(Err)), "foo");
+ }
+}
+
// Test that the ExitOnError utility works as expected.
TEST(ErrorTest, CantFailSuccess) {
cantFail(Error::success());