aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.ci/all_requirements.txt8
-rw-r--r--clang/include/clang/Basic/BuiltinsX86.td16
-rw-r--r--clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h1
-rw-r--r--clang/include/clang/CIR/MissingFeatures.h5
-rw-r--r--clang/lib/AST/ByteCode/Interp.cpp43
-rw-r--r--clang/lib/AST/ByteCode/InterpBuiltin.cpp56
-rw-r--r--clang/lib/AST/ExprConstant.cpp91
-rw-r--r--clang/lib/Basic/Targets/AMDGPU.h7
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp60
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h109
-rw-r--r--clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp144
-rw-r--r--clang/lib/CodeGen/CGExpr.cpp100
-rw-r--r--clang/lib/CodeGen/CGExprAgg.cpp146
-rw-r--r--clang/lib/CodeGen/CGExprScalar.cpp51
-rw-r--r--clang/lib/CodeGen/CodeGenFunction.h6
-rw-r--r--clang/lib/Headers/avx512fp16intrin.h3
-rw-r--r--clang/lib/Sema/SemaHLSL.cpp6
-rw-r--r--clang/lib/Sema/SemaOpenACC.cpp21
-rw-r--r--clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp2
-rw-r--r--clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp4
-rw-r--r--clang/test/AST/ByteCode/cxx20.cpp85
-rw-r--r--clang/test/CIR/CodeGen/global-init.cpp23
-rw-r--r--clang/test/CIR/CodeGen/new.cpp14
-rw-r--r--clang/test/CIR/CodeGenOpenACC/combined-firstprivate-clause.cpp501
-rw-r--r--clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.c232
-rw-r--r--clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.cpp501
-rw-r--r--clang/test/CIR/CodeGenOpenACC/firstprivate-clause-recipes.cpp691
-rw-r--r--clang/test/ClangScanDeps/modules-context-hash-from-named-module.cpp121
-rw-r--r--clang/test/CodeGen/X86/avx512f-builtins.c310
-rw-r--r--clang/test/CodeGen/X86/avx512fp16-builtins.c1
-rw-r--r--clang/test/CodeGen/X86/avx512vl-builtins.c331
-rw-r--r--clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl52
-rw-r--r--clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl46
-rw-r--r--clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl221
-rw-r--r--clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl42
-rw-r--r--clang/test/CodeGenOpenCL/amdgpu-features.cl4
-rw-r--r--clang/test/Misc/amdgcn.languageOptsOpenCL.cl34
-rw-r--r--clang/test/SemaCXX/constant-expression-p2280r4.cpp2
-rw-r--r--clang/test/SemaHLSL/Language/AggregateSplatCast-errors.hlsl6
-rw-r--r--clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl21
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp8
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp49
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp6
-rw-r--r--flang/docs/FortranLLVMTestSuite.md2
-rw-r--r--libcxx/docs/FeatureTestMacroTable.rst2
-rw-r--r--libcxx/docs/ReleaseNotes/22.rst1
-rw-r--r--libcxx/docs/Status/Cxx2cPapers.csv2
-rw-r--r--libcxx/include/__ranges/iota_view.h9
-rw-r--r--libcxx/include/ranges5
-rw-r--r--libcxx/include/version2
-rw-r--r--libcxx/modules/std/ranges.inc3
-rw-r--r--libcxx/test/std/language.support/support.limits/support.limits.general/ranges.version.compile.pass.cpp27
-rw-r--r--libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp27
-rw-r--r--libcxx/test/std/library/description/conventions/customization.point.object/cpo.compile.pass.cpp3
-rw-r--r--libcxx/test/std/ranges/range.factories/range.iota.view/indices.pass.cpp97
-rw-r--r--libcxx/utils/generate_feature_test_macro_components.py5
-rw-r--r--lldb/docs/resources/extensions.rst2
-rw-r--r--llvm/include/llvm/CAS/OnDiskDataAllocator.h95
-rw-r--r--llvm/include/llvm/CAS/OnDiskTrieRawHashMap.h32
-rw-r--r--llvm/include/llvm/Transforms/IPO/FunctionAttrs.h13
-rw-r--r--llvm/lib/CAS/CMakeLists.txt1
-rw-r--r--llvm/lib/CAS/OnDiskDataAllocator.cpp234
-rw-r--r--llvm/lib/CAS/OnDiskTrieRawHashMap.cpp31
-rw-r--r--llvm/lib/IR/Globals.cpp1
-rw-r--r--llvm/lib/Passes/PassBuilderPipelines.cpp1
-rw-r--r--llvm/lib/Passes/PassRegistry.def1
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp11
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.td8
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSubtarget.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSubtarget.h4
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp7
-rw-r--r--llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp54
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp39
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h17
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstr64Bit.td24
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrAltivec.td19
-rw-r--r--llvm/lib/Target/PowerPC/PPCRegisterInfo.td67
-rw-r--r--llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp8
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp4
-rw-r--r--llvm/lib/TargetParser/TargetParser.cpp2
-rw-r--r--llvm/lib/Transforms/IPO/FunctionAttrs.cpp119
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp13
-rw-r--r--llvm/test/Bitcode/thinlto-alias-addrspacecast.ll7
-rw-r--r--llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll12
-rw-r--r--llvm/test/CodeGen/AMDGPU/agpr-copy-propagation.mir4
-rw-r--r--llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll8
-rw-r--r--llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll106
-rw-r--r--llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll9
-rw-r--r--llvm/test/CodeGen/PowerPC/vec-nmsub.ll36
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll950
-rw-r--r--llvm/test/Other/new-pm-lto-defaults.ll1
-rw-r--r--llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_address_taken.ll40
-rw-r--r--llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_no_address_taken.ll45
-rw-r--r--llvm/test/Transforms/FunctionAttrs/norecurse_lto.ll69
-rw-r--r--llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion.ll141
-rw-r--r--llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion1.ll98
-rw-r--r--llvm/test/Transforms/FunctionAttrs/norecurse_multinode_refscc.ll41
-rw-r--r--llvm/test/Transforms/FunctionAttrs/norecurse_self_recursive_callee.ll88
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/pr162009.ll79
-rw-r--r--llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s2
-rw-r--r--llvm/unittests/CAS/CMakeLists.txt1
-rw-r--r--llvm/unittests/CAS/OnDiskDataAllocatorTest.cpp66
-rw-r--r--llvm/unittests/CAS/OnDiskTrieRawHashMapTest.cpp10
-rw-r--r--mlir/cmake/modules/AddMLIRPython.cmake127
-rw-r--r--mlir/cmake/modules/MLIRDetectPythonEnv.cmake91
-rw-r--r--mlir/docs/Dialects/Linalg/OpDSL.md4
-rw-r--r--mlir/examples/standalone/pyproject.toml4
-rw-r--r--mlir/examples/standalone/python/CMakeLists.txt18
-rw-r--r--mlir/examples/standalone/python/StandaloneExtensionPybind11.cpp38
-rw-r--r--mlir/examples/standalone/python/mlir_standalone/dialects/standalone_pybind11.py6
-rw-r--r--mlir/examples/standalone/test/python/smoketest.py11
-rw-r--r--mlir/include/mlir/Bindings/Python/PybindAdaptors.h616
-rw-r--r--mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td4
-rw-r--r--mlir/include/mlir/Dialect/Tosa/IR/TosaUtilOps.td16
-rw-r--r--mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp2
-rw-r--r--mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp5
-rw-r--r--mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp4
-rw-r--r--mlir/lib/Dialect/ArmSME/Transforms/OuterProductFusion.cpp4
-rw-r--r--mlir/lib/Dialect/GPU/IR/GPUDialect.cpp2
-rw-r--r--mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp4
-rw-r--r--mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp6
-rw-r--r--mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp4
-rw-r--r--mlir/lib/Dialect/Linalg/Transforms/Loops.cpp2
-rw-r--r--mlir/lib/Dialect/Linalg/Transforms/NamedToElementwise.cpp5
-rw-r--r--mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp5
-rw-r--r--mlir/lib/Dialect/Linalg/Utils/Utils.cpp4
-rw-r--r--mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp12
-rw-r--r--mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp5
-rw-r--r--mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp7
-rw-r--r--mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp2
-rw-r--r--mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp11
-rw-r--r--mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp8
-rw-r--r--mlir/lib/Dialect/Tosa/IR/TosaOps.cpp67
-rw-r--r--mlir/lib/Dialect/Transform/IR/TransformDialect.cpp15
-rw-r--r--mlir/lib/Dialect/Transform/IR/TransformOps.cpp18
-rw-r--r--mlir/lib/Interfaces/DataLayoutInterfaces.cpp5
-rw-r--r--mlir/lib/Rewrite/ByteCode.cpp8
-rw-r--r--mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp9
-rw-r--r--mlir/lib/Target/LLVMIR/TypeToLLVM.cpp4
-rw-r--r--mlir/lib/Tools/PDLL/AST/NodePrinter.cpp4
-rw-r--r--mlir/lib/Tools/PDLL/AST/Nodes.cpp2
-rw-r--r--mlir/python/CMakeLists.txt61
-rw-r--r--mlir/python/mlir/dialects/python_test.py11
-rw-r--r--mlir/python/requirements.txt2
-rw-r--r--mlir/test/Dialect/Tosa/invalid.mlir55
-rw-r--r--mlir/test/Dialect/Tosa/invalid_extension.mlir22
-rw-r--r--mlir/test/Dialect/Tosa/level_check.mlir30
-rw-r--r--mlir/test/Dialect/Tosa/variables.mlir132
-rw-r--r--mlir/test/Dialect/Tosa/verifier.mlir56
-rw-r--r--mlir/test/Dialect/Transform/ops-invalid.mlir65
-rw-r--r--mlir/test/python/dialects/python_test.py31
-rw-r--r--mlir/test/python/lib/CMakeLists.txt1
-rw-r--r--mlir/test/python/lib/PythonTestModulePybind11.cpp118
-rwxr-xr-xmlir/tools/mlir-linalg-ods-gen/update_core_linalg_named_ops.sh.in2
-rw-r--r--mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp11
-rw-r--r--mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp5
-rw-r--r--mlir/tools/mlir-tblgen/CppGenUtilities.cpp17
-rw-r--r--mlir/tools/mlir-tblgen/CppGenUtilities.h10
-rw-r--r--mlir/tools/mlir-tblgen/DialectGen.cpp9
-rw-r--r--mlir/tools/mlir-tblgen/EnumsGen.cpp10
-rw-r--r--mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp20
-rw-r--r--mlir/tools/mlir-tblgen/OpInterfacesGen.cpp14
-rw-r--r--mlir/tools/mlir-tblgen/TosaUtilsGen.cpp2
-rw-r--r--utils/bazel/llvm-project-overlay/mlir/BUILD.bazel54
164 files changed, 6118 insertions, 2766 deletions
diff --git a/.ci/all_requirements.txt b/.ci/all_requirements.txt
index ac9682a..313ab107 100644
--- a/.ci/all_requirements.txt
+++ b/.ci/all_requirements.txt
@@ -194,10 +194,6 @@ ml-dtypes==0.5.1 ; python_version < "3.13" \
--hash=sha256:d13755f8e8445b3870114e5b6240facaa7cb0c3361e54beba3e07fa912a6e12b \
--hash=sha256:fd918d4e6a4e0c110e2e05be7a7814d10dc1b95872accbf6512b80a109b71ae1
# via -r mlir/python/requirements.txt
-nanobind==2.9.2 \
- --hash=sha256:c37957ffd5eac7eda349cff3622ecd32e5ee1244ecc912c99b5bc8188bafd16e \
- --hash=sha256:e7608472de99d375759814cab3e2c94aba3f9ec80e62cfef8ced495ca5c27d6e
- # via -r mlir/python/requirements.txt
numpy==2.0.2 \
--hash=sha256:0123ffdaa88fa4ab64835dcbde75dcdf89c453c922f18dced6e27c90d1d0ec5a \
--hash=sha256:11a76c372d1d37437857280aa142086476136a8c0f373b2e648ab2c8f18fb195 \
@@ -299,10 +295,6 @@ pyasn1-modules==0.4.2 \
--hash=sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a \
--hash=sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6
# via google-auth
-pybind11==2.13.6 \
- --hash=sha256:237c41e29157b962835d356b370ededd57594a26d5894a795960f0047cb5caf5 \
- --hash=sha256:ba6af10348c12b24e92fa086b39cfba0eff619b61ac77c406167d813b096d39a
- # via -r mlir/python/requirements.txt
pyyaml==6.0.1 \
--hash=sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5 \
--hash=sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc \
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index a0181b7..4165225 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -2409,28 +2409,36 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>
def psraq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">;
def psrld512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">;
def psrlq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">;
+}
+
+let Features = "avx512f",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def pternlogd512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">;
def pternlogd512_maskz : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">;
def pternlogq512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">;
def pternlogq512_maskz : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def pternlogd128_mask : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">;
def pternlogd128_maskz : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def pternlogd256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">;
def pternlogd256_maskz : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def pternlogq128_mask : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">;
def pternlogq128_maskz : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def pternlogq256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">;
def pternlogq256_maskz : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">;
}
diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index 569491a..89b519e 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -125,6 +125,7 @@ public:
cir::ConstantOp getTrue(mlir::Location loc) { return getBool(true, loc); }
cir::BoolType getBoolTy() { return cir::BoolType::get(getContext()); }
+ cir::VoidType getVoidTy() { return cir::VoidType::get(getContext()); }
cir::PointerType getPointerTo(mlir::Type ty) {
return cir::PointerType::get(ty);
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index f7ca276..f795800 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -37,6 +37,11 @@ struct MissingFeatures {
static bool opGlobalDLLImportExport() { return false; }
static bool opGlobalPartition() { return false; }
static bool opGlobalUsedOrCompilerUsed() { return false; }
+ static bool opGlobalAnnotations() { return false; }
+ static bool opGlobalDtorLowering() { return false; }
+ static bool opGlobalCtorAttr() { return false; }
+ static bool opGlobalCtorPriority() { return false; }
+ static bool opGlobalCtorList() { return false; }
static bool setDSOLocal() { return false; }
static bool setComdat() { return false; }
diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp
index 21af3d6..8904396 100644
--- a/clang/lib/AST/ByteCode/Interp.cpp
+++ b/clang/lib/AST/ByteCode/Interp.cpp
@@ -1638,6 +1638,36 @@ bool Call(InterpState &S, CodePtr OpPC, const Function *Func,
return true;
}
+static bool GetDynamicDecl(InterpState &S, CodePtr OpPC, Pointer TypePtr,
+ const CXXRecordDecl *&DynamicDecl) {
+ while (TypePtr.isBaseClass())
+ TypePtr = TypePtr.getBase();
+
+ QualType DynamicType = TypePtr.getType();
+ if (TypePtr.isStatic() || TypePtr.isConst()) {
+ const VarDecl *VD = TypePtr.getDeclDesc()->asVarDecl();
+ if (!VD->isConstexpr()) {
+ const Expr *E = S.Current->getExpr(OpPC);
+ APValue V = TypePtr.toAPValue(S.getASTContext());
+ QualType TT = S.getASTContext().getLValueReferenceType(DynamicType);
+ S.FFDiag(E, diag::note_constexpr_polymorphic_unknown_dynamic_type)
+ << AccessKinds::AK_MemberCall << V.getAsString(S.getASTContext(), TT);
+ return false;
+ }
+ }
+
+ if (DynamicType->isPointerType() || DynamicType->isReferenceType()) {
+ DynamicDecl = DynamicType->getPointeeCXXRecordDecl();
+ } else if (DynamicType->isArrayType()) {
+ const Type *ElemType = DynamicType->getPointeeOrArrayElementType();
+ assert(ElemType);
+ DynamicDecl = ElemType->getAsCXXRecordDecl();
+ } else {
+ DynamicDecl = DynamicType->getAsCXXRecordDecl();
+ }
+ return true;
+}
+
bool CallVirt(InterpState &S, CodePtr OpPC, const Function *Func,
uint32_t VarArgSize) {
assert(Func->hasThisPointer());
@@ -1662,17 +1692,8 @@ bool CallVirt(InterpState &S, CodePtr OpPC, const Function *Func,
}
const CXXRecordDecl *DynamicDecl = nullptr;
- {
- Pointer TypePtr = ThisPtr;
- while (TypePtr.isBaseClass())
- TypePtr = TypePtr.getBase();
-
- QualType DynamicType = TypePtr.getType();
- if (DynamicType->isPointerType() || DynamicType->isReferenceType())
- DynamicDecl = DynamicType->getPointeeCXXRecordDecl();
- else
- DynamicDecl = DynamicType->getAsCXXRecordDecl();
- }
+ if (!GetDynamicDecl(S, OpPC, ThisPtr, DynamicDecl))
+ return false;
assert(DynamicDecl);
const auto *StaticDecl = cast<CXXRecordDecl>(Func->getParentDecl());
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index a3c4ba5..6af7ef3 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2897,7 +2897,49 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
});
Dst.initializeAllElements();
+ return true;
+}
+
+static bool interp__builtin_ia32_pternlog(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call, bool MaskZ) {
+ assert(Call->getNumArgs() == 5);
+
+ APInt U = popToAPSInt(S, Call->getArg(4)); // Lane mask
+ APInt Imm = popToAPSInt(S, Call->getArg(3)); // Ternary truth table
+ const Pointer &C = S.Stk.pop<Pointer>();
+ const Pointer &B = S.Stk.pop<Pointer>();
+ const Pointer &A = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+ unsigned DstLen = A.getNumElems();
+ const QualType ElemQT = getElemType(A);
+ const OptPrimType ElemPT = S.getContext().classify(ElemQT);
+ unsigned LaneWidth = S.getASTContext().getTypeSize(ElemQT);
+ bool DstUnsigned = ElemQT->isUnsignedIntegerOrEnumerationType();
+
+ INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
+ for (unsigned I = 0; I != DstLen; ++I) {
+ APInt ALane = A.elem<T>(I).toAPSInt();
+ APInt BLane = B.elem<T>(I).toAPSInt();
+ APInt CLane = C.elem<T>(I).toAPSInt();
+ APInt RLane(LaneWidth, 0);
+ if (U[I]) { // If lane not masked, compute ternary logic.
+ for (unsigned Bit = 0; Bit != LaneWidth; ++Bit) {
+ unsigned ABit = ALane[Bit];
+ unsigned BBit = BLane[Bit];
+ unsigned CBit = CLane[Bit];
+ unsigned Idx = (ABit << 2) | (BBit << 1) | (CBit);
+ RLane.setBitVal(Bit, Imm[Idx]);
+ }
+ Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
+ } else if (MaskZ) { // If zero masked, zero the lane.
+ Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
+ } else { // Just masked, put in A lane.
+ Dst.elem<T>(I) = static_cast<T>(APSInt(ALane, DstUnsigned));
+ }
+ }
+ });
+ Dst.initializeAllElements();
return true;
}
@@ -3760,6 +3802,20 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
S, OpPC, Call,
[](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
+ case X86::BI__builtin_ia32_pternlogd128_mask:
+ case X86::BI__builtin_ia32_pternlogd256_mask:
+ case X86::BI__builtin_ia32_pternlogd512_mask:
+ case X86::BI__builtin_ia32_pternlogq128_mask:
+ case X86::BI__builtin_ia32_pternlogq256_mask:
+ case X86::BI__builtin_ia32_pternlogq512_mask:
+ return interp__builtin_ia32_pternlog(S, OpPC, Call, /*MaskZ=*/false);
+ case X86::BI__builtin_ia32_pternlogd128_maskz:
+ case X86::BI__builtin_ia32_pternlogd256_maskz:
+ case X86::BI__builtin_ia32_pternlogd512_maskz:
+ case X86::BI__builtin_ia32_pternlogq128_maskz:
+ case X86::BI__builtin_ia32_pternlogq256_maskz:
+ case X86::BI__builtin_ia32_pternlogq512_maskz:
+ return interp__builtin_ia32_pternlog(S, OpPC, Call, /*MaskZ=*/true);
case Builtin::BI__builtin_elementwise_fshl:
return interp__builtin_elementwise_triop(S, OpPC, Call,
llvm::APIntOps::fshl);
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 7bf28d9..0b23eed 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12168,6 +12168,97 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(R, E);
}
+ case X86::BI__builtin_ia32_pternlogd128_mask:
+ case X86::BI__builtin_ia32_pternlogd256_mask:
+ case X86::BI__builtin_ia32_pternlogd512_mask:
+ case X86::BI__builtin_ia32_pternlogq128_mask:
+ case X86::BI__builtin_ia32_pternlogq256_mask:
+ case X86::BI__builtin_ia32_pternlogq512_mask: {
+ APValue AValue, BValue, CValue, ImmValue, UValue;
+ if (!EvaluateAsRValue(Info, E->getArg(0), AValue) ||
+ !EvaluateAsRValue(Info, E->getArg(1), BValue) ||
+ !EvaluateAsRValue(Info, E->getArg(2), CValue) ||
+ !EvaluateAsRValue(Info, E->getArg(3), ImmValue) ||
+ !EvaluateAsRValue(Info, E->getArg(4), UValue))
+ return false;
+
+ QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+ bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
+ APInt Imm = ImmValue.getInt();
+ APInt U = UValue.getInt();
+ unsigned ResultLen = AValue.getVectorLength();
+ SmallVector<APValue, 16> ResultElements;
+ ResultElements.reserve(ResultLen);
+
+ for (unsigned EltNum = 0; EltNum < ResultLen; ++EltNum) {
+ APInt ALane = AValue.getVectorElt(EltNum).getInt();
+ APInt BLane = BValue.getVectorElt(EltNum).getInt();
+ APInt CLane = CValue.getVectorElt(EltNum).getInt();
+
+ if (U[EltNum]) {
+ unsigned BitWidth = ALane.getBitWidth();
+ APInt ResLane(BitWidth, 0);
+
+ for (unsigned Bit = 0; Bit < BitWidth; ++Bit) {
+ unsigned ABit = ALane[Bit];
+ unsigned BBit = BLane[Bit];
+ unsigned CBit = CLane[Bit];
+
+ unsigned Idx = (ABit << 2) | (BBit << 1) | CBit;
+ ResLane.setBitVal(Bit, Imm[Idx]);
+ }
+ ResultElements.push_back(APValue(APSInt(ResLane, DestUnsigned)));
+ } else {
+ ResultElements.push_back(APValue(APSInt(ALane, DestUnsigned)));
+ }
+ }
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
+ case X86::BI__builtin_ia32_pternlogd128_maskz:
+ case X86::BI__builtin_ia32_pternlogd256_maskz:
+ case X86::BI__builtin_ia32_pternlogd512_maskz:
+ case X86::BI__builtin_ia32_pternlogq128_maskz:
+ case X86::BI__builtin_ia32_pternlogq256_maskz:
+ case X86::BI__builtin_ia32_pternlogq512_maskz: {
+ APValue AValue, BValue, CValue, ImmValue, UValue;
+ if (!EvaluateAsRValue(Info, E->getArg(0), AValue) ||
+ !EvaluateAsRValue(Info, E->getArg(1), BValue) ||
+ !EvaluateAsRValue(Info, E->getArg(2), CValue) ||
+ !EvaluateAsRValue(Info, E->getArg(3), ImmValue) ||
+ !EvaluateAsRValue(Info, E->getArg(4), UValue))
+ return false;
+
+ QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+ bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
+ APInt Imm = ImmValue.getInt();
+ APInt U = UValue.getInt();
+ unsigned ResultLen = AValue.getVectorLength();
+ SmallVector<APValue, 16> ResultElements;
+ ResultElements.reserve(ResultLen);
+
+ for (unsigned EltNum = 0; EltNum < ResultLen; ++EltNum) {
+ APInt ALane = AValue.getVectorElt(EltNum).getInt();
+ APInt BLane = BValue.getVectorElt(EltNum).getInt();
+ APInt CLane = CValue.getVectorElt(EltNum).getInt();
+
+ unsigned BitWidth = ALane.getBitWidth();
+ APInt ResLane(BitWidth, 0);
+
+ if (U[EltNum]) {
+ for (unsigned Bit = 0; Bit < BitWidth; ++Bit) {
+ unsigned ABit = ALane[Bit];
+ unsigned BBit = BLane[Bit];
+ unsigned CBit = CLane[Bit];
+
+ unsigned Idx = (ABit << 2) | (BBit << 1) | CBit;
+ ResLane.setBitVal(Bit, Imm[Idx]);
+ }
+ }
+ ResultElements.push_back(APValue(APSInt(ResLane, DestUnsigned)));
+ }
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
+
case Builtin::BI__builtin_elementwise_clzg:
case Builtin::BI__builtin_elementwise_ctzg: {
APValue SourceLHS;
diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h
index 552698a..dfcc7940 100644
--- a/clang/lib/Basic/Targets/AMDGPU.h
+++ b/clang/lib/Basic/Targets/AMDGPU.h
@@ -319,9 +319,12 @@ public:
Opts["__opencl_c_images"] = true;
Opts["__opencl_c_3d_image_writes"] = true;
Opts["cl_khr_3d_image_writes"] = true;
+ Opts["__opencl_c_program_scope_global_variables"] = true;
- Opts["__opencl_c_generic_address_space"] =
- GPUKind >= llvm::AMDGPU::GK_GFX700;
+ if (GPUKind >= llvm::AMDGPU::GK_GFX700) {
+ Opts["__opencl_c_generic_address_space"] = true;
+ Opts["__opencl_c_device_enqueue"] = true;
+ }
}
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp
index bbc45e5..24a5fc2 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp
@@ -221,10 +221,9 @@ mlir::Value OpenACCRecipeBuilderBase::makeBoundsAlloca(
return initialAlloca;
}
-mlir::Value
-OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue,
- mlir::Value bound,
- mlir::Location loc, bool inverse) {
+std::pair<mlir::Value, mlir::Value> OpenACCRecipeBuilderBase::createBoundsLoop(
+ mlir::Value subscriptedValue, mlir::Value subscriptedValue2,
+ mlir::Value bound, mlir::Location loc, bool inverse) {
mlir::Operation *bodyInsertLoc;
mlir::Type itrTy = cgf.cgm.convertType(cgf.getContext().UnsignedLongLongTy);
@@ -249,7 +248,6 @@ OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue,
return cir::PtrStrideOp::create(builder, loc, eltLoad.getType(), eltLoad,
idxLoad);
-
};
auto forStmtBuilder = [&]() {
@@ -303,6 +301,8 @@ OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue,
if (subscriptedValue)
subscriptedValue = doSubscriptOp(subscriptedValue, load);
+ if (subscriptedValue2)
+ subscriptedValue2 = doSubscriptOp(subscriptedValue2, load);
bodyInsertLoc = builder.createYield(loc);
},
/*stepBuilder=*/
@@ -325,7 +325,7 @@ OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue,
// Leave the insertion point to be inside the body, so we can loop over
// these things.
builder.setInsertionPoint(bodyInsertLoc);
- return subscriptedValue;
+ return {subscriptedValue, subscriptedValue2};
}
mlir::acc::ReductionOperator
@@ -434,7 +434,7 @@ void OpenACCRecipeBuilderBase::createInitRecipe(
mlir::Location loc, mlir::Location locEnd, SourceRange exprRange,
mlir::Value mainOp, mlir::Region &recipeInitRegion, size_t numBounds,
llvm::ArrayRef<QualType> boundTypes, const VarDecl *allocaDecl,
- QualType origType) {
+ QualType origType, bool emitInitExpr) {
assert(allocaDecl && "Required recipe variable not set?");
CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, allocaDecl};
@@ -464,14 +464,15 @@ void OpenACCRecipeBuilderBase::createInitRecipe(
// initialize this variable correctly.
CIRGenFunction::AutoVarEmission tempDeclEmission =
cgf.emitAutoVarAlloca(*allocaDecl, builder.saveInsertionPoint());
- cgf.emitAutoVarInit(tempDeclEmission);
+ if (emitInitExpr)
+ cgf.emitAutoVarInit(tempDeclEmission);
} else {
mlir::Value alloca = makeBoundsAlloca(
block, exprRange, loc, allocaDecl->getName(), numBounds, boundTypes);
// If the initializer is trivial, there is nothing to do here, so save
// ourselves some effort.
- if (allocaDecl->getInit() &&
+ if (emitInitExpr && allocaDecl->getInit() &&
(!cgf.isTrivialInitializer(allocaDecl->getInit()) ||
cgf.getContext().getLangOpts().getTrivialAutoVarInit() !=
LangOptions::TrivialAutoVarInitKind::Uninitialized))
@@ -484,35 +485,42 @@ void OpenACCRecipeBuilderBase::createInitRecipe(
void OpenACCRecipeBuilderBase::createFirstprivateRecipeCopy(
mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp,
- CIRGenFunction::AutoVarEmission tempDeclEmission,
- mlir::acc::FirstprivateRecipeOp recipe, const VarDecl *varRecipe,
- const VarDecl *temporary) {
- mlir::Block *block =
- createRecipeBlock(recipe.getCopyRegion(), mainOp.getType(), loc,
- /*numBounds=*/0, /*isInit=*/false);
- builder.setInsertionPointToEnd(&recipe.getCopyRegion().back());
+ const VarDecl *allocaDecl, const VarDecl *temporary,
+ mlir::Region &copyRegion, size_t numBounds) {
+ mlir::Block *block = createRecipeBlock(copyRegion, mainOp.getType(), loc,
+ numBounds, /*isInit=*/false);
+ builder.setInsertionPointToEnd(&copyRegion.back());
CIRGenFunction::LexicalScope ls(cgf, loc, block);
- mlir::BlockArgument fromArg = block->getArgument(0);
- mlir::BlockArgument toArg = block->getArgument(1);
+ mlir::Value fromArg = block->getArgument(0);
+ mlir::Value toArg = block->getArgument(1);
- mlir::Type elementTy =
- mlir::cast<cir::PointerType>(mainOp.getType()).getPointee();
+ llvm::MutableArrayRef<mlir::BlockArgument> boundsRange =
+ block->getArguments().drop_front(2);
- // Set the address of the emission to be the argument, so that we initialize
- // that instead of the variable in the other block.
- tempDeclEmission.setAllocatedAddress(
- Address{toArg, elementTy, cgf.getContext().getDeclAlign(varRecipe)});
+ for (mlir::BlockArgument boundArg : llvm::reverse(boundsRange))
+ std::tie(fromArg, toArg) =
+ createBoundsLoop(fromArg, toArg, boundArg, loc, /*inverse=*/false);
+
+ // Set up the 'to' address.
+ mlir::Type elementTy =
+ mlir::cast<cir::PointerType>(toArg.getType()).getPointee();
+ CIRGenFunction::AutoVarEmission tempDeclEmission(*allocaDecl);
tempDeclEmission.emittedAsOffload = true;
+ tempDeclEmission.setAllocatedAddress(
+ Address{toArg, elementTy, cgf.getContext().getDeclAlign(allocaDecl)});
+ // Set up the 'from' address from the temporary.
CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, temporary};
cgf.setAddrOfLocalVar(
temporary,
- Address{fromArg, elementTy, cgf.getContext().getDeclAlign(varRecipe)});
-
+ Address{fromArg, elementTy, cgf.getContext().getDeclAlign(allocaDecl)});
cgf.emitAutoVarInit(tempDeclEmission);
+
+ builder.setInsertionPointToEnd(&copyRegion.back());
mlir::acc::YieldOp::create(builder, locEnd);
}
+
// This function generates the 'combiner' section for a reduction recipe. Note
// that this function is not 'insertion point' clean, in that it alters the
// insertion point to be inside of the 'combiner' section of the recipe, but
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h
index 21707ad..a5da744 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h
@@ -49,14 +49,16 @@ protected:
// Creates a loop through an 'acc.bounds', leaving the 'insertion' point to be
// the inside of the loop body. Traverses LB->UB UNLESS `inverse` is set.
// Returns the 'subscriptedValue' changed with the new bounds subscript.
+ std::pair<mlir::Value, mlir::Value>
+ createBoundsLoop(mlir::Value subscriptedValue, mlir::Value subscriptedValue2,
+ mlir::Value bound, mlir::Location loc, bool inverse);
+
mlir::Value createBoundsLoop(mlir::Value subscriptedValue, mlir::Value bound,
- mlir::Location loc, bool inverse);
+ mlir::Location loc, bool inverse) {
+ return createBoundsLoop(subscriptedValue, {}, bound, loc, inverse).first;
+ }
+
mlir::acc::ReductionOperator convertReductionOp(OpenACCReductionOperator op);
- void createFirstprivateRecipeCopy(
- mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp,
- CIRGenFunction::AutoVarEmission tempDeclEmission,
- mlir::acc::FirstprivateRecipeOp recipe, const VarDecl *varRecipe,
- const VarDecl *temporary);
// This function generates the 'combiner' section for a reduction recipe. Note
// that this function is not 'insertion point' clean, in that it alters the
@@ -66,11 +68,19 @@ protected:
mlir::Value mainOp,
mlir::acc::ReductionRecipeOp recipe,
size_t numBounds);
+
void createInitRecipe(mlir::Location loc, mlir::Location locEnd,
SourceRange exprRange, mlir::Value mainOp,
mlir::Region &recipeInitRegion, size_t numBounds,
llvm::ArrayRef<QualType> boundTypes,
- const VarDecl *allocaDecl, QualType origType);
+ const VarDecl *allocaDecl, QualType origType,
+ bool emitInitExpr);
+
+ void createFirstprivateRecipeCopy(mlir::Location loc, mlir::Location locEnd,
+ mlir::Value mainOp,
+ const VarDecl *allocaDecl,
+ const VarDecl *temporary,
+ mlir::Region &copyRegion, size_t numBounds);
void createRecipeDestroySection(mlir::Location loc, mlir::Location locEnd,
mlir::Value mainOp, CharUnits alignment,
@@ -150,63 +160,6 @@ class OpenACCRecipeBuilder : OpenACCRecipeBuilderBase {
return recipeName;
}
- // Create the 'init' section of the recipe, including the 'copy' section for
- // 'firstprivate'. Note that this function is not 'insertion point' clean, in
- // that it alters the insertion point to be inside of the 'destroy' section of
- // the recipe, but doesn't restore it aftewards.
- void createRecipeInitCopy(mlir::Location loc, mlir::Location locEnd,
- SourceRange exprRange, mlir::Value mainOp,
- RecipeTy recipe, const VarDecl *varRecipe,
- const VarDecl *temporary) {
- // TODO: OpenACC: when we get the 'pointer' variants for
- // firstprivate/reduction, this probably should be removed/split into
- // functions for the BuilderBase.
- assert(varRecipe && "Required recipe variable not set?");
-
- CIRGenFunction::AutoVarEmission tempDeclEmission{
- CIRGenFunction::AutoVarEmission::invalid()};
- CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, varRecipe};
-
- // Do the 'init' section of the recipe IR, which does an alloca, then the
- // initialization (except for firstprivate).
- mlir::Block *block =
- createRecipeBlock(recipe.getInitRegion(), mainOp.getType(), loc,
- /*numBounds=*/0, /*isInit=*/true);
- builder.setInsertionPointToEnd(&recipe.getInitRegion().back());
- CIRGenFunction::LexicalScope ls(cgf, loc, block);
-
- tempDeclEmission =
- cgf.emitAutoVarAlloca(*varRecipe, builder.saveInsertionPoint());
-
- // 'firstprivate' doesn't do its initialization in the 'init' section,
- // instead it does it in the 'copy' section. SO, only do 'init' here for
- // reduction.
- if constexpr (std::is_same_v<RecipeTy, mlir::acc::ReductionRecipeOp>) {
- // Unlike Private, the recipe here is always required as it has to do
- // init, not just 'default' init.
- if (!varRecipe->getInit())
- cgf.cgm.errorNYI(exprRange, "reduction init recipe");
- cgf.emitAutoVarInit(tempDeclEmission);
- }
-
- mlir::acc::YieldOp::create(builder, locEnd);
-
- if constexpr (std::is_same_v<RecipeTy, mlir::acc::FirstprivateRecipeOp>) {
- if (!varRecipe->getInit()) {
- // If we don't have any initialization recipe, we failed during Sema to
- // initialize this correctly. If we disable the
- // Sema::TentativeAnalysisScopes in SemaOpenACC::CreateInitRecipe, it'll
- // emit an error to tell us. However, emitting those errors during
- // production is a violation of the standard, so we cannot do them.
- cgf.cgm.errorNYI(
- exprRange, "firstprivate copy-init recipe not properly generated");
- }
-
- createFirstprivateRecipeCopy(loc, locEnd, mainOp, tempDeclEmission,
- recipe, varRecipe, temporary);
- }
- }
-
public:
OpenACCRecipeBuilder(CIRGen::CIRGenFunction &cgf,
CIRGen::CIRGenBuilderTy &builder)
@@ -221,19 +174,6 @@ public:
BuiltinType::ArraySection) &&
"array section shouldn't make it to recipe creation");
- // TODO: OpenACC: This is a bit of a hackery to get this to not change for
- // the non-private recipes. This will be removed soon, when we get this
- // 'right' for firstprivate and reduction.
- if constexpr (std::is_same_v<RecipeTy, mlir::acc::FirstprivateRecipeOp>) {
- if (numBounds) {
- cgf.cgm.errorNYI(varRef->getSourceRange(),
- "firstprivate-init with bounds");
- }
- boundTypes = {};
- numBounds = 0;
- origType = baseType;
- }
-
mlir::ModuleOp mod = builder.getBlock()
->getParent()
->template getParentOfType<mlir::ModuleOp>();
@@ -262,21 +202,20 @@ public:
if constexpr (std::is_same_v<RecipeTy, mlir::acc::PrivateRecipeOp>) {
createInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp,
recipe.getInitRegion(), numBounds, boundTypes, varRecipe,
- origType);
+ origType, /*emitInitExpr=*/true);
} else if constexpr (std::is_same_v<RecipeTy,
mlir::acc::ReductionRecipeOp>) {
createInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp,
recipe.getInitRegion(), numBounds, boundTypes, varRecipe,
- origType);
+ origType, /*emitInitExpr=*/true);
createReductionRecipeCombiner(loc, locEnd, mainOp, recipe, numBounds);
} else {
static_assert(std::is_same_v<RecipeTy, mlir::acc::FirstprivateRecipeOp>);
- // TODO: OpenACC: we probably want this to call createInitRecipe as well,
- // but do so in a way that omits the 'initialization', so that we can do
- // it separately, since it belongs in the 'copy' region. It also might
- // need a way of getting the tempDeclEmission out of it for that purpose.
- createRecipeInitCopy(loc, locEnd, varRef->getSourceRange(), mainOp,
- recipe, varRecipe, temporary);
+ createInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp,
+ recipe.getInitRegion(), numBounds, boundTypes, varRecipe,
+ origType, /*emitInitExpr=*/false);
+ createFirstprivateRecipeCopy(loc, locEnd, mainOp, varRecipe, temporary,
+ recipe.getCopyRegion(), numBounds);
}
if (origType.isDestructedType())
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index c15637d..2eeef81 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -8,18 +8,39 @@
#include "PassDetail.h"
#include "clang/AST/ASTContext.h"
+#include "clang/Basic/Module.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CIR/Dialect/Builder/CIRBaseBuilder.h"
#include "clang/CIR/Dialect/IR/CIRDialect.h"
#include "clang/CIR/Dialect/IR/CIROpsEnums.h"
#include "clang/CIR/Dialect/Passes.h"
#include "clang/CIR/MissingFeatures.h"
+#include "llvm/Support/Path.h"
#include <memory>
using namespace mlir;
using namespace cir;
+static SmallString<128> getTransformedFileName(mlir::ModuleOp mlirModule) {
+ SmallString<128> fileName;
+
+ if (mlirModule.getSymName())
+ fileName = llvm::sys::path::filename(mlirModule.getSymName()->str());
+
+ if (fileName.empty())
+ fileName = "<null>";
+
+ for (size_t i = 0; i < fileName.size(); ++i) {
+ // Replace everything that's not [a-zA-Z0-9._] with a _. This set happens
+ // to be the set of C preprocessing numbers.
+ if (!clang::isPreprocessingNumberBody(fileName[i]))
+ fileName[i] = '_';
+ }
+
+ return fileName;
+}
+
namespace {
struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> {
LoweringPreparePass() = default;
@@ -30,9 +51,16 @@ struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> {
void lowerComplexDivOp(cir::ComplexDivOp op);
void lowerComplexMulOp(cir::ComplexMulOp op);
void lowerUnaryOp(cir::UnaryOp op);
+ void lowerGlobalOp(cir::GlobalOp op);
void lowerArrayDtor(cir::ArrayDtor op);
void lowerArrayCtor(cir::ArrayCtor op);
+ /// Build the function that initializes the specified global
+ cir::FuncOp buildCXXGlobalVarDeclInitFunc(cir::GlobalOp op);
+
+ /// Build a module init function that calls all the dynamic initializers.
+ void buildCXXGlobalInitFunc();
+
cir::FuncOp buildRuntimeFunction(
mlir::OpBuilder &builder, llvm::StringRef name, mlir::Location loc,
cir::FuncType type,
@@ -47,6 +75,10 @@ struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> {
/// Tracks current module.
mlir::ModuleOp mlirModule;
+ /// Tracks existing dynamic initializers.
+ llvm::StringMap<uint32_t> dynamicInitializerNames;
+ llvm::SmallVector<cir::FuncOp> dynamicInitializers;
+
void setASTContext(clang::ASTContext *c) { astCtx = c; }
};
@@ -589,6 +621,111 @@ void LoweringPreparePass::lowerUnaryOp(cir::UnaryOp op) {
op.erase();
}
+cir::FuncOp
+LoweringPreparePass::buildCXXGlobalVarDeclInitFunc(cir::GlobalOp op) {
+ // TODO(cir): Store this in the GlobalOp.
+ // This should come from the MangleContext, but for now I'm hardcoding it.
+ SmallString<256> fnName("__cxx_global_var_init");
+ // Get a unique name
+ uint32_t cnt = dynamicInitializerNames[fnName]++;
+ if (cnt)
+ fnName += "." + llvm::Twine(cnt).str();
+
+ // Create a variable initialization function.
+ CIRBaseBuilderTy builder(getContext());
+ builder.setInsertionPointAfter(op);
+ auto fnType = cir::FuncType::get({}, builder.getVoidTy());
+ FuncOp f = buildRuntimeFunction(builder, fnName, op.getLoc(), fnType,
+ cir::GlobalLinkageKind::InternalLinkage);
+
+ // Move over the initialzation code of the ctor region.
+ mlir::Block *entryBB = f.addEntryBlock();
+ if (!op.getCtorRegion().empty()) {
+ mlir::Block &block = op.getCtorRegion().front();
+ entryBB->getOperations().splice(entryBB->begin(), block.getOperations(),
+ block.begin(), std::prev(block.end()));
+ }
+
+ // Register the destructor call with __cxa_atexit
+ mlir::Region &dtorRegion = op.getDtorRegion();
+ if (!dtorRegion.empty()) {
+ assert(!cir::MissingFeatures::opGlobalDtorLowering());
+ llvm_unreachable("dtor region lowering is NYI");
+ }
+
+ // Replace cir.yield with cir.return
+ builder.setInsertionPointToEnd(entryBB);
+ mlir::Operation *yieldOp = nullptr;
+ if (!op.getCtorRegion().empty()) {
+ mlir::Block &block = op.getCtorRegion().front();
+ yieldOp = &block.getOperations().back();
+ } else {
+ assert(!cir::MissingFeatures::opGlobalDtorLowering());
+ llvm_unreachable("dtor region lowering is NYI");
+ }
+
+ assert(isa<YieldOp>(*yieldOp));
+ cir::ReturnOp::create(builder, yieldOp->getLoc());
+ return f;
+}
+
+void LoweringPreparePass::lowerGlobalOp(GlobalOp op) {
+ mlir::Region &ctorRegion = op.getCtorRegion();
+ mlir::Region &dtorRegion = op.getDtorRegion();
+
+ if (!ctorRegion.empty() || !dtorRegion.empty()) {
+ // Build a variable initialization function and move the initialzation code
+ // in the ctor region over.
+ cir::FuncOp f = buildCXXGlobalVarDeclInitFunc(op);
+
+ // Clear the ctor and dtor region
+ ctorRegion.getBlocks().clear();
+ dtorRegion.getBlocks().clear();
+
+ assert(!cir::MissingFeatures::astVarDeclInterface());
+ dynamicInitializers.push_back(f);
+ }
+
+ assert(!cir::MissingFeatures::opGlobalAnnotations());
+}
+
+void LoweringPreparePass::buildCXXGlobalInitFunc() {
+ if (dynamicInitializers.empty())
+ return;
+
+ assert(!cir::MissingFeatures::opGlobalCtorList());
+
+ SmallString<256> fnName;
+ // Include the filename in the symbol name. Including "sub_" matches gcc
+ // and makes sure these symbols appear lexicographically behind the symbols
+ // with priority (TBD). Module implementation units behave the same
+ // way as a non-modular TU with imports.
+ // TODO: check CXX20ModuleInits
+ if (astCtx->getCurrentNamedModule() &&
+ !astCtx->getCurrentNamedModule()->isModuleImplementation()) {
+ llvm::raw_svector_ostream out(fnName);
+ std::unique_ptr<clang::MangleContext> mangleCtx(
+ astCtx->createMangleContext());
+ cast<clang::ItaniumMangleContext>(*mangleCtx)
+ .mangleModuleInitializer(astCtx->getCurrentNamedModule(), out);
+ } else {
+ fnName += "_GLOBAL__sub_I_";
+ fnName += getTransformedFileName(mlirModule);
+ }
+
+ CIRBaseBuilderTy builder(getContext());
+ builder.setInsertionPointToEnd(&mlirModule.getBodyRegion().back());
+ auto fnType = cir::FuncType::get({}, builder.getVoidTy());
+ cir::FuncOp f =
+ buildRuntimeFunction(builder, fnName, mlirModule.getLoc(), fnType,
+ cir::GlobalLinkageKind::ExternalLinkage);
+ builder.setInsertionPointToStart(f.addEntryBlock());
+ for (cir::FuncOp &f : dynamicInitializers)
+ builder.createCallOp(f.getLoc(), f, {});
+
+ cir::ReturnOp::create(builder, f.getLoc());
+}
+
static void lowerArrayDtorCtorIntoLoop(cir::CIRBaseBuilderTy &builder,
clang::ASTContext *astCtx,
mlir::Operation *op, mlir::Type eltTy,
@@ -691,6 +828,8 @@ void LoweringPreparePass::runOnOp(mlir::Operation *op) {
lowerComplexDivOp(complexDiv);
else if (auto complexMul = mlir::dyn_cast<cir::ComplexMulOp>(op))
lowerComplexMulOp(complexMul);
+ else if (auto glob = mlir::dyn_cast<cir::GlobalOp>(op))
+ lowerGlobalOp(glob);
else if (auto unary = mlir::dyn_cast<cir::UnaryOp>(op))
lowerUnaryOp(unary);
}
@@ -704,12 +843,15 @@ void LoweringPreparePass::runOnOperation() {
op->walk([&](mlir::Operation *op) {
if (mlir::isa<cir::ArrayCtor, cir::ArrayDtor, cir::CastOp,
- cir::ComplexMulOp, cir::ComplexDivOp, cir::UnaryOp>(op))
+ cir::ComplexMulOp, cir::ComplexDivOp, cir::GlobalOp,
+ cir::UnaryOp>(op))
opsToTransform.push_back(op);
});
for (mlir::Operation *o : opsToTransform)
runOnOp(o);
+
+ buildCXXGlobalInitFunc();
}
std::unique_ptr<Pass> mlir::createLoweringPreparePass() {
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index e6e4947..9f30287 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -6784,29 +6784,26 @@ LValue CodeGenFunction::EmitPseudoObjectLValue(const PseudoObjectExpr *E) {
return emitPseudoObjectExpr(*this, E, true, AggValueSlot::ignored()).LV;
}
-void CodeGenFunction::FlattenAccessAndType(
- Address Addr, QualType AddrType,
- SmallVectorImpl<std::pair<Address, llvm::Value *>> &AccessList,
- SmallVectorImpl<QualType> &FlatTypes) {
- // WorkList is list of type we are processing + the Index List to access
- // the field of that type in Addr for use in a GEP
- llvm::SmallVector<std::pair<QualType, llvm::SmallVector<llvm::Value *, 4>>,
- 16>
+void CodeGenFunction::FlattenAccessAndTypeLValue(
+ LValue Val, SmallVectorImpl<LValue> &AccessList) {
+
+ llvm::SmallVector<
+ std::tuple<LValue, QualType, llvm::SmallVector<llvm::Value *, 4>>, 16>
WorkList;
llvm::IntegerType *IdxTy = llvm::IntegerType::get(getLLVMContext(), 32);
- // Addr should be a pointer so we need to 'dereference' it
- WorkList.push_back({AddrType, {llvm::ConstantInt::get(IdxTy, 0)}});
+ WorkList.push_back({Val, Val.getType(), {llvm::ConstantInt::get(IdxTy, 0)}});
while (!WorkList.empty()) {
- auto [T, IdxList] = WorkList.pop_back_val();
+ auto [LVal, T, IdxList] = WorkList.pop_back_val();
T = T.getCanonicalType().getUnqualifiedType();
assert(!isa<MatrixType>(T) && "Matrix types not yet supported in HLSL");
+
if (const auto *CAT = dyn_cast<ConstantArrayType>(T)) {
uint64_t Size = CAT->getZExtSize();
for (int64_t I = Size - 1; I > -1; I--) {
llvm::SmallVector<llvm::Value *, 4> IdxListCopy = IdxList;
IdxListCopy.push_back(llvm::ConstantInt::get(IdxTy, I));
- WorkList.emplace_back(CAT->getElementType(), IdxListCopy);
+ WorkList.emplace_back(LVal, CAT->getElementType(), IdxListCopy);
}
} else if (const auto *RT = dyn_cast<RecordType>(T)) {
const RecordDecl *Record = RT->getOriginalDecl()->getDefinitionOrSelf();
@@ -6814,44 +6811,75 @@ void CodeGenFunction::FlattenAccessAndType(
const CXXRecordDecl *CXXD = dyn_cast<CXXRecordDecl>(Record);
- llvm::SmallVector<QualType, 16> FieldTypes;
+ llvm::SmallVector<
+ std::tuple<LValue, QualType, llvm::SmallVector<llvm::Value *, 4>>, 16>
+ ReverseList;
if (CXXD && CXXD->isStandardLayout())
Record = CXXD->getStandardLayoutBaseWithFields();
// deal with potential base classes
if (CXXD && !CXXD->isStandardLayout()) {
- for (auto &Base : CXXD->bases())
- FieldTypes.push_back(Base.getType());
+ if (CXXD->getNumBases() > 0) {
+ assert(CXXD->getNumBases() == 1 &&
+ "HLSL doesn't support multiple inheritance.");
+ auto Base = CXXD->bases_begin();
+ llvm::SmallVector<llvm::Value *, 4> IdxListCopy = IdxList;
+ IdxListCopy.push_back(llvm::ConstantInt::get(
+ IdxTy, 0)); // base struct should be at index zero
+ ReverseList.emplace_back(LVal, Base->getType(), IdxListCopy);
+ }
}
- for (auto *FD : Record->fields())
- FieldTypes.push_back(FD->getType());
+ const CGRecordLayout &Layout = CGM.getTypes().getCGRecordLayout(Record);
- for (int64_t I = FieldTypes.size() - 1; I > -1; I--) {
- llvm::SmallVector<llvm::Value *, 4> IdxListCopy = IdxList;
- IdxListCopy.push_back(llvm::ConstantInt::get(IdxTy, I));
- WorkList.insert(WorkList.end(), {FieldTypes[I], IdxListCopy});
+ llvm::Type *LLVMT = ConvertTypeForMem(T);
+ CharUnits Align = getContext().getTypeAlignInChars(T);
+ LValue RLValue;
+ bool createdGEP = false;
+ for (auto *FD : Record->fields()) {
+ if (FD->isBitField()) {
+ if (FD->isUnnamedBitField())
+ continue;
+ if (!createdGEP) {
+ createdGEP = true;
+ Address GEP = Builder.CreateInBoundsGEP(LVal.getAddress(), IdxList,
+ LLVMT, Align, "gep");
+ RLValue = MakeAddrLValue(GEP, T);
+ }
+ LValue FieldLVal = EmitLValueForField(RLValue, FD, true);
+ ReverseList.push_back({FieldLVal, FD->getType(), {}});
+ } else {
+ llvm::SmallVector<llvm::Value *, 4> IdxListCopy = IdxList;
+ IdxListCopy.push_back(
+ llvm::ConstantInt::get(IdxTy, Layout.getLLVMFieldNo(FD)));
+ ReverseList.emplace_back(LVal, FD->getType(), IdxListCopy);
+ }
}
+
+ std::reverse(ReverseList.begin(), ReverseList.end());
+ llvm::append_range(WorkList, ReverseList);
} else if (const auto *VT = dyn_cast<VectorType>(T)) {
llvm::Type *LLVMT = ConvertTypeForMem(T);
CharUnits Align = getContext().getTypeAlignInChars(T);
- Address GEP =
- Builder.CreateInBoundsGEP(Addr, IdxList, LLVMT, Align, "vector.gep");
+ Address GEP = Builder.CreateInBoundsGEP(LVal.getAddress(), IdxList, LLVMT,
+ Align, "vector.gep");
+ LValue Base = MakeAddrLValue(GEP, T);
for (unsigned I = 0, E = VT->getNumElements(); I < E; I++) {
- llvm::Value *Idx = llvm::ConstantInt::get(IdxTy, I);
- // gep on vector fields is not recommended so combine gep with
- // extract/insert
- AccessList.emplace_back(GEP, Idx);
- FlatTypes.push_back(VT->getElementType());
+ llvm::Constant *Idx = llvm::ConstantInt::get(IdxTy, I);
+ LValue LV =
+ LValue::MakeVectorElt(Base.getAddress(), Idx, VT->getElementType(),
+ Base.getBaseInfo(), TBAAAccessInfo());
+ AccessList.emplace_back(LV);
}
- } else {
- // a scalar/builtin type
- llvm::Type *LLVMT = ConvertTypeForMem(T);
- CharUnits Align = getContext().getTypeAlignInChars(T);
- Address GEP =
- Builder.CreateInBoundsGEP(Addr, IdxList, LLVMT, Align, "gep");
- AccessList.emplace_back(GEP, nullptr);
- FlatTypes.push_back(T);
+ } else { // a scalar/builtin type
+ if (!IdxList.empty()) {
+ llvm::Type *LLVMT = ConvertTypeForMem(T);
+ CharUnits Align = getContext().getTypeAlignInChars(T);
+ Address GEP = Builder.CreateInBoundsGEP(LVal.getAddress(), IdxList,
+ LLVMT, Align, "gep");
+ AccessList.emplace_back(MakeAddrLValue(GEP, T));
+ } else // must be a bitfield we already created an lvalue for
+ AccessList.emplace_back(LVal);
}
}
}
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index b8150a2..07b9aeb 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -488,100 +488,62 @@ static bool isTrivialFiller(Expr *E) {
return false;
}
-static void EmitHLSLAggregateSplatCast(CodeGenFunction &CGF, Address DestVal,
- QualType DestTy, llvm::Value *SrcVal,
- QualType SrcTy, SourceLocation Loc) {
+// emit an elementwise cast where the RHS is a scalar or vector
+// or emit an aggregate splat cast
+static void EmitHLSLScalarElementwiseAndSplatCasts(CodeGenFunction &CGF,
+ LValue DestVal,
+ llvm::Value *SrcVal,
+ QualType SrcTy,
+ SourceLocation Loc) {
// Flatten our destination
- SmallVector<QualType> DestTypes; // Flattened type
- SmallVector<std::pair<Address, llvm::Value *>, 16> StoreGEPList;
- // ^^ Flattened accesses to DestVal we want to store into
- CGF.FlattenAccessAndType(DestVal, DestTy, StoreGEPList, DestTypes);
-
- assert(SrcTy->isScalarType() && "Invalid HLSL Aggregate splat cast.");
- for (unsigned I = 0, Size = StoreGEPList.size(); I < Size; ++I) {
- llvm::Value *Cast =
- CGF.EmitScalarConversion(SrcVal, SrcTy, DestTypes[I], Loc);
-
- // store back
- llvm::Value *Idx = StoreGEPList[I].second;
- if (Idx) {
- llvm::Value *V =
- CGF.Builder.CreateLoad(StoreGEPList[I].first, "load.for.insert");
- Cast = CGF.Builder.CreateInsertElement(V, Cast, Idx);
- }
- CGF.Builder.CreateStore(Cast, StoreGEPList[I].first);
- }
-}
-
-// emit a flat cast where the RHS is a scalar, including vector
-static void EmitHLSLScalarFlatCast(CodeGenFunction &CGF, Address DestVal,
- QualType DestTy, llvm::Value *SrcVal,
- QualType SrcTy, SourceLocation Loc) {
- // Flatten our destination
- SmallVector<QualType, 16> DestTypes; // Flattened type
- SmallVector<std::pair<Address, llvm::Value *>, 16> StoreGEPList;
- // ^^ Flattened accesses to DestVal we want to store into
- CGF.FlattenAccessAndType(DestVal, DestTy, StoreGEPList, DestTypes);
-
- assert(SrcTy->isVectorType() && "HLSL Flat cast doesn't handle splatting.");
- const VectorType *VT = SrcTy->getAs<VectorType>();
- SrcTy = VT->getElementType();
- assert(StoreGEPList.size() <= VT->getNumElements() &&
- "Cannot perform HLSL flat cast when vector source \
- object has less elements than flattened destination \
- object.");
- for (unsigned I = 0, Size = StoreGEPList.size(); I < Size; I++) {
- llvm::Value *Load = CGF.Builder.CreateExtractElement(SrcVal, I, "vec.load");
+ SmallVector<LValue, 16> StoreList;
+ CGF.FlattenAccessAndTypeLValue(DestVal, StoreList);
+
+ bool isVector = false;
+ if (auto *VT = SrcTy->getAs<VectorType>()) {
+ isVector = true;
+ SrcTy = VT->getElementType();
+ assert(StoreList.size() <= VT->getNumElements() &&
+ "Cannot perform HLSL flat cast when vector source \
+ object has less elements than flattened destination \
+ object.");
+ }
+
+ for (unsigned I = 0, Size = StoreList.size(); I < Size; I++) {
+ LValue DestLVal = StoreList[I];
+ llvm::Value *Load =
+ isVector ? CGF.Builder.CreateExtractElement(SrcVal, I, "vec.load")
+ : SrcVal;
llvm::Value *Cast =
- CGF.EmitScalarConversion(Load, SrcTy, DestTypes[I], Loc);
-
- // store back
- llvm::Value *Idx = StoreGEPList[I].second;
- if (Idx) {
- llvm::Value *V =
- CGF.Builder.CreateLoad(StoreGEPList[I].first, "load.for.insert");
- Cast = CGF.Builder.CreateInsertElement(V, Cast, Idx);
- }
- CGF.Builder.CreateStore(Cast, StoreGEPList[I].first);
+ CGF.EmitScalarConversion(Load, SrcTy, DestLVal.getType(), Loc);
+ CGF.EmitStoreThroughLValue(RValue::get(Cast), DestLVal);
}
}
// emit a flat cast where the RHS is an aggregate
-static void EmitHLSLElementwiseCast(CodeGenFunction &CGF, Address DestVal,
- QualType DestTy, Address SrcVal,
- QualType SrcTy, SourceLocation Loc) {
+static void EmitHLSLElementwiseCast(CodeGenFunction &CGF, LValue DestVal,
+ LValue SrcVal, SourceLocation Loc) {
// Flatten our destination
- SmallVector<QualType, 16> DestTypes; // Flattened type
- SmallVector<std::pair<Address, llvm::Value *>, 16> StoreGEPList;
- // ^^ Flattened accesses to DestVal we want to store into
- CGF.FlattenAccessAndType(DestVal, DestTy, StoreGEPList, DestTypes);
+ SmallVector<LValue, 16> StoreList;
+ CGF.FlattenAccessAndTypeLValue(DestVal, StoreList);
// Flatten our src
- SmallVector<QualType, 16> SrcTypes; // Flattened type
- SmallVector<std::pair<Address, llvm::Value *>, 16> LoadGEPList;
- // ^^ Flattened accesses to SrcVal we want to load from
- CGF.FlattenAccessAndType(SrcVal, SrcTy, LoadGEPList, SrcTypes);
+ SmallVector<LValue, 16> LoadList;
+ CGF.FlattenAccessAndTypeLValue(SrcVal, LoadList);
- assert(StoreGEPList.size() <= LoadGEPList.size() &&
- "Cannot perform HLSL flat cast when flattened source object \
+ assert(StoreList.size() <= LoadList.size() &&
+ "Cannot perform HLSL elementwise cast when flattened source object \
has less elements than flattened destination object.");
- // apply casts to what we load from LoadGEPList
+ // apply casts to what we load from LoadList
// and store result in Dest
- for (unsigned I = 0, E = StoreGEPList.size(); I < E; I++) {
- llvm::Value *Idx = LoadGEPList[I].second;
- llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[I].first, "load");
- Load =
- Idx ? CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract") : Load;
- llvm::Value *Cast =
- CGF.EmitScalarConversion(Load, SrcTypes[I], DestTypes[I], Loc);
-
- // store back
- Idx = StoreGEPList[I].second;
- if (Idx) {
- llvm::Value *V =
- CGF.Builder.CreateLoad(StoreGEPList[I].first, "load.for.insert");
- Cast = CGF.Builder.CreateInsertElement(V, Cast, Idx);
- }
- CGF.Builder.CreateStore(Cast, StoreGEPList[I].first);
+ for (unsigned I = 0, E = StoreList.size(); I < E; I++) {
+ LValue DestLVal = StoreList[I];
+ LValue SrcLVal = LoadList[I];
+ RValue RVal = CGF.EmitLoadOfLValue(SrcLVal, Loc);
+ assert(RVal.isScalar() && "All flattened source values should be scalars");
+ llvm::Value *Val = RVal.getScalarVal();
+ llvm::Value *Cast = CGF.EmitScalarConversion(Val, SrcLVal.getType(),
+ DestLVal.getType(), Loc);
+ CGF.EmitStoreThroughLValue(RValue::get(Cast), DestLVal);
}
}
@@ -988,31 +950,33 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) {
Expr *Src = E->getSubExpr();
QualType SrcTy = Src->getType();
RValue RV = CGF.EmitAnyExpr(Src);
- QualType DestTy = E->getType();
- Address DestVal = Dest.getAddress();
+ LValue DestLVal = CGF.MakeAddrLValue(Dest.getAddress(), E->getType());
SourceLocation Loc = E->getExprLoc();
- assert(RV.isScalar() && "RHS of HLSL splat cast must be a scalar.");
+ assert(RV.isScalar() && SrcTy->isScalarType() &&
+ "RHS of HLSL splat cast must be a scalar.");
llvm::Value *SrcVal = RV.getScalarVal();
- EmitHLSLAggregateSplatCast(CGF, DestVal, DestTy, SrcVal, SrcTy, Loc);
+ EmitHLSLScalarElementwiseAndSplatCasts(CGF, DestLVal, SrcVal, SrcTy, Loc);
break;
}
case CK_HLSLElementwiseCast: {
Expr *Src = E->getSubExpr();
QualType SrcTy = Src->getType();
RValue RV = CGF.EmitAnyExpr(Src);
- QualType DestTy = E->getType();
- Address DestVal = Dest.getAddress();
+ LValue DestLVal = CGF.MakeAddrLValue(Dest.getAddress(), E->getType());
SourceLocation Loc = E->getExprLoc();
if (RV.isScalar()) {
llvm::Value *SrcVal = RV.getScalarVal();
- EmitHLSLScalarFlatCast(CGF, DestVal, DestTy, SrcVal, SrcTy, Loc);
+ assert(SrcTy->isVectorType() &&
+ "HLSL Elementwise cast doesn't handle splatting.");
+ EmitHLSLScalarElementwiseAndSplatCasts(CGF, DestLVal, SrcVal, SrcTy, Loc);
} else {
assert(RV.isAggregate() &&
"Can't perform HLSL Aggregate cast on a complex type.");
Address SrcVal = RV.getAggregateAddress();
- EmitHLSLElementwiseCast(CGF, DestVal, DestTy, SrcVal, SrcTy, Loc);
+ EmitHLSLElementwiseCast(CGF, DestLVal, CGF.MakeAddrLValue(SrcVal, SrcTy),
+ Loc);
}
break;
}
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index c961222..06d9d81 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2397,39 +2397,37 @@ bool CodeGenFunction::ShouldNullCheckClassCastValue(const CastExpr *CE) {
}
// RHS is an aggregate type
-static Value *EmitHLSLElementwiseCast(CodeGenFunction &CGF, Address RHSVal,
- QualType RHSTy, QualType LHSTy,
- SourceLocation Loc) {
- SmallVector<std::pair<Address, llvm::Value *>, 16> LoadGEPList;
- SmallVector<QualType, 16> SrcTypes; // Flattened type
- CGF.FlattenAccessAndType(RHSVal, RHSTy, LoadGEPList, SrcTypes);
- // LHS is either a vector or a builtin?
+static Value *EmitHLSLElementwiseCast(CodeGenFunction &CGF, LValue SrcVal,
+ QualType DestTy, SourceLocation Loc) {
+ SmallVector<LValue, 16> LoadList;
+ CGF.FlattenAccessAndTypeLValue(SrcVal, LoadList);
+ // Dest is either a vector or a builtin?
// if its a vector create a temp alloca to store into and return that
- if (auto *VecTy = LHSTy->getAs<VectorType>()) {
- assert(SrcTypes.size() >= VecTy->getNumElements() &&
- "Flattened type on RHS must have more elements than vector on LHS.");
+ if (auto *VecTy = DestTy->getAs<VectorType>()) {
+ assert(LoadList.size() >= VecTy->getNumElements() &&
+ "Flattened type on RHS must have the same number or more elements "
+ "than vector on LHS.");
llvm::Value *V =
- CGF.Builder.CreateLoad(CGF.CreateIRTemp(LHSTy, "flatcast.tmp"));
+ CGF.Builder.CreateLoad(CGF.CreateIRTemp(DestTy, "flatcast.tmp"));
// write to V.
for (unsigned I = 0, E = VecTy->getNumElements(); I < E; I++) {
- llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[I].first, "load");
- llvm::Value *Idx = LoadGEPList[I].second;
- Load = Idx ? CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract")
- : Load;
- llvm::Value *Cast = CGF.EmitScalarConversion(
- Load, SrcTypes[I], VecTy->getElementType(), Loc);
+ RValue RVal = CGF.EmitLoadOfLValue(LoadList[I], Loc);
+ assert(RVal.isScalar() &&
+ "All flattened source values should be scalars.");
+ llvm::Value *Cast =
+ CGF.EmitScalarConversion(RVal.getScalarVal(), LoadList[I].getType(),
+ VecTy->getElementType(), Loc);
V = CGF.Builder.CreateInsertElement(V, Cast, I);
}
return V;
}
- // i its a builtin just do an extract element or load.
- assert(LHSTy->isBuiltinType() &&
+ // if its a builtin just do an extract element or load.
+ assert(DestTy->isBuiltinType() &&
"Destination type must be a vector or builtin type.");
- llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[0].first, "load");
- llvm::Value *Idx = LoadGEPList[0].second;
- Load =
- Idx ? CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract") : Load;
- return CGF.EmitScalarConversion(Load, LHSTy, SrcTypes[0], Loc);
+ RValue RVal = CGF.EmitLoadOfLValue(LoadList[0], Loc);
+ assert(RVal.isScalar() && "All flattened source values should be scalars.");
+ return CGF.EmitScalarConversion(RVal.getScalarVal(), LoadList[0].getType(),
+ DestTy, Loc);
}
// VisitCastExpr - Emit code for an explicit or implicit cast. Implicit casts
@@ -2954,12 +2952,11 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
case CK_HLSLElementwiseCast: {
RValue RV = CGF.EmitAnyExpr(E);
SourceLocation Loc = CE->getExprLoc();
- QualType SrcTy = E->getType();
assert(RV.isAggregate() && "Not a valid HLSL Elementwise Cast.");
// RHS is an aggregate
- Address SrcVal = RV.getAggregateAddress();
- return EmitHLSLElementwiseCast(CGF, SrcVal, SrcTy, DestTy, Loc);
+ LValue SrcVal = CGF.MakeAddrLValue(RV.getAggregateAddress(), E->getType());
+ return EmitHLSLElementwiseCast(CGF, SrcVal, DestTy, Loc);
}
} // end of switch
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index f0565c1..99de6e1 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4464,10 +4464,8 @@ public:
AggValueSlot slot = AggValueSlot::ignored());
LValue EmitPseudoObjectLValue(const PseudoObjectExpr *e);
- void FlattenAccessAndType(
- Address Addr, QualType AddrTy,
- SmallVectorImpl<std::pair<Address, llvm::Value *>> &AccessList,
- SmallVectorImpl<QualType> &FlatTypes);
+ void FlattenAccessAndTypeLValue(LValue LVal,
+ SmallVectorImpl<LValue> &AccessList);
llvm::Value *EmitIvarOffset(const ObjCInterfaceDecl *Interface,
const ObjCIvarDecl *Ivar);
diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h
index 4bd7981..d951ba0 100644
--- a/clang/lib/Headers/avx512fp16intrin.h
+++ b/clang/lib/Headers/avx512fp16intrin.h
@@ -41,7 +41,8 @@ typedef _Float16 __m512h_u __attribute__((__vector_size__(64), __aligned__(1)));
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#endif
-static __inline__ _Float16 __DEFAULT_FN_ATTRS512 _mm512_cvtsh_h(__m512h __a) {
+static __inline__ _Float16 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_cvtsh_h(__m512h __a) {
return __a[0];
}
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index fa30c66b..2b375b9 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -3571,9 +3571,6 @@ bool SemaHLSL::CanPerformAggregateSplatCast(Expr *Src, QualType DestTy) {
if (SrcVecTy)
SrcTy = SrcVecTy->getElementType();
- if (ContainsBitField(DestTy))
- return false;
-
llvm::SmallVector<QualType> DestTypes;
BuildFlattenedTypeList(DestTy, DestTypes);
@@ -3600,9 +3597,6 @@ bool SemaHLSL::CanPerformElementwiseCast(Expr *Src, QualType DestTy) {
(DestTy->isScalarType() || DestTy->isVectorType()))
return false;
- if (ContainsBitField(DestTy) || ContainsBitField(SrcTy))
- return false;
-
llvm::SmallVector<QualType> DestTypes;
BuildFlattenedTypeList(DestTy, DestTypes);
llvm::SmallVector<QualType> SrcTypes;
diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp
index 7ad7049..8471f02 100644
--- a/clang/lib/Sema/SemaOpenACC.cpp
+++ b/clang/lib/Sema/SemaOpenACC.cpp
@@ -2724,16 +2724,6 @@ Expr *GenerateReductionInitRecipeExpr(ASTContext &Context,
return InitExpr;
}
-const Expr *StripOffBounds(const Expr *VarExpr) {
- while (isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(VarExpr)) {
- if (const auto *AS = dyn_cast<ArraySectionExpr>(VarExpr))
- VarExpr = AS->getBase()->IgnoreParenImpCasts();
- else if (const auto *Sub = dyn_cast<ArraySubscriptExpr>(VarExpr))
- VarExpr = Sub->getBase()->IgnoreParenImpCasts();
- }
- return VarExpr;
-}
-
VarDecl *CreateAllocaDecl(ASTContext &Ctx, DeclContext *DC,
SourceLocation BeginLoc, IdentifierInfo *VarName,
QualType VarTy) {
@@ -2794,17 +2784,18 @@ OpenACCPrivateRecipe SemaOpenACC::CreatePrivateInitRecipe(const Expr *VarExpr) {
OpenACCFirstPrivateRecipe
SemaOpenACC::CreateFirstPrivateInitRecipe(const Expr *VarExpr) {
- // TODO: OpenACC: This shouldn't be necessary, see PrivateInitRecipe
- VarExpr = StripOffBounds(VarExpr);
-
+ // We don't strip bounds here, so that we are doing our recipe init at the
+ // 'lowest' possible level. Codegen is going to have to do its own 'looping'.
if (!VarExpr || VarExpr->getType()->isDependentType())
return OpenACCFirstPrivateRecipe::Empty();
QualType VarTy =
VarExpr->getType().getNonReferenceType().getUnqualifiedType();
- // TODO: OpenACC: for arrays/bounds versions, we're going to have to do a
- // different initializer, but for now we can go ahead with this.
+ // Array sections are special, and we have to treat them that way.
+ if (const auto *ASE =
+ dyn_cast<ArraySectionExpr>(VarExpr->IgnoreParenImpCasts()))
+ VarTy = ArraySectionExpr::getBaseOriginalType(ASE);
VarDecl *AllocaDecl = CreateAllocaDecl(
getASTContext(), SemaRef.getCurContext(), VarExpr->getBeginLoc(),
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp
index e1f4d0d..b0096d8 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp
@@ -509,6 +509,8 @@ bool initializeScanCompilerInstance(
ScanInstance.getFrontendOpts().DisableFree = false;
ScanInstance.getFrontendOpts().GenerateGlobalModuleIndex = false;
ScanInstance.getFrontendOpts().UseGlobalModuleIndex = false;
+ ScanInstance.getFrontendOpts().GenReducedBMI = false;
+ ScanInstance.getFrontendOpts().ModuleOutputPath.clear();
// This will prevent us compiling individual modules asynchronously since
// FileManager is not thread-safe, but it does improve performance for now.
ScanInstance.getFrontendOpts().ModulesShareFileManager = true;
diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
index d67178c..a117bec 100644
--- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
+++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
@@ -263,6 +263,10 @@ makeCommonInvocationForModuleBuild(CompilerInvocation CI) {
// units.
CI.getFrontendOpts().Inputs.clear();
CI.getFrontendOpts().OutputFile.clear();
+ CI.getFrontendOpts().GenReducedBMI = false;
+ CI.getFrontendOpts().ModuleOutputPath.clear();
+ CI.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = false;
+ CI.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = false;
// LLVM options are not going to affect the AST
CI.getFrontendOpts().LLVMArgs.clear();
diff --git a/clang/test/AST/ByteCode/cxx20.cpp b/clang/test/AST/ByteCode/cxx20.cpp
index 67bf9a7..1888998 100644
--- a/clang/test/AST/ByteCode/cxx20.cpp
+++ b/clang/test/AST/ByteCode/cxx20.cpp
@@ -1070,9 +1070,30 @@ namespace Virtual {
public:
int a = f();
- virtual constexpr int f() { return 10; }
+ virtual constexpr int f() const { return 10; }
};
+ K k;
+ static_assert(k.f() == 10); // both-error {{not an integral constant expression}} \
+ // both-note {{virtual function called on object 'k' whose dynamic type is not constant}}
+
+ void f() {
+ constexpr K k;
+ static_assert(k.f() == 10);
+ }
+
+ void f2() {
+ K k;
+ static_assert(k.f() == 10); // both-error {{not an integral constant expression}} \
+ // both-note {{virtual function called on object 'k' whose dynamic type is not constant}}
+ }
+
+ static_assert(K().f() == 10);
+
+ void f3() {
+ static_assert(K().f() == 10);
+ }
+
class L : public K {
public:
int b = f();
@@ -1083,6 +1104,42 @@ namespace Virtual {
static_assert(l.a == 10);
static_assert(l.b == 10);
static_assert(l.c == 10);
+ static_assert(l.f() == 10);
+
+ struct M {
+ K& mk = k;
+ };
+ static_assert(M{}.mk.f() == 10); // both-error {{not an integral constant expression}} \
+ // both-note {{virtual function called on object 'k' whose dynamic type is not constant}}
+
+ struct N {
+ K* mk = &k;
+ };
+ static_assert(N{}.mk->f() == 10); // both-error {{not an integral constant expression}} \
+ // both-note {{virtual function called on object 'k' whose dynamic type is not constant}}
+
+ extern K o;
+ static_assert(o.f() == 10); // both-error {{not an integral constant expression}} \
+ // both-note {{virtual function called on object 'o' whose dynamic type is not constant}}
+ static K p;
+ static_assert(p.f() == 10); // both-error {{not an integral constant expression}} \
+ // both-note {{virtual function called on object 'p' whose dynamic type is not constant}}
+
+ void f4() {
+ static K p;
+ static_assert(p.f() == 10); // both-error {{not an integral constant expression}} \
+ // both-note {{virtual function called on object 'p' whose dynamic type is not constant}}
+ }
+
+ const K q;
+ static_assert(q.f() == 10); // both-error {{not an integral constant expression}} \
+ // both-note {{virtual function called on object 'q' whose dynamic type is not constant}}
+
+ void f5() {
+ const K q;
+ static_assert(q.f() == 10); // both-error {{not an integral constant expression}} \
+ // both-note {{virtual function called on object 'q' whose dynamic type is not constant}}
+ }
}
namespace DiscardedTrivialCXXConstructExpr {
@@ -1100,3 +1157,29 @@ namespace DiscardedTrivialCXXConstructExpr {
constexpr int y = foo(12); // both-error {{must be initialized by a constant expression}} \
// both-note {{in call to}}
}
+
+namespace VirtualFunctionCallThroughArrayElem {
+ struct X {
+ constexpr virtual int foo() const {
+ return 3;
+ }
+ };
+ constexpr X xs[5];
+ static_assert(xs[3].foo() == 3);
+
+ constexpr X xs2[1][2];
+ static_assert(xs2[0].foo() == 3); // both-error {{is not a structure or union}}
+ static_assert(xs2[0][0].foo() == 3);
+
+ struct Y: public X {
+ constexpr int foo() const override {
+ return 1;
+ }
+ };
+ constexpr Y ys[20];
+ static_assert(ys[12].foo() == static_cast<const X&>(ys[12]).foo());
+
+ X a[3][4];
+ static_assert(a[2][3].foo()); // both-error {{not an integral constant expression}} \
+ // both-note {{virtual function called on object 'a[2][3]' whose dynamic type is not constant}}
+}
diff --git a/clang/test/CIR/CodeGen/global-init.cpp b/clang/test/CIR/CodeGen/global-init.cpp
index 102affc..0c19e68 100644
--- a/clang/test/CIR/CodeGen/global-init.cpp
+++ b/clang/test/CIR/CodeGen/global-init.cpp
@@ -1,8 +1,9 @@
-// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2> %t-before.cir
+// RUN: FileCheck --input-file=%t-before.cir %s --check-prefix=CIR-BEFORE-LPP
// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
-// Note: The CIR generated from this test isn't ready for lowering to LLVM yet.
-// That will require changes to LoweringPrepare.
+// Note: The LoweringPrepare work isn't yet complete. We still need to create
+// the global ctor list attribute.
struct NeedsCtor {
NeedsCtor();
@@ -10,8 +11,16 @@ struct NeedsCtor {
NeedsCtor needsCtor;
-// CIR: cir.func private @_ZN9NeedsCtorC1Ev(!cir.ptr<!rec_NeedsCtor>)
-// CIR: cir.global external @needsCtor = ctor : !rec_NeedsCtor {
-// CIR: %[[THIS:.*]] = cir.get_global @needsCtor : !cir.ptr<!rec_NeedsCtor>
-// CIR: cir.call @_ZN9NeedsCtorC1Ev(%[[THIS]]) : (!cir.ptr<!rec_NeedsCtor>) -> ()
+// CIR-BEFORE-LPP: cir.global external @needsCtor = ctor : !rec_NeedsCtor {
+// CIR-BEFORE-LPP: %[[THIS:.*]] = cir.get_global @needsCtor : !cir.ptr<!rec_NeedsCtor>
+// CIR-BEFORE-LPP: cir.call @_ZN9NeedsCtorC1Ev(%[[THIS]]) : (!cir.ptr<!rec_NeedsCtor>) -> ()
+
+// CIR: cir.global external @needsCtor = #cir.zero : !rec_NeedsCtor
+// CIR: cir.func internal private @__cxx_global_var_init() {
+// CIR: %0 = cir.get_global @needsCtor : !cir.ptr<!rec_NeedsCtor>
+// CIR: cir.call @_ZN9NeedsCtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtor>) -> ()
+
+// CIR: cir.func private @_GLOBAL__sub_I_[[FILENAME:.*]]() {
+// CIR: cir.call @__cxx_global_var_init() : () -> ()
+// CIR: cir.return
// CIR: }
diff --git a/clang/test/CIR/CodeGen/new.cpp b/clang/test/CIR/CodeGen/new.cpp
index 91dae3f..3dcf7af 100644
--- a/clang/test/CIR/CodeGen/new.cpp
+++ b/clang/test/CIR/CodeGen/new.cpp
@@ -158,13 +158,13 @@ void test_new_with_complex_type() {
}
// CHECK: cir.func{{.*}} @_Z26test_new_with_complex_typev
-// CHECK: %0 = cir.alloca !cir.ptr<!cir.complex<!cir.float>>, !cir.ptr<!cir.ptr<!cir.complex<!cir.float>>>, ["a", init]
-// CHECK: %1 = cir.const #cir.int<8> : !u64i
-// CHECK: %2 = cir.call @_Znwm(%1) : (!u64i) -> !cir.ptr<!void>
-// CHECK: %3 = cir.cast bitcast %2 : !cir.ptr<!void> -> !cir.ptr<!cir.complex<!cir.float>>
-// CHECK: %4 = cir.const #cir.const_complex<#cir.fp<1.000000e+00> : !cir.float, #cir.fp<2.000000e+00> : !cir.float> : !cir.complex<!cir.float>
-// CHECK: cir.store align(8) %4, %3 : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
-// CHECK: cir.store align(8) %3, %0 : !cir.ptr<!cir.complex<!cir.float>>, !cir.ptr<!cir.ptr<!cir.complex<!cir.float>>>
+// CHECK: %[[A_ADDR:.*]] = cir.alloca !cir.ptr<!cir.complex<!cir.float>>, !cir.ptr<!cir.ptr<!cir.complex<!cir.float>>>, ["a", init]
+// CHECK: %[[COMPLEX_SIZE:.*]] = cir.const #cir.int<8> : !u64i
+// CHECK: %[[NEW_COMPLEX:.*]] = cir.call @_Znwm(%[[COMPLEX_SIZE]]) : (!u64i) -> !cir.ptr<!void>
+// CHECK: %[[COMPLEX_PTR:.*]] = cir.cast bitcast %[[NEW_COMPLEX]] : !cir.ptr<!void> -> !cir.ptr<!cir.complex<!cir.float>>
+// CHECK: %[[COMPLEX_VAL:.*]] = cir.const #cir.const_complex<#cir.fp<1.000000e+00> : !cir.float, #cir.fp<2.000000e+00> : !cir.float> : !cir.complex<!cir.float>
+// CHECK: cir.store{{.*}} %[[COMPLEX_VAL]], %[[COMPLEX_PTR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
+// CHECK: cir.store{{.*}} %[[COMPLEX_PTR]], %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.ptr<!cir.ptr<!cir.complex<!cir.float>>>
// LLVM: define{{.*}} void @_Z26test_new_with_complex_typev
// LLVM: %[[A_ADDR:.*]] = alloca ptr, i64 1, align 8
diff --git a/clang/test/CIR/CodeGenOpenACC/combined-firstprivate-clause.cpp b/clang/test/CIR/CodeGenOpenACC/combined-firstprivate-clause.cpp
index e836a37a..726cd3e 100644
--- a/clang/test/CIR/CodeGenOpenACC/combined-firstprivate-clause.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/combined-firstprivate-clause.cpp
@@ -1,4 +1,4 @@
-// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
struct NoCopyConstruct {};
@@ -81,292 +81,247 @@ struct HasDtor {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
-// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> init {
-// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}):
+// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> init {
+// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
// CHECK-NEXT: cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.firstprivate.init"]
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } copy {
-// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
-// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!s32i>, %[[ZERO]] : !u64i), !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_DECAY]] : !s32i, !cir.ptr<!s32i>
-//
-// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[ONE]] : !s64i), !cir.ptr<!s32i>
-// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[ONE_2]] : !u64i), !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i>
-//
-// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[TWO]] : !s64i), !cir.ptr<!s32i>
-// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[TWO_2]] : !u64i), !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i>
-//
-// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[THREE]] : !s64i), !cir.ptr<!s32i>
-// CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[THREE_2]] : !u64i), !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i>
-//
-// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[FOUR]] : !s64i), !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[FOUR_2]] : !u64i), !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!s32i>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load{{.*}} %[[STRIDE_FROM]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: cir.store{{.*}} %[[FROM_LOAD]], %[[STRIDE_TO]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
-// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> init {
-// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}):
+// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> init {
+// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
// CHECK-NEXT: cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.firstprivate.init"]
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } copy {
-// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!cir.float>, %[[ZERO]] : !u64i), !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_DECAY]] : !cir.float, !cir.ptr<!cir.float>
-//
-// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[ONE]] : !s64i), !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[ONE_2]] : !u64i), !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float>
-//
-// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[TWO]] : !s64i), !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[TWO_2]] : !u64i), !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float>
-//
-// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[THREE]] : !s64i), !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[THREE_2]] : !u64i), !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float>
-//
-// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[FOUR]] : !s64i), !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[FOUR_2]] : !u64i), !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float>
+// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!cir.float>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load{{.*}} %[[STRIDE_FROM]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: cir.store{{.*}} %[[FROM_LOAD]], %[[STRIDE_TO]] : !cir.float, !cir.ptr<!cir.float>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
-// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_15NoCopyConstruct : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> init {
-// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}):
+// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> init {
+// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
// CHECK-NEXT: cir.alloca !cir.array<!rec_NoCopyConstruct x 5>, !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, ["openacc.firstprivate.init"]
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } copy {
-// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ZERO]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_DECAY]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> ()
-//
-// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ONE]] : !s64i), !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ONE_2]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> ()
-//
-// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[TWO]] : !s64i), !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[TWO_2]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> ()
-//
-// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[THREE]] : !s64i), !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[THREE_2]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> ()
-//
-// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[FOUR]] : !s64i), !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[FOUR_2]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> ()
-//
+// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[STRIDE_TO]], %[[STRIDE_FROM]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
-// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_13CopyConstruct : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> init {
-// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}):
+// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_13CopyConstruct : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> init {
+// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
// CHECK-NEXT: cir.alloca !cir.array<!rec_CopyConstruct x 5>, !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>, ["openacc.firstprivate.init"]
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } copy {
-// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[ZERO]] : !u64i), !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_DECAY]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> ()
-//
-// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[ONE]] : !s64i), !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[ONE_2]] : !u64i), !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> ()
-//
-// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[TWO]] : !s64i), !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[TWO_2]] : !u64i), !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> ()
-//
-// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[THREE]] : !s64i), !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[THREE_2]] : !u64i), !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> ()
-//
-// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[FOUR]] : !s64i), !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[FOUR_2]] : !u64i), !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> ()
-//
+// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
+// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_CopyConstruct>
+// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
+// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!rec_CopyConstruct>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_CopyConstruct>
+// CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[STRIDE_TO]], %[[STRIDE_FROM]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
-// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_14NonDefaultCtor : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> init {
-// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}):
-// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_NonDefaultCtor x 5>, !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, ["openacc.firstprivate.init"]
+// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_14NonDefaultCtor : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> init {
+// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
+// CHECK-NEXT: cir.alloca !cir.array<!rec_NonDefaultCtor x 5>, !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, ["openacc.firstprivate.init"]
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } copy {
-// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ZERO]] : !u64i), !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_DECAY]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> ()
-//
-// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ONE]] : !s64i), !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ONE_2]] : !u64i), !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> ()
-//
-// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[TWO]] : !s64i), !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[TWO_2]] : !u64i), !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> ()
-//
-// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[THREE]] : !s64i), !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[THREE_2]] : !u64i), !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> ()
-//
-// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[FOUR]] : !s64i), !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[FOUR_2]] : !u64i), !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> ()
-//
+// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
+// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_NonDefaultCtor>
+// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
+// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_NonDefaultCtor>
+// CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[STRIDE_TO]], %[[STRIDE_FROM]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
-// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_7HasDtor : !cir.ptr<!cir.array<!rec_HasDtor x 5>> init {
-// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}):
+// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_7HasDtor : !cir.ptr<!cir.array<!rec_HasDtor x 5>> init {
+// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
// CHECK-NEXT: cir.alloca !cir.array<!rec_HasDtor x 5>, !cir.ptr<!cir.array<!rec_HasDtor x 5>>, ["openacc.firstprivate.init"]
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } copy {
-// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[ZERO]] : !u64i), !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_DECAY]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> ()
-//
-// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_HasDtor>, %[[ONE]] : !s64i), !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[ONE_2]] : !u64i), !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> ()
-//
-// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_HasDtor>, %[[TWO]] : !s64i), !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[TWO_2]] : !u64i), !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> ()
-//
-// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_HasDtor>, %[[THREE]] : !s64i), !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[THREE_2]] : !u64i), !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> ()
-//
-// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_HasDtor>, %[[FOUR]] : !s64i), !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[FOUR_2]] : !u64i), !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> ()
-//
+// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
+// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_HasDtor>
+// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
+// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!rec_HasDtor>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_HasDtor>
+// CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[STRIDE_TO]], %[[STRIDE_FROM]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
-//
// CHECK-NEXT: } destroy {
-// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}):
-// CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<4> : !u64i
-// CHECK-NEXT: %[[ARRPTR:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: %[[ELEM:.*]] = cir.ptr_stride(%[[ARRPTR]] : !cir.ptr<!rec_HasDtor>, %[[LAST_IDX]] : !u64i), !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !cir.ptr<!rec_HasDtor>, !cir.ptr<!cir.ptr<!rec_HasDtor>>, ["__array_idx"]
-// CHECK-NEXT: cir.store %[[ELEM]], %[[ITR]] : !cir.ptr<!rec_HasDtor>, !cir.ptr<!cir.ptr<!rec_HasDtor>>
-// CHECK-NEXT: cir.do {
-// CHECK-NEXT: %[[ELEM_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!cir.ptr<!rec_HasDtor>>, !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: cir.call @_ZN7HasDtorD1Ev(%[[ELEM_LOAD]]) nothrow : (!cir.ptr<!rec_HasDtor>) -> ()
-// CHECK-NEXT: %[[NEG_ONE:.*]] = cir.const #cir.int<-1> : !s64i
-// CHECK-NEXT: %[[PREVELEM:.*]] = cir.ptr_stride(%[[ELEM_LOAD]] : !cir.ptr<!rec_HasDtor>, %[[NEG_ONE]] : !s64i), !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: cir.store %[[PREVELEM]], %[[ITR]] : !cir.ptr<!rec_HasDtor>, !cir.ptr<!cir.ptr<!rec_HasDtor>>
+// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i
+// CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.binop(sub, %[[UB_CAST]], %[[ONE]]) : !u64i
+// CHECK-NEXT: cir.store %[[LAST_SUB_ONE]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(ge, %[[ITR_LOAD]], %[[LB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasDtor>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_HasDtor>
+// CHECK-NEXT: cir.call @_ZN7HasDtorD1Ev(%[[STRIDE]]) nothrow : (!cir.ptr<!rec_HasDtor>) -> ()
// CHECK-NEXT: cir.yield
-// CHECK-NEXT: } while {
-// CHECK-NEXT: %[[ELEM_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!cir.ptr<!rec_HasDtor>>, !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[ELEM_LOAD]], %[[ARRPTR]]) : !cir.ptr<!rec_HasDtor>, !cir.bool
-// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[DEC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
+//
extern "C" void acc_combined() {
// CHECK: cir.func{{.*}} @acc_combined() {
@@ -482,7 +437,7 @@ extern "C" void acc_combined() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[INTARR]] : !cir.ptr<!cir.array<!s32i x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 5>> {name = "someIntArr[1]"}
- // CHECK-NEXT: acc.serial combined(loop) firstprivate(@firstprivatization__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>) {
+ // CHECK-NEXT: acc.serial combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>) {
// CHECK-NEXT: acc.loop combined(serial)
// CHECK: acc.yield
// CHECK-NEXT: } loc
@@ -497,7 +452,7 @@ extern "C" void acc_combined() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[FLOATARR]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {name = "someFloatArr[1]"}
- // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
+ // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
// CHECK-NEXT: acc.loop combined(parallel)
// CHECK: acc.yield
// CHECK-NEXT: } loc
@@ -512,7 +467,7 @@ extern "C" void acc_combined() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1]"}
- // CHECK-NEXT: acc.serial combined(loop) firstprivate(@firstprivatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) {
+ // CHECK-NEXT: acc.serial combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) {
// CHECK-NEXT: acc.loop combined(serial)
// CHECK: acc.yield
// CHECK-NEXT: } loc
@@ -527,7 +482,7 @@ extern "C" void acc_combined() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[HASCOPYARR]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {name = "hasCopyArr[1]"}
- // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__ZTSA5_13CopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) {
+ // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_13CopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) {
// CHECK-NEXT: acc.loop combined(parallel)
// CHECK: acc.yield
// CHECK-NEXT: } loc
@@ -542,7 +497,7 @@ extern "C" void acc_combined() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[NOTDEFCTORARR]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {name = "notDefCtorArr[1]"}
- // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) {
+ // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_14NonDefaultCtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) {
// CHECK-NEXT: acc.loop combined(parallel)
// CHECK: acc.yield
// CHECK-NEXT: } loc
@@ -557,7 +512,7 @@ extern "C" void acc_combined() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1]"}
- // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__ZTSA5_7HasDtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) {
+ // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_7HasDtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) {
// CHECK-NEXT: acc.loop combined(parallel)
// CHECK: acc.yield
// CHECK-NEXT: } loc
@@ -607,12 +562,12 @@ extern "C" void acc_combined() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE6:.*]] = acc.firstprivate varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1]"}
- // CHECK-NEXT: acc.serial combined(loop) firstprivate(@firstprivatization__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>,
- // CHECK-SAME: @firstprivatization__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>,
- // CHECK-SAME: @firstprivatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>,
- // CHECK-SAME: @firstprivatization__ZTSA5_13CopyConstruct -> %[[PRIVATE4]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>,
- // CHECK-SAME: @firstprivatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE5]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>,
- // CHECK-SAME: @firstprivatization__ZTSA5_7HasDtor -> %[[PRIVATE6]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>)
+ // CHECK-NEXT: acc.serial combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>,
+ // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>,
+ // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>,
+ // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_13CopyConstruct -> %[[PRIVATE4]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>,
+ // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_14NonDefaultCtor -> %[[PRIVATE5]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>,
+ // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_7HasDtor -> %[[PRIVATE6]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>)
// CHECK-NEXT: acc.loop combined(serial)
// CHECK: acc.yield
// CHECK-NEXT: } loc
@@ -629,7 +584,7 @@ extern "C" void acc_combined() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[INTARR]] : !cir.ptr<!cir.array<!s32i x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 5>> {name = "someIntArr[1:1]"}
- // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>) {
+ // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>) {
// CHECK-NEXT: acc.loop combined(parallel)
// CHECK: acc.yield
// CHECK-NEXT: } loc
@@ -645,7 +600,7 @@ extern "C" void acc_combined() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[FLOATARR]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {name = "someFloatArr[1:1]"}
- // CHECK-NEXT: acc.serial combined(loop) firstprivate(@firstprivatization__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
+ // CHECK-NEXT: acc.serial combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
// CHECK-NEXT: acc.loop combined(serial)
// CHECK: acc.yield
// CHECK-NEXT: } loc
@@ -661,7 +616,7 @@ extern "C" void acc_combined() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1:1]"}
- // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) {
+ // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) {
// CHECK-NEXT: acc.loop combined(parallel)
// CHECK: acc.yield
// CHECK-NEXT: } loc
@@ -677,7 +632,7 @@ extern "C" void acc_combined() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[HASCOPYARR]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {name = "hasCopyArr[1:1]"}
- // CHECK-NEXT: acc.serial combined(loop) firstprivate(@firstprivatization__ZTSA5_13CopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) {
+ // CHECK-NEXT: acc.serial combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_13CopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) {
// CHECK-NEXT: acc.loop combined(serial)
// CHECK: acc.yield
// CHECK-NEXT: } loc
@@ -693,7 +648,7 @@ extern "C" void acc_combined() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[NOTDEFCTORARR]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {name = "notDefCtorArr[1:1]"}
- // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) {
+ // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_14NonDefaultCtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) {
// CHECK-NEXT: acc.loop combined(parallel)
// CHECK: acc.yield
// CHECK-NEXT: } loc
@@ -709,7 +664,7 @@ extern "C" void acc_combined() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1:1]"}
- // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__ZTSA5_7HasDtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) {
+ // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_7HasDtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) {
// CHECK-NEXT: acc.loop combined(parallel)
// CHECK: acc.yield
// CHECK-NEXT: } loc
@@ -765,12 +720,12 @@ extern "C" void acc_combined() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE6:.*]] = acc.firstprivate varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1:1]"}
- // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>,
- // CHECK-SAME: @firstprivatization__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>,
- // CHECK-SAME: @firstprivatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>,
- // CHECK-SAME: @firstprivatization__ZTSA5_13CopyConstruct -> %[[PRIVATE4]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>,
- // CHECK-SAME: @firstprivatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE5]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>,
- // CHECK-SAME: @firstprivatization__ZTSA5_7HasDtor -> %[[PRIVATE6]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>)
+ // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>,
+ // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>,
+ // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>,
+ // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_13CopyConstruct -> %[[PRIVATE4]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>,
+ // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_14NonDefaultCtor -> %[[PRIVATE5]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>,
+ // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_7HasDtor -> %[[PRIVATE6]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>)
// CHECK-NEXT: acc.loop combined(parallel)
// CHECK: acc.yield
// CHECK-NEXT: } loc
diff --git a/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.c b/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.c
index de6e7b0..94c2973 100644
--- a/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.c
+++ b/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.c
@@ -1,4 +1,4 @@
-// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
struct NoCopyConstruct {};
@@ -34,140 +34,110 @@ struct NoCopyConstruct {};
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
-// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> init {
-// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}):
+// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> init {
+// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
// CHECK-NEXT: cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.firstprivate.init"]
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } copy {
-// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
-// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!s32i>, %[[ZERO]] : !u64i), !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_DECAY]] : !s32i, !cir.ptr<!s32i>
-//
-// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[ONE]] : !s64i), !cir.ptr<!s32i>
-// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[ONE_2]] : !u64i), !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i>
-//
-// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[TWO]] : !s64i), !cir.ptr<!s32i>
-// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[TWO_2]] : !u64i), !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i>
-//
-// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[THREE]] : !s64i), !cir.ptr<!s32i>
-// CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[THREE_2]] : !u64i), !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i>
-//
-// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[FOUR]] : !s64i), !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[FOUR_2]] : !u64i), !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!s32i>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load{{.*}} %[[STRIDE_FROM]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: cir.store{{.*}} %[[FROM_LOAD]], %[[STRIDE_TO]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
-// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> init {
-// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}):
+// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> init {
+// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
// CHECK-NEXT: cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.firstprivate.init"]
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } copy {
-// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!cir.float>, %[[ZERO]] : !u64i), !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_DECAY]] : !cir.float, !cir.ptr<!cir.float>
-//
-// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[ONE]] : !s64i), !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[ONE_2]] : !u64i), !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float>
-//
-// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[TWO]] : !s64i), !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[TWO_2]] : !u64i), !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float>
-//
-// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[THREE]] : !s64i), !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[THREE_2]] : !u64i), !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float>
-//
-// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[FOUR]] : !s64i), !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[FOUR_2]] : !u64i), !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float>
+// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!cir.float>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load{{.*}} %[[STRIDE_FROM]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: cir.store{{.*}} %[[FROM_LOAD]], %[[STRIDE_TO]] : !cir.float, !cir.ptr<!cir.float>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
-// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_15NoCopyConstruct : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> init {
-// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}):
+// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> init {
+// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
// CHECK-NEXT: cir.alloca !cir.array<!rec_NoCopyConstruct x 5>, !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, ["openacc.firstprivate.init"]
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } copy {
-// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
+// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ZERO]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: cir.copy %[[FROM_OFFSET:.*]] to %[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>
-//
-// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ONE]] : !s64i), !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ONE]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: cir.copy %[[FROM_OFFSET]] to %[[TO_OFFSET]] : !cir.ptr<!rec_NoCopyConstruct>
-//
-// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[TWO]] : !s64i), !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[TWO]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: cir.copy %[[FROM_OFFSET]] to %[[TO_OFFSET]] : !cir.ptr<!rec_NoCopyConstruct>
-//
-// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[THREE]] : !s64i), !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[THREE]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: cir.copy %[[FROM_OFFSET]] to %[[TO_OFFSET]] : !cir.ptr<!rec_NoCopyConstruct>
-//
-// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[FOUR]] : !s64i), !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[FOUR]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: cir.copy %[[FROM_OFFSET]] to %[[TO_OFFSET]] : !cir.ptr<!rec_NoCopyConstruct>
-//
+// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: cir.copy %[[STRIDE_FROM]] to %[[STRIDE_TO]] : !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
@@ -227,7 +197,7 @@ void acc_compute() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[INTARR]] : !cir.ptr<!cir.array<!s32i x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 5>> {name = "someIntArr[1]"}
- // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>)
+ // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__Bcnt1__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>)
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
#pragma acc parallel firstprivate(someFloatArr[1])
@@ -239,7 +209,7 @@ void acc_compute() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[FLOATARR]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {name = "someFloatArr[1]"}
- // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>)
+ // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__Bcnt1__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>)
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
#pragma acc serial firstprivate(noCopyArr[1])
@@ -251,7 +221,7 @@ void acc_compute() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1]"}
- // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>)
+ // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>)
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
#pragma acc serial firstprivate(someIntArr[1], someFloatArr[1], noCopyArr[1])
@@ -277,9 +247,9 @@ void acc_compute() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE3:.*]] = acc.firstprivate varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1]"}
- // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>,
- // CHECK-SAME: @firstprivatization__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>,
- // CHECK-SAME: @firstprivatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>)
+ // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__Bcnt1__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>,
+ // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>,
+ // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>)
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
@@ -293,7 +263,7 @@ void acc_compute() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[INTARR]] : !cir.ptr<!cir.array<!s32i x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 5>> {name = "someIntArr[1:1]"}
- // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>)
+ // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__Bcnt1__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>)
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
#pragma acc serial firstprivate(someFloatArr[1:1])
@@ -306,7 +276,7 @@ void acc_compute() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[FLOATARR]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {name = "someFloatArr[1:1]"}
- // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>)
+ // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__Bcnt1__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>)
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
#pragma acc parallel firstprivate(noCopyArr[1:1])
@@ -319,7 +289,7 @@ void acc_compute() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1:1]"}
- // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>)
+ // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>)
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
#pragma acc parallel firstprivate(someIntArr[1:1], someFloatArr[1:1], noCopyArr[1:1])
@@ -348,9 +318,9 @@ void acc_compute() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE3:.*]] = acc.firstprivate varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1:1]"}
- // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>,
- // CHECK-SAME: @firstprivatization__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>,
- // CHECK-SAME: @firstprivatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>)
+ // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__Bcnt1__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>,
+ // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>,
+ // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>)
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
}
diff --git a/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.cpp b/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.cpp
index fca3ca8..1e174bb 100644
--- a/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.cpp
@@ -1,4 +1,4 @@
-// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
struct NoCopyConstruct {};
@@ -81,292 +81,247 @@ struct HasDtor {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
-// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> init {
-// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}):
+// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> init {
+// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
// CHECK-NEXT: cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.firstprivate.init"]
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } copy {
-// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
-// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!s32i>, %[[ZERO]] : !u64i), !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_DECAY]] : !s32i, !cir.ptr<!s32i>
-//
-// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[ONE]] : !s64i), !cir.ptr<!s32i>
-// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[ONE_2]] : !u64i), !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i>
-//
-// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[TWO]] : !s64i), !cir.ptr<!s32i>
-// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[TWO_2]] : !u64i), !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i>
-//
-// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[THREE]] : !s64i), !cir.ptr<!s32i>
-// CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[THREE_2]] : !u64i), !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i>
-//
-// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[FOUR]] : !s64i), !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[FOUR_2]] : !u64i), !cir.ptr<!s32i>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!s32i>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load{{.*}} %[[STRIDE_FROM]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT: cir.store{{.*}} %[[FROM_LOAD]], %[[STRIDE_TO]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
-// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> init {
-// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}):
+// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> init {
+// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
// CHECK-NEXT: cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.firstprivate.init"]
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } copy {
-// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!cir.float>, %[[ZERO]] : !u64i), !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_DECAY]] : !cir.float, !cir.ptr<!cir.float>
-//
-// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[ONE]] : !s64i), !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[ONE_2]] : !u64i), !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float>
-//
-// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[TWO]] : !s64i), !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[TWO_2]] : !u64i), !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float>
-//
-// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[THREE]] : !s64i), !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[THREE_2]] : !u64i), !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float>
-//
-// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[FOUR]] : !s64i), !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[FOUR_2]] : !u64i), !cir.ptr<!cir.float>
-// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
-// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float>
+// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!cir.float>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load{{.*}} %[[STRIDE_FROM]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK-NEXT: cir.store{{.*}} %[[FROM_LOAD]], %[[STRIDE_TO]] : !cir.float, !cir.ptr<!cir.float>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
-// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_15NoCopyConstruct : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> init {
-// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}):
+// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> init {
+// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
// CHECK-NEXT: cir.alloca !cir.array<!rec_NoCopyConstruct x 5>, !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, ["openacc.firstprivate.init"]
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } copy {
-// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ZERO]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_DECAY]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> ()
-//
-// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ONE]] : !s64i), !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ONE_2]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> ()
-//
-// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[TWO]] : !s64i), !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[TWO_2]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> ()
-//
-// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[THREE]] : !s64i), !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[THREE_2]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> ()
-//
-// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[FOUR]] : !s64i), !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[FOUR_2]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
-// CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> ()
-//
+// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[STRIDE_TO]], %[[STRIDE_FROM]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
-// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_13CopyConstruct : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> init {
-// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}):
+// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_13CopyConstruct : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> init {
+// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
// CHECK-NEXT: cir.alloca !cir.array<!rec_CopyConstruct x 5>, !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>, ["openacc.firstprivate.init"]
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } copy {
-// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[ZERO]] : !u64i), !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_DECAY]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> ()
-//
-// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[ONE]] : !s64i), !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[ONE_2]] : !u64i), !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> ()
-//
-// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[TWO]] : !s64i), !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[TWO_2]] : !u64i), !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> ()
-//
-// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[THREE]] : !s64i), !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[THREE_2]] : !u64i), !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> ()
-//
-// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[FOUR]] : !s64i), !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[FOUR_2]] : !u64i), !cir.ptr<!rec_CopyConstruct>
-// CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> ()
-//
+// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
+// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_CopyConstruct>
+// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
+// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!rec_CopyConstruct>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_CopyConstruct>
+// CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[STRIDE_TO]], %[[STRIDE_FROM]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
-// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_14NonDefaultCtor : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> init {
-// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}):
-// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_NonDefaultCtor x 5>, !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, ["openacc.firstprivate.init"]
+// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_14NonDefaultCtor : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> init {
+// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
+// CHECK-NEXT: cir.alloca !cir.array<!rec_NonDefaultCtor x 5>, !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, ["openacc.firstprivate.init"]
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } copy {
-// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ZERO]] : !u64i), !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_DECAY]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> ()
-//
-// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ONE]] : !s64i), !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ONE_2]] : !u64i), !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> ()
-//
-// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[TWO]] : !s64i), !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[TWO_2]] : !u64i), !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> ()
-//
-// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[THREE]] : !s64i), !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[THREE_2]] : !u64i), !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> ()
-//
-// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[FOUR]] : !s64i), !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[FOUR_2]] : !u64i), !cir.ptr<!rec_NonDefaultCtor>
-// CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> ()
-//
+// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
+// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_NonDefaultCtor>
+// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
+// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_NonDefaultCtor>
+// CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[STRIDE_TO]], %[[STRIDE_FROM]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
//
-// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_7HasDtor : !cir.ptr<!cir.array<!rec_HasDtor x 5>> init {
-// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}):
+// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_7HasDtor : !cir.ptr<!cir.array<!rec_HasDtor x 5>> init {
+// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
// CHECK-NEXT: cir.alloca !cir.array<!rec_HasDtor x 5>, !cir.ptr<!cir.array<!rec_HasDtor x 5>>, ["openacc.firstprivate.init"]
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } copy {
-// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[ZERO]] : !u64i), !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_DECAY]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> ()
-//
-// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_HasDtor>, %[[ONE]] : !s64i), !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[ONE_2]] : !u64i), !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> ()
-//
-// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_HasDtor>, %[[TWO]] : !s64i), !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[TWO_2]] : !u64i), !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> ()
-//
-// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_HasDtor>, %[[THREE]] : !s64i), !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[THREE_2]] : !u64i), !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> ()
-//
-// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_HasDtor>, %[[FOUR]] : !s64i), !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[FOUR_2]] : !u64i), !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> ()
-//
+// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
+// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_HasDtor>
+// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
+// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!rec_HasDtor>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_HasDtor>
+// CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[STRIDE_TO]], %[[STRIDE_FROM]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
-//
// CHECK-NEXT: } destroy {
-// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}):
-// CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<4> : !u64i
-// CHECK-NEXT: %[[ARRPTR:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: %[[ELEM:.*]] = cir.ptr_stride(%[[ARRPTR]] : !cir.ptr<!rec_HasDtor>, %[[LAST_IDX]] : !u64i), !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !cir.ptr<!rec_HasDtor>, !cir.ptr<!cir.ptr<!rec_HasDtor>>, ["__array_idx"]
-// CHECK-NEXT: cir.store %[[ELEM]], %[[ITR]] : !cir.ptr<!rec_HasDtor>, !cir.ptr<!cir.ptr<!rec_HasDtor>>
-// CHECK-NEXT: cir.do {
-// CHECK-NEXT: %[[ELEM_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!cir.ptr<!rec_HasDtor>>, !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: cir.call @_ZN7HasDtorD1Ev(%[[ELEM_LOAD]]) nothrow : (!cir.ptr<!rec_HasDtor>) -> ()
-// CHECK-NEXT: %[[NEG_ONE:.*]] = cir.const #cir.int<-1> : !s64i
-// CHECK-NEXT: %[[PREVELEM:.*]] = cir.ptr_stride(%[[ELEM_LOAD]] : !cir.ptr<!rec_HasDtor>, %[[NEG_ONE]] : !s64i), !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: cir.store %[[PREVELEM]], %[[ITR]] : !cir.ptr<!rec_HasDtor>, !cir.ptr<!cir.ptr<!rec_HasDtor>>
+// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}):
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i
+// CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.binop(sub, %[[UB_CAST]], %[[ONE]]) : !u64i
+// CHECK-NEXT: cir.store %[[LAST_SUB_ONE]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(ge, %[[ITR_LOAD]], %[[LB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasDtor>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_HasDtor>
+// CHECK-NEXT: cir.call @_ZN7HasDtorD1Ev(%[[STRIDE]]) nothrow : (!cir.ptr<!rec_HasDtor>) -> ()
// CHECK-NEXT: cir.yield
-// CHECK-NEXT: } while {
-// CHECK-NEXT: %[[ELEM_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!cir.ptr<!rec_HasDtor>>, !cir.ptr<!rec_HasDtor>
-// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[ELEM_LOAD]], %[[ARRPTR]]) : !cir.ptr<!rec_HasDtor>, !cir.bool
-// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[DEC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
+//
extern "C" void acc_compute() {
// CHECK: cir.func{{.*}} @acc_compute() {
@@ -461,7 +416,7 @@ extern "C" void acc_compute() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[INTARR]] : !cir.ptr<!cir.array<!s32i x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 5>> {name = "someIntArr[1]"}
- // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>)
+ // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__Bcnt1__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>)
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
#pragma acc parallel firstprivate(someFloatArr[1])
@@ -473,7 +428,7 @@ extern "C" void acc_compute() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[FLOATARR]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {name = "someFloatArr[1]"}
- // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>)
+ // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__Bcnt1__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>)
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
#pragma acc serial firstprivate(noCopyArr[1])
@@ -485,7 +440,7 @@ extern "C" void acc_compute() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1]"}
- // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>)
+ // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>)
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
#pragma acc parallel firstprivate(hasCopyArr[1])
@@ -497,7 +452,7 @@ extern "C" void acc_compute() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[HASCOPYARR]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {name = "hasCopyArr[1]"}
- // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__ZTSA5_13CopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>)
+ // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__Bcnt1__ZTSA5_13CopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>)
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
#pragma acc parallel firstprivate(notDefCtorArr[1])
@@ -509,7 +464,7 @@ extern "C" void acc_compute() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[NOTDEFCTORARR]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {name = "notDefCtorArr[1]"}
- // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>)
+ // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__Bcnt1__ZTSA5_14NonDefaultCtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>)
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
#pragma acc parallel firstprivate(dtorArr[1])
@@ -521,7 +476,7 @@ extern "C" void acc_compute() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1]"}
- // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__ZTSA5_7HasDtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>)
+ // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__Bcnt1__ZTSA5_7HasDtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>)
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
#pragma acc serial firstprivate(someIntArr[1], someFloatArr[1], noCopyArr[1], hasCopyArr[1], notDefCtorArr[1], dtorArr[1])
@@ -568,12 +523,12 @@ extern "C" void acc_compute() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE6:.*]] = acc.firstprivate varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1]"}
- // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>,
- // CHECK-SAME: @firstprivatization__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>,
- // CHECK-SAME: @firstprivatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>,
- // CHECK-SAME: @firstprivatization__ZTSA5_13CopyConstruct -> %[[PRIVATE4]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>,
- // CHECK-SAME: @firstprivatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE5]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>,
- // CHECK-SAME: @firstprivatization__ZTSA5_7HasDtor -> %[[PRIVATE6]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>)
+ // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__Bcnt1__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>,
+ // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>,
+ // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>,
+ // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_13CopyConstruct -> %[[PRIVATE4]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>,
+ // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_14NonDefaultCtor -> %[[PRIVATE5]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>,
+ // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_7HasDtor -> %[[PRIVATE6]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>)
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
@@ -587,7 +542,7 @@ extern "C" void acc_compute() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[INTARR]] : !cir.ptr<!cir.array<!s32i x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 5>> {name = "someIntArr[1:1]"}
- // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>)
+ // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__Bcnt1__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>)
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
#pragma acc serial firstprivate(someFloatArr[1:1])
@@ -600,7 +555,7 @@ extern "C" void acc_compute() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[FLOATARR]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {name = "someFloatArr[1:1]"}
- // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>)
+ // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__Bcnt1__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>)
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
#pragma acc parallel firstprivate(noCopyArr[1:1])
@@ -613,7 +568,7 @@ extern "C" void acc_compute() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1:1]"}
- // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>)
+ // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>)
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
#pragma acc serial firstprivate(hasCopyArr[1:1])
@@ -626,7 +581,7 @@ extern "C" void acc_compute() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[HASCOPYARR]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {name = "hasCopyArr[1:1]"}
- // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__ZTSA5_13CopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>)
+ // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__Bcnt1__ZTSA5_13CopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>)
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
#pragma acc parallel firstprivate(notDefCtorArr[1:1])
@@ -639,7 +594,7 @@ extern "C" void acc_compute() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[NOTDEFCTORARR]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {name = "notDefCtorArr[1:1]"}
- // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>)
+ // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__Bcnt1__ZTSA5_14NonDefaultCtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>)
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
#pragma acc parallel firstprivate(dtorArr[1:1])
@@ -652,7 +607,7 @@ extern "C" void acc_compute() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1:1]"}
- // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__ZTSA5_7HasDtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>)
+ // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__Bcnt1__ZTSA5_7HasDtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>)
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
#pragma acc parallel firstprivate(someIntArr[1:1], someFloatArr[1:1], noCopyArr[1:1], hasCopyArr[1:1], notDefCtorArr[1:1], dtorArr[1:1])
@@ -705,12 +660,12 @@ extern "C" void acc_compute() {
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64)
// CHECK-NEXT: %[[PRIVATE6:.*]] = acc.firstprivate varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1:1]"}
- // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>,
- // CHECK-SAME: @firstprivatization__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>,
- // CHECK-SAME: @firstprivatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>,
- // CHECK-SAME: @firstprivatization__ZTSA5_13CopyConstruct -> %[[PRIVATE4]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>,
- // CHECK-SAME: @firstprivatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE5]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>,
- // CHECK-SAME: @firstprivatization__ZTSA5_7HasDtor -> %[[PRIVATE6]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>)
+ // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__Bcnt1__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>,
+ // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>,
+ // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>,
+ // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_13CopyConstruct -> %[[PRIVATE4]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>,
+ // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_14NonDefaultCtor -> %[[PRIVATE5]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>,
+ // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_7HasDtor -> %[[PRIVATE6]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>)
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
}
diff --git a/clang/test/CIR/CodeGenOpenACC/firstprivate-clause-recipes.cpp b/clang/test/CIR/CodeGenOpenACC/firstprivate-clause-recipes.cpp
new file mode 100644
index 0000000..e10d737
--- /dev/null
+++ b/clang/test/CIR/CodeGenOpenACC/firstprivate-clause-recipes.cpp
@@ -0,0 +1,691 @@
+// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+
+// Note: unlike the 'private' recipe checks, this is just for spot-checking,
+// so this test isn't as comprehensive. The same code paths are used for
+// 'private', so we just count on those to catch the errors.
+struct NoOps {
+ int i;
+ ~NoOps();
+};
+
+struct CtorDtor {
+ int i;
+ CtorDtor();
+ ~CtorDtor();
+};
+
+void do_things(unsigned A, unsigned B) {
+ NoOps ThreeArr[5][5][5];
+
+#pragma acc parallel firstprivate(ThreeArr[B][B][B])
+// CHECK:acc.firstprivate.recipe @firstprivatization__Bcnt3__ZTSA5_A5_A5_5NoOps : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> init {
+// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}):
+// CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca !cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>, !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>>, ["openacc.firstprivate.init"] {alignment = 4 : i64}
+// CHECK-NEXT: acc.yield
+// CHECK-NEXT: } copy {
+// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}):
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i
+// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i
+// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR3_LOAD]], %[[UB3_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> -> !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>
+// CHECK-NEXT: %[[BOUND3_STRIDE_FROM:.*]] = cir.ptr_stride(%[[TLA_DECAY_FROM]] : !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>
+// CHECK-NEXT: %[[TLA_DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> -> !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>
+// CHECK-NEXT: %[[BOUND3_STRIDE_TO:.*]] = cir.ptr_stride(%[[TLA_DECAY_TO]] : !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND3_STRIDE_DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[BOUND3_STRIDE_FROM]] : !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> -> !cir.ptr<!cir.array<!rec_NoOps x 5>>
+// CHECK-NEXT: %[[BOUND2_STRIDE_FROM:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_DECAY_FROM]] : !cir.ptr<!cir.array<!rec_NoOps x 5>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.array<!rec_NoOps x 5>>
+// CHECK-NEXT: %[[BOUND3_STRIDE_DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[BOUND3_STRIDE_TO]] : !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> -> !cir.ptr<!cir.array<!rec_NoOps x 5>>
+// CHECK-NEXT: %[[BOUND2_STRIDE_TO:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_DECAY_TO]] : !cir.ptr<!cir.array<!rec_NoOps x 5>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.array<!rec_NoOps x 5>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[BOUND2_STRIDE_FROM]] : !cir.ptr<!cir.array<!rec_NoOps x 5>> -> !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[BOUND1_STRIDE_FROM:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_DECAY_FROM]] : !cir.ptr<!rec_NoOps>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[BOUND2_STRIDE_DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[BOUND2_STRIDE_TO]] : !cir.ptr<!cir.array<!rec_NoOps x 5>> -> !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[BOUND1_STRIDE_TO:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_DECAY_TO]] : !cir.ptr<!rec_NoOps>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: cir.call @_ZN5NoOpsC1ERKS_(%[[BOUND1_STRIDE_TO]], %[[BOUND1_STRIDE_FROM]]) nothrow : (!cir.ptr<!rec_NoOps>, !cir.ptr<!rec_NoOps>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR3_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: acc.yield
+// CHECK-NEXT:} destroy {
+// CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}):
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i
+// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i
+// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i
+// CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.binop(sub, %[[UB3_CAST]], %[[ONE]]) : !u64i
+// CHECK-NEXT: cir.store %[[LAST_SUB_ONE]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(ge, %[[ITR3_LOAD]], %[[LB3_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_DECAY:.*]] = cir.cast array_to_ptrdecay %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> -> !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>
+// CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride(%[[TLA_DECAY]] : !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i
+// CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.binop(sub, %[[UB2_CAST]], %[[ONE]]) : !u64i
+// CHECK-NEXT: cir.store %[[LAST_SUB_ONE]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(ge, %[[ITR2_LOAD]], %[[LB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND3_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[BOUND3_STRIDE]] : !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> -> !cir.ptr<!cir.array<!rec_NoOps x 5>>
+// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_DECAY]] : !cir.ptr<!cir.array<!rec_NoOps x 5>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.array<!rec_NoOps x 5>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i
+// CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.binop(sub, %[[UB1_CAST]], %[[ONE]]) : !u64i
+// CHECK-NEXT: cir.store %[[LAST_SUB_ONE]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(ge, %[[ITR1_LOAD]], %[[LB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[BOUND2_STRIDE]] : !cir.ptr<!cir.array<!rec_NoOps x 5>> -> !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[BOUND1_STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_DECAY]] : !cir.ptr<!rec_NoOps>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: cir.call @_ZN5NoOpsD1Ev(%[[BOUND1_STRIDE]]) nothrow : (!cir.ptr<!rec_NoOps>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[DEC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[DEC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[ITR3_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[DEC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT:acc.yield
+// CHECK-NEXT:}
+ ;
+
+ NoOps ***ThreePtr;
+#pragma acc parallel firstprivate(ThreePtr[B][B][A:B])
+// CHECK: acc.firstprivate.recipe @firstprivatization__Bcnt3__ZTSPPP5NoOps : !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>> init {
+// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}):
+// CHECK-NEXT: %[[TOP_LEVEL_ALLOCA:.*]] = cir.alloca !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>>, ["openacc.firstprivate.init"] {alignment = 8 : i64}
+// CHECK-NEXT: %[[INT_PTR_PTR_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_PTR_PTR_UPPER_BOUND]] : index to !u64i
+// CHECK-NEXT: %[[SIZEOF_PTR:.*]] = cir.const #cir.int<8> : !u64i
+// CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.binop(mul, %[[UPPER_BOUND_CAST]], %[[SIZEOF_PTR]]) : !u64i
+// CHECK-NEXT: %[[INT_PTR_PTR_VLA_ALLOCA:.*]] = cir.alloca !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, %[[CALC_ALLOCA_SIZE]] : !u64i, ["openacc.init.bounds"] {alignment = 8 : i64}
+//
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[UPPER_LIMIT:.*]] = cir.const #cir.int<1> : !u64i
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UPPER_LIMIT]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+//
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[SRC_IDX:.*]] = cir.binop(mul, %[[UPPER_BOUND_CAST]], %[[ITR_LOAD]]) : !u64i
+// CHECK-NEXT: %[[SRC_STRIDE:.*]] = cir.ptr_stride(%[[INT_PTR_PTR_VLA_ALLOCA]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, %[[SRC_IDX]] : !u64i), !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>
+// CHECK-NEXT: %[[DEST_STRIDE:.*]] = cir.ptr_stride(%[[TOP_LEVEL_ALLOCA]] : !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>>
+// CHECK-NEXT: cir.store %[[SRC_STRIDE]], %[[DEST_STRIDE]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>>
+// CHECK-NEXT: cir.yield
+//
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+//
+// CHECK-NEXT: %[[INT_PTR_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UPPER_BOUND_CAST_2:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_PTR_UPPER_BOUND]] : index to !u64i
+// CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.binop(mul, %[[UPPER_BOUND_CAST_2]], %[[UPPER_BOUND_CAST]]) : !u64i
+// CHECK-NEXT: %[[SIZEOF_PTR_PTR:.*]] = cir.const #cir.int<8> : !u64i
+// CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.binop(mul, %[[NUM_ELTS]], %[[SIZEOF_PTR_PTR]]) : !u64i
+// CHECK-NEXT: %[[INT_PTR_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr<!rec_NoOps>, !cir.ptr<!cir.ptr<!rec_NoOps>>, %[[CALC_ALLOCA_SIZE]] : !u64i, ["openacc.init.bounds"] {alignment = 8 : i64}
+//
+//
+// Copy array pointer to the original alloca.
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UPPER_BOUND_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+//
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[SRC_IDX:.*]] = cir.binop(mul, %[[UPPER_BOUND_CAST_2]], %[[ITR_LOAD]]) : !u64i
+// CHECK-NEXT: %[[SRC_STRIDE:.*]] = cir.ptr_stride(%[[INT_PTR_PTR_ALLOCA]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, %[[SRC_IDX]] : !u64i), !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: %[[DEST_STRIDE:.*]] = cir.ptr_stride(%[[INT_PTR_PTR_VLA_ALLOCA]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>
+// CHECK-NEXT: cir.store %[[SRC_STRIDE]], %[[DEST_STRIDE]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>
+// CHECK-NEXT: cir.yield
+//
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+//
+// CHECK-NEXT: %[[INT_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UPPER_BOUND_CAST_3:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_UPPER_BOUND]] : index to !u64i
+// CHECK-NEXT: %[[NUM_ELTS_2:.*]] = cir.binop(mul, %[[UPPER_BOUND_CAST_3]], %[[NUM_ELTS]]) : !u64i
+// CHECK-NEXT: %[[SIZEOF_INT:.*]] = cir.const #cir.int<4> : !u64i
+// CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.binop(mul, %[[NUM_ELTS_2]], %[[SIZEOF_INT]]) : !u64i
+// CHECK-NEXT: %[[INT_PTR_ALLOCA:.*]] = cir.alloca !rec_NoOps, !cir.ptr<!rec_NoOps>, %[[CALC_ALLOCA_SIZE]] : !u64i, ["openacc.init.bounds"] {alignment = 4 : i64}
+//
+// Copy array pointer to the original alloca.
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[NUM_ELTS]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+//
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[SRC_IDX:.*]] = cir.binop(mul, %[[UPPER_BOUND_CAST_3]], %[[ITR_LOAD]]) : !u64i
+// CHECK-NEXT: %[[SRC_STRIDE:.*]] = cir.ptr_stride(%[[INT_PTR_ALLOCA]] : !cir.ptr<!rec_NoOps>, %[[SRC_IDX]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[DEST_STRIDE:.*]] = cir.ptr_stride(%[[INT_PTR_PTR_ALLOCA]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: cir.store %[[SRC_STRIDE]], %[[DEST_STRIDE]] : !cir.ptr<!rec_NoOps>, !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: cir.yield
+//
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: acc.yield
+//
+// CHECK-NEXT: } copy {
+// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}):
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i
+// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i
+// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR3_LOAD]], %[[UB3_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_LOAD_FROM:.*]] = cir.load %[[ARG_FROM]] : !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>>, !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>
+// CHECK-NEXT: %[[BOUND3_STRIDE_FROM:.*]] = cir.ptr_stride(%[[TLA_LOAD_FROM]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>
+// CHECK-NEXT: %[[TLA_LOAD_TO:.*]] = cir.load %[[ARG_TO]] : !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>>, !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>
+// CHECK-NEXT: %[[BOUND3_STRIDETO:.*]] = cir.ptr_stride(%[[TLA_LOAD_TO]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND3_STRIDE_LOAD_FROM:.*]] = cir.load %[[BOUND3_STRIDE_FROM]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: %[[BOUND2_STRIDE_FROM:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_LOAD_FROM]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: %[[BOUND3_STRIDE_LOAD_TO:.*]] = cir.load %[[BOUND3_STRIDE_TO]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: %[[BOUND2_STRIDE_TO:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_LOAD_TO]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_LOAD_FROM:.*]] = cir.load %[[BOUND2_STRIDE_FROM]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_LOAD_FROM]] : !cir.ptr<!rec_NoOps>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[BOUND2_STRIDE_LOAD_TO:.*]] = cir.load %[[BOUND2_STRIDE_TO]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_LOAD_TO]] : !cir.ptr<!rec_NoOps>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: cir.call @_ZN5NoOpsC1ERKS_(%[[BOUND1_STRIDE_TO]], %[[BOUND1_STRIDE_FROM]]) nothrow : (!cir.ptr<!rec_NoOps>, !cir.ptr<!rec_NoOps>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR3_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+//
+// CHECK-NEXT: acc.yield
+// CHECK-NEXT: } destroy {
+// CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}):
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i
+// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i
+// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i
+// CHECK-NEXT: %[[ONE_BELOW_UB3:.*]] = cir.binop(sub, %[[UB3_CAST]], %[[CONST_ONE]]) : !u64i
+// CHECK-NEXT: cir.store %[[ONE_BELOW_UB3]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(ge, %[[ITR3_LOAD]], %[[LB3_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_LOAD:.*]] = cir.load %[[PRIVATE]] : !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>>, !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>
+// CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride(%[[TLA_LOAD]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i
+// CHECK-NEXT: %[[ONE_BELOW_UB2:.*]] = cir.binop(sub, %[[UB2_CAST]], %[[CONST_ONE]]) : !u64i
+// CHECK-NEXT: cir.store %[[ONE_BELOW_UB2]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(ge, %[[ITR2_LOAD]], %[[LB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND3_STRIDE_LOAD:.*]] = cir.load %[[BOUND3_STRIDE]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_LOAD]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i
+// CHECK-NEXT: %[[ONE_BELOW_UB1:.*]] = cir.binop(sub, %[[UB1_CAST]], %[[CONST_ONE]]) : !u64i
+// CHECK-NEXT: cir.store %[[ONE_BELOW_UB1]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(ge, %[[ITR1_LOAD]], %[[LB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_LOAD:.*]] = cir.load %[[BOUND2_STRIDE]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_LOAD]] : !cir.ptr<!rec_NoOps>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: cir.call @_ZN5NoOpsD1Ev(%[[STRIDE]]) nothrow : (!cir.ptr<!rec_NoOps>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[DEC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[DEC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[ITR3_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[DEC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+//
+// CHECK-NEXT: acc.yield
+// CHECK-NEXT: }
+;
+ using PtrTArrayTy = CtorDtor*[5];
+ PtrTArrayTy *PtrArrayPtr;
+
+#pragma acc parallel firstprivate(PtrArrayPtr[B][B][B])
+// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt3__ZTSPA5_P8CtorDtor : !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>> init {
+// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}):
+// CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>, !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>>, ["openacc.firstprivate.init"] {alignment = 8 : i64}
+// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i
+// CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<40> : !u64i
+// CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.binop(mul, %[[UB3_CAST]], %[[ARR_SIZE]]) : !u64i
+// CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca !cir.array<!cir.ptr<!rec_CtorDtor> x 5>, !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>, %[[ALLOCA_SIZE]] : !u64i, ["openacc.init.bounds"] {alignment = 8 : i64}
+//
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[UPP_BOUND:.*]] = cir.const #cir.int<1> : !u64i
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UPP_BOUND]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[SRC_IDX:.*]] = cir.binop(mul, %[[UB3_CAST]], %[[ITR_LOAD]]) : !u64i
+// CHECK-NEXT: %[[SRC:.*]] = cir.ptr_stride(%[[ARR_ALLOCA]] : !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>, %[[SRC_IDX]] : !u64i), !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>
+// CHECK-NEXT: %[[DEST:.*]] = cir.ptr_stride(%[[TL_ALLOCA]] : !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>>
+// CHECK-NEXT: cir.store %[[SRC]], %[[DEST]] : !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>, !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+//
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.binop(mul, %[[UB2_CAST]], %[[UB3_CAST]]) : !u64i
+//
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARR_ALLOCA]] : !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>> -> !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, %[[ZERO]] : !u64i), !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+//
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[NUM_ELTS2:.*]] = cir.binop(mul, %[[UB1_CAST]], %[[NUM_ELTS]]) : !u64i
+// CHECK-NEXT: %[[ELT_SIZE:.*]] = cir.const #cir.int<4> : !u64i
+// CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.binop(mul, %[[NUM_ELTS2]], %[[ELT_SIZE]]) : !u64i
+// CHECK-NEXT: %[[ARR_ALLOCA2:.*]] = cir.alloca !rec_CtorDtor, !cir.ptr<!rec_CtorDtor>, %[[ALLOCA_SIZE]] : !u64i, ["openacc.init.bounds"] {alignment = 4 : i64}
+//
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[NUM_ELTS]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[SRC_IDX:.*]] = cir.binop(mul, %[[UB1_CAST]], %[[ITR_LOAD]]) : !u64i
+// CHECK-NEXT: %[[SRC:.*]] = cir.ptr_stride(%[[ARR_ALLOCA2]] : !cir.ptr<!rec_CtorDtor>, %[[SRC_IDX]] : !u64i), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[DEST:.*]] = cir.ptr_stride(%[[STRIDE]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: cir.store %[[SRC]], %[[DEST]] : !cir.ptr<!rec_CtorDtor>, !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: acc.yield
+// CHECK-NEXT: } copy {
+// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}):
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i
+// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i
+// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR3_LOAD]], %[[UB3_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_LOAD_FROM:.*]] = cir.load %[[ARG_FROM]] : !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>>, !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>
+// CHECK-NEXT: %[[BOUND3_STRIDE_FROM:.*]] = cir.ptr_stride(%[[TLA_LOAD_FROM]] : !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>
+// CHECK-NEXT: %[[TLA_LOAD_TO:.*]] = cir.load %[[ARG_TO]] : !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>>, !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>
+// CHECK-NEXT: %[[BOUND3_STRIDE_TO:.*]] = cir.ptr_stride(%[[TLA_LOAD_TO]] : !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND3_STRIDE_DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[BOUND3_STRIDE_FROM]] : !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>> -> !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: %[[BOUND2_STRIDE_FROM:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_DECAY_FROM]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: %[[BOUND3_STRIDE_DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[BOUND3_STRIDE_TO]] : !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>> -> !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: %[[BOUND2_STRIDE_TO:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_DECAY_TO]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_LOAD_FROM:.*]] = cir.load %[[BOUND2_STRIDE_FROM]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_LOAD_FROM]] : !cir.ptr<!rec_CtorDtor>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[BOUND2_STRIDE_LOAD_TO:.*]] = cir.load %[[BOUND2_STRIDE_TO]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_LOAD_TO]] : !cir.ptr<!rec_CtorDtor>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: cir.call @_ZN8CtorDtorC1ERKS_(%[[BOUND1_STRIDE_TO]], %[[BOUND1_STRIDE_FROM]]) nothrow : (!cir.ptr<!rec_CtorDtor>, !cir.ptr<!rec_CtorDtor>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR3_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: acc.yield
+// CHECK-NEXT: } destroy {
+// CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}):
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i
+// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i
+// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i
+// CHECK-NEXT: %[[ONE_BELOW_UB3:.*]] = cir.binop(sub, %[[UB3_CAST]], %[[CONST_ONE]]) : !u64i
+// CHECK-NEXT: cir.store %[[ONE_BELOW_UB3]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+//
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(ge, %[[ITR3_LOAD]], %[[LB3_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_LOAD:.*]] = cir.load %[[PRIVATE]] : !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>>, !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>
+// CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride(%[[TLA_LOAD]] : !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i
+// CHECK-NEXT: %[[ONE_BELOW_UB2:.*]] = cir.binop(sub, %[[UB2_CAST]], %[[CONST_ONE]]) : !u64i
+// CHECK-NEXT: cir.store %[[ONE_BELOW_UB2]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(ge, %[[ITR2_LOAD]], %[[LB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND3_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[BOUND3_STRIDE]] : !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>> -> !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_DECAY]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i
+// CHECK-NEXT: %[[ONE_BELOW_UB1:.*]] = cir.binop(sub, %[[UB1_CAST]], %[[CONST_ONE]]) : !u64i
+// CHECK-NEXT: cir.store %[[ONE_BELOW_UB1]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(ge, %[[ITR1_LOAD]], %[[LB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_LOAD:.*]] = cir.load %[[BOUND2_STRIDE]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_LOAD]] : !cir.ptr<!rec_CtorDtor>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: cir.call @_ZN8CtorDtorD1Ev(%[[STRIDE]]) : (!cir.ptr<!rec_CtorDtor>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[DEC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[ITR2_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[DEC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[ITR3_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[DEC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: acc.yield
+// CHECK-NEXT: }
+ ;
+}
diff --git a/clang/test/ClangScanDeps/modules-context-hash-from-named-module.cpp b/clang/test/ClangScanDeps/modules-context-hash-from-named-module.cpp
new file mode 100644
index 0000000..c272022
--- /dev/null
+++ b/clang/test/ClangScanDeps/modules-context-hash-from-named-module.cpp
@@ -0,0 +1,121 @@
+// Checks that driver-generated options for C++ module inputs preserve the
+// canonical module build commands compared to an equivalent non-module input,
+// and that they do not produce additional internal scanning PCMs.
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+
+//--- main.cpp
+#include "root.h"
+import A;
+import B;
+
+auto main() -> int { return 1; }
+
+//--- A.cppm
+module;
+#include "root.h"
+export module A;
+
+//--- B.cppm
+module;
+#include "root.h"
+export module B;
+
+//--- module.modulemap
+module root { header "root.h" }
+
+//--- root.h
+// empty
+
+// RUN: %clang -std=c++23 -fmodules \
+// RUN: -fmodules-cache-path=%t/modules-cache \
+// RUN: %t/main.cpp %t/A.cppm %t/B.cppm \
+// RUN: -fsyntax-only -fdriver-only -MJ %t/deps.json
+//
+// RUN: sed -e '1s/^/[/' -e '$s/,$/]/' -e 's:\\\\\?:/:g' %t/deps.json \
+// RUN: > %t/compile_commands.json
+//
+// RUN: clang-scan-deps \
+// RUN: -compilation-database=%t/compile_commands.json \
+// RUN: -format experimental-full \
+// RUN: | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t
+
+// CHECK: {
+// CHECK-NEXT: "modules": [
+// CHECK-NEXT: {
+// CHECK-NEXT: "clang-module-deps": [],
+// CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/module.modulemap",
+// CHECK: "context-hash": "[[HASH_ROOT:.*]]",
+// CHECK-NEXT: "file-deps": [
+// CHECK-NEXT: "[[PREFIX]]/module.modulemap",
+// CHECK-NEXT: "[[PREFIX]]/root.h"
+// CHECK-NEXT: ],
+// CHECK-NEXT: "link-libraries": [],
+// CHECK-NEXT: "name": "root"
+// CHECK-NEXT: }
+// CHECK-NEXT: ],
+// CHECK-NEXT: "translation-units": [
+// CHECK-NEXT: {
+// CHECK-NEXT: "commands": [
+// CHECK-NEXT: {
+// CHECK-NEXT: "clang-context-hash": "{{.*}}",
+// CHECK-NEXT: "named-module-deps": [
+// CHECK-NEXT: "A",
+// CHECK-NEXT: "B"
+// CHECK-NEXT: ],
+// CHECK-NEXT: "clang-module-deps": [
+// CHECK-NEXT: {
+// CHECK-NEXT: "context-hash": "[[HASH_ROOT]]",
+// CHECK-NEXT: "module-name": "root"
+// CHECK-NEXT: }
+// CHECK-NEXT: ],
+// CHECK: "file-deps": [
+// CHECK-NEXT: "[[PREFIX]]/main.cpp"
+// CHECK-NEXT: ],
+// CHECK-NEXT: "input-file": "[[PREFIX]]/main.cpp"
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+// CHECK-NEXT: },
+// CHECK-NEXT: {
+// CHECK-NEXT: "commands": [
+// CHECK-NEXT: {
+// CHECK-NEXT: "clang-context-hash": "{{.*}}",
+// CHECK-NEXT: "named-module": "A",
+// CHECK-NEXT: "clang-module-deps": [
+// CHECK-NEXT: {
+// CHECK-NEXT: "context-hash": "[[HASH_ROOT]]",
+// CHECK-NEXT: "module-name": "root"
+// CHECK-NEXT: }
+// CHECK-NEXT: ],
+// CHECK: "file-deps": [
+// CHECK-NEXT: "[[PREFIX]]/A.cppm"
+// CHECK-NEXT: ],
+// CHECK-NEXT: "input-file": "[[PREFIX]]/A.cppm"
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+// CHECK-NEXT: },
+// CHECK-NEXT: {
+// CHECK-NEXT: "commands": [
+// CHECK-NEXT: {
+// CHECK-NEXT: "clang-context-hash": "{{.*}}",
+// CHECK-NEXT: "named-module": "B",
+// CHECK-NEXT: "clang-module-deps": [
+// CHECK-NEXT: {
+// CHECK-NEXT: "context-hash": "[[HASH_ROOT]]",
+// CHECK-NEXT: "module-name": "root"
+// CHECK-NEXT: }
+// CHECK-NEXT: ],
+// CHECK: "file-deps": [
+// CHECK-NEXT: "[[PREFIX]]/B.cppm"
+// CHECK-NEXT: ],
+// CHECK-NEXT: "input-file": "[[PREFIX]]/B.cppm"
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+// CHECK-NEXT: }
+
+// This tests that the scanner doesn't produce multiple internal scanning PCMs
+// for our single Clang module (root).
+// RUN: find %t/modules-cache -name "*.pcm" | wc -l | grep 1
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 47cb485a..7756f0d 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -6273,6 +6273,78 @@ __m512i test_mm512_ternarylogic_epi32(__m512i __A, __m512i __B, __m512i __C) {
// CHECK: @llvm.x86.avx512.pternlog.d.512({{.*}}, i32 240)
return _mm512_ternarylogic_epi32(__A, __B, __C, _MM_TERNLOG_A);
}
+TEST_CONSTEXPR(match_v16si(
+ _mm512_ternarylogic_epi32(
+ ((__m512i)((__v16si){
+ 0x6AA79987, (int)0xBB91433A, 0x029A7245, (int)0xD1F6F86C,
+ (int)0xD340BBCD, (int)0xCD8778E7, 0x4C73A942, (int)0xDAEA58BA,
+ 0x5E503A67, (int)0xEE897110, 0x3193CA54, 0x452EC40A,
+ (int)0x90E5E945, 0x6FACAA50, 0x29645F8B, 0x5F811CB9
+ })),
+ ((__m512i)((__v16si){
+ 0x1FCFF454, (int)0xDFC9E3B1, 0x6ED4E94B, 0x42D6CB5C,
+ (int)0x8FE46024, (int)0xA091250E, 0x2CA1C789, (int)0x9C9CEA0C,
+ (int)0x8D9FE5B9, 0x2FD2B7A4, 0x5ADAD121, (int)0xBCF74D7A,
+ (int)0xF543BBCF, (int)0xBB9D58E4, 0x175F0CD2, (int)0x87F26AEE
+ })),
+ ((__m512i)((__v16si){
+ (int)0xFA882692, (int)0xBC428D42, 0x6980A81F, (int)0x95C5FB98,
+ (int)0x8101E89A, 0x2AA4857E, 0x25ECE845, 0x34A9AF41,
+ (int)0xB80E3B0D, 0x13ED748B, 0x30A1F6D5, (int)0xD64A3CE0,
+ 0x57708107, 0x527122DC, 0x06057C82, 0x7576714A
+ })),
+ (unsigned char)0x11), // ~A & ~C
+ 0x00300929, 0x0034100C, (int)0x902B16A0, 0x28280423,
+ 0x701A1741, 0x554A5A81, (int)0xD2121032, 0x434210B2,
+ 0x42600042, (int)0xC0000850, (int)0x8504080A, 0x01008205,
+ 0x088C4430, 0x04028503, (int)0xE8A0832D, 0x08098411));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_ternarylogic_epi32(
+ ((__m512i)((__v16si){
+ (int)0xA3B1799D, (int)0x46685257, (int)0x392456DE, (int)0xBC8960A9,
+ (int)0x6C031199, (int)0x07A0CA6E, (int)0x37F8A88B, (int)0x8B8148F6,
+ (int)0x386ECBE0, (int)0x96DA1DAC, (int)0xCE4A2BBD, (int)0xB2B9437A,
+ (int)0x571AA876, (int)0x27CD8130, (int)0x562B0F79, (int)0x17BE3111
+ })),
+ ((__m512i)((__v16si){
+ (int)0x18C26797, (int)0xD8F56413, (int)0x9A8DCA03, (int)0xCE9FF57F,
+ (int)0xBACFB3D0, (int)0x89463E85, (int)0x60E7A113, (int)0x8D5288F1,
+ (int)0xDC98D2C1, (int)0x93CD59BF, (int)0xB45ED1F0, (int)0x19DB3AD0,
+ (int)0x47294739, (int)0x5D65A441, (int)0x5EC42E08, (int)0xA5E5A5AB
+ })),
+ ((__m512i)((__v16si){
+ (int)0xBAA80DD4, (int)0x29D4BEEF, (int)0x6123FDF7, (int)0x8E944239,
+ (int)0xAF42E12F, (int)0xC6A7EE39, (int)0x50C187FC, (int)0x448AAA9E,
+ (int)0x508EBAD7, (int)0xA7CAD415, (int)0x757750A9, (int)0x43CF2FDE,
+ (int)0x95A76D79, (int)0x663F1C97, (int)0xFF5E9FF0, (int)0x827050A8
+ })),
+ (unsigned char)0x38), // (C & ~B) | (~C & A & B)
+ (int)0xBB311C08, (int)0x0E9C3644, (int)0x21219CDD, (int)0x32140090,
+ (int)0xC640A009, (int)0x86A6E46B, (int)0x57190998, (int)0x0683C006,
+ (int)0x60E61921, (int)0x05124411, (int)0x7A147A0D, (int)0xA36269AA,
+ (int)0x1033ED4F, (int)0x62A80531, (int)0x086F0171, (int)0x925A10B8));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_ternarylogic_epi32(
+ ((__m512i)((__v16si){
+ (int)0x3193CA54, (int)0x90E5E945, (int)0x29645F8B, (int)0x6ED4E94B,
+ (int)0x8D9FE5B9, (int)0x8101E89A, (int)0x25ECE845, (int)0xB80E3B0D,
+ (int)0x57708107, (int)0x06057C82, (int)0x56EAA301, (int)0xBE99854A,
+ (int)0x00E266D0, (int)0xDEEA959E, (int)0x2DCAABD5, (int)0x6A1ECCDA})),
+ ((__m512i)((__v16si){
+ (int)0x93FD7234, (int)0xBC90A6EC, (int)0xD3285151, (int)0xCE9FB6A8,
+ (int)0x3B788B66, (int)0xDF8960AD, (int)0x2F927291, (int)0x96AF0DEA,
+ (int)0xF56AE7EA, (int)0x2A04F77A, (int)0xD50B612B, (int)0x3AA725CB,
+ (int)0x8A04F74F, (int)0x282FE557, (int)0x52E1FBB0, (int)0x0CA02F4D})),
+ ((__m512i)((__v16si){
+ (int)0xB6307BAD, (int)0x141CB03E, (int)0xEBAA7701, (int)0xC9F0B072,
+ (int)0x5E2503DD, (int)0xC2E1DAC4, (int)0x0FC01B11, (int)0xA0485922,
+ (int)0x339BB47E, (int)0xB2D4F32A, (int)0x8E7AE9AF, (int)0x147DE9B0,
+ (int)0xF79FCAA0, (int)0x3B0B6398, (int)0x29DDF4C7, (int)0x49CDBEC7})),
+ (unsigned char)0xC3), // ~(B ^ C)
+ (int)0x5D91479F, (int)0xD38AB056, (int)0x05B3F125, (int)0x5FB4A01C,
+ (int)0x49189120, (int)0xA17777C8, (int)0xF581652B, (int)0xD15EC918,
+ (int)0x5DE59912, (int)0xD3FE7407, (int)0x7C1E3DD5, (int)0x7BC15F7E,
+ (int)0x75196E60, (int)0x093A8F36, (int)0x80D4AF9A, (int)0x99411C68));
__m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
// CHECK-LABEL: test_mm512_mask_ternarylogic_epi32
@@ -6280,6 +6352,63 @@ __m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i _
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_ternarylogic_epi32(__A, __U, __B, __C, _MM_TERNLOG_B);
}
+TEST_CONSTEXPR(match_v16si(
+ _mm512_mask_ternarylogic_epi32(
+ _mm512_setr_epi32(
+ (int)0xFFFFFFFF, 0x00000000, (int)0xDEADBEEF, (int)0xCAFEBABE, 0x12345678, (int)0x87654321,
+ (int)0xAAAAAAAA, 0x55555555, (int)0xF00DBEEF, (int)0xBAD2FEAF, 0x0112358D, (int)0xDEADF00D,
+ (int)0x8BADF00D, (int)0xBADDCAFE, (int)0xBAADF00D, (int)0xBAAAAAAD),
+ (__mmask16)0x9D71,
+ _mm512_setr_epi32(
+ 0x11111111, 0x22222222, 0x33333333, 0x44444444, (int)0xABCDEF01, (int)0xFEDCBA98,
+ (int)0xCCCCCCCC, 0x33333333, 0x1337BEEF, 0x01010101, (int)0x81321345, (int)0xBAADF00D,
+ 0x1BADB002, 0x5EE7C0DE, 0x12345678, 0x55555555),
+ _mm512_setr_epi32(
+ (int)0xF0F0F0F0, 0x0F0F0F0F, 0x1234ABCD, (int)0x9876FEDC, 0x00FF00FF, (int)0xFF00FF00,
+ (int)0xFF0000FF, 0x00FFFF00, 0x50D4CAFE, (int)0x8BADF00D, (int)0xABCDEFFF, (int)0xFEEDF00D,
+ (int)0xBEEFCAFE, (int)0xDEADC0DE, (int)0x1BADBEEF, 0x33333333),
+ (unsigned char)0xB1), // op: (~B & (A | ~C)) | (B & A & C)
+ (int)0xFEFEFEFE, 0x00000000, (int)0xDEADBEEF, (int)0xCAFEBABE, 0x54341078, (int)0x87234367,
+ (int)0xAA3333AA, 0x55555555, (int)0xFC0C8BEE, (int)0xBAD2FEAF, 0x5500258D, (int)0xDFBFFFFF,
+ (int)0xCABDC50D, (int)0xBADDCAFE, (int)0xBAADF00D, (int)0xBAAAAAA9));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_mask_ternarylogic_epi32(
+ _mm512_setr_epi32(
+ 0x0000FFFF, (int)0xFFFF0000, 0x01010101, (int)0xFF00FF00, (int)0xAAAAAAAA, 0x33333333,
+ (int)0xF0F0F0F0, 0x0F0F0F0F, 0x12345678, (int)0x87654321, 0x7FFFFFFF, (int)0xDEADBEEF,
+ (int)0xCAFEBABE, 0x01234567, (int)0xABCDEF01, (int)0xFEDCBA98),
+ (__mmask16)0x3C3C,
+ _mm512_setr_epi32(
+ 0x1111EEEE, 0x2222DDDD, (int)0x80808080, 0x00FF00FF, 0x55555555, 0x00000000,
+ (int)0xCCCCCCCC, 0x33333333, 0x11111111, 0x22222222, (int)0x80000000, 0x12345678,
+ 0x11223344, (int)0xFEDCBA98, (int)0xBAD0BAD0, (int)0xBEEFCAFE),
+ _mm512_setr_epi32(
+ 0x12345678, (int)0x87654321, 0x7F7F7F7F, (int)0xFEDCBA98, (int)0xCCCCCCCC, (int)0xFFFFFFFF,
+ 0x11111111, 0x22222222, (int)0xABABABAB, (int)0xCDCDCDCD, 0x00000001, (int)0xFACEB00C,
+ 0x55667788, (int)0xABCDEF01, 0x12345678, (int)0xDEADBEEF),
+ (unsigned char)0xE8), // op: (A & B) | (B & C) | (C & A) (Majority)
+ 0x0000FFFF, (int)0xFFFF0000, 0x01010101, (int)0xFEDCBA98, (int)0xCCCCCCCC, 0x33333333,
+ (int)0xF0F0F0F0, 0x0F0F0F0F, 0x12345678, (int)0x87654321, 0x00000001, (int)0xDAACB66C,
+ 0x5166338C, (int)0xABCDEF01, (int)0xABCDEF01, (int)0xFEDCBA98));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_mask_ternarylogic_epi32(
+ _mm512_setr_epi32(
+ (int)0xDEADBEEF, 0x01234567, (int)0xAAAAAAAA, 0x0F0F0F0F, (int)0xBAADF00D, 0x00000001,
+ (int)0x80000000, 0x7FFFFFFF, (int)0xCAFEBABE, 0x13579BDF, (int)0xABCDEF01, (int)0xCAFEBABE,
+ (int)0xDEADBEEF, (int)0xFF00FF00, (int)0xBEEFCAFE, 0x00000001),
+ (__mmask16)0xBEEF,
+ _mm512_setr_epi32(
+ (int)0xFACEB00C, (int)0x89ABCDEF, 0x55555555, (int)0xF0F0F0F0, 0x1337C0DE, 0x00000002,
+ 0x40000000, (int)0xBFFFFFFF, 0x00000000, 0x2468ACE0, 0x10FEDCBA, 0x00000000,
+ (int)0xFEEDFACE, 0x00FF00FF, 0x12345678, 0x00000002),
+ _mm512_setr_epi32(
+ 0x12345678, (int)0xFFFFFFFF, (int)0xCCCCCCCC, (int)0x88888888, (int)0xDEADC0DE, 0x00000004,
+ 0x20000000, (int)0xDFFFFFFF, (int)0xFFFFFFFF, (int)0xFEDCBA98, 0x55555555, (int)0xFFFFFFFF,
+ (int)0x8BADF00D, (int)0xF0F0F0F0, (int)0xFACEB00C, 0x00000003),
+ (unsigned char)0x96), // op: A ^ B ^ C (XOR3)
+ (int)0x3657589B, 0x77777777, 0x33333333, 0x77777777, (int)0xBAADF00D, 0x00000007,
+ (int)0xE0000000, 0x1FFFFFFF, (int)0xCAFEBABE, (int)0xC9E38DA7, (int)0xEE6666EE, 0x35014541,
+ (int)0xABEDB42C, 0x0F0F0F0F, (int)0xBEEFCAFE, 0x00000000));
__m512i test_mm512_maskz_ternarylogic_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
// CHECK-LABEL: test_mm512_maskz_ternarylogic_epi32
@@ -6287,12 +6416,106 @@ __m512i test_mm512_maskz_ternarylogic_epi32(__mmask16 __U, __m512i __A, __m512i
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> zeroinitializer
return _mm512_maskz_ternarylogic_epi32(__U, __A, __B, __C, _MM_TERNLOG_C);
}
+TEST_CONSTEXPR(match_v16si(
+ _mm512_maskz_ternarylogic_epi32(
+ (__mmask16)0x6498,
+ ((__m512i)((__v16si){
+ 1393174638, 1243877629, -826208314, 1770837977,
+ -1678093555, -414088391, 1288769935, 703296098,
+ 1428104678, 405688910, -167788555, 1965219804,
+ -1959018749, 514303227, 754191429, 579811517})),
+ ((__m512i)((__v16si){
+ -1301280384, -923736510, -797648805, 475853364,
+ 1247377062, 213070102, 626020209, 2037794518,
+ 122183669, 1712787569, -1042441569, -1416844145,
+ 1374304252, -1323427639, 1432483217, 1621706359})),
+ ((__m512i)((__v16si){
+ 234227517, -313293475, 1851213039, -300885844,
+ -1479339544, 575183087, -655840260, -1853668117,
+ 433622095, 933629633, -1324904005, -68434060,
+ 486070655, 226865941, -1461464269, 1471789621})),
+ (unsigned char)0xAB), // (~A & ~B) | (B & C)
+ 0, 0, 0, -298592082,
+ -1479042568, 0, 0, -1752969749,
+ 0, 0, -1157115461, 0,
+ 0, 1304818453, -1427385541, 0));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_maskz_ternarylogic_epi32(
+ (__mmask16)0xA593,
+ ((__m512i)((__v16si){
+ 1789368711, -1148107974, 43676229, -772343700,
+ -750732339, -846759705, 1282648386, -622176070,
+ 1582316135, -292982512, 831769172, 1160692746,
+ -1863980731, 1873586768, 694443915, 1602297017})),
+ ((__m512i)((__v16si){
+ 533722196, -540417103, 1859447115, 1121373020,
+ -1880858588, -1601100530, 748799881, -1667438068,
+ -1918900807, 802338724, 1524289825, -1124643462,
+ -180110385, -1147315996, 392105170, -2014156050})),
+ ((__m512i)((__v16si){
+ -91740526, -1136489150, 1770039327, -1782187112,
+ -2130581350, 715425150, 636282949, 883535681,
+ -1207026931, 334328971, 815920853, -699777824,
+ 1466990855, 1383146204, 101022850, 1970696522})),
+ (unsigned char)0x21), // (~B) & ~(A ^ C)
+ 1611661482, 539234310, 0, 0,
+ 538610824, 0, 0, 18874368,
+ 270539268, 0, -1543175586, 0,
+ 0, 1075980051, 0, 1342738432));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_maskz_ternarylogic_epi32(
+ (__mmask16)0xC3A5,
+ ((__m512i)((__v16si){
+ 0x00000000, -0x1, (int)0x80000000, 0x7FFFFFFF,
+ (int)0xAAAAAAAA, 0x55555555, 0x00000001, (int)0xFFFFFFFE,
+ 0x0000FFFF, (int)0xFFFF0000, (int)0xDEADBEEF, (int)0xCAFEBABE,
+ 0x01234567, (int)0x89ABCDEF, 0x13579BDF, 0x2468ACE0})),
+ ((__m512i)((__v16si){
+ 0x2468ACE0, 0x13579BDF, (int)0x89ABCDEF, 0x01234567,
+ (int)0xCAFEBABE, (int)0xDEADBEEF, (int)0xFFFF0000, 0x0000FFFF,
+ (int)0xFFFFFFFE, 0x00000001, 0x55555555, (int)0xAAAAAAAA,
+ 0x7FFFFFFF, (int)0x80000000, -0x1, 0x00000000})),
+ ((__m512i)((__v16si){
+ -0x1, 0x00000000, -0x1, 0x00000000,
+ -0x1, 0x00000000, -0x1, 0x00000000,
+ -0x1, 0x00000000, -0x1, 0x00000000,
+ -0x1, 0x00000000, -0x1, 0x00000000})),
+ (unsigned char)0xC9), // F = (A & B) | (~A & ~(B ^ C))
+ 0x2468ACE0, 0x0, (int)0x89ABCDEF, 0x0,
+ 0x0, 0x74071445, 0x0, 0x0000FFFE,
+ (int)0xFFFFFFFE, 0x0000FFFE, 0x0, 0x0,
+ 0x0, 0x0, (int)0xFFFFFFFF, (int)0xDB97531F));
__m512i test_mm512_ternarylogic_epi64(__m512i __A, __m512i __B, __m512i __C) {
// CHECK-LABEL: test_mm512_ternarylogic_epi64
// CHECK: @llvm.x86.avx512.pternlog.q.512({{.*}}, i32 192)
return _mm512_ternarylogic_epi64(__A, __B, __C, _MM_TERNLOG_A & _MM_TERNLOG_B);
}
+TEST_CONSTEXPR(match_v8di(
+ _mm512_ternarylogic_epi64(
+ ((__m512i)((__v8di){0x1111, 0x2222, 0x3333, 0x4444, 0x5555, 0x6666, 0x7777, 0x8888})),
+ ((__m512i)((__v8di){0xAAAA, 0xBBBB, 0xCCCC, 0xDDDD, 0xEEEE, 0xFFFF, 0x1111, 0x2222})),
+ ((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
+ (unsigned char)0xD8), // C ? B : A
+ 0xAAAA, 0x2222, 0xCCCC, 0x4444, 0xEEEE, 0x6666, 0x1111, 0x8888));
+TEST_CONSTEXPR(match_v8di(
+ _mm512_ternarylogic_epi64(
+ ((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, 0xF0F0, 0xFF, -0x5555555555555556, 0x5555555555555555})),
+ ((__m512i)((__v8di){0x1234, 0xFFFF, 0xFF, 0xF0F, 0x3333, 0xFF00, -0x5555555555555556, -0x0F0F0F0F0F0F0F10})),
+ ((__m512i)((__v8di){0xFFFF, 0x1234, 0xF0F, 0xFF00, 0xF0F0, 0x3333, 0x5555555555555555, 0x0F0F0F0F0F0F0F0})),
+ (unsigned char)0x8F), // ~A | (B & C)
+ 0x1234, -0x1, 0xF, -0x1, -0xC0C1, -0x100, 0x5555555555555555, -0x5505050505050506));
+TEST_CONSTEXPR(match_v8di(
+ _mm512_ternarylogic_epi64(
+ ((__m512i)((__v8di){0x7FFFFFFFFFFFFFFF, 0x0, 0x00FF00FF00FF00FF, 0x0F0F0F0F0F0F0F0F,
+ 0x123456789ABCDEF0, 0x3333333333333333, 0x5555555555555555, 0x0123456789ABCDEF})),
+ ((__m512i)((__v8di){0x1111111111111111, 0x2222222222222222, 0xFFFFFFFF, -0x100000000,
+ 0x0, -0x3333333333333334, -0x0F0F0F0F0F0F0F10, -0x123456789ABCDF0})),
+ ((__m512i)((__v8di){0x2222222222222222, 0x1111111111111111, -0x1000000000000, 0xFFFFFFFF,
+ -0x1, 0x0, 0x0F0F0F0F0F0F0F0F, 0x0})),
+ (unsigned char)0xE0), // A & (B | C)
+ 0x3333333333333333, 0x0, 0x00FF000000FF00FF, 0x0F0F0F0F0F0F0F0F,
+ 0x123456789ABCDEF0, 0x0, 0x5555555555555555, 0x0));
__m512i test_mm512_mask_ternarylogic_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) {
// CHECK-LABEL: test_mm512_mask_ternarylogic_epi64
@@ -6300,6 +6523,40 @@ __m512i test_mm512_mask_ternarylogic_epi64(__m512i __A, __mmask8 __U, __m512i __
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_ternarylogic_epi64(__A, __U, __B, __C, _MM_TERNLOG_B | _MM_TERNLOG_C);
}
+TEST_CONSTEXPR(match_v8di(
+ _mm512_mask_ternarylogic_epi64(
+ ((__m512i)((__v8di){0x0LL, 0x1LL, 0x2LL, 0x3LL, 0x4LL, 0x5LL, 0x6LL, 0x7LL})),
+ (__mmask8)0xFF,
+ ((__m512i)((__v8di){0x1LL, 0x1LL, 0x1LL, 0x1LL, 0x1LL, 0x1LL, 0x1LL, 0x1LL})),
+ ((__m512i)((__v8di){0x0LL, 0x0LL, 0x0LL, 0x0LL, 0x0LL, 0x0LL, 0x0LL, 0x0LL})),
+ (unsigned char)0x96),
+ 0x1, 0x0, 0x3, 0x2, 0x5, 0x4, 0x7, 0x6));
+TEST_CONSTEXPR(match_v8di(
+ _mm512_mask_ternarylogic_epi64(
+ ((__m512i)((__v8di){
+ (long long)0x9FD641D41C6A70FEULL, (long long)0xB51D9082CF18D398ULL,
+ (long long)0x730E520285F4D01BULL, (long long)0x347E72CE341FD932ULL,
+ (long long)0x438F8D9BEA5D486FULL, (long long)0xFDB554A5DEEF750DULL,
+ (long long)0x0ABAA254BFFC2308ULL, (long long)0x825FE29BF1D51FC6ULL
+ })),
+ (__mmask8)0xE4,
+ ((__m512i)((__v8di){
+ (long long)0xC1779B12FA832A6EULL, (long long)0xCF6E876B587C4762ULL,
+ (long long)0x25DC09833D4ECA24ULL, (long long)0x34E55E25691BB80AULL,
+ (long long)0x9A02450CD8F20DD7ULL, (long long)0x78B9E240FB5B77A9ULL,
+ (long long)0xE1F37F76C1162596ULL, (long long)0xDCCB561738CE2941ULL
+ })),
+ ((__m512i)((__v8di){
+ (long long)0xD13840986BC8DC3CULL, (long long)0x34CDE7E8C960187EULL,
+ (long long)0x7EE068D9D111EEB8ULL, (long long)0xAD11149DE686B811ULL,
+ (long long)0x849F38BFD9AB0DFAULL, (long long)0x5C28948ED106227BULL,
+ (long long)0xFB1918D4A18E304DULL, (long long)0x4EDE6944F84AD59FULL
+ })),
+ (unsigned char)0x67),
+ (long long)0x9FD641D41C6A70FEULL, (long long)0xB51D9082CF18D398ULL,
+ (long long)0xDB3DE57EEE5F25DCULL, (long long)0x347E72CE341FD932ULL,
+ (long long)0x438F8D9BEA5D486FULL, (long long)0x26D37FDE2A5DDDD2ULL,
+ (long long)0x1EEE67AB6099DDFBULL, (long long)0xB3353F73C6A4FCFEULL));
__m512i test_mm512_maskz_ternarylogic_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) {
// CHECK-LABEL: test_mm512_maskz_ternarylogic_epi64
@@ -6307,6 +6564,59 @@ __m512i test_mm512_maskz_ternarylogic_epi64(__mmask8 __U, __m512i __A, __m512i _
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> zeroinitializer
return _mm512_maskz_ternarylogic_epi64(__U, __A, __B, __C, ~_MM_TERNLOG_A | (_MM_TERNLOG_B ^ _MM_TERNLOG_C));
}
+TEST_CONSTEXPR(match_v8di(
+ _mm512_maskz_ternarylogic_epi64(
+ (__mmask8)0x6D,
+ ((__m512i)((__v8di){
+ (long long)0xFFFFFFFFFFFFFFFF, (long long)0x0000000000000000,
+ (long long)0x0000FFFF0000FFFF, (long long)0x5555555555555555,
+ (long long)0x0123456789ABCDEF, (long long)0x1122334455667788,
+ (long long)0x00000000FFFFFFFF, (long long)0x0F0F0F0F0F0F0F0F
+ })),
+ ((__m512i)((__v8di){
+ (long long)0x000000000000000B, (long long)0x000000000000000C,
+ (long long)0x00000000FFFF0000, (long long)0x3333333333333333,
+ (long long)0x0FEDCBA987654321, (long long)0x1111111111111111,
+ (long long)0x7FFFFFFFFFFFFFFF, (long long)0x2222222222222222
+ })),
+ ((__m512i)((__v8di){
+ (long long)0x000000000000000C, (long long)0x000000000000000B,
+ (long long)0x00F0F0F0F0F0F0F0, (long long)0x5555555555555555,
+ (long long)0x0000000000000000, (long long)0x7FFFFFFFFFFFFFFF,
+ (long long)0x0000000000000001, (long long)0x2222222222222222
+ })),
+ (unsigned char)0x89),
+ (long long)0x0000000000000008, (long long)0x0000000000000000,
+ (long long)0xFF0F0000F0F00000, (long long)0x9999999999999999,
+ (long long)0x0000000000000000, (long long)0x9111111111111111,
+ (long long)0x8000000000000001, (long long)0x0000000000000000));
+
+TEST_CONSTEXPR(match_v8di(
+ _mm512_maskz_ternarylogic_epi64(
+ (__mmask8)0x6D,
+ ((__m512i)((__v8di){
+ (long long)0xFFFFFFFFFFFFFFFF, (long long)0x0000000000000000,
+ (long long)0x0000FFFF0000FFFF, (long long)0x5555555555555555,
+ (long long)0x0123456789ABCDEF, (long long)0x1122334455667788,
+ (long long)0x00000000FFFFFFFF, (long long)0x0F0F0F0F0F0F0F0F
+ })),
+ ((__m512i)((__v8di){
+ (long long)0x000000000000000B, (long long)0x000000000000000C,
+ (long long)0x00000000FFFF0000, (long long)0x3333333333333333,
+ (long long)0x0FEDCBA987654321, (long long)0x1111111111111111,
+ (long long)0x7FFFFFFFFFFFFFFF, (long long)0x2222222222222222
+ })),
+ ((__m512i)((__v8di){
+ (long long)0x000000000000000C, (long long)0x000000000000000B,
+ (long long)0x00F0F0F0F0F0F0F0, (long long)0x5555555555555555,
+ (long long)0x0000000000000000, (long long)0x7FFFFFFFFFFFFFFF,
+ (long long)0x0000000000000001, (long long)0x2222222222222222
+ })),
+ (unsigned char)0x29),
+ (long long)0x0000000000000004, (long long)0x0000000000000000,
+ (long long)0xFF0FF0F0F0F0F0F0, (long long)0xCCCCCCCCCCCCCCCC,
+ (long long)0x0000000000000000, (long long)0x8033225544776699,
+ (long long)0x8000000000000000, (long long)0x0000000000000000));
__m512 test_mm512_shuffle_f32x4(__m512 __A, __m512 __B) {
// CHECK-LABEL: test_mm512_shuffle_f32x4
diff --git a/clang/test/CodeGen/X86/avx512fp16-builtins.c b/clang/test/CodeGen/X86/avx512fp16-builtins.c
index 37443d5..dbf89b3 100644
--- a/clang/test/CodeGen/X86/avx512fp16-builtins.c
+++ b/clang/test/CodeGen/X86/avx512fp16-builtins.c
@@ -17,6 +17,7 @@ _Float16 test_mm512_cvtsh_h(__m512h __A) {
// CHECK: extractelement <32 x half> %{{.*}}, i32 0
return _mm512_cvtsh_h(__A);
}
+TEST_CONSTEXPR(_mm512_cvtsh_h((__m512h){-32.0, 31.0, -30.0, 29.0, -28.0, 27.0, -26.0, 25.0, -24.0, 23.0, -22.0, 21.0, -20.0, 19.0, -18.0, 17.0, -16.0, 15.0, -14.0, 13.0, -12.0, 11.0, -10.0, 9.0, -8.0, 7.0, -6.0, 5.0, -4.0, 3.0, -2.0, 1.0}) == -32.0);
__m128h test_mm_setzero_ph(void) {
// CHECK-LABEL: test_mm_setzero_ph
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 6d91870..51385d5 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -8359,6 +8359,27 @@ __m128i test_mm_ternarylogic_epi32(__m128i __A, __m128i __B, __m128i __C) {
// CHECK: @llvm.x86.avx512.pternlog.d.128
return _mm_ternarylogic_epi32(__A, __B, __C, 4);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_ternarylogic_epi32(
+ ((__m128i)((__v4si){(int)0x7FFFFFFF, (int)0x80000000, (int)0xAAAAAAAA, 0x00000000})),
+ ((__m128i)((__v4si){0x00000000, (int)0xFFFFFFFF, 0x12345678, (int)0xFFFFFFFF})),
+ ((__m128i)((__v4si){(int)0xCAFEBABE, 0x0F0F0F0F, (int)0xFFFFFFFF, 0x00000000})),
+ (unsigned char)0xCA), /* B ? (A | C) : (C & ~A) */
+ (int)0x80000000, (int)0x8F0F0F0F, 0x5775577D, 0x00000000));
+TEST_CONSTEXPR(match_v4si(
+ _mm_ternarylogic_epi32(
+ ((__m128i)((__v4si){0x12345678, (int)0x80000000, 0x00000000, (int)0xAAAAAAAA})),
+ ((__m128i)((__v4si){0x0000FFFF, 0x7FFFFFFF, 0x55555555, 0x00000000})),
+ ((__m128i)((__v4si){(int)0xF0F0F0F0, 0x00000001, 0x0F0F0F0F, 0x33333333})),
+ (unsigned char)0xFE), /* A | B | C */
+ (int)0xF2F4FFFF, (int)0xFFFFFFFF, 0x5F5F5F5F, (int)0xBBBBBBBB));
+TEST_CONSTEXPR(match_v4si(
+ _mm_ternarylogic_epi32(
+ ((__m128i)((__v4si){(int)0xFFFFFFFF, 0x12345678, (int)0x80000000, 0x0F0F0F0F})),
+ ((__m128i)((__v4si){0x00FF00FF, (int)0xFFFFFFFF, 0x7FFFFFFF, (int)0xF0F0F0F0})),
+ ((__m128i)((__v4si){0x0F0F0F0F, 0x00FF00FF, (int)0xFFFFFFFF, (int)0xFFFFFFFF})),
+ (unsigned char)0x80), /* A & B & C */
+ 0x000F000F, 0x00340078, 0x00000000, 0x00000000));
__m128i test_mm_mask_ternarylogic_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
// CHECK-LABEL: test_mm_mask_ternarylogic_epi32
@@ -8366,6 +8387,30 @@ __m128i test_mm_mask_ternarylogic_epi32(__m128i __A, __mmask8 __U, __m128i __B,
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_ternarylogic_epi32(__A, __U, __B, __C, 4);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_mask_ternarylogic_epi32(
+ ((__m128i)((__v4si){-0x1, 0x0, -0x1, 0x0})),
+ (__mmask8)0x03,
+ ((__m128i)((__v4si){0xB, 0xB, 0xB, 0xB})),
+ ((__m128i)((__v4si){0xC, 0xC, 0xC, 0xC })),
+ (unsigned char)0xCA), // A ? B : C
+ 0xB, 0xC, -0x1, 0x0));
+TEST_CONSTEXPR(match_v4si(
+ _mm_mask_ternarylogic_epi32(
+ ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})),
+ (__mmask8)0x0C,
+ ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})),
+ ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0xFE), // A | B | C
+ 0x9, 0x9, 0xF, 0xF));
+TEST_CONSTEXPR(match_v4si(
+ _mm_mask_ternarylogic_epi32(
+ ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})),
+ (__mmask8)0x05,
+ ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})),
+ ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0x80), // A & B & C
+ 0x0, 0x9, 0x0, 0x9));
__m128i test_mm_maskz_ternarylogic_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
// CHECK-LABEL: test_mm_maskz_ternarylogic_epi32
@@ -8373,12 +8418,57 @@ __m128i test_mm_maskz_ternarylogic_epi32(__mmask8 __U, __m128i __A, __m128i __B,
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer
return _mm_maskz_ternarylogic_epi32(__U, __A, __B, __C, 4);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_maskz_ternarylogic_epi32(
+ (__mmask8)0x0B,
+ ((__m128i)((__v4si){(int)0xDEADBEEF, 0, (int)0xFFFFFFFF, 0x13579BDF})),
+ ((__m128i)((__v4si){(int)0xFFFFFFFF, 0, (int)0xFFFFFFFF, 0})),
+ ((__m128i)((__v4si){(int)0xCAFEBABE, (int)0xFFFFFFFF, 0, 0x2468ACE0})),
+ (unsigned char)0xE2), // B ? A : C
+ (int)0xDEADBEEF, (int)0xFFFFFFFF, 0, 0x2468ACE0));
+TEST_CONSTEXPR(match_v4si(
+ _mm_maskz_ternarylogic_epi32(
+ (__mmask8)0x0C,
+ ((__m128i)((__v4si){0, (int)0xFFFFFFFF, (int)0xAAAAAAAA, 0x55555555})),
+ ((__m128i)((__v4si){(int)0xFFFFFFFF, 0, (int)0xFFFFFFFF, (int)0xFFFFFFFF})),
+ ((__m128i)((__v4si){(int)0xF0F0F0F0, 0, 0, (int)0xFFFFFFFF})),
+ (unsigned char)0x7F), // ~(A & B) | ~(B & C)
+ 0, 0, (int)0xFFFFFFFF, (int)0xAAAAAAAA));
+TEST_CONSTEXPR(match_v4si(
+ _mm_maskz_ternarylogic_epi32(
+ (__mmask8)0x05,
+ ((__m128i)((__v4si){(int)0xFFFFFFFF, 0, 0x12345678, 0})),
+ ((__m128i)((__v4si){0, 0, 0x0000FFFF, (int)0xFFFFFFFF})),
+ ((__m128i)((__v4si){0, 0, 0x0000000F, 0})),
+ (unsigned char)0xBF), // ~A | ~B | C imm = 0xBF
+ (int)0xFFFFFFFF, 0, (int)0xFFFFA98F, 0));
__m256i test_mm256_ternarylogic_epi32(__m256i __A, __m256i __B, __m256i __C) {
// CHECK-LABEL: test_mm256_ternarylogic_epi32
// CHECK: @llvm.x86.avx512.pternlog.d.256
return _mm256_ternarylogic_epi32(__A, __B, __C, 4);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_ternarylogic_epi32(
+ ((__m256i)((__v8si){0x12345678, 0x00000000, (int)0xFFFFFFFF, 0x7FFFFFFF, (int)0x80000000, 0x00FF00FF, (int)0xF0F0F0F0, (int)0xAAAAAAAA})),
+ ((__m256i)((__v8si){(int)0xDEADBEEF, 0x11111111, 0x22222222, 0x33333333, 0x44444444, 0x55555555, 0x66666666, 0x77777777})),
+ ((__m256i)((__v8si){(int)0xCAFEBABE, (int)0x88888888, (int)0x99999999, (int)0xAAAAAAAA, (int)0xBBBBBBBB, (int)0xCCCCCCCC, (int)0xDDDDDDDD, (int)0xFFFFFFFF})),
+ (unsigned char)0xF0), /* A */
+ 0x12345678, 0x00000000, (int)0xFFFFFFFF, 0x7FFFFFFF, (int)0x80000000, 0x00FF00FF, (int)0xF0F0F0F0, (int)0xAAAAAAAA));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_ternarylogic_epi32(
+ ((__m256i)((__v8si){0x12345678, 0x00000000, (int)0xFFFFFFFF, 0x7FFFFFFF, (int)0x80000000, 0x00FF00FF, (int)0xF0F0F0F0, (int)0xAAAAAAAA})),
+ ((__m256i)((__v8si){(int)0xAAAAAAAA, (int)0xBBBBBBBB, (int)0xCCCCCCCC, (int)0xDDDDDDDD, (int)0xEEEEEEEE, (int)0xFFFFFFFF, 0x00000000, 0x11111111})),
+ ((__m256i)((__v8si){0x22222222, 0x33333333, 0x44444444, 0x55555555, 0x66666666, 0x77777777, (int)0x88888888, (int)0x99999999})),
+ (unsigned char)0x0F), /* ~A */
+ (int)0xEDCBA987, (int)0xFFFFFFFF, 0x00000000, (int)0x80000000, 0x7FFFFFFF, (int)0xFF00FF00, 0x0F0F0F0F, 0x55555555));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_ternarylogic_epi32(
+ ((__m256i)((__v8si){0x0F0F0F0F, (int)0xAAAAAAAA, 0x12345678, 0x00000000, (int)0xFFFFFFFF, 0x13579BDF, (int)0x80000000, 0x7FFFFFFF})),
+ ((__m256i)((__v8si){(int)0xF0F0F0F0, 0x55555555, 0x11111111, (int)0xFFFFFFFF, 0x00000000, 0x02468ACE, 0x7FFFFFFF, (int)0x80000000})),
+ ((__m256i)((__v8si){(int)0xAAAAAAAA, (int)0xAAAAAAAA, (int)0xAAAAAAAA, (int)0xAAAAAAAA, (int)0xAAAAAAAA, (int)0xAAAAAAAA, (int)0xAAAAAAAA, (int)0xAAAAAAAA})),
+ (unsigned char)0x3C), /* A ^ B */
+ (int)0xFFFFFFFF, (int)0xFFFFFFFF, 0x03254769, (int)0xFFFFFFFF, (int)0xFFFFFFFF, 0x11111111, (int)0xFFFFFFFF, (int)0xFFFFFFFF));
__m256i test_mm256_mask_ternarylogic_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
// CHECK-LABEL: test_mm256_mask_ternarylogic_epi32
@@ -8386,6 +8476,30 @@ __m256i test_mm256_mask_ternarylogic_epi32(__m256i __A, __mmask8 __U, __m256i __
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_ternarylogic_epi32(__A, __U, __B, __C, 4);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_mask_ternarylogic_epi32(
+ ((__m256i)((__v8si){(int)0xFFFFFFFF, 0x00000000, 0x12345678, (int)0xAAAAAAAA, 0x7FFFFFFF, (int)0x80000000, 0x13579BDF, 0x2468ACE0})),
+ (__mmask8)0xA5,
+ ((__m256i)((__v8si){(int)0xFFFFFFFF, 0x00000000, (int)0xFFFFFFFF, 0x00000000, (int)0xFFFFFFFF, 0x00000000, (int)0xFFFFFFFF, 0x00000000})),
+ ((__m256i)((__v8si){0x00000000, (int)0xFFFFFFFF, 0x55555555, 0x33333333, (int)0x89ABCDEF, 0x00000000, (int)0xFFFFFFFF, 0x11111111})),
+ (unsigned char)0xE2), /* B ? A : C */
+ (int)0xFFFFFFFF, 0x00000000, 0x12345678, (int)0xAAAAAAAA, 0x7FFFFFFF, 0x00000000, 0x13579BDF, 0x11111111));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_mask_ternarylogic_epi32(
+ ((__m256i)((__v8si){0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F})),
+ (__mmask8)0xFF,
+ ((__m256i)((__v8si){0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF})),
+ ((__m256i)((__v8si){0x33333333, 0x33333333, 0x33333333, 0x33333333, 0x33333333, 0x33333333, 0x33333333, 0x33333333})),
+ (unsigned char)0x96), /* A ^ B ^ C */
+ 0x3CC33CC3, 0x3CC33CC3, 0x3CC33CC3, 0x3CC33CC3, 0x3CC33CC3, 0x3CC33CC3, 0x3CC33CC3, 0x3CC33CC3));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_mask_ternarylogic_epi32(
+ ((__m256i)((__v8si){(int)0xFFFFFFFF, 0x00000000, (int)0xFFFFFFFF, 0x12345678, (int)0xAAAAAAAA, 0x55555555, (int)0x80000000, 0x7FFFFFFF})),
+ (__mmask8)0x5A,
+ ((__m256i)((__v8si){0x00000000, (int)0xFFFFFFFF, 0x11111111, (int)0xFFFFFFFF, 0x55555555, (int)0xAAAAAAAA, (int)0x80000000, 0x7FFFFFFF})),
+ ((__m256i)((__v8si){0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000})),
+ (unsigned char)0xC0), /* A & B */
+ (int)0xFFFFFFFF, 0x00000000, (int)0xFFFFFFFF, 0x12345678, 0x00000000, 0x55555555, (int)0x80000000, 0x7FFFFFFF));
__m256i test_mm256_maskz_ternarylogic_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
// CHECK-LABEL: test_mm256_maskz_ternarylogic_epi32
@@ -8393,12 +8507,60 @@ __m256i test_mm256_maskz_ternarylogic_epi32(__mmask8 __U, __m256i __A, __m256i _
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> zeroinitializer
return _mm256_maskz_ternarylogic_epi32(__U, __A, __B, __C, 4);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_maskz_ternarylogic_epi32(
+ (__mmask8)0x6D,
+ ((__m256i)((__v8si){(int)-1, 0, (int)-1, 0, (int)-1, 0, (int)-1, 0})),
+ ((__m256i)((__v8si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
+ ((__m256i)((__v8si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
+ (unsigned char)0x30), /* A & ~B */
+ (int)0xFFFFFFF4, 0, (int)0xFFFFFFF4, 0, 0, 0, (int)0xFFFFFFF4, 0));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_maskz_ternarylogic_epi32(
+ (__mmask8)0x90,
+ ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+ ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+ ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0x44), /* B & ~C */
+ 0, 0, 0, 0, 0x4, 0, 0, 0x4));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_maskz_ternarylogic_epi32(
+ (__mmask8)0x0F,
+ ((__m256i)((__v8si){0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3})),
+ ((__m256i)((__v8si){0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1})),
+ ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0x28), /* (A ^ B) & C */
+ 0x2, 0x2, 0x2, 0x2, 0, 0, 0, 0));
__m128i test_mm_ternarylogic_epi64(__m128i __A, __m128i __B, __m128i __C) {
// CHECK-LABEL: test_mm_ternarylogic_epi64
// CHECK: @llvm.x86.avx512.pternlog.q.128
return _mm_ternarylogic_epi64(__A, __B, __C, 4);
}
+TEST_CONSTEXPR(match_v2di(
+ _mm_ternarylogic_epi64(
+ ((__m128i)((__v2di){ (long long)0xBB91433A6AA79987ULL, (long long)0xD1F6F86C029A7245ULL })),
+ ((__m128i)((__v2di){ (long long)0xCD8778E7D340BBCDULL, (long long)0xDAEA58BA4C73A942ULL })),
+ ((__m128i)((__v2di){ (long long)0xEE8971105E503A67ULL, (long long)0x452EC40A3193CA54ULL })),
+ (unsigned char)0x77), // F = ~(A & B)
+ (long long)0x337E8FFFADBFC5BAULL,
+ (long long)0xBFD5BFF5FFEC77BFULL));
+TEST_CONSTEXPR(match_v2di(
+ _mm_ternarylogic_epi64(
+ ((__m128i)((__v2di){ (long long)0x6FACAA5090E5E945ULL, (long long)0x5F811CB929645F8BULL })),
+ ((__m128i)((__v2di){ (long long)0xDFC9E3B11FCFF454ULL, (long long)0x42D6CB5C6ED4E94BULL })),
+ ((__m128i)((__v2di){ (long long)0xA091250E8FE46024ULL, (long long)0x9C9CEA0C2CA1C789ULL })),
+ (unsigned char)0xDD), // F = (~A) | B
+ (long long)0xDFEFFBF17FDFFFDFULL,
+ (long long)0x63F7DFFFFFDEF97FULL));
+TEST_CONSTEXPR(match_v2di(
+ _mm_ternarylogic_epi64(
+ ((__m128i)((__v2di){ (long long)0x2FD2B7A48D9FE5B9ULL, (long long)0xBCF74D7A5ADAD121ULL })),
+ ((__m128i)((__v2di){ (long long)0xBB9D58E4F543BBCFULL, (long long)0x87F26AEE175F0CD2ULL })),
+ ((__m128i)((__v2di){ (long long)0xBC428D42FA882692ULL, (long long)0x95C5FB986980A81FULL })),
+ (unsigned char)0x22), // F = A & ~B
+ (long long)0x044285020A880410ULL,
+ (long long)0x100591106880A00DULL));
__m128i test_mm_mask_ternarylogic_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
// CHECK-LABEL: test_mm_mask_ternarylogic_epi64
@@ -8406,6 +8568,40 @@ __m128i test_mm_mask_ternarylogic_epi64(__m128i __A, __mmask8 __U, __m128i __B,
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_mask_ternarylogic_epi64(__A, __U, __B, __C, 4);
}
+TEST_CONSTEXPR(match_v2di(
+ _mm_mask_ternarylogic_epi64(
+ ((__m128i)((__v2di){(long long)0xF4C3B00C0D15EA5ELL, (long long)0x0123456789ABCDE0LL})),
+ (__mmask8)0x9D,
+ ((__m128i)((__v2di){(long long)0x9A7F3C2155EE00DDLL, (long long)0xDEADBEEFCAFEBABELL})),
+ ((__m128i)((__v2di){(long long)0x00F0F0F0F0F0F0F0LL, (long long)0x13579BDF2468ACE0LL})),
+ (unsigned char)0xFF), // All 1s
+ (long long)-1,
+ (long long)0x0123456789ABCDE0LL));
+TEST_CONSTEXPR(match_v2di(
+ _mm_mask_ternarylogic_epi64(
+ ((__m128i)((__v2di){ (long long)0x3A7C19E54B20D8A1LL, (long long)0x4F12B39D0C85E762LL })),
+ (__mmask8)0xD2,
+ ((__m128i)((__v2di){ (long long)0x6D93A0F217C54E3BLL, (long long)0x24E1C7A95B08D6F2LL })),
+ ((__m128i)((__v2di){ (long long)0x5A0C3E19D472B8F5LL, (long long)0x0187D3B2C9E4056ALL })),
+ (unsigned char)0x00),
+ (long long)0x3A7C19E54B20D8A1LL,
+ (long long)0x0LL)); // All 0s
+TEST_CONSTEXPR(match_v2di(
+ _mm_mask_ternarylogic_epi64(
+ ((__m128i)((__v2di){
+ (long long)0xA3F10B6C7D8294E1ULL, (long long)0x19D4E7350AB2C98FLL
+ })),
+ (__mmask8)0xB5,
+ ((__m128i)((__v2di){
+ (long long)0x5C2E9A10F4B7D863LL, (long long)0x9B7E1D2C3A4F5E60LL
+ })),
+ ((__m128i)((__v2di){
+ (long long)0x2A6D3F81C9E047B5LL, (long long)0x7F0A1C3E5D2B6490LL
+ })),
+ (unsigned char)0x55), // ~C
+ (long long)0xD592C07E361FB84AULL,
+ (long long)0x19D4E7350AB2C98FLL
+));
__m128i test_mm_maskz_ternarylogic_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
// CHECK-LABEL: test_mm_maskz_ternarylogic_epi64
@@ -8413,12 +8609,72 @@ __m128i test_mm_maskz_ternarylogic_epi64(__mmask8 __U, __m128i __A, __m128i __B,
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> zeroinitializer
return _mm_maskz_ternarylogic_epi64(__U, __A, __B, __C, 4);
}
+TEST_CONSTEXPR(match_v2di(
+ _mm_maskz_ternarylogic_epi64(
+ (__mmask8)0xA9,
+ ((__m128i)((__v2di){
+ (long long)0x8F3A5C7E21D4B690ULL, (long long)0x5AD02CE19B7F46A3ULL
+ })),
+ ((__m128i)((__v2di){
+ (long long)0xC19E04B2A7D35F68ULL, (long long)0x2F7B93C4E1A05D76ULL
+ })),
+ ((__m128i)((__v2di){
+ (long long)0x7A0C1D2E3F405162ULL, (long long)0xD4E5F60718293A4BULL
+ })),
+ (unsigned char)0xD2), // F = C ? (B | ~A) : (A & ~B)
+ (long long)0xB53A457239D4B692ULL,
+ (long long)0x0ULL));
+TEST_CONSTEXPR(match_v2di(
+ _mm_maskz_ternarylogic_epi64(
+ (__mmask8)0xB6,
+ ((__m128i)((__v2di){
+ (long long)0x83C1D2E3F4051627ULL, (long long)0x5A0B1C2D3E4F6071ULL
+ })),
+ ((__m128i)((__v2di){
+ (long long)0x9E8D7C6B5A493827ULL, (long long)0x13579BDF2468ACE0ULL
+ })),
+ ((__m128i)((__v2di){
+ (long long)0x02468ACE13579BDFULL, (long long)0xFEDCBA9876543210ULL
+ })),
+ (unsigned char)0xFE), // F = A | B | C
+ (long long)0x0ULL,
+ (long long)0xFFDFBFFF7E7FFEF1ULL));
+TEST_CONSTEXPR(match_v2di(
+ _mm_maskz_ternarylogic_epi64(
+ (__mmask8)0xA5,
+ ((__m128i)((__v2di){
+ (long long)0x1C80317FA3B1799DULL, (long long)0xBDD640FB06671AD1ULL
+ })),
+ ((__m128i)((__v2di){
+ (long long)0x3EB13B9046685257ULL, (long long)0x23B8C1E9392456DEULL
+ })),
+ ((__m128i)((__v2di){
+ (long long)0x1A3D1FA7BC8960A9ULL, (long long)0xBD9C66B3AD3C2D6DULL
+ })),
+ (unsigned char)0x80), // F = A & B & C
+ (long long)0x1800110000004001ULL,
+ (long long)0x0ULL
+));
__m256i test_mm256_ternarylogic_epi64(__m256i __A, __m256i __B, __m256i __C) {
// CHECK-LABEL: test_mm256_ternarylogic_epi64
// CHECK: @llvm.x86.avx512.pternlog.q.256
return _mm256_ternarylogic_epi64(__A, __B, __C, 4);
}
+TEST_CONSTEXPR(match_v4di(
+ _mm256_ternarylogic_epi64(
+ ((__m256i)((__v4di){-0x1, 0x0, -0x1, 0x0})),
+ ((__m256i)((__v4di){0xB, 0xB, 0xB, 0xB})),
+ ((__m256i)((__v4di){0xC, 0xC, 0xC, 0xC})),
+ (unsigned char)0x94),
+ (long long)-0x8, (long long)0x3, (long long)-0x8, (long long)0x3));
+TEST_CONSTEXPR(match_v4di(
+ _mm256_ternarylogic_epi64(
+ ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})),
+ ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})),
+ ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0x76),
+ (long long)0xF, (long long)0xF, (long long)0xF, (long long)0xF));
__m256i test_mm256_mask_ternarylogic_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
// CHECK-LABEL: test_mm256_mask_ternarylogic_epi64
@@ -8426,6 +8682,46 @@ __m256i test_mm256_mask_ternarylogic_epi64(__m256i __A, __mmask8 __U, __m256i __
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_mask_ternarylogic_epi64(__A, __U, __B, __C, 4);
}
+TEST_CONSTEXPR(match_v4di(
+ _mm256_mask_ternarylogic_epi64(
+ ((__m256i)((__v4di){
+ (long long)0x0123456789ABCDEFULL, (long long)0x0F0F0F0F0F0F0F0FULL,
+ (long long)0xAAAAAAAAAAAAAAAALL, (long long)0x13579BDF02468ACEULL
+ })),
+ (__mmask8)0x09,
+ ((__m256i)((__v4di){
+ (long long)0x1111111111111111ULL, (long long)0x2222222222222222ULL,
+ (long long)0x3333333333333333ULL, (long long)0x4444444444444444ULL
+ })),
+ ((__m256i)((__v4di){
+ (long long)0x5555555555555555ULL, (long long)0x6666666666666666ULL,
+ (long long)0x7777777777777777ULL, (long long)0x8888888888888888ULL
+ })),
+ (unsigned char)0x12),
+ (long long)0x44660022CCEE88AAULL,
+ (long long)0x0F0F0F0F0F0F0F0FULL,
+ (long long)0xAAAAAAAAAAAAAAAALL,
+ (long long)0x9B9B13138A8A0202ULL));
+TEST_CONSTEXPR(match_v4di(
+ _mm256_mask_ternarylogic_epi64(
+ ((__m256i)((__v4di){
+ (long long)0xDEADBEEFDEADBEEFULL, (long long)0xCAFEBABECAFEBABEULL,
+ (long long)0xF00DFACEF00DFACEULL, (long long)0x0123456789ABCDEFULL
+ })),
+ (__mmask8)0x06,
+ ((__m256i)((__v4di){
+ (long long)0x0000000000000000ULL, (long long)0xFFFFFFFFFFFFFFFFULL,
+ (long long)0x13579BDF13579BDFULL, (long long)0x0AAAAAAAAAAAAAAULL
+ })),
+ ((__m256i)((__v4di){
+ (long long)0x1111111111111111ULL, (long long)0x2222222222222222ULL,
+ (long long)0x3333333333333333ULL, (long long)0x4444444444444444ULL
+ })),
+ (unsigned char)0x23),
+ (long long)0xDEADBEEFDEADBEEFULL,
+ (long long)0x0000000000000000ULL,
+ (long long)0x2CA024202CA02420ULL,
+ (long long)0x0123456789ABCDEFULL));
__m256i test_mm256_maskz_ternarylogic_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
// CHECK-LABEL: test_mm256_maskz_ternarylogic_epi64
@@ -8433,6 +8729,41 @@ __m256i test_mm256_maskz_ternarylogic_epi64(__mmask8 __U, __m256i __A, __m256i _
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> zeroinitializer
return _mm256_maskz_ternarylogic_epi64(__U, __A, __B, __C, 4);
}
+TEST_CONSTEXPR(match_v4di(
+ _mm256_maskz_ternarylogic_epi64(
+ (__mmask8)0x05,
+ ((__m256i)((__v4di){
+ (long long)0x1, (long long)0x2, (long long)0x0, (long long)0x7
+ })),
+ ((__m256i)((__v4di){
+ (long long)0x0, (long long)0x3, (long long)0x4, (long long)0x0
+ })),
+ ((__m256i)((__v4di){
+ (long long)0x0, (long long)0x5, (long long)0x0, (long long)0x1
+ })),
+ (unsigned char)0xFE),
+ (long long)0x1,
+ (long long)0x0,
+ (long long)0x4,
+ (long long)0x0));
+TEST_CONSTEXPR(match_v4di(
+ _mm256_maskz_ternarylogic_epi64(
+ (__mmask8)0x0A,
+ ((__m256i)((__v4di){
+ (long long)0x1, (long long)0x0, (long long)0x2, (long long)0x1
+ })),
+ ((__m256i)((__v4di){
+ (long long)0x0, (long long)0x1, (long long)0x0, (long long)0x0
+ })),
+ ((__m256i)((__v4di){
+ (long long)0x0, (long long)0x0, (long long)0x4, (long long)0x1
+ })),
+ (unsigned char)0xED),
+ (long long)0x0,
+ (long long)-0x1,
+ (long long)0x0,
+ (long long)-0x1));
+
__m256 test_mm256_shuffle_f32x4(__m256 __A, __m256 __B) {
// CHECK-LABEL: test_mm256_shuffle_f32x4
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl
index 512fcd4..9524f02 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl
@@ -54,18 +54,16 @@ struct S {
// struct splats
// CHECK-LABEL: define void {{.*}}call3
-// CHECK: [[A:%.*]] = alloca <1 x i32>, align 4
+// CHECK: [[AA:%.*]] = alloca i32, align 4
// CHECK: [[s:%.*]] = alloca %struct.S, align 1
-// CHECK-NEXT: store <1 x i32> splat (i32 1), ptr [[A]], align 4
-// CHECK-NEXT: [[L:%.*]] = load <1 x i32>, ptr [[A]], align 4
-// CHECK-NEXT: [[VL:%.*]] = extractelement <1 x i32> [[L]], i32 0
+// CHECK-NEXT: store i32 %A, ptr [[AA]], align 4
+// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[AA]], align 4
// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0
// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1
-// CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL]] to float
+// CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4
+// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L]] to float
// CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
-export void call3() {
- int1 A = {1};
+export void call3(int A) {
S s = (S)A;
}
@@ -85,3 +83,41 @@ export void call5() {
int1 A = {1};
S s = (S)A;
}
+
+struct BFields {
+ double DF;
+ int E: 15;
+ int : 8;
+ float F;
+};
+
+struct Derived : BFields {
+ int G;
+};
+
+// derived struct with bitfields splat from scalar
+// CHECK-LABEL: call6
+// CHECK: [[AAddr:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[D:%.*]] = alloca %struct.Derived, align 1
+// CHECK-NEXT: store i32 %A, ptr [[AAddr]], align 4
+// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[AAddr]], align 4
+// CHECK-NEXT: [[Gep:%.*]] = getelementptr inbounds %struct.Derived, ptr [[D]], i32 0, i32 0
+// CHECK-NEXT: [[E:%.*]] = getelementptr inbounds nuw %struct.BFields, ptr [[Gep]], i32 0, i32 1
+// CHECK-NEXT: [[Gep1:%.*]] = getelementptr inbounds %struct.Derived, ptr [[D]], i32 0, i32 0, i32 0
+// CHECK-NEXT: [[Gep2:%.*]] = getelementptr inbounds %struct.Derived, ptr [[D]], i32 0, i32 0, i32 2
+// CHECK-NEXT: [[Gep3:%.*]] = getelementptr inbounds %struct.Derived, ptr [[D]], i32 0, i32 1
+// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[B]] to double
+// CHECK-NEXT: store double [[C]], ptr [[Gep1]], align 8
+// CHECK-NEXT: [[H:%.*]] = trunc i32 [[B]] to i24
+// CHECK-NEXT: [[BFL:%.*]] = load i24, ptr [[E]], align 1
+// CHECK-NEXT: [[BFV:%.*]] = and i24 [[H]], 32767
+// CHECK-NEXT: [[BFC:%.*]] = and i24 [[BFL]], -32768
+// CHECK-NEXT: [[BFS:%.*]] = or i24 [[BFC]], [[BFV]]
+// CHECK-NEXT: store i24 [[BFS]], ptr [[E]], align 1
+// CHECK-NEXT: [[C4:%.*]] = sitofp i32 [[B]] to float
+// CHECK-NEXT: store float [[C4]], ptr [[Gep2]], align 4
+// CHECK-NEXT: store i32 [[B]], ptr [[Gep3]], align 4
+// CHECK-NEXT: ret void
+export void call6(int A) {
+ Derived D = (Derived)A;
+}
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
index ac02ddf..5f2182e 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
@@ -10,7 +10,8 @@
// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 0
// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 1
// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G1]], align 4
-// CHECK-NEXT: store i32 [[L]], ptr [[B]], align 4
+// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L]] to float
+// CHECK-NEXT: store float [[C]], ptr [[B]], align 4
export void call0() {
int A[2] = {0,1};
float B = (float)A;
@@ -141,3 +142,46 @@ export void call7() {
int A[1] = {1};
A = (int[1])s;
}
+
+struct BFields {
+ double D;
+ int E: 15;
+ int : 8;
+ float F;
+};
+
+struct Derived : BFields {
+ int G;
+};
+
+// flatten from a derived struct with bitfields
+// CHECK-LABEL: call8
+// CHECK: [[A:%.*]] = alloca [4 x i32], align 4
+// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Derived, align 1
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 %D, i32 19, i1 false)
+// CHECK-NEXT: [[Gep:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT: [[Gep1:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 0, i32 1
+// CHECK-NEXT: [[Gep2:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 0, i32 2
+// CHECK-NEXT: [[Gep3:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 0, i32 3
+// CHECK-NEXT: [[Gep4:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0
+// CHECK-NEXT: [[E:%.*]] = getelementptr inbounds nuw %struct.BFields, ptr [[Gep4]], i32 0, i32 1
+// CHECK-NEXT: [[Gep5:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0, i32 0
+// CHECK-NEXT: [[Gep6:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0, i32 2
+// CHECK-NEXT: [[Gep7:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 1
+// CHECK-NEXT: [[Z:%.*]] = load double, ptr [[Gep5]], align 8
+// CHECK-NEXT: [[C:%.*]] = fptosi double [[Z]] to i32
+// CHECK-NEXT: store i32 [[C]], ptr [[Gep]], align 4
+// CHECK-NEXT: [[BFL:%.*]] = load i24, ptr [[E]], align 1
+// CHECK-NEXT: [[BFShl:%.*]] = shl i24 [[BFL]], 9
+// CHECK-NEXT: [[BFAshr:%.*]] = ashr i24 [[BFShl]], 9
+// CHECK-NEXT: [[BFC:%.*]] = sext i24 [[BFAshr]] to i32
+// CHECK-NEXT: store i32 [[BFC]], ptr [[Gep1]], align 4
+// CHECK-NEXT: [[Y:%.*]] = load float, ptr [[Gep6]], align 4
+// CHECK-NEXT: [[C8:%.*]] = fptosi float [[Y]] to i32
+// CHECK-NEXT: store i32 [[C8]], ptr [[Gep2]], align 4
+// CHECK-NEXT: [[X:%.*]] = load i32, ptr [[Gep7]], align 4
+// CHECK-NEXT: store i32 [[X]], ptr [[Gep3]], align 4
+// CHECK-NEXT: ret void
+export void call8(Derived D) {
+ int A[4] = (int[4])D;
+}
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
index 81b9f5b..4e29994 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+// RUN: %clang_cc1 -finclude-default-header -fnative-half-type -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
struct S {
int X;
@@ -127,14 +127,219 @@ struct T {
// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0
// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1
// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds %struct.T, ptr [[Tmp]], i32 0, i32 0
-// CHECK-NEXT: %gep3 = getelementptr inbounds %struct.T, ptr %agg-temp, i32 0, i32 1
-// CHECK-NEXT: %gep4 = getelementptr inbounds %struct.T, ptr %agg-temp, i32 0, i32 2
-// CHECK-NEXT: %load = load i32, ptr %gep2, align 4
-// CHECK-NEXT: store i32 %load, ptr %gep, align 4
-// CHECK-NEXT: %load5 = load i32, ptr %gep3, align 4
-// CHECK-NEXT: %conv = sitofp i32 %load5 to float
-// CHECK-NEXT: store float %conv, ptr %gep1, align 4
+// CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds %struct.T, ptr %agg-temp, i32 0, i32 1
+// CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds %struct.T, ptr %agg-temp, i32 0, i32 2
+// CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[G3]], align 4
+// CHECK-NEXT: store i32 [[L1]], ptr [[G1]], align 4
+// CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[G4]], align 4
+// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L2]] to float
+// CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
export void call8() {
T t = {1,2,3};
S s = (S)t;
}
+
+struct BFields {
+ double D;
+ int E: 15;
+ int : 8;
+ float F;
+};
+
+struct Derived : BFields {
+ int G;
+};
+
+// Derived Struct truncate to scalar
+// CHECK-LABEL: call9
+// CHECK: [[D2:%.*]] = alloca double, align 8
+// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Derived, align 1
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 %D, i32 19, i1 false)
+// CHECK-NEXT: [[Gep:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0
+// CHECK-NEXT: [[E:%.*]] = getelementptr inbounds nuw %struct.BFields, ptr [[Gep]], i32 0, i32 1
+// CHECK-NEXT: [[Gep1:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0, i32 0
+// CHECK-NEXT: [[Gep2:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0, i32 2
+// CHECK-NEXT: [[Gep3:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 1
+// CHECK-NEXT: [[A:%.*]] = load double, ptr [[Gep1]], align 8
+// CHECK-NEXT: store double [[A]], ptr [[D2]], align 8
+// CHECK-NEXT: ret void
+export void call9(Derived D) {
+ double D2 = (double)D;
+}
+
+// Derived struct from vector
+// CHECK-LABEL: call10
+// CHECK: [[IAddr:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT: [[D:%.*]] = alloca %struct.Derived, align 1
+// CHECK-NEXT: store <4 x i32> %I, ptr [[IAddr]], align 16
+// CHECK-NEXT: [[A:%.*]] = load <4 x i32>, ptr [[IAddr]], align 16
+// CHECK-NEXT: [[Gep:%.*]] = getelementptr inbounds %struct.Derived, ptr [[D]], i32 0, i32 0
+// CHECK-NEXT: [[E:%.*]] = getelementptr inbounds nuw %struct.BFields, ptr [[Gep]], i32 0, i32 1
+// CHECK-NEXT: [[Gep1:%.*]] = getelementptr inbounds %struct.Derived, ptr [[D]], i32 0, i32 0, i32 0
+// CHECK-NEXT: [[Gep2:%.*]] = getelementptr inbounds %struct.Derived, ptr [[D]], i32 0, i32 0, i32 2
+// CHECK-NEXT: [[Gep3:%.*]] = getelementptr inbounds %struct.Derived, ptr [[D]], i32 0, i32 1
+// CHECK-NEXT: [[VL:%.*]] = extractelement <4 x i32> [[A]], i64 0
+// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL]] to double
+// CHECK-NEXT: store double [[C]], ptr [[Gep1]], align 8
+// CHECK-NEXT: [[VL4:%.*]] = extractelement <4 x i32> [[A]], i64 1
+// CHECK-NEXT: [[B:%.*]] = trunc i32 [[VL4]] to i24
+// CHECK-NEXT: [[BFL:%.*]] = load i24, ptr [[E]], align 1
+// CHECK-NEXT: [[BFV:%.*]] = and i24 [[B]], 32767
+// CHECK-NEXT: [[BFC:%.*]] = and i24 [[BFL]], -32768
+// CHECK-NEXT: [[BFSet:%.*]] = or i24 [[BFC]], [[BFV]]
+// CHECK-NEXT: store i24 [[BFSet]], ptr [[E]], align 1
+// CHECK-NEXT: [[VL5:%.*]] = extractelement <4 x i32> [[A]], i64 2
+// CHECK-NEXT: [[C6:%.*]] = sitofp i32 [[VL5]] to float
+// CHECK-NEXT: store float [[C6]], ptr [[Gep2]], align 4
+// CHECK-NEXT: [[VL7:%.*]] = extractelement <4 x i32> [[A]], i64 3
+// CHECK-NEXT: store i32 [[VL7]], ptr [[Gep3]], align 4
+// CHECK-NEXT: ret void
+export void call10(int4 I) {
+ Derived D = (Derived)I;
+}
+
+// truncate derived struct
+// CHECK-LABEL: call11
+// CHECK: [[B:%.*]] = alloca %struct.BFields, align 1
+// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Derived, align 1
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 [[D]], i32 19, i1 false)
+// CHECK-NEXT: [[Gep:%.*]] = getelementptr inbounds %struct.BFields, ptr [[B]], i32 0
+// CHECK-NEXT: [[E:%.*]] = getelementptr inbounds nuw %struct.BFields, ptr [[Gep]], i32 0, i32 1
+// CHECK-NEXT: [[Gep1:%.*]] = getelementptr inbounds %struct.BFields, ptr [[B]], i32 0, i32 0
+// CHECK-NEXT: [[Gep2:%.*]] = getelementptr inbounds %struct.BFields, ptr [[B]], i32 0, i32 2
+// CHECK-NEXT: [[Gep3:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0
+// CHECK-NEXT: [[E4:%.*]] = getelementptr inbounds nuw %struct.BFields, ptr [[Gep3]], i32 0, i32 1
+// CHECK-NEXT: [[Gep5:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0, i32 0
+// CHECK-NEXT: [[Gep6:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0, i32 2
+// CHECK-NEXT: [[Gep7:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 1
+// CHECK-NEXT: [[A:%.*]] = load double, ptr [[Gep5]], align 8
+// CHECK-NEXT: store double [[A]], ptr [[Gep1]], align 8
+// CHECK-NEXT: [[BFl:%.*]] = load i24, ptr [[E4]], align 1
+// CHECK-NEXT: [[Shl:%.*]] = shl i24 [[BFL]], 9
+// CHECK-NEXT: [[Ashr:%.*]] = ashr i24 [[Shl]], 9
+// CHECK-NEXT: [[BFC:%.*]] = sext i24 [[Ashr]] to i32
+// CHECK-NEXT: [[B:%.*]] = trunc i32 [[BFC]] to i24
+// CHECK-NEXT: [[BFL8:%.*]] = load i24, ptr [[E]], align 1
+// CHECK-NEXT: [[BFV:%.*]] = and i24 [[B]], 32767
+// CHECK-NEXT: [[BFC:%.*]] = and i24 [[BFL8]], -32768
+// CHECK-NEXT: [[BFSet:%.*]] = or i24 [[BFC]], [[BFV]]
+// CHECK-NEXT: store i24 [[BFSet]], ptr [[E]], align 1
+// CHECK-NEXT: [[C:%.*]] = load float, ptr [[Gep6]], align 4
+// CHECK-NEXT: store float [[C]], ptr [[Gep2]], align 4
+// CHECK-NEXT: ret void
+export void call11(Derived D) {
+ BFields B = (BFields)D;
+}
+
+struct Empty {
+};
+
+// cast to an empty struct
+// CHECK-LABEL: call12
+// CHECK: [[I:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT: [[E:%.*]] = alloca %struct.Empty, align 1
+// CHECK-NEXT: store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr [[I]], align 16
+// CHECK-NEXT: [[A:%.*]] = load <4 x i32>, ptr [[I]], align 16
+// CHECK-NEXt: ret void
+export void call12() {
+ int4 I = {1,2,3,4};
+ Empty E = (Empty)I;
+}
+
+struct MoreBFields {
+ int A;
+ uint64_t B: 60;
+ float C;
+ uint16_t D: 10;
+ uint16_t E: 6;
+ int : 32;
+ double F;
+ int : 8;
+ uint G;
+};
+
+// more complicated bitfield case
+// CHECK-LABEL: call13
+// CHECK: [[AA:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[MBF:%.*]] = alloca %struct.MoreBFields, align 1
+// CHECK-NEXT: store i32 %A, ptr [[AA]], align 4
+// CHECK-NEXT: [[Z:%.*]] = load i32, ptr [[AA]], align 4
+// get the gep for the struct.
+// CHECK-NEXT: [[Gep:%.*]] = getelementptr inbounds %struct.MoreBFields, ptr [[MBF]], i32 0
+// CHECK-NEXT: [[FieldB:%.*]] = getelementptr inbounds nuw %struct.MoreBFields, ptr [[Gep]], i32 0, i32 1
+// D and E share the same field index
+// CHECK-NEXT: [[FieldD:%.*]] = getelementptr inbounds nuw %struct.MoreBFields, ptr [[Gep]], i32 0, i32 3
+// CHECK-NEXT: [[FieldE:%.*]] = getelementptr inbounds nuw %struct.MoreBFields, ptr [[Gep]], i32 0, i32 3
+// CHECK-NEXT: [[FieldA:%.*]] = getelementptr inbounds %struct.MoreBFields, ptr [[MBF]], i32 0, i32 0
+// CHECK-NEXT: [[FieldC:%.*]] = getelementptr inbounds %struct.MoreBFields, ptr [[MBF]], i32 0, i32 2
+// CHECK-NEXT: [[FieldF:%.*]] = getelementptr inbounds %struct.MoreBFields, ptr [[MBF]], i32 0, i32 5
+// CHECK-NEXT: [[FieldG:%.*]] = getelementptr inbounds %struct.MoreBFields, ptr [[MBF]], i32 0, i32 7
+// store int A into field A
+// CHECK-NEXT: store i32 [[Z]], ptr [[FieldA]], align 4
+// store int A in bitField B, do necessary conversions
+// CHECK-NEXT: [[Conv:%.*]] = sext i32 [[Z]] to i64
+// CHECK-NEXT: [[BFL:%.*]] = load i64, ptr [[FieldB]], align 1
+// CHECK-NEXT: [[BFV:%.*]] = and i64 [[Conv]], 1152921504606846975
+// CHECK-NEXT: [[BFC:%.*]] = and i64 [[BFL]], -1152921504606846976
+// CHECK-NEXT: [[BFS:%.*]] = or i64 [[BFC]], [[BFV]]
+// CHECK-NEXT: store i64 [[BFS]], ptr [[FieldB]], align 1
+// store int A into field C
+// CHECK-NEXT: [[Conv5:%.*]] = sitofp i32 [[Z]] to float
+// CHECK-NEXT: store float [[Conv5]], ptr [[FieldC]], align 4
+// store int A into bitfield D
+// CHECK-NEXT: [[Conv6:%.*]] = trunc i32 [[Z]] to i16
+// CHECK-NEXT: [[FDL:%.*]] = load i16, ptr [[FieldD]], align 1
+// CHECK-NEXT: [[FDV:%.*]] = and i16 [[Conv6]], 1023
+// CHECK-NEXT: [[FDC:%.*]] = and i16 [[FDL]], -1024
+// CHECK-NEXT: [[FDS:%.*]] = or i16 [[FDC]], [[FDV]]
+// CHECK-NEXT: store i16 [[FDS]], ptr [[FieldD]], align 1
+// store int A into bitfield E;
+// CHECK-NEXT: [[Conv11:%.*]] = trunc i32 [[Z]] to i16
+// CHECK-NEXT: [[FEL:%.*]] = load i16, ptr [[FieldE]], align 1
+// CHECK-NEXT: [[FEV:%.*]] = and i16 [[Conv11]], 63
+// CHECK-NEXT: [[FESHL:%.*]] = shl i16 [[FEV]], 10
+// CHECK-NEXT: [[FEC:%.*]] = and i16 [[FEL]], 1023
+// CHECK-NEXT: [[FES:%.*]] = or i16 [[FEC]], [[FESHL]]
+// CHECK-NEXT: store i16 [[FES]], ptr [[FieldE]], align 1
+// store int A into field F
+// CHECK-NEXT: [[Conv16:%.*]] = sitofp i32 [[Z]] to double
+// CHECK-NEXT: store double [[Conv16]], ptr [[FieldF]], align 8
+// store int A into field G
+// CHECK-NEXT: store i32 [[Z]], ptr [[FieldG]], align 4
+// CHECK-NEXT: ret void
+export void call13(int A) {
+ MoreBFields MBF = (MoreBFields)A;
+}
+
+struct Inner {
+ int Z;
+ int Y : 25;
+};
+
+struct Outer {
+ int A;
+ Inner I;
+};
+
+// show usage of "extra" gep for struct containing bitfield
+// CHECK-LABEL: call14
+// CHECK: [[AA:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[O:%.*]] = alloca %struct.Outer, align 1
+// CHECK-NEXT: store i32 %A, ptr [[AA]], align 4
+// CHECK-NEXT: [[Z:%.*]] = load i32, ptr [[AA]], align 4
+// CHECK-NEXT: [[FieldA:%.*]] = getelementptr inbounds %struct.Outer, ptr [[O]], i32 0, i32 0
+// showing real usage of "extra gep". need Inner struct to generate access of its bitfield.
+// CHECK-NEXT: [[FieldI:%.*]] = getelementptr inbounds %struct.Outer, ptr [[O]], i32 0, i32 1
+// CHECK-NEXT: [[FieldY:%.*]] = getelementptr inbounds nuw %struct.Inner, ptr [[FieldI]], i32 0, i32 1
+// CHECK-NEXT: [[FieldZ:%.*]] = getelementptr inbounds %struct.Outer, ptr [[O]], i32 0, i32 1, i32 0
+// CHECK-NEXT: store i32 [[Z]], ptr [[FieldA]], align 4
+// CHECK-NEXT: store i32 [[Z]], ptr [[FieldZ]], align 4
+// CHECK-NEXT: [[BFL:%.*]] = load i32, ptr [[FieldY]], align 1
+// CHECK-NEXT: [[BFV:%.*]] = and i32 [[Z]], 33554431
+// CHECK-NEXT: [[BFC:%.*]] = and i32 [[BFL]], -33554432
+// CHECK-NEXT: [[BFS:%.*]] = or i32 [[BFC]], [[BFV]]
+// CHECK-NEXT: store i32 [[BFS]], ptr [[FieldY]], align 1
+// CHECK-NEXT: ret void
+export void call14(int A) {
+ Outer O = (Outer)A;
+}
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
index 253b38a7c..26aa41a 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
@@ -79,3 +79,45 @@ export void call5() {
S s = {1, 2.0};
int A = (int)s;
}
+
+struct BFields {
+ double D;
+ int E: 15;
+ int : 8;
+ float F;
+};
+
+struct Derived : BFields {
+ int G;
+};
+
+// vector flat cast from derived struct with bitfield
+// CHECK-LABEL: call6
+// CHECK: [[A:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Derived, align 1
+// CHECK-NEXT: [[FlatTmp:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 %D, i32 19, i1 false)
+// CHECK-NEXT: [[Gep:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0
+// CHECK-NEXT: [[E:%.*]] = getelementptr inbounds nuw %struct.BFields, ptr [[Gep]], i32 0, i32 1
+// CHECK-NEXT: [[Gep1:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0, i32 0
+// CHECK-NEXT: [[Gep2:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0, i32 2
+// CHECK-NEXT: [[Gep3:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 1
+// CHECK-NEXT: [[Z:%.*]] = load <4 x i32>, ptr [[FlatTmp]], align 16
+// CHECK-NEXT: [[Y:%.*]] = load double, ptr [[Gep1]], align 8
+// CHECK-NEXT: [[C:%.*]] = fptosi double [[Y]] to i32
+// CHECK-NEXT: [[X:%.*]] = insertelement <4 x i32> [[Z]], i32 [[C]], i64 0
+// CHECK-NEXT: [[BFL:%.*]] = load i24, ptr [[E]], align 1
+// CHECK-NEXT: [[BFShl:%.*]] = shl i24 [[BFL]], 9
+// CHECK-NEXT: [[BFAshr:%.*]] = ashr i24 [[BFShl]], 9
+// CHECK-NEXT: [[BFC:%.*]] = sext i24 [[BFAshr]] to i32
+// CHECK-NEXT: [[W:%.*]] = insertelement <4 x i32> [[X]], i32 [[BFC]], i64 1
+// CHECK-NEXT: [[V:%.*]] = load float, ptr [[Gep2]], align 4
+// CHECK-NEXT: [[C4:%.*]] = fptosi float [[V]] to i32
+// CHECK-NEXT: [[U:%.*]] = insertelement <4 x i32> [[W]], i32 [[C4]], i64 2
+// CHECK-NEXT: [[T:%.*]] = load i32, ptr [[Gep3]], align 4
+// CHECK-NEXT: [[S:%.*]] = insertelement <4 x i32> [[U]], i32 [[T]], i64 3
+// CHECK-NEXT: store <4 x i32> [[S]], ptr [[A]], align 16
+// CHECK-NEXT: ret void
+export void call6(Derived D) {
+ int4 A = (int4)D;
+}
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index c0c22bc..7cc83c0 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -109,8 +109,8 @@
// GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+cluster,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32"
-// GFX1251: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+cluster,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32"
+// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32"
+// GFX1251: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32"
// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64"
diff --git a/clang/test/Misc/amdgcn.languageOptsOpenCL.cl b/clang/test/Misc/amdgcn.languageOptsOpenCL.cl
index 50c78d7..80c0825 100644
--- a/clang/test/Misc/amdgcn.languageOptsOpenCL.cl
+++ b/clang/test/Misc/amdgcn.languageOptsOpenCL.cl
@@ -8,6 +8,9 @@
// RUN: %clang_cc1 -x cl -cl-std=CL1.2 %s -verify -triple amdgcn-unknown-unknown -Wpedantic-core-features -DTEST_CORE_FEATURES
// RUN: %clang_cc1 -x cl -cl-std=CL2.0 %s -verify -triple amdgcn-unknown-unknown -Wpedantic-core-features -DTEST_CORE_FEATURES
+// RUN: %clang_cc1 -x cl -cl-std=CL3.0 %s -verify -triple amdgcn-unknown-unknown -Wpedantic-core-features -DTEST_CORE_FEATURES
+// RUN: %clang_cc1 -x cl -cl-std=CL3.0 %s -verify -triple amdgcn-unknown-unknown -target-cpu gfx700 -Wpedantic-core-features -DTEST_CORE_FEATURES -DFLAT_SUPPORT
+
// Extensions in all versions
#ifndef cl_clang_storage_class_specifiers
#error "Missing cl_clang_storage_class_specifiers define"
@@ -156,10 +159,31 @@
#pragma OPENCL EXTENSION cl_amd_media_ops2: enable
#if (__OPENCL_C_VERSION__ >= 300)
-#ifndef __opencl_c_generic_address_space
-#error "Missing __opencl_c_generic_address_space define"
-#else
-#error "Incorrect __opencl_c_generic_address_space define"
+ #ifndef __opencl_c_program_scope_global_variables
+ #error "Missing __opencl_c_program_scope_global_variables define"
+ #endif
#endif
-#pragma OPENCL EXTENSION __opencl_c_generic_address_space: enable
+
+#if (__OPENCL_C_VERSION__ >= 300)
+ #ifdef FLAT_SUPPORT
+ #ifndef __opencl_c_generic_address_space
+ #error "Missing __opencl_c_generic_address_space define"
+ #endif
+ #else
+ #ifdef __opencl_c_generic_address_space
+ #error "Incorrect __opencl_c_generic_address_space define"
+ #endif
+ #endif
+#endif
+
+#if (__OPENCL_C_VERSION__ >= 300)
+ #ifdef FLAT_SUPPORT
+ #ifndef __opencl_c_device_enqueue
+ #error "Missing __opencl_c_device_enqueue define"
+ #endif
+ #else
+ #ifdef __opencl_c_device_enqueue
+ #error "Incorrect __opencl_c_device_enqueue define"
+ #endif
+ #endif
#endif
diff --git a/clang/test/SemaCXX/constant-expression-p2280r4.cpp b/clang/test/SemaCXX/constant-expression-p2280r4.cpp
index 78e2e17..5cbfaff 100644
--- a/clang/test/SemaCXX/constant-expression-p2280r4.cpp
+++ b/clang/test/SemaCXX/constant-expression-p2280r4.cpp
@@ -44,7 +44,7 @@ void splash(Swim& swam) { // nointerpreter-note {{declared here}
static_assert(how_many(swam) == 28); // ok
static_assert(Swim().lochte() == 12); // ok
static_assert(swam.lochte() == 12); // expected-error {{static assertion expression is not an integral constant expression}} \
- // nointerpreter-note {{virtual function called on object 'swam' whose dynamic type is not constant}}
+ // expected-note {{virtual function called on object 'swam' whose dynamic type is not constant}}
static_assert(swam.coughlin == 12); // expected-error {{static assertion expression is not an integral constant expression}} \
// nointerpreter-note {{read of variable 'swam' whose value is not known}}
}
diff --git a/clang/test/SemaHLSL/Language/AggregateSplatCast-errors.hlsl b/clang/test/SemaHLSL/Language/AggregateSplatCast-errors.hlsl
index 2320e13..fbb47bd 100644
--- a/clang/test/SemaHLSL/Language/AggregateSplatCast-errors.hlsl
+++ b/clang/test/SemaHLSL/Language/AggregateSplatCast-errors.hlsl
@@ -13,12 +13,6 @@ struct R {
};
};
-// casting types which contain bitfields is not yet supported.
-export void cantCast() {
- S s = (S)1;
- // expected-error@-1 {{no matching conversion for C-style cast from 'int' to 'S'}}
-}
-
// Can't cast a union
export void cantCast2() {
R r = (R)1;
diff --git a/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl b/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl
index 3059150..d9f50e9 100644
--- a/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl
+++ b/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl
@@ -7,27 +7,6 @@ export void cantCast() {
// expected-error@-1 {{C-style cast from 'int[3]' to 'int[4]' is not allowed}}
}
-struct S {
-// expected-note@-1 {{candidate constructor (the implicit copy constructor) not viable: no known conversion from 'int2' (aka 'vector<int, 2>') to 'const S' for 1st argument}}
-// expected-note@-2 {{candidate constructor (the implicit move constructor) not viable: no known conversion from 'int2' (aka 'vector<int, 2>') to 'S' for 1st argument}}
-// expected-note@-3 {{candidate constructor (the implicit default constructor) not viable: requires 0 arguments, but 1 was provided}}
- int A : 8;
- int B;
-};
-
-// casting types which contain bitfields is not yet supported.
-export void cantCast2() {
- S s = {1,2};
- int2 C = (int2)s;
- // expected-error@-1 {{cannot convert 'S' to 'int2' (aka 'vector<int, 2>') without a conversion operator}}
-}
-
-export void cantCast3() {
- int2 C = {1,2};
- S s = (S)C;
- // expected-error@-1 {{no matching conversion for C-style cast from 'int2' (aka 'vector<int, 2>') to 'S'}}
-}
-
struct R {
// expected-note@-1 {{candidate constructor (the implicit copy constructor) not viable: no known conversion from 'int2' (aka 'vector<int, 2>') to 'const R' for 1st argument}}
// expected-note@-2 {{candidate constructor (the implicit move constructor) not viable: no known conversion from 'int2' (aka 'vector<int, 2>') to 'R' for 1st argument}}
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
index b46a810..28e3522 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
@@ -2412,7 +2412,11 @@ TSAN_INTERCEPTOR(int, vfork, int fake) {
}
#endif
-#if SANITIZER_LINUX
+#if SANITIZER_LINUX && !SANITIZER_ANDROID
+// Bionic's pthread_create internally calls clone. When the CLONE_THREAD flag is
+// set, clone does not create a new process but a new thread. This is a
+// workaround for Android. Disabling the interception of clone solves the
+// problem in most scenarios.
TSAN_INTERCEPTOR(int, clone, int (*fn)(void *), void *stack, int flags,
void *arg, int *parent_tid, void *tls, pid_t *child_tid) {
SCOPED_INTERCEPTOR_RAW(clone, fn, stack, flags, arg, parent_tid, tls,
@@ -3135,7 +3139,7 @@ void InitializeInterceptors() {
TSAN_INTERCEPT(fork);
TSAN_INTERCEPT(vfork);
-#if SANITIZER_LINUX
+#if SANITIZER_LINUX && !SANITIZER_ANDROID
TSAN_INTERCEPT(clone);
#endif
#if !SANITIZER_ANDROID
diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp
index 4b55aab..6b65387 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp
@@ -486,8 +486,20 @@ int ExtractRecvmsgFDs(void *msgp, int *fds, int nfd) {
// Reverse operation of libc stack pointer mangling
static uptr UnmangleLongJmpSp(uptr mangled_sp) {
-#if defined(__x86_64__)
-# if SANITIZER_LINUX
+# if SANITIZER_ANDROID
+ if (longjmp_xor_key == 0) {
+ // bionic libc initialization process: __libc_init_globals ->
+ // __libc_init_vdso (calls strcmp) -> __libc_init_setjmp_cookie. strcmp is
+ // intercepted by TSan, so during TSan initialization the setjmp_cookie
+ // remains uninitialized. On Android, longjmp_xor_key must be set on first
+ // use.
+ InitializeLongjmpXorKey();
+ CHECK_NE(longjmp_xor_key, 0);
+ }
+# endif
+
+# if defined(__x86_64__)
+# if SANITIZER_LINUX
// Reverse of:
// xor %fs:0x30, %rsi
// rol $0x11, %rsi
@@ -542,13 +554,23 @@ static uptr UnmangleLongJmpSp(uptr mangled_sp) {
# else
# define LONG_JMP_SP_ENV_SLOT 2
# endif
-#elif SANITIZER_LINUX
-# ifdef __aarch64__
-# define LONG_JMP_SP_ENV_SLOT 13
-# elif defined(__loongarch__)
-# define LONG_JMP_SP_ENV_SLOT 1
-# elif defined(__mips64)
-# define LONG_JMP_SP_ENV_SLOT 1
+# elif SANITIZER_ANDROID
+# ifdef __aarch64__
+# define LONG_JMP_SP_ENV_SLOT 3
+# elif SANITIZER_RISCV64
+# define LONG_JMP_SP_ENV_SLOT 3
+# elif defined(__x86_64__)
+# define LONG_JMP_SP_ENV_SLOT 6
+# else
+# error unsupported
+# endif
+# elif SANITIZER_LINUX
+# ifdef __aarch64__
+# define LONG_JMP_SP_ENV_SLOT 13
+# elif defined(__loongarch__)
+# define LONG_JMP_SP_ENV_SLOT 1
+# elif defined(__mips64)
+# define LONG_JMP_SP_ENV_SLOT 1
# elif SANITIZER_RISCV64
# define LONG_JMP_SP_ENV_SLOT 13
# elif defined(__s390x__)
@@ -556,7 +578,7 @@ static uptr UnmangleLongJmpSp(uptr mangled_sp) {
# else
# define LONG_JMP_SP_ENV_SLOT 6
# endif
-#endif
+# endif
uptr ExtractLongJmpSp(uptr *env) {
uptr mangled_sp = env[LONG_JMP_SP_ENV_SLOT];
@@ -653,7 +675,12 @@ ThreadState *cur_thread() {
}
CHECK_EQ(0, internal_sigprocmask(SIG_SETMASK, &oldset, nullptr));
}
- return thr;
+
+ // Skia calls mallopt(M_THREAD_DISABLE_MEM_INIT, 1), which sets the least
+ // significant bit of TLS_SLOT_SANITIZER to 1. Scudo allocator uses this bit
+ // as a flag to disable memory initialization. This is a workaround to get the
+ // correct ThreadState pointer.
+ reinterpret_cast<ThreadState*>(addr & ~1ULL);
}
void set_cur_thread(ThreadState *thr) {
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp
index b1464cc..978d853 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp
@@ -206,10 +206,14 @@ void ThreadStart(ThreadState *thr, Tid tid, ThreadID os_id,
}
#endif
-#if !SANITIZER_GO
+#if !SANITIZER_GO && !SANITIZER_ANDROID
// Don't imitate stack/TLS writes for the main thread,
// because its initialization is synchronized with all
// subsequent threads anyway.
+ // Because thr is created by MmapOrDie, the thr object
+ // is not in tls, the pointer to the thr object is in
+ // TLS_SLOT_SANITIZER slot. So skip this check on
+ // Android platform.
if (tid != kMainTid) {
if (stk_addr && stk_size) {
const uptr pc = StackTrace::GetNextInstructionPc(
diff --git a/flang/docs/FortranLLVMTestSuite.md b/flang/docs/FortranLLVMTestSuite.md
index 8d9daa4..17083b4 100644
--- a/flang/docs/FortranLLVMTestSuite.md
+++ b/flang/docs/FortranLLVMTestSuite.md
@@ -73,3 +73,5 @@ instructions described [above](#running-the-llvm-test-suite-with-fortran).
There are additional configure-time options that can be used with the gfortran
tests. More details about those options and their purpose can be found in
[`Fortran/gfortran/README.md`](https://github.com/llvm/llvm-test-suite/tree/main/Fortran/gfortran/README.md).
+
+ These tests are Free Software and are shared under the terms of the GNU General Public License (GPL). For more details, please see the accompanying [`LICENSE`](https://github.com/llvm/llvm-test-suite/tree/main/Fortran/gfortran/LICENSE.txt) file.
diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst
index 3c7175c..c7f01e6 100644
--- a/libcxx/docs/FeatureTestMacroTable.rst
+++ b/libcxx/docs/FeatureTestMacroTable.rst
@@ -488,6 +488,8 @@ Status
---------------------------------------------------------- -----------------
``__cpp_lib_ranges_concat`` *unimplemented*
---------------------------------------------------------- -----------------
+ ``__cpp_lib_ranges_indices`` ``202506L``
+ ---------------------------------------------------------- -----------------
``__cpp_lib_ratio`` ``202306L``
---------------------------------------------------------- -----------------
``__cpp_lib_rcu`` *unimplemented*
diff --git a/libcxx/docs/ReleaseNotes/22.rst b/libcxx/docs/ReleaseNotes/22.rst
index 8d023a1..ec23ba9 100644
--- a/libcxx/docs/ReleaseNotes/22.rst
+++ b/libcxx/docs/ReleaseNotes/22.rst
@@ -42,6 +42,7 @@ Implemented Papers
is implemented in this release)
- P3044R2: sub-``string_view`` from ``string`` (`Github <https://llvm.org/PR148140>`__)
- P3223R2: Making ``std::istream::ignore`` less surprising (`Github <https://llvm.org/PR148178>`__)
+- P3060R3: Add ``std::views::indices(n)`` (`Github <https://llvm.org/PR148175>`__)
- P3168R2: Give ``std::optional`` Range Support (`Github <https://llvm.org/PR105430>`__)
Improvements and New Features
diff --git a/libcxx/docs/Status/Cxx2cPapers.csv b/libcxx/docs/Status/Cxx2cPapers.csv
index 4e0918b..69b9984 100644
--- a/libcxx/docs/Status/Cxx2cPapers.csv
+++ b/libcxx/docs/Status/Cxx2cPapers.csv
@@ -149,7 +149,7 @@
"`P3503R3 <https://wg21.link/P3503R3>`__","Make type-erased allocator use in ``promise`` and ``packaged_task`` consistent","2025-06 (Sofia)","","","`#148164 <https://github.com/llvm/llvm-project/issues/148164>`__",""
"`P3008R6 <https://wg21.link/P3008R6>`__","Atomic floating-point min/max","2025-06 (Sofia)","","","`#148168 <https://github.com/llvm/llvm-project/issues/148168>`__",""
"`P3111R8 <https://wg21.link/P3111R8>`__","Atomic Reduction Operations","2025-06 (Sofia)","","","`#148174 <https://github.com/llvm/llvm-project/issues/148174>`__",""
-"`P3060R3 <https://wg21.link/P3060R3>`__","Add ``std::views::indices(n)``","2025-06 (Sofia)","","","`#148175 <https://github.com/llvm/llvm-project/issues/148175>`__",""
+"`P3060R3 <https://wg21.link/P3060R3>`__","Add ``std::views::indices(n)``","2025-06 (Sofia)","|Complete|","22","`#148175 <https://github.com/llvm/llvm-project/issues/148175>`__",""
"`P2319R5 <https://wg21.link/P2319R5>`__","Prevent ``path`` presentation problems","2025-06 (Sofia)","","","`#148177 <https://github.com/llvm/llvm-project/issues/148177>`__",""
"`P3223R2 <https://wg21.link/P3223R2>`__","Making ``std::istream::ignore`` less surprising","2025-06 (Sofia)","|Complete|","22","`#148178 <https://github.com/llvm/llvm-project/issues/148178>`__",""
"`P2781R9 <https://wg21.link/P2781R9>`__","``std::constant_wrapper``","2025-06 (Sofia)","","","`#148179 <https://github.com/llvm/llvm-project/issues/148179>`__",""
diff --git a/libcxx/include/__ranges/iota_view.h b/libcxx/include/__ranges/iota_view.h
index 32ff340..22adc22 100644
--- a/libcxx/include/__ranges/iota_view.h
+++ b/libcxx/include/__ranges/iota_view.h
@@ -393,6 +393,15 @@ struct __fn {
inline namespace __cpo {
inline constexpr auto iota = __iota::__fn{};
} // namespace __cpo
+
+# if _LIBCPP_STD_VER >= 26
+
+inline constexpr auto indices = [](__integer_like auto __size) static {
+ return ranges::views::iota(decltype(__size){}, __size);
+};
+
+# endif
+
} // namespace views
} // namespace ranges
diff --git a/libcxx/include/ranges b/libcxx/include/ranges
index 96d7a6b..cfaa66a 100644
--- a/libcxx/include/ranges
+++ b/libcxx/include/ranges
@@ -267,6 +267,11 @@ namespace std::ranges {
template<class W, class Bound>
inline constexpr bool enable_borrowed_range<iota_view<W, Bound>> = true;
+ namespace views {
+ inline constexpr unspecified iota = unspecified;
+ inline constexpr unspecified indices = unspecified; // Since C++26
+ }
+
// [range.repeat], repeat view
template<class T>
concept integer-like-with-usable-difference-type = // exposition only
diff --git a/libcxx/include/version b/libcxx/include/version
index a132f08..99e6929 100644
--- a/libcxx/include/version
+++ b/libcxx/include/version
@@ -205,6 +205,7 @@ __cpp_lib_ranges_chunk_by 202202L <ranges>
__cpp_lib_ranges_concat 202403L <ranges>
__cpp_lib_ranges_contains 202207L <algorithm>
__cpp_lib_ranges_find_last 202207L <algorithm>
+__cpp_lib_ranges_indices 202506L <ranges>
__cpp_lib_ranges_iota 202202L <numeric>
__cpp_lib_ranges_join_with 202202L <ranges>
__cpp_lib_ranges_repeat 202207L <ranges>
@@ -591,6 +592,7 @@ __cpp_lib_void_t 201411L <type_traits>
# define __cpp_lib_out_ptr 202311L
// # define __cpp_lib_philox_engine 202406L
// # define __cpp_lib_ranges_concat 202403L
+# define __cpp_lib_ranges_indices 202506L
# define __cpp_lib_ratio 202306L
// # define __cpp_lib_rcu 202306L
# define __cpp_lib_reference_wrapper 202403L
diff --git a/libcxx/modules/std/ranges.inc b/libcxx/modules/std/ranges.inc
index 7ede42e..cc7daa3 100644
--- a/libcxx/modules/std/ranges.inc
+++ b/libcxx/modules/std/ranges.inc
@@ -114,6 +114,9 @@ export namespace std {
namespace views {
using std::ranges::views::iota;
+#if _LIBCPP_STD_VER >= 26
+ using std::ranges::views::indices;
+#endif
} // namespace views
#if _LIBCPP_STD_VER >= 23
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/ranges.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/ranges.version.compile.pass.cpp
index df19f03..5116864 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/ranges.version.compile.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/ranges.version.compile.pass.cpp
@@ -48,6 +48,10 @@
# error "__cpp_lib_ranges_concat should not be defined before c++26"
# endif
+# ifdef __cpp_lib_ranges_indices
+# error "__cpp_lib_ranges_indices should not be defined before c++26"
+# endif
+
# ifdef __cpp_lib_ranges_join_with
# error "__cpp_lib_ranges_join_with should not be defined before c++23"
# endif
@@ -98,6 +102,10 @@
# error "__cpp_lib_ranges_concat should not be defined before c++26"
# endif
+# ifdef __cpp_lib_ranges_indices
+# error "__cpp_lib_ranges_indices should not be defined before c++26"
+# endif
+
# ifdef __cpp_lib_ranges_join_with
# error "__cpp_lib_ranges_join_with should not be defined before c++23"
# endif
@@ -148,6 +156,10 @@
# error "__cpp_lib_ranges_concat should not be defined before c++26"
# endif
+# ifdef __cpp_lib_ranges_indices
+# error "__cpp_lib_ranges_indices should not be defined before c++26"
+# endif
+
# ifdef __cpp_lib_ranges_join_with
# error "__cpp_lib_ranges_join_with should not be defined before c++23"
# endif
@@ -201,6 +213,10 @@
# error "__cpp_lib_ranges_concat should not be defined before c++26"
# endif
+# ifdef __cpp_lib_ranges_indices
+# error "__cpp_lib_ranges_indices should not be defined before c++26"
+# endif
+
# ifdef __cpp_lib_ranges_join_with
# error "__cpp_lib_ranges_join_with should not be defined before c++23"
# endif
@@ -278,6 +294,10 @@
# error "__cpp_lib_ranges_concat should not be defined before c++26"
# endif
+# ifdef __cpp_lib_ranges_indices
+# error "__cpp_lib_ranges_indices should not be defined before c++26"
+# endif
+
# ifndef __cpp_lib_ranges_join_with
# error "__cpp_lib_ranges_join_with should be defined in c++23"
# endif
@@ -400,6 +420,13 @@
# endif
# endif
+# ifndef __cpp_lib_ranges_indices
+# error "__cpp_lib_ranges_indices should be defined in c++26"
+# endif
+# if __cpp_lib_ranges_indices != 202506L
+# error "__cpp_lib_ranges_indices should have the value 202506L in c++26"
+# endif
+
# ifndef __cpp_lib_ranges_join_with
# error "__cpp_lib_ranges_join_with should be defined in c++26"
# endif
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp
index 6aa704a..9a8a1da 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp
@@ -664,6 +664,10 @@
# error "__cpp_lib_ranges_find_last should not be defined before c++23"
# endif
+# ifdef __cpp_lib_ranges_indices
+# error "__cpp_lib_ranges_indices should not be defined before c++26"
+# endif
+
# ifdef __cpp_lib_ranges_iota
# error "__cpp_lib_ranges_iota should not be defined before c++23"
# endif
@@ -1608,6 +1612,10 @@
# error "__cpp_lib_ranges_find_last should not be defined before c++23"
# endif
+# ifdef __cpp_lib_ranges_indices
+# error "__cpp_lib_ranges_indices should not be defined before c++26"
+# endif
+
# ifdef __cpp_lib_ranges_iota
# error "__cpp_lib_ranges_iota should not be defined before c++23"
# endif
@@ -2723,6 +2731,10 @@
# error "__cpp_lib_ranges_find_last should not be defined before c++23"
# endif
+# ifdef __cpp_lib_ranges_indices
+# error "__cpp_lib_ranges_indices should not be defined before c++26"
+# endif
+
# ifdef __cpp_lib_ranges_iota
# error "__cpp_lib_ranges_iota should not be defined before c++23"
# endif
@@ -4111,6 +4123,10 @@
# error "__cpp_lib_ranges_find_last should not be defined before c++23"
# endif
+# ifdef __cpp_lib_ranges_indices
+# error "__cpp_lib_ranges_indices should not be defined before c++26"
+# endif
+
# ifdef __cpp_lib_ranges_iota
# error "__cpp_lib_ranges_iota should not be defined before c++23"
# endif
@@ -5694,6 +5710,10 @@
# error "__cpp_lib_ranges_find_last should have the value 202207L in c++23"
# endif
+# ifdef __cpp_lib_ranges_indices
+# error "__cpp_lib_ranges_indices should not be defined before c++26"
+# endif
+
# ifndef __cpp_lib_ranges_iota
# error "__cpp_lib_ranges_iota should be defined in c++23"
# endif
@@ -7610,6 +7630,13 @@
# error "__cpp_lib_ranges_find_last should have the value 202207L in c++26"
# endif
+# ifndef __cpp_lib_ranges_indices
+# error "__cpp_lib_ranges_indices should be defined in c++26"
+# endif
+# if __cpp_lib_ranges_indices != 202506L
+# error "__cpp_lib_ranges_indices should have the value 202506L in c++26"
+# endif
+
# ifndef __cpp_lib_ranges_iota
# error "__cpp_lib_ranges_iota should be defined in c++26"
# endif
diff --git a/libcxx/test/std/library/description/conventions/customization.point.object/cpo.compile.pass.cpp b/libcxx/test/std/library/description/conventions/customization.point.object/cpo.compile.pass.cpp
index 4949787..7e2510f 100644
--- a/libcxx/test/std/library/description/conventions/customization.point.object/cpo.compile.pass.cpp
+++ b/libcxx/test/std/library/description/conventions/customization.point.object/cpo.compile.pass.cpp
@@ -89,6 +89,9 @@ static_assert(test(std::ranges::ssize, a));
// views::empty<T> is not a CPO
static_assert(test(std::views::iota, 1));
static_assert(test(std::views::iota, 1, 10));
+#if TEST_STD_VER >= 26
+static_assert(test(std::views::indices, 10));
+#endif
#ifndef TEST_HAS_NO_LOCALIZATION
static_assert(test(std::views::istream<int>, stream));
#endif
diff --git a/libcxx/test/std/ranges/range.factories/range.iota.view/indices.pass.cpp b/libcxx/test/std/ranges/range.factories/range.iota.view/indices.pass.cpp
new file mode 100644
index 0000000..d92b6cb
--- /dev/null
+++ b/libcxx/test/std/ranges/range.factories/range.iota.view/indices.pass.cpp
@@ -0,0 +1,97 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// REQUIRES: std-at-least-c++26
+
+// ranges
+
+// inline constexpr unspecified indices = unspecified;
+
+#include <cassert>
+#include <cstddef>
+#include <ranges>
+#include <vector>
+
+#include "test_macros.h"
+#define TEST_HAS_NO_INT128 // Size cannot be larger than 64 bits
+#include "type_algorithms.h"
+
+#include "types.h"
+
+// Test SFINAE.
+
+template <typename SizeType>
+concept HasIndices = requires(SizeType s) { std::ranges::views::indices(s); };
+
+struct NotIntegerLike {};
+
+void test_SFINAE() {
+ static_assert(HasIndices<std::size_t>);
+ types::for_each(types::integer_types(), []<typename T> { static_assert(HasIndices<T>); });
+
+ // Non-integer-like types should not satisfy HasIndices
+ static_assert(!HasIndices<bool>);
+ static_assert(!HasIndices<float>);
+ static_assert(!HasIndices<void>);
+ static_assert(!HasIndices<SomeInt>); // Does satisfy is_integer_like, but not the conversion to std::size_t
+ static_assert(!HasIndices<NotIntegerLike>);
+}
+
+constexpr bool test() {
+ {
+ auto indices_view = std::ranges::views::indices(5);
+ static_assert(std::ranges::range<decltype(indices_view)>);
+
+ assert(indices_view.size() == 5);
+
+ assert(indices_view[0] == 0);
+ assert(indices_view[1] == 1);
+ assert(indices_view[2] == 2);
+ assert(indices_view[3] == 3);
+ assert(indices_view[4] == 4);
+ }
+
+ {
+ std::vector v(5, 0);
+
+ auto indices_view = std::ranges::views::indices(std::ranges::size(v));
+ static_assert(std::ranges::range<decltype(indices_view)>);
+
+ assert(indices_view.size() == 5);
+
+ assert(indices_view[0] == 0);
+ assert(indices_view[1] == 1);
+ assert(indices_view[2] == 2);
+ assert(indices_view[3] == 3);
+ assert(indices_view[4] == 4);
+ }
+
+ {
+ std::vector v(5, SomeInt{});
+
+ auto indices_view = std::ranges::views::indices(std::ranges::size(v));
+ static_assert(std::ranges::range<decltype(indices_view)>);
+
+ assert(indices_view.size() == 5);
+
+ assert(indices_view[0] == 0);
+ assert(indices_view[1] == 1);
+ assert(indices_view[2] == 2);
+ assert(indices_view[3] == 3);
+ assert(indices_view[4] == 4);
+ }
+
+ return true;
+}
+
+int main(int, char**) {
+ test();
+ static_assert(test());
+
+ return 0;
+}
diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py
index 5d469d4..2d5b66d9 100644
--- a/libcxx/utils/generate_feature_test_macro_components.py
+++ b/libcxx/utils/generate_feature_test_macro_components.py
@@ -1114,6 +1114,11 @@ feature_test_macros = [
"headers": ["algorithm"],
},
{
+ "name": "__cpp_lib_ranges_indices",
+ "values": {"c++26": 202506},
+ "headers": ["ranges"],
+ },
+ {
"name": "__cpp_lib_ranges_iota",
"values": {"c++23": 202202},
"headers": ["numeric"],
diff --git a/lldb/docs/resources/extensions.rst b/lldb/docs/resources/extensions.rst
index 30bd6d5..61fffe7 100644
--- a/lldb/docs/resources/extensions.rst
+++ b/lldb/docs/resources/extensions.rst
@@ -134,5 +134,5 @@ Objective-C runtime
Clang emits the Objective-C runtime version into the
``DW_TAG_compile_unit`` using the
-``DW_AT_APPLE_major_runtime_version`` attribute. The value 2 stands
+``DW_AT_APPLE_major_runtime_vers`` attribute. The value 2 stands
for Objective-C 2.0.
diff --git a/llvm/include/llvm/CAS/OnDiskDataAllocator.h b/llvm/include/llvm/CAS/OnDiskDataAllocator.h
new file mode 100644
index 0000000..2809df8
--- /dev/null
+++ b/llvm/include/llvm/CAS/OnDiskDataAllocator.h
@@ -0,0 +1,95 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file declares interface for OnDiskDataAllocator, a file backed data
+/// pool can be used to allocate space to store data packed in a single file. It
+/// is based on MappedFileRegionArena and includes a header in the beginning to
+/// provide metadata.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CAS_ONDISKDATAALLOCATOR_H
+#define LLVM_CAS_ONDISKDATAALLOCATOR_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/CAS/FileOffset.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm::cas {
+
+/// Sink for data. Stores variable length data with 8-byte alignment. Does not
+/// track size of data, which is assumed to known from context, or embedded.
+/// Uses 0-padding but does not guarantee 0-termination.
+class OnDiskDataAllocator {
+public:
+ using ValueProxy = MutableArrayRef<char>;
+
+ /// A pointer to data stored on disk.
+ class OnDiskPtr {
+ public:
+ FileOffset getOffset() const { return Offset; }
+ explicit operator bool() const { return bool(getOffset()); }
+ const ValueProxy &operator*() const {
+ assert(Offset && "Null dereference");
+ return Value;
+ }
+ const ValueProxy *operator->() const {
+ assert(Offset && "Null dereference");
+ return &Value;
+ }
+
+ OnDiskPtr() = default;
+
+ private:
+ friend class OnDiskDataAllocator;
+ OnDiskPtr(FileOffset Offset, ValueProxy Value)
+ : Offset(Offset), Value(Value) {}
+ FileOffset Offset;
+ ValueProxy Value;
+ };
+
+ /// Get the data of \p Size stored at the given \p Offset. Note the allocator
+ /// doesn't keep track of the allocation size, thus \p Size doesn't need to
+ /// match the size of allocation but needs to be smaller.
+ Expected<ArrayRef<char>> get(FileOffset Offset, size_t Size) const;
+
+ /// Allocate at least \p Size with 8-byte alignment.
+ Expected<OnDiskPtr> allocate(size_t Size);
+
+ /// \returns the buffer that was allocated at \p create time, with size
+ /// \p UserHeaderSize.
+ MutableArrayRef<uint8_t> getUserHeader();
+
+ size_t size() const;
+ size_t capacity() const;
+
+ static Expected<OnDiskDataAllocator>
+ create(const Twine &Path, const Twine &TableName, uint64_t MaxFileSize,
+ std::optional<uint64_t> NewFileInitialSize,
+ uint32_t UserHeaderSize = 0,
+ function_ref<void(void *)> UserHeaderInit = nullptr);
+
+ OnDiskDataAllocator(OnDiskDataAllocator &&RHS);
+ OnDiskDataAllocator &operator=(OnDiskDataAllocator &&RHS);
+
+ // No copy. Just call \a create() again.
+ OnDiskDataAllocator(const OnDiskDataAllocator &) = delete;
+ OnDiskDataAllocator &operator=(const OnDiskDataAllocator &) = delete;
+
+ ~OnDiskDataAllocator();
+
+private:
+ struct ImplType;
+ explicit OnDiskDataAllocator(std::unique_ptr<ImplType> Impl);
+ std::unique_ptr<ImplType> Impl;
+};
+
+} // namespace llvm::cas
+
+#endif // LLVM_CAS_ONDISKDATAALLOCATOR_H
diff --git a/llvm/include/llvm/CAS/OnDiskTrieRawHashMap.h b/llvm/include/llvm/CAS/OnDiskTrieRawHashMap.h
index 5e41bf6..fbd68d0 100644
--- a/llvm/include/llvm/CAS/OnDiskTrieRawHashMap.h
+++ b/llvm/include/llvm/CAS/OnDiskTrieRawHashMap.h
@@ -133,38 +133,38 @@ public:
bool IsValue = false;
};
- class pointer;
- class const_pointer : public PointerImpl<ConstValueProxy> {
+ class OnDiskPtr;
+ class ConstOnDiskPtr : public PointerImpl<ConstValueProxy> {
public:
- const_pointer() = default;
+ ConstOnDiskPtr() = default;
private:
- friend class pointer;
+ friend class OnDiskPtr;
friend class OnDiskTrieRawHashMap;
- using const_pointer::PointerImpl::PointerImpl;
+ using ConstOnDiskPtr::PointerImpl::PointerImpl;
};
- class pointer : public PointerImpl<ValueProxy> {
+ class OnDiskPtr : public PointerImpl<ValueProxy> {
public:
- operator const_pointer() const {
- return const_pointer(Value, getOffset(), IsValue);
+ operator ConstOnDiskPtr() const {
+ return ConstOnDiskPtr(Value, getOffset(), IsValue);
}
- pointer() = default;
+ OnDiskPtr() = default;
private:
friend class OnDiskTrieRawHashMap;
- using pointer::PointerImpl::PointerImpl;
+ using OnDiskPtr::PointerImpl::PointerImpl;
};
/// Find the value from hash.
///
/// \returns pointer to the value if exists, otherwise returns a non-value
/// pointer that evaluates to `false` when convert to boolean.
- const_pointer find(ArrayRef<uint8_t> Hash) const;
+ ConstOnDiskPtr find(ArrayRef<uint8_t> Hash) const;
/// Helper function to recover a pointer into the trie from file offset.
- Expected<const_pointer> recoverFromFileOffset(FileOffset Offset) const;
+ Expected<ConstOnDiskPtr> recoverFromFileOffset(FileOffset Offset) const;
using LazyInsertOnConstructCB =
function_ref<void(FileOffset TentativeOffset, ValueProxy TentativeValue)>;
@@ -186,11 +186,11 @@ public:
/// The in-memory \a TrieRawHashMap uses LazyAtomicPointer to synchronize
/// simultaneous writes, but that seems dangerous to use in a memory-mapped
/// file in case a process crashes in the busy state.
- Expected<pointer> insertLazy(ArrayRef<uint8_t> Hash,
- LazyInsertOnConstructCB OnConstruct = nullptr,
- LazyInsertOnLeakCB OnLeak = nullptr);
+ Expected<OnDiskPtr> insertLazy(ArrayRef<uint8_t> Hash,
+ LazyInsertOnConstructCB OnConstruct = nullptr,
+ LazyInsertOnLeakCB OnLeak = nullptr);
- Expected<pointer> insert(const ConstValueProxy &Value) {
+ Expected<OnDiskPtr> insert(const ConstValueProxy &Value) {
return insertLazy(Value.Hash, [&](FileOffset, ValueProxy Allocated) {
assert(Allocated.Hash == Value.Hash);
assert(Allocated.Data.size() == Value.Data.size());
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h b/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h
index 754714d..eaca0a8 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h
@@ -79,6 +79,19 @@ public:
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
+/// Additional 'norecurse' attribute deduction during postlink LTO phase.
+///
+/// This is a module pass that infers 'norecurse' attribute on functions.
+/// It runs during LTO and analyzes the module's call graph to find functions
+/// that are guaranteed not to call themselves, either directly or indirectly.
+/// The pass uses a module-wide flag which checks if any function's address is
+/// taken or any function in the module has external linkage, to safely handle
+/// indirect and library function calls from current function.
+class NoRecurseLTOInferencePass
+ : public PassInfoMixin<NoRecurseLTOInferencePass> {
+public:
+ LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
+};
} // end namespace llvm
#endif // LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H
diff --git a/llvm/lib/CAS/CMakeLists.txt b/llvm/lib/CAS/CMakeLists.txt
index 7ae5f7e..bca39b6 100644
--- a/llvm/lib/CAS/CMakeLists.txt
+++ b/llvm/lib/CAS/CMakeLists.txt
@@ -7,6 +7,7 @@ add_llvm_component_library(LLVMCAS
MappedFileRegionArena.cpp
ObjectStore.cpp
OnDiskCommon.cpp
+ OnDiskDataAllocator.cpp
OnDiskTrieRawHashMap.cpp
ADDITIONAL_HEADER_DIRS
diff --git a/llvm/lib/CAS/OnDiskDataAllocator.cpp b/llvm/lib/CAS/OnDiskDataAllocator.cpp
new file mode 100644
index 0000000..13bbd66
--- /dev/null
+++ b/llvm/lib/CAS/OnDiskDataAllocator.cpp
@@ -0,0 +1,234 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file Implements OnDiskDataAllocator.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/OnDiskDataAllocator.h"
+#include "DatabaseFile.h"
+#include "llvm/Config/llvm-config.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+using namespace llvm::cas::ondisk;
+
+#if LLVM_ENABLE_ONDISK_CAS
+
+//===----------------------------------------------------------------------===//
+// DataAllocator data structures.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// DataAllocator table layout:
+/// - [8-bytes: Generic table header]
+/// - 8-bytes: AllocatorOffset (reserved for implementing free lists)
+/// - 8-bytes: Size for user data header
+/// - <user data buffer>
+///
+/// Record layout:
+/// - <data>
+class DataAllocatorHandle {
+public:
+ static constexpr TableHandle::TableKind Kind =
+ TableHandle::TableKind::DataAllocator;
+
+ struct Header {
+ TableHandle::Header GenericHeader;
+ std::atomic<int64_t> AllocatorOffset;
+ const uint64_t UserHeaderSize;
+ };
+
+ operator TableHandle() const {
+ if (!H)
+ return TableHandle();
+ return TableHandle(*Region, H->GenericHeader);
+ }
+
+ Expected<MutableArrayRef<char>> allocate(MappedFileRegionArena &Alloc,
+ size_t DataSize) {
+ assert(&Alloc.getRegion() == Region);
+ auto Ptr = Alloc.allocate(DataSize);
+ if (LLVM_UNLIKELY(!Ptr))
+ return Ptr.takeError();
+ return MutableArrayRef(*Ptr, DataSize);
+ }
+
+ explicit operator bool() const { return H; }
+ const Header &getHeader() const { return *H; }
+ MappedFileRegion &getRegion() const { return *Region; }
+
+ MutableArrayRef<uint8_t> getUserHeader() {
+ return MutableArrayRef(reinterpret_cast<uint8_t *>(H + 1),
+ H->UserHeaderSize);
+ }
+
+ static Expected<DataAllocatorHandle>
+ create(MappedFileRegionArena &Alloc, StringRef Name, uint32_t UserHeaderSize);
+
+ DataAllocatorHandle() = default;
+ DataAllocatorHandle(MappedFileRegion &Region, Header &H)
+ : Region(&Region), H(&H) {}
+ DataAllocatorHandle(MappedFileRegion &Region, intptr_t HeaderOffset)
+ : DataAllocatorHandle(
+ Region, *reinterpret_cast<Header *>(Region.data() + HeaderOffset)) {
+ }
+
+private:
+ MappedFileRegion *Region = nullptr;
+ Header *H = nullptr;
+};
+
+} // end anonymous namespace
+
+struct OnDiskDataAllocator::ImplType {
+ DatabaseFile File;
+ DataAllocatorHandle Store;
+};
+
+Expected<DataAllocatorHandle>
+DataAllocatorHandle::create(MappedFileRegionArena &Alloc, StringRef Name,
+ uint32_t UserHeaderSize) {
+ // Allocate.
+ auto Offset =
+ Alloc.allocateOffset(sizeof(Header) + UserHeaderSize + Name.size() + 1);
+ if (LLVM_UNLIKELY(!Offset))
+ return Offset.takeError();
+
+ // Construct the header and the name.
+ assert(Name.size() <= UINT16_MAX && "Expected smaller table name");
+ auto *H = new (Alloc.getRegion().data() + *Offset)
+ Header{{TableHandle::TableKind::DataAllocator,
+ static_cast<uint16_t>(Name.size()),
+ static_cast<int32_t>(sizeof(Header) + UserHeaderSize)},
+ /*AllocatorOffset=*/{0},
+ /*UserHeaderSize=*/UserHeaderSize};
+ // Memset UserHeader.
+ char *UserHeader = reinterpret_cast<char *>(H + 1);
+ memset(UserHeader, 0, UserHeaderSize);
+ // Write database file name (null-terminated).
+ char *NameStorage = UserHeader + UserHeaderSize;
+ llvm::copy(Name, NameStorage);
+ NameStorage[Name.size()] = 0;
+ return DataAllocatorHandle(Alloc.getRegion(), *H);
+}
+
+Expected<OnDiskDataAllocator> OnDiskDataAllocator::create(
+ const Twine &PathTwine, const Twine &TableNameTwine, uint64_t MaxFileSize,
+ std::optional<uint64_t> NewFileInitialSize, uint32_t UserHeaderSize,
+ function_ref<void(void *)> UserHeaderInit) {
+ assert(!UserHeaderSize || UserHeaderInit);
+ SmallString<128> PathStorage;
+ StringRef Path = PathTwine.toStringRef(PathStorage);
+ SmallString<128> TableNameStorage;
+ StringRef TableName = TableNameTwine.toStringRef(TableNameStorage);
+
+ // Constructor for if the file doesn't exist.
+ auto NewDBConstructor = [&](DatabaseFile &DB) -> Error {
+ auto Store =
+ DataAllocatorHandle::create(DB.getAlloc(), TableName, UserHeaderSize);
+ if (LLVM_UNLIKELY(!Store))
+ return Store.takeError();
+
+ if (auto E = DB.addTable(*Store))
+ return E;
+
+ if (UserHeaderSize)
+ UserHeaderInit(Store->getUserHeader().data());
+ return Error::success();
+ };
+
+ // Get or create the file.
+ Expected<DatabaseFile> File =
+ DatabaseFile::create(Path, MaxFileSize, NewDBConstructor);
+ if (!File)
+ return File.takeError();
+
+ // Find the table and validate it.
+ std::optional<TableHandle> Table = File->findTable(TableName);
+ if (!Table)
+ return createTableConfigError(std::errc::argument_out_of_domain, Path,
+ TableName, "table not found");
+ if (Error E = checkTable("table kind", (size_t)DataAllocatorHandle::Kind,
+ (size_t)Table->getHeader().Kind, Path, TableName))
+ return std::move(E);
+ auto Store = Table->cast<DataAllocatorHandle>();
+ assert(Store && "Already checked the kind");
+
+ // Success.
+ OnDiskDataAllocator::ImplType Impl{DatabaseFile(std::move(*File)), Store};
+ return OnDiskDataAllocator(std::make_unique<ImplType>(std::move(Impl)));
+}
+
+Expected<OnDiskDataAllocator::OnDiskPtr>
+OnDiskDataAllocator::allocate(size_t Size) {
+ auto Data = Impl->Store.allocate(Impl->File.getAlloc(), Size);
+ if (LLVM_UNLIKELY(!Data))
+ return Data.takeError();
+
+ return OnDiskPtr(FileOffset(Data->data() - Impl->Store.getRegion().data()),
+ *Data);
+}
+
+Expected<ArrayRef<char>> OnDiskDataAllocator::get(FileOffset Offset,
+ size_t Size) const {
+ assert(Offset);
+ assert(Impl);
+ if (Offset.get() + Size >= Impl->File.getAlloc().size())
+ return createStringError(make_error_code(std::errc::protocol_error),
+ "requested size too large in allocator");
+ return ArrayRef<char>{Impl->File.getRegion().data() + Offset.get(), Size};
+}
+
+MutableArrayRef<uint8_t> OnDiskDataAllocator::getUserHeader() {
+ return Impl->Store.getUserHeader();
+}
+
+size_t OnDiskDataAllocator::size() const { return Impl->File.size(); }
+size_t OnDiskDataAllocator::capacity() const {
+ return Impl->File.getRegion().size();
+}
+
+OnDiskDataAllocator::OnDiskDataAllocator(std::unique_ptr<ImplType> Impl)
+ : Impl(std::move(Impl)) {}
+
+#else // !LLVM_ENABLE_ONDISK_CAS
+
+struct OnDiskDataAllocator::ImplType {};
+
+Expected<OnDiskDataAllocator> OnDiskDataAllocator::create(
+ const Twine &Path, const Twine &TableName, uint64_t MaxFileSize,
+ std::optional<uint64_t> NewFileInitialSize, uint32_t UserHeaderSize,
+ function_ref<void(void *)> UserHeaderInit) {
+ return createStringError(make_error_code(std::errc::not_supported),
+ "OnDiskDataAllocator is not supported");
+}
+
+Expected<OnDiskDataAllocator::OnDiskPtr>
+OnDiskDataAllocator::allocate(size_t Size) {
+ return createStringError(make_error_code(std::errc::not_supported),
+ "OnDiskDataAllocator is not supported");
+}
+
+Expected<ArrayRef<char>> OnDiskDataAllocator::get(FileOffset Offset,
+ size_t Size) const {
+ return createStringError(make_error_code(std::errc::not_supported),
+ "OnDiskDataAllocator is not supported");
+}
+
+MutableArrayRef<uint8_t> OnDiskDataAllocator::getUserHeader() { return {}; }
+
+size_t OnDiskDataAllocator::size() const { return 0; }
+size_t OnDiskDataAllocator::capacity() const { return 0; }
+
+#endif // LLVM_ENABLE_ONDISK_CAS
+
+OnDiskDataAllocator::OnDiskDataAllocator(OnDiskDataAllocator &&RHS) = default;
+OnDiskDataAllocator &
+OnDiskDataAllocator::operator=(OnDiskDataAllocator &&RHS) = default;
+OnDiskDataAllocator::~OnDiskDataAllocator() = default;
diff --git a/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp
index 9403893..323b21e 100644
--- a/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp
+++ b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp
@@ -427,7 +427,7 @@ TrieRawHashMapHandle::createRecord(MappedFileRegionArena &Alloc,
return Record;
}
-Expected<OnDiskTrieRawHashMap::const_pointer>
+Expected<OnDiskTrieRawHashMap::ConstOnDiskPtr>
OnDiskTrieRawHashMap::recoverFromFileOffset(FileOffset Offset) const {
// Check alignment.
if (!isAligned(MappedFileRegionArena::getAlign(), Offset.get()))
@@ -448,17 +448,17 @@ OnDiskTrieRawHashMap::recoverFromFileOffset(FileOffset Offset) const {
// Looks okay...
TrieRawHashMapHandle::RecordData D =
Impl->Trie.getRecord(SubtrieSlotValue::getDataOffset(Offset));
- return const_pointer(D.Proxy, D.getFileOffset());
+ return ConstOnDiskPtr(D.Proxy, D.getFileOffset());
}
-OnDiskTrieRawHashMap::const_pointer
+OnDiskTrieRawHashMap::ConstOnDiskPtr
OnDiskTrieRawHashMap::find(ArrayRef<uint8_t> Hash) const {
TrieRawHashMapHandle Trie = Impl->Trie;
assert(Hash.size() == Trie.getNumHashBytes() && "Invalid hash");
SubtrieHandle S = Trie.getRoot();
if (!S)
- return const_pointer();
+ return ConstOnDiskPtr();
TrieHashIndexGenerator IndexGen = Trie.getIndexGen(S, Hash);
size_t Index = IndexGen.next();
@@ -466,13 +466,13 @@ OnDiskTrieRawHashMap::find(ArrayRef<uint8_t> Hash) const {
// Try to set the content.
SubtrieSlotValue V = S.load(Index);
if (!V)
- return const_pointer();
+ return ConstOnDiskPtr();
// Check for an exact match.
if (V.isData()) {
TrieRawHashMapHandle::RecordData D = Trie.getRecord(V);
- return D.Proxy.Hash == Hash ? const_pointer(D.Proxy, D.getFileOffset())
- : const_pointer();
+ return D.Proxy.Hash == Hash ? ConstOnDiskPtr(D.Proxy, D.getFileOffset())
+ : ConstOnDiskPtr();
}
Index = IndexGen.next();
@@ -490,7 +490,7 @@ void SubtrieHandle::reinitialize(uint32_t StartBit, uint32_t NumBits) {
H->NumBits = NumBits;
}
-Expected<OnDiskTrieRawHashMap::pointer>
+Expected<OnDiskTrieRawHashMap::OnDiskPtr>
OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash,
LazyInsertOnConstructCB OnConstruct,
LazyInsertOnLeakCB OnLeak) {
@@ -523,7 +523,8 @@ OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash,
}
if (S->compare_exchange_strong(Index, Existing, NewRecord->Offset))
- return pointer(NewRecord->Proxy, NewRecord->Offset.asDataFileOffset());
+ return OnDiskPtr(NewRecord->Proxy,
+ NewRecord->Offset.asDataFileOffset());
// Race means that Existing is no longer empty; fall through...
}
@@ -540,8 +541,8 @@ OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash,
if (NewRecord && OnLeak)
OnLeak(NewRecord->Offset.asDataFileOffset(), NewRecord->Proxy,
ExistingRecord.Offset.asDataFileOffset(), ExistingRecord.Proxy);
- return pointer(ExistingRecord.Proxy,
- ExistingRecord.Offset.asDataFileOffset());
+ return OnDiskPtr(ExistingRecord.Proxy,
+ ExistingRecord.Offset.asDataFileOffset());
}
// Sink the existing content as long as the indexes match.
@@ -1135,7 +1136,7 @@ OnDiskTrieRawHashMap::create(const Twine &PathTwine, const Twine &TrieNameTwine,
"OnDiskTrieRawHashMap is not supported");
}
-Expected<OnDiskTrieRawHashMap::pointer>
+Expected<OnDiskTrieRawHashMap::OnDiskPtr>
OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash,
LazyInsertOnConstructCB OnConstruct,
LazyInsertOnLeakCB OnLeak) {
@@ -1143,15 +1144,15 @@ OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash,
"OnDiskTrieRawHashMap is not supported");
}
-Expected<OnDiskTrieRawHashMap::const_pointer>
+Expected<OnDiskTrieRawHashMap::ConstOnDiskPtr>
OnDiskTrieRawHashMap::recoverFromFileOffset(FileOffset Offset) const {
return createStringError(make_error_code(std::errc::not_supported),
"OnDiskTrieRawHashMap is not supported");
}
-OnDiskTrieRawHashMap::const_pointer
+OnDiskTrieRawHashMap::ConstOnDiskPtr
OnDiskTrieRawHashMap::find(ArrayRef<uint8_t> Hash) const {
- return const_pointer();
+ return ConstOnDiskPtr();
}
void OnDiskTrieRawHashMap::print(
diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp
index 1a7a5c5..c3a472b 100644
--- a/llvm/lib/IR/Globals.cpp
+++ b/llvm/lib/IR/Globals.cpp
@@ -419,6 +419,7 @@ findBaseObject(const Constant *C, DenseSet<const GlobalAlias *> &Aliases,
case Instruction::PtrToAddr:
case Instruction::PtrToInt:
case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
case Instruction::GetElementPtr:
return findBaseObject(CE->getOperand(0), Aliases, Op);
default:
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 7069e8d..119caea 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1960,6 +1960,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// is fixed.
MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
+ MPM.addPass(NoRecurseLTOInferencePass());
// Stop here at -O1.
if (Level == OptimizationLevel::O1) {
// The LowerTypeTestsPass needs to run to lower type metadata and the
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index f0e7d36..88550ea 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -119,6 +119,7 @@ MODULE_PASS("metarenamer", MetaRenamerPass())
MODULE_PASS("module-inline", ModuleInlinerPass())
MODULE_PASS("name-anon-globals", NameAnonGlobalPass())
MODULE_PASS("no-op-module", NoOpModulePass())
+MODULE_PASS("norecurse-lto-inference", NoRecurseLTOInferencePass())
MODULE_PASS("nsan", NumericalStabilitySanitizerPass())
MODULE_PASS("openmp-opt", OpenMPOptPass())
MODULE_PASS("openmp-opt-postlink",
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 50a8754..479e345 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -5666,18 +5666,21 @@ InstructionCost AArch64TTIImpl::getPartialReductionCost(
VectorType *AccumVectorType =
VectorType::get(AccumType, VF.divideCoefficientBy(Ratio));
// We don't yet support all kinds of legalization.
- auto TA = TLI->getTypeAction(AccumVectorType->getContext(),
- EVT::getEVT(AccumVectorType));
- switch (TA) {
+ auto TC = TLI->getTypeConversion(AccumVectorType->getContext(),
+ EVT::getEVT(AccumVectorType));
+ switch (TC.first) {
default:
return Invalid;
case TargetLowering::TypeLegal:
case TargetLowering::TypePromoteInteger:
case TargetLowering::TypeSplitVector:
+ // The legalised type (e.g. after splitting) must be legal too.
+ if (TLI->getTypeAction(AccumVectorType->getContext(), TC.second) !=
+ TargetLowering::TypeLegal)
+ return Invalid;
break;
}
- // Check what kind of type-legalisation happens.
std::pair<InstructionCost, MVT> AccumLT =
getTypeLegalizationCost(AccumVectorType);
std::pair<InstructionCost, MVT> InputLT =
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 6b3c151..1a697f7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1448,10 +1448,10 @@ def Feature45BitNumRecordsBufferResource : SubtargetFeature< "45-bit-num-records
"The buffer resource (V#) supports 45-bit num_records"
>;
-def FeatureCluster : SubtargetFeature< "cluster",
- "HasCluster",
+def FeatureClusters : SubtargetFeature< "clusters",
+ "HasClusters",
"true",
- "Has cluster support"
+ "Has clusters of workgroups support"
>;
// Dummy feature used to disable assembler instructions.
@@ -2120,7 +2120,7 @@ def FeatureISAVersion12_50 : FeatureSet<
Feature45BitNumRecordsBufferResource,
FeatureSupportsXNACK,
FeatureXNACK,
- FeatureCluster,
+ FeatureClusters,
]>;
def FeatureISAVersion12_51 : FeatureSet<
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
index 7b94ea3..f291e37 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
@@ -541,7 +541,7 @@ unsigned GCNSubtarget::getMaxNumSGPRs(const Function &F) const {
unsigned GCNSubtarget::getBaseMaxNumVGPRs(
const Function &F, std::pair<unsigned, unsigned> NumVGPRBounds) const {
- const auto &[Min, Max] = NumVGPRBounds;
+ const auto [Min, Max] = NumVGPRBounds;
// Check if maximum number of VGPRs was explicitly requested using
// "amdgpu-num-vgpr" attribute.
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 879bf5a..c2e6078 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -288,7 +288,7 @@ protected:
bool Has45BitNumRecordsBufferResource = false;
- bool HasCluster = false;
+ bool HasClusters = false;
// Dummy feature to use for assembler in tablegen.
bool FeatureDisable = false;
@@ -1839,7 +1839,7 @@ public:
}
/// \returns true if the subtarget supports clusters of workgroups.
- bool hasClusters() const { return HasCluster; }
+ bool hasClusters() const { return HasClusters; }
/// \returns true if the subtarget requires a wait for xcnt before atomic
/// flat/global stores & rmw.
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 3c2dd42..3115579 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1118,12 +1118,7 @@ SIRegisterInfo::getPointerRegClass(unsigned Kind) const {
const TargetRegisterClass *
SIRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
- if (isAGPRClass(RC) && !ST.hasGFX90AInsts())
- return getEquivalentVGPRClass(RC);
- if (RC == &AMDGPU::SCC_CLASSRegClass)
- return getWaveMaskRegClass();
-
- return RC;
+ return RC == &AMDGPU::SCC_CLASSRegClass ? &AMDGPU::SReg_32RegClass : RC;
}
static unsigned getNumSubRegsForSpillOp(const MachineInstr &MI,
diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 1fc475d..561a9c5 100644
--- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -349,32 +349,30 @@ public:
bool isImm() const override {
return Kind == Immediate || Kind == Expression;
}
- bool isU1Imm() const { return Kind == Immediate && isUInt<1>(getImm()); }
- bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); }
- bool isU3Imm() const { return Kind == Immediate && isUInt<3>(getImm()); }
- bool isU4Imm() const { return Kind == Immediate && isUInt<4>(getImm()); }
- bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); }
- bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); }
- bool isU6Imm() const { return Kind == Immediate && isUInt<6>(getImm()); }
- bool isU6ImmX2() const { return Kind == Immediate &&
- isUInt<6>(getImm()) &&
- (getImm() & 1) == 0; }
- bool isU7Imm() const { return Kind == Immediate && isUInt<7>(getImm()); }
- bool isU7ImmX4() const { return Kind == Immediate &&
- isUInt<7>(getImm()) &&
- (getImm() & 3) == 0; }
- bool isU8Imm() const { return Kind == Immediate && isUInt<8>(getImm()); }
- bool isU8ImmX8() const { return Kind == Immediate &&
- isUInt<8>(getImm()) &&
- (getImm() & 7) == 0; }
-
- bool isU10Imm() const { return Kind == Immediate && isUInt<10>(getImm()); }
- bool isU12Imm() const { return Kind == Immediate && isUInt<12>(getImm()); }
+
+ template <uint64_t N> bool isUImm() const {
+ return Kind == Immediate && isUInt<N>(getImm());
+ }
+ template <uint64_t N> bool isSImm() const {
+ return Kind == Immediate && isInt<N>(getImm());
+ }
+ bool isU6ImmX2() const { return isUImm<6>() && (getImm() & 1) == 0; }
+ bool isU7ImmX4() const { return isUImm<7>() && (getImm() & 3) == 0; }
+ bool isU8ImmX8() const { return isUImm<8>() && (getImm() & 7) == 0; }
+
bool isU16Imm() const { return isExtImm<16>(/*Signed*/ false, 1); }
bool isS16Imm() const { return isExtImm<16>(/*Signed*/ true, 1); }
bool isS16ImmX4() const { return isExtImm<16>(/*Signed*/ true, 4); }
bool isS16ImmX16() const { return isExtImm<16>(/*Signed*/ true, 16); }
bool isS17Imm() const { return isExtImm<17>(/*Signed*/ true, 1); }
+ bool isS34Imm() const {
+ // Once the PC-Rel ABI is finalized, evaluate whether a 34-bit
+ // ContextImmediate is needed.
+ return Kind == Expression || isSImm<34>();
+ }
+ bool isS34ImmX16() const {
+ return Kind == Expression || (isSImm<34>() && (getImm() & 15) == 0);
+ }
bool isHashImmX8() const {
// The Hash Imm form is used for instructions that check or store a hash.
@@ -384,16 +382,6 @@ public:
(getImm() & 7) == 0);
}
- bool isS34ImmX16() const {
- return Kind == Expression ||
- (Kind == Immediate && isInt<34>(getImm()) && (getImm() & 15) == 0);
- }
- bool isS34Imm() const {
- // Once the PC-Rel ABI is finalized, evaluate whether a 34-bit
- // ContextImmediate is needed.
- return Kind == Expression || (Kind == Immediate && isInt<34>(getImm()));
- }
-
bool isTLSReg() const { return Kind == TLSRegister; }
bool isDirectBr() const {
if (Kind == Expression)
@@ -1637,7 +1625,7 @@ bool PPCAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name,
if (Operands.size() != 5)
return false;
PPCOperand &EHOp = (PPCOperand &)*Operands[4];
- if (EHOp.isU1Imm() && EHOp.getImm() == 0)
+ if (EHOp.isUImm<1>() && EHOp.getImm() == 0)
Operands.pop_back();
}
@@ -1817,7 +1805,7 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
}
PPCOperand &Op = static_cast<PPCOperand &>(AsmOp);
- if (Op.isU3Imm() && Op.getImm() == ImmVal)
+ if (Op.isUImm<3>() && Op.getImm() == ImmVal)
return Match_Success;
return Match_InvalidOperand;
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 48c31c9..81d8e94 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -206,45 +206,24 @@ PPCMCCodeEmitter::getVSRpEvenEncoding(const MCInst &MI, unsigned OpNo,
return RegBits;
}
-unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
-
- // Add a fixup for the immediate field.
- addFixup(Fixups, IsLittleEndian ? 0 : 2, MO.getExpr(), PPC::fixup_ppc_half16);
- return 0;
-}
-
-uint64_t PPCMCCodeEmitter::getImm34Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI,
- MCFixupKind Fixup) const {
+template <MCFixupKind Fixup>
+uint64_t PPCMCCodeEmitter::getImmEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
assert(!MO.isReg() && "Not expecting a register for this operand.");
if (MO.isImm())
return getMachineOpValue(MI, MO, Fixups, STI);
+ uint32_t Offset = 0;
+ if (Fixup == PPC::fixup_ppc_half16)
+ Offset = IsLittleEndian ? 0 : 2;
+
// Add a fixup for the immediate field.
- addFixup(Fixups, 0, MO.getExpr(), Fixup);
+ addFixup(Fixups, Offset, MO.getExpr(), Fixup);
return 0;
}
-uint64_t
-PPCMCCodeEmitter::getImm34EncodingNoPCRel(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- return getImm34Encoding(MI, OpNo, Fixups, STI, PPC::fixup_ppc_imm34);
-}
-
-uint64_t
-PPCMCCodeEmitter::getImm34EncodingPCRel(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- return getImm34Encoding(MI, OpNo, Fixups, STI, PPC::fixup_ppc_pcrel34);
-}
-
unsigned PPCMCCodeEmitter::getDispRIEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
index b574557..3356513 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
@@ -47,19 +47,10 @@ public:
unsigned getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- unsigned getImm16Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- uint64_t getImm34Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI,
- MCFixupKind Fixup) const;
- uint64_t getImm34EncodingNoPCRel(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- uint64_t getImm34EncodingPCRel(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ template <MCFixupKind Fixup>
+ uint64_t getImmEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getDispRIEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 60efa4c..fdca5ebc 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -14,30 +14,6 @@
//===----------------------------------------------------------------------===//
// 64-bit operands.
//
-def s16imm64 : Operand<i64> {
- let PrintMethod = "printS16ImmOperand";
- let EncoderMethod = "getImm16Encoding";
- let ParserMatchClass = PPCS16ImmAsmOperand;
- let DecoderMethod = "decodeSImmOperand<16>";
- let OperandType = "OPERAND_IMMEDIATE";
-}
-def u16imm64 : Operand<i64> {
- let PrintMethod = "printU16ImmOperand";
- let EncoderMethod = "getImm16Encoding";
- let ParserMatchClass = PPCU16ImmAsmOperand;
- let DecoderMethod = "decodeUImmOperand<16>";
- let OperandType = "OPERAND_IMMEDIATE";
-}
-def s17imm64 : Operand<i64> {
- // This operand type is used for addis/lis to allow the assembler parser
- // to accept immediates in the range -65536..65535 for compatibility with
- // the GNU assembler. The operand is treated as 16-bit otherwise.
- let PrintMethod = "printS16ImmOperand";
- let EncoderMethod = "getImm16Encoding";
- let ParserMatchClass = PPCS17ImmAsmOperand;
- let DecoderMethod = "decodeSImmOperand<16>";
- let OperandType = "OPERAND_IMMEDIATE";
-}
def tocentry : Operand<iPTR> {
let MIOperandInfo = (ops i64imm:$imm);
}
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index c616db4..23d6d88 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -30,6 +30,11 @@
// Altivec transformation functions and pattern fragments.
//
+// fneg is not legal, and desugared as an xor.
+def desugared_fneg : PatFrag<(ops node:$x), (v4f32 (bitconvert (xor (bitconvert $x),
+ (int_ppc_altivec_vslw (bitconvert (v16i8 immAllOnesV)),
+ (bitconvert (v16i8 immAllOnesV))))))>;
+
def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG);
@@ -467,11 +472,12 @@ def VMADDFP : VAForm_1<46, (outs vrrc:$RT), (ins vrrc:$RA, vrrc:$RC, vrrc:$RB),
[(set v4f32:$RT,
(fma v4f32:$RA, v4f32:$RC, v4f32:$RB))]>;
-// FIXME: The fma+fneg pattern won't match because fneg is not legal.
+// fneg is not legal, hence we have to match on the desugared version.
def VNMSUBFP: VAForm_1<47, (outs vrrc:$RT), (ins vrrc:$RA, vrrc:$RC, vrrc:$RB),
"vnmsubfp $RT, $RA, $RC, $RB", IIC_VecFP,
- [(set v4f32:$RT, (fneg (fma v4f32:$RA, v4f32:$RC,
- (fneg v4f32:$RB))))]>;
+ [(set v4f32:$RT, (desugared_fneg (fma v4f32:$RA, v4f32:$RC,
+ (desugared_fneg v4f32:$RB))))]>;
+
let hasSideEffects = 1 in {
def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>;
def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs,
@@ -892,6 +898,13 @@ def : Pat<(mul v8i16:$vA, v8i16:$vB), (VMLADDUHM $vA, $vB, (v8i16(V_SET0H)))>;
// Add
def : Pat<(add (mul v8i16:$vA, v8i16:$vB), v8i16:$vC), (VMLADDUHM $vA, $vB, $vC)>;
+
+// Fused negated multiply-subtract
+def : Pat<(v4f32 (desugared_fneg
+ (int_ppc_altivec_vmaddfp v4f32:$RA, v4f32:$RC,
+ (desugared_fneg v4f32:$RB)))),
+ (VNMSUBFP $RA, $RC, $RB)>;
+
// Saturating adds/subtracts.
def : Pat<(v16i8 (saddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDSBS $vA, $vB))>;
def : Pat<(v16i8 (uaddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDUBS $vA, $vB))>;
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index 6d8c122..65d0484 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -615,7 +615,8 @@ def spe4rc : RegisterOperand<GPRC> {
}
def PPCU1ImmAsmOperand : AsmOperandClass {
- let Name = "U1Imm"; let PredicateMethod = "isU1Imm";
+ let Name = "U1Imm";
+ let PredicateMethod = "isUImm<1>";
let RenderMethod = "addImmOperands";
}
def u1imm : Operand<i32> {
@@ -626,7 +627,8 @@ def u1imm : Operand<i32> {
}
def PPCU2ImmAsmOperand : AsmOperandClass {
- let Name = "U2Imm"; let PredicateMethod = "isU2Imm";
+ let Name = "U2Imm";
+ let PredicateMethod = "isUImm<2>";
let RenderMethod = "addImmOperands";
}
def u2imm : Operand<i32> {
@@ -647,7 +649,8 @@ def atimm : Operand<i32> {
}
def PPCU3ImmAsmOperand : AsmOperandClass {
- let Name = "U3Imm"; let PredicateMethod = "isU3Imm";
+ let Name = "U3Imm";
+ let PredicateMethod = "isUImm<3>";
let RenderMethod = "addImmOperands";
}
def u3imm : Operand<i32> {
@@ -658,7 +661,8 @@ def u3imm : Operand<i32> {
}
def PPCU4ImmAsmOperand : AsmOperandClass {
- let Name = "U4Imm"; let PredicateMethod = "isU4Imm";
+ let Name = "U4Imm";
+ let PredicateMethod = "isUImm<4>";
let RenderMethod = "addImmOperands";
}
def u4imm : Operand<i32> {
@@ -668,7 +672,8 @@ def u4imm : Operand<i32> {
let OperandType = "OPERAND_IMMEDIATE";
}
def PPCS5ImmAsmOperand : AsmOperandClass {
- let Name = "S5Imm"; let PredicateMethod = "isS5Imm";
+ let Name = "S5Imm";
+ let PredicateMethod = "isSImm<5>";
let RenderMethod = "addImmOperands";
}
def s5imm : Operand<i32> {
@@ -678,7 +683,8 @@ def s5imm : Operand<i32> {
let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU5ImmAsmOperand : AsmOperandClass {
- let Name = "U5Imm"; let PredicateMethod = "isU5Imm";
+ let Name = "U5Imm";
+ let PredicateMethod = "isUImm<5>";
let RenderMethod = "addImmOperands";
}
def u5imm : Operand<i32> {
@@ -688,7 +694,8 @@ def u5imm : Operand<i32> {
let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU6ImmAsmOperand : AsmOperandClass {
- let Name = "U6Imm"; let PredicateMethod = "isU6Imm";
+ let Name = "U6Imm";
+ let PredicateMethod = "isUImm<6>";
let RenderMethod = "addImmOperands";
}
def u6imm : Operand<i32> {
@@ -698,7 +705,8 @@ def u6imm : Operand<i32> {
let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU7ImmAsmOperand : AsmOperandClass {
- let Name = "U7Imm"; let PredicateMethod = "isU7Imm";
+ let Name = "U7Imm";
+ let PredicateMethod = "isUImm<7>";
let RenderMethod = "addImmOperands";
}
def u7imm : Operand<i32> {
@@ -708,7 +716,8 @@ def u7imm : Operand<i32> {
let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU8ImmAsmOperand : AsmOperandClass {
- let Name = "U8Imm"; let PredicateMethod = "isU8Imm";
+ let Name = "U8Imm";
+ let PredicateMethod = "isUImm<8>";
let RenderMethod = "addImmOperands";
}
def u8imm : Operand<i32> {
@@ -718,7 +727,8 @@ def u8imm : Operand<i32> {
let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU10ImmAsmOperand : AsmOperandClass {
- let Name = "U10Imm"; let PredicateMethod = "isU10Imm";
+ let Name = "U10Imm";
+ let PredicateMethod = "isUImm<10>";
let RenderMethod = "addImmOperands";
}
def u10imm : Operand<i32> {
@@ -728,7 +738,8 @@ def u10imm : Operand<i32> {
let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU12ImmAsmOperand : AsmOperandClass {
- let Name = "U12Imm"; let PredicateMethod = "isU12Imm";
+ let Name = "U12Imm";
+ let PredicateMethod = "isUImm<12>";
let RenderMethod = "addImmOperands";
}
def u12imm : Operand<i32> {
@@ -743,7 +754,14 @@ def PPCS16ImmAsmOperand : AsmOperandClass {
}
def s16imm : Operand<i32> {
let PrintMethod = "printS16ImmOperand";
- let EncoderMethod = "getImm16Encoding";
+ let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_half16>";
+ let ParserMatchClass = PPCS16ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<16>";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+def s16imm64 : Operand<i64> {
+ let PrintMethod = "printS16ImmOperand";
+ let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_half16>";
let ParserMatchClass = PPCS16ImmAsmOperand;
let DecoderMethod = "decodeSImmOperand<16>";
let OperandType = "OPERAND_IMMEDIATE";
@@ -754,7 +772,14 @@ def PPCU16ImmAsmOperand : AsmOperandClass {
}
def u16imm : Operand<i32> {
let PrintMethod = "printU16ImmOperand";
- let EncoderMethod = "getImm16Encoding";
+ let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_half16>";
+ let ParserMatchClass = PPCU16ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<16>";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+def u16imm64 : Operand<i64> {
+ let PrintMethod = "printU16ImmOperand";
+ let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_half16>";
let ParserMatchClass = PPCU16ImmAsmOperand;
let DecoderMethod = "decodeUImmOperand<16>";
let OperandType = "OPERAND_IMMEDIATE";
@@ -768,7 +793,17 @@ def s17imm : Operand<i32> {
// to accept immediates in the range -65536..65535 for compatibility with
// the GNU assembler. The operand is treated as 16-bit otherwise.
let PrintMethod = "printS16ImmOperand";
- let EncoderMethod = "getImm16Encoding";
+ let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_half16>";
+ let ParserMatchClass = PPCS17ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<16>";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+def s17imm64 : Operand<i64> {
+ // This operand type is used for addis/lis to allow the assembler parser
+ // to accept immediates in the range -65536..65535 for compatibility with
+ // the GNU assembler. The operand is treated as 16-bit otherwise.
+ let PrintMethod = "printS16ImmOperand";
+ let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_half16>";
let ParserMatchClass = PPCS17ImmAsmOperand;
let DecoderMethod = "decodeSImmOperand<16>";
let OperandType = "OPERAND_IMMEDIATE";
@@ -780,14 +815,14 @@ def PPCS34ImmAsmOperand : AsmOperandClass {
}
def s34imm : Operand<i64> {
let PrintMethod = "printS34ImmOperand";
- let EncoderMethod = "getImm34EncodingNoPCRel";
+ let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_imm34>";
let ParserMatchClass = PPCS34ImmAsmOperand;
let DecoderMethod = "decodeSImmOperand<34>";
let OperandType = "OPERAND_IMMEDIATE";
}
def s34imm_pcrel : Operand<i64> {
let PrintMethod = "printS34ImmOperand";
- let EncoderMethod = "getImm34EncodingPCRel";
+ let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_pcrel34>";
let ParserMatchClass = PPCS34ImmAsmOperand;
let DecoderMethod = "decodeSImmOperand<34>";
let OperandType = "OPERAND_IMMEDIATE";
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
index 597dd12..9f9ae2f 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
@@ -324,6 +324,10 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[0] = GPRValueMapping;
+ // Atomics always use GPR destinations. Don't refine any further.
+ if (cast<GLoad>(MI).isAtomic())
+ break;
+
// Use FPR64 for s64 loads on rv32.
if (GPRSize == 32 && Size.getFixedValue() == 64) {
assert(MF.getSubtarget<RISCVSubtarget>().hasStdExtD());
@@ -358,6 +362,10 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[0] = GPRValueMapping;
+ // Atomics always use GPR sources. Don't refine any further.
+ if (cast<GStore>(MI).isAtomic())
+ break;
+
// Use FPR64 for s64 stores on rv32.
if (GPRSize == 32 && Size.getFixedValue() == 64) {
assert(MF.getSubtarget<RISCVSubtarget>().hasStdExtD());
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index 9f2e075..e16c8f0 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -2811,9 +2811,7 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
GetElementPtrInst *NewGEP = simplifyZeroLengthArrayGepInst(Ref);
if (NewGEP) {
Ref->replaceAllUsesWith(NewGEP);
- if (isInstructionTriviallyDead(Ref))
- DeadInsts.insert(Ref);
-
+ DeadInsts.insert(Ref);
Ref = NewGEP;
}
if (Type *GepTy = getGEPType(Ref))
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index b906690..62a3c88 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -444,7 +444,7 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
Features["atomic-fmin-fmax-global-f32"] = true;
Features["atomic-fmin-fmax-global-f64"] = true;
Features["wavefrontsize32"] = true;
- Features["cluster"] = true;
+ Features["clusters"] = true;
break;
case GK_GFX1201:
case GK_GFX1200:
diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 8d9a0e7..50130da 100644
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -2067,6 +2067,36 @@ static void inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes,
AI.run(SCCNodes, Changed);
}
+// Determines if the function 'F' can be marked 'norecurse'.
+// It returns true if any call within 'F' could lead to a recursive
+// call back to 'F', and false otherwise.
+// The 'AnyFunctionsAddressIsTaken' parameter is a module-wide flag
+// that is true if any function's address is taken, or if any function
+// has external linkage. This is used to determine the safety of
+// external/library calls.
+static bool mayHaveRecursiveCallee(Function &F,
+ bool AnyFunctionsAddressIsTaken = true) {
+ for (const auto &BB : F) {
+ for (const auto &I : BB.instructionsWithoutDebug()) {
+ if (const auto *CB = dyn_cast<CallBase>(&I)) {
+ const Function *Callee = CB->getCalledFunction();
+ if (!Callee || Callee == &F)
+ return true;
+
+ if (Callee->doesNotRecurse())
+ continue;
+
+ if (!AnyFunctionsAddressIsTaken ||
+ (Callee->isDeclaration() &&
+ Callee->hasFnAttribute(Attribute::NoCallback)))
+ continue;
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes,
SmallPtrSet<Function *, 8> &Changed) {
// Try and identify functions that do not recurse.
@@ -2078,28 +2108,14 @@ static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes,
Function *F = *SCCNodes.begin();
if (!F || !F->hasExactDefinition() || F->doesNotRecurse())
return;
-
- // If all of the calls in F are identifiable and are to norecurse functions, F
- // is norecurse. This check also detects self-recursion as F is not currently
- // marked norecurse, so any called from F to F will not be marked norecurse.
- for (auto &BB : *F)
- for (auto &I : BB.instructionsWithoutDebug())
- if (auto *CB = dyn_cast<CallBase>(&I)) {
- Function *Callee = CB->getCalledFunction();
- if (!Callee || Callee == F ||
- (!Callee->doesNotRecurse() &&
- !(Callee->isDeclaration() &&
- Callee->hasFnAttribute(Attribute::NoCallback))))
- // Function calls a potentially recursive function.
- return;
- }
-
- // Every call was to a non-recursive function other than this function, and
- // we have no indirect recursion as the SCC size is one. This function cannot
- // recurse.
- F->setDoesNotRecurse();
- ++NumNoRecurse;
- Changed.insert(F);
+ if (!mayHaveRecursiveCallee(*F)) {
+ // Every call was to a non-recursive function other than this function, and
+ // we have no indirect recursion as the SCC size is one. This function
+ // cannot recurse.
+ F->setDoesNotRecurse();
+ ++NumNoRecurse;
+ Changed.insert(F);
+ }
}
// Set the noreturn function attribute if possible.
@@ -2429,3 +2445,62 @@ ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) {
PA.preserve<LazyCallGraphAnalysis>();
return PA;
}
+
+PreservedAnalyses NoRecurseLTOInferencePass::run(Module &M,
+ ModuleAnalysisManager &MAM) {
+
+ // Check if any function in the whole program has its address taken or has
+ // potentially external linkage.
+ // We use this information when inferring norecurse attribute: If there is
+ // no function whose address is taken and all functions have internal
+ // linkage, there is no path for a callback to any user function.
+ bool AnyFunctionsAddressIsTaken = false;
+ for (Function &F : M) {
+ if (F.isDeclaration() || F.doesNotRecurse())
+ continue;
+ if (!F.hasLocalLinkage() || F.hasAddressTaken()) {
+ AnyFunctionsAddressIsTaken = true;
+ break;
+ }
+ }
+
+ // Run norecurse inference on all RefSCCs in the LazyCallGraph for this
+ // module.
+ bool Changed = false;
+ LazyCallGraph &CG = MAM.getResult<LazyCallGraphAnalysis>(M);
+ CG.buildRefSCCs();
+
+ for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
+ // Skip any RefSCC that is part of a call cycle. A RefSCC containing more
+ // than one SCC indicates a recursive relationship involving indirect calls.
+ if (RC.size() > 1)
+ continue;
+
+ // RefSCC contains a single-SCC. SCC size > 1 indicates mutually recursive
+ // functions. Ex: foo1 -> foo2 -> foo3 -> foo1.
+ LazyCallGraph::SCC &S = *RC.begin();
+ if (S.size() > 1)
+ continue;
+
+ // Get the single function from this SCC.
+ Function &F = S.begin()->getFunction();
+ if (!F.hasExactDefinition() || F.doesNotRecurse())
+ continue;
+
+ // If the analysis confirms that this function has no recursive calls
+ // (either direct, indirect, or through external linkages),
+ // we can safely apply the norecurse attribute.
+ if (!mayHaveRecursiveCallee(F, AnyFunctionsAddressIsTaken)) {
+ F.setDoesNotRecurse();
+ ++NumNoRecurse;
+ Changed = true;
+ }
+ }
+
+ PreservedAnalyses PA;
+ if (Changed)
+ PA.preserve<LazyCallGraphAnalysis>();
+ else
+ PA = PreservedAnalyses::all();
+ return PA;
+}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e434e73..d393a9c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8393,11 +8393,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
R->setOperand(1, WideIV->getStepValue());
}
- VPlanTransforms::runPass(
- VPlanTransforms::addExitUsersForFirstOrderRecurrences, *Plan, Range);
+ // TODO: We can't call runPass on these transforms yet, due to verifier
+ // failures.
+ VPlanTransforms::addExitUsersForFirstOrderRecurrences(*Plan, Range);
DenseMap<VPValue *, VPValue *> IVEndValues;
- VPlanTransforms::runPass(VPlanTransforms::addScalarResumePhis, *Plan,
- RecipeBuilder, IVEndValues);
+ VPlanTransforms::addScalarResumePhis(*Plan, RecipeBuilder, IVEndValues);
// ---------------------------------------------------------------------------
// Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -8508,8 +8508,9 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
DenseMap<VPValue *, VPValue *> IVEndValues;
// TODO: IVEndValues are not used yet in the native path, to optimize exit
// values.
- VPlanTransforms::runPass(VPlanTransforms::addScalarResumePhis, *Plan,
- RecipeBuilder, IVEndValues);
+ // TODO: We can't call runPass on the transform yet, due to verifier
+ // failures.
+ VPlanTransforms::addScalarResumePhis(*Plan, RecipeBuilder, IVEndValues);
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
return Plan;
diff --git a/llvm/test/Bitcode/thinlto-alias-addrspacecast.ll b/llvm/test/Bitcode/thinlto-alias-addrspacecast.ll
new file mode 100644
index 0000000..fe4f05e
--- /dev/null
+++ b/llvm/test/Bitcode/thinlto-alias-addrspacecast.ll
@@ -0,0 +1,7 @@
+; RUN: opt -module-summary < %s | llvm-dis | FileCheck %s
+
+@__oclc_ABI_version = linkonce_odr hidden addrspace(4) constant i32 500, align 4
+@_ZL20__oclc_ABI_version__ = internal alias i32, addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr)
+
+; CHECK: ^1 = gv: (name: "__oclc_ABI_version", summaries: (variable: (module: ^0, flags: {{.*}})))
+; CHECK: ^2 = gv: (name: "_ZL20__oclc_ABI_version__", summaries: (alias: (module: ^0, flags: {{.*}}, aliasee: ^1)))
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
index 9e24023..ebbeab9 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
@@ -146,9 +146,9 @@ define void @no_free_vgprs_at_agpr_to_agpr_copy(float %v0, float %v1) #0 {
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; copy
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_accvgpr_read_b32 v32, a2
+; GFX908-NEXT: v_accvgpr_read_b32 v39, a2
; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_accvgpr_write_b32 a3, v32
+; GFX908-NEXT: v_accvgpr_write_b32 a3, v39
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use a3 v[0:31]
; GFX908-NEXT: ;;#ASMEND
@@ -437,9 +437,9 @@ define void @v32_asm_def_use(float %v0, float %v1) #4 {
; GFX908-NEXT: ; copy
; GFX908-NEXT: ;;#ASMEND
; GFX908-NEXT: s_nop 7
-; GFX908-NEXT: v_accvgpr_read_b32 v33, a2
+; GFX908-NEXT: v_accvgpr_read_b32 v35, a2
; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_accvgpr_write_b32 a3, v33
+; GFX908-NEXT: v_accvgpr_write_b32 a3, v35
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use a3 v[0:31]
; GFX908-NEXT: ;;#ASMEND
@@ -1045,9 +1045,9 @@ define void @no_free_vgprs_at_sgpr_to_agpr_copy(float %v0, float %v1) #0 {
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; copy
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_accvgpr_read_b32 v32, a2
+; GFX908-NEXT: v_accvgpr_read_b32 v39, a2
; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_accvgpr_write_b32 a3, v32
+; GFX908-NEXT: v_accvgpr_write_b32 a3, v39
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use a3 v[0:31]
; GFX908-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-propagation.mir b/llvm/test/CodeGen/AMDGPU/agpr-copy-propagation.mir
index a42cf43..7e82382d 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-copy-propagation.mir
+++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-propagation.mir
@@ -40,8 +40,8 @@ body: |
; GFX908: liveins: $agpr0
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: renamable $vgpr0 = COPY renamable $agpr0, implicit $exec
- ; GFX908-NEXT: renamable $agpr1 = COPY renamable $vgpr0, implicit $exec
- ; GFX908-NEXT: renamable $agpr2 = COPY renamable $vgpr0, implicit $exec
+ ; GFX908-NEXT: renamable $agpr1 = COPY $agpr0, implicit $exec
+ ; GFX908-NEXT: renamable $agpr2 = COPY $agpr0, implicit $exec
; GFX908-NEXT: S_ENDPGM 0, implicit $vgpr0, implicit $agpr1, implicit $agpr2
;
; GFX90A-LABEL: name: do_not_propagate_agpr_to_agpr
diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll b/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll
index c4479b3..e3bc516 100644
--- a/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll
+++ b/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll
@@ -15,6 +15,9 @@
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX950 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 -mattr=+sramecc < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX950 %s
+; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1250 < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX1250 %s
+; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1250 -mattr=+sramecc < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX1250 %s
+
; NO-SRAM-ECC-GFX906: Flags [
; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_FEATURE_XNACK_V3 (0x100)
; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
@@ -52,6 +55,11 @@
; SRAM-ECC-GFX950: EF_AMDGPU_MACH_AMDGCN_GFX950 (0x4F)
; SRAM-ECC-GFX950: ]
+; SRAM-ECC-GFX1250: Flags [
+; SRAM-ECC-GFX1250: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
+; SRAM-ECC-GFX1250: EF_AMDGPU_MACH_AMDGCN_GFX1250 (0x49)
+; SRAM-ECC-GFX1250: ]
+
define amdgpu_kernel void @elf_header() {
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll b/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll
index 51cd564..f46116e 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll
@@ -95,66 +95,66 @@ define amdgpu_kernel void @test_mfma_f32_32x32x1f32(ptr addrspace(1) %arg) #0 {
; GREEDY908-NEXT: v_mfma_f32_32x32x1f32 a[32:63], v3, v0, a[0:31]
; GREEDY908-NEXT: s_nop 15
; GREEDY908-NEXT: s_nop 1
-; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a32
-; GREEDY908-NEXT: v_accvgpr_read_b32 v5, a61
-; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a60
-; GREEDY908-NEXT: v_accvgpr_write_b32 a2, v1
-; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a33
-; GREEDY908-NEXT: v_accvgpr_read_b32 v7, a59
-; GREEDY908-NEXT: v_accvgpr_read_b32 v8, a58
-; GREEDY908-NEXT: v_accvgpr_write_b32 a3, v1
+; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a32
+; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a33
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a34
-; GREEDY908-NEXT: v_accvgpr_read_b32 v9, a57
-; GREEDY908-NEXT: v_accvgpr_read_b32 v10, a56
+; GREEDY908-NEXT: v_accvgpr_write_b32 a2, v2
+; GREEDY908-NEXT: v_accvgpr_write_b32 a3, v6
; GREEDY908-NEXT: v_accvgpr_write_b32 a4, v1
-; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a35
-; GREEDY908-NEXT: v_accvgpr_read_b32 v11, a55
-; GREEDY908-NEXT: v_accvgpr_read_b32 v12, a54
-; GREEDY908-NEXT: v_accvgpr_write_b32 a5, v1
-; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a36
-; GREEDY908-NEXT: v_accvgpr_read_b32 v13, a53
-; GREEDY908-NEXT: v_accvgpr_read_b32 v14, a52
-; GREEDY908-NEXT: v_accvgpr_write_b32 a6, v1
+; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a35
+; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a36
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a37
-; GREEDY908-NEXT: v_accvgpr_read_b32 v15, a51
-; GREEDY908-NEXT: v_accvgpr_read_b32 v16, a50
+; GREEDY908-NEXT: v_accvgpr_write_b32 a5, v2
+; GREEDY908-NEXT: v_accvgpr_write_b32 a6, v6
; GREEDY908-NEXT: v_accvgpr_write_b32 a7, v1
-; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a38
-; GREEDY908-NEXT: v_accvgpr_read_b32 v17, a49
-; GREEDY908-NEXT: v_accvgpr_read_b32 v18, a48
-; GREEDY908-NEXT: v_accvgpr_write_b32 a8, v1
-; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a39
-; GREEDY908-NEXT: v_accvgpr_read_b32 v19, a47
-; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a46
-; GREEDY908-NEXT: v_accvgpr_write_b32 a9, v1
+; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a38
+; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a39
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a40
-; GREEDY908-NEXT: v_accvgpr_write_b32 a16, v2
-; GREEDY908-NEXT: v_accvgpr_write_b32 a17, v19
+; GREEDY908-NEXT: v_accvgpr_write_b32 a8, v2
+; GREEDY908-NEXT: v_accvgpr_write_b32 a9, v6
; GREEDY908-NEXT: v_accvgpr_write_b32 a10, v1
-; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a41
-; GREEDY908-NEXT: v_accvgpr_write_b32 a18, v18
-; GREEDY908-NEXT: v_accvgpr_write_b32 a19, v17
-; GREEDY908-NEXT: v_accvgpr_write_b32 a11, v1
-; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a42
-; GREEDY908-NEXT: v_accvgpr_write_b32 a20, v16
-; GREEDY908-NEXT: v_accvgpr_write_b32 a21, v15
-; GREEDY908-NEXT: v_accvgpr_write_b32 a12, v1
+; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a41
+; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a42
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a43
-; GREEDY908-NEXT: v_accvgpr_write_b32 a22, v14
-; GREEDY908-NEXT: v_accvgpr_write_b32 a23, v13
+; GREEDY908-NEXT: v_accvgpr_write_b32 a11, v2
+; GREEDY908-NEXT: v_accvgpr_write_b32 a12, v6
; GREEDY908-NEXT: v_accvgpr_write_b32 a13, v1
-; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a44
-; GREEDY908-NEXT: v_accvgpr_write_b32 a24, v12
-; GREEDY908-NEXT: v_accvgpr_write_b32 a25, v11
-; GREEDY908-NEXT: v_accvgpr_write_b32 a14, v1
-; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a45
-; GREEDY908-NEXT: v_accvgpr_write_b32 a26, v10
-; GREEDY908-NEXT: v_accvgpr_write_b32 a27, v9
-; GREEDY908-NEXT: v_accvgpr_write_b32 a15, v1
-; GREEDY908-NEXT: v_accvgpr_write_b32 a28, v8
-; GREEDY908-NEXT: v_accvgpr_write_b32 a29, v7
+; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a44
+; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a45
+; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a46
+; GREEDY908-NEXT: v_accvgpr_write_b32 a14, v2
+; GREEDY908-NEXT: v_accvgpr_write_b32 a15, v6
+; GREEDY908-NEXT: v_accvgpr_write_b32 a16, v1
+; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a47
+; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a48
+; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a49
+; GREEDY908-NEXT: v_accvgpr_write_b32 a17, v2
+; GREEDY908-NEXT: v_accvgpr_write_b32 a18, v6
+; GREEDY908-NEXT: v_accvgpr_write_b32 a19, v1
+; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a50
+; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a51
+; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a52
+; GREEDY908-NEXT: v_accvgpr_write_b32 a20, v2
+; GREEDY908-NEXT: v_accvgpr_write_b32 a21, v6
+; GREEDY908-NEXT: v_accvgpr_write_b32 a22, v1
+; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a53
+; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a54
+; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a55
+; GREEDY908-NEXT: v_accvgpr_write_b32 a23, v2
+; GREEDY908-NEXT: v_accvgpr_write_b32 a24, v6
+; GREEDY908-NEXT: v_accvgpr_write_b32 a25, v1
+; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a56
+; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a57
+; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a58
+; GREEDY908-NEXT: v_accvgpr_write_b32 a26, v2
+; GREEDY908-NEXT: v_accvgpr_write_b32 a27, v6
+; GREEDY908-NEXT: v_accvgpr_write_b32 a28, v1
+; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a59
+; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a60
+; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a61
+; GREEDY908-NEXT: v_accvgpr_write_b32 a29, v2
; GREEDY908-NEXT: v_accvgpr_write_b32 a30, v6
-; GREEDY908-NEXT: v_accvgpr_write_b32 a31, v5
+; GREEDY908-NEXT: v_accvgpr_write_b32 a31, v1
; GREEDY908-NEXT: s_nop 0
; GREEDY908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v3, v0, a[0:31]
; GREEDY908-NEXT: s_nop 15
@@ -667,11 +667,11 @@ define amdgpu_kernel void @test_mfma_f32_16x16x1f32(ptr addrspace(1) %arg) #0 {
; GREEDY908-NEXT: v_mfma_f32_16x16x1f32 a[18:33], v0, v1, a[18:33]
; GREEDY908-NEXT: v_mfma_f32_16x16x1f32 a[2:17], v0, v1, a[18:33]
; GREEDY908-NEXT: s_nop 8
+; GREEDY908-NEXT: v_accvgpr_read_b32 v5, a18
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a19
-; GREEDY908-NEXT: v_accvgpr_read_b32 v3, a18
; GREEDY908-NEXT: s_nop 0
+; GREEDY908-NEXT: v_accvgpr_write_b32 a0, v5
; GREEDY908-NEXT: v_accvgpr_write_b32 a1, v2
-; GREEDY908-NEXT: v_accvgpr_write_b32 a0, v3
; GREEDY908-NEXT: s_nop 0
; GREEDY908-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[0:15]
; GREEDY908-NEXT: s_nop 9
diff --git a/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll b/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll
index cf244f0..be1788c 100644
--- a/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll
+++ b/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll
@@ -54,19 +54,20 @@ define amdgpu_kernel void @matmul_kernel(i32 %a0, i32 %a1) {
; GFX908-NEXT: s_branch .LBB0_2
; GFX908-NEXT: .LBB0_1: ; %bb2
; GFX908-NEXT: ; in Loop: Header=BB0_2 Depth=1
+; GFX908-NEXT: s_nop 6
+; GFX908-NEXT: v_accvgpr_read_b32 v3, a2
; GFX908-NEXT: s_or_b32 s4, s3, 1
; GFX908-NEXT: s_ashr_i32 s5, s3, 31
; GFX908-NEXT: s_mov_b32 s3, s2
; GFX908-NEXT: v_mov_b32_e32 v1, s2
-; GFX908-NEXT: s_nop 2
-; GFX908-NEXT: v_accvgpr_read_b32 v0, a2
; GFX908-NEXT: v_mov_b32_e32 v2, s3
+; GFX908-NEXT: v_accvgpr_write_b32 a0, v3
; GFX908-NEXT: v_accvgpr_read_b32 v4, a1
; GFX908-NEXT: v_accvgpr_read_b32 v3, a1
-; GFX908-NEXT: v_accvgpr_write_b32 a0, v0
+; GFX908-NEXT: s_and_b32 s3, s5, s4
; GFX908-NEXT: v_accvgpr_write_b32 a2, v4
; GFX908-NEXT: v_accvgpr_write_b32 a3, v3
-; GFX908-NEXT: s_and_b32 s3, s5, s4
+; GFX908-NEXT: s_nop 0
; GFX908-NEXT: v_mfma_f32_16x16x16f16 a[2:5], v[1:2], v[1:2], a[0:3]
; GFX908-NEXT: s_cbranch_execz .LBB0_4
; GFX908-NEXT: .LBB0_2: ; %bb
diff --git a/llvm/test/CodeGen/PowerPC/vec-nmsub.ll b/llvm/test/CodeGen/PowerPC/vec-nmsub.ll
new file mode 100644
index 0000000..8f4ac972
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vec-nmsub.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs < %s -mcpu=pwr5 -mtriple=ppc32-- -mattr=+altivec | FileCheck %s
+
+define dso_local <4 x float> @intrinsic(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c) local_unnamed_addr {
+; CHECK-LABEL: intrinsic:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnmsubfp 2, 2, 3, 4
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call <4 x float> @llvm.ppc.altivec.vnmsubfp(<4 x float> %a, <4 x float> %b, <4 x float> %c)
+ ret <4 x float> %0
+}
+
+define <4 x float> @manual_llvm_fma(<4 x float> %a, <4 x float> %b, <4 x float> %c) unnamed_addr {
+; CHECK-LABEL: manual_llvm_fma:
+; CHECK: # %bb.0: # %start
+; CHECK-NEXT: vnmsubfp 2, 2, 3, 4
+; CHECK-NEXT: blr
+start:
+ %0 = fneg <4 x float> %c
+ %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %0)
+ %2 = fneg <4 x float> %1
+ ret <4 x float> %2
+}
+
+define dso_local <4 x float> @manual_vmaddfp(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c) local_unnamed_addr {
+; CHECK-LABEL: manual_vmaddfp:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnmsubfp 2, 2, 3, 4
+; CHECK-NEXT: blr
+entry:
+ %fneg.i3 = fneg <4 x float> %c
+ %0 = tail call <4 x float> @llvm.ppc.altivec.vmaddfp(<4 x float> %a, <4 x float> %b, <4 x float> %fneg.i3)
+ %fneg.i = fneg <4 x float> %0
+ ret <4 x float> %fneg.i
+}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll b/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll
new file mode 100644
index 0000000..4ad2d2c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll
@@ -0,0 +1,950 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d,+a,+no-trailing-seq-cst-fence \
+; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s
+; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d,+a,+ztso,+no-trailing-seq-cst-fence \
+; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s
+; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I %s
+; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a,+no-trailing-seq-cst-fence \
+; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s
+; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a,+ztso,+no-trailing-seq-cst-fence \
+; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s
+
+
+; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d,+a -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO-TRAILING-FENCE %s
+; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d,+a,+ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO-TRAILING-FENCE %s
+
+; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO-TRAILING-FENCE %s
+; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a,+ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO-TRAILING-FENCE %s
+
+
+define float @atomic_load_f32_unordered(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_f32_unordered:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_4
+; RV32I-NEXT: fmv.w.x fa0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_f32_unordered:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lw a0, 0(a0)
+; RV32IA-NEXT: fmv.w.x fa0, a0
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_f32_unordered:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_4
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_f32_unordered:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lw a0, 0(a0)
+; RV64IA-NEXT: fmv.w.x fa0, a0
+; RV64IA-NEXT: ret
+ %1 = load atomic float, ptr %a unordered, align 4
+ ret float %1
+}
+
+define float @atomic_load_f32_monotonic(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_f32_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_4
+; RV32I-NEXT: fmv.w.x fa0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_f32_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lw a0, 0(a0)
+; RV32IA-NEXT: fmv.w.x fa0, a0
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_f32_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_4
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_f32_monotonic:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lw a0, 0(a0)
+; RV64IA-NEXT: fmv.w.x fa0, a0
+; RV64IA-NEXT: ret
+ %1 = load atomic float, ptr %a monotonic, align 4
+ ret float %1
+}
+
+define float @atomic_load_f32_acquire(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_f32_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_4
+; RV32I-NEXT: fmv.w.x fa0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_f32_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: lw a0, 0(a0)
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: fmv.w.x fa0, a0
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_f32_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: lw a0, 0(a0)
+; RV32IA-TSO-NEXT: fmv.w.x fa0, a0
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_f32_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_4
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_f32_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: lw a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: fmv.w.x fa0, a0
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_f32_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: lw a0, 0(a0)
+; RV64IA-TSO-NEXT: fmv.w.x fa0, a0
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_f32_acquire:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_f32_acquire:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_f32_acquire:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_f32_acquire:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+ %1 = load atomic float, ptr %a acquire, align 4
+ ret float %1
+}
+
+define float @atomic_load_f32_seq_cst(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_f32_seq_cst:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_4
+; RV32I-NEXT: fmv.w.x fa0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_f32_seq_cst:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: fence rw, rw
+; RV32IA-WMO-NEXT: lw a0, 0(a0)
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: fmv.w.x fa0, a0
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_f32_seq_cst:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: fence rw, rw
+; RV32IA-TSO-NEXT: lw a0, 0(a0)
+; RV32IA-TSO-NEXT: fmv.w.x fa0, a0
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_f32_seq_cst:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_4
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_f32_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: fence rw, rw
+; RV64IA-WMO-NEXT: lw a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: fmv.w.x fa0, a0
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_f32_seq_cst:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: fence rw, rw
+; RV64IA-TSO-NEXT: lw a0, 0(a0)
+; RV64IA-TSO-NEXT: fmv.w.x fa0, a0
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_f32_seq_cst:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_f32_seq_cst:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_f32_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_f32_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+ %1 = load atomic float, ptr %a seq_cst, align 4
+ ret float %1
+}
+
+define double @atomic_load_f64_unordered(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_f64_unordered:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: sw a0, 0(sp)
+; RV32I-NEXT: sw a1, 4(sp)
+; RV32I-NEXT: fld fa0, 0(sp)
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_f64_unordered:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: li a1, 0
+; RV32IA-NEXT: call __atomic_load_8
+; RV32IA-NEXT: sw a0, 0(sp)
+; RV32IA-NEXT: sw a1, 4(sp)
+; RV32IA-NEXT: fld fa0, 0(sp)
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_f64_unordered:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_f64_unordered:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: ld a0, 0(a0)
+; RV64IA-NEXT: fmv.d.x fa0, a0
+; RV64IA-NEXT: ret
+ %1 = load atomic double, ptr %a unordered, align 8
+ ret double %1
+}
+
+define double @atomic_load_f64_monotonic(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_f64_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: sw a0, 0(sp)
+; RV32I-NEXT: sw a1, 4(sp)
+; RV32I-NEXT: fld fa0, 0(sp)
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_f64_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: li a1, 0
+; RV32IA-NEXT: call __atomic_load_8
+; RV32IA-NEXT: sw a0, 0(sp)
+; RV32IA-NEXT: sw a1, 4(sp)
+; RV32IA-NEXT: fld fa0, 0(sp)
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_f64_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_f64_monotonic:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: ld a0, 0(a0)
+; RV64IA-NEXT: fmv.d.x fa0, a0
+; RV64IA-NEXT: ret
+ %1 = load atomic double, ptr %a monotonic, align 8
+ ret double %1
+}
+
+define double @atomic_load_f64_acquire(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_f64_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: sw a0, 0(sp)
+; RV32I-NEXT: sw a1, 4(sp)
+; RV32I-NEXT: fld fa0, 0(sp)
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_f64_acquire:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: li a1, 2
+; RV32IA-NEXT: call __atomic_load_8
+; RV32IA-NEXT: sw a0, 0(sp)
+; RV32IA-NEXT: sw a1, 4(sp)
+; RV32IA-NEXT: fld fa0, 0(sp)
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_f64_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_f64_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: ld a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: fmv.d.x fa0, a0
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_f64_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: ld a0, 0(a0)
+; RV64IA-TSO-NEXT: fmv.d.x fa0, a0
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_f64_acquire:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ld a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.d.x fa0, a0
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_f64_acquire:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ld a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.d.x fa0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+ %1 = load atomic double, ptr %a acquire, align 8
+ ret double %1
+}
+
+define double @atomic_load_f64_seq_cst(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_f64_seq_cst:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: sw a0, 0(sp)
+; RV32I-NEXT: sw a1, 4(sp)
+; RV32I-NEXT: fld fa0, 0(sp)
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_f64_seq_cst:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: li a1, 5
+; RV32IA-NEXT: call __atomic_load_8
+; RV32IA-NEXT: sw a0, 0(sp)
+; RV32IA-NEXT: sw a1, 4(sp)
+; RV32IA-NEXT: fld fa0, 0(sp)
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_f64_seq_cst:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_f64_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: fence rw, rw
+; RV64IA-WMO-NEXT: ld a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: fmv.d.x fa0, a0
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_f64_seq_cst:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: fence rw, rw
+; RV64IA-TSO-NEXT: ld a0, 0(a0)
+; RV64IA-TSO-NEXT: fmv.d.x fa0, a0
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_f64_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ld a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.d.x fa0, a0
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_f64_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ld a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.d.x fa0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+ %1 = load atomic double, ptr %a seq_cst, align 8
+ ret double %1
+}
+
+define void @atomic_store_f32_unordered(ptr %a, float %b) nounwind {
+; RV32I-LABEL: atomic_store_f32_unordered:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: fmv.x.w a1, fa0
+; RV32I-NEXT: li a2, 0
+; RV32I-NEXT: call __atomic_store_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_store_f32_unordered:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: fmv.x.w a1, fa0
+; RV32IA-NEXT: sw a1, 0(a0)
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_store_f32_unordered:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 0
+; RV64I-NEXT: call __atomic_store_4
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_store_f32_unordered:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: fmv.x.w a1, fa0
+; RV64IA-NEXT: sw a1, 0(a0)
+; RV64IA-NEXT: ret
+ store atomic float %b, ptr %a unordered, align 4
+ ret void
+}
+
+define void @atomic_store_f32_monotonic(ptr %a, float %b) nounwind {
+; RV32I-LABEL: atomic_store_f32_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: fmv.x.w a1, fa0
+; RV32I-NEXT: li a2, 0
+; RV32I-NEXT: call __atomic_store_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_store_f32_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: fmv.x.w a1, fa0
+; RV32IA-NEXT: sw a1, 0(a0)
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_store_f32_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 0
+; RV64I-NEXT: call __atomic_store_4
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_store_f32_monotonic:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: fmv.x.w a1, fa0
+; RV64IA-NEXT: sw a1, 0(a0)
+; RV64IA-NEXT: ret
+ store atomic float %b, ptr %a monotonic, align 4
+ ret void
+}
+
+define void @atomic_store_f32_release(ptr %a, float %b) nounwind {
+; RV32I-LABEL: atomic_store_f32_release:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a2, 3
+; RV32I-NEXT: fmv.x.w a1, fa0
+; RV32I-NEXT: call __atomic_store_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_store_f32_release:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: fence rw, w
+; RV32IA-WMO-NEXT: fmv.x.w a1, fa0
+; RV32IA-WMO-NEXT: sw a1, 0(a0)
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_store_f32_release:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: fmv.x.w a1, fa0
+; RV32IA-TSO-NEXT: sw a1, 0(a0)
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_store_f32_release:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 3
+; RV64I-NEXT: call __atomic_store_4
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_store_f32_release:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: fence rw, w
+; RV64IA-WMO-NEXT: fmv.x.w a1, fa0
+; RV64IA-WMO-NEXT: sw a1, 0(a0)
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_store_f32_release:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: fmv.x.w a1, fa0
+; RV64IA-TSO-NEXT: sw a1, 0(a0)
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_store_f32_release:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, w
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0
+; RV32IA-WMO-TRAILING-FENCE-NEXT: sw a1, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_store_f32_release:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0
+; RV32IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_f32_release:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, w
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0
+; RV64IA-WMO-TRAILING-FENCE-NEXT: sw a1, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_f32_release:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0
+; RV64IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+ store atomic float %b, ptr %a release, align 4
+ ret void
+}
+
+define void @atomic_store_f32_seq_cst(ptr %a, float %b) nounwind {
+; RV32I-LABEL: atomic_store_f32_seq_cst:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a2, 5
+; RV32I-NEXT: fmv.x.w a1, fa0
+; RV32I-NEXT: call __atomic_store_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_store_f32_seq_cst:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: fence rw, w
+; RV32IA-WMO-NEXT: fmv.x.w a1, fa0
+; RV32IA-WMO-NEXT: sw a1, 0(a0)
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_store_f32_seq_cst:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: fmv.x.w a1, fa0
+; RV32IA-TSO-NEXT: sw a1, 0(a0)
+; RV32IA-TSO-NEXT: fence rw, rw
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_store_f32_seq_cst:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_store_4
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_store_f32_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: fence rw, w
+; RV64IA-WMO-NEXT: fmv.x.w a1, fa0
+; RV64IA-WMO-NEXT: sw a1, 0(a0)
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_store_f32_seq_cst:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: fmv.x.w a1, fa0
+; RV64IA-TSO-NEXT: sw a1, 0(a0)
+; RV64IA-TSO-NEXT: fence rw, rw
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_store_f32_seq_cst:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, w
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0
+; RV32IA-WMO-TRAILING-FENCE-NEXT: sw a1, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_store_f32_seq_cst:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0
+; RV32IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_f32_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, w
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0
+; RV64IA-WMO-TRAILING-FENCE-NEXT: sw a1, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_f32_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0
+; RV64IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+ store atomic float %b, ptr %a seq_cst, align 4
+ ret void
+}
+
+define void @atomic_store_f64_unordered(ptr %a, double %b) nounwind {
+; RV32I-LABEL: atomic_store_f64_unordered:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: fsd fa0, 0(sp)
+; RV32I-NEXT: lw a1, 0(sp)
+; RV32I-NEXT: lw a2, 4(sp)
+; RV32I-NEXT: li a3, 0
+; RV32I-NEXT: call __atomic_store_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_store_f64_unordered:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: fsd fa0, 0(sp)
+; RV32IA-NEXT: lw a1, 0(sp)
+; RV32IA-NEXT: lw a2, 4(sp)
+; RV32IA-NEXT: li a3, 0
+; RV32IA-NEXT: call __atomic_store_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_store_f64_unordered:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 0
+; RV64I-NEXT: call __atomic_store_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_store_f64_unordered:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: fmv.x.d a1, fa0
+; RV64IA-NEXT: sd a1, 0(a0)
+; RV64IA-NEXT: ret
+ store atomic double %b, ptr %a unordered, align 8
+ ret void
+}
+
+define void @atomic_store_f64_monotonic(ptr %a, double %b) nounwind {
+; RV32I-LABEL: atomic_store_f64_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: fsd fa0, 0(sp)
+; RV32I-NEXT: lw a1, 0(sp)
+; RV32I-NEXT: lw a2, 4(sp)
+; RV32I-NEXT: li a3, 0
+; RV32I-NEXT: call __atomic_store_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_store_f64_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: fsd fa0, 0(sp)
+; RV32IA-NEXT: lw a1, 0(sp)
+; RV32IA-NEXT: lw a2, 4(sp)
+; RV32IA-NEXT: li a3, 0
+; RV32IA-NEXT: call __atomic_store_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_store_f64_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 0
+; RV64I-NEXT: call __atomic_store_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_store_f64_monotonic:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: fmv.x.d a1, fa0
+; RV64IA-NEXT: sd a1, 0(a0)
+; RV64IA-NEXT: ret
+ store atomic double %b, ptr %a monotonic, align 8
+ ret void
+}
+
+define void @atomic_store_f64_release(ptr %a, double %b) nounwind {
+; RV32I-LABEL: atomic_store_f64_release:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: fsd fa0, 0(sp)
+; RV32I-NEXT: lw a1, 0(sp)
+; RV32I-NEXT: lw a2, 4(sp)
+; RV32I-NEXT: li a3, 3
+; RV32I-NEXT: call __atomic_store_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_store_f64_release:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: fsd fa0, 0(sp)
+; RV32IA-NEXT: lw a1, 0(sp)
+; RV32IA-NEXT: lw a2, 4(sp)
+; RV32IA-NEXT: li a3, 3
+; RV32IA-NEXT: call __atomic_store_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_store_f64_release:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 3
+; RV64I-NEXT: call __atomic_store_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_store_f64_release:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: fence rw, w
+; RV64IA-WMO-NEXT: fmv.x.d a1, fa0
+; RV64IA-WMO-NEXT: sd a1, 0(a0)
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_store_f64_release:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: fmv.x.d a1, fa0
+; RV64IA-TSO-NEXT: sd a1, 0(a0)
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_f64_release:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, w
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.x.d a1, fa0
+; RV64IA-WMO-TRAILING-FENCE-NEXT: sd a1, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_f64_release:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.x.d a1, fa0
+; RV64IA-TSO-TRAILING-FENCE-NEXT: sd a1, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+ store atomic double %b, ptr %a release, align 8
+ ret void
+}
+
+define void @atomic_store_f64_seq_cst(ptr %a, double %b) nounwind {
+; RV32I-LABEL: atomic_store_f64_seq_cst:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: fsd fa0, 0(sp)
+; RV32I-NEXT: lw a1, 0(sp)
+; RV32I-NEXT: lw a2, 4(sp)
+; RV32I-NEXT: li a3, 5
+; RV32I-NEXT: call __atomic_store_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_store_f64_seq_cst:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: fsd fa0, 0(sp)
+; RV32IA-NEXT: lw a1, 0(sp)
+; RV32IA-NEXT: lw a2, 4(sp)
+; RV32IA-NEXT: li a3, 5
+; RV32IA-NEXT: call __atomic_store_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_store_f64_seq_cst:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_store_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_store_f64_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: fence rw, w
+; RV64IA-WMO-NEXT: fmv.x.d a1, fa0
+; RV64IA-WMO-NEXT: sd a1, 0(a0)
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_store_f64_seq_cst:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: fmv.x.d a1, fa0
+; RV64IA-TSO-NEXT: sd a1, 0(a0)
+; RV64IA-TSO-NEXT: fence rw, rw
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_f64_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, w
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.x.d a1, fa0
+; RV64IA-WMO-TRAILING-FENCE-NEXT: sd a1, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_f64_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.x.d a1, fa0
+; RV64IA-TSO-TRAILING-FENCE-NEXT: sd a1, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+ store atomic double %b, ptr %a seq_cst, align 8
+ ret void
+}
diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll
index 3aea0f2..f595dfe 100644
--- a/llvm/test/Other/new-pm-lto-defaults.ll
+++ b/llvm/test/Other/new-pm-lto-defaults.ll
@@ -67,6 +67,7 @@
; CHECK-O1-NEXT: Running analysis: TargetLibraryAnalysis
; CHECK-O-NEXT: Running pass: GlobalSplitPass
; CHECK-O-NEXT: Running pass: WholeProgramDevirtPass
+; CHECK-O-NEXT: Running pass: NoRecurseLTOInferencePass
; CHECK-O23SZ-NEXT: Running pass: CoroEarlyPass
; CHECK-O1-NEXT: Running pass: LowerTypeTestsPass
; CHECK-O23SZ-NEXT: Running pass: GlobalOptPass
diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_address_taken.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_address_taken.ll
new file mode 100644
index 0000000..bcdf75b
--- /dev/null
+++ b/llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_address_taken.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt < %s -passes=norecurse-lto-inference -S | FileCheck %s
+
+; This test includes a call to a library function which is not marked as
+; NoCallback. Function bob() does not have internal linkage and hence prevents
+; norecurse to be added.
+
+@.str = private unnamed_addr constant [12 x i8] c"Hello World\00", align 1
+
+;.
+; CHECK: @.str = private unnamed_addr constant [12 x i8] c"Hello World\00", align 1
+;.
+define dso_local void @bob() {
+; CHECK-LABEL: define dso_local void @bob() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @.str)
+; CHECK-NEXT: ret void
+;
+entry:
+ %call = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @.str)
+ ret void
+}
+
+declare i32 @printf(ptr readonly captures(none), ...)
+
+define dso_local i32 @main() norecurse {
+; CHECK: Function Attrs: norecurse
+; CHECK-LABEL: define dso_local i32 @main(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bob()
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ tail call void @bob()
+ ret i32 0
+}
+;.
+; CHECK: attributes #[[ATTR0]] = { norecurse }
+;.
diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_no_address_taken.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_no_address_taken.ll
new file mode 100644
index 0000000..a03b4ca
--- /dev/null
+++ b/llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_no_address_taken.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt < %s -passes=norecurse-lto-inference -S | FileCheck %s
+
+; This test includes a call to a library function which is not marked as
+; NoCallback. All functions except main() are internal and main is marked
+; norecurse, so as to not block norecurse to be added to bob().
+
+@.str = private unnamed_addr constant [12 x i8] c"Hello World\00", align 1
+
+; Function Attrs: nofree noinline nounwind uwtable
+;.
+; CHECK: @.str = private unnamed_addr constant [12 x i8] c"Hello World\00", align 1
+;.
+define internal void @bob() {
+; CHECK: Function Attrs: norecurse
+; CHECK-LABEL: define internal void @bob(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @.str)
+; CHECK-NEXT: ret void
+;
+entry:
+ %call = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @.str)
+ ret void
+}
+
+; Function Attrs: nofree nounwind
+declare i32 @printf(ptr readonly captures(none), ...)
+
+; Function Attrs: nofree norecurse nounwind uwtable
+define dso_local i32 @main() norecurse {
+; CHECK: Function Attrs: norecurse
+; CHECK-LABEL: define dso_local i32 @main(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bob()
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ tail call void @bob()
+ ret i32 0
+}
+;.
+; CHECK: attributes #[[ATTR0]] = { norecurse }
+;.
diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_lto.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_lto.ll
new file mode 100644
index 0000000..5be707b
--- /dev/null
+++ b/llvm/test/Transforms/FunctionAttrs/norecurse_lto.ll
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt < %s -passes=norecurse-lto-inference -S | FileCheck %s
+
+; This test includes a call graph which has a recursive function(foo2) which
+; calls a non-recursive internal function (foo3) satisfying the norecurse
+; attribute criteria.
+
+
+define internal void @foo3() {
+; CHECK: Function Attrs: norecurse
+; CHECK-LABEL: define internal void @foo3(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: ret void
+;
+ ret void
+}
+
+define internal i32 @foo2(i32 %accum, i32 %n) {
+; CHECK-LABEL: define internal i32 @foo2(
+; CHECK-SAME: i32 [[ACCUM:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[RECURSE:.*]]
+; CHECK: [[RECURSE]]:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[N]], 1
+; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[ACCUM]], [[SUB]]
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @foo2(i32 [[MUL]], i32 [[SUB]])
+; CHECK-NEXT: call void @foo3()
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[ACCUM]], %[[ENTRY]] ], [ [[CALL]], %[[RECURSE]] ]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+entry:
+ %cmp = icmp eq i32 %n, 0
+ br i1 %cmp, label %exit, label %recurse
+
+recurse:
+ %sub = sub i32 %n, 1
+ %mul = mul i32 %accum, %sub
+ %call = call i32 @foo2(i32 %mul, i32 %sub)
+ call void @foo3()
+ br label %exit
+
+exit:
+ %res = phi i32 [ %accum, %entry ], [ %call, %recurse ]
+ ret i32 %res
+}
+
+define internal i32 @foo1() {
+; CHECK-LABEL: define internal i32 @foo1() {
+; CHECK-NEXT: [[RES:%.*]] = call i32 @foo2(i32 1, i32 5)
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @foo2(i32 1, i32 5)
+ ret i32 %res
+}
+
+define dso_local i32 @main() {
+; CHECK-LABEL: define dso_local i32 @main() {
+; CHECK-NEXT: [[RES:%.*]] = call i32 @foo1()
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @foo1()
+ ret i32 %res
+}
+;.
+; CHECK: attributes #[[ATTR0]] = { norecurse }
+;.
diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion.ll
new file mode 100644
index 0000000..e351f60
--- /dev/null
+++ b/llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion.ll
@@ -0,0 +1,141 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt < %s -passes=norecurse-lto-inference -S | FileCheck %s
+
+; This test includes a call graph with multiple SCCs. The purpose of this is
+; to check that norecurse is not added when a function is part of non-singular
+; SCC.
+; There are three different SCCs in this test:
+; SCC#1: f1, foo, bar, foo1, bar1
+; SCC#2: bar2, bar3, bar4
+; SCC#3: baz, fun
+; None of these functions should be marked as norecurse
+
+define internal void @bar1() {
+; CHECK-LABEL: define internal void @bar1() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @f1()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @f1()
+ ret void
+}
+
+define internal void @f1() {
+; CHECK-LABEL: define internal void @f1() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @foo()
+; CHECK-NEXT: tail call void @bar2()
+; CHECK-NEXT: tail call void @baz()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @foo()
+ tail call void @bar2()
+ tail call void @baz()
+ ret void
+}
+
+define dso_local i32 @main() norecurse {
+; CHECK: Function Attrs: norecurse
+; CHECK-LABEL: define dso_local i32 @main(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @f1()
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ tail call void @f1()
+ ret i32 0
+}
+
+define internal void @foo1() {
+; CHECK-LABEL: define internal void @foo1() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bar1()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @bar1()
+ ret void
+}
+
+define internal void @bar() {
+; CHECK-LABEL: define internal void @bar() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @foo1()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @foo1()
+ ret void
+}
+
+define internal void @foo() {
+; CHECK-LABEL: define internal void @foo() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bar()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @bar()
+ ret void
+}
+
+define internal void @bar4() {
+; CHECK-LABEL: define internal void @bar4() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bar2()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @bar2()
+ ret void
+}
+
+define internal void @bar2() {
+; CHECK-LABEL: define internal void @bar2() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bar3()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @bar3()
+ ret void
+}
+
+define internal void @bar3() {
+; CHECK-LABEL: define internal void @bar3() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bar4()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @bar4()
+ ret void
+}
+
+define internal void @fun() {
+; CHECK-LABEL: define internal void @fun() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @baz()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @baz()
+ ret void
+}
+
+define internal void @baz() {
+; CHECK-LABEL: define internal void @baz() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @fun()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @fun()
+ ret void
+}
+;.
+; CHECK: attributes #[[ATTR0]] = { norecurse }
+;.
diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion1.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion1.ll
new file mode 100644
index 0000000..cd94037
--- /dev/null
+++ b/llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion1.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt < %s -passes=norecurse-lto-inference -S | FileCheck %s
+
+; This test includes a call graph with multiple SCCs. The purpose of this is
+; to check that norecurse is added to a function which calls a function which
+; is indirectly recursive but is not part of the recursive chain.
+; There are two SCCs in this test:
+; SCC#1: bar2, bar3, bar4
+; SCC#2: baz, fun
+; f1() calls bar2 and baz, both of which are part of some indirect recursive
+; chain. but does not call back f1() and hence f1() can be marked as
+; norecurse.
+
+define dso_local i32 @main() norecurse {
+; CHECK: Function Attrs: norecurse
+; CHECK-LABEL: define dso_local i32 @main(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @f1()
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ tail call void @f1()
+ ret i32 0
+}
+
+define internal void @f1() {
+; CHECK: Function Attrs: norecurse
+; CHECK-LABEL: define internal void @f1(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bar2()
+; CHECK-NEXT: tail call void @baz()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @bar2()
+ tail call void @baz()
+ ret void
+}
+
+define internal void @bar4() {
+; CHECK-LABEL: define internal void @bar4() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bar2()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @bar2()
+ ret void
+}
+
+define internal void @bar2() {
+; CHECK-LABEL: define internal void @bar2() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bar3()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @bar3()
+ ret void
+}
+
+define internal void @bar3() {
+; CHECK-LABEL: define internal void @bar3() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bar4()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @bar4()
+ ret void
+}
+
+define internal void @fun() {
+; CHECK-LABEL: define internal void @fun() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @baz()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @baz()
+ ret void
+}
+
+define internal void @baz() {
+; CHECK-LABEL: define internal void @baz() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @fun()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @fun()
+ ret void
+}
+;.
+; CHECK: attributes #[[ATTR0]] = { norecurse }
+;.
diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_multinode_refscc.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_multinode_refscc.ll
new file mode 100644
index 0000000..8b81a90
--- /dev/null
+++ b/llvm/test/Transforms/FunctionAttrs/norecurse_multinode_refscc.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt -passes=norecurse-lto-inference -S %s | FileCheck %s
+
+; This is a negative test which results in RefSCC with size > 1.
+; RefSCC : [(f2), (f1)]
+; --- SCC A (f1) --- size() = 1
+define internal void @f1() {
+; CHECK-LABEL: define internal void @f1() {
+; CHECK-NEXT: call void @f2()
+; CHECK-NEXT: ret void
+;
+ call void @f2()
+ ret void
+}
+
+; --- SCC B (f2) --- size() = 1
+; f2 indirectly calls f1 using locally allocated function pointer
+define internal void @f2() {
+; CHECK-LABEL: define internal void @f2() {
+; CHECK-NEXT: [[FP:%.*]] = alloca ptr, align 8
+; CHECK-NEXT: store ptr @f1, ptr [[FP]], align 8
+; CHECK-NEXT: [[TMP:%.*]] = load ptr, ptr [[FP]], align 8
+; CHECK-NEXT: call void [[TMP]]()
+; CHECK-NEXT: ret void
+;
+ %fp = alloca void ()*
+ store void ()* @f1, void ()** %fp
+ %tmp = load void ()*, void ()** %fp
+ call void %tmp()
+ ret void
+}
+
+define i32 @main() {
+; CHECK-LABEL: define i32 @main() {
+; CHECK-NEXT: call void @f1()
+; CHECK-NEXT: ret i32 0
+;
+ call void @f1()
+ ret i32 0
+}
+
diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_self_recursive_callee.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_self_recursive_callee.ll
new file mode 100644
index 0000000..461e5df
--- /dev/null
+++ b/llvm/test/Transforms/FunctionAttrs/norecurse_self_recursive_callee.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt < %s -passes=norecurse-lto-inference -S | FileCheck %s
+
+; This test includes a call graph with a self recursive function.
+; The purpose of this is to check that norecurse is added to functions
+; which have a self-recursive function in the call-chain.
+; The call-chain in this test is as follows
+; main -> bob -> callee1 -> callee2
+; where callee2 is self recursive.
+
+@x = dso_local global i32 4, align 4
+@y = dso_local global i32 2, align 4
+
+;.
+; CHECK: @x = dso_local global i32 4, align 4
+; CHECK: @y = dso_local global i32 2, align 4
+;.
+define internal void @callee2() {
+; CHECK: Function Attrs: norecurse
+; CHECK-LABEL: define internal void @callee2(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @y, align 4
+; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
+; CHECK-NEXT: store volatile i32 [[INC]], ptr @y, align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load volatile i32, ptr @y, align 4
+ %inc = add nsw i32 %0, 1
+ store volatile i32 %inc, ptr @y, align 4
+ ret void
+}
+
+define internal void @callee1(i32 %x) {
+; CHECK-LABEL: define internal void @callee1(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X]], 0
+; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: tail call void @callee1(i32 [[X]])
+; CHECK-NEXT: br label %[[IF_END]]
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: tail call void @callee2()
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp = icmp sgt i32 %x, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @callee1(i32 %x)
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ tail call void @callee2()
+ ret void
+}
+
+define internal void @bob() {
+; CHECK-LABEL: define internal void @bob() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @x, align 4
+; CHECK-NEXT: tail call void @callee2(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load volatile i32, ptr @x, align 4
+ tail call void @callee2(i32 %0)
+ ret void
+}
+
+define dso_local i32 @main() norecurse {
+; CHECK: Function Attrs: norecurse
+; CHECK-LABEL: define dso_local i32 @main(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bob()
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ tail call void @bob()
+ ret i32 0
+}
+;.
+; CHECK: attributes #[[ATTR0]] = { norecurse }
+;.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr162009.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr162009.ll
new file mode 100644
index 0000000..6095b24
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr162009.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -enable-epilogue-vectorization=false -S < %s | FileCheck %s --check-prefixes=CHECK-NO-PARTIAL-REDUCTION
+
+target triple = "aarch64"
+
+define i128 @add_reduc_i32_i128_unsupported(ptr %a, ptr %b) "target-features"="+dotprod" {
+; CHECK-NO-PARTIAL-REDUCTION-LABEL: define i128 @add_reduc_i32_i128_unsupported(
+; CHECK-NO-PARTIAL-REDUCTION-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[ENTRY:.*:]]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK-NO-PARTIAL-REDUCTION: [[VECTOR_PH]]:
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-NO-PARTIAL-REDUCTION: [[VECTOR_BODY]]:
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[VEC_PHI:%.*]] = phi <4 x i128> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 1
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP1:%.*]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP2]], align 1
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[WIDE_LOAD1]] to <4 x i64>
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP4:%.*]] = mul nuw <4 x i64> [[TMP1]], [[TMP3]]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP5:%.*]] = zext <4 x i64> [[TMP4]] to <4 x i128>
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP7]] = add <4 x i128> [[VEC_PHI]], [[TMP5]]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4024
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NO-PARTIAL-REDUCTION: [[MIDDLE_BLOCK]]:
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP8:%.*]] = call i128 @llvm.vector.reduce.add.v4i128(<4 x i128> [[TMP7]])
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: br label %[[SCALAR_PH:.*]]
+; CHECK-NO-PARTIAL-REDUCTION: [[SCALAR_PH]]:
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK-NO-PARTIAL-REDUCTION: [[FOR_BODY]]:
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[IV:%.*]] = phi i64 [ 4024, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[ACCUM:%.*]] = phi i128 [ [[TMP8]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[LOAD_A:%.*]] = load i32, ptr [[GEP_A]], align 1
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[EXT_A:%.*]] = zext i32 [[LOAD_A]] to i64
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[GEP_B:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[LOAD_B:%.*]] = load i32, ptr [[GEP_B]], align 1
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[EXT_B:%.*]] = zext i32 [[LOAD_B]] to i64
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[MUL:%.*]] = mul nuw i64 [[EXT_A]], [[EXT_B]]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[MUL_ZEXT:%.*]] = zext i64 [[MUL]] to i128
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[ADD]] = add i128 [[ACCUM]], [[MUL_ZEXT]]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 4025
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_EXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NO-PARTIAL-REDUCTION: [[FOR_EXIT]]:
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[ADD_LCSSA:%.*]] = phi i128 [ [[ADD]], %[[FOR_BODY]] ]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: ret i128 [[ADD_LCSSA]]
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %accum = phi i128 [ 0, %entry ], [ %add, %for.body ]
+ %gep.a = getelementptr i32, ptr %a, i64 %iv
+ %load.a = load i32, ptr %gep.a, align 1
+ %ext.a = zext i32 %load.a to i64
+ %gep.b = getelementptr i32, ptr %b, i64 %iv
+ %load.b = load i32, ptr %gep.b, align 1
+ %ext.b = zext i32 %load.b to i64
+ %mul = mul nuw i64 %ext.a, %ext.b
+ %mul.zext = zext i64 %mul to i128
+ %add = add i128 %accum, %mul.zext
+ %iv.next = add i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 4025
+ br i1 %exitcond.not, label %for.exit, label %for.body
+
+for.exit:
+ ret i128 %add
+}
+;.
+; CHECK-NO-PARTIAL-REDUCTION: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK-NO-PARTIAL-REDUCTION: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-NO-PARTIAL-REDUCTION: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK-NO-PARTIAL-REDUCTION: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.
diff --git a/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s b/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s
index 65e1203..c8a5746 100644
--- a/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s
+++ b/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s
@@ -1,4 +1,6 @@
REQUIRES: aarch64-registered-target
+// Flakey on SVE buildbots, disabled pending invesgitation.
+UNSUPPORTED: target={{.*}}
RUN: llvm-exegesis -mtriple=aarch64 -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=FMOVWSr --benchmark-phase=assemble-measured-code 2>&1
RUN: llvm-objdump -d %d > %t.s
diff --git a/llvm/unittests/CAS/CMakeLists.txt b/llvm/unittests/CAS/CMakeLists.txt
index 0f8fcb9..ee40e6c 100644
--- a/llvm/unittests/CAS/CMakeLists.txt
+++ b/llvm/unittests/CAS/CMakeLists.txt
@@ -8,6 +8,7 @@ add_llvm_unittest(CASTests
ActionCacheTest.cpp
CASTestConfig.cpp
ObjectStoreTest.cpp
+ OnDiskDataAllocatorTest.cpp
OnDiskTrieRawHashMapTest.cpp
ProgramTest.cpp
)
diff --git a/llvm/unittests/CAS/OnDiskDataAllocatorTest.cpp b/llvm/unittests/CAS/OnDiskDataAllocatorTest.cpp
new file mode 100644
index 0000000..966fa03
--- /dev/null
+++ b/llvm/unittests/CAS/OnDiskDataAllocatorTest.cpp
@@ -0,0 +1,66 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/OnDiskDataAllocator.h"
+#include "llvm/CAS/MappedFileRegionArena.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Alignment.h"
+#include "llvm/Testing/Support/Error.h"
+#include "llvm/Testing/Support/SupportHelpers.h"
+
+#if LLVM_ENABLE_ONDISK_CAS
+
+using namespace llvm;
+using namespace llvm::cas;
+
+TEST(OnDiskDataAllocatorTest, Allocate) {
+ unittest::TempDir Temp("data-allocator", /*Unique=*/true);
+ constexpr size_t MB = 1024u * 1024u;
+
+ std::optional<OnDiskDataAllocator> Allocator;
+ ASSERT_THAT_ERROR(OnDiskDataAllocator::create(
+ Temp.path("allocator"), "data", /*MaxFileSize=*/MB,
+ /*NewFileInitialSize=*/std::nullopt)
+ .moveInto(Allocator),
+ Succeeded());
+
+ // Allocate.
+ {
+ for (size_t Size = 1; Size < 16; ++Size) {
+ OnDiskDataAllocator::OnDiskPtr P;
+ ASSERT_THAT_ERROR(Allocator->allocate(Size).moveInto(P), Succeeded());
+ EXPECT_TRUE(
+ isAligned(MappedFileRegionArena::getAlign(), P.getOffset().get()));
+ }
+ }
+
+ // Out of space.
+ {
+ OnDiskDataAllocator::OnDiskPtr P;
+ ASSERT_THAT_ERROR(Allocator->allocate(MB).moveInto(P), Failed());
+ }
+
+ // Check size and capacity.
+ {
+ ASSERT_EQ(Allocator->capacity(), MB);
+ ASSERT_LE(Allocator->size(), MB);
+ }
+
+ // Get.
+ {
+ OnDiskDataAllocator::OnDiskPtr P;
+ ASSERT_THAT_ERROR(Allocator->allocate(32).moveInto(P), Succeeded());
+ ArrayRef<char> Data;
+ ASSERT_THAT_ERROR(Allocator->get(P.getOffset(), 16).moveInto(Data),
+ Succeeded());
+ ASSERT_THAT_ERROR(Allocator->get(P.getOffset(), 1025).moveInto(Data),
+ Failed());
+ }
+}
+
+#endif // LLVM_ENABLE_ONDISK_CAS
diff --git a/llvm/unittests/CAS/OnDiskTrieRawHashMapTest.cpp b/llvm/unittests/CAS/OnDiskTrieRawHashMapTest.cpp
index 7bedfe4..6034c70 100644
--- a/llvm/unittests/CAS/OnDiskTrieRawHashMapTest.cpp
+++ b/llvm/unittests/CAS/OnDiskTrieRawHashMapTest.cpp
@@ -71,7 +71,7 @@ TEST_P(OnDiskTrieRawHashMapTestFixture, General) {
std::optional<FileOffset> Offset;
std::optional<MutableArrayRef<char>> Data;
{
- std::optional<OnDiskTrieRawHashMap::pointer> Insertion;
+ std::optional<OnDiskTrieRawHashMap::OnDiskPtr> Insertion;
ASSERT_THAT_ERROR(Trie1->insert({Hash0, Data0v1}).moveInto(Insertion),
Succeeded());
EXPECT_EQ(Hash0, (*Insertion)->Hash);
@@ -128,7 +128,7 @@ TEST_P(OnDiskTrieRawHashMapTestFixture, General) {
// Recover from an offset.
{
- OnDiskTrieRawHashMap::const_pointer Recovered;
+ OnDiskTrieRawHashMap::ConstOnDiskPtr Recovered;
ASSERT_THAT_ERROR(Trie1->recoverFromFileOffset(*Offset).moveInto(Recovered),
Succeeded());
ASSERT_TRUE(Recovered);
@@ -140,14 +140,14 @@ TEST_P(OnDiskTrieRawHashMapTestFixture, General) {
// Recover from a bad offset.
{
FileOffset BadOffset(1);
- OnDiskTrieRawHashMap::const_pointer Recovered;
+ OnDiskTrieRawHashMap::ConstOnDiskPtr Recovered;
ASSERT_THAT_ERROR(
Trie1->recoverFromFileOffset(BadOffset).moveInto(Recovered), Failed());
}
// Insert another thing.
{
- std::optional<OnDiskTrieRawHashMap::pointer> Insertion;
+ std::optional<OnDiskTrieRawHashMap::OnDiskPtr> Insertion;
ASSERT_THAT_ERROR(Trie1->insert({Hash1, Data1}).moveInto(Insertion),
Succeeded());
EXPECT_EQ(Hash1, (*Insertion)->Hash);
@@ -210,7 +210,7 @@ TEST(OnDiskTrieRawHashMapTest, OutOfSpace) {
auto Hash0 = ArrayRef(Hash0Bytes);
constexpr StringLiteral Data0v1Bytes = "data0.v1";
ArrayRef<char> Data0v1 = ArrayRef(Data0v1Bytes.data(), Data0v1Bytes.size());
- std::optional<OnDiskTrieRawHashMap::pointer> Insertion;
+ std::optional<OnDiskTrieRawHashMap::OnDiskPtr> Insertion;
ASSERT_THAT_ERROR(Trie->insert({Hash0, Data0v1}).moveInto(Insertion),
Failed());
}
diff --git a/mlir/cmake/modules/AddMLIRPython.cmake b/mlir/cmake/modules/AddMLIRPython.cmake
index fa6aec8..ea34f94 100644
--- a/mlir/cmake/modules/AddMLIRPython.cmake
+++ b/mlir/cmake/modules/AddMLIRPython.cmake
@@ -123,12 +123,12 @@ function(mlir_generate_type_stubs)
"IMPORT_PATHS;DEPENDS_TARGETS;OUTPUTS;DEPENDS_TARGET_SRC_DEPS"
${ARGN})
- # for people doing find_package(nanobind)
+ # for people installing a distro (e.g., pip install) of nanobind
if(EXISTS ${nanobind_DIR}/../src/stubgen.py)
set(NB_STUBGEN "${nanobind_DIR}/../src/stubgen.py")
elseif(EXISTS ${nanobind_DIR}/../stubgen.py)
set(NB_STUBGEN "${nanobind_DIR}/../stubgen.py")
- # for people using FetchContent_Declare and FetchContent_MakeAvailable
+ # for people using nanobind git source tree (e.g., FetchContent_Declare and FetchContent_MakeAvailable)
elseif(EXISTS ${nanobind_SOURCE_DIR}/src/stubgen.py)
set(NB_STUBGEN "${nanobind_SOURCE_DIR}/src/stubgen.py")
elseif(EXISTS ${nanobind_SOURCE_DIR}/stubgen.py)
@@ -226,11 +226,10 @@ endfunction()
# EMBED_CAPI_LINK_LIBS: Dependent CAPI libraries that this extension depends
# on. These will be collected for all extensions and put into an
# aggregate dylib that is linked against.
-# PYTHON_BINDINGS_LIBRARY: Either pybind11 or nanobind.
function(declare_mlir_python_extension name)
cmake_parse_arguments(ARG
""
- "ROOT_DIR;MODULE_NAME;ADD_TO_PARENT;PYTHON_BINDINGS_LIBRARY"
+ "ROOT_DIR;MODULE_NAME;ADD_TO_PARENT"
"SOURCES;PRIVATE_LINK_LIBS;EMBED_CAPI_LINK_LIBS"
${ARGN})
@@ -239,20 +238,15 @@ function(declare_mlir_python_extension name)
endif()
set(_install_destination "src/python/${name}")
- if(NOT ARG_PYTHON_BINDINGS_LIBRARY)
- set(ARG_PYTHON_BINDINGS_LIBRARY "pybind11")
- endif()
-
add_library(${name} INTERFACE)
set_target_properties(${name} PROPERTIES
# Yes: Leading-lowercase property names are load bearing and the recommended
# way to do this: https://gitlab.kitware.com/cmake/cmake/-/issues/19261
- EXPORT_PROPERTIES "mlir_python_SOURCES_TYPE;mlir_python_EXTENSION_MODULE_NAME;mlir_python_EMBED_CAPI_LINK_LIBS;mlir_python_DEPENDS;mlir_python_BINDINGS_LIBRARY"
+ EXPORT_PROPERTIES "mlir_python_SOURCES_TYPE;mlir_python_EXTENSION_MODULE_NAME;mlir_python_EMBED_CAPI_LINK_LIBS;mlir_python_DEPENDS"
mlir_python_SOURCES_TYPE extension
mlir_python_EXTENSION_MODULE_NAME "${ARG_MODULE_NAME}"
mlir_python_EMBED_CAPI_LINK_LIBS "${ARG_EMBED_CAPI_LINK_LIBS}"
mlir_python_DEPENDS ""
- mlir_python_BINDINGS_LIBRARY "${ARG_PYTHON_BINDINGS_LIBRARY}"
)
# Set the interface source and link_libs properties of the target
@@ -341,14 +335,12 @@ function(add_mlir_python_modules name)
elseif(_source_type STREQUAL "extension")
# Native CPP extension.
get_target_property(_module_name ${sources_target} mlir_python_EXTENSION_MODULE_NAME)
- get_target_property(_bindings_library ${sources_target} mlir_python_BINDINGS_LIBRARY)
# Transform relative source to based on root dir.
set(_extension_target "${modules_target}.extension.${_module_name}.dso")
add_mlir_python_extension(${_extension_target} "${_module_name}"
INSTALL_COMPONENT ${modules_target}
INSTALL_DIR "${ARG_INSTALL_PREFIX}/_mlir_libs"
OUTPUT_DIRECTORY "${ARG_ROOT_PREFIX}/_mlir_libs"
- PYTHON_BINDINGS_LIBRARY ${_bindings_library}
LINK_LIBS PRIVATE
${sources_target}
${ARG_COMMON_CAPI_LINK_LIBS}
@@ -753,7 +745,7 @@ endfunction()
function(add_mlir_python_extension libname extname)
cmake_parse_arguments(ARG
""
- "INSTALL_COMPONENT;INSTALL_DIR;OUTPUT_DIRECTORY;PYTHON_BINDINGS_LIBRARY"
+ "INSTALL_COMPONENT;INSTALL_DIR;OUTPUT_DIRECTORY"
"SOURCES;LINK_LIBS"
${ARGN})
if(ARG_UNPARSED_ARGUMENTS)
@@ -761,7 +753,7 @@ function(add_mlir_python_extension libname extname)
endif()
# The extension itself must be compiled with RTTI and exceptions enabled.
- # Also, some warning classes triggered by pybind11 are disabled.
+ # Also, some warning classes triggered by nanobind are disabled.
set(eh_rtti_enable)
if (MSVC)
set(eh_rtti_enable /EHsc /GR)
@@ -769,62 +761,53 @@ function(add_mlir_python_extension libname extname)
set(eh_rtti_enable -frtti -fexceptions)
endif ()
- # The actual extension library produces a shared-object or DLL and has
- # sources that must be compiled in accordance with pybind11 needs (RTTI and
- # exceptions).
- if(NOT DEFINED ARG_PYTHON_BINDINGS_LIBRARY OR ARG_PYTHON_BINDINGS_LIBRARY STREQUAL "pybind11")
- pybind11_add_module(${libname}
- ${ARG_SOURCES}
- )
- elseif(ARG_PYTHON_BINDINGS_LIBRARY STREQUAL "nanobind")
- nanobind_add_module(${libname}
- NB_DOMAIN ${MLIR_BINDINGS_PYTHON_NB_DOMAIN}
- FREE_THREADED
- ${ARG_SOURCES}
- )
+ nanobind_add_module(${libname}
+ NB_DOMAIN ${MLIR_BINDINGS_PYTHON_NB_DOMAIN}
+ FREE_THREADED
+ ${ARG_SOURCES}
+ )
- if (NOT MLIR_DISABLE_CONFIGURE_PYTHON_DEV_PACKAGES
- AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL))
- # Avoid some warnings from upstream nanobind.
- # If a superproject set MLIR_DISABLE_CONFIGURE_PYTHON_DEV_PACKAGES, let
- # the super project handle compile options as it wishes.
- get_property(NB_LIBRARY_TARGET_NAME TARGET ${libname} PROPERTY LINK_LIBRARIES)
- target_compile_options(${NB_LIBRARY_TARGET_NAME}
- PRIVATE
- -Wall -Wextra -Wpedantic
- -Wno-c++98-compat-extra-semi
- -Wno-cast-qual
- -Wno-covered-switch-default
- -Wno-deprecated-literal-operator
- -Wno-nested-anon-types
- -Wno-unused-parameter
- -Wno-zero-length-array
- ${eh_rtti_enable})
-
- target_compile_options(${libname}
- PRIVATE
- -Wall -Wextra -Wpedantic
- -Wno-c++98-compat-extra-semi
- -Wno-cast-qual
- -Wno-covered-switch-default
- -Wno-deprecated-literal-operator
- -Wno-nested-anon-types
- -Wno-unused-parameter
- -Wno-zero-length-array
- ${eh_rtti_enable})
- endif()
+ if (NOT MLIR_DISABLE_CONFIGURE_PYTHON_DEV_PACKAGES
+ AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL))
+ # Avoid some warnings from upstream nanobind.
+ # If a superproject set MLIR_DISABLE_CONFIGURE_PYTHON_DEV_PACKAGES, let
+ # the super project handle compile options as it wishes.
+ get_property(NB_LIBRARY_TARGET_NAME TARGET ${libname} PROPERTY LINK_LIBRARIES)
+ target_compile_options(${NB_LIBRARY_TARGET_NAME}
+ PRIVATE
+ -Wall -Wextra -Wpedantic
+ -Wno-c++98-compat-extra-semi
+ -Wno-cast-qual
+ -Wno-covered-switch-default
+ -Wno-deprecated-literal-operator
+ -Wno-nested-anon-types
+ -Wno-unused-parameter
+ -Wno-zero-length-array
+ ${eh_rtti_enable})
+
+ target_compile_options(${libname}
+ PRIVATE
+ -Wall -Wextra -Wpedantic
+ -Wno-c++98-compat-extra-semi
+ -Wno-cast-qual
+ -Wno-covered-switch-default
+ -Wno-deprecated-literal-operator
+ -Wno-nested-anon-types
+ -Wno-unused-parameter
+ -Wno-zero-length-array
+ ${eh_rtti_enable})
+ endif()
- if(APPLE)
- # NanobindAdaptors.h uses PyClassMethod_New to build `pure_subclass`es but nanobind
- # doesn't declare this API as undefined in its linker flags. So we need to declare it as such
- # for downstream users that do not do something like `-undefined dynamic_lookup`.
- # Same for the rest.
- target_link_options(${libname} PUBLIC
- "LINKER:-U,_PyClassMethod_New"
- "LINKER:-U,_PyCode_Addr2Location"
- "LINKER:-U,_PyFrame_GetLasti"
- )
- endif()
+ if(APPLE)
+ # NanobindAdaptors.h uses PyClassMethod_New to build `pure_subclass`es but nanobind
+ # doesn't declare this API as undefined in its linker flags. So we need to declare it as such
+ # for downstream users that do not do something like `-undefined dynamic_lookup`.
+ # Same for the rest.
+ target_link_options(${libname} PUBLIC
+ "LINKER:-U,_PyClassMethod_New"
+ "LINKER:-U,_PyCode_Addr2Location"
+ "LINKER:-U,_PyFrame_GetLasti"
+ )
endif()
target_compile_options(${libname} PRIVATE ${eh_rtti_enable})
@@ -862,11 +845,11 @@ function(add_mlir_python_extension libname extname)
if(WIN32)
# On Windows, pyconfig.h (and by extension python.h) hardcode the version of the
# python library which will be used for linkage depending on the flavor of the build.
- # pybind11 has a workaround which depends on the definition of Py_DEBUG (if Py_DEBUG
- # is not passed in as a compile definition, pybind11 undefs _DEBUG when including
+ # nanobind has a workaround which depends on the definition of Py_DEBUG (if Py_DEBUG
+ # is not passed in as a compile definition, nanobind undefs _DEBUG when including
# python.h, so that the release python library would be used).
- # Since mlir uses pybind11, we can leverage their workaround by never directly
- # pyconfig.h or python.h and instead relying on the pybind11 headers to include the
+ # Since mlir uses nanobind, we can leverage their workaround by never directly
+ # pyconfig.h or python.h and instead relying on the nanobind headers to include the
# necessary python headers. This results in mlir always linking against the
# release python library via the (undocumented) cmake property Python3_LIBRARY_RELEASE.
target_link_libraries(${libname} PRIVATE ${Python3_LIBRARY_RELEASE})
diff --git a/mlir/cmake/modules/MLIRDetectPythonEnv.cmake b/mlir/cmake/modules/MLIRDetectPythonEnv.cmake
index d18f8c0..edbad2e 100644
--- a/mlir/cmake/modules/MLIRDetectPythonEnv.cmake
+++ b/mlir/cmake/modules/MLIRDetectPythonEnv.cmake
@@ -46,81 +46,20 @@ macro(mlir_configure_python_dev_packages)
message(STATUS "Found python include dirs: ${Python3_INCLUDE_DIRS}")
message(STATUS "Found python libraries: ${Python3_LIBRARIES}")
message(STATUS "Found numpy v${Python3_NumPy_VERSION}: ${Python3_NumPy_INCLUDE_DIRS}")
- mlir_detect_pybind11_install()
- find_package(pybind11 2.10 CONFIG REQUIRED)
- message(STATUS "Found pybind11 v${pybind11_VERSION}: ${pybind11_INCLUDE_DIR}")
- message(STATUS "Python prefix = '${PYTHON_MODULE_PREFIX}', "
- "suffix = '${PYTHON_MODULE_SUFFIX}', "
- "extension = '${PYTHON_MODULE_EXTENSION}")
-
- mlir_detect_nanobind_install()
- find_package(nanobind 2.9 CONFIG REQUIRED)
- message(STATUS "Found nanobind v${nanobind_VERSION}: ${nanobind_INCLUDE_DIR}")
- message(STATUS "Python prefix = '${PYTHON_MODULE_PREFIX}', "
- "suffix = '${PYTHON_MODULE_SUFFIX}', "
- "extension = '${PYTHON_MODULE_EXTENSION}")
- endif()
-endmacro()
-
-# Detects a pybind11 package installed in the current python environment
-# and sets variables to allow it to be found. This allows pybind11 to be
-# installed via pip, which typically yields a much more recent version than
-# the OS install, which will be available otherwise.
-function(mlir_detect_pybind11_install)
- if(pybind11_DIR)
- message(STATUS "Using explicit pybind11 cmake directory: ${pybind11_DIR} (-Dpybind11_DIR to change)")
- else()
- message(STATUS "Checking for pybind11 in python path...")
- execute_process(
- COMMAND "${Python3_EXECUTABLE}"
- -c "import pybind11;print(pybind11.get_cmake_dir(), end='')"
- WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
- RESULT_VARIABLE STATUS
- OUTPUT_VARIABLE PACKAGE_DIR
- ERROR_QUIET)
- if(NOT STATUS EQUAL "0")
- message(STATUS "not found (install via 'pip install pybind11' or set pybind11_DIR)")
- return()
- endif()
- message(STATUS "found (${PACKAGE_DIR})")
- set(pybind11_DIR "${PACKAGE_DIR}" PARENT_SCOPE)
- endif()
-endfunction()
-
-
-# Detects a nanobind package installed in the current python environment
-# and sets variables to allow it to be found. This allows nanobind to be
-# installed via pip, which typically yields a much more recent version than
-# the OS install, which will be available otherwise.
-function(mlir_detect_nanobind_install)
- if(nanobind_DIR)
- message(STATUS "Using explicit nanobind cmake directory: ${nanobind_DIR} (-Dnanobind_DIR to change)")
- else()
- message(STATUS "Checking for nanobind in python path...")
- execute_process(
- COMMAND "${Python3_EXECUTABLE}"
- -c "import nanobind;print(nanobind.cmake_dir(), end='')"
- WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
- RESULT_VARIABLE STATUS
- OUTPUT_VARIABLE PACKAGE_DIR
- ERROR_QUIET)
- if(NOT STATUS EQUAL "0")
- message(STATUS "not found (install via 'pip install nanobind' or set nanobind_DIR)")
- return()
+ message(STATUS "Python extension suffix for modules: '${Python3_SOABI}'")
+ if(nanobind_DIR)
+ message(STATUS "Using explicit nanobind cmake directory: ${nanobind_DIR} (-Dnanobind_DIR to change)")
+ find_package(nanobind 2.9 CONFIG REQUIRED)
+ else()
+ include(FetchContent)
+ FetchContent_Declare(
+ nanobind
+ GIT_REPOSITORY https://github.com/wjakob/nanobind.git
+ GIT_TAG v2.9.0
+ GIT_SHALLOW TRUE
+ )
+ FetchContent_MakeAvailable(nanobind)
endif()
- message(STATUS "found (${PACKAGE_DIR})")
- set(nanobind_DIR "${PACKAGE_DIR}" PARENT_SCOPE)
- execute_process(
- COMMAND "${Python3_EXECUTABLE}"
- -c "import nanobind;print(nanobind.include_dir(), end='')"
- WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
- RESULT_VARIABLE STATUS
- OUTPUT_VARIABLE PACKAGE_DIR
- ERROR_QUIET)
- if(NOT STATUS EQUAL "0")
- message(STATUS "not found (install via 'pip install nanobind' or set nanobind_DIR)")
- return()
- endif()
- set(nanobind_INCLUDE_DIR "${PACKAGE_DIR}" PARENT_SCOPE)
+ message(STATUS "Found nanobind: ${NB_DIR}")
endif()
-endfunction()
+endmacro()
diff --git a/mlir/docs/Dialects/Linalg/OpDSL.md b/mlir/docs/Dialects/Linalg/OpDSL.md
index b892bbe..5d7e274 100644
--- a/mlir/docs/Dialects/Linalg/OpDSL.md
+++ b/mlir/docs/Dialects/Linalg/OpDSL.md
@@ -16,7 +16,7 @@ corresponding `linalg.generic` IR for the composition.
## Basic usage
The tool is bundled with the MLIR Python bindings. To use from the CMake build
-tree, MLIR must be build with Python bindings enabled
+tree, MLIR must be built with Python bindings enabled
(`-DMLIR_ENABLE_BINDINGS_PYTHON=ON`). Then add the `python` directory in the
build tree to your `PYTHONPATH` environment variable (i.e. `export
PYTHONPATH=$PWD/build/tools/mlir/python_packages/mlir_core`). Optionally, use an
@@ -24,7 +24,7 @@ installed MLIR package, if available, to avoid building.
```shell
# Dump the `core_named_ops.py` module as YAML.
-python -m mlir.dialects.linalg.opdsl.dump_oplib .ops.core_named_ops
+python -m mlir.dialects.linalg.opdsl.dump_oplib.ops.core_named_ops
```
Alternatively, run the `$PWD/build/bin/update_core_linalg_named_ops.sh` script,
diff --git a/mlir/examples/standalone/pyproject.toml b/mlir/examples/standalone/pyproject.toml
index 5a1e6e8..75e2153 100644
--- a/mlir/examples/standalone/pyproject.toml
+++ b/mlir/examples/standalone/pyproject.toml
@@ -23,9 +23,7 @@ Discussions = "https://discourse.llvm.org/"
[build-system]
requires = [
"scikit-build-core>=0.10.7",
- "typing_extensions>=4.12.2",
- "nanobind>=2.9, <3.0",
- "pybind11>=2.10.0, <=2.13.6",
+ "typing_extensions>=4.12.2"
]
build-backend = "scikit_build_core.build"
diff --git a/mlir/examples/standalone/python/CMakeLists.txt b/mlir/examples/standalone/python/CMakeLists.txt
index 905c9449..108c343 100644
--- a/mlir/examples/standalone/python/CMakeLists.txt
+++ b/mlir/examples/standalone/python/CMakeLists.txt
@@ -16,27 +16,10 @@ declare_mlir_dialect_python_bindings(
ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir_standalone"
TD_FILE dialects/StandaloneOps.td
SOURCES
- dialects/standalone_pybind11.py
dialects/standalone_nanobind.py
_mlir_libs/_standaloneDialectsNanobind/py.typed
DIALECT_NAME standalone)
-
-declare_mlir_python_extension(StandalonePythonSources.Pybind11Extension
- MODULE_NAME _standaloneDialectsPybind11
- ADD_TO_PARENT StandalonePythonSources
- SOURCES
- StandaloneExtensionPybind11.cpp
- PRIVATE_LINK_LIBS
- LLVMSupport
- EMBED_CAPI_LINK_LIBS
- MLIRCAPIIR
- MLIRCAPIArith
- MLIRCAPITransforms
- StandaloneCAPI
- PYTHON_BINDINGS_LIBRARY pybind11
-)
-
declare_mlir_python_extension(StandalonePythonSources.NanobindExtension
MODULE_NAME _standaloneDialectsNanobind
ADD_TO_PARENT StandalonePythonSources
@@ -49,7 +32,6 @@ declare_mlir_python_extension(StandalonePythonSources.NanobindExtension
MLIRCAPIArith
MLIRCAPITransforms
StandaloneCAPI
- PYTHON_BINDINGS_LIBRARY nanobind
)
diff --git a/mlir/examples/standalone/python/StandaloneExtensionPybind11.cpp b/mlir/examples/standalone/python/StandaloneExtensionPybind11.cpp
deleted file mode 100644
index da8c216..0000000
--- a/mlir/examples/standalone/python/StandaloneExtensionPybind11.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-//===- StandaloneExtensionPybind11.cpp - Extension module -----------------===//
-//
-// This is the pybind11 version of the example module. There is also a nanobind
-// example in StandaloneExtensionNanobind.cpp.
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "Standalone-c/Dialects.h"
-#include "mlir-c/Dialect/Arith.h"
-#include "mlir/Bindings/Python/PybindAdaptors.h"
-
-using namespace mlir::python::adaptors;
-
-PYBIND11_MODULE(_standaloneDialectsPybind11, m) {
- //===--------------------------------------------------------------------===//
- // standalone dialect
- //===--------------------------------------------------------------------===//
- auto standaloneM = m.def_submodule("standalone");
-
- standaloneM.def(
- "register_dialects",
- [](MlirContext context, bool load) {
- MlirDialectHandle arithHandle = mlirGetDialectHandle__arith__();
- MlirDialectHandle standaloneHandle =
- mlirGetDialectHandle__standalone__();
- mlirDialectHandleRegisterDialect(arithHandle, context);
- mlirDialectHandleRegisterDialect(standaloneHandle, context);
- if (load) {
- mlirDialectHandleLoadDialect(arithHandle, context);
- mlirDialectHandleRegisterDialect(standaloneHandle, context);
- }
- },
- py::arg("context") = py::none(), py::arg("load") = true);
-}
diff --git a/mlir/examples/standalone/python/mlir_standalone/dialects/standalone_pybind11.py b/mlir/examples/standalone/python/mlir_standalone/dialects/standalone_pybind11.py
deleted file mode 100644
index bfb98e40..0000000
--- a/mlir/examples/standalone/python/mlir_standalone/dialects/standalone_pybind11.py
+++ /dev/null
@@ -1,6 +0,0 @@
-# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-from ._standalone_ops_gen import *
-from .._mlir_libs._standaloneDialectsPybind11.standalone import *
diff --git a/mlir/examples/standalone/test/python/smoketest.py b/mlir/examples/standalone/test/python/smoketest.py
index 26d84fd..f881984 100644
--- a/mlir/examples/standalone/test/python/smoketest.py
+++ b/mlir/examples/standalone/test/python/smoketest.py
@@ -1,16 +1,7 @@
-# RUN: %python %s pybind11 | FileCheck %s
# RUN: %python %s nanobind | FileCheck %s
-import sys
from mlir_standalone.ir import *
-
-if sys.argv[1] == "pybind11":
- from mlir_standalone.dialects import standalone_pybind11 as standalone_d
-elif sys.argv[1] == "nanobind":
- from mlir_standalone.dialects import standalone_nanobind as standalone_d
-else:
- raise ValueError("Expected either pybind11 or nanobind as arguments")
-
+from mlir_standalone.dialects import standalone_nanobind as standalone_d
with Context():
standalone_d.register_dialects()
diff --git a/mlir/include/mlir/Bindings/Python/PybindAdaptors.h b/mlir/include/mlir/Bindings/Python/PybindAdaptors.h
deleted file mode 100644
index edc6977..0000000
--- a/mlir/include/mlir/Bindings/Python/PybindAdaptors.h
+++ /dev/null
@@ -1,616 +0,0 @@
-//===- PybindAdaptors.h - Interop with MLIR APIs via pybind11 -------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// This file contains adaptors for clients of the core MLIR Python APIs to
-// interop via MLIR CAPI types, using pybind11. The facilities here do not
-// depend on implementation details of the MLIR Python API and do not introduce
-// C++-level dependencies with it (requiring only Python and CAPI-level
-// dependencies).
-//
-// It is encouraged to be used both in-tree and out-of-tree. For in-tree use
-// cases, it should be used for dialect implementations (versus relying on
-// Pybind-based internals of the core libraries).
-//===----------------------------------------------------------------------===//
-
-#ifndef MLIR_BINDINGS_PYTHON_PYBINDADAPTORS_H
-#define MLIR_BINDINGS_PYTHON_PYBINDADAPTORS_H
-
-#include <pybind11/functional.h>
-#include <pybind11/pybind11.h>
-#include <pybind11/pytypes.h>
-#include <pybind11/stl.h>
-
-#include "mlir-c/Bindings/Python/Interop.h"
-#include "mlir-c/Diagnostics.h"
-#include "mlir-c/IR.h"
-
-#include "llvm/ADT/Twine.h"
-
-namespace py = pybind11;
-using namespace py::literals;
-
-// Raw CAPI type casters need to be declared before use, so always include them
-// first.
-namespace pybind11 {
-namespace detail {
-
-/// Helper to convert a presumed MLIR API object to a capsule, accepting either
-/// an explicit Capsule (which can happen when two C APIs are communicating
-/// directly via Python) or indirectly by querying the MLIR_PYTHON_CAPI_PTR_ATTR
-/// attribute (through which supported MLIR Python API objects export their
-/// contained API pointer as a capsule). Throws a type error if the object is
-/// neither. This is intended to be used from type casters, which are invoked
-/// with a raw handle (unowned). The returned object's lifetime may not extend
-/// beyond the apiObject handle without explicitly having its refcount increased
-/// (i.e. on return).
-static py::object mlirApiObjectToCapsule(py::handle apiObject) {
- if (PyCapsule_CheckExact(apiObject.ptr()))
- return py::reinterpret_borrow<py::object>(apiObject);
- if (!py::hasattr(apiObject, MLIR_PYTHON_CAPI_PTR_ATTR)) {
- auto repr = py::repr(apiObject).cast<std::string>();
- throw py::type_error(
- (llvm::Twine("Expected an MLIR object (got ") + repr + ").").str());
- }
- return apiObject.attr(MLIR_PYTHON_CAPI_PTR_ATTR);
-}
-
-// Note: Currently all of the following support cast from py::object to the
-// Mlir* C-API type, but only a few light-weight, context-bound ones
-// implicitly cast the other way because the use case has not yet emerged and
-// ownership is unclear.
-
-/// Casts object <-> MlirAffineMap.
-template <>
-struct type_caster<MlirAffineMap> {
- PYBIND11_TYPE_CASTER(MlirAffineMap, _("MlirAffineMap"));
- bool load(handle src, bool) {
- py::object capsule = mlirApiObjectToCapsule(src);
- value = mlirPythonCapsuleToAffineMap(capsule.ptr());
- if (mlirAffineMapIsNull(value)) {
- return false;
- }
- return !mlirAffineMapIsNull(value);
- }
- static handle cast(MlirAffineMap v, return_value_policy, handle) {
- py::object capsule =
- py::reinterpret_steal<py::object>(mlirPythonAffineMapToCapsule(v));
- return py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir"))
- .attr("AffineMap")
- .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule)
- .release();
- }
-};
-
-/// Casts object <-> MlirAttribute.
-template <>
-struct type_caster<MlirAttribute> {
- PYBIND11_TYPE_CASTER(MlirAttribute, _("MlirAttribute"));
- bool load(handle src, bool) {
- py::object capsule = mlirApiObjectToCapsule(src);
- value = mlirPythonCapsuleToAttribute(capsule.ptr());
- return !mlirAttributeIsNull(value);
- }
- static handle cast(MlirAttribute v, return_value_policy, handle) {
- py::object capsule =
- py::reinterpret_steal<py::object>(mlirPythonAttributeToCapsule(v));
- return py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir"))
- .attr("Attribute")
- .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule)
- .attr(MLIR_PYTHON_MAYBE_DOWNCAST_ATTR)()
- .release();
- }
-};
-
-/// Casts object -> MlirBlock.
-template <>
-struct type_caster<MlirBlock> {
- PYBIND11_TYPE_CASTER(MlirBlock, _("MlirBlock"));
- bool load(handle src, bool) {
- py::object capsule = mlirApiObjectToCapsule(src);
- value = mlirPythonCapsuleToBlock(capsule.ptr());
- return !mlirBlockIsNull(value);
- }
-};
-
-/// Casts object -> MlirContext.
-template <>
-struct type_caster<MlirContext> {
- PYBIND11_TYPE_CASTER(MlirContext, _("MlirContext"));
- bool load(handle src, bool) {
- if (src.is_none()) {
- // Gets the current thread-bound context.
- // TODO: This raises an error of "No current context" currently.
- // Update the implementation to pretty-print the helpful error that the
- // core implementations print in this case.
- src = py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir"))
- .attr("Context")
- .attr("current");
- }
- py::object capsule = mlirApiObjectToCapsule(src);
- value = mlirPythonCapsuleToContext(capsule.ptr());
- return !mlirContextIsNull(value);
- }
-};
-
-/// Casts object <-> MlirDialectRegistry.
-template <>
-struct type_caster<MlirDialectRegistry> {
- PYBIND11_TYPE_CASTER(MlirDialectRegistry, _("MlirDialectRegistry"));
- bool load(handle src, bool) {
- py::object capsule = mlirApiObjectToCapsule(src);
- value = mlirPythonCapsuleToDialectRegistry(capsule.ptr());
- return !mlirDialectRegistryIsNull(value);
- }
- static handle cast(MlirDialectRegistry v, return_value_policy, handle) {
- py::object capsule = py::reinterpret_steal<py::object>(
- mlirPythonDialectRegistryToCapsule(v));
- return py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir"))
- .attr("DialectRegistry")
- .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule)
- .release();
- }
-};
-
-/// Casts object <-> MlirLocation.
-template <>
-struct type_caster<MlirLocation> {
- PYBIND11_TYPE_CASTER(MlirLocation, _("MlirLocation"));
- bool load(handle src, bool) {
- if (src.is_none()) {
- // Gets the current thread-bound context.
- src = py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir"))
- .attr("Location")
- .attr("current");
- }
- py::object capsule = mlirApiObjectToCapsule(src);
- value = mlirPythonCapsuleToLocation(capsule.ptr());
- return !mlirLocationIsNull(value);
- }
- static handle cast(MlirLocation v, return_value_policy, handle) {
- py::object capsule =
- py::reinterpret_steal<py::object>(mlirPythonLocationToCapsule(v));
- return py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir"))
- .attr("Location")
- .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule)
- .release();
- }
-};
-
-/// Casts object <-> MlirModule.
-template <>
-struct type_caster<MlirModule> {
- PYBIND11_TYPE_CASTER(MlirModule, _("MlirModule"));
- bool load(handle src, bool) {
- py::object capsule = mlirApiObjectToCapsule(src);
- value = mlirPythonCapsuleToModule(capsule.ptr());
- return !mlirModuleIsNull(value);
- }
- static handle cast(MlirModule v, return_value_policy, handle) {
- py::object capsule =
- py::reinterpret_steal<py::object>(mlirPythonModuleToCapsule(v));
- return py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir"))
- .attr("Module")
- .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule)
- .release();
- };
-};
-
-/// Casts object <-> MlirFrozenRewritePatternSet.
-template <>
-struct type_caster<MlirFrozenRewritePatternSet> {
- PYBIND11_TYPE_CASTER(MlirFrozenRewritePatternSet,
- _("MlirFrozenRewritePatternSet"));
- bool load(handle src, bool) {
- py::object capsule = mlirApiObjectToCapsule(src);
- value = mlirPythonCapsuleToFrozenRewritePatternSet(capsule.ptr());
- return value.ptr != nullptr;
- }
- static handle cast(MlirFrozenRewritePatternSet v, return_value_policy,
- handle) {
- py::object capsule = py::reinterpret_steal<py::object>(
- mlirPythonFrozenRewritePatternSetToCapsule(v));
- return py::module::import(MAKE_MLIR_PYTHON_QUALNAME("rewrite"))
- .attr("FrozenRewritePatternSet")
- .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule)
- .release();
- };
-};
-
-/// Casts object <-> MlirOperation.
-template <>
-struct type_caster<MlirOperation> {
- PYBIND11_TYPE_CASTER(MlirOperation, _("MlirOperation"));
- bool load(handle src, bool) {
- py::object capsule = mlirApiObjectToCapsule(src);
- value = mlirPythonCapsuleToOperation(capsule.ptr());
- return !mlirOperationIsNull(value);
- }
- static handle cast(MlirOperation v, return_value_policy, handle) {
- if (v.ptr == nullptr)
- return py::none();
- py::object capsule =
- py::reinterpret_steal<py::object>(mlirPythonOperationToCapsule(v));
- return py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir"))
- .attr("Operation")
- .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule)
- .release();
- };
-};
-
-/// Casts object <-> MlirValue.
-template <>
-struct type_caster<MlirValue> {
- PYBIND11_TYPE_CASTER(MlirValue, _("MlirValue"));
- bool load(handle src, bool) {
- py::object capsule = mlirApiObjectToCapsule(src);
- value = mlirPythonCapsuleToValue(capsule.ptr());
- return !mlirValueIsNull(value);
- }
- static handle cast(MlirValue v, return_value_policy, handle) {
- if (v.ptr == nullptr)
- return py::none();
- py::object capsule =
- py::reinterpret_steal<py::object>(mlirPythonValueToCapsule(v));
- return py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir"))
- .attr("Value")
- .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule)
- .attr(MLIR_PYTHON_MAYBE_DOWNCAST_ATTR)()
- .release();
- };
-};
-
-/// Casts object -> MlirPassManager.
-template <>
-struct type_caster<MlirPassManager> {
- PYBIND11_TYPE_CASTER(MlirPassManager, _("MlirPassManager"));
- bool load(handle src, bool) {
- py::object capsule = mlirApiObjectToCapsule(src);
- value = mlirPythonCapsuleToPassManager(capsule.ptr());
- return !mlirPassManagerIsNull(value);
- }
-};
-
-/// Casts object <-> MlirTypeID.
-template <>
-struct type_caster<MlirTypeID> {
- PYBIND11_TYPE_CASTER(MlirTypeID, _("MlirTypeID"));
- bool load(handle src, bool) {
- py::object capsule = mlirApiObjectToCapsule(src);
- value = mlirPythonCapsuleToTypeID(capsule.ptr());
- return !mlirTypeIDIsNull(value);
- }
- static handle cast(MlirTypeID v, return_value_policy, handle) {
- if (v.ptr == nullptr)
- return py::none();
- py::object capsule =
- py::reinterpret_steal<py::object>(mlirPythonTypeIDToCapsule(v));
- return py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir"))
- .attr("TypeID")
- .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule)
- .release();
- };
-};
-
-/// Casts object <-> MlirType.
-template <>
-struct type_caster<MlirType> {
- PYBIND11_TYPE_CASTER(MlirType, _("MlirType"));
- bool load(handle src, bool) {
- py::object capsule = mlirApiObjectToCapsule(src);
- value = mlirPythonCapsuleToType(capsule.ptr());
- return !mlirTypeIsNull(value);
- }
- static handle cast(MlirType t, return_value_policy, handle) {
- py::object capsule =
- py::reinterpret_steal<py::object>(mlirPythonTypeToCapsule(t));
- return py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir"))
- .attr("Type")
- .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule)
- .attr(MLIR_PYTHON_MAYBE_DOWNCAST_ATTR)()
- .release();
- }
-};
-
-} // namespace detail
-} // namespace pybind11
-
-namespace mlir {
-namespace python {
-namespace adaptors {
-
-/// Provides a facility like py::class_ for defining a new class in a scope,
-/// but this allows extension of an arbitrary Python class, defining methods
-/// on it is a similar way. Classes defined in this way are very similar to
-/// if defined in Python in the usual way but use Pybind11 machinery to do
-/// it. These are not "real" Pybind11 classes but pure Python classes with no
-/// relation to a concrete C++ class.
-///
-/// Derived from a discussion upstream:
-/// https://github.com/pybind/pybind11/issues/1193
-/// (plus a fair amount of extra curricular poking)
-/// TODO: If this proves useful, see about including it in pybind11.
-class pure_subclass {
-public:
- pure_subclass(py::handle scope, const char *derivedClassName,
- const py::object &superClass) {
- py::object pyType =
- py::reinterpret_borrow<py::object>((PyObject *)&PyType_Type);
- py::object metaclass = pyType(superClass);
- py::dict attributes;
-
- thisClass =
- metaclass(derivedClassName, py::make_tuple(superClass), attributes);
- scope.attr(derivedClassName) = thisClass;
- }
-
- template <typename Func, typename... Extra>
- pure_subclass &def(const char *name, Func &&f, const Extra &...extra) {
- py::cpp_function cf(
- std::forward<Func>(f), py::name(name), py::is_method(thisClass),
- py::sibling(py::getattr(thisClass, name, py::none())), extra...);
- thisClass.attr(cf.name()) = cf;
- return *this;
- }
-
- template <typename Func, typename... Extra>
- pure_subclass &def_property_readonly(const char *name, Func &&f,
- const Extra &...extra) {
- py::cpp_function cf(
- std::forward<Func>(f), py::name(name), py::is_method(thisClass),
- py::sibling(py::getattr(thisClass, name, py::none())), extra...);
- auto builtinProperty =
- py::reinterpret_borrow<py::object>((PyObject *)&PyProperty_Type);
- thisClass.attr(name) = builtinProperty(cf);
- return *this;
- }
-
- template <typename Func, typename... Extra>
- pure_subclass &def_staticmethod(const char *name, Func &&f,
- const Extra &...extra) {
- static_assert(!std::is_member_function_pointer<Func>::value,
- "def_staticmethod(...) called with a non-static member "
- "function pointer");
- py::cpp_function cf(std::forward<Func>(f), py::name(name),
- py::scope(thisClass), extra...);
- thisClass.attr(cf.name()) = py::staticmethod(cf);
- return *this;
- }
-
- template <typename Func, typename... Extra>
- pure_subclass &def_classmethod(const char *name, Func &&f,
- const Extra &...extra) {
- static_assert(!std::is_member_function_pointer<Func>::value,
- "def_classmethod(...) called with a non-static member "
- "function pointer");
- py::cpp_function cf(std::forward<Func>(f), py::name(name),
- py::scope(thisClass), extra...);
- thisClass.attr(cf.name()) =
- py::reinterpret_borrow<py::object>(PyClassMethod_New(cf.ptr()));
- return *this;
- }
-
- py::object get_class() const { return thisClass; }
-
-protected:
- py::object superClass;
- py::object thisClass;
-};
-
-/// Creates a custom subclass of mlir.ir.Attribute, implementing a casting
-/// constructor and type checking methods.
-class mlir_attribute_subclass : public pure_subclass {
-public:
- using IsAFunctionTy = bool (*)(MlirAttribute);
- using GetTypeIDFunctionTy = MlirTypeID (*)();
-
- /// Subclasses by looking up the super-class dynamically.
- mlir_attribute_subclass(py::handle scope, const char *attrClassName,
- IsAFunctionTy isaFunction,
- GetTypeIDFunctionTy getTypeIDFunction = nullptr)
- : mlir_attribute_subclass(
- scope, attrClassName, isaFunction,
- py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir"))
- .attr("Attribute"),
- getTypeIDFunction) {}
-
- /// Subclasses with a provided mlir.ir.Attribute super-class. This must
- /// be used if the subclass is being defined in the same extension module
- /// as the mlir.ir class (otherwise, it will trigger a recursive
- /// initialization).
- mlir_attribute_subclass(py::handle scope, const char *typeClassName,
- IsAFunctionTy isaFunction, const py::object &superCls,
- GetTypeIDFunctionTy getTypeIDFunction = nullptr)
- : pure_subclass(scope, typeClassName, superCls) {
- // Casting constructor. Note that it hard, if not impossible, to properly
- // call chain to parent `__init__` in pybind11 due to its special handling
- // for init functions that don't have a fully constructed self-reference,
- // which makes it impossible to forward it to `__init__` of a superclass.
- // Instead, provide a custom `__new__` and call that of a superclass, which
- // eventually calls `__init__` of the superclass. Since attribute subclasses
- // have no additional members, we can just return the instance thus created
- // without amending it.
- std::string captureTypeName(
- typeClassName); // As string in case if typeClassName is not static.
- py::cpp_function newCf(
- [superCls, isaFunction, captureTypeName](py::object cls,
- py::object otherAttribute) {
- MlirAttribute rawAttribute = py::cast<MlirAttribute>(otherAttribute);
- if (!isaFunction(rawAttribute)) {
- auto origRepr = py::repr(otherAttribute).cast<std::string>();
- throw std::invalid_argument(
- (llvm::Twine("Cannot cast attribute to ") + captureTypeName +
- " (from " + origRepr + ")")
- .str());
- }
- py::object self = superCls.attr("__new__")(cls, otherAttribute);
- return self;
- },
- py::name("__new__"), py::arg("cls"), py::arg("cast_from_attr"));
- thisClass.attr("__new__") = newCf;
-
- // 'isinstance' method.
- def_staticmethod(
- "isinstance",
- [isaFunction](MlirAttribute other) { return isaFunction(other); },
- py::arg("other_attribute"));
- def("__repr__", [superCls, captureTypeName](py::object self) {
- return py::repr(superCls(self))
- .attr("replace")(superCls.attr("__name__"), captureTypeName);
- });
- if (getTypeIDFunction) {
- def_staticmethod("get_static_typeid",
- [getTypeIDFunction]() { return getTypeIDFunction(); });
- py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir"))
- .attr(MLIR_PYTHON_CAPI_TYPE_CASTER_REGISTER_ATTR)(
- getTypeIDFunction())(pybind11::cpp_function(
- [thisClass = thisClass](const py::object &mlirAttribute) {
- return thisClass(mlirAttribute);
- }));
- }
- }
-};
-
-/// Creates a custom subclass of mlir.ir.Type, implementing a casting
-/// constructor and type checking methods.
-class mlir_type_subclass : public pure_subclass {
-public:
- using IsAFunctionTy = bool (*)(MlirType);
- using GetTypeIDFunctionTy = MlirTypeID (*)();
-
- /// Subclasses by looking up the super-class dynamically.
- mlir_type_subclass(py::handle scope, const char *typeClassName,
- IsAFunctionTy isaFunction,
- GetTypeIDFunctionTy getTypeIDFunction = nullptr)
- : mlir_type_subclass(
- scope, typeClassName, isaFunction,
- py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir")).attr("Type"),
- getTypeIDFunction) {}
-
- /// Subclasses with a provided mlir.ir.Type super-class. This must
- /// be used if the subclass is being defined in the same extension module
- /// as the mlir.ir class (otherwise, it will trigger a recursive
- /// initialization).
- mlir_type_subclass(py::handle scope, const char *typeClassName,
- IsAFunctionTy isaFunction, const py::object &superCls,
- GetTypeIDFunctionTy getTypeIDFunction = nullptr)
- : pure_subclass(scope, typeClassName, superCls) {
- // Casting constructor. Note that it hard, if not impossible, to properly
- // call chain to parent `__init__` in pybind11 due to its special handling
- // for init functions that don't have a fully constructed self-reference,
- // which makes it impossible to forward it to `__init__` of a superclass.
- // Instead, provide a custom `__new__` and call that of a superclass, which
- // eventually calls `__init__` of the superclass. Since attribute subclasses
- // have no additional members, we can just return the instance thus created
- // without amending it.
- std::string captureTypeName(
- typeClassName); // As string in case if typeClassName is not static.
- py::cpp_function newCf(
- [superCls, isaFunction, captureTypeName](py::object cls,
- py::object otherType) {
- MlirType rawType = py::cast<MlirType>(otherType);
- if (!isaFunction(rawType)) {
- auto origRepr = py::repr(otherType).cast<std::string>();
- throw std::invalid_argument((llvm::Twine("Cannot cast type to ") +
- captureTypeName + " (from " +
- origRepr + ")")
- .str());
- }
- py::object self = superCls.attr("__new__")(cls, otherType);
- return self;
- },
- py::name("__new__"), py::arg("cls"), py::arg("cast_from_type"));
- thisClass.attr("__new__") = newCf;
-
- // 'isinstance' method.
- def_staticmethod(
- "isinstance",
- [isaFunction](MlirType other) { return isaFunction(other); },
- py::arg("other_type"));
- def("__repr__", [superCls, captureTypeName](py::object self) {
- return py::repr(superCls(self))
- .attr("replace")(superCls.attr("__name__"), captureTypeName);
- });
- if (getTypeIDFunction) {
- // 'get_static_typeid' method.
- // This is modeled as a static method instead of a static property because
- // `def_property_readonly_static` is not available in `pure_subclass` and
- // we do not want to introduce the complexity that pybind uses to
- // implement it.
- def_staticmethod("get_static_typeid",
- [getTypeIDFunction]() { return getTypeIDFunction(); });
- py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir"))
- .attr(MLIR_PYTHON_CAPI_TYPE_CASTER_REGISTER_ATTR)(
- getTypeIDFunction())(pybind11::cpp_function(
- [thisClass = thisClass](const py::object &mlirType) {
- return thisClass(mlirType);
- }));
- }
- }
-};
-
-/// Creates a custom subclass of mlir.ir.Value, implementing a casting
-/// constructor and type checking methods.
-class mlir_value_subclass : public pure_subclass {
-public:
- using IsAFunctionTy = bool (*)(MlirValue);
-
- /// Subclasses by looking up the super-class dynamically.
- mlir_value_subclass(py::handle scope, const char *valueClassName,
- IsAFunctionTy isaFunction)
- : mlir_value_subclass(
- scope, valueClassName, isaFunction,
- py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir")).attr("Value")) {
- }
-
- /// Subclasses with a provided mlir.ir.Value super-class. This must
- /// be used if the subclass is being defined in the same extension module
- /// as the mlir.ir class (otherwise, it will trigger a recursive
- /// initialization).
- mlir_value_subclass(py::handle scope, const char *valueClassName,
- IsAFunctionTy isaFunction, const py::object &superCls)
- : pure_subclass(scope, valueClassName, superCls) {
- // Casting constructor. Note that it hard, if not impossible, to properly
- // call chain to parent `__init__` in pybind11 due to its special handling
- // for init functions that don't have a fully constructed self-reference,
- // which makes it impossible to forward it to `__init__` of a superclass.
- // Instead, provide a custom `__new__` and call that of a superclass, which
- // eventually calls `__init__` of the superclass. Since attribute subclasses
- // have no additional members, we can just return the instance thus created
- // without amending it.
- std::string captureValueName(
- valueClassName); // As string in case if valueClassName is not static.
- py::cpp_function newCf(
- [superCls, isaFunction, captureValueName](py::object cls,
- py::object otherValue) {
- MlirValue rawValue = py::cast<MlirValue>(otherValue);
- if (!isaFunction(rawValue)) {
- auto origRepr = py::repr(otherValue).cast<std::string>();
- throw std::invalid_argument((llvm::Twine("Cannot cast value to ") +
- captureValueName + " (from " +
- origRepr + ")")
- .str());
- }
- py::object self = superCls.attr("__new__")(cls, otherValue);
- return self;
- },
- py::name("__new__"), py::arg("cls"), py::arg("cast_from_value"));
- thisClass.attr("__new__") = newCf;
-
- // 'isinstance' method.
- def_staticmethod(
- "isinstance",
- [isaFunction](MlirValue other) { return isaFunction(other); },
- py::arg("other_value"));
- }
-};
-
-} // namespace adaptors
-
-} // namespace python
-} // namespace mlir
-
-#endif // MLIR_BINDINGS_PYTHON_PYBINDADAPTORS_H
diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td
index 115a11b..80337fc 100644
--- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td
+++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td
@@ -201,9 +201,9 @@ def Tosa_PadOpQuantInfoBuilder : OpBuilder<
// and optional initial value. The builder will extract var_shape and element type
// attributes from variable type.
def Tosa_VariableOpBuilder : OpBuilder<
- (ins "StringRef":$name, "Type":$variable_type, "Attribute":$initial_value),
+ (ins "StringRef":$sym_name, "Type":$variable_type, "Attribute":$initial_value),
[{
- buildVariableOp($_builder, $_state, name, variable_type, initial_value);
+ buildVariableOp($_builder, $_state, sym_name, variable_type, initial_value);
}]>;
diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaUtilOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaUtilOps.td
index d819cc1..f1a618e 100644
--- a/mlir/include/mlir/Dialect/Tosa/IR/TosaUtilOps.td
+++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaUtilOps.td
@@ -18,6 +18,7 @@
include "mlir/IR/OpBase.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/IR/SymbolInterfaces.td"
include "mlir/Interfaces/LoopLikeInterface.td"
include "mlir/Interfaces/VectorInterfaces.td"
include "mlir/Dialect/Tosa/IR/TosaInterfaces.td"
@@ -82,7 +83,7 @@ def Tosa_YieldOp : Tosa_Op<"yield", [
//===----------------------------------------------------------------------===//
// Operator: variable
//===----------------------------------------------------------------------===//
-def Tosa_VariableOp : Tosa_Op<"variable", []> {
+def Tosa_VariableOp : Tosa_Op<"variable", [Symbol]> {
let summary = "Defines a variable";
let description = [{
@@ -91,7 +92,10 @@ def Tosa_VariableOp : Tosa_Op<"variable", []> {
}];
let arguments = (ins
- SymbolNameAttr:$name,
+ // Note: "sym_name" is used as opposed to "name" in the specification,
+ // since a Symbol must be named "sym_name" for it to be recognised by
+ // the containing SymbolTable.
+ SymbolNameAttr:$sym_name,
IndexElementsAttr:$var_shape,
TypeAttr:$type,
OptionalAttr<AnyAttr>:$initial_value
@@ -105,14 +109,18 @@ def Tosa_VariableOp : Tosa_Op<"variable", []> {
let hasCustomAssemblyFormat = 1;
let assemblyFormat = [{
- $name
+ $sym_name
attr-dict
custom<VariableOpTypeOrInitialValue>($var_shape, $type, $initial_value)
}];
let builders = [Tosa_VariableOpBuilder];
- let hasVerifier = 1;
+ let extraClassDeclaration = [{
+ ::llvm::StringRef getName() {
+ return getSymName();
+ }
+ }];
}
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp b/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp
index d57926ec..39d4815 100644
--- a/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp
+++ b/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp
@@ -243,7 +243,7 @@ static void getTreePredicates(std::vector<PositionalPredicate> &predList,
.Case<OperandPosition, OperandGroupPosition>([&](auto *pos) {
getOperandTreePredicates(predList, val, builder, inputs, pos);
})
- .Default([](auto *) { llvm_unreachable("unexpected position kind"); });
+ .DefaultUnreachable("unexpected position kind");
}
static void getAttributePredicates(pdl::AttributeOp op,
diff --git a/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp b/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp
index 9b61540..50fca56 100644
--- a/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp
+++ b/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp
@@ -1118,10 +1118,7 @@ StringRef getTypeMangling(Type type, bool isSigned) {
llvm_unreachable("Unsupported integer width");
}
})
- .Default([](auto) {
- llvm_unreachable("No mangling defined");
- return "";
- });
+ .DefaultUnreachable("No mangling defined");
}
template <typename ReduceOp>
diff --git a/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp
index 0f90acf..57877b8 100644
--- a/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp
+++ b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp
@@ -68,9 +68,7 @@ std::string getTypeMangling(Type ty, bool isUnsigned = false) {
llvm_unreachable("unhandled integer type");
}
})
- .Default([](Type) -> std::string {
- llvm_unreachable("unhandled type for mangling");
- });
+ .DefaultUnreachable("unhandled type for mangling");
}
std::string mangle(StringRef baseName, ArrayRef<Type> types,
diff --git a/mlir/lib/Dialect/ArmSME/Transforms/OuterProductFusion.cpp b/mlir/lib/Dialect/ArmSME/Transforms/OuterProductFusion.cpp
index 9196d2e..39e398b 100644
--- a/mlir/lib/Dialect/ArmSME/Transforms/OuterProductFusion.cpp
+++ b/mlir/lib/Dialect/ArmSME/Transforms/OuterProductFusion.cpp
@@ -170,7 +170,7 @@ public:
op2, op.getResultType(), lhs, rhs, lhsMask, rhsMask,
op1.getAcc());
})
- .Default([&](auto) { llvm_unreachable("unexpected extend op!"); });
+ .DefaultUnreachable("unexpected extend op!");
} else if (kind == arm_sme::CombiningKind::Sub) {
TypeSwitch<Operation *>(extOp)
.Case<arith::ExtFOp>([&](auto) {
@@ -188,7 +188,7 @@ public:
op2, op.getResultType(), lhs, rhs, lhsMask, rhsMask,
op1.getAcc());
})
- .Default([&](auto) { llvm_unreachable("unexpected extend op!"); });
+ .DefaultUnreachable("unexpected extend op!");
} else {
llvm_unreachable("unexpected arm_sme::CombiningKind!");
}
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index c0f9132..19eba6b 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -375,7 +375,7 @@ void GPUDialect::printType(Type type, DialectAsmPrinter &os) const {
os << shape.back() << 'x' << fragTy.getElementType();
os << ", \"" << fragTy.getOperand() << "\"" << '>';
})
- .Default([](Type) { llvm_unreachable("unexpected 'gpu' type kind"); });
+ .DefaultUnreachable("unexpected 'gpu' type kind");
}
static LogicalResult verifyKnownLaunchSizeAttr(Operation *op,
diff --git a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
index 2561f66..0a3ef7d 100644
--- a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
+++ b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
@@ -847,9 +847,7 @@ getThreadIdBuilder(std::optional<TransformOpInterface> transformOp,
return GpuLaneIdBuilder(ctx, warpSize, useLinearMapping,
*maybeMaskingAttr);
})
- .Default([&](DeviceMappingAttrInterface) -> GpuIdBuilder {
- llvm_unreachable("unknown mapping attribute");
- });
+ .DefaultUnreachable("unknown mapping attribute");
return DiagnosedSilenceableFailure::success();
}
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
index ef38027..cee943d 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
@@ -1096,10 +1096,8 @@ static Value memsetGetStored(MemsetIntr op, const MemorySlot &slot,
Value intVal = buildMemsetValue(type.getWidth());
return LLVM::BitcastOp::create(builder, op.getLoc(), type, intVal);
})
- .Default([](Type) -> Value {
- llvm_unreachable(
- "getStored should not be called on memset to unsupported type");
- });
+ .DefaultUnreachable(
+ "getStored should not be called on memset to unsupported type");
}
template <class MemsetIntr>
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp
index 297640c..705d07d 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp
@@ -45,9 +45,7 @@ static StringRef getTypeKeyword(Type type) {
.Case<LLVMStructType>([&](Type) { return "struct"; })
.Case<LLVMTargetExtType>([&](Type) { return "target"; })
.Case<LLVMX86AMXType>([&](Type) { return "x86_amx"; })
- .Default([](Type) -> StringRef {
- llvm_unreachable("unexpected 'llvm' type kind");
- });
+ .DefaultUnreachable("unexpected 'llvm' type kind");
}
/// Prints a structure type. Keeps track of known struct names to handle self-
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp
index 38f1a8b..42160a1 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp
@@ -192,7 +192,7 @@ static void replaceIndexOpsByInductionVariables(RewriterBase &rewriter,
.Case([&](affine::AffineForOp affineForOp) {
allIvs.push_back(affineForOp.getInductionVar());
})
- .Default([&](Operation *op) { assert(false && "unexpected op"); });
+ .DefaultUnreachable("unexpected op");
}
assert(linalgOp.getNumLoops() == allIvs.size() &&
"expected the number of loops and induction variables to match");
diff --git a/mlir/lib/Dialect/Linalg/Transforms/NamedToElementwise.cpp b/mlir/lib/Dialect/Linalg/Transforms/NamedToElementwise.cpp
index 00a076b..c904556 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/NamedToElementwise.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/NamedToElementwise.cpp
@@ -48,10 +48,7 @@ ElementwiseKind getKind(Operation *op) {
.Case([](SquareOp) { return ElementwiseKind::square; })
.Case([](TanhOp) { return ElementwiseKind::tanh; })
.Case([](ErfOp) { return ElementwiseKind::erf; })
- .Default([&](Operation *op) {
- llvm_unreachable("unhandled case in named to elementwise");
- return ElementwiseKind::sub;
- });
+ .DefaultUnreachable("unhandled case in named to elementwise");
}
template <typename NamedOpTy>
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index e9a8b25..7863c21 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -1427,10 +1427,7 @@ FailureOr<Conv1DOp> DownscaleSizeOneWindowed2DConvolution<Conv2DOp, Conv1DOp>::
.Case([&](linalg::PoolingNchwMaxOp op) {
return std::make_tuple(0, 1, 2, 3);
})
- .Default([&](Operation *op) {
- llvm_unreachable("unexpected conv2d/pool2d operation.");
- return std::make_tuple(0, 0, 0, 0);
- });
+ .DefaultUnreachable("unexpected conv2d/pool2d operation.");
// Only handle the case where at least one of the window dimensions is
// of size 1. Other cases can rely on tiling to reduce to such cases.
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index 3593b53..24d3722 100644
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -604,9 +604,7 @@ static Operation *materializeTiledShape(OpBuilder &builder, Location loc,
builder, loc, valueToTile, sliceParams.offsets,
sliceParams.sizes, sliceParams.strides);
})
- .Default([](ShapedType) -> Operation * {
- llvm_unreachable("Unexpected shaped type");
- });
+ .DefaultUnreachable("Unexpected shaped type");
return sliceOp;
}
diff --git a/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp b/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp
index 24da447..214410f 100644
--- a/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp
+++ b/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp
@@ -315,7 +315,7 @@ LogicalResult LoadOpOfSubViewOpFolder<OpTy>::matchAndRewrite(
op, op.getType(), subViewOp.getSource(), sourceIndices,
op.getTranspose(), op.getNumTiles());
})
- .Default([](Operation *) { llvm_unreachable("unexpected operation."); });
+ .DefaultUnreachable("unexpected operation");
return success();
}
@@ -367,7 +367,7 @@ LogicalResult LoadOpOfExpandShapeOpFolder<OpTy>::matchAndRewrite(
op, op.getType(), expandShapeOp.getViewSource(), sourceIndices,
op.getMask(), op.getPassThru());
})
- .Default([](Operation *) { llvm_unreachable("unexpected operation."); });
+ .DefaultUnreachable("unexpected operation");
return success();
}
@@ -415,7 +415,7 @@ LogicalResult LoadOpOfCollapseShapeOpFolder<OpTy>::matchAndRewrite(
op, op.getType(), collapseShapeOp.getViewSource(), sourceIndices,
op.getMask(), op.getPassThru());
})
- .Default([](Operation *) { llvm_unreachable("unexpected operation."); });
+ .DefaultUnreachable("unexpected operation");
return success();
}
@@ -482,7 +482,7 @@ LogicalResult StoreOpOfSubViewOpFolder<OpTy>::matchAndRewrite(
op, op.getSrc(), subViewOp.getSource(), sourceIndices,
op.getLeadDimension(), op.getTransposeAttr());
})
- .Default([](Operation *) { llvm_unreachable("unexpected operation."); });
+ .DefaultUnreachable("unexpected operation");
return success();
}
@@ -535,7 +535,7 @@ LogicalResult StoreOpOfExpandShapeOpFolder<OpTy>::matchAndRewrite(
op, expandShapeOp.getViewSource(), sourceIndices, op.getMask(),
op.getValueToStore());
})
- .Default([](Operation *) { llvm_unreachable("unexpected operation."); });
+ .DefaultUnreachable("unexpected operation");
return success();
}
@@ -584,7 +584,7 @@ LogicalResult StoreOpOfCollapseShapeOpFolder<OpTy>::matchAndRewrite(
op, collapseShapeOp.getViewSource(), sourceIndices, op.getMask(),
op.getValueToStore());
})
- .Default([](Operation *) { llvm_unreachable("unexpected operation."); });
+ .DefaultUnreachable("unexpected operation");
return success();
}
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 5672942..fd4cabbad 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -3425,10 +3425,7 @@ void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) {
}
llvm_unreachable("Unexpected generatee argument");
})
- .Default([&](Operation *op) {
- assert(false && "TODO: Custom name for this operation");
- return "transformed";
- });
+ .DefaultUnreachable("TODO: Custom name for this operation");
}
setNameFn(result, cliName);
diff --git a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp
index 36685d3..29b770f 100644
--- a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp
@@ -2177,10 +2177,9 @@ cloneAsInsertSlices(RewriterBase &rewriter,
auto clonedOp = cloneAsInsertSlice(rewriter, op);
clonedSlices.push_back(clonedOp);
})
- .Default([&](Operation *op) {
- // Assert here assuming this has already been checked.
- assert(0 && "unexpected slice type while cloning as insert slice");
- });
+ // Assert here assuming this has already been checked.
+ .DefaultUnreachable(
+ "unexpected slice type while cloning as insert slice");
}
return clonedSlices;
}
diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp
index c8efdf0..24c33f9 100644
--- a/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp
+++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp
@@ -987,7 +987,7 @@ void SPIRVDialect::printType(Type type, DialectAsmPrinter &os) const {
.Case<ArrayType, CooperativeMatrixType, PointerType, RuntimeArrayType,
ImageType, SampledImageType, StructType, MatrixType, TensorArmType>(
[&](auto type) { print(type, os); })
- .Default([](Type) { llvm_unreachable("unhandled SPIR-V type"); });
+ .DefaultUnreachable("Unhandled SPIR-V type");
}
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp
index 7e9a80e..f895807 100644
--- a/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp
+++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp
@@ -57,7 +57,7 @@ public:
for (Type elementType : concreteType.getElementTypes())
add(elementType);
})
- .Default([](SPIRVType) { llvm_unreachable("Unhandled type"); });
+ .DefaultUnreachable("Unhandled type");
}
void add(Type type) { add(cast<SPIRVType>(type)); }
@@ -107,7 +107,7 @@ public:
for (Type elementType : concreteType.getElementTypes())
add(elementType);
})
- .Default([](SPIRVType) { llvm_unreachable("Unhandled type"); });
+ .DefaultUnreachable("Unhandled type");
}
void add(Type type) { add(cast<SPIRVType>(type)); }
@@ -198,8 +198,7 @@ Type CompositeType::getElementType(unsigned index) const {
.Case<MatrixType>([](MatrixType type) { return type.getColumnType(); })
.Case<StructType>(
[index](StructType type) { return type.getElementType(index); })
- .Default(
- [](Type) -> Type { llvm_unreachable("invalid composite type"); });
+ .DefaultUnreachable("Invalid composite type");
}
unsigned CompositeType::getNumElements() const {
@@ -207,9 +206,7 @@ unsigned CompositeType::getNumElements() const {
.Case<ArrayType, StructType, TensorArmType, VectorType>(
[](auto type) { return type.getNumElements(); })
.Case<MatrixType>([](MatrixType type) { return type.getNumColumns(); })
- .Default([](SPIRVType) -> unsigned {
- llvm_unreachable("Invalid type for number of elements query");
- });
+ .DefaultUnreachable("Invalid type for number of elements query");
}
bool CompositeType::hasCompileTimeKnownNumElements() const {
diff --git a/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp b/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp
index 122f61e0..88e1ab6 100644
--- a/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp
+++ b/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp
@@ -622,7 +622,7 @@ static spirv::Dim convertRank(int64_t rank) {
}
static spirv::ImageFormat getImageFormat(Type elementType) {
- return llvm::TypeSwitch<Type, spirv::ImageFormat>(elementType)
+ return TypeSwitch<Type, spirv::ImageFormat>(elementType)
.Case<Float16Type>([](Float16Type) { return spirv::ImageFormat::R16f; })
.Case<Float32Type>([](Float32Type) { return spirv::ImageFormat::R32f; })
.Case<IntegerType>([](IntegerType intType) {
@@ -639,11 +639,7 @@ static spirv::ImageFormat getImageFormat(Type elementType) {
llvm_unreachable("Unhandled integer type!");
}
})
- .Default([](Type) {
- llvm_unreachable("Unhandled element type!");
- // We need to return something here to satisfy the type switch.
- return spirv::ImageFormat::R32f;
- });
+ .DefaultUnreachable("Unhandled element type!");
#undef BIT_WIDTH_CASE
}
diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
index 332f1a0..c51b5e9 100644
--- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
+++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
@@ -905,56 +905,29 @@ static inline LogicalResult errorIfShapeNotSizeOne(Operation *op, Type type) {
return shapeAdaptor.getNumElements() == 1 ? success() : failure();
}
-// Returns the first declaration point prior to this operation or failure if
-// not found.
-static FailureOr<tosa::VariableOp> findVariableDecl(Operation *op,
- StringRef symName) {
- ModuleOp module = op->getParentOfType<ModuleOp>();
- tosa::VariableOp varOp = nullptr;
-
- // TODO: Adopt SymbolTable trait to Varible ops.
- // Currently, the variable's definition point is searched via walk(),
- // starting from the top-level ModuleOp and stopping at the point of use. Once
- // TOSA control flow and variable extensions reach the complete state, may
- // leverage MLIR's Symbol Table functionality to look up symbol and enhance
- // the search to a TOSA specific graph traversal over the IR structure.
- module.walk([&](Operation *tempOp) {
- // Reach this op itself.
- if (tempOp == op) {
- return WalkResult::interrupt();
- }
-
- if (auto tosaOp = dyn_cast<tosa::VariableOp>(tempOp)) {
- if (symName == tosaOp.getName()) {
- varOp = tosaOp;
- return WalkResult::interrupt();
- }
- }
-
- return WalkResult::advance();
- });
-
- if (varOp)
- return varOp;
-
- return failure();
-}
-
template <typename T>
static LogicalResult verifyVariableOpErrorIf(T op, Type type, StringRef name) {
- StringRef symName = op.getName();
- FailureOr<tosa::VariableOp> varOp = findVariableDecl(op, symName);
- if (failed(varOp))
+ Operation *symTableOp =
+ op->template getParentWithTrait<OpTrait::SymbolTable>();
+ if (!symTableOp)
+ // If the operation is not the scope of a symbol table, we cannot
+ // verify it against it's declaration.
+ return success();
+
+ SymbolTable symTable(symTableOp);
+ const auto varOp = symTable.lookup<tosa::VariableOp>(op.getName());
+
+ // Verify prior declaration
+ if (!varOp)
return op->emitOpError("'")
- << symName << "' has not been declared by 'tosa.variable'";
+ << op.getName() << "' has not been declared by 'tosa.variable'";
// Verify type and shape
- auto variableType = getVariableType(varOp.value());
+ auto variableType = getVariableType(varOp);
if (errorIfTypeOrShapeMismatch(op, type, name, variableType,
"the input tensor")
.failed())
return failure();
-
return success();
}
@@ -1418,7 +1391,7 @@ static void buildVariableOp(OpBuilder &builder, OperationState &result,
ArrayRef<int64_t> shape = shapedType.getShape();
auto varShapeAttr = builder.getIndexTensorAttr(convertFromMlirShape(shape));
- result.addAttribute("name", nameAttr);
+ result.addAttribute("sym_name", nameAttr);
result.addAttribute("var_shape", varShapeAttr);
result.addAttribute("type", elementTypeAttr);
result.addAttribute("initial_value", initialValue);
@@ -4160,16 +4133,6 @@ LogicalResult tosa::SelectOp::verify() {
return success();
}
-LogicalResult tosa::VariableOp::verify() {
- StringRef symName = getName();
- FailureOr<tosa::VariableOp> varOp = findVariableDecl(*this, symName);
- if (succeeded(varOp))
- return emitOpError("illegal to have multiple declaration of '")
- << symName << "'";
-
- return success();
-}
-
LogicalResult tosa::VariableReadOp::verify() {
if (verifyVariableOpErrorIf(*this, getOutput1().getType(), "'output1'")
.failed())
diff --git a/mlir/lib/Dialect/Transform/IR/TransformDialect.cpp b/mlir/lib/Dialect/Transform/IR/TransformDialect.cpp
index a500228..45cef9c1 100644
--- a/mlir/lib/Dialect/Transform/IR/TransformDialect.cpp
+++ b/mlir/lib/Dialect/Transform/IR/TransformDialect.cpp
@@ -13,6 +13,7 @@
#include "mlir/Dialect/Transform/IR/Utils.h"
#include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.h"
#include "mlir/IR/DialectImplementation.h"
+#include "mlir/IR/Verifier.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/TypeSwitch.h"
@@ -140,6 +141,20 @@ LogicalResult transform::TransformDialect::verifyOperationAttribute(
"operations with symbol tables";
}
+ // Pre-verify calls and callables because call graph construction below
+ // assumes they are valid, but this verifier runs before verifying the
+ // nested operations.
+ WalkResult walkResult = op->walk([](Operation *nested) {
+ if (!isa<CallableOpInterface, CallOpInterface>(nested))
+ return WalkResult::advance();
+
+ if (failed(verify(nested, /*verifyRecursively=*/false)))
+ return WalkResult::interrupt();
+ return WalkResult::advance();
+ });
+ if (walkResult.wasInterrupted())
+ return failure();
+
const mlir::CallGraph callgraph(op);
for (auto scc = llvm::scc_begin(&callgraph); !scc.isAtEnd(); ++scc) {
if (!scc.hasCycle())
diff --git a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp
index 3385b2a..365afab 100644
--- a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp
+++ b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp
@@ -2097,17 +2097,11 @@ void transform::IncludeOp::getEffects(
getOperation(), getTarget());
if (!callee)
return defaultEffects();
- DiagnosedSilenceableFailure earlyVerifierResult =
- verifyNamedSequenceOp(callee, /*emitWarnings=*/false);
- if (!earlyVerifierResult.succeeded()) {
- (void)earlyVerifierResult.silence();
- return defaultEffects();
- }
for (unsigned i = 0, e = getNumOperands(); i < e; ++i) {
if (callee.getArgAttr(i, TransformDialect::kArgConsumedAttrName))
consumesHandle(getOperation()->getOpOperand(i), effects);
- else
+ else if (callee.getArgAttr(i, TransformDialect::kArgReadOnlyAttrName))
onlyReadsHandle(getOperation()->getOpOperand(i), effects);
}
}
@@ -2597,10 +2591,7 @@ transform::NumAssociationsOp::apply(transform::TransformRewriter &rewriter,
.Case([&](TransformParamTypeInterface param) {
return llvm::range_size(state.getParams(getHandle()));
})
- .Default([](Type) {
- llvm_unreachable("unknown kind of transform dialect type");
- return 0;
- });
+ .DefaultUnreachable("unknown kind of transform dialect type");
results.setParams(cast<OpResult>(getNum()),
rewriter.getI64IntegerAttr(numAssociations));
return DiagnosedSilenceableFailure::success();
@@ -2657,10 +2648,7 @@ transform::SplitHandleOp::apply(transform::TransformRewriter &rewriter,
.Case<TransformParamTypeInterface>([&](auto x) {
return llvm::range_size(state.getParams(getHandle()));
})
- .Default([](auto x) {
- llvm_unreachable("unknown transform dialect type interface");
- return -1;
- });
+ .DefaultUnreachable("unknown transform dialect type interface");
auto produceNumOpsError = [&]() {
return emitSilenceableError()
diff --git a/mlir/lib/Interfaces/DataLayoutInterfaces.cpp b/mlir/lib/Interfaces/DataLayoutInterfaces.cpp
index 3b6330b..7823849 100644
--- a/mlir/lib/Interfaces/DataLayoutInterfaces.cpp
+++ b/mlir/lib/Interfaces/DataLayoutInterfaces.cpp
@@ -364,10 +364,7 @@ static DataLayoutSpecInterface getSpec(Operation *operation) {
return llvm::TypeSwitch<Operation *, DataLayoutSpecInterface>(operation)
.Case<ModuleOp, DataLayoutOpInterface>(
[&](auto op) { return op.getDataLayoutSpec(); })
- .Default([](Operation *) {
- llvm_unreachable("expected an op with data layout spec");
- return DataLayoutSpecInterface();
- });
+ .DefaultUnreachable("expected an op with data layout spec");
}
static TargetSystemSpecInterface getTargetSystemSpec(Operation *operation) {
diff --git a/mlir/lib/Rewrite/ByteCode.cpp b/mlir/lib/Rewrite/ByteCode.cpp
index 5cbea5d..33fbd2a 100644
--- a/mlir/lib/Rewrite/ByteCode.cpp
+++ b/mlir/lib/Rewrite/ByteCode.cpp
@@ -764,9 +764,7 @@ void Generator::generate(Operation *op, ByteCodeWriter &writer) {
pdl_interp::SwitchOperandCountOp, pdl_interp::SwitchOperationNameOp,
pdl_interp::SwitchResultCountOp>(
[&](auto interpOp) { this->generate(interpOp, writer); })
- .Default([](Operation *) {
- llvm_unreachable("unknown `pdl_interp` operation");
- });
+ .DefaultUnreachable("unknown `pdl_interp` operation");
}
void Generator::generate(pdl_interp::ApplyConstraintOp op,
@@ -913,9 +911,7 @@ void Generator::generate(pdl_interp::ExtractOp op, ByteCodeWriter &writer) {
.Case([](pdl::OperationType) { return OpCode::ExtractOp; })
.Case([](pdl::ValueType) { return OpCode::ExtractValue; })
.Case([](pdl::TypeType) { return OpCode::ExtractType; })
- .Default([](Type) -> OpCode {
- llvm_unreachable("unsupported element type");
- });
+ .DefaultUnreachable("unsupported element type");
writer.append(opCode, op.getRange(), op.getIndex(), op.getResult());
}
void Generator::generate(pdl_interp::FinalizeOp op, ByteCodeWriter &writer) {
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 9fcb02e..1e2099d 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -4716,10 +4716,7 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
info.HasNoWait = updateDataOp.getNowait();
return success();
})
- .Default([&](Operation *op) {
- llvm_unreachable("unexpected operation");
- return failure();
- });
+ .DefaultUnreachable("unexpected operation");
if (failed(result))
return failure();
@@ -5312,9 +5309,7 @@ extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads,
(void)found;
assert(found && "unsupported host_eval use");
})
- .Default([](Operation *) {
- llvm_unreachable("unsupported host_eval use");
- });
+ .DefaultUnreachable("unsupported host_eval use");
}
}
}
diff --git a/mlir/lib/Target/LLVMIR/TypeToLLVM.cpp b/mlir/lib/Target/LLVMIR/TypeToLLVM.cpp
index 4d20474..807a94c 100644
--- a/mlir/lib/Target/LLVMIR/TypeToLLVM.cpp
+++ b/mlir/lib/Target/LLVMIR/TypeToLLVM.cpp
@@ -74,9 +74,7 @@ public:
LLVM::LLVMPointerType, LLVM::LLVMStructType, VectorType,
LLVM::LLVMTargetExtType, PtrLikeTypeInterface>(
[this](auto type) { return this->translate(type); })
- .Default([](Type t) -> llvm::Type * {
- llvm_unreachable("unknown LLVM dialect type");
- });
+ .DefaultUnreachable("unknown LLVM dialect type");
// Cache the result of the conversion and return.
knownTranslations.try_emplace(type, translated);
diff --git a/mlir/lib/Tools/PDLL/AST/NodePrinter.cpp b/mlir/lib/Tools/PDLL/AST/NodePrinter.cpp
index e2c987a..f49d3d0 100644
--- a/mlir/lib/Tools/PDLL/AST/NodePrinter.cpp
+++ b/mlir/lib/Tools/PDLL/AST/NodePrinter.cpp
@@ -154,7 +154,7 @@ void NodePrinter::print(Type type) {
})
.Case([&](TypeType) { os << "Type"; })
.Case([&](ValueType) { os << "Value"; })
- .Default([](Type) { llvm_unreachable("unknown AST type"); });
+ .DefaultUnreachable("unknown AST type");
}
void NodePrinter::print(const Node *node) {
@@ -182,7 +182,7 @@ void NodePrinter::print(const Node *node) {
const VariableDecl,
const Module>([&](auto derivedNode) { this->printImpl(derivedNode); })
- .Default([](const Node *) { llvm_unreachable("unknown AST node"); });
+ .DefaultUnreachable("unknown AST node");
elementIndentStack.pop_back();
}
diff --git a/mlir/lib/Tools/PDLL/AST/Nodes.cpp b/mlir/lib/Tools/PDLL/AST/Nodes.cpp
index 159ce62..5aa0937 100644
--- a/mlir/lib/Tools/PDLL/AST/Nodes.cpp
+++ b/mlir/lib/Tools/PDLL/AST/Nodes.cpp
@@ -72,7 +72,7 @@ public:
const Module>(
[&](auto derivedNode) { this->visitImpl(derivedNode); })
- .Default([](const Node *) { llvm_unreachable("unknown AST node"); });
+ .DefaultUnreachable("unknown AST node");
}
private:
diff --git a/mlir/python/CMakeLists.txt b/mlir/python/CMakeLists.txt
index 9f5246d..cea5b25 100644
--- a/mlir/python/CMakeLists.txt
+++ b/mlir/python/CMakeLists.txt
@@ -440,11 +440,11 @@ declare_mlir_dialect_python_bindings(
DIALECT_NAME smt)
declare_mlir_dialect_python_bindings(
- ADD_TO_PARENT MLIRPythonSources.Dialects
- ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir"
- TD_FILE dialects/SPIRVOps.td
- SOURCES dialects/spirv.py
- DIALECT_NAME spirv)
+ ADD_TO_PARENT MLIRPythonSources.Dialects
+ ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir"
+ TD_FILE dialects/SPIRVOps.td
+ SOURCES dialects/spirv.py
+ DIALECT_NAME spirv)
declare_mlir_dialect_python_bindings(
ADD_TO_PARENT MLIRPythonSources.Dialects
@@ -501,7 +501,6 @@ declare_mlir_python_extension(MLIRPythonExtension.Core
MODULE_NAME _mlir
ADD_TO_PARENT MLIRPythonSources.Core
ROOT_DIR "${PYTHON_SOURCE_DIR}"
- PYTHON_BINDINGS_LIBRARY nanobind
SOURCES
MainModule.cpp
IRAffine.cpp
@@ -540,7 +539,6 @@ declare_mlir_python_extension(MLIRPythonExtension.Core
declare_mlir_python_extension(MLIRPythonExtension.RegisterEverything
MODULE_NAME _mlirRegisterEverything
ROOT_DIR "${PYTHON_SOURCE_DIR}"
- PYTHON_BINDINGS_LIBRARY nanobind
SOURCES
RegisterEverything.cpp
PRIVATE_LINK_LIBS
@@ -551,11 +549,10 @@ declare_mlir_python_extension(MLIRPythonExtension.RegisterEverything
MLIRCAPIRegisterEverything
)
-declare_mlir_python_extension(MLIRPythonExtension.Dialects.Linalg.Pybind
+declare_mlir_python_extension(MLIRPythonExtension.Dialects.Linalg.Nanobind
MODULE_NAME _mlirDialectsLinalg
ADD_TO_PARENT MLIRPythonSources.Dialects.linalg
ROOT_DIR "${PYTHON_SOURCE_DIR}"
- PYTHON_BINDINGS_LIBRARY nanobind
SOURCES
DialectLinalg.cpp
PRIVATE_LINK_LIBS
@@ -565,11 +562,10 @@ declare_mlir_python_extension(MLIRPythonExtension.Dialects.Linalg.Pybind
MLIRCAPILinalg
)
-declare_mlir_python_extension(MLIRPythonExtension.Dialects.GPU.Pybind
+declare_mlir_python_extension(MLIRPythonExtension.Dialects.GPU.Nanobind
MODULE_NAME _mlirDialectsGPU
ADD_TO_PARENT MLIRPythonSources.Dialects.gpu
ROOT_DIR "${PYTHON_SOURCE_DIR}"
- PYTHON_BINDINGS_LIBRARY nanobind
SOURCES
DialectGPU.cpp
PRIVATE_LINK_LIBS
@@ -579,11 +575,10 @@ declare_mlir_python_extension(MLIRPythonExtension.Dialects.GPU.Pybind
MLIRCAPIGPU
)
-declare_mlir_python_extension(MLIRPythonExtension.Dialects.LLVM.Pybind
+declare_mlir_python_extension(MLIRPythonExtension.Dialects.LLVM.Nanobind
MODULE_NAME _mlirDialectsLLVM
ADD_TO_PARENT MLIRPythonSources.Dialects.llvm
ROOT_DIR "${PYTHON_SOURCE_DIR}"
- PYTHON_BINDINGS_LIBRARY nanobind
SOURCES
DialectLLVM.cpp
PRIVATE_LINK_LIBS
@@ -593,11 +588,10 @@ declare_mlir_python_extension(MLIRPythonExtension.Dialects.LLVM.Pybind
MLIRCAPILLVM
)
-declare_mlir_python_extension(MLIRPythonExtension.Dialects.Quant.Pybind
+declare_mlir_python_extension(MLIRPythonExtension.Dialects.Quant.Nanobind
MODULE_NAME _mlirDialectsQuant
ADD_TO_PARENT MLIRPythonSources.Dialects.quant
ROOT_DIR "${PYTHON_SOURCE_DIR}"
- PYTHON_BINDINGS_LIBRARY nanobind
SOURCES
DialectQuant.cpp
PRIVATE_LINK_LIBS
@@ -607,11 +601,10 @@ declare_mlir_python_extension(MLIRPythonExtension.Dialects.Quant.Pybind
MLIRCAPIQuant
)
-declare_mlir_python_extension(MLIRPythonExtension.Dialects.NVGPU.Pybind
+declare_mlir_python_extension(MLIRPythonExtension.Dialects.NVGPU.Nanobind
MODULE_NAME _mlirDialectsNVGPU
ADD_TO_PARENT MLIRPythonSources.Dialects.nvgpu
ROOT_DIR "${PYTHON_SOURCE_DIR}"
- PYTHON_BINDINGS_LIBRARY nanobind
SOURCES
DialectNVGPU.cpp
PRIVATE_LINK_LIBS
@@ -621,11 +614,10 @@ declare_mlir_python_extension(MLIRPythonExtension.Dialects.NVGPU.Pybind
MLIRCAPINVGPU
)
-declare_mlir_python_extension(MLIRPythonExtension.Dialects.PDL.Pybind
+declare_mlir_python_extension(MLIRPythonExtension.Dialects.PDL.Nanobind
MODULE_NAME _mlirDialectsPDL
ADD_TO_PARENT MLIRPythonSources.Dialects.pdl
ROOT_DIR "${PYTHON_SOURCE_DIR}"
- PYTHON_BINDINGS_LIBRARY nanobind
SOURCES
DialectPDL.cpp
PRIVATE_LINK_LIBS
@@ -635,11 +627,10 @@ declare_mlir_python_extension(MLIRPythonExtension.Dialects.PDL.Pybind
MLIRCAPIPDL
)
-declare_mlir_python_extension(MLIRPythonExtension.Dialects.SparseTensor.Pybind
+declare_mlir_python_extension(MLIRPythonExtension.Dialects.SparseTensor.Nanobind
MODULE_NAME _mlirDialectsSparseTensor
ADD_TO_PARENT MLIRPythonSources.Dialects.sparse_tensor
ROOT_DIR "${PYTHON_SOURCE_DIR}"
- PYTHON_BINDINGS_LIBRARY nanobind
SOURCES
DialectSparseTensor.cpp
PRIVATE_LINK_LIBS
@@ -649,11 +640,10 @@ declare_mlir_python_extension(MLIRPythonExtension.Dialects.SparseTensor.Pybind
MLIRCAPISparseTensor
)
-declare_mlir_python_extension(MLIRPythonExtension.Dialects.Transform.Pybind
+declare_mlir_python_extension(MLIRPythonExtension.Dialects.Transform.Nanobind
MODULE_NAME _mlirDialectsTransform
ADD_TO_PARENT MLIRPythonSources.Dialects.transform
ROOT_DIR "${PYTHON_SOURCE_DIR}"
- PYTHON_BINDINGS_LIBRARY nanobind
SOURCES
DialectTransform.cpp
PRIVATE_LINK_LIBS
@@ -663,11 +653,10 @@ declare_mlir_python_extension(MLIRPythonExtension.Dialects.Transform.Pybind
MLIRCAPITransformDialect
)
-declare_mlir_python_extension(MLIRPythonExtension.Dialects.IRDL.Pybind
+declare_mlir_python_extension(MLIRPythonExtension.Dialects.IRDL.Nanobind
MODULE_NAME _mlirDialectsIRDL
ADD_TO_PARENT MLIRPythonSources.Dialects.irdl
ROOT_DIR "${PYTHON_SOURCE_DIR}"
- PYTHON_BINDINGS_LIBRARY nanobind
SOURCES
DialectIRDL.cpp
PRIVATE_LINK_LIBS
@@ -681,7 +670,6 @@ declare_mlir_python_extension(MLIRPythonExtension.AsyncDialectPasses
MODULE_NAME _mlirAsyncPasses
ADD_TO_PARENT MLIRPythonSources.Dialects.async
ROOT_DIR "${PYTHON_SOURCE_DIR}"
- PYTHON_BINDINGS_LIBRARY nanobind
SOURCES
AsyncPasses.cpp
PRIVATE_LINK_LIBS
@@ -695,7 +683,6 @@ if(MLIR_ENABLE_EXECUTION_ENGINE)
MODULE_NAME _mlirExecutionEngine
ADD_TO_PARENT MLIRPythonSources.ExecutionEngine
ROOT_DIR "${PYTHON_SOURCE_DIR}"
- PYTHON_BINDINGS_LIBRARY nanobind
SOURCES
ExecutionEngineModule.cpp
PRIVATE_LINK_LIBS
@@ -709,7 +696,6 @@ declare_mlir_python_extension(MLIRPythonExtension.GPUDialectPasses
MODULE_NAME _mlirGPUPasses
ADD_TO_PARENT MLIRPythonSources.Dialects.gpu
ROOT_DIR "${PYTHON_SOURCE_DIR}"
- PYTHON_BINDINGS_LIBRARY nanobind
SOURCES
GPUPasses.cpp
PRIVATE_LINK_LIBS
@@ -722,7 +708,6 @@ declare_mlir_python_extension(MLIRPythonExtension.LinalgPasses
MODULE_NAME _mlirLinalgPasses
ADD_TO_PARENT MLIRPythonSources.Dialects.linalg
ROOT_DIR "${PYTHON_SOURCE_DIR}"
- PYTHON_BINDINGS_LIBRARY nanobind
SOURCES
LinalgPasses.cpp
PRIVATE_LINK_LIBS
@@ -731,11 +716,10 @@ declare_mlir_python_extension(MLIRPythonExtension.LinalgPasses
MLIRCAPILinalg
)
-declare_mlir_python_extension(MLIRPythonExtension.Dialects.SMT.Pybind
+declare_mlir_python_extension(MLIRPythonExtension.Dialects.SMT.Nanobind
MODULE_NAME _mlirDialectsSMT
ADD_TO_PARENT MLIRPythonSources.Dialects.smt
ROOT_DIR "${PYTHON_SOURCE_DIR}"
- PYTHON_BINDINGS_LIBRARY nanobind
SOURCES
DialectSMT.cpp
# Headers must be included explicitly so they are installed.
@@ -752,7 +736,6 @@ declare_mlir_python_extension(MLIRPythonExtension.SparseTensorDialectPasses
MODULE_NAME _mlirSparseTensorPasses
ADD_TO_PARENT MLIRPythonSources.Dialects.sparse_tensor
ROOT_DIR "${PYTHON_SOURCE_DIR}"
- PYTHON_BINDINGS_LIBRARY nanobind
SOURCES
SparseTensorPasses.cpp
PRIVATE_LINK_LIBS
@@ -765,7 +748,6 @@ declare_mlir_python_extension(MLIRPythonExtension.TransformInterpreter
MODULE_NAME _mlirTransformInterpreter
ADD_TO_PARENT MLIRPythonSources.Dialects.transform
ROOT_DIR "${PYTHON_SOURCE_DIR}"
- PYTHON_BINDINGS_LIBRARY nanobind
SOURCES
TransformInterpreter.cpp
PRIVATE_LINK_LIBS
@@ -807,23 +789,10 @@ if(MLIR_INCLUDE_TESTS)
ADD_TO_PARENT MLIRPythonTestSources.Dialects.PythonTest
SOURCES "dialects/_python_test_ops_gen.py")
- declare_mlir_python_extension(MLIRPythonTestSources.PythonTestExtensionPybind11
- MODULE_NAME _mlirPythonTestPybind11
- ADD_TO_PARENT MLIRPythonTestSources.Dialects
- ROOT_DIR "${MLIR_SOURCE_DIR}/test/python/lib"
- PYTHON_BINDINGS_LIBRARY pybind11
- SOURCES
- PythonTestModulePybind11.cpp
- PRIVATE_LINK_LIBS
- LLVMSupport
- EMBED_CAPI_LINK_LIBS
- MLIRCAPIPythonTestDialect
- )
declare_mlir_python_extension(MLIRPythonTestSources.PythonTestExtensionNanobind
MODULE_NAME _mlirPythonTestNanobind
ADD_TO_PARENT MLIRPythonTestSources.Dialects
ROOT_DIR "${MLIR_SOURCE_DIR}/test/python/lib"
- PYTHON_BINDINGS_LIBRARY nanobind
SOURCES
PythonTestModuleNanobind.cpp
PRIVATE_LINK_LIBS
diff --git a/mlir/python/mlir/dialects/python_test.py b/mlir/python/mlir/dialects/python_test.py
index 9380896..56d3c0f 100644
--- a/mlir/python/mlir/dialects/python_test.py
+++ b/mlir/python/mlir/dialects/python_test.py
@@ -5,12 +5,7 @@
from ._python_test_ops_gen import *
-def register_python_test_dialect(registry, use_nanobind):
- if use_nanobind:
- from .._mlir_libs import _mlirPythonTestNanobind
+def register_python_test_dialect(registry):
+ from .._mlir_libs import _mlirPythonTestNanobind
- _mlirPythonTestNanobind.register_dialect(registry)
- else:
- from .._mlir_libs import _mlirPythonTestPybind11
-
- _mlirPythonTestPybind11.register_dialect(registry)
+ _mlirPythonTestNanobind.register_dialect(registry)
diff --git a/mlir/python/requirements.txt b/mlir/python/requirements.txt
index abe0925..5ff9500 100644
--- a/mlir/python/requirements.txt
+++ b/mlir/python/requirements.txt
@@ -1,6 +1,4 @@
-nanobind>=2.9, <3.0
numpy>=1.19.5, <=2.1.2
-pybind11>=2.10.0, <=2.13.6
PyYAML>=5.4.0, <=6.0.1
ml_dtypes>=0.1.0, <=0.6.0; python_version<"3.13" # provides several NumPy dtype extensions, including the bf16
ml_dtypes>=0.5.0, <=0.6.0; python_version>="3.13"
diff --git a/mlir/test/Dialect/Tosa/invalid.mlir b/mlir/test/Dialect/Tosa/invalid.mlir
index 41c3243..e60f1c9b 100644
--- a/mlir/test/Dialect/Tosa/invalid.mlir
+++ b/mlir/test/Dialect/Tosa/invalid.mlir
@@ -573,64 +573,61 @@ func.func @test_avg_pool2d_zero_dim_input(%arg0: tensor<1x0x?x9xf32>, %arg1: ten
// -----
-func.func @test_variable_unranked(%arg0: tensor<2x4x8xi8>) -> () {
+module {
tosa.variable @stored_var : tensor<*xi8>
// expected-error@+1 {{custom op 'tosa.variable' expected ranked type}}
- return
}
// -----
-func.func @test_variable_unranked_initial_value(%arg0: tensor<2x4x8xi8>) -> () {
+module {
// expected-error@+1 {{elements literal type must have static shape}}
tosa.variable @stored_var = dense<0> : tensor<*xi8>
// expected-error@+1 {{custom op 'tosa.variable' expected attribute}}
- return
-}
-
-// -----
-
-func.func @test_variable_duplicates(%arg0: tensor<2x4x8xi8>) -> () {
- tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi8>
- // expected-error@+1 {{'tosa.variable' op illegal to have multiple declaration of 'stored_var'}}
- tosa.variable @stored_var = dense<3> : tensor<1x4x8xi8>
- return
}
// -----
-func.func @test_variable_read_type(%arg0: tensor<2x4x8xi8>) -> () {
+module {
tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi8>
- // expected-error@+1 {{'tosa.variable_read' op require same element type for 'output1' ('i16') and the input tensor ('i8')}}
- %0 = tosa.variable_read @stored_var : tensor<2x4x8xi16>
- return
+ func.func @test_variable_read_type(%arg0: tensor<2x4x8xi8>) -> () {
+ // expected-error@+1 {{'tosa.variable_read' op require same element type for 'output1' ('i16') and the input tensor ('i8')}}
+ %0 = tosa.variable_read @stored_var : tensor<2x4x8xi16>
+ return
+ }
}
// -----
-func.func @test_variable_read_shape(%arg0: tensor<2x4x8xi8>) -> () {
+module {
tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi8>
- // expected-error@+1 {{'tosa.variable_read' op require same element type for 'output1' ('i32') and the input tensor ('i8'}}
- %0 = tosa.variable_read @stored_var : tensor<1x4x8xi32>
- return
+ func.func @test_variable_read_shape(%arg0: tensor<2x4x8xi8>) -> () {
+ // expected-error@+1 {{'tosa.variable_read' op require same element type for 'output1' ('i32') and the input tensor ('i8'}}
+ %0 = tosa.variable_read @stored_var : tensor<1x4x8xi32>
+ return
+ }
}
// -----
-func.func @test_variable_write_type(%arg0: tensor<2x4x8xi16>) -> () {
+module {
tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi8>
- // expected-error@+1 {{'tosa.variable_write' op require same element type for 'input1' ('i16') and the input tensor ('i8')}}
- tosa.variable_write @stored_var, %arg0 : tensor<2x4x8xi16>
- return
+ func.func @test_variable_write_type(%arg0: tensor<2x4x8xi16>) -> () {
+ // expected-error@+1 {{'tosa.variable_write' op require same element type for 'input1' ('i16') and the input tensor ('i8')}}
+ tosa.variable_write @stored_var, %arg0 : tensor<2x4x8xi16>
+ return
+ }
}
// -----
-func.func @test_variable_write_shape(%arg0: tensor<1x4x8xi8>) -> () {
+module {
tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi8>
- // expected-error@+1 {{'tosa.variable_write' op require same shapes for 'input1' ('tensor<1x4x8xi8>') and the input tensor ('tensor<2x4x8xi8>')}}
- tosa.variable_write @stored_var, %arg0 : tensor<1x4x8xi8>
- return
+ func.func @test_variable_write_shape(%arg0: tensor<1x4x8xi8>) -> () {
+ // expected-error@+1 {{'tosa.variable_write' op require same shapes for 'input1' ('tensor<1x4x8xi8>') and the input tensor ('tensor<2x4x8xi8>')}}
+ tosa.variable_write @stored_var, %arg0 : tensor<1x4x8xi8>
+ return
+ }
}
// -----
diff --git a/mlir/test/Dialect/Tosa/invalid_extension.mlir b/mlir/test/Dialect/Tosa/invalid_extension.mlir
index 3138ce2..1daabe9 100644
--- a/mlir/test/Dialect/Tosa/invalid_extension.mlir
+++ b/mlir/test/Dialect/Tosa/invalid_extension.mlir
@@ -310,21 +310,27 @@ func.func @test_identity(%arg0: tensor<13x21x3xi4>) -> tensor<13x21x3xi4> {
}
// -----
-func.func @test_variable_read_type(%arg0: tensor<2x4x8xi8>) -> () {
+module {
// expected-error@+1 {{'tosa.variable' op illegal: requires [variable] but not enabled in target}}
tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi8>
- // expected-error@+1 {{'tosa.variable_read' op illegal: requires [variable]}}
- %0 = tosa.variable_read @stored_var : tensor<2x4x8xi8>
- return
+
+ func.func @test_variable_read_type(%arg0: tensor<2x4x8xi8>) -> () {
+ // expected-error@+1 {{'tosa.variable_read' op illegal: requires [variable]}}
+ %0 = tosa.variable_read @stored_var : tensor<2x4x8xi8>
+ return
+ }
}
// -----
-func.func @test_variable_write_type(%arg0: tensor<2x4x8xi8>) -> () {
+module {
// expected-error@+1 {{'tosa.variable' op illegal: requires [variable] but not enabled in target}}
tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi8>
- // expected-error@+1 {{'tosa.variable_write' op illegal: requires [variable]}}
- tosa.variable_write @stored_var, %arg0 : tensor<2x4x8xi8>
- return
+
+ func.func @test_variable_write_type(%arg0: tensor<2x4x8xi8>) -> () {
+ // expected-error@+1 {{'tosa.variable_write' op illegal: requires [variable]}}
+ tosa.variable_write @stored_var, %arg0 : tensor<2x4x8xi8>
+ return
+ }
}
// -----
diff --git a/mlir/test/Dialect/Tosa/level_check.mlir b/mlir/test/Dialect/Tosa/level_check.mlir
index 3742adf..5bf2dbb8 100644
--- a/mlir/test/Dialect/Tosa/level_check.mlir
+++ b/mlir/test/Dialect/Tosa/level_check.mlir
@@ -1097,14 +1097,17 @@ func.func @test_scatter_tensor_size_invalid(%arg0: tensor<13x260000000x3xf32>, %
// -----
-func.func @test_variable_read_write_tensor_size_invalid() -> () {
+module {
// expected-error@+1 {{'tosa.variable' op failed level check: variable type tensor size (in bytes) <= (1 << MAX_LOG2_SIZE - 1)}}
tosa.variable @stored_var : tensor<536870912xf32>
- // expected-error@+1 {{'tosa.variable_read' op failed level check: result tensor size (in bytes) <= (1 << MAX_LOG2_SIZE - 1)}}
- %0 = tosa.variable_read @stored_var : tensor<536870912xf32>
- // expected-error@+1 {{'tosa.variable_write' op failed level check: operand tensor size (in bytes) <= (1 << MAX_LOG2_SIZE - 1)}}
- tosa.variable_write @stored_var, %0 : tensor<536870912xf32>
- return
+
+ func.func @test_variable_read_write_tensor_size_invalid() -> () {
+ // expected-error@+1 {{'tosa.variable_read' op failed level check: result tensor size (in bytes) <= (1 << MAX_LOG2_SIZE - 1)}}
+ %0 = tosa.variable_read @stored_var : tensor<536870912xf32>
+ // expected-error@+1 {{'tosa.variable_write' op failed level check: operand tensor size (in bytes) <= (1 << MAX_LOG2_SIZE - 1)}}
+ tosa.variable_write @stored_var, %0 : tensor<536870912xf32>
+ return
+ }
}
// -----
@@ -1165,14 +1168,17 @@ func.func @test_cond_if_rank_invalid(%arg0: tensor<1x1x1x1x1x1x1x1xf32>, %arg1:
// -----
-func.func @test_variable_read_write_rank_invalid() -> () {
+module {
// expected-error@+1 {{'tosa.variable' op failed level check: variable type rank(shape) <= MAX_RANK}}
tosa.variable @stored_var : tensor<1x1x1x1x1x1x1x1xf32>
- // expected-error@+1 {{'tosa.variable_read' op failed level check: result rank(shape) <= MAX_RANK}}
- %0 = tosa.variable_read @stored_var : tensor<1x1x1x1x1x1x1x1xf32>
- // expected-error@+1 {{'tosa.variable_write' op failed level check: operand rank(shape) <= MAX_RANK}}
- tosa.variable_write @stored_var, %0 : tensor<1x1x1x1x1x1x1x1xf32>
- return
+
+ func.func @test_variable_read_write_rank_invalid() -> () {
+ // expected-error@+1 {{'tosa.variable_read' op failed level check: result rank(shape) <= MAX_RANK}}
+ %0 = tosa.variable_read @stored_var : tensor<1x1x1x1x1x1x1x1xf32>
+ // expected-error@+1 {{'tosa.variable_write' op failed level check: operand rank(shape) <= MAX_RANK}}
+ tosa.variable_write @stored_var, %0 : tensor<1x1x1x1x1x1x1x1xf32>
+ return
+ }
}
// -----
diff --git a/mlir/test/Dialect/Tosa/variables.mlir b/mlir/test/Dialect/Tosa/variables.mlir
index 9953eb3..0c104e8 100644
--- a/mlir/test/Dialect/Tosa/variables.mlir
+++ b/mlir/test/Dialect/Tosa/variables.mlir
@@ -3,76 +3,98 @@
// -----
-// CHECK-LABEL: @test_variable_scalar(
-// CHECK-SAME: %[[ADD_VAL:.*]]: tensor<f32>) {
-func.func @test_variable_scalar(%arg0: tensor<f32>) -> () {
- // CHECK: tosa.variable @stored_var = dense<3.140000e+00> : tensor<f32>
+
+module {
+ // CHECK: tosa.variable @stored_var = dense<3.140000e+00> : tensor<f32>
tosa.variable @stored_var = dense<3.14> : tensor<f32>
- // CHECK: %[[STORED_VAL:.*]] = tosa.variable_read @stored_var : tensor<f32>
- %0 = tosa.variable_read @stored_var : tensor<f32>
- // CHECK: %[[RESULT_ADD:.*]] = tosa.add %[[ADD_VAL]], %[[STORED_VAL]] : (tensor<f32>, tensor<f32>) -> tensor<f32>
- %1 = "tosa.add"(%arg0, %0) : (tensor<f32>, tensor<f32>) -> tensor<f32>
- // CHECK: tosa.variable_write @stored_var, %[[RESULT_ADD]] : tensor<f32>
- tosa.variable_write @stored_var, %1 : tensor<f32>
- return
+
+ // CHECK-LABEL: @test_variable_scalar(
+ // CHECK-SAME: %[[ADD_VAL:.*]]: tensor<f32>) {
+ func.func @test_variable_scalar(%arg0: tensor<f32>) -> () {
+ // CHECK: %[[STORED_VAL:.*]] = tosa.variable_read @stored_var : tensor<f32>
+ %0 = tosa.variable_read @stored_var : tensor<f32>
+ // CHECK: %[[RESULT_ADD:.*]] = tosa.add %[[ADD_VAL]], %[[STORED_VAL]] : (tensor<f32>, tensor<f32>) -> tensor<f32>
+ %1 = "tosa.add"(%arg0, %0) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+ // CHECK: tosa.variable_write @stored_var, %[[RESULT_ADD]] : tensor<f32>
+ tosa.variable_write @stored_var, %1 : tensor<f32>
+ return
+ }
}
+
// -----
-// CHECK-LABEL: @test_variable_tensor(
-// CHECK-SAME: %[[ADD_VAL:.*]]: tensor<2x4x8xi32>) {
-func.func @test_variable_tensor(%arg0: tensor<2x4x8xi32>) -> () {
- // CHECK: tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi32>
+
+module {
+ // CHECK: tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi32>
tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi32>
- // CHECK: %[[STORED_VAL:.*]] = tosa.variable_read @stored_var : tensor<2x4x8xi32>
- %0 = tosa.variable_read @stored_var : tensor<2x4x8xi32>
- // CHECK: %[[RESULT_ADD:.*]] = tosa.add %[[ADD_VAL]], %[[STORED_VAL]] : (tensor<2x4x8xi32>, tensor<2x4x8xi32>) -> tensor<2x4x8xi32>
- %1 = "tosa.add"(%arg0, %0) : (tensor<2x4x8xi32>, tensor<2x4x8xi32>) -> tensor<2x4x8xi32>
- // CHECK: tosa.variable_write @stored_var, %[[RESULT_ADD]] : tensor<2x4x8xi32>
- tosa.variable_write @stored_var, %1 : tensor<2x4x8xi32>
- return
+
+ // CHECK-LABEL: @test_variable_tensor(
+ // CHECK-SAME: %[[ADD_VAL:.*]]: tensor<2x4x8xi32>) {
+ func.func @test_variable_tensor(%arg0: tensor<2x4x8xi32>) -> () {
+ // CHECK: %[[STORED_VAL:.*]] = tosa.variable_read @stored_var : tensor<2x4x8xi32>
+ %0 = tosa.variable_read @stored_var : tensor<2x4x8xi32>
+ // CHECK: %[[RESULT_ADD:.*]] = tosa.add %[[ADD_VAL]], %[[STORED_VAL]] : (tensor<2x4x8xi32>, tensor<2x4x8xi32>) -> tensor<2x4x8xi32>
+ %1 = "tosa.add"(%arg0, %0) : (tensor<2x4x8xi32>, tensor<2x4x8xi32>) -> tensor<2x4x8xi32>
+ // CHECK: tosa.variable_write @stored_var, %[[RESULT_ADD]] : tensor<2x4x8xi32>
+ tosa.variable_write @stored_var, %1 : tensor<2x4x8xi32>
+ return
+ }
}
// -----
-// CHECK-LABEL: @test_variable_scalar_no_initial_value(
-// CHECK-SAME: %[[ADD_VAL:.*]]: tensor<f32>) {
-func.func @test_variable_scalar_no_initial_value(%arg0: tensor<f32>) -> () {
- // CHECK: tosa.variable @stored_var : tensor<f32>
+
+module {
+ // CHECK: tosa.variable @stored_var : tensor<f32>
tosa.variable @stored_var : tensor<f32>
- // CHECK: %[[STORED_VAL:.*]] = tosa.variable_read @stored_var : tensor<f32>
- %0 = tosa.variable_read @stored_var : tensor<f32>
- // CHECK: %[[RESULT_ADD:.*]] = tosa.add %[[ADD_VAL]], %[[STORED_VAL]] : (tensor<f32>, tensor<f32>) -> tensor<f32>
- %1 = "tosa.add"(%arg0, %0) : (tensor<f32>, tensor<f32>) -> tensor<f32>
- // CHECK: tosa.variable_write @stored_var, %[[RESULT_ADD]] : tensor<f32>
- tosa.variable_write @stored_var, %1 : tensor<f32>
- return
+
+ // CHECK-LABEL: @test_variable_scalar_no_initial_value(
+ // CHECK-SAME: %[[ADD_VAL:.*]]: tensor<f32>) {
+ func.func @test_variable_scalar_no_initial_value(%arg0: tensor<f32>) -> () {
+ // CHECK: %[[STORED_VAL:.*]] = tosa.variable_read @stored_var : tensor<f32>
+ %0 = tosa.variable_read @stored_var : tensor<f32>
+ // CHECK: %[[RESULT_ADD:.*]] = tosa.add %[[ADD_VAL]], %[[STORED_VAL]] : (tensor<f32>, tensor<f32>) -> tensor<f32>
+ %1 = "tosa.add"(%arg0, %0) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+ // CHECK: tosa.variable_write @stored_var, %[[RESULT_ADD]] : tensor<f32>
+ tosa.variable_write @stored_var, %1 : tensor<f32>
+ return
+ }
}
// -----
-// CHECK-LABEL: @test_variable_tensor_no_initial_value(
-// CHECK-SAME: %[[ADD_VAL:.*]]: tensor<2x4x8xi32>) {
-func.func @test_variable_tensor_no_initial_value(%arg0: tensor<2x4x8xi32>) -> () {
- // CHECK: tosa.variable @stored_var : tensor<2x4x8xi32>
+
+module {
+ // CHECK: tosa.variable @stored_var : tensor<2x4x8xi32>
tosa.variable @stored_var : tensor<2x4x8xi32>
- // CHECK: %[[STORED_VAL:.*]] = tosa.variable_read @stored_var : tensor<2x4x8xi32>
- %0 = tosa.variable_read @stored_var : tensor<2x4x8xi32>
- // CHECK: %[[RESULT_ADD:.*]] = tosa.add %[[ADD_VAL]], %[[STORED_VAL]] : (tensor<2x4x8xi32>, tensor<2x4x8xi32>) -> tensor<2x4x8xi32>
- %1 = "tosa.add"(%arg0, %0) : (tensor<2x4x8xi32>, tensor<2x4x8xi32>) -> tensor<2x4x8xi32>
- // CHECK: tosa.variable_write @stored_var, %[[RESULT_ADD]] : tensor<2x4x8xi32>
- tosa.variable_write @stored_var, %1 : tensor<2x4x8xi32>
- return
+
+ // CHECK-LABEL: @test_variable_tensor_no_initial_value(
+ // CHECK-SAME: %[[ADD_VAL:.*]]: tensor<2x4x8xi32>) {
+ func.func @test_variable_tensor_no_initial_value(%arg0: tensor<2x4x8xi32>) -> () {
+ // CHECK: %[[STORED_VAL:.*]] = tosa.variable_read @stored_var : tensor<2x4x8xi32>
+ %0 = tosa.variable_read @stored_var : tensor<2x4x8xi32>
+ // CHECK: %[[RESULT_ADD:.*]] = tosa.add %[[ADD_VAL]], %[[STORED_VAL]] : (tensor<2x4x8xi32>, tensor<2x4x8xi32>) -> tensor<2x4x8xi32>
+ %1 = "tosa.add"(%arg0, %0) : (tensor<2x4x8xi32>, tensor<2x4x8xi32>) -> tensor<2x4x8xi32>
+ // CHECK: tosa.variable_write @stored_var, %[[RESULT_ADD]] : tensor<2x4x8xi32>
+ tosa.variable_write @stored_var, %1 : tensor<2x4x8xi32>
+ return
+ }
}
+
// -----
-// CHECK-LABEL: @test_variable_tensor_with_unknowns(
-// CHECK-SAME: %[[ADD_VAL:.*]]: tensor<2x4x8xi32>) {
-func.func @test_variable_tensor_with_unknowns(%arg0: tensor<2x4x8xi32>) -> () {
- // CHECK: tosa.variable @stored_var : tensor<2x?x8xi32>
+
+module {
+ // CHECK: tosa.variable @stored_var : tensor<2x?x8xi32>
tosa.variable @stored_var : tensor<2x?x8xi32>
- // CHECK: %[[STORED_VAL:.*]] = tosa.variable_read @stored_var : tensor<2x4x8xi32>
- %0 = tosa.variable_read @stored_var : tensor<2x4x8xi32>
- // CHECK: %[[RESULT_ADD:.*]] = tosa.add %[[ADD_VAL]], %[[STORED_VAL]] : (tensor<2x4x8xi32>, tensor<2x4x8xi32>) -> tensor<2x4x8xi32>
- %1 = "tosa.add"(%arg0, %0) : (tensor<2x4x8xi32>, tensor<2x4x8xi32>) -> tensor<2x4x8xi32>
- // CHECK: tosa.variable_write @stored_var, %[[RESULT_ADD]] : tensor<2x4x8xi32>
- tosa.variable_write @stored_var, %1 : tensor<2x4x8xi32>
- return
+
+ // CHECK-LABEL: @test_variable_tensor_with_unknowns(
+ // CHECK-SAME: %[[ADD_VAL:.*]]: tensor<2x4x8xi32>) {
+ func.func @test_variable_tensor_with_unknowns(%arg0: tensor<2x4x8xi32>) -> () {
+ // CHECK: %[[STORED_VAL:.*]] = tosa.variable_read @stored_var : tensor<2x4x8xi32>
+ %0 = tosa.variable_read @stored_var : tensor<2x4x8xi32>
+ // CHECK: %[[RESULT_ADD:.*]] = tosa.add %[[ADD_VAL]], %[[STORED_VAL]] : (tensor<2x4x8xi32>, tensor<2x4x8xi32>) -> tensor<2x4x8xi32>
+ %1 = "tosa.add"(%arg0, %0) : (tensor<2x4x8xi32>, tensor<2x4x8xi32>) -> tensor<2x4x8xi32>
+ // CHECK: tosa.variable_write @stored_var, %[[RESULT_ADD]] : tensor<2x4x8xi32>
+ tosa.variable_write @stored_var, %1 : tensor<2x4x8xi32>
+ return
+ }
}
diff --git a/mlir/test/Dialect/Tosa/verifier.mlir b/mlir/test/Dialect/Tosa/verifier.mlir
index 0128da7..430b06a 100644
--- a/mlir/test/Dialect/Tosa/verifier.mlir
+++ b/mlir/test/Dialect/Tosa/verifier.mlir
@@ -944,29 +944,27 @@ func.func @test_while_loop_cond_output_not_bool(%arg0: tensor<10xi32>, %arg1: te
// -----
-func.func @test_variable_multiple_declaration() -> () {
+module {
+ // expected-note@below {{see existing symbol definition here}}
tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi32>
- // expected-error@+1 {{'tosa.variable' op illegal to have multiple declaration of 'stored_var'}}
+ // expected-error@+1 {{redefinition of symbol named 'stored_var'}}
tosa.variable @stored_var = dense<-3> : tensor<2x4x8xi32>
- return
}
// -----
-func.func @test_variable_shape_mismatch() -> () {
+module {
// expected-error@+1 {{inferred shape of elements literal ([2]) does not match type ([3])}}
tosa.variable @stored_var = dense<[3.14, 2.14]> : tensor<3xf32>
// expected-error@+1 {{custom op 'tosa.variable' expected attribute}}
- return
}
// -----
-func.func @test_variable_type_mismatch() -> () {
+module {
// expected-error@+1 {{expected integer elements, but parsed floating-point}}
tosa.variable @stored_var = dense<-1.2> : tensor<2x4x8xi32>
// expected-error@+1 {{custom op 'tosa.variable' expected attribute}}
- return
}
// -----
@@ -979,20 +977,26 @@ func.func @test_variable_read_no_declaration() -> () {
// -----
-func.func @test_variable_read_type_mismatch() -> () {
+module {
tosa.variable @stored_var = dense<-1.2> : tensor<2x4x8xf32>
- // expected-error@+1 {{'tosa.variable_read' op require same element type for 'output1' ('i32') and the input tensor ('f32')}}
- %0 = tosa.variable_read @stored_var : tensor<2x4x8xi32>
- return
+
+ func.func @test_variable_read_type_mismatch() -> () {
+ // expected-error@+1 {{'tosa.variable_read' op require same element type for 'output1' ('i32') and the input tensor ('f32')}}
+ %0 = tosa.variable_read @stored_var : tensor<2x4x8xi32>
+ return
+ }
}
// -----
-func.func @test_variable_read_shape_mismatch() -> () {
+module {
tosa.variable @stored_var = dense<-1.2> : tensor<8x4x2xf32>
- // expected-error@+1 {{'tosa.variable_read' op require same shapes for 'output1' ('tensor<2x4x8xf32>') and the input tensor ('tensor<8x4x2xf32>')}}
- %0 = tosa.variable_read @stored_var : tensor<2x4x8xf32>
- return
+
+ func.func @test_variable_read_shape_mismatch() -> () {
+ // expected-error@+1 {{'tosa.variable_read' op require same shapes for 'output1' ('tensor<2x4x8xf32>') and the input tensor ('tensor<8x4x2xf32>')}}
+ %0 = tosa.variable_read @stored_var : tensor<2x4x8xf32>
+ return
+ }
}
// -----
@@ -1005,20 +1009,26 @@ func.func @test_variable_write_no_declaration(%arg0: tensor<f32>) -> () {
// -----
-func.func @test_variable_write_type_mismatch(%arg0: tensor<2x4x8xi32>) -> () {
+module {
tosa.variable @stored_var = dense<-1.2> : tensor<2x4x8xf32>
- // expected-error@+1 {{'tosa.variable_write' op require same element type for 'input1' ('i32') and the input tensor ('f32')}}
- tosa.variable_write @stored_var, %arg0 : tensor<2x4x8xi32>
- return
+
+ func.func @test_variable_write_type_mismatch(%arg0: tensor<2x4x8xi32>) -> () {
+ // expected-error@+1 {{'tosa.variable_write' op require same element type for 'input1' ('i32') and the input tensor ('f32')}}
+ tosa.variable_write @stored_var, %arg0 : tensor<2x4x8xi32>
+ return
+ }
}
// -----
-func.func @test_variable_write_shape_mismatch(%arg0: tensor<2x4x8xf32>) -> () {
+module {
tosa.variable @stored_var = dense<-1.2> : tensor<8x4x2xf32>
- // expected-error@+1 {{'tosa.variable_write' op require same shapes for 'input1' ('tensor<2x4x8xf32>') and the input tensor ('tensor<8x4x2xf32>')}}
- tosa.variable_write @stored_var, %arg0 : tensor<2x4x8xf32>
- return
+
+ func.func @test_variable_write_shape_mismatch(%arg0: tensor<2x4x8xf32>) -> () {
+ // expected-error@+1 {{'tosa.variable_write' op require same shapes for 'input1' ('tensor<2x4x8xf32>') and the input tensor ('tensor<8x4x2xf32>')}}
+ tosa.variable_write @stored_var, %arg0 : tensor<2x4x8xf32>
+ return
+ }
}
// -----
diff --git a/mlir/test/Dialect/Transform/ops-invalid.mlir b/mlir/test/Dialect/Transform/ops-invalid.mlir
index 71a260f..68305de 100644
--- a/mlir/test/Dialect/Transform/ops-invalid.mlir
+++ b/mlir/test/Dialect/Transform/ops-invalid.mlir
@@ -369,6 +369,7 @@ module attributes { transform.with_named_sequence } {
// expected-error @below {{recursion not allowed in named sequences}}
transform.named_sequence @self_recursion() -> () {
transform.include @self_recursion failures(suppress) () : () -> ()
+ transform.yield
}
}
@@ -376,13 +377,13 @@ module attributes { transform.with_named_sequence } {
module @mutual_recursion attributes { transform.with_named_sequence } {
// expected-note @below {{operation on recursion stack}}
- transform.named_sequence @foo(%arg0: !transform.any_op) -> () {
+ transform.named_sequence @foo(%arg0: !transform.any_op {transform.readonly}) -> () {
transform.include @bar failures(suppress) (%arg0) : (!transform.any_op) -> ()
transform.yield
}
// expected-error @below {{recursion not allowed in named sequences}}
- transform.named_sequence @bar(%arg0: !transform.any_op) -> () {
+ transform.named_sequence @bar(%arg0: !transform.any_op {transform.readonly}) -> () {
transform.include @foo failures(propagate) (%arg0) : (!transform.any_op) -> ()
transform.yield
}
@@ -430,7 +431,7 @@ module attributes { transform.with_named_sequence } {
// -----
module attributes { transform.with_named_sequence } {
- transform.named_sequence @foo(%arg0: !transform.any_op) -> () {
+ transform.named_sequence @foo(%arg0: !transform.any_op {transform.readonly}) -> () {
transform.yield
}
@@ -444,7 +445,7 @@ module attributes { transform.with_named_sequence } {
// -----
module attributes { transform.with_named_sequence } {
- transform.named_sequence @foo(%arg0: !transform.any_op) -> (!transform.any_op) {
+ transform.named_sequence @foo(%arg0: !transform.any_op {transform.readonly}) -> (!transform.any_op) {
transform.yield %arg0 : !transform.any_op
}
@@ -458,7 +459,7 @@ module attributes { transform.with_named_sequence } {
// -----
module attributes { transform.with_named_sequence } {
- transform.named_sequence @foo(%arg0: !transform.any_op) -> (!transform.any_op) {
+ transform.named_sequence @foo(%arg0: !transform.any_op {transform.readonly}) -> (!transform.any_op) {
transform.yield %arg0 : !transform.any_op
}
@@ -543,7 +544,6 @@ module attributes { transform.with_named_sequence } {
// -----
module attributes { transform.with_named_sequence } {
- // expected-error @below {{must provide consumed/readonly status for arguments of external or called ops}}
transform.named_sequence @foo(%op: !transform.any_op) {
transform.debug.emit_remark_at %op, "message" : !transform.any_op
transform.yield
@@ -551,6 +551,8 @@ module attributes { transform.with_named_sequence } {
transform.sequence failures(propagate) {
^bb0(%arg0: !transform.any_op):
+ // expected-error @below {{TransformOpInterface requires memory effects on operands to be specified}}
+ // expected-note @below {{no effects specified for operand #0}}
transform.include @foo failures(propagate) (%arg0) : (!transform.any_op) -> ()
transform.yield
}
@@ -908,3 +910,54 @@ module attributes { transform.with_named_sequence } {
transform.yield
}
}
+
+// -----
+
+module attributes { transform.with_named_sequence } {
+ transform.named_sequence @__transform_main(%arg0: !transform.any_op) -> () {
+ // Intentionally malformed func with no region. This shouldn't crash the
+ // verifier of `with_named_sequence` that runs before we get to the
+ // function.
+ // expected-error @below {{requires one region}}
+ "func.func"() : () -> ()
+ transform.yield
+ }
+}
+
+// -----
+
+module attributes { transform.with_named_sequence } {
+ transform.named_sequence @__transform_main(%arg0: !transform.any_op) -> () {
+ // Intentionally malformed call with a region. This shouldn't crash the
+ // verifier of `with_named_sequence` that runs before we get to the call.
+ // expected-error @below {{requires zero regions}}
+ "func.call"() <{
+ function_type = () -> (),
+ sym_name = "lambda_function"
+ }> ({
+ ^bb0:
+ "func.return"() : () -> ()
+ }) : () -> ()
+ transform.yield
+ }
+}
+
+// -----
+
+module attributes { transform.with_named_sequence } {
+ // Intentionally malformed sequence where the verifier should not crash.
+ // expected-error @below {{ op expects argument attribute array to have the same number of elements as the number of function arguments, got 1, but expected 3}}
+ "transform.named_sequence"() <{
+ arg_attrs = [{transform.readonly}],
+ function_type = (i1, tensor<f32>, tensor<f32>) -> (),
+ sym_name = "print_message"
+ }> ({}) : () -> ()
+ "transform.named_sequence"() <{
+ function_type = (!transform.any_op) -> (),
+ sym_name = "reference_other_module"
+ }> ({
+ ^bb0(%arg0: !transform.any_op):
+ "transform.include"(%arg0) <{target = @print_message}> : (!transform.any_op) -> ()
+ "transform.yield"() : () -> ()
+ }) : () -> ()
+}
diff --git a/mlir/test/python/dialects/python_test.py b/mlir/test/python/dialects/python_test.py
index 1194e32..5a9acc7 100644
--- a/mlir/test/python/dialects/python_test.py
+++ b/mlir/test/python/dialects/python_test.py
@@ -1,5 +1,4 @@
-# RUN: %PYTHON %s pybind11 | FileCheck %s
-# RUN: %PYTHON %s nanobind | FileCheck %s
+# RUN: %PYTHON %s | FileCheck %s
import sys
import typing
from typing import Union, Optional
@@ -10,26 +9,14 @@ import mlir.dialects.python_test as test
import mlir.dialects.tensor as tensor
import mlir.dialects.arith as arith
-if sys.argv[1] == "pybind11":
- from mlir._mlir_libs._mlirPythonTestPybind11 import (
- TestAttr,
- TestType,
- TestTensorValue,
- TestIntegerRankedTensorType,
- )
-
- test.register_python_test_dialect(get_dialect_registry(), use_nanobind=False)
-elif sys.argv[1] == "nanobind":
- from mlir._mlir_libs._mlirPythonTestNanobind import (
- TestAttr,
- TestType,
- TestTensorValue,
- TestIntegerRankedTensorType,
- )
-
- test.register_python_test_dialect(get_dialect_registry(), use_nanobind=True)
-else:
- raise ValueError("Expected pybind11 or nanobind as argument")
+from mlir._mlir_libs._mlirPythonTestNanobind import (
+ TestAttr,
+ TestType,
+ TestTensorValue,
+ TestIntegerRankedTensorType,
+)
+
+test.register_python_test_dialect(get_dialect_registry())
def run(f):
diff --git a/mlir/test/python/lib/CMakeLists.txt b/mlir/test/python/lib/CMakeLists.txt
index 9a813da..f51a7b4 100644
--- a/mlir/test/python/lib/CMakeLists.txt
+++ b/mlir/test/python/lib/CMakeLists.txt
@@ -1,7 +1,6 @@
set(LLVM_OPTIONAL_SOURCES
PythonTestCAPI.cpp
PythonTestDialect.cpp
- PythonTestModulePybind11.cpp
PythonTestModuleNanobind.cpp
)
diff --git a/mlir/test/python/lib/PythonTestModulePybind11.cpp b/mlir/test/python/lib/PythonTestModulePybind11.cpp
deleted file mode 100644
index 94a5f51..0000000
--- a/mlir/test/python/lib/PythonTestModulePybind11.cpp
+++ /dev/null
@@ -1,118 +0,0 @@
-//===- PythonTestModule.cpp - Python extension for the PythonTest dialect -===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// This is the pybind11 edition of the PythonTest dialect module.
-//===----------------------------------------------------------------------===//
-
-#include "PythonTestCAPI.h"
-#include "mlir-c/BuiltinAttributes.h"
-#include "mlir-c/BuiltinTypes.h"
-#include "mlir-c/IR.h"
-#include "mlir/Bindings/Python/PybindAdaptors.h"
-
-namespace py = pybind11;
-using namespace mlir::python::adaptors;
-using namespace pybind11::literals;
-
-static bool mlirTypeIsARankedIntegerTensor(MlirType t) {
- return mlirTypeIsARankedTensor(t) &&
- mlirTypeIsAInteger(mlirShapedTypeGetElementType(t));
-}
-
-PYBIND11_MODULE(_mlirPythonTestPybind11, m) {
- m.def(
- "register_python_test_dialect",
- [](MlirContext context, bool load) {
- MlirDialectHandle pythonTestDialect =
- mlirGetDialectHandle__python_test__();
- mlirDialectHandleRegisterDialect(pythonTestDialect, context);
- if (load) {
- mlirDialectHandleLoadDialect(pythonTestDialect, context);
- }
- },
- py::arg("context"), py::arg("load") = true);
-
- m.def(
- "register_dialect",
- [](MlirDialectRegistry registry) {
- MlirDialectHandle pythonTestDialect =
- mlirGetDialectHandle__python_test__();
- mlirDialectHandleInsertDialect(pythonTestDialect, registry);
- },
- py::arg("registry"));
-
- mlir_attribute_subclass(m, "TestAttr",
- mlirAttributeIsAPythonTestTestAttribute,
- mlirPythonTestTestAttributeGetTypeID)
- .def_classmethod(
- "get",
- [](const py::object &cls, MlirContext ctx) {
- return cls(mlirPythonTestTestAttributeGet(ctx));
- },
- py::arg("cls"), py::arg("context") = py::none());
-
- mlir_type_subclass(m, "TestType", mlirTypeIsAPythonTestTestType,
- mlirPythonTestTestTypeGetTypeID)
- .def_classmethod(
- "get",
- [](const py::object &cls, MlirContext ctx) {
- return cls(mlirPythonTestTestTypeGet(ctx));
- },
- py::arg("cls"), py::arg("context") = py::none());
-
- auto typeCls =
- mlir_type_subclass(m, "TestIntegerRankedTensorType",
- mlirTypeIsARankedIntegerTensor,
- py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir"))
- .attr("RankedTensorType"))
- .def_classmethod(
- "get",
- [](const py::object &cls, std::vector<int64_t> shape,
- unsigned width, MlirContext ctx) {
- MlirAttribute encoding = mlirAttributeGetNull();
- return cls(mlirRankedTensorTypeGet(
- shape.size(), shape.data(), mlirIntegerTypeGet(ctx, width),
- encoding));
- },
- "cls"_a, "shape"_a, "width"_a, "context"_a = py::none());
-
- assert(py::hasattr(typeCls.get_class(), "static_typeid") &&
- "TestIntegerRankedTensorType has no static_typeid");
-
- MlirTypeID mlirRankedTensorTypeID = mlirRankedTensorTypeGetTypeID();
-
- py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir"))
- .attr(MLIR_PYTHON_CAPI_TYPE_CASTER_REGISTER_ATTR)(mlirRankedTensorTypeID,
- "replace"_a = true)(
- pybind11::cpp_function([typeCls](const py::object &mlirType) {
- return typeCls.get_class()(mlirType);
- }));
-
- auto valueCls = mlir_value_subclass(m, "TestTensorValue",
- mlirTypeIsAPythonTestTestTensorValue)
- .def("is_null", [](MlirValue &self) {
- return mlirValueIsNull(self);
- });
-
- py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir"))
- .attr(MLIR_PYTHON_CAPI_VALUE_CASTER_REGISTER_ATTR)(
- mlirRankedTensorTypeID)(
- pybind11::cpp_function([valueCls](const py::object &valueObj) {
- py::object capsule = mlirApiObjectToCapsule(valueObj);
- MlirValue v = mlirPythonCapsuleToValue(capsule.ptr());
- MlirType t = mlirValueGetType(v);
- // This is hyper-specific in order to exercise/test registering a
- // value caster from cpp (but only for a single test case; see
- // testTensorValue python_test.py).
- if (mlirShapedTypeHasStaticShape(t) &&
- mlirShapedTypeGetDimSize(t, 0) == 1 &&
- mlirShapedTypeGetDimSize(t, 1) == 2 &&
- mlirShapedTypeGetDimSize(t, 2) == 3)
- return valueCls.get_class()(valueObj);
- return valueObj;
- }));
-}
diff --git a/mlir/tools/mlir-linalg-ods-gen/update_core_linalg_named_ops.sh.in b/mlir/tools/mlir-linalg-ods-gen/update_core_linalg_named_ops.sh.in
index da4db39..0bb6a20 100755
--- a/mlir/tools/mlir-linalg-ods-gen/update_core_linalg_named_ops.sh.in
+++ b/mlir/tools/mlir-linalg-ods-gen/update_core_linalg_named_ops.sh.in
@@ -26,7 +26,7 @@ export PYTHONPATH="$python_package_dir"
OUTPUT="$(
echo "### AUTOGENERATED from core_named_ops.py" && \
echo "### To regenerate, run: bin/update_core_linalg_named_ops.sh" && \
- "$python_exe" -m mlir.dialects.linalg.opdsl.dump_oplib .ops.core_named_ops \
+ "$python_exe" -m mlir.dialects.linalg.opdsl.dump_oplib.ops.core_named_ops \
)"
echo "$OUTPUT" > "$dest_file"
echo "Success."
diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp
index 06ef396..a580b1b 100644
--- a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp
+++ b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp
@@ -864,11 +864,8 @@ bool DefGenerator::emitDecls(StringRef selectedDialect) {
// Declare all the def classes first (in case they reference each other).
for (const AttrOrTypeDef &def : defs) {
- std::string comments = tblgen::emitSummaryAndDescComments(
- def.getSummary(), def.getDescription());
- if (!comments.empty()) {
- os << comments << "\n";
- }
+ tblgen::emitSummaryAndDescComments(os, def.getSummary(),
+ def.getDescription());
os << "class " << def.getCppClassName() << ";\n";
}
@@ -1166,7 +1163,7 @@ getAllCppAttrConstraints(const RecordKeeper &records) {
/// Emit the declarations for the given constraints, of the form:
/// `bool <constraintCppFunctionName>(<parameterTypeName> <parameterName>);`
-static void emitConstraintDecls(const std::vector<Constraint> &constraints,
+static void emitConstraintDecls(ArrayRef<Constraint> constraints,
raw_ostream &os, StringRef parameterTypeName,
StringRef parameterName) {
static const char *const constraintDecl = "bool {0}({1} {2});\n";
@@ -1192,7 +1189,7 @@ static void emitAttrConstraintDecls(const RecordKeeper &records,
/// return (<condition>); }`
/// where `<condition>` is the condition template with the `self` variable
/// replaced with the `selfName` parameter.
-static void emitConstraintDefs(const std::vector<Constraint> &constraints,
+static void emitConstraintDefs(ArrayRef<Constraint> constraints,
raw_ostream &os, StringRef parameterTypeName,
StringRef selfName) {
static const char *const constraintDef = R"(
diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp
index 8dd9713..34547e9 100644
--- a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp
+++ b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp
@@ -89,10 +89,7 @@ static ParameterElement *getEncapsulatedParameterElement(FormatElement *el) {
.Case<ParameterElement>([&](auto param) { return param; })
.Case<RefDirective>(
[&](auto ref) { return cast<ParameterElement>(ref->getArg()); })
- .Default([&](auto el) {
- assert(false && "unexpected struct element type");
- return nullptr;
- });
+ .DefaultUnreachable("unexpected struct element type");
}
/// Shorthand functions that can be used with ranged-based conditions.
diff --git a/mlir/tools/mlir-tblgen/CppGenUtilities.cpp b/mlir/tools/mlir-tblgen/CppGenUtilities.cpp
index ebca20c..fddd779 100644
--- a/mlir/tools/mlir-tblgen/CppGenUtilities.cpp
+++ b/mlir/tools/mlir-tblgen/CppGenUtilities.cpp
@@ -14,26 +14,31 @@
#include "CppGenUtilities.h"
#include "mlir/Support/IndentedOstream.h"
-std::string
-mlir::tblgen::emitSummaryAndDescComments(llvm::StringRef summary,
- llvm::StringRef description) {
+void mlir::tblgen::emitSummaryAndDescComments(llvm::raw_ostream &os,
+ llvm::StringRef summary,
+ llvm::StringRef description,
+ bool terminateComment) {
std::string comments = "";
StringRef trimmedSummary = summary.trim();
StringRef trimmedDesc = description.trim();
- llvm::raw_string_ostream os(comments);
raw_indented_ostream ros(os);
+ bool empty = true;
if (!trimmedSummary.empty()) {
ros.printReindented(trimmedSummary, "/// ");
+ empty = false;
}
if (!trimmedDesc.empty()) {
- if (!trimmedSummary.empty()) {
+ if (!empty) {
// If there is a summary, add a newline after it.
ros << "\n";
}
ros.printReindented(trimmedDesc, "/// ");
+ empty = false;
}
- return comments;
+
+ if (!empty && terminateComment)
+ ros << "\n";
}
diff --git a/mlir/tools/mlir-tblgen/CppGenUtilities.h b/mlir/tools/mlir-tblgen/CppGenUtilities.h
index 231c59a..69d8cd8 100644
--- a/mlir/tools/mlir-tblgen/CppGenUtilities.h
+++ b/mlir/tools/mlir-tblgen/CppGenUtilities.h
@@ -15,14 +15,16 @@
#define MLIR_TOOLS_MLIRTBLGEN_CPPGENUTILITIES_H_
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
namespace mlir {
namespace tblgen {
-// Emit the summary and description as a C++ comment, perperly aligned placed
-// adjacent to the class declaration of generated classes.
-std::string emitSummaryAndDescComments(llvm::StringRef summary,
- llvm::StringRef description);
+// Emit the summary and description as a C++ comment. If `terminateComment` is
+// true, terminates the comment with a `\n`.
+void emitSummaryAndDescComments(llvm::raw_ostream &os, llvm::StringRef summary,
+ llvm::StringRef description,
+ bool terminateComment = true);
} // namespace tblgen
} // namespace mlir
diff --git a/mlir/tools/mlir-tblgen/DialectGen.cpp b/mlir/tools/mlir-tblgen/DialectGen.cpp
index 2e8810d..c2c0c1f 100644
--- a/mlir/tools/mlir-tblgen/DialectGen.cpp
+++ b/mlir/tools/mlir-tblgen/DialectGen.cpp
@@ -109,9 +109,7 @@ tblgen::findDialectToGenerate(ArrayRef<Dialect> dialects) {
/// {0}: The name of the dialect class.
/// {1}: The dialect namespace.
/// {2}: The dialect parent class.
-/// {3}: The summary and description comments.
static const char *const dialectDeclBeginStr = R"(
-{3}
class {0} : public ::mlir::{2} {
explicit {0}(::mlir::MLIRContext *context);
@@ -249,10 +247,11 @@ static void emitDialectDecl(Dialect &dialect, raw_ostream &os) {
StringRef superClassName =
dialect.isExtensible() ? "ExtensibleDialect" : "Dialect";
- std::string comments = tblgen::emitSummaryAndDescComments(
- dialect.getSummary(), dialect.getDescription());
+ tblgen::emitSummaryAndDescComments(os, dialect.getSummary(),
+ dialect.getDescription(),
+ /*terminateCmment=*/false);
os << llvm::formatv(dialectDeclBeginStr, cppName, dialect.getName(),
- superClassName, comments);
+ superClassName);
// If the dialect requested the default attribute printer and parser, emit
// the declarations for the hooks.
diff --git a/mlir/tools/mlir-tblgen/EnumsGen.cpp b/mlir/tools/mlir-tblgen/EnumsGen.cpp
index d4d32f5..d55ad482 100644
--- a/mlir/tools/mlir-tblgen/EnumsGen.cpp
+++ b/mlir/tools/mlir-tblgen/EnumsGen.cpp
@@ -46,8 +46,7 @@ static std::string makeIdentifier(StringRef str) {
static void emitEnumClass(const Record &enumDef, StringRef enumName,
StringRef underlyingType, StringRef description,
- const std::vector<EnumCase> &enumerants,
- raw_ostream &os) {
+ ArrayRef<EnumCase> enumerants, raw_ostream &os) {
os << "// " << description << "\n";
os << "enum class " << enumName;
@@ -55,14 +54,13 @@ static void emitEnumClass(const Record &enumDef, StringRef enumName,
os << " : " << underlyingType;
os << " {\n";
- for (const auto &enumerant : enumerants) {
+ for (const EnumCase &enumerant : enumerants) {
auto symbol = makeIdentifier(enumerant.getSymbol());
auto value = enumerant.getValue();
- if (value >= 0) {
+ if (value >= 0)
os << formatv(" {0} = {1},\n", symbol, value);
- } else {
+ else
os << formatv(" {0},\n", symbol);
- }
}
os << "};\n\n";
}
diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
index c3420d4..2ddb07d 100644
--- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
@@ -4801,11 +4801,9 @@ void OpOperandAdaptorEmitter::emitDef(
}
/// Emit the class declarations or definitions for the given op defs.
-static void
-emitOpClasses(const RecordKeeper &records,
- const std::vector<const Record *> &defs, raw_ostream &os,
- const StaticVerifierFunctionEmitter &staticVerifierEmitter,
- bool emitDecl) {
+static void emitOpClasses(
+ const RecordKeeper &records, ArrayRef<const Record *> defs, raw_ostream &os,
+ const StaticVerifierFunctionEmitter &staticVerifierEmitter, bool emitDecl) {
if (defs.empty())
return;
@@ -4840,18 +4838,14 @@ emitOpClasses(const RecordKeeper &records,
/// Emit the declarations for the provided op classes.
static void emitOpClassDecls(const RecordKeeper &records,
- const std::vector<const Record *> &defs,
- raw_ostream &os) {
+ ArrayRef<const Record *> defs, raw_ostream &os) {
// First emit forward declaration for each class, this allows them to refer
// to each others in traits for example.
- for (auto *def : defs) {
+ for (const Record *def : defs) {
Operator op(*def);
NamespaceEmitter emitter(os, op.getCppNamespace());
- std::string comments = tblgen::emitSummaryAndDescComments(
- op.getSummary(), op.getDescription());
- if (!comments.empty()) {
- os << comments << "\n";
- }
+ tblgen::emitSummaryAndDescComments(os, op.getSummary(),
+ op.getDescription());
os << "class " << op.getCppClassName() << ";\n";
}
diff --git a/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp b/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp
index 3cc1636..25f160d 100644
--- a/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp
@@ -536,11 +536,8 @@ void InterfaceGenerator::forwardDeclareInterface(const Interface &interface) {
// Emit a forward declaration of the interface class so that it becomes usable
// in the signature of its methods.
- std::string comments = tblgen::emitSummaryAndDescComments(
- "", interface.getDescription().value_or(""));
- if (!comments.empty()) {
- os << comments << "\n";
- }
+ tblgen::emitSummaryAndDescComments(os, "",
+ interface.getDescription().value_or(""));
StringRef interfaceName = interface.getName();
os << "class " << interfaceName << ";\n";
@@ -560,11 +557,8 @@ void InterfaceGenerator::emitInterfaceDecl(const Interface &interface) {
// Emit a forward declaration of the interface class so that it becomes usable
// in the signature of its methods.
- std::string comments = tblgen::emitSummaryAndDescComments(
- "", interface.getDescription().value_or(""));
- if (!comments.empty()) {
- os << comments << "\n";
- }
+ tblgen::emitSummaryAndDescComments(os, "",
+ interface.getDescription().value_or(""));
// Emit the traits struct containing the concept and model declarations.
os << "namespace detail {\n"
diff --git a/mlir/tools/mlir-tblgen/TosaUtilsGen.cpp b/mlir/tools/mlir-tblgen/TosaUtilsGen.cpp
index c929546..dc8cc58 100644
--- a/mlir/tools/mlir-tblgen/TosaUtilsGen.cpp
+++ b/mlir/tools/mlir-tblgen/TosaUtilsGen.cpp
@@ -1,4 +1,4 @@
-//===- TosaUtilsGen.cpp - Tosa utility generator -===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 422c29f..0c77a1e 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -1049,32 +1049,6 @@ filegroup(
)
cc_library(
- name = "MLIRBindingsPythonHeaders",
- includes = [
- "include",
- ],
- textual_hdrs = [":MLIRBindingsPythonHeaderFiles"],
- deps = [
- ":CAPIIRHeaders",
- "@pybind11",
- "@rules_python//python/cc:current_py_cc_headers",
- ],
-)
-
-cc_library(
- name = "MLIRBindingsPythonHeadersAndDeps",
- includes = [
- "include",
- ],
- textual_hdrs = [":MLIRBindingsPythonHeaderFiles"],
- deps = [
- ":CAPIIR",
- "@pybind11",
- "@rules_python//python/cc:current_py_cc_headers",
- ],
-)
-
-cc_library(
name = "MLIRBindingsPythonNanobindHeaders",
includes = [
"include",
@@ -1087,6 +1061,11 @@ cc_library(
],
)
+alias(
+ name = "MLIRBindingsPythonHeaders",
+ actual = ":MLIRBindingsPythonNanobindHeaders",
+)
+
cc_library(
name = "MLIRBindingsPythonNanobindHeadersAndDeps",
includes = [
@@ -1100,6 +1079,11 @@ cc_library(
],
)
+alias(
+ name = "MLIRBindingsPythonHeadersAndDeps",
+ actual = ":MLIRBindingsPythonNanobindHeadersAndDeps",
+)
+
# These flags are needed for pybind11 to work.
PYBIND11_COPTS = [
"-fexceptions",
@@ -1147,7 +1131,7 @@ cc_library(
":CAPIIR",
":CAPIInterfaces",
":CAPITransforms",
- ":MLIRBindingsPythonNanobindHeadersAndDeps",
+ ":MLIRBindingsPythonHeadersAndDeps",
":Support",
":config",
"//llvm:Support",
@@ -1170,7 +1154,7 @@ cc_library(
":CAPIDebugHeaders",
":CAPIIRHeaders",
":CAPITransformsHeaders",
- ":MLIRBindingsPythonNanobindHeaders",
+ ":MLIRBindingsPythonHeaders",
":Support",
":config",
"//llvm:Support",
@@ -1220,7 +1204,7 @@ cc_binary(
linkstatic = 0,
deps = [
":CAPIIR",
- ":MLIRBindingsPythonNanobindHeadersAndDeps",
+ ":MLIRBindingsPythonHeadersAndDeps",
"@nanobind",
],
)
@@ -1238,7 +1222,7 @@ cc_binary(
deps = [
":CAPIIR",
":CAPILinalg",
- ":MLIRBindingsPythonNanobindHeadersAndDeps",
+ ":MLIRBindingsPythonHeadersAndDeps",
"@nanobind",
],
)
@@ -1253,7 +1237,7 @@ cc_binary(
deps = [
":CAPIIR",
":CAPILLVM",
- ":MLIRBindingsPythonNanobindHeadersAndDeps",
+ ":MLIRBindingsPythonHeadersAndDeps",
"@nanobind",
],
)
@@ -1268,7 +1252,7 @@ cc_binary(
deps = [
":CAPIIR",
":CAPIQuant",
- ":MLIRBindingsPythonNanobindHeadersAndDeps",
+ ":MLIRBindingsPythonHeadersAndDeps",
"@nanobind",
],
)
@@ -1283,7 +1267,7 @@ cc_binary(
deps = [
":CAPIIR",
":CAPISparseTensor",
- ":MLIRBindingsPythonNanobindHeadersAndDeps",
+ ":MLIRBindingsPythonHeadersAndDeps",
"@nanobind",
],
)
@@ -1298,7 +1282,7 @@ cc_binary(
linkstatic = 0,
deps = [
":CAPIExecutionEngine",
- ":MLIRBindingsPythonNanobindHeadersAndDeps",
+ ":MLIRBindingsPythonHeadersAndDeps",
"@nanobind",
"@rules_python//python/cc:current_py_cc_headers",
],
@@ -1314,7 +1298,7 @@ cc_binary(
linkstatic = 0,
deps = [
":CAPILinalg",
- ":MLIRBindingsPythonNanobindHeadersAndDeps",
+ ":MLIRBindingsPythonHeadersAndDeps",
"@nanobind",
"@rules_python//python/cc:current_py_cc_headers",
],