164 files changed, 6118 insertions, 2766 deletions
diff --git a/.ci/all_requirements.txt b/.ci/all_requirements.txt index ac9682a..313ab107 100644 --- a/.ci/all_requirements.txt +++ b/.ci/all_requirements.txt @@ -194,10 +194,6 @@ ml-dtypes==0.5.1 ; python_version < "3.13" \ --hash=sha256:d13755f8e8445b3870114e5b6240facaa7cb0c3361e54beba3e07fa912a6e12b \ --hash=sha256:fd918d4e6a4e0c110e2e05be7a7814d10dc1b95872accbf6512b80a109b71ae1 # via -r mlir/python/requirements.txt -nanobind==2.9.2 \ - --hash=sha256:c37957ffd5eac7eda349cff3622ecd32e5ee1244ecc912c99b5bc8188bafd16e \ - --hash=sha256:e7608472de99d375759814cab3e2c94aba3f9ec80e62cfef8ced495ca5c27d6e - # via -r mlir/python/requirements.txt numpy==2.0.2 \ --hash=sha256:0123ffdaa88fa4ab64835dcbde75dcdf89c453c922f18dced6e27c90d1d0ec5a \ --hash=sha256:11a76c372d1d37437857280aa142086476136a8c0f373b2e648ab2c8f18fb195 \ @@ -299,10 +295,6 @@ pyasn1-modules==0.4.2 \ --hash=sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a \ --hash=sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6 # via google-auth -pybind11==2.13.6 \ - --hash=sha256:237c41e29157b962835d356b370ededd57594a26d5894a795960f0047cb5caf5 \ - --hash=sha256:ba6af10348c12b24e92fa086b39cfba0eff619b61ac77c406167d813b096d39a - # via -r mlir/python/requirements.txt pyyaml==6.0.1 \ --hash=sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5 \ --hash=sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc \ diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index a0181b7..4165225 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -2409,28 +2409,36 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512> def psraq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">; def psrld512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">; def psrlq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">; +} + +let Features = "avx512f", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def pternlogd512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">; def pternlogd512_maskz : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">; def pternlogq512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">; def pternlogq512_maskz : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def pternlogd128_mask : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">; def pternlogd128_maskz : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def pternlogd256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, int>, 
_Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">; def pternlogd256_maskz : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def pternlogq128_mask : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">; def pternlogq128_maskz : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def pternlogq256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">; def pternlogq256_maskz : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">; } diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h index 569491a..89b519e 100644 --- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h +++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h @@ -125,6 +125,7 @@ public: cir::ConstantOp getTrue(mlir::Location loc) { return getBool(true, loc); } cir::BoolType getBoolTy() { return cir::BoolType::get(getContext()); } + cir::VoidType getVoidTy() { return cir::VoidType::get(getContext()); } cir::PointerType getPointerTo(mlir::Type ty) { return cir::PointerType::get(ty); diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index f7ca276..f795800 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -37,6 +37,11 @@ struct MissingFeatures { static bool opGlobalDLLImportExport() { return false; } static bool opGlobalPartition() { return false; } static bool opGlobalUsedOrCompilerUsed() { return false; } + static bool opGlobalAnnotations() { return false; } + static bool opGlobalDtorLowering() { return false; } + static bool opGlobalCtorAttr() { return false; } + static bool opGlobalCtorPriority() { return false; } + static bool opGlobalCtorList() { return false; } static bool setDSOLocal() { return false; } static bool setComdat() { return false; } diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index 21af3d6..8904396 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -1638,6 +1638,36 @@ bool Call(InterpState &S, CodePtr OpPC, const Function *Func, return true; } +static bool GetDynamicDecl(InterpState &S, CodePtr OpPC, Pointer TypePtr, + const CXXRecordDecl *&DynamicDecl) { + while (TypePtr.isBaseClass()) + TypePtr = TypePtr.getBase(); + + QualType DynamicType = TypePtr.getType(); + if (TypePtr.isStatic() || TypePtr.isConst()) { + const VarDecl *VD = TypePtr.getDeclDesc()->asVarDecl(); + if (!VD->isConstexpr()) { + const Expr *E = S.Current->getExpr(OpPC); + APValue V = TypePtr.toAPValue(S.getASTContext()); + QualType TT = S.getASTContext().getLValueReferenceType(DynamicType); + S.FFDiag(E, 
diag::note_constexpr_polymorphic_unknown_dynamic_type) + << AccessKinds::AK_MemberCall << V.getAsString(S.getASTContext(), TT); + return false; + } + } + + if (DynamicType->isPointerType() || DynamicType->isReferenceType()) { + DynamicDecl = DynamicType->getPointeeCXXRecordDecl(); + } else if (DynamicType->isArrayType()) { + const Type *ElemType = DynamicType->getPointeeOrArrayElementType(); + assert(ElemType); + DynamicDecl = ElemType->getAsCXXRecordDecl(); + } else { + DynamicDecl = DynamicType->getAsCXXRecordDecl(); + } + return true; +} + bool CallVirt(InterpState &S, CodePtr OpPC, const Function *Func, uint32_t VarArgSize) { assert(Func->hasThisPointer()); @@ -1662,17 +1692,8 @@ bool CallVirt(InterpState &S, CodePtr OpPC, const Function *Func, } const CXXRecordDecl *DynamicDecl = nullptr; - { - Pointer TypePtr = ThisPtr; - while (TypePtr.isBaseClass()) - TypePtr = TypePtr.getBase(); - - QualType DynamicType = TypePtr.getType(); - if (DynamicType->isPointerType() || DynamicType->isReferenceType()) - DynamicDecl = DynamicType->getPointeeCXXRecordDecl(); - else - DynamicDecl = DynamicType->getAsCXXRecordDecl(); - } + if (!GetDynamicDecl(S, OpPC, ThisPtr, DynamicDecl)) + return false; assert(DynamicDecl); const auto *StaticDecl = cast<CXXRecordDecl>(Func->getParentDecl()); diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index a3c4ba5..6af7ef3 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2897,7 +2897,49 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC, }); Dst.initializeAllElements(); + return true; +} + +static bool interp__builtin_ia32_pternlog(InterpState &S, CodePtr OpPC, + const CallExpr *Call, bool MaskZ) { + assert(Call->getNumArgs() == 5); + + APInt U = popToAPSInt(S, Call->getArg(4)); // Lane mask + APInt Imm = popToAPSInt(S, Call->getArg(3)); // Ternary truth table + const Pointer &C = S.Stk.pop<Pointer>(); + const Pointer &B = S.Stk.pop<Pointer>(); + const Pointer &A = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + unsigned DstLen = A.getNumElems(); + const QualType ElemQT = getElemType(A); + const OptPrimType ElemPT = S.getContext().classify(ElemQT); + unsigned LaneWidth = S.getASTContext().getTypeSize(ElemQT); + bool DstUnsigned = ElemQT->isUnsignedIntegerOrEnumerationType(); + + INT_TYPE_SWITCH_NO_BOOL(*ElemPT, { + for (unsigned I = 0; I != DstLen; ++I) { + APInt ALane = A.elem<T>(I).toAPSInt(); + APInt BLane = B.elem<T>(I).toAPSInt(); + APInt CLane = C.elem<T>(I).toAPSInt(); + APInt RLane(LaneWidth, 0); + if (U[I]) { // If lane not masked, compute ternary logic. + for (unsigned Bit = 0; Bit != LaneWidth; ++Bit) { + unsigned ABit = ALane[Bit]; + unsigned BBit = BLane[Bit]; + unsigned CBit = CLane[Bit]; + unsigned Idx = (ABit << 2) | (BBit << 1) | (CBit); + RLane.setBitVal(Bit, Imm[Idx]); + } + Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned)); + } else if (MaskZ) { // If zero masked, zero the lane. + Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned)); + } else { // Just masked, put in A lane. 
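As background for interp__builtin_ia32_pternlog here (and the parallel evaluation added to ExprConstant.cpp further down): vpternlog is a per-bit truth-table lookup. For each bit position, the three source bits form the index (A<<2)|(B<<1)|C into the 8-bit immediate, and the immediate's bit at that index becomes the result bit. A minimal standalone sketch of one lane, assuming plain uint32_t values and leaving out the merge/zero write-masking that the builtins layer on top:

    #include <cassert>
    #include <cstdint>

    // One 32-bit lane of vpternlogd: each result bit is a truth-table
    // lookup in `imm`, indexed by the corresponding bits of a, b, c.
    static uint32_t ternlogLane(uint32_t a, uint32_t b, uint32_t c,
                                uint8_t imm) {
      uint32_t r = 0;
      for (unsigned bit = 0; bit != 32; ++bit) {
        unsigned idx = (((a >> bit) & 1) << 2) | (((b >> bit) & 1) << 1) |
                       ((c >> bit) & 1);
        r |= static_cast<uint32_t>((imm >> idx) & 1) << bit;
      }
      return r;
    }

    int main() {
      // imm 0xCA encodes the bitwise select a ? b : c.
      assert(ternlogLane(0xFFFF0000u, 0x12345678u, 0x9ABCDEF0u, 0xCA) ==
             0x1234DEF0u);
    }

On top of this, the _mask variants below merge the original A lane back in wherever the write-mask bit is clear, while the _maskz variants zero that lane instead.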
+ Dst.elem<T>(I) = static_cast<T>(APSInt(ALane, DstUnsigned)); + } + } + }); + Dst.initializeAllElements(); return true; } @@ -3760,6 +3802,20 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; }); + case X86::BI__builtin_ia32_pternlogd128_mask: + case X86::BI__builtin_ia32_pternlogd256_mask: + case X86::BI__builtin_ia32_pternlogd512_mask: + case X86::BI__builtin_ia32_pternlogq128_mask: + case X86::BI__builtin_ia32_pternlogq256_mask: + case X86::BI__builtin_ia32_pternlogq512_mask: + return interp__builtin_ia32_pternlog(S, OpPC, Call, /*MaskZ=*/false); + case X86::BI__builtin_ia32_pternlogd128_maskz: + case X86::BI__builtin_ia32_pternlogd256_maskz: + case X86::BI__builtin_ia32_pternlogd512_maskz: + case X86::BI__builtin_ia32_pternlogq128_maskz: + case X86::BI__builtin_ia32_pternlogq256_maskz: + case X86::BI__builtin_ia32_pternlogq512_maskz: + return interp__builtin_ia32_pternlog(S, OpPC, Call, /*MaskZ=*/true); case Builtin::BI__builtin_elementwise_fshl: return interp__builtin_elementwise_triop(S, OpPC, Call, llvm::APIntOps::fshl); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 7bf28d9..0b23eed 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12168,6 +12168,97 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(R, E); } + case X86::BI__builtin_ia32_pternlogd128_mask: + case X86::BI__builtin_ia32_pternlogd256_mask: + case X86::BI__builtin_ia32_pternlogd512_mask: + case X86::BI__builtin_ia32_pternlogq128_mask: + case X86::BI__builtin_ia32_pternlogq256_mask: + case X86::BI__builtin_ia32_pternlogq512_mask: { + APValue AValue, BValue, CValue, ImmValue, UValue; + if (!EvaluateAsRValue(Info, E->getArg(0), AValue) || + !EvaluateAsRValue(Info, E->getArg(1), BValue) || + !EvaluateAsRValue(Info, E->getArg(2), CValue) || + !EvaluateAsRValue(Info, E->getArg(3), ImmValue) || + !EvaluateAsRValue(Info, E->getArg(4), UValue)) + return false; + + QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); + bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType(); + APInt Imm = ImmValue.getInt(); + APInt U = UValue.getInt(); + unsigned ResultLen = AValue.getVectorLength(); + SmallVector<APValue, 16> ResultElements; + ResultElements.reserve(ResultLen); + + for (unsigned EltNum = 0; EltNum < ResultLen; ++EltNum) { + APInt ALane = AValue.getVectorElt(EltNum).getInt(); + APInt BLane = BValue.getVectorElt(EltNum).getInt(); + APInt CLane = CValue.getVectorElt(EltNum).getInt(); + + if (U[EltNum]) { + unsigned BitWidth = ALane.getBitWidth(); + APInt ResLane(BitWidth, 0); + + for (unsigned Bit = 0; Bit < BitWidth; ++Bit) { + unsigned ABit = ALane[Bit]; + unsigned BBit = BLane[Bit]; + unsigned CBit = CLane[Bit]; + + unsigned Idx = (ABit << 2) | (BBit << 1) | CBit; + ResLane.setBitVal(Bit, Imm[Idx]); + } + ResultElements.push_back(APValue(APSInt(ResLane, DestUnsigned))); + } else { + ResultElements.push_back(APValue(APSInt(ALane, DestUnsigned))); + } + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + case X86::BI__builtin_ia32_pternlogd128_maskz: + case X86::BI__builtin_ia32_pternlogd256_maskz: + case X86::BI__builtin_ia32_pternlogd512_maskz: + case X86::BI__builtin_ia32_pternlogq128_maskz: + case X86::BI__builtin_ia32_pternlogq256_maskz: + case X86::BI__builtin_ia32_pternlogq512_maskz: { + APValue AValue, BValue, CValue, ImmValue, UValue; + if 
(!EvaluateAsRValue(Info, E->getArg(0), AValue) || + !EvaluateAsRValue(Info, E->getArg(1), BValue) || + !EvaluateAsRValue(Info, E->getArg(2), CValue) || + !EvaluateAsRValue(Info, E->getArg(3), ImmValue) || + !EvaluateAsRValue(Info, E->getArg(4), UValue)) + return false; + + QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); + bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType(); + APInt Imm = ImmValue.getInt(); + APInt U = UValue.getInt(); + unsigned ResultLen = AValue.getVectorLength(); + SmallVector<APValue, 16> ResultElements; + ResultElements.reserve(ResultLen); + + for (unsigned EltNum = 0; EltNum < ResultLen; ++EltNum) { + APInt ALane = AValue.getVectorElt(EltNum).getInt(); + APInt BLane = BValue.getVectorElt(EltNum).getInt(); + APInt CLane = CValue.getVectorElt(EltNum).getInt(); + + unsigned BitWidth = ALane.getBitWidth(); + APInt ResLane(BitWidth, 0); + + if (U[EltNum]) { + for (unsigned Bit = 0; Bit < BitWidth; ++Bit) { + unsigned ABit = ALane[Bit]; + unsigned BBit = BLane[Bit]; + unsigned CBit = CLane[Bit]; + + unsigned Idx = (ABit << 2) | (BBit << 1) | CBit; + ResLane.setBitVal(Bit, Imm[Idx]); + } + } + ResultElements.push_back(APValue(APSInt(ResLane, DestUnsigned))); + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + case Builtin::BI__builtin_elementwise_clzg: case Builtin::BI__builtin_elementwise_ctzg: { APValue SourceLHS; diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index 552698a..dfcc7940 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -319,9 +319,12 @@ public: Opts["__opencl_c_images"] = true; Opts["__opencl_c_3d_image_writes"] = true; Opts["cl_khr_3d_image_writes"] = true; + Opts["__opencl_c_program_scope_global_variables"] = true; - Opts["__opencl_c_generic_address_space"] = - GPUKind >= llvm::AMDGPU::GK_GFX700; + if (GPUKind >= llvm::AMDGPU::GK_GFX700) { + Opts["__opencl_c_generic_address_space"] = true; + Opts["__opencl_c_device_enqueue"] = true; + } } } diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp index bbc45e5..24a5fc2 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp @@ -221,10 +221,9 @@ mlir::Value OpenACCRecipeBuilderBase::makeBoundsAlloca( return initialAlloca; } -mlir::Value -OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue, - mlir::Value bound, - mlir::Location loc, bool inverse) { +std::pair<mlir::Value, mlir::Value> OpenACCRecipeBuilderBase::createBoundsLoop( + mlir::Value subscriptedValue, mlir::Value subscriptedValue2, + mlir::Value bound, mlir::Location loc, bool inverse) { mlir::Operation *bodyInsertLoc; mlir::Type itrTy = cgf.cgm.convertType(cgf.getContext().UnsignedLongLongTy); @@ -249,7 +248,6 @@ OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue, return cir::PtrStrideOp::create(builder, loc, eltLoad.getType(), eltLoad, idxLoad); - }; auto forStmtBuilder = [&]() { @@ -303,6 +301,8 @@ OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue, if (subscriptedValue) subscriptedValue = doSubscriptOp(subscriptedValue, load); + if (subscriptedValue2) + subscriptedValue2 = doSubscriptOp(subscriptedValue2, load); bodyInsertLoc = builder.createYield(loc); }, /*stepBuilder=*/ @@ -325,7 +325,7 @@ OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue, // Leave the insertion point to be inside the body, so we can loop over 
// these things. builder.setInsertionPoint(bodyInsertLoc); - return subscriptedValue; + return {subscriptedValue, subscriptedValue2}; } mlir::acc::ReductionOperator @@ -434,7 +434,7 @@ void OpenACCRecipeBuilderBase::createInitRecipe( mlir::Location loc, mlir::Location locEnd, SourceRange exprRange, mlir::Value mainOp, mlir::Region &recipeInitRegion, size_t numBounds, llvm::ArrayRef<QualType> boundTypes, const VarDecl *allocaDecl, - QualType origType) { + QualType origType, bool emitInitExpr) { assert(allocaDecl && "Required recipe variable not set?"); CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, allocaDecl}; @@ -464,14 +464,15 @@ void OpenACCRecipeBuilderBase::createInitRecipe( // initialize this variable correctly. CIRGenFunction::AutoVarEmission tempDeclEmission = cgf.emitAutoVarAlloca(*allocaDecl, builder.saveInsertionPoint()); - cgf.emitAutoVarInit(tempDeclEmission); + if (emitInitExpr) + cgf.emitAutoVarInit(tempDeclEmission); } else { mlir::Value alloca = makeBoundsAlloca( block, exprRange, loc, allocaDecl->getName(), numBounds, boundTypes); // If the initializer is trivial, there is nothing to do here, so save // ourselves some effort. - if (allocaDecl->getInit() && + if (emitInitExpr && allocaDecl->getInit() && (!cgf.isTrivialInitializer(allocaDecl->getInit()) || cgf.getContext().getLangOpts().getTrivialAutoVarInit() != LangOptions::TrivialAutoVarInitKind::Uninitialized)) @@ -484,35 +485,42 @@ void OpenACCRecipeBuilderBase::createInitRecipe( void OpenACCRecipeBuilderBase::createFirstprivateRecipeCopy( mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp, - CIRGenFunction::AutoVarEmission tempDeclEmission, - mlir::acc::FirstprivateRecipeOp recipe, const VarDecl *varRecipe, - const VarDecl *temporary) { - mlir::Block *block = - createRecipeBlock(recipe.getCopyRegion(), mainOp.getType(), loc, - /*numBounds=*/0, /*isInit=*/false); - builder.setInsertionPointToEnd(&recipe.getCopyRegion().back()); + const VarDecl *allocaDecl, const VarDecl *temporary, + mlir::Region ©Region, size_t numBounds) { + mlir::Block *block = createRecipeBlock(copyRegion, mainOp.getType(), loc, + numBounds, /*isInit=*/false); + builder.setInsertionPointToEnd(©Region.back()); CIRGenFunction::LexicalScope ls(cgf, loc, block); - mlir::BlockArgument fromArg = block->getArgument(0); - mlir::BlockArgument toArg = block->getArgument(1); + mlir::Value fromArg = block->getArgument(0); + mlir::Value toArg = block->getArgument(1); - mlir::Type elementTy = - mlir::cast<cir::PointerType>(mainOp.getType()).getPointee(); + llvm::MutableArrayRef<mlir::BlockArgument> boundsRange = + block->getArguments().drop_front(2); - // Set the address of the emission to be the argument, so that we initialize - // that instead of the variable in the other block. - tempDeclEmission.setAllocatedAddress( - Address{toArg, elementTy, cgf.getContext().getDeclAlign(varRecipe)}); + for (mlir::BlockArgument boundArg : llvm::reverse(boundsRange)) + std::tie(fromArg, toArg) = + createBoundsLoop(fromArg, toArg, boundArg, loc, /*inverse=*/false); + + // Set up the 'to' address. + mlir::Type elementTy = + mlir::cast<cir::PointerType>(toArg.getType()).getPointee(); + CIRGenFunction::AutoVarEmission tempDeclEmission(*allocaDecl); tempDeclEmission.emittedAsOffload = true; + tempDeclEmission.setAllocatedAddress( + Address{toArg, elementTy, cgf.getContext().getDeclAlign(allocaDecl)}); + // Set up the 'from' address from the temporary. 
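A plain-C++ picture of what the reworked createBoundsLoop enables for the firstprivate copy region below: one loop per bounds dimension, with the 'from' and 'to' values subscripted in lockstep by the same induction variable (a sketch only; the real code builds cir.for loops over acc.bounds arguments, and the element types come from the recipe):

    // Two bounds dimensions: both pointers advance together, mirroring
    // the {subscriptedValue, subscriptedValue2} pair returned per
    // dimension by createBoundsLoop.
    void copy2d(const double *from, double *to, size_t n0, size_t n1) {
      for (size_t i = 0; i < n0; ++i)
        for (size_t j = 0; j < n1; ++j)
          to[i * n1 + j] = from[i * n1 + j]; // innermost element-wise init
    }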
CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, temporary}; cgf.setAddrOfLocalVar( temporary, - Address{fromArg, elementTy, cgf.getContext().getDeclAlign(varRecipe)}); - + Address{fromArg, elementTy, cgf.getContext().getDeclAlign(allocaDecl)}); cgf.emitAutoVarInit(tempDeclEmission); + + builder.setInsertionPointToEnd(©Region.back()); mlir::acc::YieldOp::create(builder, locEnd); } + // This function generates the 'combiner' section for a reduction recipe. Note // that this function is not 'insertion point' clean, in that it alters the // insertion point to be inside of the 'combiner' section of the recipe, but diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h index 21707ad..a5da744 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h @@ -49,14 +49,16 @@ protected: // Creates a loop through an 'acc.bounds', leaving the 'insertion' point to be // the inside of the loop body. Traverses LB->UB UNLESS `inverse` is set. // Returns the 'subscriptedValue' changed with the new bounds subscript. + std::pair<mlir::Value, mlir::Value> + createBoundsLoop(mlir::Value subscriptedValue, mlir::Value subscriptedValue2, + mlir::Value bound, mlir::Location loc, bool inverse); + mlir::Value createBoundsLoop(mlir::Value subscriptedValue, mlir::Value bound, - mlir::Location loc, bool inverse); + mlir::Location loc, bool inverse) { + return createBoundsLoop(subscriptedValue, {}, bound, loc, inverse).first; + } + mlir::acc::ReductionOperator convertReductionOp(OpenACCReductionOperator op); - void createFirstprivateRecipeCopy( - mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp, - CIRGenFunction::AutoVarEmission tempDeclEmission, - mlir::acc::FirstprivateRecipeOp recipe, const VarDecl *varRecipe, - const VarDecl *temporary); // This function generates the 'combiner' section for a reduction recipe. Note // that this function is not 'insertion point' clean, in that it alters the @@ -66,11 +68,19 @@ protected: mlir::Value mainOp, mlir::acc::ReductionRecipeOp recipe, size_t numBounds); + void createInitRecipe(mlir::Location loc, mlir::Location locEnd, SourceRange exprRange, mlir::Value mainOp, mlir::Region &recipeInitRegion, size_t numBounds, llvm::ArrayRef<QualType> boundTypes, - const VarDecl *allocaDecl, QualType origType); + const VarDecl *allocaDecl, QualType origType, + bool emitInitExpr); + + void createFirstprivateRecipeCopy(mlir::Location loc, mlir::Location locEnd, + mlir::Value mainOp, + const VarDecl *allocaDecl, + const VarDecl *temporary, + mlir::Region ©Region, size_t numBounds); void createRecipeDestroySection(mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp, CharUnits alignment, @@ -150,63 +160,6 @@ class OpenACCRecipeBuilder : OpenACCRecipeBuilderBase { return recipeName; } - // Create the 'init' section of the recipe, including the 'copy' section for - // 'firstprivate'. Note that this function is not 'insertion point' clean, in - // that it alters the insertion point to be inside of the 'destroy' section of - // the recipe, but doesn't restore it aftewards. - void createRecipeInitCopy(mlir::Location loc, mlir::Location locEnd, - SourceRange exprRange, mlir::Value mainOp, - RecipeTy recipe, const VarDecl *varRecipe, - const VarDecl *temporary) { - // TODO: OpenACC: when we get the 'pointer' variants for - // firstprivate/reduction, this probably should be removed/split into - // functions for the BuilderBase. 
- assert(varRecipe && "Required recipe variable not set?"); - - CIRGenFunction::AutoVarEmission tempDeclEmission{ - CIRGenFunction::AutoVarEmission::invalid()}; - CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, varRecipe}; - - // Do the 'init' section of the recipe IR, which does an alloca, then the - // initialization (except for firstprivate). - mlir::Block *block = - createRecipeBlock(recipe.getInitRegion(), mainOp.getType(), loc, - /*numBounds=*/0, /*isInit=*/true); - builder.setInsertionPointToEnd(&recipe.getInitRegion().back()); - CIRGenFunction::LexicalScope ls(cgf, loc, block); - - tempDeclEmission = - cgf.emitAutoVarAlloca(*varRecipe, builder.saveInsertionPoint()); - - // 'firstprivate' doesn't do its initialization in the 'init' section, - // instead it does it in the 'copy' section. SO, only do 'init' here for - // reduction. - if constexpr (std::is_same_v<RecipeTy, mlir::acc::ReductionRecipeOp>) { - // Unlike Private, the recipe here is always required as it has to do - // init, not just 'default' init. - if (!varRecipe->getInit()) - cgf.cgm.errorNYI(exprRange, "reduction init recipe"); - cgf.emitAutoVarInit(tempDeclEmission); - } - - mlir::acc::YieldOp::create(builder, locEnd); - - if constexpr (std::is_same_v<RecipeTy, mlir::acc::FirstprivateRecipeOp>) { - if (!varRecipe->getInit()) { - // If we don't have any initialization recipe, we failed during Sema to - // initialize this correctly. If we disable the - // Sema::TentativeAnalysisScopes in SemaOpenACC::CreateInitRecipe, it'll - // emit an error to tell us. However, emitting those errors during - // production is a violation of the standard, so we cannot do them. - cgf.cgm.errorNYI( - exprRange, "firstprivate copy-init recipe not properly generated"); - } - - createFirstprivateRecipeCopy(loc, locEnd, mainOp, tempDeclEmission, - recipe, varRecipe, temporary); - } - } - public: OpenACCRecipeBuilder(CIRGen::CIRGenFunction &cgf, CIRGen::CIRGenBuilderTy &builder) @@ -221,19 +174,6 @@ public: BuiltinType::ArraySection) && "array section shouldn't make it to recipe creation"); - // TODO: OpenACC: This is a bit of a hackery to get this to not change for - // the non-private recipes. This will be removed soon, when we get this - // 'right' for firstprivate and reduction. - if constexpr (std::is_same_v<RecipeTy, mlir::acc::FirstprivateRecipeOp>) { - if (numBounds) { - cgf.cgm.errorNYI(varRef->getSourceRange(), - "firstprivate-init with bounds"); - } - boundTypes = {}; - numBounds = 0; - origType = baseType; - } - mlir::ModuleOp mod = builder.getBlock() ->getParent() ->template getParentOfType<mlir::ModuleOp>(); @@ -262,21 +202,20 @@ public: if constexpr (std::is_same_v<RecipeTy, mlir::acc::PrivateRecipeOp>) { createInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp, recipe.getInitRegion(), numBounds, boundTypes, varRecipe, - origType); + origType, /*emitInitExpr=*/true); } else if constexpr (std::is_same_v<RecipeTy, mlir::acc::ReductionRecipeOp>) { createInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp, recipe.getInitRegion(), numBounds, boundTypes, varRecipe, - origType); + origType, /*emitInitExpr=*/true); createReductionRecipeCombiner(loc, locEnd, mainOp, recipe, numBounds); } else { static_assert(std::is_same_v<RecipeTy, mlir::acc::FirstprivateRecipeOp>); - // TODO: OpenACC: we probably want this to call createInitRecipe as well, - // but do so in a way that omits the 'initialization', so that we can do - // it separately, since it belongs in the 'copy' region. 
It also might - // need a way of getting the tempDeclEmission out of it for that purpose. - createRecipeInitCopy(loc, locEnd, varRef->getSourceRange(), mainOp, - recipe, varRecipe, temporary); + createInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp, + recipe.getInitRegion(), numBounds, boundTypes, varRecipe, + origType, /*emitInitExpr=*/false); + createFirstprivateRecipeCopy(loc, locEnd, mainOp, varRecipe, temporary, + recipe.getCopyRegion(), numBounds); } if (origType.isDestructedType()) diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp index c15637d..2eeef81 100644 --- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp +++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp @@ -8,18 +8,39 @@ #include "PassDetail.h" #include "clang/AST/ASTContext.h" +#include "clang/Basic/Module.h" #include "clang/Basic/TargetInfo.h" #include "clang/CIR/Dialect/Builder/CIRBaseBuilder.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" #include "clang/CIR/Dialect/IR/CIROpsEnums.h" #include "clang/CIR/Dialect/Passes.h" #include "clang/CIR/MissingFeatures.h" +#include "llvm/Support/Path.h" #include <memory> using namespace mlir; using namespace cir; +static SmallString<128> getTransformedFileName(mlir::ModuleOp mlirModule) { + SmallString<128> fileName; + + if (mlirModule.getSymName()) + fileName = llvm::sys::path::filename(mlirModule.getSymName()->str()); + + if (fileName.empty()) + fileName = "<null>"; + + for (size_t i = 0; i < fileName.size(); ++i) { + // Replace everything that's not [a-zA-Z0-9._] with a _. This set happens + // to be the set of C preprocessing numbers. + if (!clang::isPreprocessingNumberBody(fileName[i])) + fileName[i] = '_'; + } + + return fileName; +} + namespace { struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> { LoweringPreparePass() = default; @@ -30,9 +51,16 @@ struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> { void lowerComplexDivOp(cir::ComplexDivOp op); void lowerComplexMulOp(cir::ComplexMulOp op); void lowerUnaryOp(cir::UnaryOp op); + void lowerGlobalOp(cir::GlobalOp op); void lowerArrayDtor(cir::ArrayDtor op); void lowerArrayCtor(cir::ArrayCtor op); + /// Build the function that initializes the specified global + cir::FuncOp buildCXXGlobalVarDeclInitFunc(cir::GlobalOp op); + + /// Build a module init function that calls all the dynamic initializers. + void buildCXXGlobalInitFunc(); + cir::FuncOp buildRuntimeFunction( mlir::OpBuilder &builder, llvm::StringRef name, mlir::Location loc, cir::FuncType type, @@ -47,6 +75,10 @@ struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> { /// Tracks current module. mlir::ModuleOp mlirModule; + /// Tracks existing dynamic initializers. + llvm::StringMap<uint32_t> dynamicInitializerNames; + llvm::SmallVector<cir::FuncOp> dynamicInitializers; + void setASTContext(clang::ASTContext *c) { astCtx = c; } }; @@ -589,6 +621,111 @@ void LoweringPreparePass::lowerUnaryOp(cir::UnaryOp op) { op.erase(); } +cir::FuncOp +LoweringPreparePass::buildCXXGlobalVarDeclInitFunc(cir::GlobalOp op) { + // TODO(cir): Store this in the GlobalOp. + // This should come from the MangleContext, but for now I'm hardcoding it. + SmallString<256> fnName("__cxx_global_var_init"); + // Get a unique name + uint32_t cnt = dynamicInitializerNames[fnName]++; + if (cnt) + fnName += "." + llvm::Twine(cnt).str(); + + // Create a variable initialization function. 
+  CIRBaseBuilderTy builder(getContext());
+  builder.setInsertionPointAfter(op);
+  auto fnType = cir::FuncType::get({}, builder.getVoidTy());
+  FuncOp f = buildRuntimeFunction(builder, fnName, op.getLoc(), fnType,
+                                  cir::GlobalLinkageKind::InternalLinkage);
+
+  // Move over the initialization code of the ctor region.
+  mlir::Block *entryBB = f.addEntryBlock();
+  if (!op.getCtorRegion().empty()) {
+    mlir::Block &block = op.getCtorRegion().front();
+    entryBB->getOperations().splice(entryBB->begin(), block.getOperations(),
+                                    block.begin(), std::prev(block.end()));
+  }
+
+  // Register the destructor call with __cxa_atexit.
+  mlir::Region &dtorRegion = op.getDtorRegion();
+  if (!dtorRegion.empty()) {
+    assert(!cir::MissingFeatures::opGlobalDtorLowering());
+    llvm_unreachable("dtor region lowering is NYI");
+  }
+
+  // Replace cir.yield with cir.return.
+  builder.setInsertionPointToEnd(entryBB);
+  mlir::Operation *yieldOp = nullptr;
+  if (!op.getCtorRegion().empty()) {
+    mlir::Block &block = op.getCtorRegion().front();
+    yieldOp = &block.getOperations().back();
+  } else {
+    assert(!cir::MissingFeatures::opGlobalDtorLowering());
+    llvm_unreachable("dtor region lowering is NYI");
+  }
+
+  assert(isa<YieldOp>(*yieldOp));
+  cir::ReturnOp::create(builder, yieldOp->getLoc());
+  return f;
+}
+
+void LoweringPreparePass::lowerGlobalOp(GlobalOp op) {
+  mlir::Region &ctorRegion = op.getCtorRegion();
+  mlir::Region &dtorRegion = op.getDtorRegion();
+
+  if (!ctorRegion.empty() || !dtorRegion.empty()) {
+    // Build a variable initialization function and move the initialization
+    // code in the ctor region over.
+    cir::FuncOp f = buildCXXGlobalVarDeclInitFunc(op);
+
+    // Clear the ctor and dtor regions.
+    ctorRegion.getBlocks().clear();
+    dtorRegion.getBlocks().clear();
+
+    assert(!cir::MissingFeatures::astVarDeclInterface());
+    dynamicInitializers.push_back(f);
+  }
+
+  assert(!cir::MissingFeatures::opGlobalAnnotations());
+}
+
+void LoweringPreparePass::buildCXXGlobalInitFunc() {
+  if (dynamicInitializers.empty())
+    return;
+
+  assert(!cir::MissingFeatures::opGlobalCtorList());
+
+  SmallString<256> fnName;
+  // Include the filename in the symbol name. Including "sub_" matches gcc
+  // and makes sure these symbols appear lexicographically behind the symbols
+  // with priority (TBD). Module implementation units behave the same
+  // way as a non-modular TU with imports.
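The getTransformedFileName helper introduced above follows the usual CodeGen scheme for naming the per-TU initializer: take the module's file name, replace every character outside [a-zA-Z0-9._] (the preprocessing-number body set) with '_', and append the result to _GLOBAL__sub_I_. A rough standalone model of that sanitization, assuming std::string in place of SmallString and an ASCII-only isalnum approximation of clang::isPreprocessingNumberBody:

    #include <cctype>
    #include <string>

    // Sanitize a file name the way buildCXXGlobalInitFunc needs it:
    // anything that is not [a-zA-Z0-9._] collapses to '_'.
    static std::string transformedFileName(std::string name) {
      if (name.empty())
        name = "<null>";
      for (char &c : name)
        if (!(std::isalnum(static_cast<unsigned char>(c)) || c == '.' ||
              c == '_'))
          c = '_';
      return name;
    }

    // e.g. "my-file.cpp" yields the init function _GLOBAL__sub_I_my_file.cpp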
+ // TODO: check CXX20ModuleInits + if (astCtx->getCurrentNamedModule() && + !astCtx->getCurrentNamedModule()->isModuleImplementation()) { + llvm::raw_svector_ostream out(fnName); + std::unique_ptr<clang::MangleContext> mangleCtx( + astCtx->createMangleContext()); + cast<clang::ItaniumMangleContext>(*mangleCtx) + .mangleModuleInitializer(astCtx->getCurrentNamedModule(), out); + } else { + fnName += "_GLOBAL__sub_I_"; + fnName += getTransformedFileName(mlirModule); + } + + CIRBaseBuilderTy builder(getContext()); + builder.setInsertionPointToEnd(&mlirModule.getBodyRegion().back()); + auto fnType = cir::FuncType::get({}, builder.getVoidTy()); + cir::FuncOp f = + buildRuntimeFunction(builder, fnName, mlirModule.getLoc(), fnType, + cir::GlobalLinkageKind::ExternalLinkage); + builder.setInsertionPointToStart(f.addEntryBlock()); + for (cir::FuncOp &f : dynamicInitializers) + builder.createCallOp(f.getLoc(), f, {}); + + cir::ReturnOp::create(builder, f.getLoc()); +} + static void lowerArrayDtorCtorIntoLoop(cir::CIRBaseBuilderTy &builder, clang::ASTContext *astCtx, mlir::Operation *op, mlir::Type eltTy, @@ -691,6 +828,8 @@ void LoweringPreparePass::runOnOp(mlir::Operation *op) { lowerComplexDivOp(complexDiv); else if (auto complexMul = mlir::dyn_cast<cir::ComplexMulOp>(op)) lowerComplexMulOp(complexMul); + else if (auto glob = mlir::dyn_cast<cir::GlobalOp>(op)) + lowerGlobalOp(glob); else if (auto unary = mlir::dyn_cast<cir::UnaryOp>(op)) lowerUnaryOp(unary); } @@ -704,12 +843,15 @@ void LoweringPreparePass::runOnOperation() { op->walk([&](mlir::Operation *op) { if (mlir::isa<cir::ArrayCtor, cir::ArrayDtor, cir::CastOp, - cir::ComplexMulOp, cir::ComplexDivOp, cir::UnaryOp>(op)) + cir::ComplexMulOp, cir::ComplexDivOp, cir::GlobalOp, + cir::UnaryOp>(op)) opsToTransform.push_back(op); }); for (mlir::Operation *o : opsToTransform) runOnOp(o); + + buildCXXGlobalInitFunc(); } std::unique_ptr<Pass> mlir::createLoweringPreparePass() { diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index e6e4947..9f30287 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -6784,29 +6784,26 @@ LValue CodeGenFunction::EmitPseudoObjectLValue(const PseudoObjectExpr *E) { return emitPseudoObjectExpr(*this, E, true, AggValueSlot::ignored()).LV; } -void CodeGenFunction::FlattenAccessAndType( - Address Addr, QualType AddrType, - SmallVectorImpl<std::pair<Address, llvm::Value *>> &AccessList, - SmallVectorImpl<QualType> &FlatTypes) { - // WorkList is list of type we are processing + the Index List to access - // the field of that type in Addr for use in a GEP - llvm::SmallVector<std::pair<QualType, llvm::SmallVector<llvm::Value *, 4>>, - 16> +void CodeGenFunction::FlattenAccessAndTypeLValue( + LValue Val, SmallVectorImpl<LValue> &AccessList) { + + llvm::SmallVector< + std::tuple<LValue, QualType, llvm::SmallVector<llvm::Value *, 4>>, 16> WorkList; llvm::IntegerType *IdxTy = llvm::IntegerType::get(getLLVMContext(), 32); - // Addr should be a pointer so we need to 'dereference' it - WorkList.push_back({AddrType, {llvm::ConstantInt::get(IdxTy, 0)}}); + WorkList.push_back({Val, Val.getType(), {llvm::ConstantInt::get(IdxTy, 0)}}); while (!WorkList.empty()) { - auto [T, IdxList] = WorkList.pop_back_val(); + auto [LVal, T, IdxList] = WorkList.pop_back_val(); T = T.getCanonicalType().getUnqualifiedType(); assert(!isa<MatrixType>(T) && "Matrix types not yet supported in HLSL"); + if (const auto *CAT = dyn_cast<ConstantArrayType>(T)) { uint64_t Size = CAT->getZExtSize(); for (int64_t I = 
Size - 1; I > -1; I--) { llvm::SmallVector<llvm::Value *, 4> IdxListCopy = IdxList; IdxListCopy.push_back(llvm::ConstantInt::get(IdxTy, I)); - WorkList.emplace_back(CAT->getElementType(), IdxListCopy); + WorkList.emplace_back(LVal, CAT->getElementType(), IdxListCopy); } } else if (const auto *RT = dyn_cast<RecordType>(T)) { const RecordDecl *Record = RT->getOriginalDecl()->getDefinitionOrSelf(); @@ -6814,44 +6811,75 @@ void CodeGenFunction::FlattenAccessAndType( const CXXRecordDecl *CXXD = dyn_cast<CXXRecordDecl>(Record); - llvm::SmallVector<QualType, 16> FieldTypes; + llvm::SmallVector< + std::tuple<LValue, QualType, llvm::SmallVector<llvm::Value *, 4>>, 16> + ReverseList; if (CXXD && CXXD->isStandardLayout()) Record = CXXD->getStandardLayoutBaseWithFields(); // deal with potential base classes if (CXXD && !CXXD->isStandardLayout()) { - for (auto &Base : CXXD->bases()) - FieldTypes.push_back(Base.getType()); + if (CXXD->getNumBases() > 0) { + assert(CXXD->getNumBases() == 1 && + "HLSL doesn't support multiple inheritance."); + auto Base = CXXD->bases_begin(); + llvm::SmallVector<llvm::Value *, 4> IdxListCopy = IdxList; + IdxListCopy.push_back(llvm::ConstantInt::get( + IdxTy, 0)); // base struct should be at index zero + ReverseList.emplace_back(LVal, Base->getType(), IdxListCopy); + } } - for (auto *FD : Record->fields()) - FieldTypes.push_back(FD->getType()); + const CGRecordLayout &Layout = CGM.getTypes().getCGRecordLayout(Record); - for (int64_t I = FieldTypes.size() - 1; I > -1; I--) { - llvm::SmallVector<llvm::Value *, 4> IdxListCopy = IdxList; - IdxListCopy.push_back(llvm::ConstantInt::get(IdxTy, I)); - WorkList.insert(WorkList.end(), {FieldTypes[I], IdxListCopy}); + llvm::Type *LLVMT = ConvertTypeForMem(T); + CharUnits Align = getContext().getTypeAlignInChars(T); + LValue RLValue; + bool createdGEP = false; + for (auto *FD : Record->fields()) { + if (FD->isBitField()) { + if (FD->isUnnamedBitField()) + continue; + if (!createdGEP) { + createdGEP = true; + Address GEP = Builder.CreateInBoundsGEP(LVal.getAddress(), IdxList, + LLVMT, Align, "gep"); + RLValue = MakeAddrLValue(GEP, T); + } + LValue FieldLVal = EmitLValueForField(RLValue, FD, true); + ReverseList.push_back({FieldLVal, FD->getType(), {}}); + } else { + llvm::SmallVector<llvm::Value *, 4> IdxListCopy = IdxList; + IdxListCopy.push_back( + llvm::ConstantInt::get(IdxTy, Layout.getLLVMFieldNo(FD))); + ReverseList.emplace_back(LVal, FD->getType(), IdxListCopy); + } } + + std::reverse(ReverseList.begin(), ReverseList.end()); + llvm::append_range(WorkList, ReverseList); } else if (const auto *VT = dyn_cast<VectorType>(T)) { llvm::Type *LLVMT = ConvertTypeForMem(T); CharUnits Align = getContext().getTypeAlignInChars(T); - Address GEP = - Builder.CreateInBoundsGEP(Addr, IdxList, LLVMT, Align, "vector.gep"); + Address GEP = Builder.CreateInBoundsGEP(LVal.getAddress(), IdxList, LLVMT, + Align, "vector.gep"); + LValue Base = MakeAddrLValue(GEP, T); for (unsigned I = 0, E = VT->getNumElements(); I < E; I++) { - llvm::Value *Idx = llvm::ConstantInt::get(IdxTy, I); - // gep on vector fields is not recommended so combine gep with - // extract/insert - AccessList.emplace_back(GEP, Idx); - FlatTypes.push_back(VT->getElementType()); + llvm::Constant *Idx = llvm::ConstantInt::get(IdxTy, I); + LValue LV = + LValue::MakeVectorElt(Base.getAddress(), Idx, VT->getElementType(), + Base.getBaseInfo(), TBAAAccessInfo()); + AccessList.emplace_back(LV); } - } else { - // a scalar/builtin type - llvm::Type *LLVMT = ConvertTypeForMem(T); - CharUnits 
Align = getContext().getTypeAlignInChars(T); - Address GEP = - Builder.CreateInBoundsGEP(Addr, IdxList, LLVMT, Align, "gep"); - AccessList.emplace_back(GEP, nullptr); - FlatTypes.push_back(T); + } else { // a scalar/builtin type + if (!IdxList.empty()) { + llvm::Type *LLVMT = ConvertTypeForMem(T); + CharUnits Align = getContext().getTypeAlignInChars(T); + Address GEP = Builder.CreateInBoundsGEP(LVal.getAddress(), IdxList, + LLVMT, Align, "gep"); + AccessList.emplace_back(MakeAddrLValue(GEP, T)); + } else // must be a bitfield we already created an lvalue for + AccessList.emplace_back(LVal); } } } diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index b8150a2..07b9aeb 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -488,100 +488,62 @@ static bool isTrivialFiller(Expr *E) { return false; } -static void EmitHLSLAggregateSplatCast(CodeGenFunction &CGF, Address DestVal, - QualType DestTy, llvm::Value *SrcVal, - QualType SrcTy, SourceLocation Loc) { +// emit an elementwise cast where the RHS is a scalar or vector +// or emit an aggregate splat cast +static void EmitHLSLScalarElementwiseAndSplatCasts(CodeGenFunction &CGF, + LValue DestVal, + llvm::Value *SrcVal, + QualType SrcTy, + SourceLocation Loc) { // Flatten our destination - SmallVector<QualType> DestTypes; // Flattened type - SmallVector<std::pair<Address, llvm::Value *>, 16> StoreGEPList; - // ^^ Flattened accesses to DestVal we want to store into - CGF.FlattenAccessAndType(DestVal, DestTy, StoreGEPList, DestTypes); - - assert(SrcTy->isScalarType() && "Invalid HLSL Aggregate splat cast."); - for (unsigned I = 0, Size = StoreGEPList.size(); I < Size; ++I) { - llvm::Value *Cast = - CGF.EmitScalarConversion(SrcVal, SrcTy, DestTypes[I], Loc); - - // store back - llvm::Value *Idx = StoreGEPList[I].second; - if (Idx) { - llvm::Value *V = - CGF.Builder.CreateLoad(StoreGEPList[I].first, "load.for.insert"); - Cast = CGF.Builder.CreateInsertElement(V, Cast, Idx); - } - CGF.Builder.CreateStore(Cast, StoreGEPList[I].first); - } -} - -// emit a flat cast where the RHS is a scalar, including vector -static void EmitHLSLScalarFlatCast(CodeGenFunction &CGF, Address DestVal, - QualType DestTy, llvm::Value *SrcVal, - QualType SrcTy, SourceLocation Loc) { - // Flatten our destination - SmallVector<QualType, 16> DestTypes; // Flattened type - SmallVector<std::pair<Address, llvm::Value *>, 16> StoreGEPList; - // ^^ Flattened accesses to DestVal we want to store into - CGF.FlattenAccessAndType(DestVal, DestTy, StoreGEPList, DestTypes); - - assert(SrcTy->isVectorType() && "HLSL Flat cast doesn't handle splatting."); - const VectorType *VT = SrcTy->getAs<VectorType>(); - SrcTy = VT->getElementType(); - assert(StoreGEPList.size() <= VT->getNumElements() && - "Cannot perform HLSL flat cast when vector source \ - object has less elements than flattened destination \ - object."); - for (unsigned I = 0, Size = StoreGEPList.size(); I < Size; I++) { - llvm::Value *Load = CGF.Builder.CreateExtractElement(SrcVal, I, "vec.load"); + SmallVector<LValue, 16> StoreList; + CGF.FlattenAccessAndTypeLValue(DestVal, StoreList); + + bool isVector = false; + if (auto *VT = SrcTy->getAs<VectorType>()) { + isVector = true; + SrcTy = VT->getElementType(); + assert(StoreList.size() <= VT->getNumElements() && + "Cannot perform HLSL flat cast when vector source \ + object has less elements than flattened destination \ + object."); + } + + for (unsigned I = 0, Size = StoreList.size(); I < Size; I++) { + LValue DestLVal = 
StoreList[I]; + llvm::Value *Load = + isVector ? CGF.Builder.CreateExtractElement(SrcVal, I, "vec.load") + : SrcVal; llvm::Value *Cast = - CGF.EmitScalarConversion(Load, SrcTy, DestTypes[I], Loc); - - // store back - llvm::Value *Idx = StoreGEPList[I].second; - if (Idx) { - llvm::Value *V = - CGF.Builder.CreateLoad(StoreGEPList[I].first, "load.for.insert"); - Cast = CGF.Builder.CreateInsertElement(V, Cast, Idx); - } - CGF.Builder.CreateStore(Cast, StoreGEPList[I].first); + CGF.EmitScalarConversion(Load, SrcTy, DestLVal.getType(), Loc); + CGF.EmitStoreThroughLValue(RValue::get(Cast), DestLVal); } } // emit a flat cast where the RHS is an aggregate -static void EmitHLSLElementwiseCast(CodeGenFunction &CGF, Address DestVal, - QualType DestTy, Address SrcVal, - QualType SrcTy, SourceLocation Loc) { +static void EmitHLSLElementwiseCast(CodeGenFunction &CGF, LValue DestVal, + LValue SrcVal, SourceLocation Loc) { // Flatten our destination - SmallVector<QualType, 16> DestTypes; // Flattened type - SmallVector<std::pair<Address, llvm::Value *>, 16> StoreGEPList; - // ^^ Flattened accesses to DestVal we want to store into - CGF.FlattenAccessAndType(DestVal, DestTy, StoreGEPList, DestTypes); + SmallVector<LValue, 16> StoreList; + CGF.FlattenAccessAndTypeLValue(DestVal, StoreList); // Flatten our src - SmallVector<QualType, 16> SrcTypes; // Flattened type - SmallVector<std::pair<Address, llvm::Value *>, 16> LoadGEPList; - // ^^ Flattened accesses to SrcVal we want to load from - CGF.FlattenAccessAndType(SrcVal, SrcTy, LoadGEPList, SrcTypes); + SmallVector<LValue, 16> LoadList; + CGF.FlattenAccessAndTypeLValue(SrcVal, LoadList); - assert(StoreGEPList.size() <= LoadGEPList.size() && - "Cannot perform HLSL flat cast when flattened source object \ + assert(StoreList.size() <= LoadList.size() && + "Cannot perform HLSL elementwise cast when flattened source object \ has less elements than flattened destination object."); - // apply casts to what we load from LoadGEPList + // apply casts to what we load from LoadList // and store result in Dest - for (unsigned I = 0, E = StoreGEPList.size(); I < E; I++) { - llvm::Value *Idx = LoadGEPList[I].second; - llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[I].first, "load"); - Load = - Idx ? 
CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract") : Load; - llvm::Value *Cast = - CGF.EmitScalarConversion(Load, SrcTypes[I], DestTypes[I], Loc); - - // store back - Idx = StoreGEPList[I].second; - if (Idx) { - llvm::Value *V = - CGF.Builder.CreateLoad(StoreGEPList[I].first, "load.for.insert"); - Cast = CGF.Builder.CreateInsertElement(V, Cast, Idx); - } - CGF.Builder.CreateStore(Cast, StoreGEPList[I].first); + for (unsigned I = 0, E = StoreList.size(); I < E; I++) { + LValue DestLVal = StoreList[I]; + LValue SrcLVal = LoadList[I]; + RValue RVal = CGF.EmitLoadOfLValue(SrcLVal, Loc); + assert(RVal.isScalar() && "All flattened source values should be scalars"); + llvm::Value *Val = RVal.getScalarVal(); + llvm::Value *Cast = CGF.EmitScalarConversion(Val, SrcLVal.getType(), + DestLVal.getType(), Loc); + CGF.EmitStoreThroughLValue(RValue::get(Cast), DestLVal); } } @@ -988,31 +950,33 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { Expr *Src = E->getSubExpr(); QualType SrcTy = Src->getType(); RValue RV = CGF.EmitAnyExpr(Src); - QualType DestTy = E->getType(); - Address DestVal = Dest.getAddress(); + LValue DestLVal = CGF.MakeAddrLValue(Dest.getAddress(), E->getType()); SourceLocation Loc = E->getExprLoc(); - assert(RV.isScalar() && "RHS of HLSL splat cast must be a scalar."); + assert(RV.isScalar() && SrcTy->isScalarType() && + "RHS of HLSL splat cast must be a scalar."); llvm::Value *SrcVal = RV.getScalarVal(); - EmitHLSLAggregateSplatCast(CGF, DestVal, DestTy, SrcVal, SrcTy, Loc); + EmitHLSLScalarElementwiseAndSplatCasts(CGF, DestLVal, SrcVal, SrcTy, Loc); break; } case CK_HLSLElementwiseCast: { Expr *Src = E->getSubExpr(); QualType SrcTy = Src->getType(); RValue RV = CGF.EmitAnyExpr(Src); - QualType DestTy = E->getType(); - Address DestVal = Dest.getAddress(); + LValue DestLVal = CGF.MakeAddrLValue(Dest.getAddress(), E->getType()); SourceLocation Loc = E->getExprLoc(); if (RV.isScalar()) { llvm::Value *SrcVal = RV.getScalarVal(); - EmitHLSLScalarFlatCast(CGF, DestVal, DestTy, SrcVal, SrcTy, Loc); + assert(SrcTy->isVectorType() && + "HLSL Elementwise cast doesn't handle splatting."); + EmitHLSLScalarElementwiseAndSplatCasts(CGF, DestLVal, SrcVal, SrcTy, Loc); } else { assert(RV.isAggregate() && "Can't perform HLSL Aggregate cast on a complex type."); Address SrcVal = RV.getAggregateAddress(); - EmitHLSLElementwiseCast(CGF, DestVal, DestTy, SrcVal, SrcTy, Loc); + EmitHLSLElementwiseCast(CGF, DestLVal, CGF.MakeAddrLValue(SrcVal, SrcTy), + Loc); } break; } diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index c961222..06d9d81 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -2397,39 +2397,37 @@ bool CodeGenFunction::ShouldNullCheckClassCastValue(const CastExpr *CE) { } // RHS is an aggregate type -static Value *EmitHLSLElementwiseCast(CodeGenFunction &CGF, Address RHSVal, - QualType RHSTy, QualType LHSTy, - SourceLocation Loc) { - SmallVector<std::pair<Address, llvm::Value *>, 16> LoadGEPList; - SmallVector<QualType, 16> SrcTypes; // Flattened type - CGF.FlattenAccessAndType(RHSVal, RHSTy, LoadGEPList, SrcTypes); - // LHS is either a vector or a builtin? +static Value *EmitHLSLElementwiseCast(CodeGenFunction &CGF, LValue SrcVal, + QualType DestTy, SourceLocation Loc) { + SmallVector<LValue, 16> LoadList; + CGF.FlattenAccessAndTypeLValue(SrcVal, LoadList); + // Dest is either a vector or a builtin? 
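As context for these FlattenAccessAndTypeLValue-based casts: "flattening" walks an aggregate depth-first in declaration order (base class first, then struct fields, array elements, and vector lanes) down to a flat list of scalar lvalues, and the elementwise cast then converts source element I into destination element I. A toy model of that traversal order, assuming a simplified value tree rather than the Clang lvalue machinery:

    #include <cassert>
    #include <vector>

    struct Node {             // a scalar leaf, or an aggregate if kids is non-empty
      double scalar = 0;
      std::vector<Node> kids; // struct fields / array elements / vector lanes
    };

    static void flatten(const Node &n, std::vector<double> &out) {
      if (n.kids.empty()) {
        out.push_back(n.scalar); // leaf: one flattened scalar
        return;
      }
      for (const Node &k : n.kids) // declaration order, depth-first
        flatten(k, out);
    }

    int main() {
      // struct { int2 v; float f; } flattens to v.x, v.y, f.
      Node s{0, {Node{0, {Node{1}, Node{2}}}, Node{3}}};
      std::vector<double> flat;
      flatten(s, flat);
      assert((flat == std::vector<double>{1, 2, 3}));
    }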
// if its a vector create a temp alloca to store into and return that - if (auto *VecTy = LHSTy->getAs<VectorType>()) { - assert(SrcTypes.size() >= VecTy->getNumElements() && - "Flattened type on RHS must have more elements than vector on LHS."); + if (auto *VecTy = DestTy->getAs<VectorType>()) { + assert(LoadList.size() >= VecTy->getNumElements() && + "Flattened type on RHS must have the same number or more elements " + "than vector on LHS."); llvm::Value *V = - CGF.Builder.CreateLoad(CGF.CreateIRTemp(LHSTy, "flatcast.tmp")); + CGF.Builder.CreateLoad(CGF.CreateIRTemp(DestTy, "flatcast.tmp")); // write to V. for (unsigned I = 0, E = VecTy->getNumElements(); I < E; I++) { - llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[I].first, "load"); - llvm::Value *Idx = LoadGEPList[I].second; - Load = Idx ? CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract") - : Load; - llvm::Value *Cast = CGF.EmitScalarConversion( - Load, SrcTypes[I], VecTy->getElementType(), Loc); + RValue RVal = CGF.EmitLoadOfLValue(LoadList[I], Loc); + assert(RVal.isScalar() && + "All flattened source values should be scalars."); + llvm::Value *Cast = + CGF.EmitScalarConversion(RVal.getScalarVal(), LoadList[I].getType(), + VecTy->getElementType(), Loc); V = CGF.Builder.CreateInsertElement(V, Cast, I); } return V; } - // i its a builtin just do an extract element or load. - assert(LHSTy->isBuiltinType() && + // if its a builtin just do an extract element or load. + assert(DestTy->isBuiltinType() && "Destination type must be a vector or builtin type."); - llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[0].first, "load"); - llvm::Value *Idx = LoadGEPList[0].second; - Load = - Idx ? CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract") : Load; - return CGF.EmitScalarConversion(Load, LHSTy, SrcTypes[0], Loc); + RValue RVal = CGF.EmitLoadOfLValue(LoadList[0], Loc); + assert(RVal.isScalar() && "All flattened source values should be scalars."); + return CGF.EmitScalarConversion(RVal.getScalarVal(), LoadList[0].getType(), + DestTy, Loc); } // VisitCastExpr - Emit code for an explicit or implicit cast. 
Implicit casts @@ -2954,12 +2952,11 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { case CK_HLSLElementwiseCast: { RValue RV = CGF.EmitAnyExpr(E); SourceLocation Loc = CE->getExprLoc(); - QualType SrcTy = E->getType(); assert(RV.isAggregate() && "Not a valid HLSL Elementwise Cast."); // RHS is an aggregate - Address SrcVal = RV.getAggregateAddress(); - return EmitHLSLElementwiseCast(CGF, SrcVal, SrcTy, DestTy, Loc); + LValue SrcVal = CGF.MakeAddrLValue(RV.getAggregateAddress(), E->getType()); + return EmitHLSLElementwiseCast(CGF, SrcVal, DestTy, Loc); } } // end of switch diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index f0565c1..99de6e1 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4464,10 +4464,8 @@ public: AggValueSlot slot = AggValueSlot::ignored()); LValue EmitPseudoObjectLValue(const PseudoObjectExpr *e); - void FlattenAccessAndType( - Address Addr, QualType AddrTy, - SmallVectorImpl<std::pair<Address, llvm::Value *>> &AccessList, - SmallVectorImpl<QualType> &FlatTypes); + void FlattenAccessAndTypeLValue(LValue LVal, + SmallVectorImpl<LValue> &AccessList); llvm::Value *EmitIvarOffset(const ObjCInterfaceDecl *Interface, const ObjCIvarDecl *Ivar); diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h index 4bd7981..d951ba0 100644 --- a/clang/lib/Headers/avx512fp16intrin.h +++ b/clang/lib/Headers/avx512fp16intrin.h @@ -41,7 +41,8 @@ typedef _Float16 __m512h_u __attribute__((__vector_size__(64), __aligned__(1))); #define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 #endif -static __inline__ _Float16 __DEFAULT_FN_ATTRS512 _mm512_cvtsh_h(__m512h __a) { +static __inline__ _Float16 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtsh_h(__m512h __a) { return __a[0]; } diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index fa30c66b..2b375b9 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -3571,9 +3571,6 @@ bool SemaHLSL::CanPerformAggregateSplatCast(Expr *Src, QualType DestTy) { if (SrcVecTy) SrcTy = SrcVecTy->getElementType(); - if (ContainsBitField(DestTy)) - return false; - llvm::SmallVector<QualType> DestTypes; BuildFlattenedTypeList(DestTy, DestTypes); @@ -3600,9 +3597,6 @@ bool SemaHLSL::CanPerformElementwiseCast(Expr *Src, QualType DestTy) { (DestTy->isScalarType() || DestTy->isVectorType())) return false; - if (ContainsBitField(DestTy) || ContainsBitField(SrcTy)) - return false; - llvm::SmallVector<QualType> DestTypes; BuildFlattenedTypeList(DestTy, DestTypes); llvm::SmallVector<QualType> SrcTypes; diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp index 7ad7049..8471f02 100644 --- a/clang/lib/Sema/SemaOpenACC.cpp +++ b/clang/lib/Sema/SemaOpenACC.cpp @@ -2724,16 +2724,6 @@ Expr *GenerateReductionInitRecipeExpr(ASTContext &Context, return InitExpr; } -const Expr *StripOffBounds(const Expr *VarExpr) { - while (isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(VarExpr)) { - if (const auto *AS = dyn_cast<ArraySectionExpr>(VarExpr)) - VarExpr = AS->getBase()->IgnoreParenImpCasts(); - else if (const auto *Sub = dyn_cast<ArraySubscriptExpr>(VarExpr)) - VarExpr = Sub->getBase()->IgnoreParenImpCasts(); - } - return VarExpr; -} - VarDecl *CreateAllocaDecl(ASTContext &Ctx, DeclContext *DC, SourceLocation BeginLoc, IdentifierInfo *VarName, QualType VarTy) { @@ -2794,17 +2784,18 @@ OpenACCPrivateRecipe SemaOpenACC::CreatePrivateInitRecipe(const Expr *VarExpr) { 
OpenACCFirstPrivateRecipe SemaOpenACC::CreateFirstPrivateInitRecipe(const Expr *VarExpr) { - // TODO: OpenACC: This shouldn't be necessary, see PrivateInitRecipe - VarExpr = StripOffBounds(VarExpr); - + // We don't strip bounds here, so that we are doing our recipe init at the + // 'lowest' possible level. Codegen is going to have to do its own 'looping'. if (!VarExpr || VarExpr->getType()->isDependentType()) return OpenACCFirstPrivateRecipe::Empty(); QualType VarTy = VarExpr->getType().getNonReferenceType().getUnqualifiedType(); - // TODO: OpenACC: for arrays/bounds versions, we're going to have to do a - // different initializer, but for now we can go ahead with this. + // Array sections are special, and we have to treat them that way. + if (const auto *ASE = + dyn_cast<ArraySectionExpr>(VarExpr->IgnoreParenImpCasts())) + VarTy = ArraySectionExpr::getBaseOriginalType(ASE); VarDecl *AllocaDecl = CreateAllocaDecl( getASTContext(), SemaRef.getCurContext(), VarExpr->getBeginLoc(), diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp index e1f4d0d..b0096d8 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp @@ -509,6 +509,8 @@ bool initializeScanCompilerInstance( ScanInstance.getFrontendOpts().DisableFree = false; ScanInstance.getFrontendOpts().GenerateGlobalModuleIndex = false; ScanInstance.getFrontendOpts().UseGlobalModuleIndex = false; + ScanInstance.getFrontendOpts().GenReducedBMI = false; + ScanInstance.getFrontendOpts().ModuleOutputPath.clear(); // This will prevent us compiling individual modules asynchronously since // FileManager is not thread-safe, but it does improve performance for now. ScanInstance.getFrontendOpts().ModulesShareFileManager = true; diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp index d67178c..a117bec 100644 --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -263,6 +263,10 @@ makeCommonInvocationForModuleBuild(CompilerInvocation CI) { // units. 
CI.getFrontendOpts().Inputs.clear(); CI.getFrontendOpts().OutputFile.clear(); + CI.getFrontendOpts().GenReducedBMI = false; + CI.getFrontendOpts().ModuleOutputPath.clear(); + CI.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = false; + CI.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = false; // LLVM options are not going to affect the AST CI.getFrontendOpts().LLVMArgs.clear(); diff --git a/clang/test/AST/ByteCode/cxx20.cpp b/clang/test/AST/ByteCode/cxx20.cpp index 67bf9a7..1888998 100644 --- a/clang/test/AST/ByteCode/cxx20.cpp +++ b/clang/test/AST/ByteCode/cxx20.cpp @@ -1070,9 +1070,30 @@ namespace Virtual { public: int a = f(); - virtual constexpr int f() { return 10; } + virtual constexpr int f() const { return 10; } }; + K k; + static_assert(k.f() == 10); // both-error {{not an integral constant expression}} \ + // both-note {{virtual function called on object 'k' whose dynamic type is not constant}} + + void f() { + constexpr K k; + static_assert(k.f() == 10); + } + + void f2() { + K k; + static_assert(k.f() == 10); // both-error {{not an integral constant expression}} \ + // both-note {{virtual function called on object 'k' whose dynamic type is not constant}} + } + + static_assert(K().f() == 10); + + void f3() { + static_assert(K().f() == 10); + } + class L : public K { public: int b = f(); @@ -1083,6 +1104,42 @@ namespace Virtual { static_assert(l.a == 10); static_assert(l.b == 10); static_assert(l.c == 10); + static_assert(l.f() == 10); + + struct M { + K& mk = k; + }; + static_assert(M{}.mk.f() == 10); // both-error {{not an integral constant expression}} \ + // both-note {{virtual function called on object 'k' whose dynamic type is not constant}} + + struct N { + K* mk = &k; + }; + static_assert(N{}.mk->f() == 10); // both-error {{not an integral constant expression}} \ + // both-note {{virtual function called on object 'k' whose dynamic type is not constant}} + + extern K o; + static_assert(o.f() == 10); // both-error {{not an integral constant expression}} \ + // both-note {{virtual function called on object 'o' whose dynamic type is not constant}} + static K p; + static_assert(p.f() == 10); // both-error {{not an integral constant expression}} \ + // both-note {{virtual function called on object 'p' whose dynamic type is not constant}} + + void f4() { + static K p; + static_assert(p.f() == 10); // both-error {{not an integral constant expression}} \ + // both-note {{virtual function called on object 'p' whose dynamic type is not constant}} + } + + const K q; + static_assert(q.f() == 10); // both-error {{not an integral constant expression}} \ + // both-note {{virtual function called on object 'q' whose dynamic type is not constant}} + + void f5() { + const K q; + static_assert(q.f() == 10); // both-error {{not an integral constant expression}} \ + // both-note {{virtual function called on object 'q' whose dynamic type is not constant}} + } } namespace DiscardedTrivialCXXConstructExpr { @@ -1100,3 +1157,29 @@ namespace DiscardedTrivialCXXConstructExpr { constexpr int y = foo(12); // both-error {{must be initialized by a constant expression}} \ // both-note {{in call to}} } + +namespace VirtualFunctionCallThroughArrayElem { + struct X { + constexpr virtual int foo() const { + return 3; + } + }; + constexpr X xs[5]; + static_assert(xs[3].foo() == 3); + + constexpr X xs2[1][2]; + static_assert(xs2[0].foo() == 3); // both-error {{is not a structure or union}} + static_assert(xs2[0][0].foo() == 3); + + struct Y: public X { + constexpr int foo() const override { + return 1; 
+ } + }; + constexpr Y ys[20]; + static_assert(ys[12].foo() == static_cast<const X&>(ys[12]).foo()); + + X a[3][4]; + static_assert(a[2][3].foo()); // both-error {{not an integral constant expression}} \ + // both-note {{virtual function called on object 'a[2][3]' whose dynamic type is not constant}} +} diff --git a/clang/test/CIR/CodeGen/global-init.cpp b/clang/test/CIR/CodeGen/global-init.cpp index 102affc..0c19e68 100644 --- a/clang/test/CIR/CodeGen/global-init.cpp +++ b/clang/test/CIR/CodeGen/global-init.cpp @@ -1,8 +1,9 @@ -// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2> %t-before.cir +// RUN: FileCheck --input-file=%t-before.cir %s --check-prefix=CIR-BEFORE-LPP // RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR -// Note: The CIR generated from this test isn't ready for lowering to LLVM yet. -// That will require changes to LoweringPrepare. +// Note: The LoweringPrepare work isn't yet complete. We still need to create +// the global ctor list attribute. struct NeedsCtor { NeedsCtor(); @@ -10,8 +11,16 @@ struct NeedsCtor { NeedsCtor needsCtor; -// CIR: cir.func private @_ZN9NeedsCtorC1Ev(!cir.ptr<!rec_NeedsCtor>) -// CIR: cir.global external @needsCtor = ctor : !rec_NeedsCtor { -// CIR: %[[THIS:.*]] = cir.get_global @needsCtor : !cir.ptr<!rec_NeedsCtor> -// CIR: cir.call @_ZN9NeedsCtorC1Ev(%[[THIS]]) : (!cir.ptr<!rec_NeedsCtor>) -> () +// CIR-BEFORE-LPP: cir.global external @needsCtor = ctor : !rec_NeedsCtor { +// CIR-BEFORE-LPP: %[[THIS:.*]] = cir.get_global @needsCtor : !cir.ptr<!rec_NeedsCtor> +// CIR-BEFORE-LPP: cir.call @_ZN9NeedsCtorC1Ev(%[[THIS]]) : (!cir.ptr<!rec_NeedsCtor>) -> () + +// CIR: cir.global external @needsCtor = #cir.zero : !rec_NeedsCtor +// CIR: cir.func internal private @__cxx_global_var_init() { +// CIR: %0 = cir.get_global @needsCtor : !cir.ptr<!rec_NeedsCtor> +// CIR: cir.call @_ZN9NeedsCtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtor>) -> () + +// CIR: cir.func private @_GLOBAL__sub_I_[[FILENAME:.*]]() { +// CIR: cir.call @__cxx_global_var_init() : () -> () +// CIR: cir.return // CIR: } diff --git a/clang/test/CIR/CodeGen/new.cpp b/clang/test/CIR/CodeGen/new.cpp index 91dae3f..3dcf7af 100644 --- a/clang/test/CIR/CodeGen/new.cpp +++ b/clang/test/CIR/CodeGen/new.cpp @@ -158,13 +158,13 @@ void test_new_with_complex_type() { } // CHECK: cir.func{{.*}} @_Z26test_new_with_complex_typev -// CHECK: %0 = cir.alloca !cir.ptr<!cir.complex<!cir.float>>, !cir.ptr<!cir.ptr<!cir.complex<!cir.float>>>, ["a", init] -// CHECK: %1 = cir.const #cir.int<8> : !u64i -// CHECK: %2 = cir.call @_Znwm(%1) : (!u64i) -> !cir.ptr<!void> -// CHECK: %3 = cir.cast bitcast %2 : !cir.ptr<!void> -> !cir.ptr<!cir.complex<!cir.float>> -// CHECK: %4 = cir.const #cir.const_complex<#cir.fp<1.000000e+00> : !cir.float, #cir.fp<2.000000e+00> : !cir.float> : !cir.complex<!cir.float> -// CHECK: cir.store align(8) %4, %3 : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>> -// CHECK: cir.store align(8) %3, %0 : !cir.ptr<!cir.complex<!cir.float>>, !cir.ptr<!cir.ptr<!cir.complex<!cir.float>>> +// CHECK: %[[A_ADDR:.*]] = cir.alloca !cir.ptr<!cir.complex<!cir.float>>, !cir.ptr<!cir.ptr<!cir.complex<!cir.float>>>, ["a", init] +// CHECK: %[[COMPLEX_SIZE:.*]] = cir.const #cir.int<8> : !u64i +// CHECK: %[[NEW_COMPLEX:.*]] = cir.call @_Znwm(%[[COMPLEX_SIZE]]) : (!u64i) -> !cir.ptr<!void> +// CHECK: 
%[[COMPLEX_PTR:.*]] = cir.cast bitcast %[[NEW_COMPLEX]] : !cir.ptr<!void> -> !cir.ptr<!cir.complex<!cir.float>> +// CHECK: %[[COMPLEX_VAL:.*]] = cir.const #cir.const_complex<#cir.fp<1.000000e+00> : !cir.float, #cir.fp<2.000000e+00> : !cir.float> : !cir.complex<!cir.float> +// CHECK: cir.store{{.*}} %[[COMPLEX_VAL]], %[[COMPLEX_PTR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>> +// CHECK: cir.store{{.*}} %[[COMPLEX_PTR]], %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.ptr<!cir.ptr<!cir.complex<!cir.float>>> // LLVM: define{{.*}} void @_Z26test_new_with_complex_typev // LLVM: %[[A_ADDR:.*]] = alloca ptr, i64 1, align 8 diff --git a/clang/test/CIR/CodeGenOpenACC/combined-firstprivate-clause.cpp b/clang/test/CIR/CodeGenOpenACC/combined-firstprivate-clause.cpp index e836a37a..726cd3e 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-firstprivate-clause.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined-firstprivate-clause.cpp @@ -1,4 +1,4 @@ -// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s struct NoCopyConstruct {}; @@ -81,292 +81,247 @@ struct HasDtor { // CHECK-NEXT: acc.yield // CHECK-NEXT: } // -// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}): +// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.firstprivate.init"] // CHECK-NEXT: acc.yield // CHECK-NEXT: } copy { -// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}): -// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> -// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> -// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!s32i>, %[[ZERO]] : !u64i), !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_DECAY]] : !s32i, !cir.ptr<!s32i> -// -// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[ONE]] : !s64i), !cir.ptr<!s32i> -// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[ONE_2]] : !u64i), !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i> -// -// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : 
!cir.ptr<!s32i>, %[[TWO]] : !s64i), !cir.ptr<!s32i> -// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[TWO_2]] : !u64i), !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i> -// -// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[THREE]] : !s64i), !cir.ptr<!s32i> -// CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[THREE_2]] : !u64i), !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i> -// -// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[FOUR]] : !s64i), !cir.ptr<!s32i> -// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[FOUR_2]] : !u64i), !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!s32i> +// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!s32i>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!s32i> +// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load{{.*}} 
%[[STRIDE_FROM]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.store{{.*}} %[[FROM_LOAD]], %[[STRIDE_TO]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } // -// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}): +// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.firstprivate.init"] // CHECK-NEXT: acc.yield // CHECK-NEXT: } copy { -// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}): -// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> -// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> -// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!cir.float>, %[[ZERO]] : !u64i), !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_DECAY]] : !cir.float, !cir.ptr<!cir.float> -// -// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[ONE]] : !s64i), !cir.ptr<!cir.float> -// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[ONE_2]] : !u64i), !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float> -// -// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[TWO]] : !s64i), !cir.ptr<!cir.float> -// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[TWO_2]] : !u64i), !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float> -// -// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[THREE]] : !s64i), !cir.ptr<!cir.float> 
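The removed CHECK lines in this region documented the old fully unrolled copy: a cir.const index, array_to_ptrdecay casts, a ptr_stride, and a load/store (or constructor call) repeated for indices 0 through 4. The replacement recipes, matched above and below, emit one bounds-driven loop instead. In rough C++ terms (a hand-written sketch of the emitted control flow, not compiler output):

    // lb/ub stand for the results of acc.get_lowerbound /
    // acc.get_upperbound on the recipe's !acc.data_bounds_ty argument.
    for (unsigned long i = lb; i < ub; ++i)
      to[i] = from[i];  // record types call the copy constructor instead

The destroy region (see the HasDtor recipe further down) runs the same loop backwards, from ub - 1 down to lb, invoking the destructor on each element.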
-// CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[THREE_2]] : !u64i), !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float> -// -// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[FOUR]] : !s64i), !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[FOUR_2]] : !u64i), !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!cir.float> +// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!cir.float>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!cir.float> +// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load{{.*}} %[[STRIDE_FROM]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[FROM_LOAD]], %[[STRIDE_TO]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } // -// CHECK-NEXT: acc.firstprivate.recipe 
@firstprivatization__ZTSA5_15NoCopyConstruct : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}): +// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.alloca !cir.array<!rec_NoCopyConstruct x 5>, !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, ["openacc.firstprivate.init"] // CHECK-NEXT: acc.yield // CHECK-NEXT: } copy { -// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}): -// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> -// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ZERO]] : !u64i), !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_DECAY]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> () -// -// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ONE]] : !s64i), !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ONE_2]] : !u64i), !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> () -// -// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[TWO]] : !s64i), !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[TWO_2]] : !u64i), !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> () -// -// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[THREE]] : !s64i), !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[THREE_2]] : 
!u64i), !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> () -// -// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[FOUR]] : !s64i), !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[FOUR_2]] : !u64i), !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> () -// +// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct> +// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_NoCopyConstruct> +// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct> +// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_NoCopyConstruct> +// CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[STRIDE_TO]], %[[STRIDE_FROM]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> () +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } // -// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_13CopyConstruct : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}): +// CHECK-NEXT: acc.firstprivate.recipe 
@firstprivatization__Bcnt1__ZTSA5_13CopyConstruct : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.alloca !cir.array<!rec_CopyConstruct x 5>, !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>, ["openacc.firstprivate.init"] // CHECK-NEXT: acc.yield // CHECK-NEXT: } copy { -// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}): -// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> -// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[ZERO]] : !u64i), !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_DECAY]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> () -// -// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[ONE]] : !s64i), !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[ONE_2]] : !u64i), !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> () -// -// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[TWO]] : !s64i), !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[TWO_2]] : !u64i), !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> () -// -// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[THREE]] : !s64i), !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[THREE_2]] : !u64i), !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> () -// -// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : 
!cir.ptr<!rec_CopyConstruct>, %[[FOUR]] : !s64i), !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[FOUR_2]] : !u64i), !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> () -// +// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct> +// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_CopyConstruct> +// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct> +// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!rec_CopyConstruct>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_CopyConstruct> +// CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[STRIDE_TO]], %[[STRIDE_FROM]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> () +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } // -// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_14NonDefaultCtor : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}): -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_NonDefaultCtor x 5>, !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, ["openacc.firstprivate.init"] +// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_14NonDefaultCtor : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): +// CHECK-NEXT: 
cir.alloca !cir.array<!rec_NonDefaultCtor x 5>, !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, ["openacc.firstprivate.init"] // CHECK-NEXT: acc.yield // CHECK-NEXT: } copy { -// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}): -// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> -// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ZERO]] : !u64i), !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_DECAY]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> () -// -// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ONE]] : !s64i), !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ONE_2]] : !u64i), !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> () -// -// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[TWO]] : !s64i), !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[TWO_2]] : !u64i), !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> () -// -// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[THREE]] : !s64i), !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[THREE_2]] : !u64i), !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> () -// -// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[FOUR]] : !s64i), !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast 
array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[FOUR_2]] : !u64i), !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> () -// +// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor> +// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_NonDefaultCtor> +// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor> +// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_NonDefaultCtor> +// CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[STRIDE_TO]], %[[STRIDE_FROM]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> () +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } // -// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_7HasDtor : !cir.ptr<!cir.array<!rec_HasDtor x 5>> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}): +// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_7HasDtor : !cir.ptr<!cir.array<!rec_HasDtor x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.alloca !cir.array<!rec_HasDtor x 5>, !cir.ptr<!cir.array<!rec_HasDtor x 5>>, ["openacc.firstprivate.init"] // CHECK-NEXT: acc.yield // CHECK-NEXT: } copy { -// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}): -// CHECK-NEXT: %[[TO_DECAY:.*]] = 
cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[ZERO]] : !u64i), !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_DECAY]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> () -// -// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_HasDtor>, %[[ONE]] : !s64i), !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[ONE_2]] : !u64i), !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> () -// -// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_HasDtor>, %[[TWO]] : !s64i), !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[TWO_2]] : !u64i), !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> () -// -// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_HasDtor>, %[[THREE]] : !s64i), !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[THREE_2]] : !u64i), !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> () -// -// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_HasDtor>, %[[FOUR]] : !s64i), !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[FOUR_2]] : !u64i), !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> () -// +// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound 
%[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor> +// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_HasDtor> +// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor> +// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!rec_HasDtor>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_HasDtor> +// CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[STRIDE_TO]], %[[STRIDE_FROM]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> () +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield -// // CHECK-NEXT: } destroy { -// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}): -// CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<4> : !u64i -// CHECK-NEXT: %[[ARRPTR:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: %[[ELEM:.*]] = cir.ptr_stride(%[[ARRPTR]] : !cir.ptr<!rec_HasDtor>, %[[LAST_IDX]] : !u64i), !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !cir.ptr<!rec_HasDtor>, !cir.ptr<!cir.ptr<!rec_HasDtor>>, ["__array_idx"] -// CHECK-NEXT: cir.store %[[ELEM]], %[[ITR]] : !cir.ptr<!rec_HasDtor>, !cir.ptr<!cir.ptr<!rec_HasDtor>> -// CHECK-NEXT: cir.do { -// CHECK-NEXT: %[[ELEM_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!cir.ptr<!rec_HasDtor>>, !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: cir.call @_ZN7HasDtorD1Ev(%[[ELEM_LOAD]]) nothrow : (!cir.ptr<!rec_HasDtor>) -> () -// CHECK-NEXT: %[[NEG_ONE:.*]] = cir.const #cir.int<-1> : !s64i -// CHECK-NEXT: %[[PREVELEM:.*]] = cir.ptr_stride(%[[ELEM_LOAD]] : !cir.ptr<!rec_HasDtor>, %[[NEG_ONE]] : !s64i), !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: cir.store %[[PREVELEM]], %[[ITR]] : !cir.ptr<!rec_HasDtor>, !cir.ptr<!cir.ptr<!rec_HasDtor>> +// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i +// CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.binop(sub, %[[UB_CAST]], %[[ONE]]) : !u64i +// CHECK-NEXT: cir.store %[[LAST_SUB_ONE]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(ge, %[[ITR_LOAD]], %[[LB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasDtor>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_HasDtor> +// CHECK-NEXT: cir.call @_ZN7HasDtorD1Ev(%[[STRIDE]]) nothrow : (!cir.ptr<!rec_HasDtor>) -> () // CHECK-NEXT: cir.yield -// CHECK-NEXT: } while { -// CHECK-NEXT: %[[ELEM_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!cir.ptr<!rec_HasDtor>>, !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[ELEM_LOAD]], %[[ARRPTR]]) : !cir.ptr<!rec_HasDtor>, !cir.bool -// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[DEC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } +// extern "C" void acc_combined() { // CHECK: cir.func{{.*}} @acc_combined() { @@ -482,7 +437,7 @@ extern "C" void acc_combined() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[INTARR]] : !cir.ptr<!cir.array<!s32i x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 5>> {name = "someIntArr[1]"} - // CHECK-NEXT: acc.serial combined(loop) firstprivate(@firstprivatization__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>) { + // CHECK-NEXT: acc.serial combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>) { // CHECK-NEXT: acc.loop combined(serial) // CHECK: acc.yield // CHECK-NEXT: } loc @@ -497,7 +452,7 @@ extern "C" void acc_combined() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[FLOATARR]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {name = "someFloatArr[1]"} - // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>) { + // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_f -> 
%[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>) { // CHECK-NEXT: acc.loop combined(parallel) // CHECK: acc.yield // CHECK-NEXT: } loc @@ -512,7 +467,7 @@ extern "C" void acc_combined() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1]"} - // CHECK-NEXT: acc.serial combined(loop) firstprivate(@firstprivatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) { + // CHECK-NEXT: acc.serial combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) { // CHECK-NEXT: acc.loop combined(serial) // CHECK: acc.yield // CHECK-NEXT: } loc @@ -527,7 +482,7 @@ extern "C" void acc_combined() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[HASCOPYARR]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {name = "hasCopyArr[1]"} - // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__ZTSA5_13CopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) { + // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_13CopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) { // CHECK-NEXT: acc.loop combined(parallel) // CHECK: acc.yield // CHECK-NEXT: } loc @@ -542,7 +497,7 @@ extern "C" void acc_combined() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[NOTDEFCTORARR]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {name = "notDefCtorArr[1]"} - // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) { + // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_14NonDefaultCtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) { // CHECK-NEXT: acc.loop combined(parallel) // CHECK: acc.yield // CHECK-NEXT: } loc @@ -557,7 +512,7 @@ extern "C" void acc_combined() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1]"} - // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__ZTSA5_7HasDtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) { + // CHECK-NEXT: acc.parallel 
combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_7HasDtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) { // CHECK-NEXT: acc.loop combined(parallel) // CHECK: acc.yield // CHECK-NEXT: } loc @@ -607,12 +562,12 @@ extern "C" void acc_combined() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE6:.*]] = acc.firstprivate varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1]"} - // CHECK-NEXT: acc.serial combined(loop) firstprivate(@firstprivatization__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>, - // CHECK-SAME: @firstprivatization__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>, - // CHECK-SAME: @firstprivatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, - // CHECK-SAME: @firstprivatization__ZTSA5_13CopyConstruct -> %[[PRIVATE4]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>, - // CHECK-SAME: @firstprivatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE5]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, - // CHECK-SAME: @firstprivatization__ZTSA5_7HasDtor -> %[[PRIVATE6]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) + // CHECK-NEXT: acc.serial combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>, + // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>, + // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, + // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_13CopyConstruct -> %[[PRIVATE4]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>, + // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_14NonDefaultCtor -> %[[PRIVATE5]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, + // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_7HasDtor -> %[[PRIVATE6]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) // CHECK-NEXT: acc.loop combined(serial) // CHECK: acc.yield // CHECK-NEXT: } loc @@ -629,7 +584,7 @@ extern "C" void acc_combined() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[INTARR]] : !cir.ptr<!cir.array<!s32i x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 5>> {name = "someIntArr[1:1]"} - // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>) { + // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>) { // CHECK-NEXT: acc.loop combined(parallel) // CHECK: acc.yield // CHECK-NEXT: } loc @@ -645,7 +600,7 @@ extern "C" void acc_combined() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[FLOATARR]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> 
{name = "someFloatArr[1:1]"} - // CHECK-NEXT: acc.serial combined(loop) firstprivate(@firstprivatization__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>) { + // CHECK-NEXT: acc.serial combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>) { // CHECK-NEXT: acc.loop combined(serial) // CHECK: acc.yield // CHECK-NEXT: } loc @@ -661,7 +616,7 @@ extern "C" void acc_combined() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1:1]"} - // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) { + // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) { // CHECK-NEXT: acc.loop combined(parallel) // CHECK: acc.yield // CHECK-NEXT: } loc @@ -677,7 +632,7 @@ extern "C" void acc_combined() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[HASCOPYARR]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {name = "hasCopyArr[1:1]"} - // CHECK-NEXT: acc.serial combined(loop) firstprivate(@firstprivatization__ZTSA5_13CopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) { + // CHECK-NEXT: acc.serial combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_13CopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) { // CHECK-NEXT: acc.loop combined(serial) // CHECK: acc.yield // CHECK-NEXT: } loc @@ -693,7 +648,7 @@ extern "C" void acc_combined() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[NOTDEFCTORARR]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {name = "notDefCtorArr[1:1]"} - // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) { + // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_14NonDefaultCtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) { // CHECK-NEXT: acc.loop combined(parallel) // CHECK: acc.yield // CHECK-NEXT: } loc @@ -709,7 +664,7 @@ extern "C" void acc_combined() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) 
bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1:1]"} - // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__ZTSA5_7HasDtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) { + // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_7HasDtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) { // CHECK-NEXT: acc.loop combined(parallel) // CHECK: acc.yield // CHECK-NEXT: } loc @@ -765,12 +720,12 @@ extern "C" void acc_combined() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE6:.*]] = acc.firstprivate varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1:1]"} - // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>, - // CHECK-SAME: @firstprivatization__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>, - // CHECK-SAME: @firstprivatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, - // CHECK-SAME: @firstprivatization__ZTSA5_13CopyConstruct -> %[[PRIVATE4]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>, - // CHECK-SAME: @firstprivatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE5]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, - // CHECK-SAME: @firstprivatization__ZTSA5_7HasDtor -> %[[PRIVATE6]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) + // CHECK-NEXT: acc.parallel combined(loop) firstprivate(@firstprivatization__Bcnt1__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>, + // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>, + // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, + // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_13CopyConstruct -> %[[PRIVATE4]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>, + // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_14NonDefaultCtor -> %[[PRIVATE5]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, + // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_7HasDtor -> %[[PRIVATE6]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) // CHECK-NEXT: acc.loop combined(parallel) // CHECK: acc.yield // CHECK-NEXT: } loc diff --git a/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.c b/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.c index de6e7b0..94c2973 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.c +++ b/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.c @@ -1,4 +1,4 @@ -// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s struct NoCopyConstruct {}; @@ -34,140 +34,110 @@ struct NoCopyConstruct {}; // CHECK-NEXT: acc.yield // CHECK-NEXT: } // -// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}): +// CHECK-NEXT: acc.firstprivate.recipe 
@firstprivatization__Bcnt1__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.firstprivate.init"] // CHECK-NEXT: acc.yield // CHECK-NEXT: } copy { -// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}): -// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> -// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> -// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!s32i>, %[[ZERO]] : !u64i), !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_DECAY]] : !s32i, !cir.ptr<!s32i> -// -// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[ONE]] : !s64i), !cir.ptr<!s32i> -// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[ONE_2]] : !u64i), !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i> -// -// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[TWO]] : !s64i), !cir.ptr<!s32i> -// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[TWO_2]] : !u64i), !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i> -// -// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[THREE]] : !s64i), !cir.ptr<!s32i> -// CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[THREE_2]] : !u64i), !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i> -// -// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[FOUR]] : !s64i), !cir.ptr<!s32i> -// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : 
!cir.ptr<!s32i>, %[[FOUR_2]] : !u64i), !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!s32i> +// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!s32i>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!s32i> +// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load{{.*}} %[[STRIDE_FROM]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.store{{.*}} %[[FROM_LOAD]], %[[STRIDE_TO]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } // -// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}): +// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.firstprivate.init"] // CHECK-NEXT: acc.yield // CHECK-NEXT: } copy { -// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}): -// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> -// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> -// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = 
cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!cir.float>, %[[ZERO]] : !u64i), !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_DECAY]] : !cir.float, !cir.ptr<!cir.float> -// -// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[ONE]] : !s64i), !cir.ptr<!cir.float> -// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[ONE_2]] : !u64i), !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float> -// -// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[TWO]] : !s64i), !cir.ptr<!cir.float> -// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[TWO_2]] : !u64i), !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float> -// -// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[THREE]] : !s64i), !cir.ptr<!cir.float> -// CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[THREE_2]] : !u64i), !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float> -// -// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[FOUR]] : !s64i), !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[FOUR_2]] : !u64i), !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!cir.float> +// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!cir.float>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!cir.float> +// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load{{.*}} %[[STRIDE_FROM]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[FROM_LOAD]], %[[STRIDE_TO]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } // -// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_15NoCopyConstruct : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}): +// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.alloca !cir.array<!rec_NoCopyConstruct x 5>, !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, ["openacc.firstprivate.init"] // CHECK-NEXT: acc.yield // CHECK-NEXT: } copy { -// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}): -// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> +// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i // CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ZERO]] : !u64i), !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: cir.copy %[[FROM_OFFSET:.*]] to %[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct> -// -// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ONE]] : !s64i), !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ONE]] : !u64i), !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: cir.copy %[[FROM_OFFSET]] to %[[TO_OFFSET]] : !cir.ptr<!rec_NoCopyConstruct> -// -// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[TWO]] : !s64i), !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[TWO]] : !u64i), !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: cir.copy %[[FROM_OFFSET]] to %[[TO_OFFSET]] : !cir.ptr<!rec_NoCopyConstruct> -// -// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[THREE]] : !s64i), !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[THREE]] : !u64i), !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: cir.copy %[[FROM_OFFSET]] to %[[TO_OFFSET]] : !cir.ptr<!rec_NoCopyConstruct> -// -// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[FOUR]] : !s64i), !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[FOUR]] : !u64i), 
!cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: cir.copy %[[FROM_OFFSET]] to %[[TO_OFFSET]] : !cir.ptr<!rec_NoCopyConstruct> -// +// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_NoCopyConstruct> +// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct> +// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_NoCopyConstruct> +// CHECK-NEXT: cir.copy %[[STRIDE_FROM]] to %[[STRIDE_TO]] : !cir.ptr<!rec_NoCopyConstruct> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } @@ -227,7 +197,7 @@ void acc_compute() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[INTARR]] : !cir.ptr<!cir.array<!s32i x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 5>> {name = "someIntArr[1]"} - // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>) + // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__Bcnt1__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>) // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc #pragma acc parallel firstprivate(someFloatArr[1]) @@ -239,7 +209,7 @@ void acc_compute() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[FLOATARR]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {name = "someFloatArr[1]"} - // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>) + // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__Bcnt1__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>) // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc #pragma acc serial firstprivate(noCopyArr[1]) @@ -251,7 +221,7 @@ void acc_compute() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1]"} - // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) + // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc #pragma acc serial firstprivate(someIntArr[1], 
someFloatArr[1], noCopyArr[1]) @@ -277,9 +247,9 @@ void acc_compute() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE3:.*]] = acc.firstprivate varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1]"} - // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>, - // CHECK-SAME: @firstprivatization__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>, - // CHECK-SAME: @firstprivatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) + // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__Bcnt1__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>, + // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>, + // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -293,7 +263,7 @@ void acc_compute() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[INTARR]] : !cir.ptr<!cir.array<!s32i x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 5>> {name = "someIntArr[1:1]"} - // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>) + // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__Bcnt1__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>) // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc #pragma acc serial firstprivate(someFloatArr[1:1]) @@ -306,7 +276,7 @@ void acc_compute() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[FLOATARR]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {name = "someFloatArr[1:1]"} - // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>) + // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__Bcnt1__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>) // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc #pragma acc parallel firstprivate(noCopyArr[1:1]) @@ -319,7 +289,7 @@ void acc_compute() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1:1]"} - // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) + // CHECK-NEXT: 
acc.parallel firstprivate(@firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc #pragma acc parallel firstprivate(someIntArr[1:1], someFloatArr[1:1], noCopyArr[1:1]) @@ -348,9 +318,9 @@ void acc_compute() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE3:.*]] = acc.firstprivate varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1:1]"} - // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>, - // CHECK-SAME: @firstprivatization__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>, - // CHECK-SAME: @firstprivatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) + // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__Bcnt1__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>, + // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>, + // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc } diff --git a/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.cpp b/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.cpp index fca3ca8..1e174bb 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.cpp @@ -1,4 +1,4 @@ -// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s struct NoCopyConstruct {}; @@ -81,292 +81,247 @@ struct HasDtor { // CHECK-NEXT: acc.yield // CHECK-NEXT: } // -// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}): +// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.firstprivate.init"] // CHECK-NEXT: acc.yield // CHECK-NEXT: } copy { -// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}): -// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> -// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> -// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!s32i>, %[[ZERO]] : !u64i), !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i -// 
CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_DECAY]] : !s32i, !cir.ptr<!s32i> -// -// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[ONE]] : !s64i), !cir.ptr<!s32i> -// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[ONE_2]] : !u64i), !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i> -// -// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[TWO]] : !s64i), !cir.ptr<!s32i> -// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[TWO_2]] : !u64i), !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i> -// -// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[THREE]] : !s64i), !cir.ptr<!s32i> -// CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[THREE_2]] : !u64i), !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i> -// -// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[FOUR]] : !s64i), !cir.ptr<!s32i> -// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[FOUR_2]] : !u64i), !cir.ptr<!s32i> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// 
CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!s32i> +// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i> +// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!s32i>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!s32i> +// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load{{.*}} %[[STRIDE_FROM]] : !cir.ptr<!s32i>, !s32i +// CHECK-NEXT: cir.store{{.*}} %[[FROM_LOAD]], %[[STRIDE_TO]] : !s32i, !cir.ptr<!s32i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } // -// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}): +// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.firstprivate.init"] // CHECK-NEXT: acc.yield // CHECK-NEXT: } copy { -// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}): -// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> -// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> -// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!cir.float>, %[[ZERO]] : !u64i), !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_DECAY]] : !cir.float, !cir.ptr<!cir.float> -// -// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[ONE]] : !s64i), !cir.ptr<!cir.float> -// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[ONE_2]] : !u64i), !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float> -// -// CHECK-NEXT: %[[TWO:.*]] = 
cir.const #cir.int<2> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[TWO]] : !s64i), !cir.ptr<!cir.float> -// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[TWO_2]] : !u64i), !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float> -// -// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[THREE]] : !s64i), !cir.ptr<!cir.float> -// CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[THREE_2]] : !u64i), !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float> -// -// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[FOUR]] : !s64i), !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[FOUR_2]] : !u64i), !cir.ptr<!cir.float> -// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float -// CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!cir.float> +// CHECK-NEXT: %[[DECAY_TO:.*]] = 
cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float> +// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!cir.float>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!cir.float> +// CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load{{.*}} %[[STRIDE_FROM]] : !cir.ptr<!cir.float>, !cir.float +// CHECK-NEXT: cir.store{{.*}} %[[FROM_LOAD]], %[[STRIDE_TO]] : !cir.float, !cir.ptr<!cir.float> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } // -// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_15NoCopyConstruct : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}): +// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.alloca !cir.array<!rec_NoCopyConstruct x 5>, !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, ["openacc.firstprivate.init"] // CHECK-NEXT: acc.yield // CHECK-NEXT: } copy { -// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}): -// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> -// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ZERO]] : !u64i), !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_DECAY]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> () -// -// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ONE]] : !s64i), !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ONE_2]] : !u64i), !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> () -// -// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[TWO]] : !s64i), !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct> -// 
CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[TWO_2]] : !u64i), !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> () -// -// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[THREE]] : !s64i), !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[THREE_2]] : !u64i), !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> () -// -// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[FOUR]] : !s64i), !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[FOUR_2]] : !u64i), !cir.ptr<!rec_NoCopyConstruct> -// CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> () -// +// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct> +// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_NoCopyConstruct> +// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct> +// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ITR_LOAD]] : !u64i), 
!cir.ptr<!rec_NoCopyConstruct> +// CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[STRIDE_TO]], %[[STRIDE_FROM]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> () +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } // -// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_13CopyConstruct : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}): +// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_13CopyConstruct : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.alloca !cir.array<!rec_CopyConstruct x 5>, !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>, ["openacc.firstprivate.init"] // CHECK-NEXT: acc.yield // CHECK-NEXT: } copy { -// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}): -// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> -// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[ZERO]] : !u64i), !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_DECAY]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> () -// -// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[ONE]] : !s64i), !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[ONE_2]] : !u64i), !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> () -// -// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[TWO]] : !s64i), !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[TWO_2]] : !u64i), !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> () -// -// CHECK-NEXT: 
%[[THREE:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[THREE]] : !s64i), !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[THREE_2]] : !u64i), !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> () -// -// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[FOUR]] : !s64i), !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[FOUR_2]] : !u64i), !cir.ptr<!rec_CopyConstruct> -// CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> () -// +// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct> +// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_CopyConstruct> +// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct> +// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!rec_CopyConstruct>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_CopyConstruct> +// CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[STRIDE_TO]], %[[STRIDE_FROM]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> () +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store 
%[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } // -// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_14NonDefaultCtor : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}): -// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_NonDefaultCtor x 5>, !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, ["openacc.firstprivate.init"] +// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_14NonDefaultCtor : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): +// CHECK-NEXT: cir.alloca !cir.array<!rec_NonDefaultCtor x 5>, !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, ["openacc.firstprivate.init"] // CHECK-NEXT: acc.yield // CHECK-NEXT: } copy { -// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}): -// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> -// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ZERO]] : !u64i), !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_DECAY]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> () -// -// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ONE]] : !s64i), !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ONE_2]] : !u64i), !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> () -// -// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[TWO]] : !s64i), !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[TWO_2]] : !u64i), !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> () -// -// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[THREE]] : !s64i), !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: 
%[[THREE_2:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[THREE_2]] : !u64i), !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> () -// -// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[FOUR]] : !s64i), !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[FOUR_2]] : !u64i), !cir.ptr<!rec_NonDefaultCtor> -// CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> () -// +// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor> +// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_NonDefaultCtor> +// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor> +// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_NonDefaultCtor> +// CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[STRIDE_TO]], %[[STRIDE_FROM]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> () +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } // 
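+// What the bounds-driven copy region above boils down to, as a rough C++
+// sketch (LB/UB stand for the lowerbound/upperbound taken from the
+// acc.data_bounds_ty argument; hypothetical names, not checked output):
+//
+//   for (uint64_t i = LB; i < UB; ++i)
+//     new (&to[i]) NonDefaultCtor(from[i]);
+//
+// i.e. one copy-constructor call per element of the bounded section, in
+// place of the old fully unrolled five-element sequence.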
-// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__ZTSA5_7HasDtor : !cir.ptr<!cir.array<!rec_HasDtor x 5>> init { -// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}): +// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt1__ZTSA5_7HasDtor : !cir.ptr<!cir.array<!rec_HasDtor x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.alloca !cir.array<!rec_HasDtor x 5>, !cir.ptr<!cir.array<!rec_HasDtor x 5>>, ["openacc.firstprivate.init"] // CHECK-NEXT: acc.yield // CHECK-NEXT: } copy { -// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}): -// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[ZERO]] : !u64i), !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_DECAY]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> () -// -// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_HasDtor>, %[[ONE]] : !s64i), !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[ONE_2]] : !u64i), !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> () -// -// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_HasDtor>, %[[TWO]] : !s64i), !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[TWO_2]] : !u64i), !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> () -// -// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_HasDtor>, %[[THREE]] : !s64i), !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[THREE_2]] : !u64i), !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> () -// -// CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : 
!cir.ptr<!rec_HasDtor>, %[[FOUR]] : !s64i), !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4> -// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[FOUR_2]] : !u64i), !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> () -// +// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor> +// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_HasDtor> +// CHECK-NEXT: %[[DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor> +// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[DECAY_TO]] : !cir.ptr<!rec_HasDtor>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_HasDtor> +// CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[STRIDE_TO]], %[[STRIDE_FROM]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> () +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK-NEXT: acc.yield -// // CHECK-NEXT: } destroy { -// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}): -// CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<4> : !u64i -// CHECK-NEXT: %[[ARRPTR:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: %[[ELEM:.*]] = cir.ptr_stride(%[[ARRPTR]] : !cir.ptr<!rec_HasDtor>, %[[LAST_IDX]] : !u64i), !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !cir.ptr<!rec_HasDtor>, !cir.ptr<!cir.ptr<!rec_HasDtor>>, ["__array_idx"] -// CHECK-NEXT: cir.store %[[ELEM]], %[[ITR]] : !cir.ptr<!rec_HasDtor>, !cir.ptr<!cir.ptr<!rec_HasDtor>> -// CHECK-NEXT: cir.do { -// CHECK-NEXT: %[[ELEM_LOAD:.*]] = cir.load %[[ITR]] : 
!cir.ptr<!cir.ptr<!rec_HasDtor>>, !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: cir.call @_ZN7HasDtorD1Ev(%[[ELEM_LOAD]]) nothrow : (!cir.ptr<!rec_HasDtor>) -> () -// CHECK-NEXT: %[[NEG_ONE:.*]] = cir.const #cir.int<-1> : !s64i -// CHECK-NEXT: %[[PREVELEM:.*]] = cir.ptr_stride(%[[ELEM_LOAD]] : !cir.ptr<!rec_HasDtor>, %[[NEG_ONE]] : !s64i), !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: cir.store %[[PREVELEM]], %[[ITR]] : !cir.ptr<!rec_HasDtor>, !cir.ptr<!cir.ptr<!rec_HasDtor>> +// CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i +// CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.binop(sub, %[[UB_CAST]], %[[ONE]]) : !u64i +// CHECK-NEXT: cir.store %[[LAST_SUB_ONE]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(ge, %[[ITR_LOAD]], %[[LB_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasDtor>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_HasDtor> +// CHECK-NEXT: cir.call @_ZN7HasDtorD1Ev(%[[STRIDE]]) nothrow : (!cir.ptr<!rec_HasDtor>) -> () // CHECK-NEXT: cir.yield -// CHECK-NEXT: } while { -// CHECK-NEXT: %[[ELEM_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!cir.ptr<!rec_HasDtor>>, !cir.ptr<!rec_HasDtor> -// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[ELEM_LOAD]], %[[ARRPTR]]) : !cir.ptr<!rec_HasDtor>, !cir.bool -// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[DEC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } +// extern "C" void acc_compute() { // CHECK: cir.func{{.*}} @acc_compute() { @@ -461,7 +416,7 @@ extern "C" void acc_compute() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[INTARR]] : !cir.ptr<!cir.array<!s32i x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 5>> {name = "someIntArr[1]"} - // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>) + // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__Bcnt1__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>) // CHECK-NEXT: acc.yield // 
CHECK-NEXT: } loc #pragma acc parallel firstprivate(someFloatArr[1]) @@ -473,7 +428,7 @@ extern "C" void acc_compute() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[FLOATARR]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {name = "someFloatArr[1]"} - // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>) + // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__Bcnt1__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>) // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc #pragma acc serial firstprivate(noCopyArr[1]) @@ -485,7 +440,7 @@ extern "C" void acc_compute() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1]"} - // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) + // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc #pragma acc parallel firstprivate(hasCopyArr[1]) @@ -497,7 +452,7 @@ extern "C" void acc_compute() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[HASCOPYARR]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {name = "hasCopyArr[1]"} - // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__ZTSA5_13CopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) + // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__Bcnt1__ZTSA5_13CopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc #pragma acc parallel firstprivate(notDefCtorArr[1]) @@ -509,7 +464,7 @@ extern "C" void acc_compute() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[NOTDEFCTORARR]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {name = "notDefCtorArr[1]"} - // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) + // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__Bcnt1__ZTSA5_14NonDefaultCtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc #pragma acc parallel 
firstprivate(dtorArr[1]) @@ -521,7 +476,7 @@ extern "C" void acc_compute() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1]"} - // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__ZTSA5_7HasDtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) + // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__Bcnt1__ZTSA5_7HasDtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc #pragma acc serial firstprivate(someIntArr[1], someFloatArr[1], noCopyArr[1], hasCopyArr[1], notDefCtorArr[1], dtorArr[1]) @@ -568,12 +523,12 @@ extern "C" void acc_compute() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE6:.*]] = acc.firstprivate varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1]"} - // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>, - // CHECK-SAME: @firstprivatization__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>, - // CHECK-SAME: @firstprivatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, - // CHECK-SAME: @firstprivatization__ZTSA5_13CopyConstruct -> %[[PRIVATE4]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>, - // CHECK-SAME: @firstprivatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE5]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, - // CHECK-SAME: @firstprivatization__ZTSA5_7HasDtor -> %[[PRIVATE6]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) + // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__Bcnt1__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>, + // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>, + // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, + // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_13CopyConstruct -> %[[PRIVATE4]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>, + // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_14NonDefaultCtor -> %[[PRIVATE5]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, + // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_7HasDtor -> %[[PRIVATE6]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -587,7 +542,7 @@ extern "C" void acc_compute() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[INTARR]] : !cir.ptr<!cir.array<!s32i x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 5>> {name = "someIntArr[1:1]"} - // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>) + // CHECK-NEXT: acc.parallel 
firstprivate(@firstprivatization__Bcnt1__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>) // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc #pragma acc serial firstprivate(someFloatArr[1:1]) @@ -600,7 +555,7 @@ extern "C" void acc_compute() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[FLOATARR]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {name = "someFloatArr[1:1]"} - // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>) + // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__Bcnt1__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>) // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc #pragma acc parallel firstprivate(noCopyArr[1:1]) @@ -613,7 +568,7 @@ extern "C" void acc_compute() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1:1]"} - // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) + // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc #pragma acc serial firstprivate(hasCopyArr[1:1]) @@ -626,7 +581,7 @@ extern "C" void acc_compute() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[HASCOPYARR]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {name = "hasCopyArr[1:1]"} - // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__ZTSA5_13CopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) + // CHECK-NEXT: acc.serial firstprivate(@firstprivatization__Bcnt1__ZTSA5_13CopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc #pragma acc parallel firstprivate(notDefCtorArr[1:1]) @@ -639,7 +594,7 @@ extern "C" void acc_compute() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[NOTDEFCTORARR]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {name = "notDefCtorArr[1:1]"} - // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) + // CHECK-NEXT: acc.parallel 
firstprivate(@firstprivatization__Bcnt1__ZTSA5_14NonDefaultCtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc #pragma acc parallel firstprivate(dtorArr[1:1]) @@ -652,7 +607,7 @@ extern "C" void acc_compute() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE:.*]] = acc.firstprivate varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1:1]"} - // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__ZTSA5_7HasDtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) + // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__Bcnt1__ZTSA5_7HasDtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc #pragma acc parallel firstprivate(someIntArr[1:1], someFloatArr[1:1], noCopyArr[1:1], hasCopyArr[1:1], notDefCtorArr[1:1], dtorArr[1:1]) @@ -705,12 +660,12 @@ extern "C" void acc_compute() { // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE6:.*]] = acc.firstprivate varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1:1]"} - // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>, - // CHECK-SAME: @firstprivatization__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>, - // CHECK-SAME: @firstprivatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, - // CHECK-SAME: @firstprivatization__ZTSA5_13CopyConstruct -> %[[PRIVATE4]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>, - // CHECK-SAME: @firstprivatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE5]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, - // CHECK-SAME: @firstprivatization__ZTSA5_7HasDtor -> %[[PRIVATE6]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) + // CHECK-NEXT: acc.parallel firstprivate(@firstprivatization__Bcnt1__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>, + // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>, + // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, + // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_13CopyConstruct -> %[[PRIVATE4]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>, + // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_14NonDefaultCtor -> %[[PRIVATE5]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, + // CHECK-SAME: @firstprivatization__Bcnt1__ZTSA5_7HasDtor -> %[[PRIVATE6]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc } diff --git a/clang/test/CIR/CodeGenOpenACC/firstprivate-clause-recipes.cpp b/clang/test/CIR/CodeGenOpenACC/firstprivate-clause-recipes.cpp new file mode 100644 index 0000000..e10d737 --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/firstprivate-clause-recipes.cpp @@ -0,0 +1,691 @@ +// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir 
-fclangir %s -o - | FileCheck %s
+
+// Note: unlike the 'private' recipe checks, this is just for spot-checking,
+// so this test isn't as comprehensive. The same code paths are used for
+// 'private', so we just count on those to catch the errors.
+struct NoOps {
+  int i;
+  ~NoOps();
+};
+
+struct CtorDtor {
+  int i;
+  CtorDtor();
+  ~CtorDtor();
+};
+
+void do_things(unsigned A, unsigned B) {
+  NoOps ThreeArr[5][5][5];
+
+#pragma acc parallel firstprivate(ThreeArr[B][B][B])
+// CHECK: acc.firstprivate.recipe @firstprivatization__Bcnt3__ZTSA5_A5_A5_5NoOps : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> init {
+// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}):
+// CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca !cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>, !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>>, ["openacc.firstprivate.init"] {alignment = 4 : i64}
+// CHECK-NEXT: acc.yield
+// CHECK-NEXT: } copy {
+// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}):
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i
+// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i
+// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR3_LOAD]], %[[UB3_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> -> !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>
+// CHECK-NEXT: %[[BOUND3_STRIDE_FROM:.*]] = cir.ptr_stride(%[[TLA_DECAY_FROM]] : !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>
+// CHECK-NEXT: %[[TLA_DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> -> !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>
+// CHECK-NEXT: %[[BOUND3_STRIDE_TO:.*]] = cir.ptr_stride(%[[TLA_DECAY_TO]] : !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = 
acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[BOUND3_STRIDE_DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[BOUND3_STRIDE_FROM]] : !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> -> !cir.ptr<!cir.array<!rec_NoOps x 5>> +// CHECK-NEXT: %[[BOUND2_STRIDE_FROM:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_DECAY_FROM]] : !cir.ptr<!cir.array<!rec_NoOps x 5>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.array<!rec_NoOps x 5>> +// CHECK-NEXT: %[[BOUND3_STRIDE_DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[BOUND3_STRIDE_TO]] : !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> -> !cir.ptr<!cir.array<!rec_NoOps x 5>> +// CHECK-NEXT: %[[BOUND2_STRIDE_TO:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_DECAY_TO]] : !cir.ptr<!cir.array<!rec_NoOps x 5>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.array<!rec_NoOps x 5>> +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[BOUND2_STRIDE_DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[BOUND2_STRIDE_FROM]] : !cir.ptr<!cir.array<!rec_NoOps x 5>> -> !cir.ptr<!rec_NoOps> +// CHECK-NEXT: %[[BOUND1_STRIDE_FROM:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_DECAY_FROM]] : !cir.ptr<!rec_NoOps>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_NoOps> +// CHECK-NEXT: %[[BOUND2_STRIDE_DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[BOUND2_STRIDE_TO]] : !cir.ptr<!cir.array<!rec_NoOps x 5>> -> !cir.ptr<!rec_NoOps> +// CHECK-NEXT: %[[BOUND1_STRIDE_TO:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_DECAY_TO]] : !cir.ptr<!rec_NoOps>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_NoOps> +// CHECK-NEXT: cir.call @_ZN5NoOpsC1ERKS_(%[[BOUND1_STRIDE_TO]], %[[BOUND1_STRIDE_FROM]]) nothrow : (!cir.ptr<!rec_NoOps>, !cir.ptr<!rec_NoOps>) -> () +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: cir.yield +// 
CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR3_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: acc.yield +// CHECK-NEXT:} destroy { +// CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i +// CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.binop(sub, %[[UB3_CAST]], %[[ONE]]) : !u64i +// CHECK-NEXT: cir.store %[[LAST_SUB_ONE]], %[[ITR3]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(ge, %[[ITR3_LOAD]], %[[LB3_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[TLA_DECAY:.*]] = cir.cast array_to_ptrdecay %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> -> !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> +// CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride(%[[TLA_DECAY]] : !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i +// CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.binop(sub, %[[UB2_CAST]], %[[ONE]]) : !u64i +// CHECK-NEXT: cir.store %[[LAST_SUB_ONE]], %[[ITR2]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(ge, %[[ITR2_LOAD]], %[[LB2_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: 
%[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[BOUND3_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[BOUND3_STRIDE]] : !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> -> !cir.ptr<!cir.array<!rec_NoOps x 5>> +// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_DECAY]] : !cir.ptr<!cir.array<!rec_NoOps x 5>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.array<!rec_NoOps x 5>> +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i +// CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.binop(sub, %[[UB1_CAST]], %[[ONE]]) : !u64i +// CHECK-NEXT: cir.store %[[LAST_SUB_ONE]], %[[ITR1]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(ge, %[[ITR1_LOAD]], %[[LB1_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[BOUND2_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[BOUND2_STRIDE]] : !cir.ptr<!cir.array<!rec_NoOps x 5>> -> !cir.ptr<!rec_NoOps> +// CHECK-NEXT: %[[BOUND1_STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_DECAY]] : !cir.ptr<!rec_NoOps>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_NoOps> +// CHECK-NEXT: cir.call @_ZN5NoOpsD1Ev(%[[BOUND1_STRIDE]]) nothrow : (!cir.ptr<!rec_NoOps>) -> () +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[ITR1_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[DEC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[ITR2_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[DEC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[ITR3_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[DEC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT:acc.yield +// CHECK-NEXT:} + ; + + NoOps ***ThreePtr; +#pragma acc parallel firstprivate(ThreePtr[B][B][A:B]) +// CHECK: acc.firstprivate.recipe @firstprivatization__Bcnt3__ZTSPPP5NoOps : !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): +// CHECK-NEXT: %[[TOP_LEVEL_ALLOCA:.*]] = cir.alloca !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, 
!cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>>, ["openacc.firstprivate.init"] {alignment = 8 : i64} +// CHECK-NEXT: %[[INT_PTR_PTR_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_PTR_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[SIZEOF_PTR:.*]] = cir.const #cir.int<8> : !u64i +// CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.binop(mul, %[[UPPER_BOUND_CAST]], %[[SIZEOF_PTR]]) : !u64i +// CHECK-NEXT: %[[INT_PTR_PTR_VLA_ALLOCA:.*]] = cir.alloca !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, %[[CALC_ALLOCA_SIZE]] : !u64i, ["openacc.init.bounds"] {alignment = 8 : i64} +// +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[UPPER_LIMIT:.*]] = cir.const #cir.int<1> : !u64i +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UPPER_LIMIT]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[SRC_IDX:.*]] = cir.binop(mul, %[[UPPER_BOUND_CAST]], %[[ITR_LOAD]]) : !u64i +// CHECK-NEXT: %[[SRC_STRIDE:.*]] = cir.ptr_stride(%[[INT_PTR_PTR_VLA_ALLOCA]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, %[[SRC_IDX]] : !u64i), !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>> +// CHECK-NEXT: %[[DEST_STRIDE:.*]] = cir.ptr_stride(%[[TOP_LEVEL_ALLOCA]] : !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>> +// CHECK-NEXT: cir.store %[[SRC_STRIDE]], %[[DEST_STRIDE]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>> +// CHECK-NEXT: cir.yield +// +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// +// CHECK-NEXT: %[[INT_PTR_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UPPER_BOUND_CAST_2:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.binop(mul, %[[UPPER_BOUND_CAST_2]], %[[UPPER_BOUND_CAST]]) : !u64i +// CHECK-NEXT: %[[SIZEOF_PTR_PTR:.*]] = cir.const #cir.int<8> : !u64i +// CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.binop(mul, %[[NUM_ELTS]], %[[SIZEOF_PTR_PTR]]) : !u64i +// CHECK-NEXT: %[[INT_PTR_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr<!rec_NoOps>, !cir.ptr<!cir.ptr<!rec_NoOps>>, %[[CALC_ALLOCA_SIZE]] : !u64i, ["openacc.init.bounds"] {alignment = 8 : i64} +// +// +// Copy array pointer to the original alloca. 
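+// (Roughly, in terms of the FileCheck names above -- a sketch, not checked
+// output:
+//   for (uint64_t i = 0; i < UB3; ++i)
+//     INT_PTR_PTR_VLA_ALLOCA[i] = INT_PTR_PTR_ALLOCA + i * UB2;
+// so each NoOps** slot points at its UB2-element row of NoOps* entries.)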
+// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UPPER_BOUND_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[SRC_IDX:.*]] = cir.binop(mul, %[[UPPER_BOUND_CAST_2]], %[[ITR_LOAD]]) : !u64i +// CHECK-NEXT: %[[SRC_STRIDE:.*]] = cir.ptr_stride(%[[INT_PTR_PTR_ALLOCA]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, %[[SRC_IDX]] : !u64i), !cir.ptr<!cir.ptr<!rec_NoOps>> +// CHECK-NEXT: %[[DEST_STRIDE:.*]] = cir.ptr_stride(%[[INT_PTR_PTR_VLA_ALLOCA]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>> +// CHECK-NEXT: cir.store %[[SRC_STRIDE]], %[[DEST_STRIDE]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>> +// CHECK-NEXT: cir.yield +// +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// +// CHECK-NEXT: %[[INT_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UPPER_BOUND_CAST_3:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[NUM_ELTS_2:.*]] = cir.binop(mul, %[[UPPER_BOUND_CAST_3]], %[[NUM_ELTS]]) : !u64i +// CHECK-NEXT: %[[SIZEOF_INT:.*]] = cir.const #cir.int<4> : !u64i +// CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.binop(mul, %[[NUM_ELTS_2]], %[[SIZEOF_INT]]) : !u64i +// CHECK-NEXT: %[[INT_PTR_ALLOCA:.*]] = cir.alloca !rec_NoOps, !cir.ptr<!rec_NoOps>, %[[CALC_ALLOCA_SIZE]] : !u64i, ["openacc.init.bounds"] {alignment = 4 : i64} +// +// Copy array pointer to the original alloca. 
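+// (Likewise for the innermost level -- a sketch, not checked output:
+//   for (uint64_t i = 0; i < UB3 * UB2; ++i)
+//     INT_PTR_PTR_ALLOCA[i] = INT_PTR_ALLOCA + i * UB1;
+// every NoOps* slot gets its UB1-element run of NoOps objects.)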
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["itr"] {alignment = 8 : i64}
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i
+// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[NUM_ELTS]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[CMP]])
+//
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[SRC_IDX:.*]] = cir.binop(mul, %[[UPPER_BOUND_CAST_3]], %[[ITR_LOAD]]) : !u64i
+// CHECK-NEXT: %[[SRC_STRIDE:.*]] = cir.ptr_stride(%[[INT_PTR_ALLOCA]] : !cir.ptr<!rec_NoOps>, %[[SRC_IDX]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[DEST_STRIDE:.*]] = cir.ptr_stride(%[[INT_PTR_PTR_ALLOCA]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: cir.store %[[SRC_STRIDE]], %[[DEST_STRIDE]] : !cir.ptr<!rec_NoOps>, !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: cir.yield
+//
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: acc.yield
+//
+// CHECK-NEXT: } copy {
+// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}):
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i
+// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i
+// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR3_LOAD]], %[[UB3_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[TLA_LOAD_FROM:.*]] = cir.load %[[ARG_FROM]] : !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>>, !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>
+// CHECK-NEXT: %[[BOUND3_STRIDE_FROM:.*]] = cir.ptr_stride(%[[TLA_LOAD_FROM]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>
+// CHECK-NEXT: %[[TLA_LOAD_TO:.*]] = cir.load %[[ARG_TO]] : !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>>, !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>
+// CHECK-NEXT: %[[BOUND3_STRIDE_TO:.*]] = cir.ptr_stride(%[[TLA_LOAD_TO]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND3_STRIDE_LOAD_FROM:.*]] = cir.load %[[BOUND3_STRIDE_FROM]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: %[[BOUND2_STRIDE_FROM:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_LOAD_FROM]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: %[[BOUND3_STRIDE_LOAD_TO:.*]] = cir.load %[[BOUND3_STRIDE_TO]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: %[[BOUND2_STRIDE_TO:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_LOAD_TO]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_NoOps>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_LOAD_FROM:.*]] = cir.load %[[BOUND2_STRIDE_FROM]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[BOUND1_STRIDE_FROM:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_LOAD_FROM]] : !cir.ptr<!rec_NoOps>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[BOUND2_STRIDE_LOAD_TO:.*]] = cir.load %[[BOUND2_STRIDE_TO]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[BOUND1_STRIDE_TO:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_LOAD_TO]] : !cir.ptr<!rec_NoOps>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: cir.call @_ZN5NoOpsC1ERKS_(%[[BOUND1_STRIDE_TO]], %[[BOUND1_STRIDE_FROM]]) nothrow : (!cir.ptr<!rec_NoOps>, !cir.ptr<!rec_NoOps>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR3_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } destroy { +// CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i +// CHECK-NEXT: %[[ONE_BELOW_UB3:.*]] = cir.binop(sub, %[[UB3_CAST]], %[[CONST_ONE]]) : !u64i +// CHECK-NEXT: cir.store %[[ONE_BELOW_UB3]], %[[ITR3]] : !u64i, !cir.ptr<!u64i> + +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(ge, %[[ITR3_LOAD]], %[[LB3_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[TLA_LOAD:.*]] = cir.load %[[PRIVATE]] : !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>>, !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>> +// CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride(%[[TLA_LOAD]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>> +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i +// CHECK-NEXT: %[[ONE_BELOW_UB2:.*]] = cir.binop(sub, %[[UB2_CAST]], %[[CONST_ONE]]) : !u64i +// CHECK-NEXT: cir.store %[[ONE_BELOW_UB2]], %[[ITR2]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(ge, %[[ITR2_LOAD]], %[[LB2_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[BOUND3_STRIDE_LOAD:.*]] 
= cir.load %[[BOUND3_STRIDE]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_NoOps>>>, !cir.ptr<!cir.ptr<!rec_NoOps>> +// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_LOAD]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_NoOps>> +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i +// CHECK-NEXT: %[[ONE_BELOW_UB1:.*]] = cir.binop(sub, %[[UB1_CAST]], %[[CONST_ONE]]) : !u64i +// CHECK-NEXT: cir.store %[[ONE_BELOW_UB1]], %[[ITR1]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(ge, %[[ITR1_LOAD]], %[[LB1_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[BOUND2_STRIDE_LOAD:.*]] = cir.load %[[BOUND2_STRIDE]] : !cir.ptr<!cir.ptr<!rec_NoOps>>, !cir.ptr<!rec_NoOps> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_LOAD]] : !cir.ptr<!rec_NoOps>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_NoOps> +// CHECK-NEXT: cir.call @_ZN5NoOpsD1Ev(%[[STRIDE]]) nothrow : (!cir.ptr<!rec_NoOps>) -> () +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[ITR1_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[DEC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[ITR2_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[DEC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[ITR3_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[DEC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +; + using PtrTArrayTy = CtorDtor*[5]; + PtrTArrayTy *PtrArrayPtr; + +#pragma acc parallel firstprivate(PtrArrayPtr[B][B][B]) +// CHECK-NEXT: acc.firstprivate.recipe @firstprivatization__Bcnt3__ZTSPA5_P8CtorDtor : !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): +// CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>, !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>>, ["openacc.firstprivate.init"] {alignment = 8 : i64} +// CHECK-NEXT: %[[UB3:.*]] = 
acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<40> : !u64i +// CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.binop(mul, %[[UB3_CAST]], %[[ARR_SIZE]]) : !u64i +// CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca !cir.array<!cir.ptr<!rec_CtorDtor> x 5>, !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>, %[[ALLOCA_SIZE]] : !u64i, ["openacc.init.bounds"] {alignment = 8 : i64} +// +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["itr"] {alignment = 8 : i64} +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[UPP_BOUND:.*]] = cir.const #cir.int<1> : !u64i +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UPP_BOUND]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[SRC_IDX:.*]] = cir.binop(mul, %[[UB3_CAST]], %[[ITR_LOAD]]) : !u64i +// CHECK-NEXT: %[[SRC:.*]] = cir.ptr_stride(%[[ARR_ALLOCA]] : !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>, %[[SRC_IDX]] : !u64i), !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>> +// CHECK-NEXT: %[[DEST:.*]] = cir.ptr_stride(%[[TL_ALLOCA]] : !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>> +// CHECK-NEXT: cir.store %[[SRC]], %[[DEST]] : !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>, !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// +// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.binop(mul, %[[UB2_CAST]], %[[UB3_CAST]]) : !u64i +// +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i +// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARR_ALLOCA]] : !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>> -> !cir.ptr<!cir.ptr<!rec_CtorDtor>> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, %[[ZERO]] : !u64i), !cir.ptr<!cir.ptr<!rec_CtorDtor>> +// +// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[NUM_ELTS2:.*]] = cir.binop(mul, %[[UB1_CAST]], %[[NUM_ELTS]]) : !u64i +// CHECK-NEXT: %[[ELT_SIZE:.*]] = cir.const #cir.int<4> : !u64i +// CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.binop(mul, %[[NUM_ELTS2]], %[[ELT_SIZE]]) : !u64i +// CHECK-NEXT: %[[ARR_ALLOCA2:.*]] = cir.alloca !rec_CtorDtor, !cir.ptr<!rec_CtorDtor>, %[[ALLOCA_SIZE]] : !u64i, ["openacc.init.bounds"] {alignment = 4 : i64} +// +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["itr"] {alignment 
= 8 : i64} +// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i +// CHECK-NEXT: cir.store %[[ZERO]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[NUM_ELTS]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[SRC_IDX:.*]] = cir.binop(mul, %[[UB1_CAST]], %[[ITR_LOAD]]) : !u64i +// CHECK-NEXT: %[[SRC:.*]] = cir.ptr_stride(%[[ARR_ALLOCA2]] : !cir.ptr<!rec_CtorDtor>, %[[SRC_IDX]] : !u64i), !cir.ptr<!rec_CtorDtor> +// CHECK-NEXT: %[[DEST:.*]] = cir.ptr_stride(%[[STRIDE]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_CtorDtor>> +// CHECK-NEXT: cir.store %[[SRC]], %[[DEST]] : !cir.ptr<!rec_CtorDtor>, !cir.ptr<!cir.ptr<!rec_CtorDtor>> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } copy { +// CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(lt, %[[ITR3_LOAD]], %[[UB3_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[TLA_LOAD_FROM:.*]] = cir.load %[[ARG_FROM]] : !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>>, !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>> +// CHECK-NEXT: %[[BOUND3_STRIDE_FROM:.*]] = cir.ptr_stride(%[[TLA_LOAD_FROM]] : !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>> +// CHECK-NEXT: %[[TLA_LOAD_TO:.*]] = cir.load %[[ARG_TO]] : !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>>, !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>> +// CHECK-NEXT: %[[BOUND3_STRIDE_TO:.*]] = cir.ptr_stride(%[[TLA_LOAD_TO]] : !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>> +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB2_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i
+// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i
+// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR2_LOAD]], %[[UB2_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND3_STRIDE_DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[BOUND3_STRIDE_FROM]] : !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>> -> !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: %[[BOUND2_STRIDE_FROM:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_DECAY_FROM]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: %[[BOUND3_STRIDE_DECAY_TO:.*]] = cir.cast array_to_ptrdecay %[[BOUND3_STRIDE_TO]] : !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>> -> !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: %[[BOUND2_STRIDE_TO:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_DECAY_TO]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i
+// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i
+// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR1_LOAD]], %[[UB1_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[BOUND2_STRIDE_LOAD_FROM:.*]] = cir.load %[[BOUND2_STRIDE_FROM]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[STRIDE_FROM:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_LOAD_FROM]] : !cir.ptr<!rec_CtorDtor>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[BOUND2_STRIDE_LOAD_TO:.*]] = cir.load %[[BOUND2_STRIDE_TO]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[STRIDE_TO:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_LOAD_TO]] : !cir.ptr<!rec_CtorDtor>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: cir.call @_ZN8CtorDtorC1ERKS_(%[[STRIDE_TO]], %[[STRIDE_FROM]]) nothrow : (!cir.ptr<!rec_CtorDtor>, !cir.ptr<!rec_CtorDtor>) -> ()
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR1_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+//
CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR2_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR3_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[INC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } destroy { +// CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[ITR3:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i +// CHECK-NEXT: %[[ONE_BELOW_UB3:.*]] = cir.binop(sub, %[[UB3_CAST]], %[[CONST_ONE]]) : !u64i +// CHECK-NEXT: cir.store %[[ONE_BELOW_UB3]], %[[ITR3]] : !u64i, !cir.ptr<!u64i> +// +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(ge, %[[ITR3_LOAD]], %[[LB3_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[TLA_LOAD:.*]] = cir.load %[[PRIVATE]] : !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>>, !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>> +// CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride(%[[TLA_LOAD]] : !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>> +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[ITR2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i +// CHECK-NEXT: %[[ONE_BELOW_UB2:.*]] = cir.binop(sub, %[[UB2_CAST]], %[[CONST_ONE]]) : !u64i +// CHECK-NEXT: cir.store %[[ONE_BELOW_UB2]], %[[ITR2]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(ge, %[[ITR2_LOAD]], %[[LB2_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: 
%[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[BOUND3_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[BOUND3_STRIDE]] : !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>> -> !cir.ptr<!cir.ptr<!rec_CtorDtor>> +// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_DECAY]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_CtorDtor>> +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index +// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[ITR1:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64} +// CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i +// CHECK-NEXT: %[[ONE_BELOW_UB1:.*]] = cir.binop(sub, %[[UB1_CAST]], %[[CONST_ONE]]) : !u64i +// CHECK-NEXT: cir.store %[[ONE_BELOW_UB1]], %[[ITR1]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[COND:.*]] = cir.cmp(ge, %[[ITR1_LOAD]], %[[LB1_CAST]]) : !u64i, !cir.bool +// CHECK-NEXT: cir.condition(%[[COND]]) +// CHECK-NEXT: } body { +// CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[BOUND2_STRIDE_LOAD:.*]] = cir.load %[[BOUND2_STRIDE]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, !cir.ptr<!rec_CtorDtor> +// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_LOAD]] : !cir.ptr<!rec_CtorDtor>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_CtorDtor> +// CHECK-NEXT: cir.call @_ZN8CtorDtorD1Ev(%[[STRIDE]]) : (!cir.ptr<!rec_CtorDtor>) -> () +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR1_LOAD]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[ITR1_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[DEC]], %[[ITR1]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR2_LOAD]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[ITR2_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[DEC]], %[[ITR2]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %[[ITR3_LOAD]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i +// CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[ITR3_LOAD]]) : !u64i, !u64i +// CHECK-NEXT: cir.store %[[DEC]], %[[ITR3]] : !u64i, !cir.ptr<!u64i> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } + ; +} diff --git a/clang/test/ClangScanDeps/modules-context-hash-from-named-module.cpp b/clang/test/ClangScanDeps/modules-context-hash-from-named-module.cpp new file mode 100644 index 0000000..c272022 --- /dev/null +++ b/clang/test/ClangScanDeps/modules-context-hash-from-named-module.cpp @@ -0,0 +1,121 @@ +// Checks that driver-generated options for C++ module inputs preserve the +// canonical module build commands compared to an equivalent non-module input, +// and that they do not produce additional internal scanning PCMs. 
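+//
+// Test layout: main.cpp imports the named modules A and B, and all three
+// TUs include root.h, which module.modulemap covers with the Clang module
+// 'root'; every scan should therefore agree on one canonical build command
+// (and one context hash) for 'root'.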
+ +// RUN: rm -rf %t +// RUN: split-file %s %t + +//--- main.cpp +#include "root.h" +import A; +import B; + +auto main() -> int { return 1; } + +//--- A.cppm +module; +#include "root.h" +export module A; + +//--- B.cppm +module; +#include "root.h" +export module B; + +//--- module.modulemap +module root { header "root.h" } + +//--- root.h +// empty + +// RUN: %clang -std=c++23 -fmodules \ +// RUN: -fmodules-cache-path=%t/modules-cache \ +// RUN: %t/main.cpp %t/A.cppm %t/B.cppm \ +// RUN: -fsyntax-only -fdriver-only -MJ %t/deps.json +// +// RUN: sed -e '1s/^/[/' -e '$s/,$/]/' -e 's:\\\\\?:/:g' %t/deps.json \ +// RUN: > %t/compile_commands.json +// +// RUN: clang-scan-deps \ +// RUN: -compilation-database=%t/compile_commands.json \ +// RUN: -format experimental-full \ +// RUN: | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t + +// CHECK: { +// CHECK-NEXT: "modules": [ +// CHECK-NEXT: { +// CHECK-NEXT: "clang-module-deps": [], +// CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/module.modulemap", +// CHECK: "context-hash": "[[HASH_ROOT:.*]]", +// CHECK-NEXT: "file-deps": [ +// CHECK-NEXT: "[[PREFIX]]/module.modulemap", +// CHECK-NEXT: "[[PREFIX]]/root.h" +// CHECK-NEXT: ], +// CHECK-NEXT: "link-libraries": [], +// CHECK-NEXT: "name": "root" +// CHECK-NEXT: } +// CHECK-NEXT: ], +// CHECK-NEXT: "translation-units": [ +// CHECK-NEXT: { +// CHECK-NEXT: "commands": [ +// CHECK-NEXT: { +// CHECK-NEXT: "clang-context-hash": "{{.*}}", +// CHECK-NEXT: "named-module-deps": [ +// CHECK-NEXT: "A", +// CHECK-NEXT: "B" +// CHECK-NEXT: ], +// CHECK-NEXT: "clang-module-deps": [ +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": "[[HASH_ROOT]]", +// CHECK-NEXT: "module-name": "root" +// CHECK-NEXT: } +// CHECK-NEXT: ], +// CHECK: "file-deps": [ +// CHECK-NEXT: "[[PREFIX]]/main.cpp" +// CHECK-NEXT: ], +// CHECK-NEXT: "input-file": "[[PREFIX]]/main.cpp" +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "commands": [ +// CHECK-NEXT: { +// CHECK-NEXT: "clang-context-hash": "{{.*}}", +// CHECK-NEXT: "named-module": "A", +// CHECK-NEXT: "clang-module-deps": [ +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": "[[HASH_ROOT]]", +// CHECK-NEXT: "module-name": "root" +// CHECK-NEXT: } +// CHECK-NEXT: ], +// CHECK: "file-deps": [ +// CHECK-NEXT: "[[PREFIX]]/A.cppm" +// CHECK-NEXT: ], +// CHECK-NEXT: "input-file": "[[PREFIX]]/A.cppm" +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "commands": [ +// CHECK-NEXT: { +// CHECK-NEXT: "clang-context-hash": "{{.*}}", +// CHECK-NEXT: "named-module": "B", +// CHECK-NEXT: "clang-module-deps": [ +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": "[[HASH_ROOT]]", +// CHECK-NEXT: "module-name": "root" +// CHECK-NEXT: } +// CHECK-NEXT: ], +// CHECK: "file-deps": [ +// CHECK-NEXT: "[[PREFIX]]/B.cppm" +// CHECK-NEXT: ], +// CHECK-NEXT: "input-file": "[[PREFIX]]/B.cppm" +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: } + +// This tests that the scanner doesn't produce multiple internal scanning PCMs +// for our single Clang module (root). 
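+// Because all three translation units depend on 'root' with the same context
+// hash ([[HASH_ROOT]] above), the scanning module cache should end up with
+// exactly one .pcm, which the RUN line below counts directly.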
+// RUN: find %t/modules-cache -name "*.pcm" | wc -l | grep 1 diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 47cb485a..7756f0d 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -6273,6 +6273,78 @@ __m512i test_mm512_ternarylogic_epi32(__m512i __A, __m512i __B, __m512i __C) { // CHECK: @llvm.x86.avx512.pternlog.d.512({{.*}}, i32 240) return _mm512_ternarylogic_epi32(__A, __B, __C, _MM_TERNLOG_A); } +TEST_CONSTEXPR(match_v16si( + _mm512_ternarylogic_epi32( + ((__m512i)((__v16si){ + 0x6AA79987, (int)0xBB91433A, 0x029A7245, (int)0xD1F6F86C, + (int)0xD340BBCD, (int)0xCD8778E7, 0x4C73A942, (int)0xDAEA58BA, + 0x5E503A67, (int)0xEE897110, 0x3193CA54, 0x452EC40A, + (int)0x90E5E945, 0x6FACAA50, 0x29645F8B, 0x5F811CB9 + })), + ((__m512i)((__v16si){ + 0x1FCFF454, (int)0xDFC9E3B1, 0x6ED4E94B, 0x42D6CB5C, + (int)0x8FE46024, (int)0xA091250E, 0x2CA1C789, (int)0x9C9CEA0C, + (int)0x8D9FE5B9, 0x2FD2B7A4, 0x5ADAD121, (int)0xBCF74D7A, + (int)0xF543BBCF, (int)0xBB9D58E4, 0x175F0CD2, (int)0x87F26AEE + })), + ((__m512i)((__v16si){ + (int)0xFA882692, (int)0xBC428D42, 0x6980A81F, (int)0x95C5FB98, + (int)0x8101E89A, 0x2AA4857E, 0x25ECE845, 0x34A9AF41, + (int)0xB80E3B0D, 0x13ED748B, 0x30A1F6D5, (int)0xD64A3CE0, + 0x57708107, 0x527122DC, 0x06057C82, 0x7576714A + })), + (unsigned char)0x11), // ~A & ~C + 0x00300929, 0x0034100C, (int)0x902B16A0, 0x28280423, + 0x701A1741, 0x554A5A81, (int)0xD2121032, 0x434210B2, + 0x42600042, (int)0xC0000850, (int)0x8504080A, 0x01008205, + 0x088C4430, 0x04028503, (int)0xE8A0832D, 0x08098411)); +TEST_CONSTEXPR(match_v16si( + _mm512_ternarylogic_epi32( + ((__m512i)((__v16si){ + (int)0xA3B1799D, (int)0x46685257, (int)0x392456DE, (int)0xBC8960A9, + (int)0x6C031199, (int)0x07A0CA6E, (int)0x37F8A88B, (int)0x8B8148F6, + (int)0x386ECBE0, (int)0x96DA1DAC, (int)0xCE4A2BBD, (int)0xB2B9437A, + (int)0x571AA876, (int)0x27CD8130, (int)0x562B0F79, (int)0x17BE3111 + })), + ((__m512i)((__v16si){ + (int)0x18C26797, (int)0xD8F56413, (int)0x9A8DCA03, (int)0xCE9FF57F, + (int)0xBACFB3D0, (int)0x89463E85, (int)0x60E7A113, (int)0x8D5288F1, + (int)0xDC98D2C1, (int)0x93CD59BF, (int)0xB45ED1F0, (int)0x19DB3AD0, + (int)0x47294739, (int)0x5D65A441, (int)0x5EC42E08, (int)0xA5E5A5AB + })), + ((__m512i)((__v16si){ + (int)0xBAA80DD4, (int)0x29D4BEEF, (int)0x6123FDF7, (int)0x8E944239, + (int)0xAF42E12F, (int)0xC6A7EE39, (int)0x50C187FC, (int)0x448AAA9E, + (int)0x508EBAD7, (int)0xA7CAD415, (int)0x757750A9, (int)0x43CF2FDE, + (int)0x95A76D79, (int)0x663F1C97, (int)0xFF5E9FF0, (int)0x827050A8 + })), + (unsigned char)0x38), // (C & ~B) | (~C & A & B) + (int)0xBB311C08, (int)0x0E9C3644, (int)0x21219CDD, (int)0x32140090, + (int)0xC640A009, (int)0x86A6E46B, (int)0x57190998, (int)0x0683C006, + (int)0x60E61921, (int)0x05124411, (int)0x7A147A0D, (int)0xA36269AA, + (int)0x1033ED4F, (int)0x62A80531, (int)0x086F0171, (int)0x925A10B8)); +TEST_CONSTEXPR(match_v16si( + _mm512_ternarylogic_epi32( + ((__m512i)((__v16si){ + (int)0x3193CA54, (int)0x90E5E945, (int)0x29645F8B, (int)0x6ED4E94B, + (int)0x8D9FE5B9, (int)0x8101E89A, (int)0x25ECE845, (int)0xB80E3B0D, + (int)0x57708107, (int)0x06057C82, (int)0x56EAA301, (int)0xBE99854A, + (int)0x00E266D0, (int)0xDEEA959E, (int)0x2DCAABD5, (int)0x6A1ECCDA})), + ((__m512i)((__v16si){ + (int)0x93FD7234, (int)0xBC90A6EC, (int)0xD3285151, (int)0xCE9FB6A8, + (int)0x3B788B66, (int)0xDF8960AD, (int)0x2F927291, (int)0x96AF0DEA, + (int)0xF56AE7EA, (int)0x2A04F77A, (int)0xD50B612B, (int)0x3AA725CB, 
+ (int)0x8A04F74F, (int)0x282FE557, (int)0x52E1FBB0, (int)0x0CA02F4D})), + ((__m512i)((__v16si){ + (int)0xB6307BAD, (int)0x141CB03E, (int)0xEBAA7701, (int)0xC9F0B072, + (int)0x5E2503DD, (int)0xC2E1DAC4, (int)0x0FC01B11, (int)0xA0485922, + (int)0x339BB47E, (int)0xB2D4F32A, (int)0x8E7AE9AF, (int)0x147DE9B0, + (int)0xF79FCAA0, (int)0x3B0B6398, (int)0x29DDF4C7, (int)0x49CDBEC7})), + (unsigned char)0xC3), // ~(B ^ C) + (int)0x5D91479F, (int)0xD38AB056, (int)0x05B3F125, (int)0x5FB4A01C, + (int)0x49189120, (int)0xA17777C8, (int)0xF581652B, (int)0xD15EC918, + (int)0x5DE59912, (int)0xD3FE7407, (int)0x7C1E3DD5, (int)0x7BC15F7E, + (int)0x75196E60, (int)0x093A8F36, (int)0x80D4AF9A, (int)0x99411C68)); __m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { // CHECK-LABEL: test_mm512_mask_ternarylogic_epi32 @@ -6280,6 +6352,63 @@ __m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i _ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_ternarylogic_epi32(__A, __U, __B, __C, _MM_TERNLOG_B); } +TEST_CONSTEXPR(match_v16si( + _mm512_mask_ternarylogic_epi32( + _mm512_setr_epi32( + (int)0xFFFFFFFF, 0x00000000, (int)0xDEADBEEF, (int)0xCAFEBABE, 0x12345678, (int)0x87654321, + (int)0xAAAAAAAA, 0x55555555, (int)0xF00DBEEF, (int)0xBAD2FEAF, 0x0112358D, (int)0xDEADF00D, + (int)0x8BADF00D, (int)0xBADDCAFE, (int)0xBAADF00D, (int)0xBAAAAAAD), + (__mmask16)0x9D71, + _mm512_setr_epi32( + 0x11111111, 0x22222222, 0x33333333, 0x44444444, (int)0xABCDEF01, (int)0xFEDCBA98, + (int)0xCCCCCCCC, 0x33333333, 0x1337BEEF, 0x01010101, (int)0x81321345, (int)0xBAADF00D, + 0x1BADB002, 0x5EE7C0DE, 0x12345678, 0x55555555), + _mm512_setr_epi32( + (int)0xF0F0F0F0, 0x0F0F0F0F, 0x1234ABCD, (int)0x9876FEDC, 0x00FF00FF, (int)0xFF00FF00, + (int)0xFF0000FF, 0x00FFFF00, 0x50D4CAFE, (int)0x8BADF00D, (int)0xABCDEFFF, (int)0xFEEDF00D, + (int)0xBEEFCAFE, (int)0xDEADC0DE, (int)0x1BADBEEF, 0x33333333), + (unsigned char)0xB1), // op: (~B & (A | ~C)) | (B & A & C) + (int)0xFEFEFEFE, 0x00000000, (int)0xDEADBEEF, (int)0xCAFEBABE, 0x54341078, (int)0x87234367, + (int)0xAA3333AA, 0x55555555, (int)0xFC0C8BEE, (int)0xBAD2FEAF, 0x5500258D, (int)0xDFBFFFFF, + (int)0xCABDC50D, (int)0xBADDCAFE, (int)0xBAADF00D, (int)0xBAAAAAA9)); +TEST_CONSTEXPR(match_v16si( + _mm512_mask_ternarylogic_epi32( + _mm512_setr_epi32( + 0x0000FFFF, (int)0xFFFF0000, 0x01010101, (int)0xFF00FF00, (int)0xAAAAAAAA, 0x33333333, + (int)0xF0F0F0F0, 0x0F0F0F0F, 0x12345678, (int)0x87654321, 0x7FFFFFFF, (int)0xDEADBEEF, + (int)0xCAFEBABE, 0x01234567, (int)0xABCDEF01, (int)0xFEDCBA98), + (__mmask16)0x3C3C, + _mm512_setr_epi32( + 0x1111EEEE, 0x2222DDDD, (int)0x80808080, 0x00FF00FF, 0x55555555, 0x00000000, + (int)0xCCCCCCCC, 0x33333333, 0x11111111, 0x22222222, (int)0x80000000, 0x12345678, + 0x11223344, (int)0xFEDCBA98, (int)0xBAD0BAD0, (int)0xBEEFCAFE), + _mm512_setr_epi32( + 0x12345678, (int)0x87654321, 0x7F7F7F7F, (int)0xFEDCBA98, (int)0xCCCCCCCC, (int)0xFFFFFFFF, + 0x11111111, 0x22222222, (int)0xABABABAB, (int)0xCDCDCDCD, 0x00000001, (int)0xFACEB00C, + 0x55667788, (int)0xABCDEF01, 0x12345678, (int)0xDEADBEEF), + (unsigned char)0xE8), // op: (A & B) | (B & C) | (C & A) (Majority) + 0x0000FFFF, (int)0xFFFF0000, 0x01010101, (int)0xFEDCBA98, (int)0xCCCCCCCC, 0x33333333, + (int)0xF0F0F0F0, 0x0F0F0F0F, 0x12345678, (int)0x87654321, 0x00000001, (int)0xDAACB66C, + 0x5166338C, (int)0xABCDEF01, (int)0xABCDEF01, (int)0xFEDCBA98)); +TEST_CONSTEXPR(match_v16si( + _mm512_mask_ternarylogic_epi32( + 
_mm512_setr_epi32( + (int)0xDEADBEEF, 0x01234567, (int)0xAAAAAAAA, 0x0F0F0F0F, (int)0xBAADF00D, 0x00000001, + (int)0x80000000, 0x7FFFFFFF, (int)0xCAFEBABE, 0x13579BDF, (int)0xABCDEF01, (int)0xCAFEBABE, + (int)0xDEADBEEF, (int)0xFF00FF00, (int)0xBEEFCAFE, 0x00000001), + (__mmask16)0xBEEF, + _mm512_setr_epi32( + (int)0xFACEB00C, (int)0x89ABCDEF, 0x55555555, (int)0xF0F0F0F0, 0x1337C0DE, 0x00000002, + 0x40000000, (int)0xBFFFFFFF, 0x00000000, 0x2468ACE0, 0x10FEDCBA, 0x00000000, + (int)0xFEEDFACE, 0x00FF00FF, 0x12345678, 0x00000002), + _mm512_setr_epi32( + 0x12345678, (int)0xFFFFFFFF, (int)0xCCCCCCCC, (int)0x88888888, (int)0xDEADC0DE, 0x00000004, + 0x20000000, (int)0xDFFFFFFF, (int)0xFFFFFFFF, (int)0xFEDCBA98, 0x55555555, (int)0xFFFFFFFF, + (int)0x8BADF00D, (int)0xF0F0F0F0, (int)0xFACEB00C, 0x00000003), + (unsigned char)0x96), // op: A ^ B ^ C (XOR3) + (int)0x3657589B, 0x77777777, 0x33333333, 0x77777777, (int)0xBAADF00D, 0x00000007, + (int)0xE0000000, 0x1FFFFFFF, (int)0xCAFEBABE, (int)0xC9E38DA7, (int)0xEE6666EE, 0x35014541, + (int)0xABEDB42C, 0x0F0F0F0F, (int)0xBEEFCAFE, 0x00000000)); __m512i test_mm512_maskz_ternarylogic_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) { // CHECK-LABEL: test_mm512_maskz_ternarylogic_epi32 @@ -6287,12 +6416,106 @@ __m512i test_mm512_maskz_ternarylogic_epi32(__mmask16 __U, __m512i __A, __m512i // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> zeroinitializer return _mm512_maskz_ternarylogic_epi32(__U, __A, __B, __C, _MM_TERNLOG_C); } +TEST_CONSTEXPR(match_v16si( + _mm512_maskz_ternarylogic_epi32( + (__mmask16)0x6498, + ((__m512i)((__v16si){ + 1393174638, 1243877629, -826208314, 1770837977, + -1678093555, -414088391, 1288769935, 703296098, + 1428104678, 405688910, -167788555, 1965219804, + -1959018749, 514303227, 754191429, 579811517})), + ((__m512i)((__v16si){ + -1301280384, -923736510, -797648805, 475853364, + 1247377062, 213070102, 626020209, 2037794518, + 122183669, 1712787569, -1042441569, -1416844145, + 1374304252, -1323427639, 1432483217, 1621706359})), + ((__m512i)((__v16si){ + 234227517, -313293475, 1851213039, -300885844, + -1479339544, 575183087, -655840260, -1853668117, + 433622095, 933629633, -1324904005, -68434060, + 486070655, 226865941, -1461464269, 1471789621})), + (unsigned char)0xAB), // (~A & ~B) | (B & C) + 0, 0, 0, -298592082, + -1479042568, 0, 0, -1752969749, + 0, 0, -1157115461, 0, + 0, 1304818453, -1427385541, 0)); +TEST_CONSTEXPR(match_v16si( + _mm512_maskz_ternarylogic_epi32( + (__mmask16)0xA593, + ((__m512i)((__v16si){ + 1789368711, -1148107974, 43676229, -772343700, + -750732339, -846759705, 1282648386, -622176070, + 1582316135, -292982512, 831769172, 1160692746, + -1863980731, 1873586768, 694443915, 1602297017})), + ((__m512i)((__v16si){ + 533722196, -540417103, 1859447115, 1121373020, + -1880858588, -1601100530, 748799881, -1667438068, + -1918900807, 802338724, 1524289825, -1124643462, + -180110385, -1147315996, 392105170, -2014156050})), + ((__m512i)((__v16si){ + -91740526, -1136489150, 1770039327, -1782187112, + -2130581350, 715425150, 636282949, 883535681, + -1207026931, 334328971, 815920853, -699777824, + 1466990855, 1383146204, 101022850, 1970696522})), + (unsigned char)0x21), // (~B) & ~(A ^ C) + 1611661482, 539234310, 0, 0, + 538610824, 0, 0, 18874368, + 270539268, 0, -1543175586, 0, + 0, 1075980051, 0, 1342738432)); +TEST_CONSTEXPR(match_v16si( + _mm512_maskz_ternarylogic_epi32( + (__mmask16)0xC3A5, + ((__m512i)((__v16si){ + 0x00000000, -0x1, (int)0x80000000, 0x7FFFFFFF, + (int)0xAAAAAAAA, 
0x55555555, 0x00000001, (int)0xFFFFFFFE, + 0x0000FFFF, (int)0xFFFF0000, (int)0xDEADBEEF, (int)0xCAFEBABE, + 0x01234567, (int)0x89ABCDEF, 0x13579BDF, 0x2468ACE0})), + ((__m512i)((__v16si){ + 0x2468ACE0, 0x13579BDF, (int)0x89ABCDEF, 0x01234567, + (int)0xCAFEBABE, (int)0xDEADBEEF, (int)0xFFFF0000, 0x0000FFFF, + (int)0xFFFFFFFE, 0x00000001, 0x55555555, (int)0xAAAAAAAA, + 0x7FFFFFFF, (int)0x80000000, -0x1, 0x00000000})), + ((__m512i)((__v16si){ + -0x1, 0x00000000, -0x1, 0x00000000, + -0x1, 0x00000000, -0x1, 0x00000000, + -0x1, 0x00000000, -0x1, 0x00000000, + -0x1, 0x00000000, -0x1, 0x00000000})), + (unsigned char)0xC9), // F = (A & B) | (~A & ~(B ^ C)) + 0x2468ACE0, 0x0, (int)0x89ABCDEF, 0x0, + 0x0, 0x74071445, 0x0, 0x0000FFFE, + (int)0xFFFFFFFE, 0x0000FFFE, 0x0, 0x0, + 0x0, 0x0, (int)0xFFFFFFFF, (int)0xDB97531F)); __m512i test_mm512_ternarylogic_epi64(__m512i __A, __m512i __B, __m512i __C) { // CHECK-LABEL: test_mm512_ternarylogic_epi64 // CHECK: @llvm.x86.avx512.pternlog.q.512({{.*}}, i32 192) return _mm512_ternarylogic_epi64(__A, __B, __C, _MM_TERNLOG_A & _MM_TERNLOG_B); } +TEST_CONSTEXPR(match_v8di( + _mm512_ternarylogic_epi64( + ((__m512i)((__v8di){0x1111, 0x2222, 0x3333, 0x4444, 0x5555, 0x6666, 0x7777, 0x8888})), + ((__m512i)((__v8di){0xAAAA, 0xBBBB, 0xCCCC, 0xDDDD, 0xEEEE, 0xFFFF, 0x1111, 0x2222})), + ((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})), + (unsigned char)0xD8), // C ? B : A + 0xAAAA, 0x2222, 0xCCCC, 0x4444, 0xEEEE, 0x6666, 0x1111, 0x8888)); +TEST_CONSTEXPR(match_v8di( + _mm512_ternarylogic_epi64( + ((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, 0xF0F0, 0xFF, -0x5555555555555556, 0x5555555555555555})), + ((__m512i)((__v8di){0x1234, 0xFFFF, 0xFF, 0xF0F, 0x3333, 0xFF00, -0x5555555555555556, -0x0F0F0F0F0F0F0F10})), + ((__m512i)((__v8di){0xFFFF, 0x1234, 0xF0F, 0xFF00, 0xF0F0, 0x3333, 0x5555555555555555, 0x0F0F0F0F0F0F0F0})), + (unsigned char)0x8F), // ~A | (B & C) + 0x1234, -0x1, 0xF, -0x1, -0xC0C1, -0x100, 0x5555555555555555, -0x5505050505050506)); +TEST_CONSTEXPR(match_v8di( + _mm512_ternarylogic_epi64( + ((__m512i)((__v8di){0x7FFFFFFFFFFFFFFF, 0x0, 0x00FF00FF00FF00FF, 0x0F0F0F0F0F0F0F0F, + 0x123456789ABCDEF0, 0x3333333333333333, 0x5555555555555555, 0x0123456789ABCDEF})), + ((__m512i)((__v8di){0x1111111111111111, 0x2222222222222222, 0xFFFFFFFF, -0x100000000, + 0x0, -0x3333333333333334, -0x0F0F0F0F0F0F0F10, -0x123456789ABCDF0})), + ((__m512i)((__v8di){0x2222222222222222, 0x1111111111111111, -0x1000000000000, 0xFFFFFFFF, + -0x1, 0x0, 0x0F0F0F0F0F0F0F0F, 0x0})), + (unsigned char)0xE0), // A & (B | C) + 0x3333333333333333, 0x0, 0x00FF000000FF00FF, 0x0F0F0F0F0F0F0F0F, + 0x123456789ABCDEF0, 0x0, 0x5555555555555555, 0x0)); __m512i test_mm512_mask_ternarylogic_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) { // CHECK-LABEL: test_mm512_mask_ternarylogic_epi64 @@ -6300,6 +6523,40 @@ __m512i test_mm512_mask_ternarylogic_epi64(__m512i __A, __mmask8 __U, __m512i __ // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_ternarylogic_epi64(__A, __U, __B, __C, _MM_TERNLOG_B | _MM_TERNLOG_C); } +TEST_CONSTEXPR(match_v8di( + _mm512_mask_ternarylogic_epi64( + ((__m512i)((__v8di){0x0LL, 0x1LL, 0x2LL, 0x3LL, 0x4LL, 0x5LL, 0x6LL, 0x7LL})), + (__mmask8)0xFF, + ((__m512i)((__v8di){0x1LL, 0x1LL, 0x1LL, 0x1LL, 0x1LL, 0x1LL, 0x1LL, 0x1LL})), + ((__m512i)((__v8di){0x0LL, 0x0LL, 0x0LL, 0x0LL, 0x0LL, 0x0LL, 0x0LL, 0x0LL})), + (unsigned char)0x96), + 0x1, 0x0, 0x3, 0x2, 0x5, 0x4, 0x7, 0x6)); +TEST_CONSTEXPR(match_v8di( + 
_mm512_mask_ternarylogic_epi64( + ((__m512i)((__v8di){ + (long long)0x9FD641D41C6A70FEULL, (long long)0xB51D9082CF18D398ULL, + (long long)0x730E520285F4D01BULL, (long long)0x347E72CE341FD932ULL, + (long long)0x438F8D9BEA5D486FULL, (long long)0xFDB554A5DEEF750DULL, + (long long)0x0ABAA254BFFC2308ULL, (long long)0x825FE29BF1D51FC6ULL + })), + (__mmask8)0xE4, + ((__m512i)((__v8di){ + (long long)0xC1779B12FA832A6EULL, (long long)0xCF6E876B587C4762ULL, + (long long)0x25DC09833D4ECA24ULL, (long long)0x34E55E25691BB80AULL, + (long long)0x9A02450CD8F20DD7ULL, (long long)0x78B9E240FB5B77A9ULL, + (long long)0xE1F37F76C1162596ULL, (long long)0xDCCB561738CE2941ULL + })), + ((__m512i)((__v8di){ + (long long)0xD13840986BC8DC3CULL, (long long)0x34CDE7E8C960187EULL, + (long long)0x7EE068D9D111EEB8ULL, (long long)0xAD11149DE686B811ULL, + (long long)0x849F38BFD9AB0DFAULL, (long long)0x5C28948ED106227BULL, + (long long)0xFB1918D4A18E304DULL, (long long)0x4EDE6944F84AD59FULL + })), + (unsigned char)0x67), + (long long)0x9FD641D41C6A70FEULL, (long long)0xB51D9082CF18D398ULL, + (long long)0xDB3DE57EEE5F25DCULL, (long long)0x347E72CE341FD932ULL, + (long long)0x438F8D9BEA5D486FULL, (long long)0x26D37FDE2A5DDDD2ULL, + (long long)0x1EEE67AB6099DDFBULL, (long long)0xB3353F73C6A4FCFEULL)); __m512i test_mm512_maskz_ternarylogic_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) { // CHECK-LABEL: test_mm512_maskz_ternarylogic_epi64 @@ -6307,6 +6564,59 @@ __m512i test_mm512_maskz_ternarylogic_epi64(__mmask8 __U, __m512i __A, __m512i _ // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> zeroinitializer return _mm512_maskz_ternarylogic_epi64(__U, __A, __B, __C, ~_MM_TERNLOG_A | (_MM_TERNLOG_B ^ _MM_TERNLOG_C)); } +TEST_CONSTEXPR(match_v8di( + _mm512_maskz_ternarylogic_epi64( + (__mmask8)0x6D, + ((__m512i)((__v8di){ + (long long)0xFFFFFFFFFFFFFFFF, (long long)0x0000000000000000, + (long long)0x0000FFFF0000FFFF, (long long)0x5555555555555555, + (long long)0x0123456789ABCDEF, (long long)0x1122334455667788, + (long long)0x00000000FFFFFFFF, (long long)0x0F0F0F0F0F0F0F0F + })), + ((__m512i)((__v8di){ + (long long)0x000000000000000B, (long long)0x000000000000000C, + (long long)0x00000000FFFF0000, (long long)0x3333333333333333, + (long long)0x0FEDCBA987654321, (long long)0x1111111111111111, + (long long)0x7FFFFFFFFFFFFFFF, (long long)0x2222222222222222 + })), + ((__m512i)((__v8di){ + (long long)0x000000000000000C, (long long)0x000000000000000B, + (long long)0x00F0F0F0F0F0F0F0, (long long)0x5555555555555555, + (long long)0x0000000000000000, (long long)0x7FFFFFFFFFFFFFFF, + (long long)0x0000000000000001, (long long)0x2222222222222222 + })), + (unsigned char)0x89), + (long long)0x0000000000000008, (long long)0x0000000000000000, + (long long)0xFF0F0000F0F00000, (long long)0x9999999999999999, + (long long)0x0000000000000000, (long long)0x9111111111111111, + (long long)0x8000000000000001, (long long)0x0000000000000000)); + +TEST_CONSTEXPR(match_v8di( + _mm512_maskz_ternarylogic_epi64( + (__mmask8)0x6D, + ((__m512i)((__v8di){ + (long long)0xFFFFFFFFFFFFFFFF, (long long)0x0000000000000000, + (long long)0x0000FFFF0000FFFF, (long long)0x5555555555555555, + (long long)0x0123456789ABCDEF, (long long)0x1122334455667788, + (long long)0x00000000FFFFFFFF, (long long)0x0F0F0F0F0F0F0F0F + })), + ((__m512i)((__v8di){ + (long long)0x000000000000000B, (long long)0x000000000000000C, + (long long)0x00000000FFFF0000, (long long)0x3333333333333333, + (long long)0x0FEDCBA987654321, (long long)0x1111111111111111, + (long 
long)0x7FFFFFFFFFFFFFFF, (long long)0x2222222222222222 + })), + ((__m512i)((__v8di){ + (long long)0x000000000000000C, (long long)0x000000000000000B, + (long long)0x00F0F0F0F0F0F0F0, (long long)0x5555555555555555, + (long long)0x0000000000000000, (long long)0x7FFFFFFFFFFFFFFF, + (long long)0x0000000000000001, (long long)0x2222222222222222 + })), + (unsigned char)0x29), + (long long)0x0000000000000004, (long long)0x0000000000000000, + (long long)0xFF0FF0F0F0F0F0F0, (long long)0xCCCCCCCCCCCCCCCC, + (long long)0x0000000000000000, (long long)0x8033225544776699, + (long long)0x8000000000000000, (long long)0x0000000000000000)); __m512 test_mm512_shuffle_f32x4(__m512 __A, __m512 __B) { // CHECK-LABEL: test_mm512_shuffle_f32x4 diff --git a/clang/test/CodeGen/X86/avx512fp16-builtins.c b/clang/test/CodeGen/X86/avx512fp16-builtins.c index 37443d5..dbf89b3 100644 --- a/clang/test/CodeGen/X86/avx512fp16-builtins.c +++ b/clang/test/CodeGen/X86/avx512fp16-builtins.c @@ -17,6 +17,7 @@ _Float16 test_mm512_cvtsh_h(__m512h __A) { // CHECK: extractelement <32 x half> %{{.*}}, i32 0 return _mm512_cvtsh_h(__A); } +TEST_CONSTEXPR(_mm512_cvtsh_h((__m512h){-32.0, 31.0, -30.0, 29.0, -28.0, 27.0, -26.0, 25.0, -24.0, 23.0, -22.0, 21.0, -20.0, 19.0, -18.0, 17.0, -16.0, 15.0, -14.0, 13.0, -12.0, 11.0, -10.0, 9.0, -8.0, 7.0, -6.0, 5.0, -4.0, 3.0, -2.0, 1.0}) == -32.0); __m128h test_mm_setzero_ph(void) { // CHECK-LABEL: test_mm_setzero_ph diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index 6d91870..51385d5 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -8359,6 +8359,27 @@ __m128i test_mm_ternarylogic_epi32(__m128i __A, __m128i __B, __m128i __C) { // CHECK: @llvm.x86.avx512.pternlog.d.128 return _mm_ternarylogic_epi32(__A, __B, __C, 4); } +TEST_CONSTEXPR(match_v4si( + _mm_ternarylogic_epi32( + ((__m128i)((__v4si){(int)0x7FFFFFFF, (int)0x80000000, (int)0xAAAAAAAA, 0x00000000})), + ((__m128i)((__v4si){0x00000000, (int)0xFFFFFFFF, 0x12345678, (int)0xFFFFFFFF})), + ((__m128i)((__v4si){(int)0xCAFEBABE, 0x0F0F0F0F, (int)0xFFFFFFFF, 0x00000000})), + (unsigned char)0xCA), /* B ? 
(A | C) : (C & ~A) */ + (int)0x80000000, (int)0x8F0F0F0F, 0x5775577D, 0x00000000)); +TEST_CONSTEXPR(match_v4si( + _mm_ternarylogic_epi32( + ((__m128i)((__v4si){0x12345678, (int)0x80000000, 0x00000000, (int)0xAAAAAAAA})), + ((__m128i)((__v4si){0x0000FFFF, 0x7FFFFFFF, 0x55555555, 0x00000000})), + ((__m128i)((__v4si){(int)0xF0F0F0F0, 0x00000001, 0x0F0F0F0F, 0x33333333})), + (unsigned char)0xFE), /* A | B | C */ + (int)0xF2F4FFFF, (int)0xFFFFFFFF, 0x5F5F5F5F, (int)0xBBBBBBBB)); +TEST_CONSTEXPR(match_v4si( + _mm_ternarylogic_epi32( + ((__m128i)((__v4si){(int)0xFFFFFFFF, 0x12345678, (int)0x80000000, 0x0F0F0F0F})), + ((__m128i)((__v4si){0x00FF00FF, (int)0xFFFFFFFF, 0x7FFFFFFF, (int)0xF0F0F0F0})), + ((__m128i)((__v4si){0x0F0F0F0F, 0x00FF00FF, (int)0xFFFFFFFF, (int)0xFFFFFFFF})), + (unsigned char)0x80), /* A & B & C */ + 0x000F000F, 0x00340078, 0x00000000, 0x00000000)); __m128i test_mm_mask_ternarylogic_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { // CHECK-LABEL: test_mm_mask_ternarylogic_epi32 @@ -8366,6 +8387,30 @@ __m128i test_mm_mask_ternarylogic_epi32(__m128i __A, __mmask8 __U, __m128i __B, // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_ternarylogic_epi32(__A, __U, __B, __C, 4); } +TEST_CONSTEXPR(match_v4si( + _mm_mask_ternarylogic_epi32( + ((__m128i)((__v4si){-0x1, 0x0, -0x1, 0x0})), + (__mmask8)0x03, + ((__m128i)((__v4si){0xB, 0xB, 0xB, 0xB})), + ((__m128i)((__v4si){0xC, 0xC, 0xC, 0xC })), + (unsigned char)0xCA), // A ? B : C + 0xB, 0xC, -0x1, 0x0)); +TEST_CONSTEXPR(match_v4si( + _mm_mask_ternarylogic_epi32( + ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})), + (__mmask8)0x0C, + ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})), + ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), // A | B | C + 0x9, 0x9, 0xF, 0xF)); +TEST_CONSTEXPR(match_v4si( + _mm_mask_ternarylogic_epi32( + ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})), + (__mmask8)0x05, + ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})), + ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), // A & B & C + 0x0, 0x9, 0x0, 0x9)); __m128i test_mm_maskz_ternarylogic_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { // CHECK-LABEL: test_mm_maskz_ternarylogic_epi32 @@ -8373,12 +8418,57 @@ __m128i test_mm_maskz_ternarylogic_epi32(__mmask8 __U, __m128i __A, __m128i __B, // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer return _mm_maskz_ternarylogic_epi32(__U, __A, __B, __C, 4); } +TEST_CONSTEXPR(match_v4si( + _mm_maskz_ternarylogic_epi32( + (__mmask8)0x0B, + ((__m128i)((__v4si){(int)0xDEADBEEF, 0, (int)0xFFFFFFFF, 0x13579BDF})), + ((__m128i)((__v4si){(int)0xFFFFFFFF, 0, (int)0xFFFFFFFF, 0})), + ((__m128i)((__v4si){(int)0xCAFEBABE, (int)0xFFFFFFFF, 0, 0x2468ACE0})), + (unsigned char)0xE2), // B ? 
A : C + (int)0xDEADBEEF, (int)0xFFFFFFFF, 0, 0x2468ACE0)); +TEST_CONSTEXPR(match_v4si( + _mm_maskz_ternarylogic_epi32( + (__mmask8)0x0C, + ((__m128i)((__v4si){0, (int)0xFFFFFFFF, (int)0xAAAAAAAA, 0x55555555})), + ((__m128i)((__v4si){(int)0xFFFFFFFF, 0, (int)0xFFFFFFFF, (int)0xFFFFFFFF})), + ((__m128i)((__v4si){(int)0xF0F0F0F0, 0, 0, (int)0xFFFFFFFF})), + (unsigned char)0x7F), // ~(A & B) | ~(B & C) + 0, 0, (int)0xFFFFFFFF, (int)0xAAAAAAAA)); +TEST_CONSTEXPR(match_v4si( + _mm_maskz_ternarylogic_epi32( + (__mmask8)0x05, + ((__m128i)((__v4si){(int)0xFFFFFFFF, 0, 0x12345678, 0})), + ((__m128i)((__v4si){0, 0, 0x0000FFFF, (int)0xFFFFFFFF})), + ((__m128i)((__v4si){0, 0, 0x0000000F, 0})), + (unsigned char)0xBF), // ~A | ~B | C imm = 0xBF + (int)0xFFFFFFFF, 0, (int)0xFFFFA98F, 0)); __m256i test_mm256_ternarylogic_epi32(__m256i __A, __m256i __B, __m256i __C) { // CHECK-LABEL: test_mm256_ternarylogic_epi32 // CHECK: @llvm.x86.avx512.pternlog.d.256 return _mm256_ternarylogic_epi32(__A, __B, __C, 4); } +TEST_CONSTEXPR(match_v8si( + _mm256_ternarylogic_epi32( + ((__m256i)((__v8si){0x12345678, 0x00000000, (int)0xFFFFFFFF, 0x7FFFFFFF, (int)0x80000000, 0x00FF00FF, (int)0xF0F0F0F0, (int)0xAAAAAAAA})), + ((__m256i)((__v8si){(int)0xDEADBEEF, 0x11111111, 0x22222222, 0x33333333, 0x44444444, 0x55555555, 0x66666666, 0x77777777})), + ((__m256i)((__v8si){(int)0xCAFEBABE, (int)0x88888888, (int)0x99999999, (int)0xAAAAAAAA, (int)0xBBBBBBBB, (int)0xCCCCCCCC, (int)0xDDDDDDDD, (int)0xFFFFFFFF})), + (unsigned char)0xF0), /* A */ + 0x12345678, 0x00000000, (int)0xFFFFFFFF, 0x7FFFFFFF, (int)0x80000000, 0x00FF00FF, (int)0xF0F0F0F0, (int)0xAAAAAAAA)); +TEST_CONSTEXPR(match_v8si( + _mm256_ternarylogic_epi32( + ((__m256i)((__v8si){0x12345678, 0x00000000, (int)0xFFFFFFFF, 0x7FFFFFFF, (int)0x80000000, 0x00FF00FF, (int)0xF0F0F0F0, (int)0xAAAAAAAA})), + ((__m256i)((__v8si){(int)0xAAAAAAAA, (int)0xBBBBBBBB, (int)0xCCCCCCCC, (int)0xDDDDDDDD, (int)0xEEEEEEEE, (int)0xFFFFFFFF, 0x00000000, 0x11111111})), + ((__m256i)((__v8si){0x22222222, 0x33333333, 0x44444444, 0x55555555, 0x66666666, 0x77777777, (int)0x88888888, (int)0x99999999})), + (unsigned char)0x0F), /* ~A */ + (int)0xEDCBA987, (int)0xFFFFFFFF, 0x00000000, (int)0x80000000, 0x7FFFFFFF, (int)0xFF00FF00, 0x0F0F0F0F, 0x55555555)); +TEST_CONSTEXPR(match_v8si( + _mm256_ternarylogic_epi32( + ((__m256i)((__v8si){0x0F0F0F0F, (int)0xAAAAAAAA, 0x12345678, 0x00000000, (int)0xFFFFFFFF, 0x13579BDF, (int)0x80000000, 0x7FFFFFFF})), + ((__m256i)((__v8si){(int)0xF0F0F0F0, 0x55555555, 0x11111111, (int)0xFFFFFFFF, 0x00000000, 0x02468ACE, 0x7FFFFFFF, (int)0x80000000})), + ((__m256i)((__v8si){(int)0xAAAAAAAA, (int)0xAAAAAAAA, (int)0xAAAAAAAA, (int)0xAAAAAAAA, (int)0xAAAAAAAA, (int)0xAAAAAAAA, (int)0xAAAAAAAA, (int)0xAAAAAAAA})), + (unsigned char)0x3C), /* A ^ B */ + (int)0xFFFFFFFF, (int)0xFFFFFFFF, 0x03254769, (int)0xFFFFFFFF, (int)0xFFFFFFFF, 0x11111111, (int)0xFFFFFFFF, (int)0xFFFFFFFF)); __m256i test_mm256_mask_ternarylogic_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { // CHECK-LABEL: test_mm256_mask_ternarylogic_epi32 @@ -8386,6 +8476,30 @@ __m256i test_mm256_mask_ternarylogic_epi32(__m256i __A, __mmask8 __U, __m256i __ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_ternarylogic_epi32(__A, __U, __B, __C, 4); } +TEST_CONSTEXPR(match_v8si( + _mm256_mask_ternarylogic_epi32( + ((__m256i)((__v8si){(int)0xFFFFFFFF, 0x00000000, 0x12345678, (int)0xAAAAAAAA, 0x7FFFFFFF, (int)0x80000000, 0x13579BDF, 0x2468ACE0})), + (__mmask8)0xA5, + 
((__m256i)((__v8si){(int)0xFFFFFFFF, 0x00000000, (int)0xFFFFFFFF, 0x00000000, (int)0xFFFFFFFF, 0x00000000, (int)0xFFFFFFFF, 0x00000000})), + ((__m256i)((__v8si){0x00000000, (int)0xFFFFFFFF, 0x55555555, 0x33333333, (int)0x89ABCDEF, 0x00000000, (int)0xFFFFFFFF, 0x11111111})), + (unsigned char)0xE2), /* B ? A : C */ + (int)0xFFFFFFFF, 0x00000000, 0x12345678, (int)0xAAAAAAAA, 0x7FFFFFFF, 0x00000000, 0x13579BDF, 0x11111111)); +TEST_CONSTEXPR(match_v8si( + _mm256_mask_ternarylogic_epi32( + ((__m256i)((__v8si){0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F})), + (__mmask8)0xFF, + ((__m256i)((__v8si){0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF})), + ((__m256i)((__v8si){0x33333333, 0x33333333, 0x33333333, 0x33333333, 0x33333333, 0x33333333, 0x33333333, 0x33333333})), + (unsigned char)0x96), /* A ^ B ^ C */ + 0x3CC33CC3, 0x3CC33CC3, 0x3CC33CC3, 0x3CC33CC3, 0x3CC33CC3, 0x3CC33CC3, 0x3CC33CC3, 0x3CC33CC3)); +TEST_CONSTEXPR(match_v8si( + _mm256_mask_ternarylogic_epi32( + ((__m256i)((__v8si){(int)0xFFFFFFFF, 0x00000000, (int)0xFFFFFFFF, 0x12345678, (int)0xAAAAAAAA, 0x55555555, (int)0x80000000, 0x7FFFFFFF})), + (__mmask8)0x5A, + ((__m256i)((__v8si){0x00000000, (int)0xFFFFFFFF, 0x11111111, (int)0xFFFFFFFF, 0x55555555, (int)0xAAAAAAAA, (int)0x80000000, 0x7FFFFFFF})), + ((__m256i)((__v8si){0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000})), + (unsigned char)0xC0), /* A & B */ + (int)0xFFFFFFFF, 0x00000000, (int)0xFFFFFFFF, 0x12345678, 0x00000000, 0x55555555, (int)0x80000000, 0x7FFFFFFF)); __m256i test_mm256_maskz_ternarylogic_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { // CHECK-LABEL: test_mm256_maskz_ternarylogic_epi32 @@ -8393,12 +8507,60 @@ __m256i test_mm256_maskz_ternarylogic_epi32(__mmask8 __U, __m256i __A, __m256i _ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> zeroinitializer return _mm256_maskz_ternarylogic_epi32(__U, __A, __B, __C, 4); } +TEST_CONSTEXPR(match_v8si( + _mm256_maskz_ternarylogic_epi32( + (__mmask8)0x6D, + ((__m256i)((__v8si){(int)-1, 0, (int)-1, 0, (int)-1, 0, (int)-1, 0})), + ((__m256i)((__v8si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})), + ((__m256i)((__v8si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})), + (unsigned char)0x30), /* A & ~B */ + (int)0xFFFFFFF4, 0, (int)0xFFFFFFF4, 0, 0, 0, (int)0xFFFFFFF4, 0)); +TEST_CONSTEXPR(match_v8si( + _mm256_maskz_ternarylogic_epi32( + (__mmask8)0x90, + ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x44), /* B & ~C */ + 0, 0, 0, 0, 0x4, 0, 0, 0x4)); +TEST_CONSTEXPR(match_v8si( + _mm256_maskz_ternarylogic_epi32( + (__mmask8)0x0F, + ((__m256i)((__v8si){0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3})), + ((__m256i)((__v8si){0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1})), + ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x28), /* (A ^ B) & C */ + 0x2, 0x2, 0x2, 0x2, 0, 0, 0, 0)); __m128i test_mm_ternarylogic_epi64(__m128i __A, __m128i __B, __m128i __C) { // CHECK-LABEL: test_mm_ternarylogic_epi64 // CHECK: @llvm.x86.avx512.pternlog.q.128 return _mm_ternarylogic_epi64(__A, __B, __C, 4); } +TEST_CONSTEXPR(match_v2di( + _mm_ternarylogic_epi64( + ((__m128i)((__v2di){ (long long)0xBB91433A6AA79987ULL, (long long)0xD1F6F86C029A7245ULL })), + ((__m128i)((__v2di){ (long long)0xCD8778E7D340BBCDULL, 
(long long)0xDAEA58BA4C73A942ULL })), + ((__m128i)((__v2di){ (long long)0xEE8971105E503A67ULL, (long long)0x452EC40A3193CA54ULL })), + (unsigned char)0x77), // F = ~(B & C) + (long long)0x337E8FFFADBFC5BAULL, + (long long)0xBFD5BFF5FFEC77BFULL)); +TEST_CONSTEXPR(match_v2di( + _mm_ternarylogic_epi64( + ((__m128i)((__v2di){ (long long)0x6FACAA5090E5E945ULL, (long long)0x5F811CB929645F8BULL })), + ((__m128i)((__v2di){ (long long)0xDFC9E3B11FCFF454ULL, (long long)0x42D6CB5C6ED4E94BULL })), + ((__m128i)((__v2di){ (long long)0xA091250E8FE46024ULL, (long long)0x9C9CEA0C2CA1C789ULL })), + (unsigned char)0xDD), // F = B | ~C + (long long)0xDFEFFBF17FDFFFDFULL, + (long long)0x63F7DFFFFFDEF97FULL)); +TEST_CONSTEXPR(match_v2di( + _mm_ternarylogic_epi64( + ((__m128i)((__v2di){ (long long)0x2FD2B7A48D9FE5B9ULL, (long long)0xBCF74D7A5ADAD121ULL })), + ((__m128i)((__v2di){ (long long)0xBB9D58E4F543BBCFULL, (long long)0x87F26AEE175F0CD2ULL })), + ((__m128i)((__v2di){ (long long)0xBC428D42FA882692ULL, (long long)0x95C5FB986980A81FULL })), + (unsigned char)0x22), // F = ~B & C + (long long)0x044285020A880410ULL, + (long long)0x100591106880A00DULL)); __m128i test_mm_mask_ternarylogic_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { // CHECK-LABEL: test_mm_mask_ternarylogic_epi64 @@ -8406,6 +8568,40 @@ __m128i test_mm_mask_ternarylogic_epi64(__m128i __A, __mmask8 __U, __m128i __B, // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_ternarylogic_epi64(__A, __U, __B, __C, 4); } +TEST_CONSTEXPR(match_v2di( + _mm_mask_ternarylogic_epi64( + ((__m128i)((__v2di){(long long)0xF4C3B00C0D15EA5ELL, (long long)0x0123456789ABCDE0LL})), + (__mmask8)0x9D, + ((__m128i)((__v2di){(long long)0x9A7F3C2155EE00DDLL, (long long)0xDEADBEEFCAFEBABELL})), + ((__m128i)((__v2di){(long long)0x00F0F0F0F0F0F0F0LL, (long long)0x13579BDF2468ACE0LL})), + (unsigned char)0xFF), // All 1s + (long long)-1, + (long long)0x0123456789ABCDE0LL)); +TEST_CONSTEXPR(match_v2di( + _mm_mask_ternarylogic_epi64( + ((__m128i)((__v2di){ (long long)0x3A7C19E54B20D8A1LL, (long long)0x4F12B39D0C85E762LL })), + (__mmask8)0xD2, + ((__m128i)((__v2di){ (long long)0x6D93A0F217C54E3BLL, (long long)0x24E1C7A95B08D6F2LL })), + ((__m128i)((__v2di){ (long long)0x5A0C3E19D472B8F5LL, (long long)0x0187D3B2C9E4056ALL })), + (unsigned char)0x00), // All 0s + (long long)0x3A7C19E54B20D8A1LL, + (long long)0x0LL)); +TEST_CONSTEXPR(match_v2di( + _mm_mask_ternarylogic_epi64( + ((__m128i)((__v2di){ + (long long)0xA3F10B6C7D8294E1ULL, (long long)0x19D4E7350AB2C98FLL + })), + (__mmask8)0xB5, + ((__m128i)((__v2di){ + (long long)0x5C2E9A10F4B7D863LL, (long long)0x9B7E1D2C3A4F5E60LL + })), + ((__m128i)((__v2di){ + (long long)0x2A6D3F81C9E047B5LL, (long long)0x7F0A1C3E5D2B6490LL + })), + (unsigned char)0x55), // ~C + (long long)0xD592C07E361FB84AULL, + (long long)0x19D4E7350AB2C98FLL +)); __m128i test_mm_maskz_ternarylogic_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { // CHECK-LABEL: test_mm_maskz_ternarylogic_epi64 @@ -8413,12 +8609,72 @@ __m128i test_mm_maskz_ternarylogic_epi64(__mmask8 __U, __m128i __A, __m128i __B, // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> zeroinitializer return _mm_maskz_ternarylogic_epi64(__U, __A, __B, __C, 4); } +TEST_CONSTEXPR(match_v2di( + _mm_maskz_ternarylogic_epi64( + (__mmask8)0xA9, + ((__m128i)((__v2di){ + (long long)0x8F3A5C7E21D4B690ULL, (long long)0x5AD02CE19B7F46A3ULL + })), + ((__m128i)((__v2di){ + (long long)0xC19E04B2A7D35F68ULL, (long long)0x2F7B93C4E1A05D76ULL + 
})), + ((__m128i)((__v2di){ + (long long)0x7A0C1D2E3F405162ULL, (long long)0xD4E5F60718293A4BULL + })), + (unsigned char)0xD2), // F = A ? (B | ~C) : (~B & C) + (long long)0xB53A457239D4B692ULL, + (long long)0x0ULL)); +TEST_CONSTEXPR(match_v2di( + _mm_maskz_ternarylogic_epi64( + (__mmask8)0xB6, + ((__m128i)((__v2di){ + (long long)0x83C1D2E3F4051627ULL, (long long)0x5A0B1C2D3E4F6071ULL + })), + ((__m128i)((__v2di){ + (long long)0x9E8D7C6B5A493827ULL, (long long)0x13579BDF2468ACE0ULL + })), + ((__m128i)((__v2di){ + (long long)0x02468ACE13579BDFULL, (long long)0xFEDCBA9876543210ULL + })), + (unsigned char)0xFE), // F = A | B | C + (long long)0x0ULL, + (long long)0xFFDFBFFF7E7FFEF1ULL)); +TEST_CONSTEXPR(match_v2di( + _mm_maskz_ternarylogic_epi64( + (__mmask8)0xA5, + ((__m128i)((__v2di){ + (long long)0x1C80317FA3B1799DULL, (long long)0xBDD640FB06671AD1ULL + })), + ((__m128i)((__v2di){ + (long long)0x3EB13B9046685257ULL, (long long)0x23B8C1E9392456DEULL + })), + ((__m128i)((__v2di){ + (long long)0x1A3D1FA7BC8960A9ULL, (long long)0xBD9C66B3AD3C2D6DULL + })), + (unsigned char)0x80), // F = A & B & C + (long long)0x1800110000004001ULL, + (long long)0x0ULL +)); __m256i test_mm256_ternarylogic_epi64(__m256i __A, __m256i __B, __m256i __C) { // CHECK-LABEL: test_mm256_ternarylogic_epi64 // CHECK: @llvm.x86.avx512.pternlog.q.256 return _mm256_ternarylogic_epi64(__A, __B, __C, 4); } +TEST_CONSTEXPR(match_v4di( + _mm256_ternarylogic_epi64( + ((__m256i)((__v4di){-0x1, 0x0, -0x1, 0x0})), + ((__m256i)((__v4di){0xB, 0xB, 0xB, 0xB})), + ((__m256i)((__v4di){0xC, 0xC, 0xC, 0xC})), + (unsigned char)0x94), + (long long)-0x8, (long long)0x3, (long long)-0x8, (long long)0x3)); +TEST_CONSTEXPR(match_v4di( + _mm256_ternarylogic_epi64( + ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})), + ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})), + ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x76), + (long long)0xF, (long long)0xF, (long long)0xF, (long long)0xF)); __m256i test_mm256_mask_ternarylogic_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { // CHECK-LABEL: test_mm256_mask_ternarylogic_epi64 @@ -8426,6 +8682,46 @@ __m256i test_mm256_mask_ternarylogic_epi64(__m256i __A, __mmask8 __U, __m256i __ // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_ternarylogic_epi64(__A, __U, __B, __C, 4); } +TEST_CONSTEXPR(match_v4di( + _mm256_mask_ternarylogic_epi64( + ((__m256i)((__v4di){ + (long long)0x0123456789ABCDEFULL, (long long)0x0F0F0F0F0F0F0F0FULL, + (long long)0xAAAAAAAAAAAAAAAALL, (long long)0x13579BDF02468ACEULL + })), + (__mmask8)0x09, + ((__m256i)((__v4di){ + (long long)0x1111111111111111ULL, (long long)0x2222222222222222ULL, + (long long)0x3333333333333333ULL, (long long)0x4444444444444444ULL + })), + ((__m256i)((__v4di){ + (long long)0x5555555555555555ULL, (long long)0x6666666666666666ULL, + (long long)0x7777777777777777ULL, (long long)0x8888888888888888ULL + })), + (unsigned char)0x12), + (long long)0x44660022CCEE88AAULL, + (long long)0x0F0F0F0F0F0F0F0FULL, + (long long)0xAAAAAAAAAAAAAAAALL, + (long long)0x9B9B13138A8A0202ULL)); +TEST_CONSTEXPR(match_v4di( + _mm256_mask_ternarylogic_epi64( + ((__m256i)((__v4di){ + (long long)0xDEADBEEFDEADBEEFULL, (long long)0xCAFEBABECAFEBABEULL, + (long long)0xF00DFACEF00DFACEULL, (long long)0x0123456789ABCDEFULL + })), + (__mmask8)0x06, + ((__m256i)((__v4di){ + (long long)0x0000000000000000ULL, (long long)0xFFFFFFFFFFFFFFFFULL, + (long long)0x13579BDF13579BDFULL, (long long)0x0AAAAAAAAAAAAAAULL + })), + ((__m256i)((__v4di){ + (long 
long)0x1111111111111111ULL, (long long)0x2222222222222222ULL, + (long long)0x3333333333333333ULL, (long long)0x4444444444444444ULL + })), + (unsigned char)0x23), + (long long)0xDEADBEEFDEADBEEFULL, + (long long)0x0000000000000000ULL, + (long long)0x2CA024202CA02420ULL, + (long long)0x0123456789ABCDEFULL)); __m256i test_mm256_maskz_ternarylogic_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { // CHECK-LABEL: test_mm256_maskz_ternarylogic_epi64 @@ -8433,6 +8729,41 @@ __m256i test_mm256_maskz_ternarylogic_epi64(__mmask8 __U, __m256i __A, __m256i _ // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> zeroinitializer return _mm256_maskz_ternarylogic_epi64(__U, __A, __B, __C, 4); } +TEST_CONSTEXPR(match_v4di( + _mm256_maskz_ternarylogic_epi64( + (__mmask8)0x05, + ((__m256i)((__v4di){ + (long long)0x1, (long long)0x2, (long long)0x0, (long long)0x7 + })), + ((__m256i)((__v4di){ + (long long)0x0, (long long)0x3, (long long)0x4, (long long)0x0 + })), + ((__m256i)((__v4di){ + (long long)0x0, (long long)0x5, (long long)0x0, (long long)0x1 + })), + (unsigned char)0xFE), + (long long)0x1, + (long long)0x0, + (long long)0x4, + (long long)0x0)); +TEST_CONSTEXPR(match_v4di( + _mm256_maskz_ternarylogic_epi64( + (__mmask8)0x0A, + ((__m256i)((__v4di){ + (long long)0x1, (long long)0x0, (long long)0x2, (long long)0x1 + })), + ((__m256i)((__v4di){ + (long long)0x0, (long long)0x1, (long long)0x0, (long long)0x0 + })), + ((__m256i)((__v4di){ + (long long)0x0, (long long)0x0, (long long)0x4, (long long)0x1 + })), + (unsigned char)0xED), + (long long)0x0, + (long long)-0x1, + (long long)0x0, + (long long)-0x1)); + __m256 test_mm256_shuffle_f32x4(__m256 __A, __m256 __B) { // CHECK-LABEL: test_mm256_shuffle_f32x4 // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> diff --git a/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl index 512fcd4..9524f02 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl @@ -54,18 +54,16 @@ struct S { // struct splats // CHECK-LABEL: define void {{.*}}call3 -// CHECK: [[A:%.*]] = alloca <1 x i32>, align 4 +// CHECK: [[AA:%.*]] = alloca i32, align 4 // CHECK: [[s:%.*]] = alloca %struct.S, align 1 -// CHECK-NEXT: store <1 x i32> splat (i32 1), ptr [[A]], align 4 -// CHECK-NEXT: [[L:%.*]] = load <1 x i32>, ptr [[A]], align 4 -// CHECK-NEXT: [[VL:%.*]] = extractelement <1 x i32> [[L]], i32 0 +// CHECK-NEXT: store i32 %A, ptr [[AA]], align 4 +// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[AA]], align 4 // CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0 // CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1 -// CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4 -// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL]] to float +// CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4 +// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L]] to float // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4 -export void call3() { - int1 A = {1}; +export void call3(int A) { S s = (S)A; } @@ -85,3 +83,41 @@ export void call5() { int1 A = {1}; S s = (S)A; } + +struct BFields { + double DF; + int E: 15; + int : 8; + float F; +}; + +struct Derived : BFields { + int G; +}; + +// derived struct with bitfields splat from scalar +// CHECK-LABEL: call6 +// CHECK: [[AAddr:%.*]] = alloca i32, align 4 +// 
CHECK-NEXT: [[D:%.*]] = alloca %struct.Derived, align 1 +// CHECK-NEXT: store i32 %A, ptr [[AAddr]], align 4 +// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[AAddr]], align 4 +// CHECK-NEXT: [[Gep:%.*]] = getelementptr inbounds %struct.Derived, ptr [[D]], i32 0, i32 0 +// CHECK-NEXT: [[E:%.*]] = getelementptr inbounds nuw %struct.BFields, ptr [[Gep]], i32 0, i32 1 +// CHECK-NEXT: [[Gep1:%.*]] = getelementptr inbounds %struct.Derived, ptr [[D]], i32 0, i32 0, i32 0 +// CHECK-NEXT: [[Gep2:%.*]] = getelementptr inbounds %struct.Derived, ptr [[D]], i32 0, i32 0, i32 2 +// CHECK-NEXT: [[Gep3:%.*]] = getelementptr inbounds %struct.Derived, ptr [[D]], i32 0, i32 1 +// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[B]] to double +// CHECK-NEXT: store double [[C]], ptr [[Gep1]], align 8 +// CHECK-NEXT: [[H:%.*]] = trunc i32 [[B]] to i24 +// CHECK-NEXT: [[BFL:%.*]] = load i24, ptr [[E]], align 1 +// CHECK-NEXT: [[BFV:%.*]] = and i24 [[H]], 32767 +// CHECK-NEXT: [[BFC:%.*]] = and i24 [[BFL]], -32768 +// CHECK-NEXT: [[BFS:%.*]] = or i24 [[BFC]], [[BFV]] +// CHECK-NEXT: store i24 [[BFS]], ptr [[E]], align 1 +// CHECK-NEXT: [[C4:%.*]] = sitofp i32 [[B]] to float +// CHECK-NEXT: store float [[C4]], ptr [[Gep2]], align 4 +// CHECK-NEXT: store i32 [[B]], ptr [[Gep3]], align 4 +// CHECK-NEXT: ret void +export void call6(int A) { + Derived D = (Derived)A; +} diff --git a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl index ac02ddf..5f2182e 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl @@ -10,7 +10,8 @@ // CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 0 // CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 1 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G1]], align 4 -// CHECK-NEXT: store i32 [[L]], ptr [[B]], align 4 +// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L]] to float +// CHECK-NEXT: store float [[C]], ptr [[B]], align 4 export void call0() { int A[2] = {0,1}; float B = (float)A; @@ -141,3 +142,46 @@ export void call7() { int A[1] = {1}; A = (int[1])s; } + +struct BFields { + double D; + int E: 15; + int : 8; + float F; +}; + +struct Derived : BFields { + int G; +}; + +// flatten from a derived struct with bitfields +// CHECK-LABEL: call8 +// CHECK: [[A:%.*]] = alloca [4 x i32], align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Derived, align 1 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 %D, i32 19, i1 false) +// CHECK-NEXT: [[Gep:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 0, i32 0 +// CHECK-NEXT: [[Gep1:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 0, i32 1 +// CHECK-NEXT: [[Gep2:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 0, i32 2 +// CHECK-NEXT: [[Gep3:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 0, i32 3 +// CHECK-NEXT: [[Gep4:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: [[E:%.*]] = getelementptr inbounds nuw %struct.BFields, ptr [[Gep4]], i32 0, i32 1 +// CHECK-NEXT: [[Gep5:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0, i32 0 +// CHECK-NEXT: [[Gep6:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0, i32 2 +// CHECK-NEXT: [[Gep7:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 1 +// CHECK-NEXT: [[Z:%.*]] = load double, ptr [[Gep5]], align 8 +// CHECK-NEXT: [[C:%.*]] = 
fptosi double [[Z]] to i32 +// CHECK-NEXT: store i32 [[C]], ptr [[Gep]], align 4 +// CHECK-NEXT: [[BFL:%.*]] = load i24, ptr [[E]], align 1 +// CHECK-NEXT: [[BFShl:%.*]] = shl i24 [[BFL]], 9 +// CHECK-NEXT: [[BFAshr:%.*]] = ashr i24 [[BFShl]], 9 +// CHECK-NEXT: [[BFC:%.*]] = sext i24 [[BFAshr]] to i32 +// CHECK-NEXT: store i32 [[BFC]], ptr [[Gep1]], align 4 +// CHECK-NEXT: [[Y:%.*]] = load float, ptr [[Gep6]], align 4 +// CHECK-NEXT: [[C8:%.*]] = fptosi float [[Y]] to i32 +// CHECK-NEXT: store i32 [[C8]], ptr [[Gep2]], align 4 +// CHECK-NEXT: [[X:%.*]] = load i32, ptr [[Gep7]], align 4 +// CHECK-NEXT: store i32 [[X]], ptr [[Gep3]], align 4 +// CHECK-NEXT: ret void +export void call8(Derived D) { + int A[4] = (int[4])D; +} diff --git a/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl index 81b9f5b..4e29994 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s +// RUN: %clang_cc1 -finclude-default-header -fnative-half-type -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s struct S { int X; @@ -127,14 +127,219 @@ struct T { // CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0 // CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1 // CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds %struct.T, ptr [[Tmp]], i32 0, i32 0 -// CHECK-NEXT: %gep3 = getelementptr inbounds %struct.T, ptr %agg-temp, i32 0, i32 1 -// CHECK-NEXT: %gep4 = getelementptr inbounds %struct.T, ptr %agg-temp, i32 0, i32 2 -// CHECK-NEXT: %load = load i32, ptr %gep2, align 4 -// CHECK-NEXT: store i32 %load, ptr %gep, align 4 -// CHECK-NEXT: %load5 = load i32, ptr %gep3, align 4 -// CHECK-NEXT: %conv = sitofp i32 %load5 to float -// CHECK-NEXT: store float %conv, ptr %gep1, align 4 +// CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds %struct.T, ptr %agg-temp, i32 0, i32 1 +// CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds %struct.T, ptr %agg-temp, i32 0, i32 2 +// CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[G3]], align 4 +// CHECK-NEXT: store i32 [[L1]], ptr [[G1]], align 4 +// CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[G4]], align 4 +// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L2]] to float +// CHECK-NEXT: store float [[C]], ptr [[G2]], align 4 export void call8() { T t = {1,2,3}; S s = (S)t; } + +struct BFields { + double D; + int E: 15; + int : 8; + float F; +}; + +struct Derived : BFields { + int G; +}; + +// Derived Struct truncate to scalar +// CHECK-LABEL: call9 +// CHECK: [[D2:%.*]] = alloca double, align 8 +// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Derived, align 1 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 %D, i32 19, i1 false) +// CHECK-NEXT: [[Gep:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: [[E:%.*]] = getelementptr inbounds nuw %struct.BFields, ptr [[Gep]], i32 0, i32 1 +// CHECK-NEXT: [[Gep1:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0, i32 0 +// CHECK-NEXT: [[Gep2:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0, i32 2 +// CHECK-NEXT: [[Gep3:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 1 +// CHECK-NEXT: [[A:%.*]] = 
load double, ptr [[Gep1]], align 8 +// CHECK-NEXT: store double [[A]], ptr [[D2]], align 8 +// CHECK-NEXT: ret void +export void call9(Derived D) { + double D2 = (double)D; +} + +// Derived struct from vector +// CHECK-LABEL: call10 +// CHECK: [[IAddr:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: [[D:%.*]] = alloca %struct.Derived, align 1 +// CHECK-NEXT: store <4 x i32> %I, ptr [[IAddr]], align 16 +// CHECK-NEXT: [[A:%.*]] = load <4 x i32>, ptr [[IAddr]], align 16 +// CHECK-NEXT: [[Gep:%.*]] = getelementptr inbounds %struct.Derived, ptr [[D]], i32 0, i32 0 +// CHECK-NEXT: [[E:%.*]] = getelementptr inbounds nuw %struct.BFields, ptr [[Gep]], i32 0, i32 1 +// CHECK-NEXT: [[Gep1:%.*]] = getelementptr inbounds %struct.Derived, ptr [[D]], i32 0, i32 0, i32 0 +// CHECK-NEXT: [[Gep2:%.*]] = getelementptr inbounds %struct.Derived, ptr [[D]], i32 0, i32 0, i32 2 +// CHECK-NEXT: [[Gep3:%.*]] = getelementptr inbounds %struct.Derived, ptr [[D]], i32 0, i32 1 +// CHECK-NEXT: [[VL:%.*]] = extractelement <4 x i32> [[A]], i64 0 +// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL]] to double +// CHECK-NEXT: store double [[C]], ptr [[Gep1]], align 8 +// CHECK-NEXT: [[VL4:%.*]] = extractelement <4 x i32> [[A]], i64 1 +// CHECK-NEXT: [[B:%.*]] = trunc i32 [[VL4]] to i24 +// CHECK-NEXT: [[BFL:%.*]] = load i24, ptr [[E]], align 1 +// CHECK-NEXT: [[BFV:%.*]] = and i24 [[B]], 32767 +// CHECK-NEXT: [[BFC:%.*]] = and i24 [[BFL]], -32768 +// CHECK-NEXT: [[BFSet:%.*]] = or i24 [[BFC]], [[BFV]] +// CHECK-NEXT: store i24 [[BFSet]], ptr [[E]], align 1 +// CHECK-NEXT: [[VL5:%.*]] = extractelement <4 x i32> [[A]], i64 2 +// CHECK-NEXT: [[C6:%.*]] = sitofp i32 [[VL5]] to float +// CHECK-NEXT: store float [[C6]], ptr [[Gep2]], align 4 +// CHECK-NEXT: [[VL7:%.*]] = extractelement <4 x i32> [[A]], i64 3 +// CHECK-NEXT: store i32 [[VL7]], ptr [[Gep3]], align 4 +// CHECK-NEXT: ret void +export void call10(int4 I) { + Derived D = (Derived)I; +} + +// truncate derived struct +// CHECK-LABEL: call11 +// CHECK: [[B:%.*]] = alloca %struct.BFields, align 1 +// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Derived, align 1 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 [[D]], i32 19, i1 false) +// CHECK-NEXT: [[Gep:%.*]] = getelementptr inbounds %struct.BFields, ptr [[B]], i32 0 +// CHECK-NEXT: [[E:%.*]] = getelementptr inbounds nuw %struct.BFields, ptr [[Gep]], i32 0, i32 1 +// CHECK-NEXT: [[Gep1:%.*]] = getelementptr inbounds %struct.BFields, ptr [[B]], i32 0, i32 0 +// CHECK-NEXT: [[Gep2:%.*]] = getelementptr inbounds %struct.BFields, ptr [[B]], i32 0, i32 2 +// CHECK-NEXT: [[Gep3:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: [[E4:%.*]] = getelementptr inbounds nuw %struct.BFields, ptr [[Gep3]], i32 0, i32 1 +// CHECK-NEXT: [[Gep5:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0, i32 0 +// CHECK-NEXT: [[Gep6:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0, i32 2 +// CHECK-NEXT: [[Gep7:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 1 +// CHECK-NEXT: [[A:%.*]] = load double, ptr [[Gep5]], align 8 +// CHECK-NEXT: store double [[A]], ptr [[Gep1]], align 8 +// CHECK-NEXT: [[BFL:%.*]] = load i24, ptr [[E4]], align 1 +// CHECK-NEXT: [[Shl:%.*]] = shl i24 [[BFL]], 9 +// CHECK-NEXT: [[Ashr:%.*]] = ashr i24 [[Shl]], 9 +// CHECK-NEXT: [[BFC:%.*]] = sext i24 [[Ashr]] to i32 +// CHECK-NEXT: [[B:%.*]] = trunc i32 [[BFC]] to i24 +// CHECK-NEXT: [[BFL8:%.*]] = load i24, ptr [[E]], align 1 +// 
CHECK-NEXT: [[BFV:%.*]] = and i24 [[B]], 32767 +// CHECK-NEXT: [[BFC:%.*]] = and i24 [[BFL8]], -32768 +// CHECK-NEXT: [[BFSet:%.*]] = or i24 [[BFC]], [[BFV]] +// CHECK-NEXT: store i24 [[BFSet]], ptr [[E]], align 1 +// CHECK-NEXT: [[C:%.*]] = load float, ptr [[Gep6]], align 4 +// CHECK-NEXT: store float [[C]], ptr [[Gep2]], align 4 +// CHECK-NEXT: ret void +export void call11(Derived D) { + BFields B = (BFields)D; +} + +struct Empty { +}; + +// cast to an empty struct +// CHECK-LABEL: call12 +// CHECK: [[I:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: [[E:%.*]] = alloca %struct.Empty, align 1 +// CHECK-NEXT: store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr [[I]], align 16 +// CHECK-NEXT: [[A:%.*]] = load <4 x i32>, ptr [[I]], align 16 +// CHECK-NEXt: ret void +export void call12() { + int4 I = {1,2,3,4}; + Empty E = (Empty)I; +} + +struct MoreBFields { + int A; + uint64_t B: 60; + float C; + uint16_t D: 10; + uint16_t E: 6; + int : 32; + double F; + int : 8; + uint G; +}; + +// more complicated bitfield case +// CHECK-LABEL: call13 +// CHECK: [[AA:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[MBF:%.*]] = alloca %struct.MoreBFields, align 1 +// CHECK-NEXT: store i32 %A, ptr [[AA]], align 4 +// CHECK-NEXT: [[Z:%.*]] = load i32, ptr [[AA]], align 4 +// get the gep for the struct. +// CHECK-NEXT: [[Gep:%.*]] = getelementptr inbounds %struct.MoreBFields, ptr [[MBF]], i32 0 +// CHECK-NEXT: [[FieldB:%.*]] = getelementptr inbounds nuw %struct.MoreBFields, ptr [[Gep]], i32 0, i32 1 +// D and E share the same field index +// CHECK-NEXT: [[FieldD:%.*]] = getelementptr inbounds nuw %struct.MoreBFields, ptr [[Gep]], i32 0, i32 3 +// CHECK-NEXT: [[FieldE:%.*]] = getelementptr inbounds nuw %struct.MoreBFields, ptr [[Gep]], i32 0, i32 3 +// CHECK-NEXT: [[FieldA:%.*]] = getelementptr inbounds %struct.MoreBFields, ptr [[MBF]], i32 0, i32 0 +// CHECK-NEXT: [[FieldC:%.*]] = getelementptr inbounds %struct.MoreBFields, ptr [[MBF]], i32 0, i32 2 +// CHECK-NEXT: [[FieldF:%.*]] = getelementptr inbounds %struct.MoreBFields, ptr [[MBF]], i32 0, i32 5 +// CHECK-NEXT: [[FieldG:%.*]] = getelementptr inbounds %struct.MoreBFields, ptr [[MBF]], i32 0, i32 7 +// store int A into field A +// CHECK-NEXT: store i32 [[Z]], ptr [[FieldA]], align 4 +// store int A in bitField B, do necessary conversions +// CHECK-NEXT: [[Conv:%.*]] = sext i32 [[Z]] to i64 +// CHECK-NEXT: [[BFL:%.*]] = load i64, ptr [[FieldB]], align 1 +// CHECK-NEXT: [[BFV:%.*]] = and i64 [[Conv]], 1152921504606846975 +// CHECK-NEXT: [[BFC:%.*]] = and i64 [[BFL]], -1152921504606846976 +// CHECK-NEXT: [[BFS:%.*]] = or i64 [[BFC]], [[BFV]] +// CHECK-NEXT: store i64 [[BFS]], ptr [[FieldB]], align 1 +// store int A into field C +// CHECK-NEXT: [[Conv5:%.*]] = sitofp i32 [[Z]] to float +// CHECK-NEXT: store float [[Conv5]], ptr [[FieldC]], align 4 +// store int A into bitfield D +// CHECK-NEXT: [[Conv6:%.*]] = trunc i32 [[Z]] to i16 +// CHECK-NEXT: [[FDL:%.*]] = load i16, ptr [[FieldD]], align 1 +// CHECK-NEXT: [[FDV:%.*]] = and i16 [[Conv6]], 1023 +// CHECK-NEXT: [[FDC:%.*]] = and i16 [[FDL]], -1024 +// CHECK-NEXT: [[FDS:%.*]] = or i16 [[FDC]], [[FDV]] +// CHECK-NEXT: store i16 [[FDS]], ptr [[FieldD]], align 1 +// store int A into bitfield E; +// CHECK-NEXT: [[Conv11:%.*]] = trunc i32 [[Z]] to i16 +// CHECK-NEXT: [[FEL:%.*]] = load i16, ptr [[FieldE]], align 1 +// CHECK-NEXT: [[FEV:%.*]] = and i16 [[Conv11]], 63 +// CHECK-NEXT: [[FESHL:%.*]] = shl i16 [[FEV]], 10 +// CHECK-NEXT: [[FEC:%.*]] = and i16 [[FEL]], 1023 +// CHECK-NEXT: [[FES:%.*]] = or i16 [[FEC]], 
[[FESHL]] +// CHECK-NEXT: store i16 [[FES]], ptr [[FieldE]], align 1 +// store int A into field F +// CHECK-NEXT: [[Conv16:%.*]] = sitofp i32 [[Z]] to double +// CHECK-NEXT: store double [[Conv16]], ptr [[FieldF]], align 8 +// store int A into field G +// CHECK-NEXT: store i32 [[Z]], ptr [[FieldG]], align 4 +// CHECK-NEXT: ret void +export void call13(int A) { + MoreBFields MBF = (MoreBFields)A; +} + +struct Inner { + int Z; + int Y : 25; +}; + +struct Outer { + int A; + Inner I; +}; + +// show usage of "extra" gep for struct containing bitfield +// CHECK-LABEL: call14 +// CHECK: [[AA:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[O:%.*]] = alloca %struct.Outer, align 1 +// CHECK-NEXT: store i32 %A, ptr [[AA]], align 4 +// CHECK-NEXT: [[Z:%.*]] = load i32, ptr [[AA]], align 4 +// CHECK-NEXT: [[FieldA:%.*]] = getelementptr inbounds %struct.Outer, ptr [[O]], i32 0, i32 0 +// showing real usage of "extra gep". need Inner struct to generate access of its bitfield. +// CHECK-NEXT: [[FieldI:%.*]] = getelementptr inbounds %struct.Outer, ptr [[O]], i32 0, i32 1 +// CHECK-NEXT: [[FieldY:%.*]] = getelementptr inbounds nuw %struct.Inner, ptr [[FieldI]], i32 0, i32 1 +// CHECK-NEXT: [[FieldZ:%.*]] = getelementptr inbounds %struct.Outer, ptr [[O]], i32 0, i32 1, i32 0 +// CHECK-NEXT: store i32 [[Z]], ptr [[FieldA]], align 4 +// CHECK-NEXT: store i32 [[Z]], ptr [[FieldZ]], align 4 +// CHECK-NEXT: [[BFL:%.*]] = load i32, ptr [[FieldY]], align 1 +// CHECK-NEXT: [[BFV:%.*]] = and i32 [[Z]], 33554431 +// CHECK-NEXT: [[BFC:%.*]] = and i32 [[BFL]], -33554432 +// CHECK-NEXT: [[BFS:%.*]] = or i32 [[BFC]], [[BFV]] +// CHECK-NEXT: store i32 [[BFS]], ptr [[FieldY]], align 1 +// CHECK-NEXT: ret void +export void call14(int A) { + Outer O = (Outer)A; +} diff --git a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl index 253b38a7c..26aa41a 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl @@ -79,3 +79,45 @@ export void call5() { S s = {1, 2.0}; int A = (int)s; } + +struct BFields { + double D; + int E: 15; + int : 8; + float F; +}; + +struct Derived : BFields { + int G; +}; + +// vector flat cast from derived struct with bitfield +// CHECK-LABEL: call6 +// CHECK: [[A:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Derived, align 1 +// CHECK-NEXT: [[FlatTmp:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 %D, i32 19, i1 false) +// CHECK-NEXT: [[Gep:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: [[E:%.*]] = getelementptr inbounds nuw %struct.BFields, ptr [[Gep]], i32 0, i32 1 +// CHECK-NEXT: [[Gep1:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0, i32 0 +// CHECK-NEXT: [[Gep2:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0, i32 2 +// CHECK-NEXT: [[Gep3:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 1 +// CHECK-NEXT: [[Z:%.*]] = load <4 x i32>, ptr [[FlatTmp]], align 16 +// CHECK-NEXT: [[Y:%.*]] = load double, ptr [[Gep1]], align 8 +// CHECK-NEXT: [[C:%.*]] = fptosi double [[Y]] to i32 +// CHECK-NEXT: [[X:%.*]] = insertelement <4 x i32> [[Z]], i32 [[C]], i64 0 +// CHECK-NEXT: [[BFL:%.*]] = load i24, ptr [[E]], align 1 +// CHECK-NEXT: [[BFShl:%.*]] = shl i24 [[BFL]], 9 +// CHECK-NEXT: [[BFAshr:%.*]] = ashr i24 [[BFShl]], 9 +// 
CHECK-NEXT: [[BFC:%.*]] = sext i24 [[BFAshr]] to i32 +// CHECK-NEXT: [[W:%.*]] = insertelement <4 x i32> [[X]], i32 [[BFC]], i64 1 +// CHECK-NEXT: [[V:%.*]] = load float, ptr [[Gep2]], align 4 +// CHECK-NEXT: [[C4:%.*]] = fptosi float [[V]] to i32 +// CHECK-NEXT: [[U:%.*]] = insertelement <4 x i32> [[W]], i32 [[C4]], i64 2 +// CHECK-NEXT: [[T:%.*]] = load i32, ptr [[Gep3]], align 4 +// CHECK-NEXT: [[S:%.*]] = insertelement <4 x i32> [[U]], i32 [[T]], i64 3 +// CHECK-NEXT: store <4 x i32> [[S]], ptr [[A]], align 16 +// CHECK-NEXT: ret void +export void call6(Derived D) { + int4 A = (int4)D; +} diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl index c0c22bc..7cc83c0 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -109,8 +109,8 @@ // GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" // GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" // GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+cluster,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" -// GFX1251: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+cluster,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" +// GFX1250: 
"target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" +// GFX1251: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" // GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64" diff --git a/clang/test/Misc/amdgcn.languageOptsOpenCL.cl b/clang/test/Misc/amdgcn.languageOptsOpenCL.cl index 50c78d7..80c0825 100644 --- a/clang/test/Misc/amdgcn.languageOptsOpenCL.cl +++ b/clang/test/Misc/amdgcn.languageOptsOpenCL.cl @@ -8,6 +8,9 @@ // RUN: %clang_cc1 -x cl -cl-std=CL1.2 %s -verify -triple amdgcn-unknown-unknown -Wpedantic-core-features -DTEST_CORE_FEATURES // RUN: %clang_cc1 -x cl -cl-std=CL2.0 %s -verify -triple amdgcn-unknown-unknown -Wpedantic-core-features -DTEST_CORE_FEATURES +// RUN: %clang_cc1 -x cl -cl-std=CL3.0 %s -verify -triple amdgcn-unknown-unknown -Wpedantic-core-features -DTEST_CORE_FEATURES +// RUN: %clang_cc1 -x cl -cl-std=CL3.0 %s -verify -triple amdgcn-unknown-unknown -target-cpu gfx700 -Wpedantic-core-features -DTEST_CORE_FEATURES -DFLAT_SUPPORT + // Extensions in all versions #ifndef cl_clang_storage_class_specifiers #error "Missing cl_clang_storage_class_specifiers define" @@ -156,10 +159,31 @@ #pragma OPENCL EXTENSION cl_amd_media_ops2: enable #if (__OPENCL_C_VERSION__ >= 300) -#ifndef __opencl_c_generic_address_space -#error "Missing __opencl_c_generic_address_space define" -#else -#error "Incorrect __opencl_c_generic_address_space define" + #ifndef __opencl_c_program_scope_global_variables + #error "Missing __opencl_c_program_scope_global_variables define" + #endif #endif -#pragma OPENCL EXTENSION __opencl_c_generic_address_space: enable + +#if (__OPENCL_C_VERSION__ >= 300) + #ifdef FLAT_SUPPORT + #ifndef __opencl_c_generic_address_space + #error "Missing __opencl_c_generic_address_space define" + #endif + #else + #ifdef __opencl_c_generic_address_space + #error "Incorrect __opencl_c_generic_address_space define" + #endif + #endif +#endif + +#if (__OPENCL_C_VERSION__ >= 300) + #ifdef FLAT_SUPPORT + #ifndef __opencl_c_device_enqueue + #error "Missing __opencl_c_device_enqueue define" + #endif + #else + #ifdef 
__opencl_c_device_enqueue + #error "Incorrect __opencl_c_device_enqueue define" + #endif + #endif #endif diff --git a/clang/test/SemaCXX/constant-expression-p2280r4.cpp b/clang/test/SemaCXX/constant-expression-p2280r4.cpp index 78e2e17..5cbfaff 100644 --- a/clang/test/SemaCXX/constant-expression-p2280r4.cpp +++ b/clang/test/SemaCXX/constant-expression-p2280r4.cpp @@ -44,7 +44,7 @@ void splash(Swim& swam) { // nointerpreter-note {{declared here} static_assert(how_many(swam) == 28); // ok static_assert(Swim().lochte() == 12); // ok static_assert(swam.lochte() == 12); // expected-error {{static assertion expression is not an integral constant expression}} \ - // nointerpreter-note {{virtual function called on object 'swam' whose dynamic type is not constant}} + // expected-note {{virtual function called on object 'swam' whose dynamic type is not constant}} static_assert(swam.coughlin == 12); // expected-error {{static assertion expression is not an integral constant expression}} \ // nointerpreter-note {{read of variable 'swam' whose value is not known}} } diff --git a/clang/test/SemaHLSL/Language/AggregateSplatCast-errors.hlsl b/clang/test/SemaHLSL/Language/AggregateSplatCast-errors.hlsl index 2320e13..fbb47bd 100644 --- a/clang/test/SemaHLSL/Language/AggregateSplatCast-errors.hlsl +++ b/clang/test/SemaHLSL/Language/AggregateSplatCast-errors.hlsl @@ -13,12 +13,6 @@ struct R { }; }; -// casting types which contain bitfields is not yet supported. -export void cantCast() { - S s = (S)1; - // expected-error@-1 {{no matching conversion for C-style cast from 'int' to 'S'}} -} - // Can't cast a union export void cantCast2() { R r = (R)1; diff --git a/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl b/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl index 3059150..d9f50e9 100644 --- a/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl +++ b/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl @@ -7,27 +7,6 @@ export void cantCast() { // expected-error@-1 {{C-style cast from 'int[3]' to 'int[4]' is not allowed}} } -struct S { -// expected-note@-1 {{candidate constructor (the implicit copy constructor) not viable: no known conversion from 'int2' (aka 'vector<int, 2>') to 'const S' for 1st argument}} -// expected-note@-2 {{candidate constructor (the implicit move constructor) not viable: no known conversion from 'int2' (aka 'vector<int, 2>') to 'S' for 1st argument}} -// expected-note@-3 {{candidate constructor (the implicit default constructor) not viable: requires 0 arguments, but 1 was provided}} - int A : 8; - int B; -}; - -// casting types which contain bitfields is not yet supported. 
-export void cantCast2() { - S s = {1,2}; - int2 C = (int2)s; - // expected-error@-1 {{cannot convert 'S' to 'int2' (aka 'vector<int, 2>') without a conversion operator}} -} - -export void cantCast3() { - int2 C = {1,2}; - S s = (S)C; - // expected-error@-1 {{no matching conversion for C-style cast from 'int2' (aka 'vector<int, 2>') to 'S'}} -} - struct R { // expected-note@-1 {{candidate constructor (the implicit copy constructor) not viable: no known conversion from 'int2' (aka 'vector<int, 2>') to 'const R' for 1st argument}} // expected-note@-2 {{candidate constructor (the implicit move constructor) not viable: no known conversion from 'int2' (aka 'vector<int, 2>') to 'R' for 1st argument}} diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp index b46a810..28e3522 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp @@ -2412,7 +2412,11 @@ TSAN_INTERCEPTOR(int, vfork, int fake) { } #endif -#if SANITIZER_LINUX +#if SANITIZER_LINUX && !SANITIZER_ANDROID +// Bionic's pthread_create internally calls clone. When the CLONE_THREAD flag +// is set, clone does not create a new process but a new thread. As a +// workaround on Android, do not intercept clone; this solves the problem in +// most scenarios. TSAN_INTERCEPTOR(int, clone, int (*fn)(void *), void *stack, int flags, void *arg, int *parent_tid, void *tls, pid_t *child_tid) { SCOPED_INTERCEPTOR_RAW(clone, fn, stack, flags, arg, parent_tid, tls, @@ -3135,7 +3139,7 @@ void InitializeInterceptors() { TSAN_INTERCEPT(fork); TSAN_INTERCEPT(vfork); -#if SANITIZER_LINUX +#if SANITIZER_LINUX && !SANITIZER_ANDROID TSAN_INTERCEPT(clone); #endif #if !SANITIZER_ANDROID diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp index 4b55aab..6b65387 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp @@ -486,8 +486,20 @@ int ExtractRecvmsgFDs(void *msgp, int *fds, int nfd) { // Reverse operation of libc stack pointer mangling static uptr UnmangleLongJmpSp(uptr mangled_sp) { -#if defined(__x86_64__) -# if SANITIZER_LINUX +# if SANITIZER_ANDROID + if (longjmp_xor_key == 0) { + // bionic libc initialization process: __libc_init_globals -> + // __libc_init_vdso (calls strcmp) -> __libc_init_setjmp_cookie. strcmp is + // intercepted by TSan, so during TSan initialization the setjmp_cookie + // remains uninitialized. On Android, longjmp_xor_key must be set on first + // use. 
+ InitializeLongjmpXorKey(); + CHECK_NE(longjmp_xor_key, 0); + } +# endif + +# if defined(__x86_64__) +# if SANITIZER_LINUX // Reverse of: // xor %fs:0x30, %rsi // rol $0x11, %rsi @@ -542,13 +554,23 @@ static uptr UnmangleLongJmpSp(uptr mangled_sp) { # else # define LONG_JMP_SP_ENV_SLOT 2 # endif -#elif SANITIZER_LINUX -# ifdef __aarch64__ -# define LONG_JMP_SP_ENV_SLOT 13 -# elif defined(__loongarch__) -# define LONG_JMP_SP_ENV_SLOT 1 -# elif defined(__mips64) -# define LONG_JMP_SP_ENV_SLOT 1 +# elif SANITIZER_ANDROID +# ifdef __aarch64__ +# define LONG_JMP_SP_ENV_SLOT 3 +# elif SANITIZER_RISCV64 +# define LONG_JMP_SP_ENV_SLOT 3 +# elif defined(__x86_64__) +# define LONG_JMP_SP_ENV_SLOT 6 +# else +# error unsupported +# endif +# elif SANITIZER_LINUX +# ifdef __aarch64__ +# define LONG_JMP_SP_ENV_SLOT 13 +# elif defined(__loongarch__) +# define LONG_JMP_SP_ENV_SLOT 1 +# elif defined(__mips64) +# define LONG_JMP_SP_ENV_SLOT 1 # elif SANITIZER_RISCV64 # define LONG_JMP_SP_ENV_SLOT 13 # elif defined(__s390x__) @@ -556,7 +578,7 @@ static uptr UnmangleLongJmpSp(uptr mangled_sp) { # else # define LONG_JMP_SP_ENV_SLOT 6 # endif -#endif +# endif uptr ExtractLongJmpSp(uptr *env) { uptr mangled_sp = env[LONG_JMP_SP_ENV_SLOT]; @@ -653,7 +675,12 @@ ThreadState *cur_thread() { } CHECK_EQ(0, internal_sigprocmask(SIG_SETMASK, &oldset, nullptr)); } - return thr; + + // Skia calls mallopt(M_THREAD_DISABLE_MEM_INIT, 1), which sets the least + // significant bit of TLS_SLOT_SANITIZER to 1. Scudo allocator uses this bit + // as a flag to disable memory initialization. Mask the bit off to recover + // the correct ThreadState pointer. + return reinterpret_cast<ThreadState*>(addr & ~1ULL); } void set_cur_thread(ThreadState *thr) { diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp index b1464cc..978d853 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp @@ -206,10 +206,14 @@ void ThreadStart(ThreadState *thr, Tid tid, ThreadID os_id, } #endif -#if !SANITIZER_GO +#if !SANITIZER_GO && !SANITIZER_ANDROID // Don't imitate stack/TLS writes for the main thread, // because its initialization is synchronized with all // subsequent threads anyway. + // Because thr is created by MmapOrDie, the thr object + // is not in TLS; the pointer to it is stored in the + // TLS_SLOT_SANITIZER slot. So skip this check on + // Android. if (tid != kMainTid) { if (stk_addr && stk_size) { const uptr pc = StackTrace::GetNextInstructionPc( diff --git a/flang/docs/FortranLLVMTestSuite.md b/flang/docs/FortranLLVMTestSuite.md index 8d9daa4..17083b4 100644 --- a/flang/docs/FortranLLVMTestSuite.md +++ b/flang/docs/FortranLLVMTestSuite.md @@ -73,3 +73,5 @@ instructions described [above](#running-the-llvm-test-suite-with-fortran). There are additional configure-time options that can be used with the gfortran tests. More details about those options and their purpose can be found in [`Fortran/gfortran/README.md`](https://github.com/llvm/llvm-test-suite/tree/main/Fortran/gfortran/README.md). + + These tests are Free Software and are shared under the terms of the GNU General Public License (GPL). For more details, please see the accompanying [`LICENSE`](https://github.com/llvm/llvm-test-suite/tree/main/Fortran/gfortran/LICENSE.txt) file. 
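A note on the UnmangleLongJmpSp change in the tsan_platform_linux.cpp hunk above: the in-code comment gives the x86_64 Linux mangling as "xor %fs:0x30, %rsi" followed by "rol $0x11, %rsi", so unmangling applies the inverse operations in reverse order. A minimal sketch of that reversal, assuming a 64-bit uptr and with `key` standing in for the per-process setjmp cookie (read from %fs:0x30 on glibc, or recovered via InitializeLongjmpXorKey() on Bionic):

static uptr UnmangleSpSketch(uptr mangled_sp, uptr key) {
  // Undo "rol $0x11": rotate right by 17 bits.
  uptr sp = (mangled_sp >> 17) | (mangled_sp << (64 - 17));
  // Undo the xor with the per-process cookie.
  return sp ^ key;
}
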
diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst index 3c7175c..c7f01e6 100644 --- a/libcxx/docs/FeatureTestMacroTable.rst +++ b/libcxx/docs/FeatureTestMacroTable.rst @@ -488,6 +488,8 @@ Status ---------------------------------------------------------- ----------------- ``__cpp_lib_ranges_concat`` *unimplemented* ---------------------------------------------------------- ----------------- + ``__cpp_lib_ranges_indices`` ``202506L`` + ---------------------------------------------------------- ----------------- ``__cpp_lib_ratio`` ``202306L`` ---------------------------------------------------------- ----------------- ``__cpp_lib_rcu`` *unimplemented* diff --git a/libcxx/docs/ReleaseNotes/22.rst b/libcxx/docs/ReleaseNotes/22.rst index 8d023a1..ec23ba9 100644 --- a/libcxx/docs/ReleaseNotes/22.rst +++ b/libcxx/docs/ReleaseNotes/22.rst @@ -42,6 +42,7 @@ Implemented Papers is implemented in this release) - P3044R2: sub-``string_view`` from ``string`` (`Github <https://llvm.org/PR148140>`__) - P3223R2: Making ``std::istream::ignore`` less surprising (`Github <https://llvm.org/PR148178>`__) +- P3060R3: Add ``std::views::indices(n)`` (`Github <https://llvm.org/PR148175>`__) - P3168R2: Give ``std::optional`` Range Support (`Github <https://llvm.org/PR105430>`__) Improvements and New Features diff --git a/libcxx/docs/Status/Cxx2cPapers.csv b/libcxx/docs/Status/Cxx2cPapers.csv index 4e0918b..69b9984 100644 --- a/libcxx/docs/Status/Cxx2cPapers.csv +++ b/libcxx/docs/Status/Cxx2cPapers.csv @@ -149,7 +149,7 @@ "`P3503R3 <https://wg21.link/P3503R3>`__","Make type-erased allocator use in ``promise`` and ``packaged_task`` consistent","2025-06 (Sofia)","","","`#148164 <https://github.com/llvm/llvm-project/issues/148164>`__","" "`P3008R6 <https://wg21.link/P3008R6>`__","Atomic floating-point min/max","2025-06 (Sofia)","","","`#148168 <https://github.com/llvm/llvm-project/issues/148168>`__","" "`P3111R8 <https://wg21.link/P3111R8>`__","Atomic Reduction Operations","2025-06 (Sofia)","","","`#148174 <https://github.com/llvm/llvm-project/issues/148174>`__","" -"`P3060R3 <https://wg21.link/P3060R3>`__","Add ``std::views::indices(n)``","2025-06 (Sofia)","","","`#148175 <https://github.com/llvm/llvm-project/issues/148175>`__","" +"`P3060R3 <https://wg21.link/P3060R3>`__","Add ``std::views::indices(n)``","2025-06 (Sofia)","|Complete|","22","`#148175 <https://github.com/llvm/llvm-project/issues/148175>`__","" "`P2319R5 <https://wg21.link/P2319R5>`__","Prevent ``path`` presentation problems","2025-06 (Sofia)","","","`#148177 <https://github.com/llvm/llvm-project/issues/148177>`__","" "`P3223R2 <https://wg21.link/P3223R2>`__","Making ``std::istream::ignore`` less surprising","2025-06 (Sofia)","|Complete|","22","`#148178 <https://github.com/llvm/llvm-project/issues/148178>`__","" "`P2781R9 <https://wg21.link/P2781R9>`__","``std::constant_wrapper``","2025-06 (Sofia)","","","`#148179 <https://github.com/llvm/llvm-project/issues/148179>`__","" diff --git a/libcxx/include/__ranges/iota_view.h b/libcxx/include/__ranges/iota_view.h index 32ff340..22adc22 100644 --- a/libcxx/include/__ranges/iota_view.h +++ b/libcxx/include/__ranges/iota_view.h @@ -393,6 +393,15 @@ struct __fn { inline namespace __cpo { inline constexpr auto iota = __iota::__fn{}; } // namespace __cpo + +# if _LIBCPP_STD_VER >= 26 + +inline constexpr auto indices = [](__integer_like auto __size) static { + return ranges::views::iota(decltype(__size){}, __size); +}; + +# endif + } // namespace views } // namespace ranges 
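For context on the views::indices definition just above: it expands to views::iota(T{}, n), where T is the integer-like type of n, so it yields 0, 1, ..., n-1 in n's own type. A minimal usage sketch (a hypothetical example, not part of the patch; requires -std=c++26):

#include <cstdio>
#include <ranges>
#include <vector>

int main() {
  std::vector<int> v{10, 20, 30};
  // i has the same type as v.size() (std::size_t), which avoids the
  // signed/unsigned mismatch of `for (int i = 0; i < v.size(); ++i)`.
  for (auto i : std::views::indices(v.size()))
    std::printf("v[%zu] = %d\n", i, v[i]);
}
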
diff --git a/libcxx/include/ranges b/libcxx/include/ranges index 96d7a6b..cfaa66a 100644 --- a/libcxx/include/ranges +++ b/libcxx/include/ranges @@ -267,6 +267,11 @@ namespace std::ranges { template<class W, class Bound> inline constexpr bool enable_borrowed_range<iota_view<W, Bound>> = true; + namespace views { + inline constexpr unspecified iota = unspecified; + inline constexpr unspecified indices = unspecified; // Since C++26 + } + // [range.repeat], repeat view template<class T> concept integer-like-with-usable-difference-type = // exposition only diff --git a/libcxx/include/version b/libcxx/include/version index a132f08..99e6929 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -205,6 +205,7 @@ __cpp_lib_ranges_chunk_by 202202L <ranges> __cpp_lib_ranges_concat 202403L <ranges> __cpp_lib_ranges_contains 202207L <algorithm> __cpp_lib_ranges_find_last 202207L <algorithm> +__cpp_lib_ranges_indices 202506L <ranges> __cpp_lib_ranges_iota 202202L <numeric> __cpp_lib_ranges_join_with 202202L <ranges> __cpp_lib_ranges_repeat 202207L <ranges> @@ -591,6 +592,7 @@ __cpp_lib_void_t 201411L <type_traits> # define __cpp_lib_out_ptr 202311L // # define __cpp_lib_philox_engine 202406L // # define __cpp_lib_ranges_concat 202403L +# define __cpp_lib_ranges_indices 202506L # define __cpp_lib_ratio 202306L // # define __cpp_lib_rcu 202306L # define __cpp_lib_reference_wrapper 202403L diff --git a/libcxx/modules/std/ranges.inc b/libcxx/modules/std/ranges.inc index 7ede42e..cc7daa3 100644 --- a/libcxx/modules/std/ranges.inc +++ b/libcxx/modules/std/ranges.inc @@ -114,6 +114,9 @@ export namespace std { namespace views { using std::ranges::views::iota; +#if _LIBCPP_STD_VER >= 26 + using std::ranges::views::indices; +#endif } // namespace views #if _LIBCPP_STD_VER >= 23 diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/ranges.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/ranges.version.compile.pass.cpp index df19f03..5116864 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/ranges.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/ranges.version.compile.pass.cpp @@ -48,6 +48,10 @@ # error "__cpp_lib_ranges_concat should not be defined before c++26" # endif +# ifdef __cpp_lib_ranges_indices +# error "__cpp_lib_ranges_indices should not be defined before c++26" +# endif + # ifdef __cpp_lib_ranges_join_with # error "__cpp_lib_ranges_join_with should not be defined before c++23" # endif @@ -98,6 +102,10 @@ # error "__cpp_lib_ranges_concat should not be defined before c++26" # endif +# ifdef __cpp_lib_ranges_indices +# error "__cpp_lib_ranges_indices should not be defined before c++26" +# endif + # ifdef __cpp_lib_ranges_join_with # error "__cpp_lib_ranges_join_with should not be defined before c++23" # endif @@ -148,6 +156,10 @@ # error "__cpp_lib_ranges_concat should not be defined before c++26" # endif +# ifdef __cpp_lib_ranges_indices +# error "__cpp_lib_ranges_indices should not be defined before c++26" +# endif + # ifdef __cpp_lib_ranges_join_with # error "__cpp_lib_ranges_join_with should not be defined before c++23" # endif @@ -201,6 +213,10 @@ # error "__cpp_lib_ranges_concat should not be defined before c++26" # endif +# ifdef __cpp_lib_ranges_indices +# error "__cpp_lib_ranges_indices should not be defined before c++26" +# endif + # ifdef __cpp_lib_ranges_join_with # error "__cpp_lib_ranges_join_with should not be 
defined before c++23" # endif @@ -278,6 +294,10 @@ # error "__cpp_lib_ranges_concat should not be defined before c++26" # endif +# ifdef __cpp_lib_ranges_indices +# error "__cpp_lib_ranges_indices should not be defined before c++26" +# endif + # ifndef __cpp_lib_ranges_join_with # error "__cpp_lib_ranges_join_with should be defined in c++23" # endif @@ -400,6 +420,13 @@ # endif # endif +# ifndef __cpp_lib_ranges_indices +# error "__cpp_lib_ranges_indices should be defined in c++26" +# endif +# if __cpp_lib_ranges_indices != 202506L +# error "__cpp_lib_ranges_indices should have the value 202506L in c++26" +# endif + # ifndef __cpp_lib_ranges_join_with # error "__cpp_lib_ranges_join_with should be defined in c++26" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp index 6aa704a..9a8a1da 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp @@ -664,6 +664,10 @@ # error "__cpp_lib_ranges_find_last should not be defined before c++23" # endif +# ifdef __cpp_lib_ranges_indices +# error "__cpp_lib_ranges_indices should not be defined before c++26" +# endif + # ifdef __cpp_lib_ranges_iota # error "__cpp_lib_ranges_iota should not be defined before c++23" # endif @@ -1608,6 +1612,10 @@ # error "__cpp_lib_ranges_find_last should not be defined before c++23" # endif +# ifdef __cpp_lib_ranges_indices +# error "__cpp_lib_ranges_indices should not be defined before c++26" +# endif + # ifdef __cpp_lib_ranges_iota # error "__cpp_lib_ranges_iota should not be defined before c++23" # endif @@ -2723,6 +2731,10 @@ # error "__cpp_lib_ranges_find_last should not be defined before c++23" # endif +# ifdef __cpp_lib_ranges_indices +# error "__cpp_lib_ranges_indices should not be defined before c++26" +# endif + # ifdef __cpp_lib_ranges_iota # error "__cpp_lib_ranges_iota should not be defined before c++23" # endif @@ -4111,6 +4123,10 @@ # error "__cpp_lib_ranges_find_last should not be defined before c++23" # endif +# ifdef __cpp_lib_ranges_indices +# error "__cpp_lib_ranges_indices should not be defined before c++26" +# endif + # ifdef __cpp_lib_ranges_iota # error "__cpp_lib_ranges_iota should not be defined before c++23" # endif @@ -5694,6 +5710,10 @@ # error "__cpp_lib_ranges_find_last should have the value 202207L in c++23" # endif +# ifdef __cpp_lib_ranges_indices +# error "__cpp_lib_ranges_indices should not be defined before c++26" +# endif + # ifndef __cpp_lib_ranges_iota # error "__cpp_lib_ranges_iota should be defined in c++23" # endif @@ -7610,6 +7630,13 @@ # error "__cpp_lib_ranges_find_last should have the value 202207L in c++26" # endif +# ifndef __cpp_lib_ranges_indices +# error "__cpp_lib_ranges_indices should be defined in c++26" +# endif +# if __cpp_lib_ranges_indices != 202506L +# error "__cpp_lib_ranges_indices should have the value 202506L in c++26" +# endif + # ifndef __cpp_lib_ranges_iota # error "__cpp_lib_ranges_iota should be defined in c++26" # endif diff --git a/libcxx/test/std/library/description/conventions/customization.point.object/cpo.compile.pass.cpp b/libcxx/test/std/library/description/conventions/customization.point.object/cpo.compile.pass.cpp index 4949787..7e2510f 100644 --- 
a/libcxx/test/std/library/description/conventions/customization.point.object/cpo.compile.pass.cpp +++ b/libcxx/test/std/library/description/conventions/customization.point.object/cpo.compile.pass.cpp @@ -89,6 +89,9 @@ static_assert(test(std::ranges::ssize, a)); // views::empty<T> is not a CPO static_assert(test(std::views::iota, 1)); static_assert(test(std::views::iota, 1, 10)); +#if TEST_STD_VER >= 26 +static_assert(test(std::views::indices, 10)); +#endif #ifndef TEST_HAS_NO_LOCALIZATION static_assert(test(std::views::istream<int>, stream)); #endif diff --git a/libcxx/test/std/ranges/range.factories/range.iota.view/indices.pass.cpp b/libcxx/test/std/ranges/range.factories/range.iota.view/indices.pass.cpp new file mode 100644 index 0000000..d92b6cb --- /dev/null +++ b/libcxx/test/std/ranges/range.factories/range.iota.view/indices.pass.cpp @@ -0,0 +1,97 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// ranges + +// inline constexpr unspecified indices = unspecified; + +#include <cassert> +#include <cstddef> +#include <ranges> +#include <vector> + +#include "test_macros.h" +#define TEST_HAS_NO_INT128 // Size cannot be larger than 64 bits +#include "type_algorithms.h" + +#include "types.h" + +// Test SFINAE. + +template <typename SizeType> +concept HasIndices = requires(SizeType s) { std::ranges::views::indices(s); }; + +struct NotIntegerLike {}; + +void test_SFINAE() { + static_assert(HasIndices<std::size_t>); + types::for_each(types::integer_types(), []<typename T> { static_assert(HasIndices<T>); }); + + // Non-integer-like types should not satisfy HasIndices + static_assert(!HasIndices<bool>); + static_assert(!HasIndices<float>); + static_assert(!HasIndices<void>); + static_assert(!HasIndices<SomeInt>); // Does satisfy is_integer_like, but not the conversion to std::size_t + static_assert(!HasIndices<NotIntegerLike>); +} + +constexpr bool test() { + { + auto indices_view = std::ranges::views::indices(5); + static_assert(std::ranges::range<decltype(indices_view)>); + + assert(indices_view.size() == 5); + + assert(indices_view[0] == 0); + assert(indices_view[1] == 1); + assert(indices_view[2] == 2); + assert(indices_view[3] == 3); + assert(indices_view[4] == 4); + } + + { + std::vector v(5, 0); + + auto indices_view = std::ranges::views::indices(std::ranges::size(v)); + static_assert(std::ranges::range<decltype(indices_view)>); + + assert(indices_view.size() == 5); + + assert(indices_view[0] == 0); + assert(indices_view[1] == 1); + assert(indices_view[2] == 2); + assert(indices_view[3] == 3); + assert(indices_view[4] == 4); + } + + { + std::vector v(5, SomeInt{}); + + auto indices_view = std::ranges::views::indices(std::ranges::size(v)); + static_assert(std::ranges::range<decltype(indices_view)>); + + assert(indices_view.size() == 5); + + assert(indices_view[0] == 0); + assert(indices_view[1] == 1); + assert(indices_view[2] == 2); + assert(indices_view[3] == 3); + assert(indices_view[4] == 4); + } + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index 
5d469d4..2d5b66d9 100644 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -1114,6 +1114,11 @@ feature_test_macros = [ "headers": ["algorithm"], }, { + "name": "__cpp_lib_ranges_indices", + "values": {"c++26": 202506}, + "headers": ["ranges"], + }, + { "name": "__cpp_lib_ranges_iota", "values": {"c++23": 202202}, "headers": ["numeric"], diff --git a/lldb/docs/resources/extensions.rst b/lldb/docs/resources/extensions.rst index 30bd6d5..61fffe7 100644 --- a/lldb/docs/resources/extensions.rst +++ b/lldb/docs/resources/extensions.rst @@ -134,5 +134,5 @@ Objective-C runtime Clang emits the Objective-C runtime version into the ``DW_TAG_compile_unit`` using the -``DW_AT_APPLE_major_runtime_version`` attribute. The value 2 stands +``DW_AT_APPLE_major_runtime_vers`` attribute. The value 2 stands for Objective-C 2.0. diff --git a/llvm/include/llvm/CAS/OnDiskDataAllocator.h b/llvm/include/llvm/CAS/OnDiskDataAllocator.h new file mode 100644 index 0000000..2809df8 --- /dev/null +++ b/llvm/include/llvm/CAS/OnDiskDataAllocator.h @@ -0,0 +1,95 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file declares the interface for OnDiskDataAllocator, a file-backed +/// data pool that can be used to allocate space for storing data packed in a +/// single file. It is based on MappedFileRegionArena and includes a header at +/// the beginning to provide metadata. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CAS_ONDISKDATAALLOCATOR_H +#define LLVM_CAS_ONDISKDATAALLOCATOR_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/CAS/FileOffset.h" +#include "llvm/Support/Error.h" + +namespace llvm::cas { + +/// Sink for data. Stores variable-length data with 8-byte alignment. Does not +/// track the size of the data, which is assumed to be known from context or +/// embedded in it. Uses 0-padding but does not guarantee 0-termination. +class OnDiskDataAllocator { +public: + using ValueProxy = MutableArrayRef<char>; + + /// A pointer to data stored on disk. + class OnDiskPtr { + public: + FileOffset getOffset() const { return Offset; } + explicit operator bool() const { return bool(getOffset()); } + const ValueProxy &operator*() const { + assert(Offset && "Null dereference"); + return Value; + } + const ValueProxy *operator->() const { + assert(Offset && "Null dereference"); + return &Value; + } + + OnDiskPtr() = default; + + private: + friend class OnDiskDataAllocator; + OnDiskPtr(FileOffset Offset, ValueProxy Value) + : Offset(Offset), Value(Value) {} + FileOffset Offset; + ValueProxy Value; + }; + + /// Get the data of \p Size stored at the given \p Offset. Note the allocator + /// doesn't keep track of the allocation size, thus \p Size doesn't need to + /// match the size of the allocation, but it must not be larger. + Expected<ArrayRef<char>> get(FileOffset Offset, size_t Size) const; + + /// Allocate at least \p Size with 8-byte alignment. + Expected<OnDiskPtr> allocate(size_t Size); + + /// \returns the buffer that was allocated at \p create time, with size + /// \p UserHeaderSize.
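+ /// The user header is zero-filled when the file is first created and is then + /// handed to the \p UserHeaderInit callback of \a create(), so it can carry + /// application-defined metadata.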
+ MutableArrayRef<uint8_t> getUserHeader(); + + size_t size() const; + size_t capacity() const; + + static Expected<OnDiskDataAllocator> + create(const Twine &Path, const Twine &TableName, uint64_t MaxFileSize, + std::optional<uint64_t> NewFileInitialSize, + uint32_t UserHeaderSize = 0, + function_ref<void(void *)> UserHeaderInit = nullptr); + + OnDiskDataAllocator(OnDiskDataAllocator &&RHS); + OnDiskDataAllocator &operator=(OnDiskDataAllocator &&RHS); + + // No copy. Just call \a create() again. + OnDiskDataAllocator(const OnDiskDataAllocator &) = delete; + OnDiskDataAllocator &operator=(const OnDiskDataAllocator &) = delete; + + ~OnDiskDataAllocator(); + +private: + struct ImplType; + explicit OnDiskDataAllocator(std::unique_ptr<ImplType> Impl); + std::unique_ptr<ImplType> Impl; +}; + +} // namespace llvm::cas + +#endif // LLVM_CAS_ONDISKDATAALLOCATOR_H diff --git a/llvm/include/llvm/CAS/OnDiskTrieRawHashMap.h b/llvm/include/llvm/CAS/OnDiskTrieRawHashMap.h index 5e41bf6..fbd68d0 100644 --- a/llvm/include/llvm/CAS/OnDiskTrieRawHashMap.h +++ b/llvm/include/llvm/CAS/OnDiskTrieRawHashMap.h @@ -133,38 +133,38 @@ public: bool IsValue = false; }; - class pointer; - class const_pointer : public PointerImpl<ConstValueProxy> { + class OnDiskPtr; + class ConstOnDiskPtr : public PointerImpl<ConstValueProxy> { public: - const_pointer() = default; + ConstOnDiskPtr() = default; private: - friend class pointer; + friend class OnDiskPtr; friend class OnDiskTrieRawHashMap; - using const_pointer::PointerImpl::PointerImpl; + using ConstOnDiskPtr::PointerImpl::PointerImpl; }; - class pointer : public PointerImpl<ValueProxy> { + class OnDiskPtr : public PointerImpl<ValueProxy> { public: - operator const_pointer() const { - return const_pointer(Value, getOffset(), IsValue); + operator ConstOnDiskPtr() const { + return ConstOnDiskPtr(Value, getOffset(), IsValue); } - pointer() = default; + OnDiskPtr() = default; private: friend class OnDiskTrieRawHashMap; - using pointer::PointerImpl::PointerImpl; + using OnDiskPtr::PointerImpl::PointerImpl; }; /// Find the value from its hash. /// /// \returns a pointer to the value if it exists; otherwise returns a non-value /// pointer that evaluates to `false` when converted to boolean. - const_pointer find(ArrayRef<uint8_t> Hash) const; + ConstOnDiskPtr find(ArrayRef<uint8_t> Hash) const; /// Helper function to recover a pointer into the trie from a file offset. - Expected<const_pointer> recoverFromFileOffset(FileOffset Offset) const; + Expected<ConstOnDiskPtr> recoverFromFileOffset(FileOffset Offset) const; using LazyInsertOnConstructCB = function_ref<void(FileOffset TentativeOffset, ValueProxy TentativeValue)>; @@ -186,11 +186,11 @@ public: /// The in-memory \a TrieRawHashMap uses LazyAtomicPointer to synchronize /// simultaneous writes, but that seems dangerous to use in a memory-mapped /// file in case a process crashes in the busy state.
- Expected<pointer> insertLazy(ArrayRef<uint8_t> Hash, - LazyInsertOnConstructCB OnConstruct = nullptr, - LazyInsertOnLeakCB OnLeak = nullptr); + Expected<OnDiskPtr> insertLazy(ArrayRef<uint8_t> Hash, + LazyInsertOnConstructCB OnConstruct = nullptr, + LazyInsertOnLeakCB OnLeak = nullptr); - Expected<pointer> insert(const ConstValueProxy &Value) { + Expected<OnDiskPtr> insert(const ConstValueProxy &Value) { return insertLazy(Value.Hash, [&](FileOffset, ValueProxy Allocated) { assert(Allocated.Hash == Value.Hash); assert(Allocated.Data.size() == Value.Data.size()); diff --git a/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h b/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h index 754714d..eaca0a8 100644 --- a/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h @@ -79,6 +79,19 @@ public: LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; +/// Additional 'norecurse' attribute deduction during the postlink LTO phase. +/// +/// This is a module pass that infers the 'norecurse' attribute on functions. +/// It runs during LTO and analyzes the module's call graph to find functions +/// that are guaranteed not to call themselves, either directly or indirectly. +/// The pass uses a module-wide flag that records whether any function's address +/// is taken or any function in the module has external linkage, to safely +/// handle indirect and library function calls from the current function. +class NoRecurseLTOInferencePass + : public PassInfoMixin<NoRecurseLTOInferencePass> { +public: + LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); +}; } // end namespace llvm #endif // LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H diff --git a/llvm/lib/CAS/CMakeLists.txt b/llvm/lib/CAS/CMakeLists.txt index 7ae5f7e..bca39b6 100644 --- a/llvm/lib/CAS/CMakeLists.txt +++ b/llvm/lib/CAS/CMakeLists.txt @@ -7,6 +7,7 @@ add_llvm_component_library(LLVMCAS MappedFileRegionArena.cpp ObjectStore.cpp OnDiskCommon.cpp + OnDiskDataAllocator.cpp OnDiskTrieRawHashMap.cpp ADDITIONAL_HEADER_DIRS diff --git a/llvm/lib/CAS/OnDiskDataAllocator.cpp b/llvm/lib/CAS/OnDiskDataAllocator.cpp new file mode 100644 index 0000000..13bbd66 --- /dev/null +++ b/llvm/lib/CAS/OnDiskDataAllocator.cpp @@ -0,0 +1,234 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file Implements OnDiskDataAllocator. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/OnDiskDataAllocator.h" +#include "DatabaseFile.h" +#include "llvm/Config/llvm-config.h" + +using namespace llvm; +using namespace llvm::cas; +using namespace llvm::cas::ondisk; + +#if LLVM_ENABLE_ONDISK_CAS + +//===----------------------------------------------------------------------===// +// DataAllocator data structures.
+//===----------------------------------------------------------------------===// + +namespace { +/// DataAllocator table layout: +/// - [8-bytes: Generic table header] +/// - 8-bytes: AllocatorOffset (reserved for implementing free lists) +/// - 8-bytes: Size for user data header +/// - <user data buffer> +/// +/// Record layout: +/// - <data> +class DataAllocatorHandle { +public: + static constexpr TableHandle::TableKind Kind = + TableHandle::TableKind::DataAllocator; + + struct Header { + TableHandle::Header GenericHeader; + std::atomic<int64_t> AllocatorOffset; + const uint64_t UserHeaderSize; + }; + + operator TableHandle() const { + if (!H) + return TableHandle(); + return TableHandle(*Region, H->GenericHeader); + } + + Expected<MutableArrayRef<char>> allocate(MappedFileRegionArena &Alloc, + size_t DataSize) { + assert(&Alloc.getRegion() == Region); + auto Ptr = Alloc.allocate(DataSize); + if (LLVM_UNLIKELY(!Ptr)) + return Ptr.takeError(); + return MutableArrayRef(*Ptr, DataSize); + } + + explicit operator bool() const { return H; } + const Header &getHeader() const { return *H; } + MappedFileRegion &getRegion() const { return *Region; } + + MutableArrayRef<uint8_t> getUserHeader() { + return MutableArrayRef(reinterpret_cast<uint8_t *>(H + 1), + H->UserHeaderSize); + } + + static Expected<DataAllocatorHandle> + create(MappedFileRegionArena &Alloc, StringRef Name, uint32_t UserHeaderSize); + + DataAllocatorHandle() = default; + DataAllocatorHandle(MappedFileRegion &Region, Header &H) + : Region(&Region), H(&H) {} + DataAllocatorHandle(MappedFileRegion &Region, intptr_t HeaderOffset) + : DataAllocatorHandle( + Region, *reinterpret_cast<Header *>(Region.data() + HeaderOffset)) { + } + +private: + MappedFileRegion *Region = nullptr; + Header *H = nullptr; +}; + +} // end anonymous namespace + +struct OnDiskDataAllocator::ImplType { + DatabaseFile File; + DataAllocatorHandle Store; +}; + +Expected<DataAllocatorHandle> +DataAllocatorHandle::create(MappedFileRegionArena &Alloc, StringRef Name, + uint32_t UserHeaderSize) { + // Allocate. + auto Offset = + Alloc.allocateOffset(sizeof(Header) + UserHeaderSize + Name.size() + 1); + if (LLVM_UNLIKELY(!Offset)) + return Offset.takeError(); + + // Construct the header and the name. + assert(Name.size() <= UINT16_MAX && "Expected smaller table name"); + auto *H = new (Alloc.getRegion().data() + *Offset) + Header{{TableHandle::TableKind::DataAllocator, + static_cast<uint16_t>(Name.size()), + static_cast<int32_t>(sizeof(Header) + UserHeaderSize)}, + /*AllocatorOffset=*/{0}, + /*UserHeaderSize=*/UserHeaderSize}; + // Memset UserHeader. + char *UserHeader = reinterpret_cast<char *>(H + 1); + memset(UserHeader, 0, UserHeaderSize); + // Write database file name (null-terminated). + char *NameStorage = UserHeader + UserHeaderSize; + llvm::copy(Name, NameStorage); + NameStorage[Name.size()] = 0; + return DataAllocatorHandle(Alloc.getRegion(), *H); +} + +Expected<OnDiskDataAllocator> OnDiskDataAllocator::create( + const Twine &PathTwine, const Twine &TableNameTwine, uint64_t MaxFileSize, + std::optional<uint64_t> NewFileInitialSize, uint32_t UserHeaderSize, + function_ref<void(void *)> UserHeaderInit) { + assert(!UserHeaderSize || UserHeaderInit); + SmallString<128> PathStorage; + StringRef Path = PathTwine.toStringRef(PathStorage); + SmallString<128> TableNameStorage; + StringRef TableName = TableNameTwine.toStringRef(TableNameStorage); + + // Constructor to run if the file doesn't exist.
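+ // The callback registers the allocator table and, when a user header was + // requested, initializes it; DatabaseFile::create is expected to invoke it + // only when it actually creates a fresh file.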
+ auto NewDBConstructor = [&](DatabaseFile &DB) -> Error { + auto Store = + DataAllocatorHandle::create(DB.getAlloc(), TableName, UserHeaderSize); + if (LLVM_UNLIKELY(!Store)) + return Store.takeError(); + + if (auto E = DB.addTable(*Store)) + return E; + + if (UserHeaderSize) + UserHeaderInit(Store->getUserHeader().data()); + return Error::success(); + }; + + // Get or create the file. + Expected<DatabaseFile> File = + DatabaseFile::create(Path, MaxFileSize, NewDBConstructor); + if (!File) + return File.takeError(); + + // Find the table and validate it. + std::optional<TableHandle> Table = File->findTable(TableName); + if (!Table) + return createTableConfigError(std::errc::argument_out_of_domain, Path, + TableName, "table not found"); + if (Error E = checkTable("table kind", (size_t)DataAllocatorHandle::Kind, + (size_t)Table->getHeader().Kind, Path, TableName)) + return std::move(E); + auto Store = Table->cast<DataAllocatorHandle>(); + assert(Store && "Already checked the kind"); + + // Success. + OnDiskDataAllocator::ImplType Impl{DatabaseFile(std::move(*File)), Store}; + return OnDiskDataAllocator(std::make_unique<ImplType>(std::move(Impl))); +} + +Expected<OnDiskDataAllocator::OnDiskPtr> +OnDiskDataAllocator::allocate(size_t Size) { + auto Data = Impl->Store.allocate(Impl->File.getAlloc(), Size); + if (LLVM_UNLIKELY(!Data)) + return Data.takeError(); + + return OnDiskPtr(FileOffset(Data->data() - Impl->Store.getRegion().data()), + *Data); +} + +Expected<ArrayRef<char>> OnDiskDataAllocator::get(FileOffset Offset, + size_t Size) const { + assert(Offset); + assert(Impl); + if (Offset.get() + Size >= Impl->File.getAlloc().size()) + return createStringError(make_error_code(std::errc::protocol_error), + "requested size too large in allocator"); + return ArrayRef<char>{Impl->File.getRegion().data() + Offset.get(), Size}; +} + +MutableArrayRef<uint8_t> OnDiskDataAllocator::getUserHeader() { + return Impl->Store.getUserHeader(); +} + +size_t OnDiskDataAllocator::size() const { return Impl->File.size(); } +size_t OnDiskDataAllocator::capacity() const { + return Impl->File.getRegion().size(); +} + +OnDiskDataAllocator::OnDiskDataAllocator(std::unique_ptr<ImplType> Impl) + : Impl(std::move(Impl)) {} + +#else // !LLVM_ENABLE_ONDISK_CAS + +struct OnDiskDataAllocator::ImplType {}; + +Expected<OnDiskDataAllocator> OnDiskDataAllocator::create( + const Twine &Path, const Twine &TableName, uint64_t MaxFileSize, + std::optional<uint64_t> NewFileInitialSize, uint32_t UserHeaderSize, + function_ref<void(void *)> UserHeaderInit) { + return createStringError(make_error_code(std::errc::not_supported), + "OnDiskDataAllocator is not supported"); +} + +Expected<OnDiskDataAllocator::OnDiskPtr> +OnDiskDataAllocator::allocate(size_t Size) { + return createStringError(make_error_code(std::errc::not_supported), + "OnDiskDataAllocator is not supported"); +} + +Expected<ArrayRef<char>> OnDiskDataAllocator::get(FileOffset Offset, + size_t Size) const { + return createStringError(make_error_code(std::errc::not_supported), + "OnDiskDataAllocator is not supported"); +} + +MutableArrayRef<uint8_t> OnDiskDataAllocator::getUserHeader() { return {}; } + +size_t OnDiskDataAllocator::size() const { return 0; } +size_t OnDiskDataAllocator::capacity() const { return 0; } + +#endif // LLVM_ENABLE_ONDISK_CAS + +OnDiskDataAllocator::OnDiskDataAllocator(OnDiskDataAllocator &&RHS) = default; +OnDiskDataAllocator & +OnDiskDataAllocator::operator=(OnDiskDataAllocator &&RHS) = default; +OnDiskDataAllocator::~OnDiskDataAllocator() = 
default; diff --git a/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp index 9403893..323b21e 100644 --- a/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp +++ b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp @@ -427,7 +427,7 @@ TrieRawHashMapHandle::createRecord(MappedFileRegionArena &Alloc, return Record; } -Expected<OnDiskTrieRawHashMap::const_pointer> +Expected<OnDiskTrieRawHashMap::ConstOnDiskPtr> OnDiskTrieRawHashMap::recoverFromFileOffset(FileOffset Offset) const { // Check alignment. if (!isAligned(MappedFileRegionArena::getAlign(), Offset.get())) @@ -448,17 +448,17 @@ OnDiskTrieRawHashMap::recoverFromFileOffset(FileOffset Offset) const { // Looks okay... TrieRawHashMapHandle::RecordData D = Impl->Trie.getRecord(SubtrieSlotValue::getDataOffset(Offset)); - return const_pointer(D.Proxy, D.getFileOffset()); + return ConstOnDiskPtr(D.Proxy, D.getFileOffset()); } -OnDiskTrieRawHashMap::const_pointer +OnDiskTrieRawHashMap::ConstOnDiskPtr OnDiskTrieRawHashMap::find(ArrayRef<uint8_t> Hash) const { TrieRawHashMapHandle Trie = Impl->Trie; assert(Hash.size() == Trie.getNumHashBytes() && "Invalid hash"); SubtrieHandle S = Trie.getRoot(); if (!S) - return const_pointer(); + return ConstOnDiskPtr(); TrieHashIndexGenerator IndexGen = Trie.getIndexGen(S, Hash); size_t Index = IndexGen.next(); @@ -466,13 +466,13 @@ OnDiskTrieRawHashMap::find(ArrayRef<uint8_t> Hash) const { // Try to set the content. SubtrieSlotValue V = S.load(Index); if (!V) - return const_pointer(); + return ConstOnDiskPtr(); // Check for an exact match. if (V.isData()) { TrieRawHashMapHandle::RecordData D = Trie.getRecord(V); - return D.Proxy.Hash == Hash ? const_pointer(D.Proxy, D.getFileOffset()) - : const_pointer(); + return D.Proxy.Hash == Hash ? ConstOnDiskPtr(D.Proxy, D.getFileOffset()) + : ConstOnDiskPtr(); } Index = IndexGen.next(); @@ -490,7 +490,7 @@ void SubtrieHandle::reinitialize(uint32_t StartBit, uint32_t NumBits) { H->NumBits = NumBits; } -Expected<OnDiskTrieRawHashMap::pointer> +Expected<OnDiskTrieRawHashMap::OnDiskPtr> OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash, LazyInsertOnConstructCB OnConstruct, LazyInsertOnLeakCB OnLeak) { @@ -523,7 +523,8 @@ OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash, } if (S->compare_exchange_strong(Index, Existing, NewRecord->Offset)) - return pointer(NewRecord->Proxy, NewRecord->Offset.asDataFileOffset()); + return OnDiskPtr(NewRecord->Proxy, + NewRecord->Offset.asDataFileOffset()); // Race means that Existing is no longer empty; fall through... } @@ -540,8 +541,8 @@ OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash, if (NewRecord && OnLeak) OnLeak(NewRecord->Offset.asDataFileOffset(), NewRecord->Proxy, ExistingRecord.Offset.asDataFileOffset(), ExistingRecord.Proxy); - return pointer(ExistingRecord.Proxy, - ExistingRecord.Offset.asDataFileOffset()); + return OnDiskPtr(ExistingRecord.Proxy, + ExistingRecord.Offset.asDataFileOffset()); } // Sink the existing content as long as the indexes match. 
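Below is a minimal sketch (not part of the patch) of the renamed handles in use. It assumes an already-created OnDiskTrieRawHashMap, relies only on calls visible in this diff, and the helper name is illustrative; getOffset() is assumed to be public on the handle, as it is on OnDiskDataAllocator::OnDiskPtr.

#include "llvm/CAS/OnDiskTrieRawHashMap.h"
#include "llvm/Support/Error.h"
#include <cassert>

using namespace llvm;
using namespace llvm::cas;

// Illustrative helper: look up a record by hash and round-trip its offset.
static void lookupSketch(const OnDiskTrieRawHashMap &Trie,
                         ArrayRef<uint8_t> Hash) {
  // find() now returns ConstOnDiskPtr (formerly const_pointer); it evaluates
  // to false when the hash is not present.
  OnDiskTrieRawHashMap::ConstOnDiskPtr Hit = Trie.find(Hash);
  if (!Hit)
    return;
  // The file offset is stable on disk and can be persisted...
  FileOffset Off = Hit.getOffset();
  // ...and later turned back into a handle.
  Expected<OnDiskTrieRawHashMap::ConstOnDiskPtr> Again =
      Trie.recoverFromFileOffset(Off);
  if (!Again) {
    consumeError(Again.takeError());
    return;
  }
  assert(*Again && "recovered handle should be a value");
}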
@@ -1135,7 +1136,7 @@ OnDiskTrieRawHashMap::create(const Twine &PathTwine, const Twine &TrieNameTwine, "OnDiskTrieRawHashMap is not supported"); } -Expected<OnDiskTrieRawHashMap::pointer> +Expected<OnDiskTrieRawHashMap::OnDiskPtr> OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash, LazyInsertOnConstructCB OnConstruct, LazyInsertOnLeakCB OnLeak) { @@ -1143,15 +1144,15 @@ OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash, "OnDiskTrieRawHashMap is not supported"); } -Expected<OnDiskTrieRawHashMap::const_pointer> +Expected<OnDiskTrieRawHashMap::ConstOnDiskPtr> OnDiskTrieRawHashMap::recoverFromFileOffset(FileOffset Offset) const { return createStringError(make_error_code(std::errc::not_supported), "OnDiskTrieRawHashMap is not supported"); } -OnDiskTrieRawHashMap::const_pointer +OnDiskTrieRawHashMap::ConstOnDiskPtr OnDiskTrieRawHashMap::find(ArrayRef<uint8_t> Hash) const { - return const_pointer(); + return ConstOnDiskPtr(); } void OnDiskTrieRawHashMap::print( diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp index 1a7a5c5..c3a472b 100644 --- a/llvm/lib/IR/Globals.cpp +++ b/llvm/lib/IR/Globals.cpp @@ -419,6 +419,7 @@ findBaseObject(const Constant *C, DenseSet<const GlobalAlias *> &Aliases, case Instruction::PtrToAddr: case Instruction::PtrToInt: case Instruction::BitCast: + case Instruction::AddrSpaceCast: case Instruction::GetElementPtr: return findBaseObject(CE->getOperand(0), Aliases, Op); default: diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 7069e8d..119caea 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1960,6 +1960,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, // is fixed. MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr)); + MPM.addPass(NoRecurseLTOInferencePass()); // Stop here at -O1. if (Level == OptimizationLevel::O1) { // The LowerTypeTestsPass needs to run to lower type metadata and the diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index f0e7d36..88550ea 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -119,6 +119,7 @@ MODULE_PASS("metarenamer", MetaRenamerPass()) MODULE_PASS("module-inline", ModuleInlinerPass()) MODULE_PASS("name-anon-globals", NameAnonGlobalPass()) MODULE_PASS("no-op-module", NoOpModulePass()) +MODULE_PASS("norecurse-lto-inference", NoRecurseLTOInferencePass()) MODULE_PASS("nsan", NumericalStabilitySanitizerPass()) MODULE_PASS("openmp-opt", OpenMPOptPass()) MODULE_PASS("openmp-opt-postlink", diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 50a8754..479e345 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -5666,18 +5666,21 @@ InstructionCost AArch64TTIImpl::getPartialReductionCost( VectorType *AccumVectorType = VectorType::get(AccumType, VF.divideCoefficientBy(Ratio)); // We don't yet support all kinds of legalization. - auto TA = TLI->getTypeAction(AccumVectorType->getContext(), - EVT::getEVT(AccumVectorType)); - switch (TA) { + auto TC = TLI->getTypeConversion(AccumVectorType->getContext(), + EVT::getEVT(AccumVectorType)); + switch (TC.first) { default: return Invalid; case TargetLowering::TypeLegal: case TargetLowering::TypePromoteInteger: case TargetLowering::TypeSplitVector: + // The legalised type (e.g. after splitting) must be legal too. 
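+ // (getTypeConversion reports a single legalization step; if the type it + // produces is still not legal, further steps would be required, so such + // accumulator types are rejected.)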
+ if (TLI->getTypeAction(AccumVectorType->getContext(), TC.second) != + TargetLowering::TypeLegal) + return Invalid; break; } - // Check what kind of type-legalisation happens. std::pair<InstructionCost, MVT> AccumLT = getTypeLegalizationCost(AccumVectorType); std::pair<InstructionCost, MVT> InputLT = diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 6b3c151..1a697f7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1448,10 +1448,10 @@ def Feature45BitNumRecordsBufferResource : SubtargetFeature< "45-bit-num-records "The buffer resource (V#) supports 45-bit num_records" >; -def FeatureCluster : SubtargetFeature< "cluster", - "HasCluster", +def FeatureClusters : SubtargetFeature< "clusters", + "HasClusters", "true", - "Has cluster support" + "Has clusters of workgroups support" >; // Dummy feature used to disable assembler instructions. @@ -2120,7 +2120,7 @@ def FeatureISAVersion12_50 : FeatureSet< Feature45BitNumRecordsBufferResource, FeatureSupportsXNACK, FeatureXNACK, - FeatureCluster, + FeatureClusters, ]>; def FeatureISAVersion12_51 : FeatureSet< diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp index 7b94ea3..f291e37 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp @@ -541,7 +541,7 @@ unsigned GCNSubtarget::getMaxNumSGPRs(const Function &F) const { unsigned GCNSubtarget::getBaseMaxNumVGPRs( const Function &F, std::pair<unsigned, unsigned> NumVGPRBounds) const { - const auto &[Min, Max] = NumVGPRBounds; + const auto [Min, Max] = NumVGPRBounds; // Check if maximum number of VGPRs was explicitly requested using // "amdgpu-num-vgpr" attribute. diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 879bf5a..c2e6078 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -288,7 +288,7 @@ protected: bool Has45BitNumRecordsBufferResource = false; - bool HasCluster = false; + bool HasClusters = false; // Dummy feature to use for assembler in tablegen. bool FeatureDisable = false; @@ -1839,7 +1839,7 @@ public: } /// \returns true if the subtarget supports clusters of workgroups. - bool hasClusters() const { return HasCluster; } + bool hasClusters() const { return HasClusters; } /// \returns true if the subtarget requires a wait for xcnt before atomic /// flat/global stores & rmw. diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 3c2dd42..3115579 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1118,12 +1118,7 @@ SIRegisterInfo::getPointerRegClass(unsigned Kind) const { const TargetRegisterClass * SIRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { - if (isAGPRClass(RC) && !ST.hasGFX90AInsts()) - return getEquivalentVGPRClass(RC); - if (RC == &AMDGPU::SCC_CLASSRegClass) - return getWaveMaskRegClass(); - - return RC; + return RC == &AMDGPU::SCC_CLASSRegClass ? 
&AMDGPU::SReg_32RegClass : RC; } static unsigned getNumSubRegsForSpillOp(const MachineInstr &MI, diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 1fc475d..561a9c5 100644 --- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -349,32 +349,30 @@ public: bool isImm() const override { return Kind == Immediate || Kind == Expression; } - bool isU1Imm() const { return Kind == Immediate && isUInt<1>(getImm()); } - bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); } - bool isU3Imm() const { return Kind == Immediate && isUInt<3>(getImm()); } - bool isU4Imm() const { return Kind == Immediate && isUInt<4>(getImm()); } - bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); } - bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); } - bool isU6Imm() const { return Kind == Immediate && isUInt<6>(getImm()); } - bool isU6ImmX2() const { return Kind == Immediate && - isUInt<6>(getImm()) && - (getImm() & 1) == 0; } - bool isU7Imm() const { return Kind == Immediate && isUInt<7>(getImm()); } - bool isU7ImmX4() const { return Kind == Immediate && - isUInt<7>(getImm()) && - (getImm() & 3) == 0; } - bool isU8Imm() const { return Kind == Immediate && isUInt<8>(getImm()); } - bool isU8ImmX8() const { return Kind == Immediate && - isUInt<8>(getImm()) && - (getImm() & 7) == 0; } - - bool isU10Imm() const { return Kind == Immediate && isUInt<10>(getImm()); } - bool isU12Imm() const { return Kind == Immediate && isUInt<12>(getImm()); } + + template <uint64_t N> bool isUImm() const { + return Kind == Immediate && isUInt<N>(getImm()); + } + template <uint64_t N> bool isSImm() const { + return Kind == Immediate && isInt<N>(getImm()); + } + bool isU6ImmX2() const { return isUImm<6>() && (getImm() & 1) == 0; } + bool isU7ImmX4() const { return isUImm<7>() && (getImm() & 3) == 0; } + bool isU8ImmX8() const { return isUImm<8>() && (getImm() & 7) == 0; } + bool isU16Imm() const { return isExtImm<16>(/*Signed*/ false, 1); } bool isS16Imm() const { return isExtImm<16>(/*Signed*/ true, 1); } bool isS16ImmX4() const { return isExtImm<16>(/*Signed*/ true, 4); } bool isS16ImmX16() const { return isExtImm<16>(/*Signed*/ true, 16); } bool isS17Imm() const { return isExtImm<17>(/*Signed*/ true, 1); } + bool isS34Imm() const { + // Once the PC-Rel ABI is finalized, evaluate whether a 34-bit + // ContextImmediate is needed. + return Kind == Expression || isSImm<34>(); + } + bool isS34ImmX16() const { + return Kind == Expression || (isSImm<34>() && (getImm() & 15) == 0); + } bool isHashImmX8() const { // The Hash Imm form is used for instructions that check or store a hash. @@ -384,16 +382,6 @@ public: (getImm() & 7) == 0); } - bool isS34ImmX16() const { - return Kind == Expression || - (Kind == Immediate && isInt<34>(getImm()) && (getImm() & 15) == 0); - } - bool isS34Imm() const { - // Once the PC-Rel ABI is finalized, evaluate whether a 34-bit - // ContextImmediate is needed. 
- return Kind == Expression || (Kind == Immediate && isInt<34>(getImm())); - } - bool isTLSReg() const { return Kind == TLSRegister; } bool isDirectBr() const { if (Kind == Expression) @@ -1637,7 +1625,7 @@ bool PPCAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name, if (Operands.size() != 5) return false; PPCOperand &EHOp = (PPCOperand &)*Operands[4]; - if (EHOp.isU1Imm() && EHOp.getImm() == 0) + if (EHOp.isUImm<1>() && EHOp.getImm() == 0) Operands.pop_back(); } @@ -1817,7 +1805,7 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, } PPCOperand &Op = static_cast<PPCOperand &>(AsmOp); - if (Op.isU3Imm() && Op.getImm() == ImmVal) + if (Op.isUImm<3>() && Op.getImm() == ImmVal) return Match_Success; return Match_InvalidOperand; diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 48c31c9..81d8e94 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -206,45 +206,24 @@ PPCMCCodeEmitter::getVSRpEvenEncoding(const MCInst &MI, unsigned OpNo, return RegBits; } -unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpNo); - if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI); - - // Add a fixup for the immediate field. - addFixup(Fixups, IsLittleEndian ? 0 : 2, MO.getExpr(), PPC::fixup_ppc_half16); - return 0; -} - -uint64_t PPCMCCodeEmitter::getImm34Encoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI, - MCFixupKind Fixup) const { +template <MCFixupKind Fixup> +uint64_t PPCMCCodeEmitter::getImmEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); assert(!MO.isReg() && "Not expecting a register for this operand."); if (MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI); + uint32_t Offset = 0; + if (Fixup == PPC::fixup_ppc_half16) + Offset = IsLittleEndian ? 0 : 2; + // Add a fixup for the immediate field. 
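+ // (For fixup_ppc_half16 the 16-bit immediate occupies the last two bytes of + // the 4-byte big-endian instruction word, hence byte offset 2 there and 0 on + // little-endian targets.)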
- addFixup(Fixups, 0, MO.getExpr(), Fixup); + addFixup(Fixups, Offset, MO.getExpr(), Fixup); return 0; } -uint64_t -PPCMCCodeEmitter::getImm34EncodingNoPCRel(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - return getImm34Encoding(MI, OpNo, Fixups, STI, PPC::fixup_ppc_imm34); -} - -uint64_t -PPCMCCodeEmitter::getImm34EncodingPCRel(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - return getImm34Encoding(MI, OpNo, Fixups, STI, PPC::fixup_ppc_pcrel34); -} - unsigned PPCMCCodeEmitter::getDispRIEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h index b574557..3356513 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h @@ -47,19 +47,10 @@ public: unsigned getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; - unsigned getImm16Encoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - uint64_t getImm34Encoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI, - MCFixupKind Fixup) const; - uint64_t getImm34EncodingNoPCRel(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - uint64_t getImm34EncodingPCRel(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; + template <MCFixupKind Fixup> + uint64_t getImmEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getDispRIEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index 60efa4c..fdca5ebc 100644 --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -14,30 +14,6 @@ //===----------------------------------------------------------------------===// // 64-bit operands. // -def s16imm64 : Operand<i64> { - let PrintMethod = "printS16ImmOperand"; - let EncoderMethod = "getImm16Encoding"; - let ParserMatchClass = PPCS16ImmAsmOperand; - let DecoderMethod = "decodeSImmOperand<16>"; - let OperandType = "OPERAND_IMMEDIATE"; -} -def u16imm64 : Operand<i64> { - let PrintMethod = "printU16ImmOperand"; - let EncoderMethod = "getImm16Encoding"; - let ParserMatchClass = PPCU16ImmAsmOperand; - let DecoderMethod = "decodeUImmOperand<16>"; - let OperandType = "OPERAND_IMMEDIATE"; -} -def s17imm64 : Operand<i64> { - // This operand type is used for addis/lis to allow the assembler parser - // to accept immediates in the range -65536..65535 for compatibility with - // the GNU assembler. The operand is treated as 16-bit otherwise. 
- let PrintMethod = "printS16ImmOperand"; - let EncoderMethod = "getImm16Encoding"; - let ParserMatchClass = PPCS17ImmAsmOperand; - let DecoderMethod = "decodeSImmOperand<16>"; - let OperandType = "OPERAND_IMMEDIATE"; -} def tocentry : Operand<iPTR> { let MIOperandInfo = (ops i64imm:$imm); } diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index c616db4..23d6d88 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -30,6 +30,11 @@ // Altivec transformation functions and pattern fragments. // +// fneg is not legal, and is desugared as an xor. +def desugared_fneg : PatFrag<(ops node:$x), (v4f32 (bitconvert (xor (bitconvert $x), + (int_ppc_altivec_vslw (bitconvert (v16i8 immAllOnesV)), + (bitconvert (v16i8 immAllOnesV))))))>; + def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG); @@ -467,11 +472,12 @@ def VMADDFP : VAForm_1<46, (outs vrrc:$RT), (ins vrrc:$RA, vrrc:$RC, vrrc:$RB), [(set v4f32:$RT, (fma v4f32:$RA, v4f32:$RC, v4f32:$RB))]>; -// FIXME: The fma+fneg pattern won't match because fneg is not legal. +// fneg is not legal, hence we have to match on the desugared version. def VNMSUBFP: VAForm_1<47, (outs vrrc:$RT), (ins vrrc:$RA, vrrc:$RC, vrrc:$RB), "vnmsubfp $RT, $RA, $RC, $RB", IIC_VecFP, - [(set v4f32:$RT, (fneg (fma v4f32:$RA, v4f32:$RC, - (fneg v4f32:$RB))))]>; + [(set v4f32:$RT, (desugared_fneg (fma v4f32:$RA, v4f32:$RC, + (desugared_fneg v4f32:$RB))))]>; + let hasSideEffects = 1 in { def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>; def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs, @@ -892,6 +898,13 @@ def : Pat<(mul v8i16:$vA, v8i16:$vB), (VMLADDUHM $vA, $vB, (v8i16(V_SET0H)))>; // Add def : Pat<(add (mul v8i16:$vA, v8i16:$vB), v8i16:$vC), (VMLADDUHM $vA, $vB, $vC)>; + +// Fused negated multiply-subtract +def : Pat<(v4f32 (desugared_fneg + (int_ppc_altivec_vmaddfp v4f32:$RA, v4f32:$RC, + (desugared_fneg v4f32:$RB)))), + (VNMSUBFP $RA, $RC, $RB)>; + // Saturating adds/subtracts.
def : Pat<(v16i8 (saddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDSBS $vA, $vB))>; def : Pat<(v16i8 (uaddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDUBS $vA, $vB))>; diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td index 6d8c122..65d0484 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -615,7 +615,8 @@ def spe4rc : RegisterOperand<GPRC> { } def PPCU1ImmAsmOperand : AsmOperandClass { - let Name = "U1Imm"; let PredicateMethod = "isU1Imm"; + let Name = "U1Imm"; + let PredicateMethod = "isUImm<1>"; let RenderMethod = "addImmOperands"; } def u1imm : Operand<i32> { @@ -626,7 +627,8 @@ def u1imm : Operand<i32> { } def PPCU2ImmAsmOperand : AsmOperandClass { - let Name = "U2Imm"; let PredicateMethod = "isU2Imm"; + let Name = "U2Imm"; + let PredicateMethod = "isUImm<2>"; let RenderMethod = "addImmOperands"; } def u2imm : Operand<i32> { @@ -647,7 +649,8 @@ def atimm : Operand<i32> { } def PPCU3ImmAsmOperand : AsmOperandClass { - let Name = "U3Imm"; let PredicateMethod = "isU3Imm"; + let Name = "U3Imm"; + let PredicateMethod = "isUImm<3>"; let RenderMethod = "addImmOperands"; } def u3imm : Operand<i32> { @@ -658,7 +661,8 @@ def u3imm : Operand<i32> { } def PPCU4ImmAsmOperand : AsmOperandClass { - let Name = "U4Imm"; let PredicateMethod = "isU4Imm"; + let Name = "U4Imm"; + let PredicateMethod = "isUImm<4>"; let RenderMethod = "addImmOperands"; } def u4imm : Operand<i32> { @@ -668,7 +672,8 @@ def u4imm : Operand<i32> { let OperandType = "OPERAND_IMMEDIATE"; } def PPCS5ImmAsmOperand : AsmOperandClass { - let Name = "S5Imm"; let PredicateMethod = "isS5Imm"; + let Name = "S5Imm"; + let PredicateMethod = "isSImm<5>"; let RenderMethod = "addImmOperands"; } def s5imm : Operand<i32> { @@ -678,7 +683,8 @@ def s5imm : Operand<i32> { let OperandType = "OPERAND_IMMEDIATE"; } def PPCU5ImmAsmOperand : AsmOperandClass { - let Name = "U5Imm"; let PredicateMethod = "isU5Imm"; + let Name = "U5Imm"; + let PredicateMethod = "isUImm<5>"; let RenderMethod = "addImmOperands"; } def u5imm : Operand<i32> { @@ -688,7 +694,8 @@ def u5imm : Operand<i32> { let OperandType = "OPERAND_IMMEDIATE"; } def PPCU6ImmAsmOperand : AsmOperandClass { - let Name = "U6Imm"; let PredicateMethod = "isU6Imm"; + let Name = "U6Imm"; + let PredicateMethod = "isUImm<6>"; let RenderMethod = "addImmOperands"; } def u6imm : Operand<i32> { @@ -698,7 +705,8 @@ def u6imm : Operand<i32> { let OperandType = "OPERAND_IMMEDIATE"; } def PPCU7ImmAsmOperand : AsmOperandClass { - let Name = "U7Imm"; let PredicateMethod = "isU7Imm"; + let Name = "U7Imm"; + let PredicateMethod = "isUImm<7>"; let RenderMethod = "addImmOperands"; } def u7imm : Operand<i32> { @@ -708,7 +716,8 @@ def u7imm : Operand<i32> { let OperandType = "OPERAND_IMMEDIATE"; } def PPCU8ImmAsmOperand : AsmOperandClass { - let Name = "U8Imm"; let PredicateMethod = "isU8Imm"; + let Name = "U8Imm"; + let PredicateMethod = "isUImm<8>"; let RenderMethod = "addImmOperands"; } def u8imm : Operand<i32> { @@ -718,7 +727,8 @@ def u8imm : Operand<i32> { let OperandType = "OPERAND_IMMEDIATE"; } def PPCU10ImmAsmOperand : AsmOperandClass { - let Name = "U10Imm"; let PredicateMethod = "isU10Imm"; + let Name = "U10Imm"; + let PredicateMethod = "isUImm<10>"; let RenderMethod = "addImmOperands"; } def u10imm : Operand<i32> { @@ -728,7 +738,8 @@ def u10imm : Operand<i32> { let OperandType = "OPERAND_IMMEDIATE"; } def PPCU12ImmAsmOperand : AsmOperandClass { - let Name = "U12Imm"; let PredicateMethod = "isU12Imm"; + let 
Name = "U12Imm"; + let PredicateMethod = "isUImm<12>"; let RenderMethod = "addImmOperands"; } def u12imm : Operand<i32> { @@ -743,7 +754,14 @@ def PPCS16ImmAsmOperand : AsmOperandClass { } def s16imm : Operand<i32> { let PrintMethod = "printS16ImmOperand"; - let EncoderMethod = "getImm16Encoding"; + let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_half16>"; + let ParserMatchClass = PPCS16ImmAsmOperand; + let DecoderMethod = "decodeSImmOperand<16>"; + let OperandType = "OPERAND_IMMEDIATE"; +} +def s16imm64 : Operand<i64> { + let PrintMethod = "printS16ImmOperand"; + let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_half16>"; let ParserMatchClass = PPCS16ImmAsmOperand; let DecoderMethod = "decodeSImmOperand<16>"; let OperandType = "OPERAND_IMMEDIATE"; @@ -754,7 +772,14 @@ def PPCU16ImmAsmOperand : AsmOperandClass { } def u16imm : Operand<i32> { let PrintMethod = "printU16ImmOperand"; - let EncoderMethod = "getImm16Encoding"; + let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_half16>"; + let ParserMatchClass = PPCU16ImmAsmOperand; + let DecoderMethod = "decodeUImmOperand<16>"; + let OperandType = "OPERAND_IMMEDIATE"; +} +def u16imm64 : Operand<i64> { + let PrintMethod = "printU16ImmOperand"; + let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_half16>"; let ParserMatchClass = PPCU16ImmAsmOperand; let DecoderMethod = "decodeUImmOperand<16>"; let OperandType = "OPERAND_IMMEDIATE"; @@ -768,7 +793,17 @@ def s17imm : Operand<i32> { // to accept immediates in the range -65536..65535 for compatibility with // the GNU assembler. The operand is treated as 16-bit otherwise. let PrintMethod = "printS16ImmOperand"; - let EncoderMethod = "getImm16Encoding"; + let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_half16>"; + let ParserMatchClass = PPCS17ImmAsmOperand; + let DecoderMethod = "decodeSImmOperand<16>"; + let OperandType = "OPERAND_IMMEDIATE"; +} +def s17imm64 : Operand<i64> { + // This operand type is used for addis/lis to allow the assembler parser + // to accept immediates in the range -65536..65535 for compatibility with + // the GNU assembler. The operand is treated as 16-bit otherwise. + let PrintMethod = "printS16ImmOperand"; + let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_half16>"; let ParserMatchClass = PPCS17ImmAsmOperand; let DecoderMethod = "decodeSImmOperand<16>"; let OperandType = "OPERAND_IMMEDIATE"; @@ -780,14 +815,14 @@ def PPCS34ImmAsmOperand : AsmOperandClass { } def s34imm : Operand<i64> { let PrintMethod = "printS34ImmOperand"; - let EncoderMethod = "getImm34EncodingNoPCRel"; + let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_imm34>"; let ParserMatchClass = PPCS34ImmAsmOperand; let DecoderMethod = "decodeSImmOperand<34>"; let OperandType = "OPERAND_IMMEDIATE"; } def s34imm_pcrel : Operand<i64> { let PrintMethod = "printS34ImmOperand"; - let EncoderMethod = "getImm34EncodingPCRel"; + let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_pcrel34>"; let ParserMatchClass = PPCS34ImmAsmOperand; let DecoderMethod = "decodeSImmOperand<34>"; let OperandType = "OPERAND_IMMEDIATE"; diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp index 597dd12..9f9ae2f 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp @@ -324,6 +324,10 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[0] = GPRValueMapping; + // Atomics always use GPR destinations. Don't refine any further. 
+ if (cast<GLoad>(MI).isAtomic()) + break; + // Use FPR64 for s64 loads on rv32. if (GPRSize == 32 && Size.getFixedValue() == 64) { assert(MF.getSubtarget<RISCVSubtarget>().hasStdExtD()); @@ -358,6 +362,10 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[0] = GPRValueMapping; + // Atomics always use GPR sources. Don't refine any further. + if (cast<GStore>(MI).isAtomic()) + break; + // Use FPR64 for s64 stores on rv32. if (GPRSize == 32 && Size.getFixedValue() == 64) { assert(MF.getSubtarget<RISCVSubtarget>().hasStdExtD()); diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index 9f2e075..e16c8f0 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -2811,9 +2811,7 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) { GetElementPtrInst *NewGEP = simplifyZeroLengthArrayGepInst(Ref); if (NewGEP) { Ref->replaceAllUsesWith(NewGEP); - if (isInstructionTriviallyDead(Ref)) - DeadInsts.insert(Ref); - + DeadInsts.insert(Ref); Ref = NewGEP; } if (Type *GepTy = getGEPType(Ref)) diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index b906690..62a3c88 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -444,7 +444,7 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["atomic-fmin-fmax-global-f32"] = true; Features["atomic-fmin-fmax-global-f64"] = true; Features["wavefrontsize32"] = true; - Features["cluster"] = true; + Features["clusters"] = true; break; case GK_GFX1201: case GK_GFX1200: diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 8d9a0e7..50130da 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -2067,6 +2067,36 @@ static void inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes, AI.run(SCCNodes, Changed); } +// Helper used when deciding whether the function 'F' can be marked +// 'norecurse': returns true if any call within 'F' could lead to a recursive +// call back to 'F', and false otherwise. +// The 'AnyFunctionsAddressIsTaken' parameter is a module-wide flag +// that is true if any function's address is taken, or if any function +// has external linkage. This is used to determine the safety of +// external/library calls. +static bool mayHaveRecursiveCallee(Function &F, + bool AnyFunctionsAddressIsTaken = true) { + for (const auto &BB : F) { + for (const auto &I : BB.instructionsWithoutDebug()) { + if (const auto *CB = dyn_cast<CallBase>(&I)) { + const Function *Callee = CB->getCalledFunction(); + if (!Callee || Callee == &F) + return true; + + if (Callee->doesNotRecurse()) + continue; + + if (!AnyFunctionsAddressIsTaken || + (Callee->isDeclaration() && + Callee->hasFnAttribute(Attribute::NoCallback))) + continue; + return true; + } + } + } + return false; +} + static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes, SmallPtrSet<Function *, 8> &Changed) { // Try and identify functions that do not recurse. @@ -2078,28 +2108,14 @@ static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes, Function *F = *SCCNodes.begin(); if (!F || !F->hasExactDefinition() || F->doesNotRecurse()) return; - - // If all of the calls in F are identifiable and are to norecurse functions, F - // is norecurse. This check also detects self-recursion as F is not currently - // marked norecurse, so any called from F to F will not be marked norecurse.
- for (auto &BB : *F) - for (auto &I : BB.instructionsWithoutDebug()) - if (auto *CB = dyn_cast<CallBase>(&I)) { - Function *Callee = CB->getCalledFunction(); - if (!Callee || Callee == F || - (!Callee->doesNotRecurse() && - !(Callee->isDeclaration() && - Callee->hasFnAttribute(Attribute::NoCallback)))) - // Function calls a potentially recursive function. - return; - } - - // Every call was to a non-recursive function other than this function, and - // we have no indirect recursion as the SCC size is one. This function cannot - // recurse. - F->setDoesNotRecurse(); - ++NumNoRecurse; - Changed.insert(F); + if (!mayHaveRecursiveCallee(*F)) { + // Every call was to a non-recursive function other than this function, and + // we have no indirect recursion as the SCC size is one. This function + // cannot recurse. + F->setDoesNotRecurse(); + ++NumNoRecurse; + Changed.insert(F); + } } // Set the noreturn function attribute if possible. @@ -2429,3 +2445,62 @@ ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) { PA.preserve<LazyCallGraphAnalysis>(); return PA; } + +PreservedAnalyses NoRecurseLTOInferencePass::run(Module &M, + ModuleAnalysisManager &MAM) { + + // Check if any function in the whole program has its address taken or is + // potentially visible outside the module (external linkage). + // We use this information when inferring the norecurse attribute: if there + // is no function whose address is taken and all functions have internal + // linkage, there is no path for a callback to any user function. + bool AnyFunctionsAddressIsTaken = false; + for (Function &F : M) { + if (F.isDeclaration() || F.doesNotRecurse()) + continue; + if (!F.hasLocalLinkage() || F.hasAddressTaken()) { + AnyFunctionsAddressIsTaken = true; + break; + } + } + + // Run norecurse inference on all RefSCCs in the LazyCallGraph for this + // module. + bool Changed = false; + LazyCallGraph &CG = MAM.getResult<LazyCallGraphAnalysis>(M); + CG.buildRefSCCs(); + + for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) { + // Skip any RefSCC that is part of a call cycle. A RefSCC containing more + // than one SCC indicates a recursive relationship involving indirect calls. + if (RC.size() > 1) + continue; + + // The RefSCC contains a single SCC. SCC size > 1 indicates mutually + // recursive functions, e.g. foo1 -> foo2 -> foo3 -> foo1. + LazyCallGraph::SCC &S = *RC.begin(); + if (S.size() > 1) + continue; + + // Get the single function from this SCC. + Function &F = S.begin()->getFunction(); + if (!F.hasExactDefinition() || F.doesNotRecurse()) + continue; + + // If the analysis confirms that this function has no recursive calls + // (either direct, indirect, or through externally visible functions), + // we can safely apply the norecurse attribute.
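+ // Hypothetical example: if every defined function in the module has + // internal linkage and none has its address taken, a function whose only + // call is to an external declaration can be marked norecurse here, since + // that callee has no way to call back into the module.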
+ if (!mayHaveRecursiveCallee(F, AnyFunctionsAddressIsTaken)) { + F.setDoesNotRecurse(); + ++NumNoRecurse; + Changed = true; + } + } + + PreservedAnalyses PA; + if (Changed) + PA.preserve<LazyCallGraphAnalysis>(); + else + PA = PreservedAnalyses::all(); + return PA; +} diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index e434e73..d393a9c 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8393,11 +8393,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( R->setOperand(1, WideIV->getStepValue()); } - VPlanTransforms::runPass( - VPlanTransforms::addExitUsersForFirstOrderRecurrences, *Plan, Range); + // TODO: We can't call runPass on these transforms yet, due to verifier + // failures. + VPlanTransforms::addExitUsersForFirstOrderRecurrences(*Plan, Range); DenseMap<VPValue *, VPValue *> IVEndValues; - VPlanTransforms::runPass(VPlanTransforms::addScalarResumePhis, *Plan, - RecipeBuilder, IVEndValues); + VPlanTransforms::addScalarResumePhis(*Plan, RecipeBuilder, IVEndValues); // --------------------------------------------------------------------------- // Transform initial VPlan: Apply previously taken decisions, in order, to @@ -8508,8 +8508,9 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) { DenseMap<VPValue *, VPValue *> IVEndValues; // TODO: IVEndValues are not used yet in the native path, to optimize exit // values. - VPlanTransforms::runPass(VPlanTransforms::addScalarResumePhis, *Plan, - RecipeBuilder, IVEndValues); + // TODO: We can't call runPass on the transform yet, due to verifier + // failures. + VPlanTransforms::addScalarResumePhis(*Plan, RecipeBuilder, IVEndValues); assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid"); return Plan; diff --git a/llvm/test/Bitcode/thinlto-alias-addrspacecast.ll b/llvm/test/Bitcode/thinlto-alias-addrspacecast.ll new file mode 100644 index 0000000..fe4f05e --- /dev/null +++ b/llvm/test/Bitcode/thinlto-alias-addrspacecast.ll @@ -0,0 +1,7 @@ +; RUN: opt -module-summary < %s | llvm-dis | FileCheck %s + +@__oclc_ABI_version = linkonce_odr hidden addrspace(4) constant i32 500, align 4 +@_ZL20__oclc_ABI_version__ = internal alias i32, addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr) + +; CHECK: ^1 = gv: (name: "__oclc_ABI_version", summaries: (variable: (module: ^0, flags: {{.*}}))) +; CHECK: ^2 = gv: (name: "_ZL20__oclc_ABI_version__", summaries: (alias: (module: ^0, flags: {{.*}}, aliasee: ^1))) diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll index 9e24023..ebbeab9 100644 --- a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll @@ -146,9 +146,9 @@ define void @no_free_vgprs_at_agpr_to_agpr_copy(float %v0, float %v1) #0 { ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; copy ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_accvgpr_read_b32 v32, a2 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a2 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a3, v32 +; GFX908-NEXT: v_accvgpr_write_b32 a3, v39 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use a3 v[0:31] ; GFX908-NEXT: ;;#ASMEND @@ -437,9 +437,9 @@ define void @v32_asm_def_use(float %v0, float %v1) #4 { ; GFX908-NEXT: ; copy ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: s_nop 7 -; GFX908-NEXT: v_accvgpr_read_b32 v33, a2 +; GFX908-NEXT: v_accvgpr_read_b32 v35, a2 ; GFX908-NEXT: s_nop 1 -; 
GFX908-NEXT: v_accvgpr_write_b32 a3, v33 +; GFX908-NEXT: v_accvgpr_write_b32 a3, v35 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use a3 v[0:31] ; GFX908-NEXT: ;;#ASMEND @@ -1045,9 +1045,9 @@ define void @no_free_vgprs_at_sgpr_to_agpr_copy(float %v0, float %v1) #0 { ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; copy ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_accvgpr_read_b32 v32, a2 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a2 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a3, v32 +; GFX908-NEXT: v_accvgpr_write_b32 a3, v39 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use a3 v[0:31] ; GFX908-NEXT: ;;#ASMEND diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-propagation.mir b/llvm/test/CodeGen/AMDGPU/agpr-copy-propagation.mir index a42cf43..7e82382d 100644 --- a/llvm/test/CodeGen/AMDGPU/agpr-copy-propagation.mir +++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-propagation.mir @@ -40,8 +40,8 @@ body: | ; GFX908: liveins: $agpr0 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: renamable $vgpr0 = COPY renamable $agpr0, implicit $exec - ; GFX908-NEXT: renamable $agpr1 = COPY renamable $vgpr0, implicit $exec - ; GFX908-NEXT: renamable $agpr2 = COPY renamable $vgpr0, implicit $exec + ; GFX908-NEXT: renamable $agpr1 = COPY $agpr0, implicit $exec + ; GFX908-NEXT: renamable $agpr2 = COPY $agpr0, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $vgpr0, implicit $agpr1, implicit $agpr2 ; ; GFX90A-LABEL: name: do_not_propagate_agpr_to_agpr diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll b/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll index c4479b3..e3bc516 100644 --- a/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll +++ b/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll @@ -15,6 +15,9 @@ ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX950 %s ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 -mattr=+sramecc < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX950 %s +; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1250 < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX1250 %s +; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1250 -mattr=+sramecc < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX1250 %s + ; NO-SRAM-ECC-GFX906: Flags [ ; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_FEATURE_XNACK_V3 (0x100) ; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F) @@ -52,6 +55,11 @@ ; SRAM-ECC-GFX950: EF_AMDGPU_MACH_AMDGCN_GFX950 (0x4F) ; SRAM-ECC-GFX950: ] +; SRAM-ECC-GFX1250: Flags [ +; SRAM-ECC-GFX1250: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200) +; SRAM-ECC-GFX1250: EF_AMDGPU_MACH_AMDGCN_GFX1250 (0x49) +; SRAM-ECC-GFX1250: ] + define amdgpu_kernel void @elf_header() { ret void } diff --git a/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll b/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll index 51cd564..f46116e 100644 --- a/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll +++ b/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll @@ -95,66 +95,66 @@ define amdgpu_kernel void @test_mfma_f32_32x32x1f32(ptr addrspace(1) %arg) #0 { ; GREEDY908-NEXT: v_mfma_f32_32x32x1f32 a[32:63], v3, v0, a[0:31] ; GREEDY908-NEXT: s_nop 15 ; GREEDY908-NEXT: s_nop 1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a32 -; GREEDY908-NEXT: v_accvgpr_read_b32 v5, a61 -; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a60 -; GREEDY908-NEXT: v_accvgpr_write_b32 a2, v1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a33 -; GREEDY908-NEXT: v_accvgpr_read_b32 v7, a59 -; 
GREEDY908-NEXT: v_accvgpr_read_b32 v8, a58 -; GREEDY908-NEXT: v_accvgpr_write_b32 a3, v1 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a32 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a33 ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a34 -; GREEDY908-NEXT: v_accvgpr_read_b32 v9, a57 -; GREEDY908-NEXT: v_accvgpr_read_b32 v10, a56 +; GREEDY908-NEXT: v_accvgpr_write_b32 a2, v2 +; GREEDY908-NEXT: v_accvgpr_write_b32 a3, v6 ; GREEDY908-NEXT: v_accvgpr_write_b32 a4, v1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a35 -; GREEDY908-NEXT: v_accvgpr_read_b32 v11, a55 -; GREEDY908-NEXT: v_accvgpr_read_b32 v12, a54 -; GREEDY908-NEXT: v_accvgpr_write_b32 a5, v1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a36 -; GREEDY908-NEXT: v_accvgpr_read_b32 v13, a53 -; GREEDY908-NEXT: v_accvgpr_read_b32 v14, a52 -; GREEDY908-NEXT: v_accvgpr_write_b32 a6, v1 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a35 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a36 ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a37 -; GREEDY908-NEXT: v_accvgpr_read_b32 v15, a51 -; GREEDY908-NEXT: v_accvgpr_read_b32 v16, a50 +; GREEDY908-NEXT: v_accvgpr_write_b32 a5, v2 +; GREEDY908-NEXT: v_accvgpr_write_b32 a6, v6 ; GREEDY908-NEXT: v_accvgpr_write_b32 a7, v1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a38 -; GREEDY908-NEXT: v_accvgpr_read_b32 v17, a49 -; GREEDY908-NEXT: v_accvgpr_read_b32 v18, a48 -; GREEDY908-NEXT: v_accvgpr_write_b32 a8, v1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a39 -; GREEDY908-NEXT: v_accvgpr_read_b32 v19, a47 -; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a46 -; GREEDY908-NEXT: v_accvgpr_write_b32 a9, v1 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a38 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a39 ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a40 -; GREEDY908-NEXT: v_accvgpr_write_b32 a16, v2 -; GREEDY908-NEXT: v_accvgpr_write_b32 a17, v19 +; GREEDY908-NEXT: v_accvgpr_write_b32 a8, v2 +; GREEDY908-NEXT: v_accvgpr_write_b32 a9, v6 ; GREEDY908-NEXT: v_accvgpr_write_b32 a10, v1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a41 -; GREEDY908-NEXT: v_accvgpr_write_b32 a18, v18 -; GREEDY908-NEXT: v_accvgpr_write_b32 a19, v17 -; GREEDY908-NEXT: v_accvgpr_write_b32 a11, v1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a42 -; GREEDY908-NEXT: v_accvgpr_write_b32 a20, v16 -; GREEDY908-NEXT: v_accvgpr_write_b32 a21, v15 -; GREEDY908-NEXT: v_accvgpr_write_b32 a12, v1 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a41 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a42 ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a43 -; GREEDY908-NEXT: v_accvgpr_write_b32 a22, v14 -; GREEDY908-NEXT: v_accvgpr_write_b32 a23, v13 +; GREEDY908-NEXT: v_accvgpr_write_b32 a11, v2 +; GREEDY908-NEXT: v_accvgpr_write_b32 a12, v6 ; GREEDY908-NEXT: v_accvgpr_write_b32 a13, v1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a44 -; GREEDY908-NEXT: v_accvgpr_write_b32 a24, v12 -; GREEDY908-NEXT: v_accvgpr_write_b32 a25, v11 -; GREEDY908-NEXT: v_accvgpr_write_b32 a14, v1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a45 -; GREEDY908-NEXT: v_accvgpr_write_b32 a26, v10 -; GREEDY908-NEXT: v_accvgpr_write_b32 a27, v9 -; GREEDY908-NEXT: v_accvgpr_write_b32 a15, v1 -; GREEDY908-NEXT: v_accvgpr_write_b32 a28, v8 -; GREEDY908-NEXT: v_accvgpr_write_b32 a29, v7 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a44 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a45 +; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a46 +; GREEDY908-NEXT: v_accvgpr_write_b32 a14, v2 +; GREEDY908-NEXT: v_accvgpr_write_b32 a15, v6 +; GREEDY908-NEXT: v_accvgpr_write_b32 a16, v1 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a47 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a48 +; GREEDY908-NEXT: 
v_accvgpr_read_b32 v1, a49 +; GREEDY908-NEXT: v_accvgpr_write_b32 a17, v2 +; GREEDY908-NEXT: v_accvgpr_write_b32 a18, v6 +; GREEDY908-NEXT: v_accvgpr_write_b32 a19, v1 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a50 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a51 +; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a52 +; GREEDY908-NEXT: v_accvgpr_write_b32 a20, v2 +; GREEDY908-NEXT: v_accvgpr_write_b32 a21, v6 +; GREEDY908-NEXT: v_accvgpr_write_b32 a22, v1 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a53 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a54 +; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a55 +; GREEDY908-NEXT: v_accvgpr_write_b32 a23, v2 +; GREEDY908-NEXT: v_accvgpr_write_b32 a24, v6 +; GREEDY908-NEXT: v_accvgpr_write_b32 a25, v1 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a56 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a57 +; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a58 +; GREEDY908-NEXT: v_accvgpr_write_b32 a26, v2 +; GREEDY908-NEXT: v_accvgpr_write_b32 a27, v6 +; GREEDY908-NEXT: v_accvgpr_write_b32 a28, v1 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a59 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a60 +; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a61 +; GREEDY908-NEXT: v_accvgpr_write_b32 a29, v2 ; GREEDY908-NEXT: v_accvgpr_write_b32 a30, v6 -; GREEDY908-NEXT: v_accvgpr_write_b32 a31, v5 +; GREEDY908-NEXT: v_accvgpr_write_b32 a31, v1 ; GREEDY908-NEXT: s_nop 0 ; GREEDY908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v3, v0, a[0:31] ; GREEDY908-NEXT: s_nop 15 @@ -667,11 +667,11 @@ define amdgpu_kernel void @test_mfma_f32_16x16x1f32(ptr addrspace(1) %arg) #0 { ; GREEDY908-NEXT: v_mfma_f32_16x16x1f32 a[18:33], v0, v1, a[18:33] ; GREEDY908-NEXT: v_mfma_f32_16x16x1f32 a[2:17], v0, v1, a[18:33] ; GREEDY908-NEXT: s_nop 8 +; GREEDY908-NEXT: v_accvgpr_read_b32 v5, a18 ; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a19 -; GREEDY908-NEXT: v_accvgpr_read_b32 v3, a18 ; GREEDY908-NEXT: s_nop 0 +; GREEDY908-NEXT: v_accvgpr_write_b32 a0, v5 ; GREEDY908-NEXT: v_accvgpr_write_b32 a1, v2 -; GREEDY908-NEXT: v_accvgpr_write_b32 a0, v3 ; GREEDY908-NEXT: s_nop 0 ; GREEDY908-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[0:15] ; GREEDY908-NEXT: s_nop 9 diff --git a/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll b/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll index cf244f0..be1788c 100644 --- a/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll +++ b/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll @@ -54,19 +54,20 @@ define amdgpu_kernel void @matmul_kernel(i32 %a0, i32 %a1) { ; GFX908-NEXT: s_branch .LBB0_2 ; GFX908-NEXT: .LBB0_1: ; %bb2 ; GFX908-NEXT: ; in Loop: Header=BB0_2 Depth=1 +; GFX908-NEXT: s_nop 6 +; GFX908-NEXT: v_accvgpr_read_b32 v3, a2 ; GFX908-NEXT: s_or_b32 s4, s3, 1 ; GFX908-NEXT: s_ashr_i32 s5, s3, 31 ; GFX908-NEXT: s_mov_b32 s3, s2 ; GFX908-NEXT: v_mov_b32_e32 v1, s2 -; GFX908-NEXT: s_nop 2 -; GFX908-NEXT: v_accvgpr_read_b32 v0, a2 ; GFX908-NEXT: v_mov_b32_e32 v2, s3 +; GFX908-NEXT: v_accvgpr_write_b32 a0, v3 ; GFX908-NEXT: v_accvgpr_read_b32 v4, a1 ; GFX908-NEXT: v_accvgpr_read_b32 v3, a1 -; GFX908-NEXT: v_accvgpr_write_b32 a0, v0 +; GFX908-NEXT: s_and_b32 s3, s5, s4 ; GFX908-NEXT: v_accvgpr_write_b32 a2, v4 ; GFX908-NEXT: v_accvgpr_write_b32 a3, v3 -; GFX908-NEXT: s_and_b32 s3, s5, s4 +; GFX908-NEXT: s_nop 0 ; GFX908-NEXT: v_mfma_f32_16x16x16f16 a[2:5], v[1:2], v[1:2], a[0:3] ; GFX908-NEXT: s_cbranch_execz .LBB0_4 ; GFX908-NEXT: .LBB0_2: ; %bb diff --git a/llvm/test/CodeGen/PowerPC/vec-nmsub.ll b/llvm/test/CodeGen/PowerPC/vec-nmsub.ll new file mode 100644 index 0000000..8f4ac972 --- /dev/null +++ 
b/llvm/test/CodeGen/PowerPC/vec-nmsub.ll @@ -0,0 +1,36 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -verify-machineinstrs < %s -mcpu=pwr5 -mtriple=ppc32-- -mattr=+altivec | FileCheck %s + +define dso_local <4 x float> @intrinsic(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c) local_unnamed_addr { +; CHECK-LABEL: intrinsic: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vnmsubfp 2, 2, 3, 4 +; CHECK-NEXT: blr +entry: + %0 = tail call <4 x float> @llvm.ppc.altivec.vnmsubfp(<4 x float> %a, <4 x float> %b, <4 x float> %c) + ret <4 x float> %0 +} + +define <4 x float> @manual_llvm_fma(<4 x float> %a, <4 x float> %b, <4 x float> %c) unnamed_addr { +; CHECK-LABEL: manual_llvm_fma: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vnmsubfp 2, 2, 3, 4 +; CHECK-NEXT: blr +start: + %0 = fneg <4 x float> %c + %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %0) + %2 = fneg <4 x float> %1 + ret <4 x float> %2 +} + +define dso_local <4 x float> @manual_vmaddfp(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c) local_unnamed_addr { +; CHECK-LABEL: manual_vmaddfp: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vnmsubfp 2, 2, 3, 4 +; CHECK-NEXT: blr +entry: + %fneg.i3 = fneg <4 x float> %c + %0 = tail call <4 x float> @llvm.ppc.altivec.vmaddfp(<4 x float> %a, <4 x float> %b, <4 x float> %fneg.i3) + %fneg.i = fneg <4 x float> %0 + ret <4 x float> %fneg.i +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll b/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll new file mode 100644 index 0000000..4ad2d2c --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll @@ -0,0 +1,950 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d,+a,+no-trailing-seq-cst-fence \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d,+a,+ztso,+no-trailing-seq-cst-fence \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s +; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I %s +; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a,+no-trailing-seq-cst-fence \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s +; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a,+ztso,+no-trailing-seq-cst-fence \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s + + +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d,+a -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO-TRAILING-FENCE %s +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d,+a,+ztso -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO-TRAILING-FENCE %s + +; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO-TRAILING-FENCE %s +; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a,+ztso -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO-TRAILING-FENCE %s + + +define float @atomic_load_f32_unordered(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_f32_unordered: +; RV32I: # %bb.0: +; 
RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_4 +; RV32I-NEXT: fmv.w.x fa0, a0 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_f32_unordered: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: fmv.w.x fa0, a0 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_f32_unordered: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_f32_unordered: +; RV64IA: # %bb.0: +; RV64IA-NEXT: lw a0, 0(a0) +; RV64IA-NEXT: fmv.w.x fa0, a0 +; RV64IA-NEXT: ret + %1 = load atomic float, ptr %a unordered, align 4 + ret float %1 +} + +define float @atomic_load_f32_monotonic(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_f32_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_4 +; RV32I-NEXT: fmv.w.x fa0, a0 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_f32_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: fmv.w.x fa0, a0 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_f32_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_f32_monotonic: +; RV64IA: # %bb.0: +; RV64IA-NEXT: lw a0, 0(a0) +; RV64IA-NEXT: fmv.w.x fa0, a0 +; RV64IA-NEXT: ret + %1 = load atomic float, ptr %a monotonic, align 4 + ret float %1 +} + +define float @atomic_load_f32_acquire(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_f32_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 2 +; RV32I-NEXT: call __atomic_load_4 +; RV32I-NEXT: fmv.w.x fa0, a0 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_load_f32_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: lw a0, 0(a0) +; RV32IA-WMO-NEXT: fence r, rw +; RV32IA-WMO-NEXT: fmv.w.x fa0, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_load_f32_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: lw a0, 0(a0) +; RV32IA-TSO-NEXT: fmv.w.x fa0, a0 +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_load_f32_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 2 +; RV64I-NEXT: call __atomic_load_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_f32_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: lw a0, 0(a0) +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: fmv.w.x fa0, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_f32_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lw a0, 0(a0) +; RV64IA-TSO-NEXT: fmv.w.x fa0, a0 +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_f32_acquire: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: 
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0 +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_f32_acquire: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0 +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_f32_acquire: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_f32_acquire: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + %1 = load atomic float, ptr %a acquire, align 4 + ret float %1 +} + +define float @atomic_load_f32_seq_cst(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_f32_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 5 +; RV32I-NEXT: call __atomic_load_4 +; RV32I-NEXT: fmv.w.x fa0, a0 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_load_f32_seq_cst: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: fence rw, rw +; RV32IA-WMO-NEXT: lw a0, 0(a0) +; RV32IA-WMO-NEXT: fence r, rw +; RV32IA-WMO-NEXT: fmv.w.x fa0, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_load_f32_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: fence rw, rw +; RV32IA-TSO-NEXT: lw a0, 0(a0) +; RV32IA-TSO-NEXT: fmv.w.x fa0, a0 +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_load_f32_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 5 +; RV64I-NEXT: call __atomic_load_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_f32_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, rw +; RV64IA-WMO-NEXT: lw a0, 0(a0) +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: fmv.w.x fa0, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_f32_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: lw a0, 0(a0) +; RV64IA-TSO-NEXT: fmv.w.x fa0, a0 +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_f32_seq_cst: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0 +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_f32_seq_cst: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0 +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_f32_seq_cst: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; 
RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_f32_seq_cst: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + %1 = load atomic float, ptr %a seq_cst, align 4 + ret float %1 +} + +define double @atomic_load_f64_unordered(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_f64_unordered: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_8 +; RV32I-NEXT: sw a0, 0(sp) +; RV32I-NEXT: sw a1, 4(sp) +; RV32I-NEXT: fld fa0, 0(sp) +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_f64_unordered: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: li a1, 0 +; RV32IA-NEXT: call __atomic_load_8 +; RV32IA-NEXT: sw a0, 0(sp) +; RV32IA-NEXT: sw a1, 4(sp) +; RV32IA-NEXT: fld fa0, 0(sp) +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_f64_unordered: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_f64_unordered: +; RV64IA: # %bb.0: +; RV64IA-NEXT: ld a0, 0(a0) +; RV64IA-NEXT: fmv.d.x fa0, a0 +; RV64IA-NEXT: ret + %1 = load atomic double, ptr %a unordered, align 8 + ret double %1 +} + +define double @atomic_load_f64_monotonic(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_f64_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_8 +; RV32I-NEXT: sw a0, 0(sp) +; RV32I-NEXT: sw a1, 4(sp) +; RV32I-NEXT: fld fa0, 0(sp) +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_f64_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: li a1, 0 +; RV32IA-NEXT: call __atomic_load_8 +; RV32IA-NEXT: sw a0, 0(sp) +; RV32IA-NEXT: sw a1, 4(sp) +; RV32IA-NEXT: fld fa0, 0(sp) +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_f64_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_f64_monotonic: +; RV64IA: # %bb.0: +; RV64IA-NEXT: ld a0, 0(a0) +; RV64IA-NEXT: fmv.d.x fa0, a0 +; RV64IA-NEXT: ret + %1 = load atomic double, ptr %a monotonic, align 8 + ret double %1 +} + +define double @atomic_load_f64_acquire(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_f64_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 2 +; RV32I-NEXT: call __atomic_load_8 +; RV32I-NEXT: sw a0, 0(sp) +; RV32I-NEXT: sw a1, 4(sp) +; RV32I-NEXT: fld fa0, 0(sp) +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: 
addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_f64_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: li a1, 2 +; RV32IA-NEXT: call __atomic_load_8 +; RV32IA-NEXT: sw a0, 0(sp) +; RV32IA-NEXT: sw a1, 4(sp) +; RV32IA-NEXT: fld fa0, 0(sp) +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_f64_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 2 +; RV64I-NEXT: call __atomic_load_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_f64_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: ld a0, 0(a0) +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: fmv.d.x fa0, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_f64_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: ld a0, 0(a0) +; RV64IA-TSO-NEXT: fmv.d.x fa0, a0 +; RV64IA-TSO-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_f64_acquire: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: ld a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.d.x fa0, a0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_f64_acquire: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: ld a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.d.x fa0, a0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + %1 = load atomic double, ptr %a acquire, align 8 + ret double %1 +} + +define double @atomic_load_f64_seq_cst(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_f64_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 5 +; RV32I-NEXT: call __atomic_load_8 +; RV32I-NEXT: sw a0, 0(sp) +; RV32I-NEXT: sw a1, 4(sp) +; RV32I-NEXT: fld fa0, 0(sp) +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_f64_seq_cst: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: li a1, 5 +; RV32IA-NEXT: call __atomic_load_8 +; RV32IA-NEXT: sw a0, 0(sp) +; RV32IA-NEXT: sw a1, 4(sp) +; RV32IA-NEXT: fld fa0, 0(sp) +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_f64_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 5 +; RV64I-NEXT: call __atomic_load_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_f64_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, rw +; RV64IA-WMO-NEXT: ld a0, 0(a0) +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: fmv.d.x fa0, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_f64_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: ld a0, 0(a0) +; RV64IA-TSO-NEXT: fmv.d.x fa0, a0 +; RV64IA-TSO-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_f64_seq_cst: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ld a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; 
RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.d.x fa0, a0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_f64_seq_cst: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-TSO-TRAILING-FENCE-NEXT: ld a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.d.x fa0, a0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + %1 = load atomic double, ptr %a seq_cst, align 8 + ret double %1 +} + +define void @atomic_store_f32_unordered(ptr %a, float %b) nounwind { +; RV32I-LABEL: atomic_store_f32_unordered: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: fmv.x.w a1, fa0 +; RV32I-NEXT: li a2, 0 +; RV32I-NEXT: call __atomic_store_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_f32_unordered: +; RV32IA: # %bb.0: +; RV32IA-NEXT: fmv.x.w a1, fa0 +; RV32IA-NEXT: sw a1, 0(a0) +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_f32_unordered: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 0 +; RV64I-NEXT: call __atomic_store_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_store_f32_unordered: +; RV64IA: # %bb.0: +; RV64IA-NEXT: fmv.x.w a1, fa0 +; RV64IA-NEXT: sw a1, 0(a0) +; RV64IA-NEXT: ret + store atomic float %b, ptr %a unordered, align 4 + ret void +} + +define void @atomic_store_f32_monotonic(ptr %a, float %b) nounwind { +; RV32I-LABEL: atomic_store_f32_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: fmv.x.w a1, fa0 +; RV32I-NEXT: li a2, 0 +; RV32I-NEXT: call __atomic_store_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_f32_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: fmv.x.w a1, fa0 +; RV32IA-NEXT: sw a1, 0(a0) +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_f32_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 0 +; RV64I-NEXT: call __atomic_store_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_store_f32_monotonic: +; RV64IA: # %bb.0: +; RV64IA-NEXT: fmv.x.w a1, fa0 +; RV64IA-NEXT: sw a1, 0(a0) +; RV64IA-NEXT: ret + store atomic float %b, ptr %a monotonic, align 4 + ret void +} + +define void @atomic_store_f32_release(ptr %a, float %b) nounwind { +; RV32I-LABEL: atomic_store_f32_release: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a2, 3 +; RV32I-NEXT: fmv.x.w a1, fa0 +; RV32I-NEXT: call __atomic_store_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_store_f32_release: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: fence rw, w +; RV32IA-WMO-NEXT: fmv.x.w a1, fa0 +; RV32IA-WMO-NEXT: sw a1, 0(a0) +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_store_f32_release: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: fmv.x.w a1, fa0 +; RV32IA-TSO-NEXT: sw a1, 0(a0) +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_store_f32_release: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li 
a2, 3 +; RV64I-NEXT: call __atomic_store_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_store_f32_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, w +; RV64IA-WMO-NEXT: fmv.x.w a1, fa0 +; RV64IA-WMO-NEXT: sw a1, 0(a0) +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_f32_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fmv.x.w a1, fa0 +; RV64IA-TSO-NEXT: sw a1, 0(a0) +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_store_f32_release: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV32IA-WMO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0 +; RV32IA-WMO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_store_f32_release: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0 +; RV32IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_f32_release: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_f32_release: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + store atomic float %b, ptr %a release, align 4 + ret void +} + +define void @atomic_store_f32_seq_cst(ptr %a, float %b) nounwind { +; RV32I-LABEL: atomic_store_f32_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a2, 5 +; RV32I-NEXT: fmv.x.w a1, fa0 +; RV32I-NEXT: call __atomic_store_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_store_f32_seq_cst: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: fence rw, w +; RV32IA-WMO-NEXT: fmv.x.w a1, fa0 +; RV32IA-WMO-NEXT: sw a1, 0(a0) +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_store_f32_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: fmv.x.w a1, fa0 +; RV32IA-TSO-NEXT: sw a1, 0(a0) +; RV32IA-TSO-NEXT: fence rw, rw +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_store_f32_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 5 +; RV64I-NEXT: call __atomic_store_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_store_f32_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, w +; RV64IA-WMO-NEXT: fmv.x.w a1, fa0 +; RV64IA-WMO-NEXT: sw a1, 0(a0) +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_f32_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fmv.x.w a1, fa0 +; RV64IA-TSO-NEXT: sw a1, 0(a0) +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_store_f32_seq_cst: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV32IA-WMO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0 +; RV32IA-WMO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_store_f32_seq_cst: +; 
RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0 +; RV32IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_f32_seq_cst: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_f32_seq_cst: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + store atomic float %b, ptr %a seq_cst, align 4 + ret void +} + +define void @atomic_store_f64_unordered(ptr %a, double %b) nounwind { +; RV32I-LABEL: atomic_store_f64_unordered: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: fsd fa0, 0(sp) +; RV32I-NEXT: lw a1, 0(sp) +; RV32I-NEXT: lw a2, 4(sp) +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: call __atomic_store_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_f64_unordered: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: fsd fa0, 0(sp) +; RV32IA-NEXT: lw a1, 0(sp) +; RV32IA-NEXT: lw a2, 4(sp) +; RV32IA-NEXT: li a3, 0 +; RV32IA-NEXT: call __atomic_store_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_f64_unordered: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 0 +; RV64I-NEXT: call __atomic_store_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_store_f64_unordered: +; RV64IA: # %bb.0: +; RV64IA-NEXT: fmv.x.d a1, fa0 +; RV64IA-NEXT: sd a1, 0(a0) +; RV64IA-NEXT: ret + store atomic double %b, ptr %a unordered, align 8 + ret void +} + +define void @atomic_store_f64_monotonic(ptr %a, double %b) nounwind { +; RV32I-LABEL: atomic_store_f64_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: fsd fa0, 0(sp) +; RV32I-NEXT: lw a1, 0(sp) +; RV32I-NEXT: lw a2, 4(sp) +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: call __atomic_store_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_f64_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: fsd fa0, 0(sp) +; RV32IA-NEXT: lw a1, 0(sp) +; RV32IA-NEXT: lw a2, 4(sp) +; RV32IA-NEXT: li a3, 0 +; RV32IA-NEXT: call __atomic_store_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_f64_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 0 +; RV64I-NEXT: call __atomic_store_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_store_f64_monotonic: +; 
RV64IA: # %bb.0: +; RV64IA-NEXT: fmv.x.d a1, fa0 +; RV64IA-NEXT: sd a1, 0(a0) +; RV64IA-NEXT: ret + store atomic double %b, ptr %a monotonic, align 8 + ret void +} + +define void @atomic_store_f64_release(ptr %a, double %b) nounwind { +; RV32I-LABEL: atomic_store_f64_release: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: fsd fa0, 0(sp) +; RV32I-NEXT: lw a1, 0(sp) +; RV32I-NEXT: lw a2, 4(sp) +; RV32I-NEXT: li a3, 3 +; RV32I-NEXT: call __atomic_store_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_f64_release: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: fsd fa0, 0(sp) +; RV32IA-NEXT: lw a1, 0(sp) +; RV32IA-NEXT: lw a2, 4(sp) +; RV32IA-NEXT: li a3, 3 +; RV32IA-NEXT: call __atomic_store_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_f64_release: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 3 +; RV64I-NEXT: call __atomic_store_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_store_f64_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, w +; RV64IA-WMO-NEXT: fmv.x.d a1, fa0 +; RV64IA-WMO-NEXT: sd a1, 0(a0) +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_f64_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fmv.x.d a1, fa0 +; RV64IA-TSO-NEXT: sd a1, 0(a0) +; RV64IA-TSO-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_f64_release: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.x.d a1, fa0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: sd a1, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_f64_release: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.x.d a1, fa0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: sd a1, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + store atomic double %b, ptr %a release, align 8 + ret void +} + +define void @atomic_store_f64_seq_cst(ptr %a, double %b) nounwind { +; RV32I-LABEL: atomic_store_f64_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: fsd fa0, 0(sp) +; RV32I-NEXT: lw a1, 0(sp) +; RV32I-NEXT: lw a2, 4(sp) +; RV32I-NEXT: li a3, 5 +; RV32I-NEXT: call __atomic_store_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_f64_seq_cst: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: fsd fa0, 0(sp) +; RV32IA-NEXT: lw a1, 0(sp) +; RV32IA-NEXT: lw a2, 4(sp) +; RV32IA-NEXT: li a3, 5 +; RV32IA-NEXT: call __atomic_store_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_f64_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 5 +; RV64I-NEXT: call __atomic_store_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_store_f64_seq_cst: +; RV64IA-WMO: # 
%bb.0: +; RV64IA-WMO-NEXT: fence rw, w +; RV64IA-WMO-NEXT: fmv.x.d a1, fa0 +; RV64IA-WMO-NEXT: sd a1, 0(a0) +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_f64_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fmv.x.d a1, fa0 +; RV64IA-TSO-NEXT: sd a1, 0(a0) +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_f64_seq_cst: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.x.d a1, fa0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: sd a1, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_f64_seq_cst: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.x.d a1, fa0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: sd a1, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + store atomic double %b, ptr %a seq_cst, align 8 + ret void +} diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll index 3aea0f2..f595dfe 100644 --- a/llvm/test/Other/new-pm-lto-defaults.ll +++ b/llvm/test/Other/new-pm-lto-defaults.ll @@ -67,6 +67,7 @@ ; CHECK-O1-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-O-NEXT: Running pass: GlobalSplitPass ; CHECK-O-NEXT: Running pass: WholeProgramDevirtPass +; CHECK-O-NEXT: Running pass: NoRecurseLTOInferencePass ; CHECK-O23SZ-NEXT: Running pass: CoroEarlyPass ; CHECK-O1-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O23SZ-NEXT: Running pass: GlobalOptPass diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_address_taken.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_address_taken.ll new file mode 100644 index 0000000..bcdf75b --- /dev/null +++ b/llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_address_taken.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5 +; RUN: opt < %s -passes=norecurse-lto-inference -S | FileCheck %s + +; This test includes a call to a library function which is not marked as +; NoCallback. Function bob() does not have internal linkage, which prevents +; norecurse from being added. + +@.str = private unnamed_addr constant [12 x i8] c"Hello World\00", align 1 + +;. +; CHECK: @.str = private unnamed_addr constant [12 x i8] c"Hello World\00", align 1 +;. +define dso_local void @bob() { +; CHECK-LABEL: define dso_local void @bob() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @.str) +; CHECK-NEXT: ret void +; +entry: + %call = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @.str) + ret void +} + +declare i32 @printf(ptr readonly captures(none), ...) + +define dso_local i32 @main() norecurse { +; CHECK: Function Attrs: norecurse +; CHECK-LABEL: define dso_local i32 @main( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @bob() +; CHECK-NEXT: ret i32 0 +; +entry: + tail call void @bob() + ret i32 0 +} +;. +; CHECK: attributes #[[ATTR0]] = { norecurse } +;.
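Note: the test above and the one that follows pin down the same inference rule from opposite sides: a call to an external declaration blocks norecurse only when the declaration lacks nocallback and some function's address can escape the module. Below is a minimal C++ sketch of such a per-call-site check; it is illustrative only, the helper name is hypothetical, and the patch's real logic sits behind mayHaveRecursiveCallee, whose body is not shown in this hunk.

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"

using namespace llvm;

// Illustrative only: could this one call site re-enter the caller?
// AnyAddressTaken is true when some function in the module escapes, so an
// opaque external callee (e.g. printf without nocallback) might call back.
static bool callSiteMayRecurse(const CallBase &CB, bool AnyAddressTaken) {
  const Function *Callee = CB.getCalledFunction();
  if (!Callee)
    return true; // Indirect call: may reach any address-taken function.
  if (Callee->isDeclaration())
    return AnyAddressTaken && !CB.hasFnAttr(Attribute::NoCallback);
  return !Callee->doesNotRecurse();
}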
diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_no_address_taken.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_no_address_taken.ll new file mode 100644 index 0000000..a03b4ca --- /dev/null +++ b/llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_no_address_taken.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5 +; RUN: opt < %s -passes=norecurse-lto-inference -S | FileCheck %s + +; This test includes a call to a library function which is not marked as +; NoCallback. All functions except main() are internal and main is marked +; norecurse, so as not to block norecurse from being added to bob(). + +@.str = private unnamed_addr constant [12 x i8] c"Hello World\00", align 1 + +; Function Attrs: nofree noinline nounwind uwtable +;. +; CHECK: @.str = private unnamed_addr constant [12 x i8] c"Hello World\00", align 1 +;. +define internal void @bob() { +; CHECK: Function Attrs: norecurse +; CHECK-LABEL: define internal void @bob( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @.str) +; CHECK-NEXT: ret void +; +entry: + %call = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @.str) + ret void +} + +; Function Attrs: nofree nounwind +declare i32 @printf(ptr readonly captures(none), ...) + +; Function Attrs: nofree norecurse nounwind uwtable +define dso_local i32 @main() norecurse { +; CHECK: Function Attrs: norecurse +; CHECK-LABEL: define dso_local i32 @main( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @bob() +; CHECK-NEXT: ret i32 0 +; +entry: + tail call void @bob() + ret i32 0 +} +;. +; CHECK: attributes #[[ATTR0]] = { norecurse } +;. diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_lto.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_lto.ll new file mode 100644 index 0000000..5be707b --- /dev/null +++ b/llvm/test/Transforms/FunctionAttrs/norecurse_lto.ll @@ -0,0 +1,69 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5 +; RUN: opt < %s -passes=norecurse-lto-inference -S | FileCheck %s + +; This test includes a call graph which has a recursive function (foo2) which +; calls a non-recursive internal function (foo3) satisfying the norecurse +; attribute criteria.
+ + +define internal void @foo3() { +; CHECK: Function Attrs: norecurse +; CHECK-LABEL: define internal void @foo3( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: ret void +; + ret void +} + +define internal i32 @foo2(i32 %accum, i32 %n) { +; CHECK-LABEL: define internal i32 @foo2( +; CHECK-SAME: i32 [[ACCUM:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[RECURSE:.*]] +; CHECK: [[RECURSE]]: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[N]], 1 +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[ACCUM]], [[SUB]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 @foo2(i32 [[MUL]], i32 [[SUB]]) +; CHECK-NEXT: call void @foo3() +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[ACCUM]], %[[ENTRY]] ], [ [[CALL]], %[[RECURSE]] ] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + %cmp = icmp eq i32 %n, 0 + br i1 %cmp, label %exit, label %recurse + +recurse: + %sub = sub i32 %n, 1 + %mul = mul i32 %accum, %sub + %call = call i32 @foo2(i32 %mul, i32 %sub) + call void @foo3() + br label %exit + +exit: + %res = phi i32 [ %accum, %entry ], [ %call, %recurse ] + ret i32 %res +} + +define internal i32 @foo1() { +; CHECK-LABEL: define internal i32 @foo1() { +; CHECK-NEXT: [[RES:%.*]] = call i32 @foo2(i32 1, i32 5) +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @foo2(i32 1, i32 5) + ret i32 %res +} + +define dso_local i32 @main() { +; CHECK-LABEL: define dso_local i32 @main() { +; CHECK-NEXT: [[RES:%.*]] = call i32 @foo1() +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @foo1() + ret i32 %res +} +;. +; CHECK: attributes #[[ATTR0]] = { norecurse } +;. diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion.ll new file mode 100644 index 0000000..e351f60 --- /dev/null +++ b/llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion.ll @@ -0,0 +1,141 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5 +; RUN: opt < %s -passes=norecurse-lto-inference -S | FileCheck %s + +; This test includes a call graph with multiple SCCs. The purpose of this is +; to check that norecurse is not added when a function is part of a non-singular +; SCC.
+; There are three different SCCs in this test: +; SCC#1: f1, foo, bar, foo1, bar1 +; SCC#2: bar2, bar3, bar4 +; SCC#3: baz, fun +; None of these functions should be marked as norecurse + +define internal void @bar1() { +; CHECK-LABEL: define internal void @bar1() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @f1() +; CHECK-NEXT: ret void +; +entry: + tail call void @f1() + ret void +} + +define internal void @f1() { +; CHECK-LABEL: define internal void @f1() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @foo() +; CHECK-NEXT: tail call void @bar2() +; CHECK-NEXT: tail call void @baz() +; CHECK-NEXT: ret void +; +entry: + tail call void @foo() + tail call void @bar2() + tail call void @baz() + ret void +} + +define dso_local i32 @main() norecurse { +; CHECK: Function Attrs: norecurse +; CHECK-LABEL: define dso_local i32 @main( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @f1() +; CHECK-NEXT: ret i32 0 +; +entry: + tail call void @f1() + ret i32 0 +} + +define internal void @foo1() { +; CHECK-LABEL: define internal void @foo1() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @bar1() +; CHECK-NEXT: ret void +; +entry: + tail call void @bar1() + ret void +} + +define internal void @bar() { +; CHECK-LABEL: define internal void @bar() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @foo1() +; CHECK-NEXT: ret void +; +entry: + tail call void @foo1() + ret void +} + +define internal void @foo() { +; CHECK-LABEL: define internal void @foo() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @bar() +; CHECK-NEXT: ret void +; +entry: + tail call void @bar() + ret void +} + +define internal void @bar4() { +; CHECK-LABEL: define internal void @bar4() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @bar2() +; CHECK-NEXT: ret void +; +entry: + tail call void @bar2() + ret void +} + +define internal void @bar2() { +; CHECK-LABEL: define internal void @bar2() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @bar3() +; CHECK-NEXT: ret void +; +entry: + tail call void @bar3() + ret void +} + +define internal void @bar3() { +; CHECK-LABEL: define internal void @bar3() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @bar4() +; CHECK-NEXT: ret void +; +entry: + tail call void @bar4() + ret void +} + +define internal void @fun() { +; CHECK-LABEL: define internal void @fun() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @baz() +; CHECK-NEXT: ret void +; +entry: + tail call void @baz() + ret void +} + +define internal void @baz() { +; CHECK-LABEL: define internal void @baz() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @fun() +; CHECK-NEXT: ret void +; +entry: + tail call void @fun() + ret void +} +;. +; CHECK: attributes #[[ATTR0]] = { norecurse } +;. diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion1.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion1.ll new file mode 100644 index 0000000..cd94037 --- /dev/null +++ b/llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion1.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5 +; RUN: opt < %s -passes=norecurse-lto-inference -S | FileCheck %s + +; This test includes a call graph with multiple SCCs. 
The purpose of this is +; to check that norecurse is added to a function which calls functions that +; are indirectly recursive but is not itself part of the recursive chain. +; There are two SCCs in this test: +; SCC#1: bar2, bar3, bar4 +; SCC#2: baz, fun +; f1() calls bar2 and baz, both of which are part of some indirect recursive +; chain, but neither calls back into f1(), and hence f1() can be marked as +; norecurse. + +define dso_local i32 @main() norecurse { +; CHECK: Function Attrs: norecurse +; CHECK-LABEL: define dso_local i32 @main( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @f1() +; CHECK-NEXT: ret i32 0 +; +entry: + tail call void @f1() + ret i32 0 +} + +define internal void @f1() { +; CHECK: Function Attrs: norecurse +; CHECK-LABEL: define internal void @f1( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @bar2() +; CHECK-NEXT: tail call void @baz() +; CHECK-NEXT: ret void +; +entry: + tail call void @bar2() + tail call void @baz() + ret void +} + +define internal void @bar4() { +; CHECK-LABEL: define internal void @bar4() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @bar2() +; CHECK-NEXT: ret void +; +entry: + tail call void @bar2() + ret void +} + +define internal void @bar2() { +; CHECK-LABEL: define internal void @bar2() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @bar3() +; CHECK-NEXT: ret void +; +entry: + tail call void @bar3() + ret void +} + +define internal void @bar3() { +; CHECK-LABEL: define internal void @bar3() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @bar4() +; CHECK-NEXT: ret void +; +entry: + tail call void @bar4() + ret void +} + +define internal void @fun() { +; CHECK-LABEL: define internal void @fun() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @baz() +; CHECK-NEXT: ret void +; +entry: + tail call void @baz() + ret void +} + +define internal void @baz() { +; CHECK-LABEL: define internal void @baz() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @fun() +; CHECK-NEXT: ret void +; +entry: + tail call void @fun() + ret void +} +;. +; CHECK: attributes #[[ATTR0]] = { norecurse } +;. diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_multinode_refscc.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_multinode_refscc.ll new file mode 100644 index 0000000..8b81a90 --- /dev/null +++ b/llvm/test/Transforms/FunctionAttrs/norecurse_multinode_refscc.ll @@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5 +; RUN: opt -passes=norecurse-lto-inference -S %s | FileCheck %s + +; This is a negative test which results in a RefSCC with size > 1.
+; RefSCC : [(f2), (f1)] +; --- SCC A (f1) --- size() = 1 +define internal void @f1() { +; CHECK-LABEL: define internal void @f1() { +; CHECK-NEXT: call void @f2() +; CHECK-NEXT: ret void +; + call void @f2() + ret void +} + +; --- SCC B (f2) --- size() = 1 +; f2 indirectly calls f1 using a locally allocated function pointer +define internal void @f2() { +; CHECK-LABEL: define internal void @f2() { +; CHECK-NEXT: [[FP:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: store ptr @f1, ptr [[FP]], align 8 +; CHECK-NEXT: [[TMP:%.*]] = load ptr, ptr [[FP]], align 8 +; CHECK-NEXT: call void [[TMP]]() +; CHECK-NEXT: ret void +; + %fp = alloca void ()* + store void ()* @f1, void ()** %fp + %tmp = load void ()*, void ()** %fp + call void %tmp() + ret void +} + +define i32 @main() { +; CHECK-LABEL: define i32 @main() { +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: ret i32 0 +; + call void @f1() + ret i32 0 +} + diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_self_recursive_callee.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_self_recursive_callee.ll new file mode 100644 index 0000000..461e5df --- /dev/null +++ b/llvm/test/Transforms/FunctionAttrs/norecurse_self_recursive_callee.ll @@ -0,0 +1,88 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5 +; RUN: opt < %s -passes=norecurse-lto-inference -S | FileCheck %s + +; This test includes a call graph with a self-recursive function. +; The purpose of this is to check that norecurse is added to functions +; which have a self-recursive function in the call-chain. +; The call-chain in this test is as follows: +; main -> bob -> callee1 -> callee2 +; where callee2 is self-recursive. + +@x = dso_local global i32 4, align 4 +@y = dso_local global i32 2, align 4 + +;. +; CHECK: @x = dso_local global i32 4, align 4 +; CHECK: @y = dso_local global i32 2, align 4 +;.
+define internal void @callee2() { +; CHECK: Function Attrs: norecurse +; CHECK-LABEL: define internal void @callee2( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @y, align 4 +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 +; CHECK-NEXT: store volatile i32 [[INC]], ptr @y, align 4 +; CHECK-NEXT: ret void +; +entry: + %0 = load volatile i32, ptr @y, align 4 + %inc = add nsw i32 %0, 1 + store volatile i32 %inc, ptr @y, align 4 + ret void +} + +define internal void @callee1(i32 %x) { +; CHECK-LABEL: define internal void @callee1( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X]], 0 +; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: tail call void @callee1(i32 [[X]]) +; CHECK-NEXT: br label %[[IF_END]] +; CHECK: [[IF_END]]: +; CHECK-NEXT: tail call void @callee2() +; CHECK-NEXT: ret void +; +entry: + %cmp = icmp sgt i32 %x, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @callee1(i32 %x) + br label %if.end + +if.end: ; preds = %if.then, %entry + tail call void @callee2() + ret void +} + +define internal void @bob() { +; CHECK-LABEL: define internal void @bob() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @x, align 4 +; CHECK-NEXT: tail call void @callee2(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +entry: + %0 = load volatile i32, ptr @x, align 4 + tail call void @callee2(i32 %0) + ret void +} + +define dso_local i32 @main() norecurse { +; CHECK: Function Attrs: norecurse +; CHECK-LABEL: define dso_local i32 @main( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @bob() +; CHECK-NEXT: ret i32 0 +; +entry: + tail call void @bob() + ret i32 0 +} +;. +; CHECK: attributes #[[ATTR0]] = { norecurse } +;. 
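Note: across the norecurse tests in this patch, the deciding structure is the SCC a function lands in: only a single-node SCC with no self edge can be a candidate, while the multi-node SCCs and self loops above must be skipped. The sketch below is a hedged illustration of that classification over the legacy CallGraph; the function name is hypothetical, and since the pass preserves LazyCallGraphAnalysis it presumably walks LazyCallGraph rather than this API.

#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/CallGraph.h"

using namespace llvm;

// Illustrative bottom-up walk: collect functions whose SCC is a single node
// with no self edge; only these can possibly be inferred norecurse.
static SmallVector<Function *> norecurseCandidates(CallGraph &CG) {
  SmallVector<Function *> Candidates;
  for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
    const std::vector<CallGraphNode *> &SCC = *I;
    // hasCycle() also catches a single node that calls itself, as in the
    // self-recursive test above.
    if (SCC.size() > 1 || I.hasCycle())
      continue;
    if (Function *F = SCC.front()->getFunction())
      if (!F->isDeclaration())
        Candidates.push_back(F);
  }
  return Candidates;
}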
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr162009.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr162009.ll new file mode 100644 index 0000000..6095b24 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr162009.ll @@ -0,0 +1,79 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -enable-epilogue-vectorization=false -S < %s | FileCheck %s --check-prefixes=CHECK-NO-PARTIAL-REDUCTION + +target triple = "aarch64" + +define i128 @add_reduc_i32_i128_unsupported(ptr %a, ptr %b) "target-features"="+dotprod" { +; CHECK-NO-PARTIAL-REDUCTION-LABEL: define i128 @add_reduc_i32_i128_unsupported( +; CHECK-NO-PARTIAL-REDUCTION-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[ENTRY:.*:]] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK-NO-PARTIAL-REDUCTION: [[VECTOR_PH]]: +; CHECK-NO-PARTIAL-REDUCTION-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-NO-PARTIAL-REDUCTION: [[VECTOR_BODY]]: +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[VEC_PHI:%.*]] = phi <4 x i128> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 1 +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP1:%.*]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64> +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP2]], align 1 +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[WIDE_LOAD1]] to <4 x i64> +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP4:%.*]] = mul nuw <4 x i64> [[TMP1]], [[TMP3]] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP5:%.*]] = zext <4 x i64> [[TMP4]] to <4 x i128> +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP7]] = add <4 x i128> [[VEC_PHI]], [[TMP5]] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4024 +; CHECK-NO-PARTIAL-REDUCTION-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NO-PARTIAL-REDUCTION: [[MIDDLE_BLOCK]]: +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP8:%.*]] = call i128 @llvm.vector.reduce.add.v4i128(<4 x i128> [[TMP7]]) +; CHECK-NO-PARTIAL-REDUCTION-NEXT: br label %[[SCALAR_PH:.*]] +; CHECK-NO-PARTIAL-REDUCTION: [[SCALAR_PH]]: +; CHECK-NO-PARTIAL-REDUCTION-NEXT: br label %[[FOR_BODY:.*]] +; CHECK-NO-PARTIAL-REDUCTION: [[FOR_BODY]]: +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[IV:%.*]] = phi i64 [ 4024, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[ACCUM:%.*]] = phi i128 [ [[TMP8]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[LOAD_A:%.*]] = load i32, ptr [[GEP_A]], align 1 +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[EXT_A:%.*]] = zext i32 [[LOAD_A]] to i64 +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[GEP_B:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[LOAD_B:%.*]] = load i32, ptr [[GEP_B]], align 1 +; 
CHECK-NO-PARTIAL-REDUCTION-NEXT: [[EXT_B:%.*]] = zext i32 [[LOAD_B]] to i64 +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[MUL:%.*]] = mul nuw i64 [[EXT_A]], [[EXT_B]] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[MUL_ZEXT:%.*]] = zext i64 [[MUL]] to i128 +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[ADD]] = add i128 [[ACCUM]], [[MUL_ZEXT]] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 4025 +; CHECK-NO-PARTIAL-REDUCTION-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_EXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NO-PARTIAL-REDUCTION: [[FOR_EXIT]]: +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[ADD_LCSSA:%.*]] = phi i128 [ [[ADD]], %[[FOR_BODY]] ] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: ret i128 [[ADD_LCSSA]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %accum = phi i128 [ 0, %entry ], [ %add, %for.body ] + %gep.a = getelementptr i32, ptr %a, i64 %iv + %load.a = load i32, ptr %gep.a, align 1 + %ext.a = zext i32 %load.a to i64 + %gep.b = getelementptr i32, ptr %b, i64 %iv + %load.b = load i32, ptr %gep.b, align 1 + %ext.b = zext i32 %load.b to i64 + %mul = mul nuw i64 %ext.a, %ext.b + %mul.zext = zext i64 %mul to i128 + %add = add i128 %accum, %mul.zext + %iv.next = add i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 4025 + br i1 %exitcond.not, label %for.exit, label %for.body + +for.exit: + ret i128 %add } +;. +; CHECK-NO-PARTIAL-REDUCTION: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK-NO-PARTIAL-REDUCTION: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-NO-PARTIAL-REDUCTION: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK-NO-PARTIAL-REDUCTION: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +;. diff --git a/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s b/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s index 65e1203..c8a5746 100644 --- a/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s +++ b/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s @@ -1,4 +1,6 @@ REQUIRES: aarch64-registered-target +// Flaky on SVE buildbots, disabled pending investigation. +UNSUPPORTED: target={{.*}} RUN: llvm-exegesis -mtriple=aarch64 -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=FMOVWSr --benchmark-phase=assemble-measured-code 2>&1 RUN: llvm-objdump -d %d > %t.s diff --git a/llvm/unittests/CAS/CMakeLists.txt b/llvm/unittests/CAS/CMakeLists.txt index 0f8fcb9..ee40e6c 100644 --- a/llvm/unittests/CAS/CMakeLists.txt +++ b/llvm/unittests/CAS/CMakeLists.txt @@ -8,6 +8,7 @@ add_llvm_unittest(CASTests ActionCacheTest.cpp CASTestConfig.cpp ObjectStoreTest.cpp + OnDiskDataAllocatorTest.cpp OnDiskTrieRawHashMapTest.cpp ProgramTest.cpp ) diff --git a/llvm/unittests/CAS/OnDiskDataAllocatorTest.cpp b/llvm/unittests/CAS/OnDiskDataAllocatorTest.cpp new file mode 100644 index 0000000..966fa03 --- /dev/null +++ b/llvm/unittests/CAS/OnDiskDataAllocatorTest.cpp @@ -0,0 +1,66 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/OnDiskDataAllocator.h" +#include "llvm/CAS/MappedFileRegionArena.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Testing/Support/Error.h" +#include "llvm/Testing/Support/SupportHelpers.h" + +#if LLVM_ENABLE_ONDISK_CAS + +using namespace llvm; +using namespace llvm::cas; + +TEST(OnDiskDataAllocatorTest, Allocate) { + unittest::TempDir Temp("data-allocator", /*Unique=*/true); + constexpr size_t MB = 1024u * 1024u; + + std::optional<OnDiskDataAllocator> Allocator; + ASSERT_THAT_ERROR(OnDiskDataAllocator::create( + Temp.path("allocator"), "data", /*MaxFileSize=*/MB, + /*NewFileInitialSize=*/std::nullopt) + .moveInto(Allocator), + Succeeded()); + + // Allocate. + { + for (size_t Size = 1; Size < 16; ++Size) { + OnDiskDataAllocator::OnDiskPtr P; + ASSERT_THAT_ERROR(Allocator->allocate(Size).moveInto(P), Succeeded()); + EXPECT_TRUE( + isAligned(MappedFileRegionArena::getAlign(), P.getOffset().get())); + } + } + + // Out of space. + { + OnDiskDataAllocator::OnDiskPtr P; + ASSERT_THAT_ERROR(Allocator->allocate(MB).moveInto(P), Failed()); + } + + // Check size and capacity. + { + ASSERT_EQ(Allocator->capacity(), MB); + ASSERT_LE(Allocator->size(), MB); + } + + // Get. + { + OnDiskDataAllocator::OnDiskPtr P; + ASSERT_THAT_ERROR(Allocator->allocate(32).moveInto(P), Succeeded()); + ArrayRef<char> Data; + ASSERT_THAT_ERROR(Allocator->get(P.getOffset(), 16).moveInto(Data), + Succeeded()); + ASSERT_THAT_ERROR(Allocator->get(P.getOffset(), 1025).moveInto(Data), + Failed()); + } +} + +#endif // LLVM_ENABLE_ONDISK_CAS diff --git a/llvm/unittests/CAS/OnDiskTrieRawHashMapTest.cpp b/llvm/unittests/CAS/OnDiskTrieRawHashMapTest.cpp index 7bedfe4..6034c70 100644 --- a/llvm/unittests/CAS/OnDiskTrieRawHashMapTest.cpp +++ b/llvm/unittests/CAS/OnDiskTrieRawHashMapTest.cpp @@ -71,7 +71,7 @@ TEST_P(OnDiskTrieRawHashMapTestFixture, General) { std::optional<FileOffset> Offset; std::optional<MutableArrayRef<char>> Data; { - std::optional<OnDiskTrieRawHashMap::pointer> Insertion; + std::optional<OnDiskTrieRawHashMap::OnDiskPtr> Insertion; ASSERT_THAT_ERROR(Trie1->insert({Hash0, Data0v1}).moveInto(Insertion), Succeeded()); EXPECT_EQ(Hash0, (*Insertion)->Hash); @@ -128,7 +128,7 @@ TEST_P(OnDiskTrieRawHashMapTestFixture, General) { // Recover from an offset. { - OnDiskTrieRawHashMap::const_pointer Recovered; + OnDiskTrieRawHashMap::ConstOnDiskPtr Recovered; ASSERT_THAT_ERROR(Trie1->recoverFromFileOffset(*Offset).moveInto(Recovered), Succeeded()); ASSERT_TRUE(Recovered); @@ -140,14 +140,14 @@ TEST_P(OnDiskTrieRawHashMapTestFixture, General) { // Recover from a bad offset. { FileOffset BadOffset(1); - OnDiskTrieRawHashMap::const_pointer Recovered; + OnDiskTrieRawHashMap::ConstOnDiskPtr Recovered; ASSERT_THAT_ERROR( Trie1->recoverFromFileOffset(BadOffset).moveInto(Recovered), Failed()); } // Insert another thing. 
{ - std::optional<OnDiskTrieRawHashMap::pointer> Insertion; + std::optional<OnDiskTrieRawHashMap::OnDiskPtr> Insertion; ASSERT_THAT_ERROR(Trie1->insert({Hash1, Data1}).moveInto(Insertion), Succeeded()); EXPECT_EQ(Hash1, (*Insertion)->Hash); @@ -210,7 +210,7 @@ TEST(OnDiskTrieRawHashMapTest, OutOfSpace) { auto Hash0 = ArrayRef(Hash0Bytes); constexpr StringLiteral Data0v1Bytes = "data0.v1"; ArrayRef<char> Data0v1 = ArrayRef(Data0v1Bytes.data(), Data0v1Bytes.size()); - std::optional<OnDiskTrieRawHashMap::pointer> Insertion; + std::optional<OnDiskTrieRawHashMap::OnDiskPtr> Insertion; ASSERT_THAT_ERROR(Trie->insert({Hash0, Data0v1}).moveInto(Insertion), Failed()); } diff --git a/mlir/cmake/modules/AddMLIRPython.cmake b/mlir/cmake/modules/AddMLIRPython.cmake index fa6aec8..ea34f94 100644 --- a/mlir/cmake/modules/AddMLIRPython.cmake +++ b/mlir/cmake/modules/AddMLIRPython.cmake @@ -123,12 +123,12 @@ function(mlir_generate_type_stubs) "IMPORT_PATHS;DEPENDS_TARGETS;OUTPUTS;DEPENDS_TARGET_SRC_DEPS" ${ARGN}) - # for people doing find_package(nanobind) + # for people using a nanobind distribution package (e.g., installed via pip) if(EXISTS ${nanobind_DIR}/../src/stubgen.py) set(NB_STUBGEN "${nanobind_DIR}/../src/stubgen.py") elseif(EXISTS ${nanobind_DIR}/../stubgen.py) set(NB_STUBGEN "${nanobind_DIR}/../stubgen.py") - # for people using FetchContent_Declare and FetchContent_MakeAvailable + # for people using the nanobind git source tree (e.g., via FetchContent_Declare and FetchContent_MakeAvailable) elseif(EXISTS ${nanobind_SOURCE_DIR}/src/stubgen.py) set(NB_STUBGEN "${nanobind_SOURCE_DIR}/src/stubgen.py") elseif(EXISTS ${nanobind_SOURCE_DIR}/stubgen.py) @@ -226,11 +226,10 @@ endfunction() # EMBED_CAPI_LINK_LIBS: Dependent CAPI libraries that this extension depends # on. These will be collected for all extensions and put into an # aggregate dylib that is linked against. -# PYTHON_BINDINGS_LIBRARY: Either pybind11 or nanobind. function(declare_mlir_python_extension name) cmake_parse_arguments(ARG "" - "ROOT_DIR;MODULE_NAME;ADD_TO_PARENT;PYTHON_BINDINGS_LIBRARY" + "ROOT_DIR;MODULE_NAME;ADD_TO_PARENT" "SOURCES;PRIVATE_LINK_LIBS;EMBED_CAPI_LINK_LIBS" ${ARGN}) @@ -239,20 +238,15 @@ function(declare_mlir_python_extension name) endif() set(_install_destination "src/python/${name}") - if(NOT ARG_PYTHON_BINDINGS_LIBRARY) - set(ARG_PYTHON_BINDINGS_LIBRARY "pybind11") - endif() - add_library(${name} INTERFACE) set_target_properties(${name} PROPERTIES # Yes: Leading-lowercase property names are load bearing and the recommended # way to do this: https://gitlab.kitware.com/cmake/cmake/-/issues/19261 - EXPORT_PROPERTIES "mlir_python_SOURCES_TYPE;mlir_python_EXTENSION_MODULE_NAME;mlir_python_EMBED_CAPI_LINK_LIBS;mlir_python_DEPENDS;mlir_python_BINDINGS_LIBRARY" + EXPORT_PROPERTIES "mlir_python_SOURCES_TYPE;mlir_python_EXTENSION_MODULE_NAME;mlir_python_EMBED_CAPI_LINK_LIBS;mlir_python_DEPENDS" mlir_python_SOURCES_TYPE extension mlir_python_EXTENSION_MODULE_NAME "${ARG_MODULE_NAME}" mlir_python_EMBED_CAPI_LINK_LIBS "${ARG_EMBED_CAPI_LINK_LIBS}" mlir_python_DEPENDS "" - mlir_python_BINDINGS_LIBRARY "${ARG_PYTHON_BINDINGS_LIBRARY}" ) # Set the interface source and link_libs properties of the target @@ -341,14 +335,12 @@ function(add_mlir_python_modules name) elseif(_source_type STREQUAL "extension") # Native CPP extension.
get_target_property(_module_name ${sources_target} mlir_python_EXTENSION_MODULE_NAME) - get_target_property(_bindings_library ${sources_target} mlir_python_BINDINGS_LIBRARY) # Transform relative source to based on root dir. set(_extension_target "${modules_target}.extension.${_module_name}.dso") add_mlir_python_extension(${_extension_target} "${_module_name}" INSTALL_COMPONENT ${modules_target} INSTALL_DIR "${ARG_INSTALL_PREFIX}/_mlir_libs" OUTPUT_DIRECTORY "${ARG_ROOT_PREFIX}/_mlir_libs" - PYTHON_BINDINGS_LIBRARY ${_bindings_library} LINK_LIBS PRIVATE ${sources_target} ${ARG_COMMON_CAPI_LINK_LIBS} @@ -753,7 +745,7 @@ endfunction() function(add_mlir_python_extension libname extname) cmake_parse_arguments(ARG "" - "INSTALL_COMPONENT;INSTALL_DIR;OUTPUT_DIRECTORY;PYTHON_BINDINGS_LIBRARY" + "INSTALL_COMPONENT;INSTALL_DIR;OUTPUT_DIRECTORY" "SOURCES;LINK_LIBS" ${ARGN}) if(ARG_UNPARSED_ARGUMENTS) @@ -761,7 +753,7 @@ function(add_mlir_python_extension libname extname) endif() # The extension itself must be compiled with RTTI and exceptions enabled. - # Also, some warning classes triggered by pybind11 are disabled. + # Also, some warning classes triggered by nanobind are disabled. set(eh_rtti_enable) if (MSVC) set(eh_rtti_enable /EHsc /GR) @@ -769,62 +761,53 @@ function(add_mlir_python_extension libname extname) set(eh_rtti_enable -frtti -fexceptions) endif () - # The actual extension library produces a shared-object or DLL and has - # sources that must be compiled in accordance with pybind11 needs (RTTI and - # exceptions). - if(NOT DEFINED ARG_PYTHON_BINDINGS_LIBRARY OR ARG_PYTHON_BINDINGS_LIBRARY STREQUAL "pybind11") - pybind11_add_module(${libname} - ${ARG_SOURCES} - ) - elseif(ARG_PYTHON_BINDINGS_LIBRARY STREQUAL "nanobind") - nanobind_add_module(${libname} - NB_DOMAIN ${MLIR_BINDINGS_PYTHON_NB_DOMAIN} - FREE_THREADED - ${ARG_SOURCES} - ) + nanobind_add_module(${libname} + NB_DOMAIN ${MLIR_BINDINGS_PYTHON_NB_DOMAIN} + FREE_THREADED + ${ARG_SOURCES} + ) - if (NOT MLIR_DISABLE_CONFIGURE_PYTHON_DEV_PACKAGES - AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL)) - # Avoid some warnings from upstream nanobind. - # If a superproject set MLIR_DISABLE_CONFIGURE_PYTHON_DEV_PACKAGES, let - # the super project handle compile options as it wishes. - get_property(NB_LIBRARY_TARGET_NAME TARGET ${libname} PROPERTY LINK_LIBRARIES) - target_compile_options(${NB_LIBRARY_TARGET_NAME} - PRIVATE - -Wall -Wextra -Wpedantic - -Wno-c++98-compat-extra-semi - -Wno-cast-qual - -Wno-covered-switch-default - -Wno-deprecated-literal-operator - -Wno-nested-anon-types - -Wno-unused-parameter - -Wno-zero-length-array - ${eh_rtti_enable}) - - target_compile_options(${libname} - PRIVATE - -Wall -Wextra -Wpedantic - -Wno-c++98-compat-extra-semi - -Wno-cast-qual - -Wno-covered-switch-default - -Wno-deprecated-literal-operator - -Wno-nested-anon-types - -Wno-unused-parameter - -Wno-zero-length-array - ${eh_rtti_enable}) - endif() + if (NOT MLIR_DISABLE_CONFIGURE_PYTHON_DEV_PACKAGES + AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL)) + # Avoid some warnings from upstream nanobind. + # If a superproject set MLIR_DISABLE_CONFIGURE_PYTHON_DEV_PACKAGES, let + # the super project handle compile options as it wishes. 
+ get_property(NB_LIBRARY_TARGET_NAME TARGET ${libname} PROPERTY LINK_LIBRARIES) + target_compile_options(${NB_LIBRARY_TARGET_NAME} + PRIVATE + -Wall -Wextra -Wpedantic + -Wno-c++98-compat-extra-semi + -Wno-cast-qual + -Wno-covered-switch-default + -Wno-deprecated-literal-operator + -Wno-nested-anon-types + -Wno-unused-parameter + -Wno-zero-length-array + ${eh_rtti_enable}) + + target_compile_options(${libname} + PRIVATE + -Wall -Wextra -Wpedantic + -Wno-c++98-compat-extra-semi + -Wno-cast-qual + -Wno-covered-switch-default + -Wno-deprecated-literal-operator + -Wno-nested-anon-types + -Wno-unused-parameter + -Wno-zero-length-array + ${eh_rtti_enable}) + endif() - if(APPLE) - # NanobindAdaptors.h uses PyClassMethod_New to build `pure_subclass`es but nanobind - # doesn't declare this API as undefined in its linker flags. So we need to declare it as such - # for downstream users that do not do something like `-undefined dynamic_lookup`. - # Same for the rest. - target_link_options(${libname} PUBLIC - "LINKER:-U,_PyClassMethod_New" - "LINKER:-U,_PyCode_Addr2Location" - "LINKER:-U,_PyFrame_GetLasti" - ) - endif() + if(APPLE) + # NanobindAdaptors.h uses PyClassMethod_New to build `pure_subclass`es but nanobind + # doesn't declare this API as undefined in its linker flags. So we need to declare it as such + # for downstream users that do not do something like `-undefined dynamic_lookup`. + # Same for the rest. + target_link_options(${libname} PUBLIC + "LINKER:-U,_PyClassMethod_New" + "LINKER:-U,_PyCode_Addr2Location" + "LINKER:-U,_PyFrame_GetLasti" + ) endif() target_compile_options(${libname} PRIVATE ${eh_rtti_enable}) @@ -862,11 +845,11 @@ function(add_mlir_python_extension libname extname) if(WIN32) # On Windows, pyconfig.h (and by extension python.h) hardcode the version of the # python library which will be used for linkage depending on the flavor of the build. - # pybind11 has a workaround which depends on the definition of Py_DEBUG (if Py_DEBUG - # is not passed in as a compile definition, pybind11 undefs _DEBUG when including + # nanobind has a workaround which depends on the definition of Py_DEBUG (if Py_DEBUG + # is not passed in as a compile definition, nanobind undefs _DEBUG when including # python.h, so that the release python library would be used). - # Since mlir uses pybind11, we can leverage their workaround by never directly - pyconfig.h or python.h and instead relying on the pybind11 headers to include the + # Since mlir uses nanobind, we can leverage their workaround by never directly + including pyconfig.h or python.h and instead relying on the nanobind headers to include the # necessary python headers. This results in mlir always linking against the # release python library via the (undocumented) cmake property Python3_LIBRARY_RELEASE.
target_link_libraries(${libname} PRIVATE ${Python3_LIBRARY_RELEASE}) diff --git a/mlir/cmake/modules/MLIRDetectPythonEnv.cmake b/mlir/cmake/modules/MLIRDetectPythonEnv.cmake index d18f8c0..edbad2e 100644 --- a/mlir/cmake/modules/MLIRDetectPythonEnv.cmake +++ b/mlir/cmake/modules/MLIRDetectPythonEnv.cmake @@ -46,81 +46,20 @@ macro(mlir_configure_python_dev_packages) message(STATUS "Found python include dirs: ${Python3_INCLUDE_DIRS}") message(STATUS "Found python libraries: ${Python3_LIBRARIES}") message(STATUS "Found numpy v${Python3_NumPy_VERSION}: ${Python3_NumPy_INCLUDE_DIRS}") - mlir_detect_pybind11_install() - find_package(pybind11 2.10 CONFIG REQUIRED) - message(STATUS "Found pybind11 v${pybind11_VERSION}: ${pybind11_INCLUDE_DIR}") - message(STATUS "Python prefix = '${PYTHON_MODULE_PREFIX}', " - "suffix = '${PYTHON_MODULE_SUFFIX}', " - "extension = '${PYTHON_MODULE_EXTENSION}") - - mlir_detect_nanobind_install() - find_package(nanobind 2.9 CONFIG REQUIRED) - message(STATUS "Found nanobind v${nanobind_VERSION}: ${nanobind_INCLUDE_DIR}") - message(STATUS "Python prefix = '${PYTHON_MODULE_PREFIX}', " - "suffix = '${PYTHON_MODULE_SUFFIX}', " - "extension = '${PYTHON_MODULE_EXTENSION}") - endif() -endmacro() - -# Detects a pybind11 package installed in the current python environment -# and sets variables to allow it to be found. This allows pybind11 to be -# installed via pip, which typically yields a much more recent version than -# the OS install, which will be available otherwise. -function(mlir_detect_pybind11_install) - if(pybind11_DIR) - message(STATUS "Using explicit pybind11 cmake directory: ${pybind11_DIR} (-Dpybind11_DIR to change)") - else() - message(STATUS "Checking for pybind11 in python path...") - execute_process( - COMMAND "${Python3_EXECUTABLE}" - -c "import pybind11;print(pybind11.get_cmake_dir(), end='')" - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - RESULT_VARIABLE STATUS - OUTPUT_VARIABLE PACKAGE_DIR - ERROR_QUIET) - if(NOT STATUS EQUAL "0") - message(STATUS "not found (install via 'pip install pybind11' or set pybind11_DIR)") - return() - endif() - message(STATUS "found (${PACKAGE_DIR})") - set(pybind11_DIR "${PACKAGE_DIR}" PARENT_SCOPE) - endif() -endfunction() - - -# Detects a nanobind package installed in the current python environment -# and sets variables to allow it to be found. This allows nanobind to be -# installed via pip, which typically yields a much more recent version than -# the OS install, which will be available otherwise. 
-function(mlir_detect_nanobind_install) - if(nanobind_DIR) - message(STATUS "Using explicit nanobind cmake directory: ${nanobind_DIR} (-Dnanobind_DIR to change)") - else() - message(STATUS "Checking for nanobind in python path...") - execute_process( - COMMAND "${Python3_EXECUTABLE}" - -c "import nanobind;print(nanobind.cmake_dir(), end='')" - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - RESULT_VARIABLE STATUS - OUTPUT_VARIABLE PACKAGE_DIR - ERROR_QUIET) - if(NOT STATUS EQUAL "0") - message(STATUS "not found (install via 'pip install nanobind' or set nanobind_DIR)") - return() + message(STATUS "Python extension suffix for modules: '${Python3_SOABI}'") + if(nanobind_DIR) + message(STATUS "Using explicit nanobind cmake directory: ${nanobind_DIR} (-Dnanobind_DIR to change)") + find_package(nanobind 2.9 CONFIG REQUIRED) + else() + include(FetchContent) + FetchContent_Declare( + nanobind + GIT_REPOSITORY https://github.com/wjakob/nanobind.git + GIT_TAG v2.9.0 + GIT_SHALLOW TRUE + ) + FetchContent_MakeAvailable(nanobind) endif() - message(STATUS "found (${PACKAGE_DIR})") - set(nanobind_DIR "${PACKAGE_DIR}" PARENT_SCOPE) - execute_process( - COMMAND "${Python3_EXECUTABLE}" - -c "import nanobind;print(nanobind.include_dir(), end='')" - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - RESULT_VARIABLE STATUS - OUTPUT_VARIABLE PACKAGE_DIR - ERROR_QUIET) - if(NOT STATUS EQUAL "0") - message(STATUS "not found (install via 'pip install nanobind' or set nanobind_DIR)") - return() - endif() - set(nanobind_INCLUDE_DIR "${PACKAGE_DIR}" PARENT_SCOPE) + message(STATUS "Found nanobind: ${NB_DIR}") endif() -endfunction() +endmacro() diff --git a/mlir/docs/Dialects/Linalg/OpDSL.md b/mlir/docs/Dialects/Linalg/OpDSL.md index b892bbe..5d7e274 100644 --- a/mlir/docs/Dialects/Linalg/OpDSL.md +++ b/mlir/docs/Dialects/Linalg/OpDSL.md @@ -16,7 +16,7 @@ corresponding `linalg.generic` IR for the composition. ## Basic usage The tool is bundled with the MLIR Python bindings. To use from the CMake build -tree, MLIR must be build with Python bindings enabled +tree, MLIR must be built with Python bindings enabled (`-DMLIR_ENABLE_BINDINGS_PYTHON=ON`). Then add the `python` directory in the build tree to your `PYTHONPATH` environment variable (i.e. `export PYTHONPATH=$PWD/build/tools/mlir/python_packages/mlir_core`). Optionally, use an @@ -24,7 +24,7 @@ installed MLIR package, if available, to avoid building. ```shell # Dump the `core_named_ops.py` module as YAML. 
-python -m mlir.dialects.linalg.opdsl.dump_oplib.ops.core_named_ops +python -m mlir.dialects.linalg.opdsl.dump_oplib .ops.core_named_ops ``` Alternatively, run the `$PWD/build/bin/update_core_linalg_named_ops.sh` script, diff --git a/mlir/examples/standalone/pyproject.toml b/mlir/examples/standalone/pyproject.toml index 5a1e6e8..75e2153 100644 --- a/mlir/examples/standalone/pyproject.toml +++ b/mlir/examples/standalone/pyproject.toml @@ -23,9 +23,7 @@ Discussions = "https://discourse.llvm.org/" [build-system] requires = [ "scikit-build-core>=0.10.7", - "typing_extensions>=4.12.2", - "nanobind>=2.9, <3.0", - "pybind11>=2.10.0, <=2.13.6", + "typing_extensions>=4.12.2" ] build-backend = "scikit_build_core.build" diff --git a/mlir/examples/standalone/python/CMakeLists.txt b/mlir/examples/standalone/python/CMakeLists.txt index 905c9449..108c343 100644 --- a/mlir/examples/standalone/python/CMakeLists.txt +++ b/mlir/examples/standalone/python/CMakeLists.txt @@ -16,27 +16,10 @@ declare_mlir_dialect_python_bindings( ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir_standalone" TD_FILE dialects/StandaloneOps.td SOURCES - dialects/standalone_pybind11.py dialects/standalone_nanobind.py _mlir_libs/_standaloneDialectsNanobind/py.typed DIALECT_NAME standalone) - -declare_mlir_python_extension(StandalonePythonSources.Pybind11Extension - MODULE_NAME _standaloneDialectsPybind11 - ADD_TO_PARENT StandalonePythonSources - SOURCES - StandaloneExtensionPybind11.cpp - PRIVATE_LINK_LIBS - LLVMSupport - EMBED_CAPI_LINK_LIBS - MLIRCAPIIR - MLIRCAPIArith - MLIRCAPITransforms - StandaloneCAPI - PYTHON_BINDINGS_LIBRARY pybind11 -) - declare_mlir_python_extension(StandalonePythonSources.NanobindExtension MODULE_NAME _standaloneDialectsNanobind ADD_TO_PARENT StandalonePythonSources @@ -49,7 +32,6 @@ declare_mlir_python_extension(StandalonePythonSources.NanobindExtension MLIRCAPIArith MLIRCAPITransforms StandaloneCAPI - PYTHON_BINDINGS_LIBRARY nanobind ) diff --git a/mlir/examples/standalone/python/StandaloneExtensionPybind11.cpp b/mlir/examples/standalone/python/StandaloneExtensionPybind11.cpp deleted file mode 100644 index da8c216..0000000 --- a/mlir/examples/standalone/python/StandaloneExtensionPybind11.cpp +++ /dev/null @@ -1,38 +0,0 @@ -//===- StandaloneExtensionPybind11.cpp - Extension module -----------------===// -// -// This is the pybind11 version of the example module. There is also a nanobind -// example in StandaloneExtensionNanobind.cpp. -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "Standalone-c/Dialects.h" -#include "mlir-c/Dialect/Arith.h" -#include "mlir/Bindings/Python/PybindAdaptors.h" - -using namespace mlir::python::adaptors; - -PYBIND11_MODULE(_standaloneDialectsPybind11, m) { - //===--------------------------------------------------------------------===// - // standalone dialect - //===--------------------------------------------------------------------===// - auto standaloneM = m.def_submodule("standalone"); - - standaloneM.def( - "register_dialects", - [](MlirContext context, bool load) { - MlirDialectHandle arithHandle = mlirGetDialectHandle__arith__(); - MlirDialectHandle standaloneHandle = - mlirGetDialectHandle__standalone__(); - mlirDialectHandleRegisterDialect(arithHandle, context); - mlirDialectHandleRegisterDialect(standaloneHandle, context); - if (load) { - mlirDialectHandleLoadDialect(arithHandle, context); - mlirDialectHandleRegisterDialect(standaloneHandle, context); - } - }, - py::arg("context") = py::none(), py::arg("load") = true); -} diff --git a/mlir/examples/standalone/python/mlir_standalone/dialects/standalone_pybind11.py b/mlir/examples/standalone/python/mlir_standalone/dialects/standalone_pybind11.py deleted file mode 100644 index bfb98e40..0000000 --- a/mlir/examples/standalone/python/mlir_standalone/dialects/standalone_pybind11.py +++ /dev/null @@ -1,6 +0,0 @@ -# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -from ._standalone_ops_gen import * -from .._mlir_libs._standaloneDialectsPybind11.standalone import * diff --git a/mlir/examples/standalone/test/python/smoketest.py b/mlir/examples/standalone/test/python/smoketest.py index 26d84fd..f881984 100644 --- a/mlir/examples/standalone/test/python/smoketest.py +++ b/mlir/examples/standalone/test/python/smoketest.py @@ -1,16 +1,7 @@ -# RUN: %python %s pybind11 | FileCheck %s # RUN: %python %s nanobind | FileCheck %s -import sys from mlir_standalone.ir import * - -if sys.argv[1] == "pybind11": - from mlir_standalone.dialects import standalone_pybind11 as standalone_d -elif sys.argv[1] == "nanobind": - from mlir_standalone.dialects import standalone_nanobind as standalone_d -else: - raise ValueError("Expected either pybind11 or nanobind as arguments") - +from mlir_standalone.dialects import standalone_nanobind as standalone_d with Context(): standalone_d.register_dialects() diff --git a/mlir/include/mlir/Bindings/Python/PybindAdaptors.h b/mlir/include/mlir/Bindings/Python/PybindAdaptors.h deleted file mode 100644 index edc6977..0000000 --- a/mlir/include/mlir/Bindings/Python/PybindAdaptors.h +++ /dev/null @@ -1,616 +0,0 @@ -//===- PybindAdaptors.h - Interop with MLIR APIs via pybind11 -------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// This file contains adaptors for clients of the core MLIR Python APIs to -// interop via MLIR CAPI types, using pybind11. 
The facilities here do not -// depend on implementation details of the MLIR Python API and do not introduce -// C++-level dependencies with it (requiring only Python and CAPI-level -// dependencies). -// -// It is encouraged to be used both in-tree and out-of-tree. For in-tree use -// cases, it should be used for dialect implementations (versus relying on -// Pybind-based internals of the core libraries). -//===----------------------------------------------------------------------===// - -#ifndef MLIR_BINDINGS_PYTHON_PYBINDADAPTORS_H -#define MLIR_BINDINGS_PYTHON_PYBINDADAPTORS_H - -#include <pybind11/functional.h> -#include <pybind11/pybind11.h> -#include <pybind11/pytypes.h> -#include <pybind11/stl.h> - -#include "mlir-c/Bindings/Python/Interop.h" -#include "mlir-c/Diagnostics.h" -#include "mlir-c/IR.h" - -#include "llvm/ADT/Twine.h" - -namespace py = pybind11; -using namespace py::literals; - -// Raw CAPI type casters need to be declared before use, so always include them -// first. -namespace pybind11 { -namespace detail { - -/// Helper to convert a presumed MLIR API object to a capsule, accepting either -/// an explicit Capsule (which can happen when two C APIs are communicating -/// directly via Python) or indirectly by querying the MLIR_PYTHON_CAPI_PTR_ATTR -/// attribute (through which supported MLIR Python API objects export their -/// contained API pointer as a capsule). Throws a type error if the object is -/// neither. This is intended to be used from type casters, which are invoked -/// with a raw handle (unowned). The returned object's lifetime may not extend -/// beyond the apiObject handle without explicitly having its refcount increased -/// (i.e. on return). -static py::object mlirApiObjectToCapsule(py::handle apiObject) { - if (PyCapsule_CheckExact(apiObject.ptr())) - return py::reinterpret_borrow<py::object>(apiObject); - if (!py::hasattr(apiObject, MLIR_PYTHON_CAPI_PTR_ATTR)) { - auto repr = py::repr(apiObject).cast<std::string>(); - throw py::type_error( - (llvm::Twine("Expected an MLIR object (got ") + repr + ").").str()); - } - return apiObject.attr(MLIR_PYTHON_CAPI_PTR_ATTR); -} - -// Note: Currently all of the following support cast from py::object to the -// Mlir* C-API type, but only a few light-weight, context-bound ones -// implicitly cast the other way because the use case has not yet emerged and -// ownership is unclear. - -/// Casts object <-> MlirAffineMap. -template <> -struct type_caster<MlirAffineMap> { - PYBIND11_TYPE_CASTER(MlirAffineMap, _("MlirAffineMap")); - bool load(handle src, bool) { - py::object capsule = mlirApiObjectToCapsule(src); - value = mlirPythonCapsuleToAffineMap(capsule.ptr()); - if (mlirAffineMapIsNull(value)) { - return false; - } - return !mlirAffineMapIsNull(value); - } - static handle cast(MlirAffineMap v, return_value_policy, handle) { - py::object capsule = - py::reinterpret_steal<py::object>(mlirPythonAffineMapToCapsule(v)); - return py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("AffineMap") - .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) - .release(); - } -}; - -/// Casts object <-> MlirAttribute. 
-template <> -struct type_caster<MlirAttribute> { - PYBIND11_TYPE_CASTER(MlirAttribute, _("MlirAttribute")); - bool load(handle src, bool) { - py::object capsule = mlirApiObjectToCapsule(src); - value = mlirPythonCapsuleToAttribute(capsule.ptr()); - return !mlirAttributeIsNull(value); - } - static handle cast(MlirAttribute v, return_value_policy, handle) { - py::object capsule = - py::reinterpret_steal<py::object>(mlirPythonAttributeToCapsule(v)); - return py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("Attribute") - .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) - .attr(MLIR_PYTHON_MAYBE_DOWNCAST_ATTR)() - .release(); - } -}; - -/// Casts object -> MlirBlock. -template <> -struct type_caster<MlirBlock> { - PYBIND11_TYPE_CASTER(MlirBlock, _("MlirBlock")); - bool load(handle src, bool) { - py::object capsule = mlirApiObjectToCapsule(src); - value = mlirPythonCapsuleToBlock(capsule.ptr()); - return !mlirBlockIsNull(value); - } -}; - -/// Casts object -> MlirContext. -template <> -struct type_caster<MlirContext> { - PYBIND11_TYPE_CASTER(MlirContext, _("MlirContext")); - bool load(handle src, bool) { - if (src.is_none()) { - // Gets the current thread-bound context. - // TODO: This raises an error of "No current context" currently. - // Update the implementation to pretty-print the helpful error that the - // core implementations print in this case. - src = py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("Context") - .attr("current"); - } - py::object capsule = mlirApiObjectToCapsule(src); - value = mlirPythonCapsuleToContext(capsule.ptr()); - return !mlirContextIsNull(value); - } -}; - -/// Casts object <-> MlirDialectRegistry. -template <> -struct type_caster<MlirDialectRegistry> { - PYBIND11_TYPE_CASTER(MlirDialectRegistry, _("MlirDialectRegistry")); - bool load(handle src, bool) { - py::object capsule = mlirApiObjectToCapsule(src); - value = mlirPythonCapsuleToDialectRegistry(capsule.ptr()); - return !mlirDialectRegistryIsNull(value); - } - static handle cast(MlirDialectRegistry v, return_value_policy, handle) { - py::object capsule = py::reinterpret_steal<py::object>( - mlirPythonDialectRegistryToCapsule(v)); - return py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("DialectRegistry") - .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) - .release(); - } -}; - -/// Casts object <-> MlirLocation. -template <> -struct type_caster<MlirLocation> { - PYBIND11_TYPE_CASTER(MlirLocation, _("MlirLocation")); - bool load(handle src, bool) { - if (src.is_none()) { - // Gets the current thread-bound context. - src = py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("Location") - .attr("current"); - } - py::object capsule = mlirApiObjectToCapsule(src); - value = mlirPythonCapsuleToLocation(capsule.ptr()); - return !mlirLocationIsNull(value); - } - static handle cast(MlirLocation v, return_value_policy, handle) { - py::object capsule = - py::reinterpret_steal<py::object>(mlirPythonLocationToCapsule(v)); - return py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("Location") - .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) - .release(); - } -}; - -/// Casts object <-> MlirModule. 
-template <> -struct type_caster<MlirModule> { - PYBIND11_TYPE_CASTER(MlirModule, _("MlirModule")); - bool load(handle src, bool) { - py::object capsule = mlirApiObjectToCapsule(src); - value = mlirPythonCapsuleToModule(capsule.ptr()); - return !mlirModuleIsNull(value); - } - static handle cast(MlirModule v, return_value_policy, handle) { - py::object capsule = - py::reinterpret_steal<py::object>(mlirPythonModuleToCapsule(v)); - return py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("Module") - .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) - .release(); - }; -}; - -/// Casts object <-> MlirFrozenRewritePatternSet. -template <> -struct type_caster<MlirFrozenRewritePatternSet> { - PYBIND11_TYPE_CASTER(MlirFrozenRewritePatternSet, - _("MlirFrozenRewritePatternSet")); - bool load(handle src, bool) { - py::object capsule = mlirApiObjectToCapsule(src); - value = mlirPythonCapsuleToFrozenRewritePatternSet(capsule.ptr()); - return value.ptr != nullptr; - } - static handle cast(MlirFrozenRewritePatternSet v, return_value_policy, - handle) { - py::object capsule = py::reinterpret_steal<py::object>( - mlirPythonFrozenRewritePatternSetToCapsule(v)); - return py::module::import(MAKE_MLIR_PYTHON_QUALNAME("rewrite")) - .attr("FrozenRewritePatternSet") - .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) - .release(); - }; -}; - -/// Casts object <-> MlirOperation. -template <> -struct type_caster<MlirOperation> { - PYBIND11_TYPE_CASTER(MlirOperation, _("MlirOperation")); - bool load(handle src, bool) { - py::object capsule = mlirApiObjectToCapsule(src); - value = mlirPythonCapsuleToOperation(capsule.ptr()); - return !mlirOperationIsNull(value); - } - static handle cast(MlirOperation v, return_value_policy, handle) { - if (v.ptr == nullptr) - return py::none(); - py::object capsule = - py::reinterpret_steal<py::object>(mlirPythonOperationToCapsule(v)); - return py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("Operation") - .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) - .release(); - }; -}; - -/// Casts object <-> MlirValue. -template <> -struct type_caster<MlirValue> { - PYBIND11_TYPE_CASTER(MlirValue, _("MlirValue")); - bool load(handle src, bool) { - py::object capsule = mlirApiObjectToCapsule(src); - value = mlirPythonCapsuleToValue(capsule.ptr()); - return !mlirValueIsNull(value); - } - static handle cast(MlirValue v, return_value_policy, handle) { - if (v.ptr == nullptr) - return py::none(); - py::object capsule = - py::reinterpret_steal<py::object>(mlirPythonValueToCapsule(v)); - return py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("Value") - .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) - .attr(MLIR_PYTHON_MAYBE_DOWNCAST_ATTR)() - .release(); - }; -}; - -/// Casts object -> MlirPassManager. -template <> -struct type_caster<MlirPassManager> { - PYBIND11_TYPE_CASTER(MlirPassManager, _("MlirPassManager")); - bool load(handle src, bool) { - py::object capsule = mlirApiObjectToCapsule(src); - value = mlirPythonCapsuleToPassManager(capsule.ptr()); - return !mlirPassManagerIsNull(value); - } -}; - -/// Casts object <-> MlirTypeID. 
-template <> -struct type_caster<MlirTypeID> { - PYBIND11_TYPE_CASTER(MlirTypeID, _("MlirTypeID")); - bool load(handle src, bool) { - py::object capsule = mlirApiObjectToCapsule(src); - value = mlirPythonCapsuleToTypeID(capsule.ptr()); - return !mlirTypeIDIsNull(value); - } - static handle cast(MlirTypeID v, return_value_policy, handle) { - if (v.ptr == nullptr) - return py::none(); - py::object capsule = - py::reinterpret_steal<py::object>(mlirPythonTypeIDToCapsule(v)); - return py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("TypeID") - .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) - .release(); - }; -}; - -/// Casts object <-> MlirType. -template <> -struct type_caster<MlirType> { - PYBIND11_TYPE_CASTER(MlirType, _("MlirType")); - bool load(handle src, bool) { - py::object capsule = mlirApiObjectToCapsule(src); - value = mlirPythonCapsuleToType(capsule.ptr()); - return !mlirTypeIsNull(value); - } - static handle cast(MlirType t, return_value_policy, handle) { - py::object capsule = - py::reinterpret_steal<py::object>(mlirPythonTypeToCapsule(t)); - return py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("Type") - .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule) - .attr(MLIR_PYTHON_MAYBE_DOWNCAST_ATTR)() - .release(); - } -}; - -} // namespace detail -} // namespace pybind11 - -namespace mlir { -namespace python { -namespace adaptors { - -/// Provides a facility like py::class_ for defining a new class in a scope, -/// but this allows extension of an arbitrary Python class, defining methods -/// on it is a similar way. Classes defined in this way are very similar to -/// if defined in Python in the usual way but use Pybind11 machinery to do -/// it. These are not "real" Pybind11 classes but pure Python classes with no -/// relation to a concrete C++ class. -/// -/// Derived from a discussion upstream: -/// https://github.com/pybind/pybind11/issues/1193 -/// (plus a fair amount of extra curricular poking) -/// TODO: If this proves useful, see about including it in pybind11. -class pure_subclass { -public: - pure_subclass(py::handle scope, const char *derivedClassName, - const py::object &superClass) { - py::object pyType = - py::reinterpret_borrow<py::object>((PyObject *)&PyType_Type); - py::object metaclass = pyType(superClass); - py::dict attributes; - - thisClass = - metaclass(derivedClassName, py::make_tuple(superClass), attributes); - scope.attr(derivedClassName) = thisClass; - } - - template <typename Func, typename... Extra> - pure_subclass &def(const char *name, Func &&f, const Extra &...extra) { - py::cpp_function cf( - std::forward<Func>(f), py::name(name), py::is_method(thisClass), - py::sibling(py::getattr(thisClass, name, py::none())), extra...); - thisClass.attr(cf.name()) = cf; - return *this; - } - - template <typename Func, typename... Extra> - pure_subclass &def_property_readonly(const char *name, Func &&f, - const Extra &...extra) { - py::cpp_function cf( - std::forward<Func>(f), py::name(name), py::is_method(thisClass), - py::sibling(py::getattr(thisClass, name, py::none())), extra...); - auto builtinProperty = - py::reinterpret_borrow<py::object>((PyObject *)&PyProperty_Type); - thisClass.attr(name) = builtinProperty(cf); - return *this; - } - - template <typename Func, typename... Extra> - pure_subclass &def_staticmethod(const char *name, Func &&f, - const Extra &...extra) { - static_assert(!std::is_member_function_pointer<Func>::value, - "def_staticmethod(...) 
called with a non-static member " - "function pointer"); - py::cpp_function cf(std::forward<Func>(f), py::name(name), - py::scope(thisClass), extra...); - thisClass.attr(cf.name()) = py::staticmethod(cf); - return *this; - } - - template <typename Func, typename... Extra> - pure_subclass &def_classmethod(const char *name, Func &&f, - const Extra &...extra) { - static_assert(!std::is_member_function_pointer<Func>::value, - "def_classmethod(...) called with a non-static member " - "function pointer"); - py::cpp_function cf(std::forward<Func>(f), py::name(name), - py::scope(thisClass), extra...); - thisClass.attr(cf.name()) = - py::reinterpret_borrow<py::object>(PyClassMethod_New(cf.ptr())); - return *this; - } - - py::object get_class() const { return thisClass; } - -protected: - py::object superClass; - py::object thisClass; -}; - -/// Creates a custom subclass of mlir.ir.Attribute, implementing a casting -/// constructor and type checking methods. -class mlir_attribute_subclass : public pure_subclass { -public: - using IsAFunctionTy = bool (*)(MlirAttribute); - using GetTypeIDFunctionTy = MlirTypeID (*)(); - - /// Subclasses by looking up the super-class dynamically. - mlir_attribute_subclass(py::handle scope, const char *attrClassName, - IsAFunctionTy isaFunction, - GetTypeIDFunctionTy getTypeIDFunction = nullptr) - : mlir_attribute_subclass( - scope, attrClassName, isaFunction, - py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("Attribute"), - getTypeIDFunction) {} - - /// Subclasses with a provided mlir.ir.Attribute super-class. This must - /// be used if the subclass is being defined in the same extension module - /// as the mlir.ir class (otherwise, it will trigger a recursive - /// initialization). - mlir_attribute_subclass(py::handle scope, const char *typeClassName, - IsAFunctionTy isaFunction, const py::object &superCls, - GetTypeIDFunctionTy getTypeIDFunction = nullptr) - : pure_subclass(scope, typeClassName, superCls) { - // Casting constructor. Note that it hard, if not impossible, to properly - // call chain to parent `__init__` in pybind11 due to its special handling - // for init functions that don't have a fully constructed self-reference, - // which makes it impossible to forward it to `__init__` of a superclass. - // Instead, provide a custom `__new__` and call that of a superclass, which - // eventually calls `__init__` of the superclass. Since attribute subclasses - // have no additional members, we can just return the instance thus created - // without amending it. - std::string captureTypeName( - typeClassName); // As string in case if typeClassName is not static. - py::cpp_function newCf( - [superCls, isaFunction, captureTypeName](py::object cls, - py::object otherAttribute) { - MlirAttribute rawAttribute = py::cast<MlirAttribute>(otherAttribute); - if (!isaFunction(rawAttribute)) { - auto origRepr = py::repr(otherAttribute).cast<std::string>(); - throw std::invalid_argument( - (llvm::Twine("Cannot cast attribute to ") + captureTypeName + - " (from " + origRepr + ")") - .str()); - } - py::object self = superCls.attr("__new__")(cls, otherAttribute); - return self; - }, - py::name("__new__"), py::arg("cls"), py::arg("cast_from_attr")); - thisClass.attr("__new__") = newCf; - - // 'isinstance' method. 
- def_staticmethod( - "isinstance", - [isaFunction](MlirAttribute other) { return isaFunction(other); }, - py::arg("other_attribute")); - def("__repr__", [superCls, captureTypeName](py::object self) { - return py::repr(superCls(self)) - .attr("replace")(superCls.attr("__name__"), captureTypeName); - }); - if (getTypeIDFunction) { - def_staticmethod("get_static_typeid", - [getTypeIDFunction]() { return getTypeIDFunction(); }); - py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr(MLIR_PYTHON_CAPI_TYPE_CASTER_REGISTER_ATTR)( - getTypeIDFunction())(pybind11::cpp_function( - [thisClass = thisClass](const py::object &mlirAttribute) { - return thisClass(mlirAttribute); - })); - } - } -}; - -/// Creates a custom subclass of mlir.ir.Type, implementing a casting -/// constructor and type checking methods. -class mlir_type_subclass : public pure_subclass { -public: - using IsAFunctionTy = bool (*)(MlirType); - using GetTypeIDFunctionTy = MlirTypeID (*)(); - - /// Subclasses by looking up the super-class dynamically. - mlir_type_subclass(py::handle scope, const char *typeClassName, - IsAFunctionTy isaFunction, - GetTypeIDFunctionTy getTypeIDFunction = nullptr) - : mlir_type_subclass( - scope, typeClassName, isaFunction, - py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir")).attr("Type"), - getTypeIDFunction) {} - - /// Subclasses with a provided mlir.ir.Type super-class. This must - /// be used if the subclass is being defined in the same extension module - /// as the mlir.ir class (otherwise, it will trigger a recursive - /// initialization). - mlir_type_subclass(py::handle scope, const char *typeClassName, - IsAFunctionTy isaFunction, const py::object &superCls, - GetTypeIDFunctionTy getTypeIDFunction = nullptr) - : pure_subclass(scope, typeClassName, superCls) { - // Casting constructor. Note that it hard, if not impossible, to properly - // call chain to parent `__init__` in pybind11 due to its special handling - // for init functions that don't have a fully constructed self-reference, - // which makes it impossible to forward it to `__init__` of a superclass. - // Instead, provide a custom `__new__` and call that of a superclass, which - // eventually calls `__init__` of the superclass. Since attribute subclasses - // have no additional members, we can just return the instance thus created - // without amending it. - std::string captureTypeName( - typeClassName); // As string in case if typeClassName is not static. - py::cpp_function newCf( - [superCls, isaFunction, captureTypeName](py::object cls, - py::object otherType) { - MlirType rawType = py::cast<MlirType>(otherType); - if (!isaFunction(rawType)) { - auto origRepr = py::repr(otherType).cast<std::string>(); - throw std::invalid_argument((llvm::Twine("Cannot cast type to ") + - captureTypeName + " (from " + - origRepr + ")") - .str()); - } - py::object self = superCls.attr("__new__")(cls, otherType); - return self; - }, - py::name("__new__"), py::arg("cls"), py::arg("cast_from_type")); - thisClass.attr("__new__") = newCf; - - // 'isinstance' method. - def_staticmethod( - "isinstance", - [isaFunction](MlirType other) { return isaFunction(other); }, - py::arg("other_type")); - def("__repr__", [superCls, captureTypeName](py::object self) { - return py::repr(superCls(self)) - .attr("replace")(superCls.attr("__name__"), captureTypeName); - }); - if (getTypeIDFunction) { - // 'get_static_typeid' method. 
- // This is modeled as a static method instead of a static property because - // `def_property_readonly_static` is not available in `pure_subclass` and - // we do not want to introduce the complexity that pybind uses to - // implement it. - def_staticmethod("get_static_typeid", - [getTypeIDFunction]() { return getTypeIDFunction(); }); - py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr(MLIR_PYTHON_CAPI_TYPE_CASTER_REGISTER_ATTR)( - getTypeIDFunction())(pybind11::cpp_function( - [thisClass = thisClass](const py::object &mlirType) { - return thisClass(mlirType); - })); - } - } -}; - -/// Creates a custom subclass of mlir.ir.Value, implementing a casting -/// constructor and type checking methods. -class mlir_value_subclass : public pure_subclass { -public: - using IsAFunctionTy = bool (*)(MlirValue); - - /// Subclasses by looking up the super-class dynamically. - mlir_value_subclass(py::handle scope, const char *valueClassName, - IsAFunctionTy isaFunction) - : mlir_value_subclass( - scope, valueClassName, isaFunction, - py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir")).attr("Value")) { - } - - /// Subclasses with a provided mlir.ir.Value super-class. This must - /// be used if the subclass is being defined in the same extension module - /// as the mlir.ir class (otherwise, it will trigger a recursive - /// initialization). - mlir_value_subclass(py::handle scope, const char *valueClassName, - IsAFunctionTy isaFunction, const py::object &superCls) - : pure_subclass(scope, valueClassName, superCls) { - // Casting constructor. Note that it hard, if not impossible, to properly - // call chain to parent `__init__` in pybind11 due to its special handling - // for init functions that don't have a fully constructed self-reference, - // which makes it impossible to forward it to `__init__` of a superclass. - // Instead, provide a custom `__new__` and call that of a superclass, which - // eventually calls `__init__` of the superclass. Since attribute subclasses - // have no additional members, we can just return the instance thus created - // without amending it. - std::string captureValueName( - valueClassName); // As string in case if valueClassName is not static. - py::cpp_function newCf( - [superCls, isaFunction, captureValueName](py::object cls, - py::object otherValue) { - MlirValue rawValue = py::cast<MlirValue>(otherValue); - if (!isaFunction(rawValue)) { - auto origRepr = py::repr(otherValue).cast<std::string>(); - throw std::invalid_argument((llvm::Twine("Cannot cast value to ") + - captureValueName + " (from " + - origRepr + ")") - .str()); - } - py::object self = superCls.attr("__new__")(cls, otherValue); - return self; - }, - py::name("__new__"), py::arg("cls"), py::arg("cast_from_value")); - thisClass.attr("__new__") = newCf; - - // 'isinstance' method. - def_staticmethod( - "isinstance", - [isaFunction](MlirValue other) { return isaFunction(other); }, - py::arg("other_value")); - } -}; - -} // namespace adaptors - -} // namespace python -} // namespace mlir - -#endif // MLIR_BINDINGS_PYTHON_PYBINDADAPTORS_H diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td index 115a11b..80337fc 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td @@ -201,9 +201,9 @@ def Tosa_PadOpQuantInfoBuilder : OpBuilder< // and optional initial value. The builder will extract var_shape and element type // attributes from variable type. 
def Tosa_VariableOpBuilder : OpBuilder< - (ins "StringRef":$name, "Type":$variable_type, "Attribute":$initial_value), + (ins "StringRef":$sym_name, "Type":$variable_type, "Attribute":$initial_value), [{ - buildVariableOp($_builder, $_state, name, variable_type, initial_value); + buildVariableOp($_builder, $_state, sym_name, variable_type, initial_value); }]>; diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaUtilOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaUtilOps.td index d819cc1..f1a618e 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaUtilOps.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaUtilOps.td @@ -18,6 +18,7 @@ include "mlir/IR/OpBase.td" include "mlir/Interfaces/SideEffectInterfaces.td" +include "mlir/IR/SymbolInterfaces.td" include "mlir/Interfaces/LoopLikeInterface.td" include "mlir/Interfaces/VectorInterfaces.td" include "mlir/Dialect/Tosa/IR/TosaInterfaces.td" @@ -82,7 +83,7 @@ def Tosa_YieldOp : Tosa_Op<"yield", [ //===----------------------------------------------------------------------===// // Operator: variable //===----------------------------------------------------------------------===// -def Tosa_VariableOp : Tosa_Op<"variable", []> { +def Tosa_VariableOp : Tosa_Op<"variable", [Symbol]> { let summary = "Defines a variable"; let description = [{ @@ -91,7 +92,10 @@ def Tosa_VariableOp : Tosa_Op<"variable", []> { }]; let arguments = (ins - SymbolNameAttr:$name, + // Note: "sym_name" is used as opposed to "name" in the specification, + // since a Symbol must be named "sym_name" for it to be recognised by + // the containing SymbolTable. + SymbolNameAttr:$sym_name, IndexElementsAttr:$var_shape, TypeAttr:$type, OptionalAttr<AnyAttr>:$initial_value @@ -105,14 +109,18 @@ def Tosa_VariableOp : Tosa_Op<"variable", []> { let hasCustomAssemblyFormat = 1; let assemblyFormat = [{ - $name + $sym_name attr-dict custom<VariableOpTypeOrInitialValue>($var_shape, $type, $initial_value) }]; let builders = [Tosa_VariableOpBuilder]; - let hasVerifier = 1; + let extraClassDeclaration = [{ + ::llvm::StringRef getName() { + return getSymName(); + } + }]; } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp b/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp index d57926ec..39d4815 100644 --- a/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp +++ b/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp @@ -243,7 +243,7 @@ static void getTreePredicates(std::vector<PositionalPredicate> &predList, .Case<OperandPosition, OperandGroupPosition>([&](auto *pos) { getOperandTreePredicates(predList, val, builder, inputs, pos); }) - .Default([](auto *) { llvm_unreachable("unexpected position kind"); }); + .DefaultUnreachable("unexpected position kind"); } static void getAttributePredicates(pdl::AttributeOp op, diff --git a/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp b/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp index 9b61540..50fca56 100644 --- a/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp +++ b/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp @@ -1118,10 +1118,7 @@ StringRef getTypeMangling(Type type, bool isSigned) { llvm_unreachable("Unsupported integer width"); } }) - .Default([](auto) { - llvm_unreachable("No mangling defined"); - return ""; - }); + .DefaultUnreachable("No mangling defined"); } template <typename ReduceOp> diff --git a/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp index 0f90acf..57877b8 100644 --- 
a/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp +++ b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp @@ -68,9 +68,7 @@ std::string getTypeMangling(Type ty, bool isUnsigned = false) { llvm_unreachable("unhandled integer type"); } }) - .Default([](Type) -> std::string { - llvm_unreachable("unhandled type for mangling"); - }); + .DefaultUnreachable("unhandled type for mangling"); } std::string mangle(StringRef baseName, ArrayRef<Type> types, diff --git a/mlir/lib/Dialect/ArmSME/Transforms/OuterProductFusion.cpp b/mlir/lib/Dialect/ArmSME/Transforms/OuterProductFusion.cpp index 9196d2e..39e398b 100644 --- a/mlir/lib/Dialect/ArmSME/Transforms/OuterProductFusion.cpp +++ b/mlir/lib/Dialect/ArmSME/Transforms/OuterProductFusion.cpp @@ -170,7 +170,7 @@ public: op2, op.getResultType(), lhs, rhs, lhsMask, rhsMask, op1.getAcc()); }) - .Default([&](auto) { llvm_unreachable("unexpected extend op!"); }); + .DefaultUnreachable("unexpected extend op!"); } else if (kind == arm_sme::CombiningKind::Sub) { TypeSwitch<Operation *>(extOp) .Case<arith::ExtFOp>([&](auto) { @@ -188,7 +188,7 @@ public: op2, op.getResultType(), lhs, rhs, lhsMask, rhsMask, op1.getAcc()); }) - .Default([&](auto) { llvm_unreachable("unexpected extend op!"); }); + .DefaultUnreachable("unexpected extend op!"); } else { llvm_unreachable("unexpected arm_sme::CombiningKind!"); } diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index c0f9132..19eba6b 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -375,7 +375,7 @@ void GPUDialect::printType(Type type, DialectAsmPrinter &os) const { os << shape.back() << 'x' << fragTy.getElementType(); os << ", \"" << fragTy.getOperand() << "\"" << '>'; }) - .Default([](Type) { llvm_unreachable("unexpected 'gpu' type kind"); }); + .DefaultUnreachable("unexpected 'gpu' type kind"); } static LogicalResult verifyKnownLaunchSizeAttr(Operation *op, diff --git a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp index 2561f66..0a3ef7d 100644 --- a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp +++ b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp @@ -847,9 +847,7 @@ getThreadIdBuilder(std::optional<TransformOpInterface> transformOp, return GpuLaneIdBuilder(ctx, warpSize, useLinearMapping, *maybeMaskingAttr); }) - .Default([&](DeviceMappingAttrInterface) -> GpuIdBuilder { - llvm_unreachable("unknown mapping attribute"); - }); + .DefaultUnreachable("unknown mapping attribute"); return DiagnosedSilenceableFailure::success(); } diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp index ef38027..cee943d 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp @@ -1096,10 +1096,8 @@ static Value memsetGetStored(MemsetIntr op, const MemorySlot &slot, Value intVal = buildMemsetValue(type.getWidth()); return LLVM::BitcastOp::create(builder, op.getLoc(), type, intVal); }) - .Default([](Type) -> Value { - llvm_unreachable( - "getStored should not be called on memset to unsupported type"); - }); + .DefaultUnreachable( + "getStored should not be called on memset to unsupported type"); } template <class MemsetIntr> diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp index 297640c..705d07d 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp @@ -45,9 
+45,7 @@ static StringRef getTypeKeyword(Type type) { .Case<LLVMStructType>([&](Type) { return "struct"; }) .Case<LLVMTargetExtType>([&](Type) { return "target"; }) .Case<LLVMX86AMXType>([&](Type) { return "x86_amx"; }) - .Default([](Type) -> StringRef { - llvm_unreachable("unexpected 'llvm' type kind"); - }); + .DefaultUnreachable("unexpected 'llvm' type kind"); } /// Prints a structure type. Keeps track of known struct names to handle self- diff --git a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp index 38f1a8b..42160a1 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp @@ -192,7 +192,7 @@ static void replaceIndexOpsByInductionVariables(RewriterBase &rewriter, .Case([&](affine::AffineForOp affineForOp) { allIvs.push_back(affineForOp.getInductionVar()); }) - .Default([&](Operation *op) { assert(false && "unexpected op"); }); + .DefaultUnreachable("unexpected op"); } assert(linalgOp.getNumLoops() == allIvs.size() && "expected the number of loops and induction variables to match"); diff --git a/mlir/lib/Dialect/Linalg/Transforms/NamedToElementwise.cpp b/mlir/lib/Dialect/Linalg/Transforms/NamedToElementwise.cpp index 00a076b..c904556 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/NamedToElementwise.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/NamedToElementwise.cpp @@ -48,10 +48,7 @@ ElementwiseKind getKind(Operation *op) { .Case([](SquareOp) { return ElementwiseKind::square; }) .Case([](TanhOp) { return ElementwiseKind::tanh; }) .Case([](ErfOp) { return ElementwiseKind::erf; }) - .Default([&](Operation *op) { - llvm_unreachable("unhandled case in named to elementwise"); - return ElementwiseKind::sub; - }); + .DefaultUnreachable("unhandled case in named to elementwise"); } template <typename NamedOpTy> diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp index e9a8b25..7863c21 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -1427,10 +1427,7 @@ FailureOr<Conv1DOp> DownscaleSizeOneWindowed2DConvolution<Conv2DOp, Conv1DOp>:: .Case([&](linalg::PoolingNchwMaxOp op) { return std::make_tuple(0, 1, 2, 3); }) - .Default([&](Operation *op) { - llvm_unreachable("unexpected conv2d/pool2d operation."); - return std::make_tuple(0, 0, 0, 0); - }); + .DefaultUnreachable("unexpected conv2d/pool2d operation."); // Only handle the case where at least one of the window dimensions is // of size 1. Other cases can rely on tiling to reduce to such cases. 
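The hunks above and below repeat one mechanical refactor: `llvm::TypeSwitch` default cases that merely call `llvm_unreachable` (or `assert(false)`) are replaced by the `DefaultUnreachable` helper, which also removes the dead placeholder return values that non-void switches previously needed to satisfy the result type. A minimal before/after sketch of the pattern (illustrative only; the function and type names here are hypothetical, not code from this patch, but `TypeSwitch::DefaultUnreachable` is the helper the patch adopts):

    #include "llvm/ADT/TypeSwitch.h"
    #include "llvm/Support/ErrorHandling.h"
    #include "mlir/IR/BuiltinTypes.h"

    using namespace mlir;

    // Before: an explicit Default lambda that only asserts, plus a dead
    // return type annotation so the switch's result type is satisfied.
    static llvm::StringRef keywordBefore(Type type) {
      return llvm::TypeSwitch<Type, llvm::StringRef>(type)
          .Case<IntegerType>([](Type) { return "int"; })
          .Case<FloatType>([](Type) { return "float"; })
          .Default([](Type) -> llvm::StringRef {
            llvm_unreachable("unexpected type kind");
          });
    }

    // After: DefaultUnreachable folds the lambda and the unreachable
    // message into a single call; no placeholder value is needed.
    static llvm::StringRef keywordAfter(Type type) {
      return llvm::TypeSwitch<Type, llvm::StringRef>(type)
          .Case<IntegerType>([](Type) { return "int"; })
          .Case<FloatType>([](Type) { return "float"; })
          .DefaultUnreachable("unexpected type kind");
    }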
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp index 3593b53..24d3722 100644 --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -604,9 +604,7 @@ static Operation *materializeTiledShape(OpBuilder &builder, Location loc, builder, loc, valueToTile, sliceParams.offsets, sliceParams.sizes, sliceParams.strides); }) - .Default([](ShapedType) -> Operation * { - llvm_unreachable("Unexpected shaped type"); - }); + .DefaultUnreachable("Unexpected shaped type"); return sliceOp; } diff --git a/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp b/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp index 24da447..214410f 100644 --- a/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp +++ b/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp @@ -315,7 +315,7 @@ LogicalResult LoadOpOfSubViewOpFolder<OpTy>::matchAndRewrite( op, op.getType(), subViewOp.getSource(), sourceIndices, op.getTranspose(), op.getNumTiles()); }) - .Default([](Operation *) { llvm_unreachable("unexpected operation."); }); + .DefaultUnreachable("unexpected operation"); return success(); } @@ -367,7 +367,7 @@ LogicalResult LoadOpOfExpandShapeOpFolder<OpTy>::matchAndRewrite( op, op.getType(), expandShapeOp.getViewSource(), sourceIndices, op.getMask(), op.getPassThru()); }) - .Default([](Operation *) { llvm_unreachable("unexpected operation."); }); + .DefaultUnreachable("unexpected operation"); return success(); } @@ -415,7 +415,7 @@ LogicalResult LoadOpOfCollapseShapeOpFolder<OpTy>::matchAndRewrite( op, op.getType(), collapseShapeOp.getViewSource(), sourceIndices, op.getMask(), op.getPassThru()); }) - .Default([](Operation *) { llvm_unreachable("unexpected operation."); }); + .DefaultUnreachable("unexpected operation"); return success(); } @@ -482,7 +482,7 @@ LogicalResult StoreOpOfSubViewOpFolder<OpTy>::matchAndRewrite( op, op.getSrc(), subViewOp.getSource(), sourceIndices, op.getLeadDimension(), op.getTransposeAttr()); }) - .Default([](Operation *) { llvm_unreachable("unexpected operation."); }); + .DefaultUnreachable("unexpected operation"); return success(); } @@ -535,7 +535,7 @@ LogicalResult StoreOpOfExpandShapeOpFolder<OpTy>::matchAndRewrite( op, expandShapeOp.getViewSource(), sourceIndices, op.getMask(), op.getValueToStore()); }) - .Default([](Operation *) { llvm_unreachable("unexpected operation."); }); + .DefaultUnreachable("unexpected operation"); return success(); } @@ -584,7 +584,7 @@ LogicalResult StoreOpOfCollapseShapeOpFolder<OpTy>::matchAndRewrite( op, collapseShapeOp.getViewSource(), sourceIndices, op.getMask(), op.getValueToStore()); }) - .Default([](Operation *) { llvm_unreachable("unexpected operation."); }); + .DefaultUnreachable("unexpected operation"); return success(); } diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 5672942..fd4cabbad 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -3425,10 +3425,7 @@ void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) { } llvm_unreachable("Unexpected generatee argument"); }) - .Default([&](Operation *op) { - assert(false && "TODO: Custom name for this operation"); - return "transformed"; - }); + .DefaultUnreachable("TODO: Custom name for this operation"); } setNameFn(result, cliName); diff --git a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp index 
36685d3..29b770f 100644 --- a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp @@ -2177,10 +2177,9 @@ cloneAsInsertSlices(RewriterBase &rewriter, auto clonedOp = cloneAsInsertSlice(rewriter, op); clonedSlices.push_back(clonedOp); }) - .Default([&](Operation *op) { - // Assert here assuming this has already been checked. - assert(0 && "unexpected slice type while cloning as insert slice"); - }); + // Assert here assuming this has already been checked. + .DefaultUnreachable( + "unexpected slice type while cloning as insert slice"); } return clonedSlices; } diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp index c8efdf0..24c33f9 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp @@ -987,7 +987,7 @@ void SPIRVDialect::printType(Type type, DialectAsmPrinter &os) const { .Case<ArrayType, CooperativeMatrixType, PointerType, RuntimeArrayType, ImageType, SampledImageType, StructType, MatrixType, TensorArmType>( [&](auto type) { print(type, os); }) - .Default([](Type) { llvm_unreachable("unhandled SPIR-V type"); }); + .DefaultUnreachable("Unhandled SPIR-V type"); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp index 7e9a80e..f895807 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp @@ -57,7 +57,7 @@ public: for (Type elementType : concreteType.getElementTypes()) add(elementType); }) - .Default([](SPIRVType) { llvm_unreachable("Unhandled type"); }); + .DefaultUnreachable("Unhandled type"); } void add(Type type) { add(cast<SPIRVType>(type)); } @@ -107,7 +107,7 @@ public: for (Type elementType : concreteType.getElementTypes()) add(elementType); }) - .Default([](SPIRVType) { llvm_unreachable("Unhandled type"); }); + .DefaultUnreachable("Unhandled type"); } void add(Type type) { add(cast<SPIRVType>(type)); } @@ -198,8 +198,7 @@ Type CompositeType::getElementType(unsigned index) const { .Case<MatrixType>([](MatrixType type) { return type.getColumnType(); }) .Case<StructType>( [index](StructType type) { return type.getElementType(index); }) - .Default( - [](Type) -> Type { llvm_unreachable("invalid composite type"); }); + .DefaultUnreachable("Invalid composite type"); } unsigned CompositeType::getNumElements() const { @@ -207,9 +206,7 @@ unsigned CompositeType::getNumElements() const { .Case<ArrayType, StructType, TensorArmType, VectorType>( [](auto type) { return type.getNumElements(); }) .Case<MatrixType>([](MatrixType type) { return type.getNumColumns(); }) - .Default([](SPIRVType) -> unsigned { - llvm_unreachable("Invalid type for number of elements query"); - }); + .DefaultUnreachable("Invalid type for number of elements query"); } bool CompositeType::hasCompileTimeKnownNumElements() const { diff --git a/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp b/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp index 122f61e0..88e1ab6 100644 --- a/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp +++ b/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp @@ -622,7 +622,7 @@ static spirv::Dim convertRank(int64_t rank) { } static spirv::ImageFormat getImageFormat(Type elementType) { - return llvm::TypeSwitch<Type, spirv::ImageFormat>(elementType) + return TypeSwitch<Type, spirv::ImageFormat>(elementType) .Case<Float16Type>([](Float16Type) { return 
spirv::ImageFormat::R16f; }) .Case<Float32Type>([](Float32Type) { return spirv::ImageFormat::R32f; }) .Case<IntegerType>([](IntegerType intType) { @@ -639,11 +639,7 @@ static spirv::ImageFormat getImageFormat(Type elementType) { llvm_unreachable("Unhandled integer type!"); } }) - .Default([](Type) { - llvm_unreachable("Unhandled element type!"); - // We need to return something here to satisfy the type switch. - return spirv::ImageFormat::R32f; - }); + .DefaultUnreachable("Unhandled element type!"); #undef BIT_WIDTH_CASE } diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp index 332f1a0..c51b5e9 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp @@ -905,56 +905,29 @@ static inline LogicalResult errorIfShapeNotSizeOne(Operation *op, Type type) { return shapeAdaptor.getNumElements() == 1 ? success() : failure(); } -// Returns the first declaration point prior to this operation or failure if -// not found. -static FailureOr<tosa::VariableOp> findVariableDecl(Operation *op, - StringRef symName) { - ModuleOp module = op->getParentOfType<ModuleOp>(); - tosa::VariableOp varOp = nullptr; - - // TODO: Adopt SymbolTable trait to Varible ops. - // Currently, the variable's definition point is searched via walk(), - // starting from the top-level ModuleOp and stopping at the point of use. Once - // TOSA control flow and variable extensions reach the complete state, may - // leverage MLIR's Symbol Table functionality to look up symbol and enhance - // the search to a TOSA specific graph traversal over the IR structure. - module.walk([&](Operation *tempOp) { - // Reach this op itself. - if (tempOp == op) { - return WalkResult::interrupt(); - } - - if (auto tosaOp = dyn_cast<tosa::VariableOp>(tempOp)) { - if (symName == tosaOp.getName()) { - varOp = tosaOp; - return WalkResult::interrupt(); - } - } - - return WalkResult::advance(); - }); - - if (varOp) - return varOp; - - return failure(); -} - template <typename T> static LogicalResult verifyVariableOpErrorIf(T op, Type type, StringRef name) { - StringRef symName = op.getName(); - FailureOr<tosa::VariableOp> varOp = findVariableDecl(op, symName); - if (failed(varOp)) + Operation *symTableOp = + op->template getParentWithTrait<OpTrait::SymbolTable>(); + if (!symTableOp) + // If the operation is not enclosed within a symbol table, we cannot + // verify it against its declaration.
+ return success(); + + SymbolTable symTable(symTableOp); + const auto varOp = symTable.lookup<tosa::VariableOp>(op.getName()); + + // Verify prior declaration + if (!varOp) return op->emitOpError("'") - << symName << "' has not been declared by 'tosa.variable'"; + << op.getName() << "' has not been declared by 'tosa.variable'"; // Verify type and shape - auto variableType = getVariableType(varOp.value()); + auto variableType = getVariableType(varOp); if (errorIfTypeOrShapeMismatch(op, type, name, variableType, "the input tensor") .failed()) return failure(); - return success(); } @@ -1418,7 +1391,7 @@ static void buildVariableOp(OpBuilder &builder, OperationState &result, ArrayRef<int64_t> shape = shapedType.getShape(); auto varShapeAttr = builder.getIndexTensorAttr(convertFromMlirShape(shape)); - result.addAttribute("name", nameAttr); + result.addAttribute("sym_name", nameAttr); result.addAttribute("var_shape", varShapeAttr); result.addAttribute("type", elementTypeAttr); result.addAttribute("initial_value", initialValue); @@ -4160,16 +4133,6 @@ LogicalResult tosa::SelectOp::verify() { return success(); } -LogicalResult tosa::VariableOp::verify() { - StringRef symName = getName(); - FailureOr<tosa::VariableOp> varOp = findVariableDecl(*this, symName); - if (succeeded(varOp)) - return emitOpError("illegal to have multiple declaration of '") - << symName << "'"; - - return success(); -} - LogicalResult tosa::VariableReadOp::verify() { if (verifyVariableOpErrorIf(*this, getOutput1().getType(), "'output1'") .failed()) diff --git a/mlir/lib/Dialect/Transform/IR/TransformDialect.cpp b/mlir/lib/Dialect/Transform/IR/TransformDialect.cpp index a500228..45cef9c1 100644 --- a/mlir/lib/Dialect/Transform/IR/TransformDialect.cpp +++ b/mlir/lib/Dialect/Transform/IR/TransformDialect.cpp @@ -13,6 +13,7 @@ #include "mlir/Dialect/Transform/IR/Utils.h" #include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.h" #include "mlir/IR/DialectImplementation.h" +#include "mlir/IR/Verifier.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/TypeSwitch.h" @@ -140,6 +141,20 @@ LogicalResult transform::TransformDialect::verifyOperationAttribute( "operations with symbol tables"; } + // Pre-verify calls and callables because call graph construction below + // assumes they are valid, but this verifier runs before verifying the + // nested operations. 
+ WalkResult walkResult = op->walk([](Operation *nested) { + if (!isa<CallableOpInterface, CallOpInterface>(nested)) + return WalkResult::advance(); + + if (failed(verify(nested, /*verifyRecursively=*/false))) + return WalkResult::interrupt(); + return WalkResult::advance(); + }); + if (walkResult.wasInterrupted()) + return failure(); + const mlir::CallGraph callgraph(op); for (auto scc = llvm::scc_begin(&callgraph); !scc.isAtEnd(); ++scc) { if (!scc.hasCycle()) diff --git a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp index 3385b2a..365afab 100644 --- a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp +++ b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp @@ -2097,17 +2097,11 @@ void transform::IncludeOp::getEffects( getOperation(), getTarget()); if (!callee) return defaultEffects(); - DiagnosedSilenceableFailure earlyVerifierResult = - verifyNamedSequenceOp(callee, /*emitWarnings=*/false); - if (!earlyVerifierResult.succeeded()) { - (void)earlyVerifierResult.silence(); - return defaultEffects(); - } for (unsigned i = 0, e = getNumOperands(); i < e; ++i) { if (callee.getArgAttr(i, TransformDialect::kArgConsumedAttrName)) consumesHandle(getOperation()->getOpOperand(i), effects); - else + else if (callee.getArgAttr(i, TransformDialect::kArgReadOnlyAttrName)) onlyReadsHandle(getOperation()->getOpOperand(i), effects); } } @@ -2597,10 +2591,7 @@ transform::NumAssociationsOp::apply(transform::TransformRewriter &rewriter, .Case([&](TransformParamTypeInterface param) { return llvm::range_size(state.getParams(getHandle())); }) - .Default([](Type) { - llvm_unreachable("unknown kind of transform dialect type"); - return 0; - }); + .DefaultUnreachable("unknown kind of transform dialect type"); results.setParams(cast<OpResult>(getNum()), rewriter.getI64IntegerAttr(numAssociations)); return DiagnosedSilenceableFailure::success(); @@ -2657,10 +2648,7 @@ transform::SplitHandleOp::apply(transform::TransformRewriter &rewriter, .Case<TransformParamTypeInterface>([&](auto x) { return llvm::range_size(state.getParams(getHandle())); }) - .Default([](auto x) { - llvm_unreachable("unknown transform dialect type interface"); - return -1; - }); + .DefaultUnreachable("unknown transform dialect type interface"); auto produceNumOpsError = [&]() { return emitSilenceableError() diff --git a/mlir/lib/Interfaces/DataLayoutInterfaces.cpp b/mlir/lib/Interfaces/DataLayoutInterfaces.cpp index 3b6330b..7823849 100644 --- a/mlir/lib/Interfaces/DataLayoutInterfaces.cpp +++ b/mlir/lib/Interfaces/DataLayoutInterfaces.cpp @@ -364,10 +364,7 @@ static DataLayoutSpecInterface getSpec(Operation *operation) { return llvm::TypeSwitch<Operation *, DataLayoutSpecInterface>(operation) .Case<ModuleOp, DataLayoutOpInterface>( [&](auto op) { return op.getDataLayoutSpec(); }) - .Default([](Operation *) { - llvm_unreachable("expected an op with data layout spec"); - return DataLayoutSpecInterface(); - }); + .DefaultUnreachable("expected an op with data layout spec"); } static TargetSystemSpecInterface getTargetSystemSpec(Operation *operation) { diff --git a/mlir/lib/Rewrite/ByteCode.cpp b/mlir/lib/Rewrite/ByteCode.cpp index 5cbea5d..33fbd2a 100644 --- a/mlir/lib/Rewrite/ByteCode.cpp +++ b/mlir/lib/Rewrite/ByteCode.cpp @@ -764,9 +764,7 @@ void Generator::generate(Operation *op, ByteCodeWriter &writer) { pdl_interp::SwitchOperandCountOp, pdl_interp::SwitchOperationNameOp, pdl_interp::SwitchResultCountOp>( [&](auto interpOp) { this->generate(interpOp, writer); }) - .Default([](Operation *) { - 
llvm_unreachable("unknown `pdl_interp` operation"); - }); + .DefaultUnreachable("unknown `pdl_interp` operation"); } void Generator::generate(pdl_interp::ApplyConstraintOp op, @@ -913,9 +911,7 @@ void Generator::generate(pdl_interp::ExtractOp op, ByteCodeWriter &writer) { .Case([](pdl::OperationType) { return OpCode::ExtractOp; }) .Case([](pdl::ValueType) { return OpCode::ExtractValue; }) .Case([](pdl::TypeType) { return OpCode::ExtractType; }) - .Default([](Type) -> OpCode { - llvm_unreachable("unsupported element type"); - }); + .DefaultUnreachable("unsupported element type"); writer.append(opCode, op.getRange(), op.getIndex(), op.getResult()); } void Generator::generate(pdl_interp::FinalizeOp op, ByteCodeWriter &writer) { diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 9fcb02e..1e2099d 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -4716,10 +4716,7 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, info.HasNoWait = updateDataOp.getNowait(); return success(); }) - .Default([&](Operation *op) { - llvm_unreachable("unexpected operation"); - return failure(); - }); + .DefaultUnreachable("unexpected operation"); if (failed(result)) return failure(); @@ -5312,9 +5309,7 @@ extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads, (void)found; assert(found && "unsupported host_eval use"); }) - .Default([](Operation *) { - llvm_unreachable("unsupported host_eval use"); - }); + .DefaultUnreachable("unsupported host_eval use"); } } } diff --git a/mlir/lib/Target/LLVMIR/TypeToLLVM.cpp b/mlir/lib/Target/LLVMIR/TypeToLLVM.cpp index 4d20474..807a94c 100644 --- a/mlir/lib/Target/LLVMIR/TypeToLLVM.cpp +++ b/mlir/lib/Target/LLVMIR/TypeToLLVM.cpp @@ -74,9 +74,7 @@ public: LLVM::LLVMPointerType, LLVM::LLVMStructType, VectorType, LLVM::LLVMTargetExtType, PtrLikeTypeInterface>( [this](auto type) { return this->translate(type); }) - .Default([](Type t) -> llvm::Type * { - llvm_unreachable("unknown LLVM dialect type"); - }); + .DefaultUnreachable("unknown LLVM dialect type"); // Cache the result of the conversion and return. 
knownTranslations.try_emplace(type, translated); diff --git a/mlir/lib/Tools/PDLL/AST/NodePrinter.cpp b/mlir/lib/Tools/PDLL/AST/NodePrinter.cpp index e2c987a..f49d3d0 100644 --- a/mlir/lib/Tools/PDLL/AST/NodePrinter.cpp +++ b/mlir/lib/Tools/PDLL/AST/NodePrinter.cpp @@ -154,7 +154,7 @@ void NodePrinter::print(Type type) { }) .Case([&](TypeType) { os << "Type"; }) .Case([&](ValueType) { os << "Value"; }) - .Default([](Type) { llvm_unreachable("unknown AST type"); }); + .DefaultUnreachable("unknown AST type"); } void NodePrinter::print(const Node *node) { @@ -182,7 +182,7 @@ void NodePrinter::print(const Node *node) { const VariableDecl, const Module>([&](auto derivedNode) { this->printImpl(derivedNode); }) - .Default([](const Node *) { llvm_unreachable("unknown AST node"); }); + .DefaultUnreachable("unknown AST node"); elementIndentStack.pop_back(); } diff --git a/mlir/lib/Tools/PDLL/AST/Nodes.cpp b/mlir/lib/Tools/PDLL/AST/Nodes.cpp index 159ce62..5aa0937 100644 --- a/mlir/lib/Tools/PDLL/AST/Nodes.cpp +++ b/mlir/lib/Tools/PDLL/AST/Nodes.cpp @@ -72,7 +72,7 @@ public: const Module>( [&](auto derivedNode) { this->visitImpl(derivedNode); }) - .Default([](const Node *) { llvm_unreachable("unknown AST node"); }); + .DefaultUnreachable("unknown AST node"); } private: diff --git a/mlir/python/CMakeLists.txt b/mlir/python/CMakeLists.txt index 9f5246d..cea5b25 100644 --- a/mlir/python/CMakeLists.txt +++ b/mlir/python/CMakeLists.txt @@ -440,11 +440,11 @@ declare_mlir_dialect_python_bindings( DIALECT_NAME smt) declare_mlir_dialect_python_bindings( - ADD_TO_PARENT MLIRPythonSources.Dialects - ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir" - TD_FILE dialects/SPIRVOps.td - SOURCES dialects/spirv.py - DIALECT_NAME spirv) + ADD_TO_PARENT MLIRPythonSources.Dialects + ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir" + TD_FILE dialects/SPIRVOps.td + SOURCES dialects/spirv.py + DIALECT_NAME spirv) declare_mlir_dialect_python_bindings( ADD_TO_PARENT MLIRPythonSources.Dialects @@ -501,7 +501,6 @@ declare_mlir_python_extension(MLIRPythonExtension.Core MODULE_NAME _mlir ADD_TO_PARENT MLIRPythonSources.Core ROOT_DIR "${PYTHON_SOURCE_DIR}" - PYTHON_BINDINGS_LIBRARY nanobind SOURCES MainModule.cpp IRAffine.cpp @@ -540,7 +539,6 @@ declare_mlir_python_extension(MLIRPythonExtension.Core declare_mlir_python_extension(MLIRPythonExtension.RegisterEverything MODULE_NAME _mlirRegisterEverything ROOT_DIR "${PYTHON_SOURCE_DIR}" - PYTHON_BINDINGS_LIBRARY nanobind SOURCES RegisterEverything.cpp PRIVATE_LINK_LIBS @@ -551,11 +549,10 @@ declare_mlir_python_extension(MLIRPythonExtension.RegisterEverything MLIRCAPIRegisterEverything ) -declare_mlir_python_extension(MLIRPythonExtension.Dialects.Linalg.Pybind +declare_mlir_python_extension(MLIRPythonExtension.Dialects.Linalg.Nanobind MODULE_NAME _mlirDialectsLinalg ADD_TO_PARENT MLIRPythonSources.Dialects.linalg ROOT_DIR "${PYTHON_SOURCE_DIR}" - PYTHON_BINDINGS_LIBRARY nanobind SOURCES DialectLinalg.cpp PRIVATE_LINK_LIBS @@ -565,11 +562,10 @@ declare_mlir_python_extension(MLIRPythonExtension.Dialects.Linalg.Pybind MLIRCAPILinalg ) -declare_mlir_python_extension(MLIRPythonExtension.Dialects.GPU.Pybind +declare_mlir_python_extension(MLIRPythonExtension.Dialects.GPU.Nanobind MODULE_NAME _mlirDialectsGPU ADD_TO_PARENT MLIRPythonSources.Dialects.gpu ROOT_DIR "${PYTHON_SOURCE_DIR}" - PYTHON_BINDINGS_LIBRARY nanobind SOURCES DialectGPU.cpp PRIVATE_LINK_LIBS @@ -579,11 +575,10 @@ declare_mlir_python_extension(MLIRPythonExtension.Dialects.GPU.Pybind MLIRCAPIGPU ) 
-declare_mlir_python_extension(MLIRPythonExtension.Dialects.LLVM.Pybind +declare_mlir_python_extension(MLIRPythonExtension.Dialects.LLVM.Nanobind MODULE_NAME _mlirDialectsLLVM ADD_TO_PARENT MLIRPythonSources.Dialects.llvm ROOT_DIR "${PYTHON_SOURCE_DIR}" - PYTHON_BINDINGS_LIBRARY nanobind SOURCES DialectLLVM.cpp PRIVATE_LINK_LIBS @@ -593,11 +588,10 @@ declare_mlir_python_extension(MLIRPythonExtension.Dialects.LLVM.Pybind MLIRCAPILLVM ) -declare_mlir_python_extension(MLIRPythonExtension.Dialects.Quant.Pybind +declare_mlir_python_extension(MLIRPythonExtension.Dialects.Quant.Nanobind MODULE_NAME _mlirDialectsQuant ADD_TO_PARENT MLIRPythonSources.Dialects.quant ROOT_DIR "${PYTHON_SOURCE_DIR}" - PYTHON_BINDINGS_LIBRARY nanobind SOURCES DialectQuant.cpp PRIVATE_LINK_LIBS @@ -607,11 +601,10 @@ declare_mlir_python_extension(MLIRPythonExtension.Dialects.Quant.Pybind MLIRCAPIQuant ) -declare_mlir_python_extension(MLIRPythonExtension.Dialects.NVGPU.Pybind +declare_mlir_python_extension(MLIRPythonExtension.Dialects.NVGPU.Nanobind MODULE_NAME _mlirDialectsNVGPU ADD_TO_PARENT MLIRPythonSources.Dialects.nvgpu ROOT_DIR "${PYTHON_SOURCE_DIR}" - PYTHON_BINDINGS_LIBRARY nanobind SOURCES DialectNVGPU.cpp PRIVATE_LINK_LIBS @@ -621,11 +614,10 @@ declare_mlir_python_extension(MLIRPythonExtension.Dialects.NVGPU.Pybind MLIRCAPINVGPU ) -declare_mlir_python_extension(MLIRPythonExtension.Dialects.PDL.Pybind +declare_mlir_python_extension(MLIRPythonExtension.Dialects.PDL.Nanobind MODULE_NAME _mlirDialectsPDL ADD_TO_PARENT MLIRPythonSources.Dialects.pdl ROOT_DIR "${PYTHON_SOURCE_DIR}" - PYTHON_BINDINGS_LIBRARY nanobind SOURCES DialectPDL.cpp PRIVATE_LINK_LIBS @@ -635,11 +627,10 @@ declare_mlir_python_extension(MLIRPythonExtension.Dialects.PDL.Pybind MLIRCAPIPDL ) -declare_mlir_python_extension(MLIRPythonExtension.Dialects.SparseTensor.Pybind +declare_mlir_python_extension(MLIRPythonExtension.Dialects.SparseTensor.Nanobind MODULE_NAME _mlirDialectsSparseTensor ADD_TO_PARENT MLIRPythonSources.Dialects.sparse_tensor ROOT_DIR "${PYTHON_SOURCE_DIR}" - PYTHON_BINDINGS_LIBRARY nanobind SOURCES DialectSparseTensor.cpp PRIVATE_LINK_LIBS @@ -649,11 +640,10 @@ declare_mlir_python_extension(MLIRPythonExtension.Dialects.SparseTensor.Pybind MLIRCAPISparseTensor ) -declare_mlir_python_extension(MLIRPythonExtension.Dialects.Transform.Pybind +declare_mlir_python_extension(MLIRPythonExtension.Dialects.Transform.Nanobind MODULE_NAME _mlirDialectsTransform ADD_TO_PARENT MLIRPythonSources.Dialects.transform ROOT_DIR "${PYTHON_SOURCE_DIR}" - PYTHON_BINDINGS_LIBRARY nanobind SOURCES DialectTransform.cpp PRIVATE_LINK_LIBS @@ -663,11 +653,10 @@ declare_mlir_python_extension(MLIRPythonExtension.Dialects.Transform.Pybind MLIRCAPITransformDialect ) -declare_mlir_python_extension(MLIRPythonExtension.Dialects.IRDL.Pybind +declare_mlir_python_extension(MLIRPythonExtension.Dialects.IRDL.Nanobind MODULE_NAME _mlirDialectsIRDL ADD_TO_PARENT MLIRPythonSources.Dialects.irdl ROOT_DIR "${PYTHON_SOURCE_DIR}" - PYTHON_BINDINGS_LIBRARY nanobind SOURCES DialectIRDL.cpp PRIVATE_LINK_LIBS @@ -681,7 +670,6 @@ declare_mlir_python_extension(MLIRPythonExtension.AsyncDialectPasses MODULE_NAME _mlirAsyncPasses ADD_TO_PARENT MLIRPythonSources.Dialects.async ROOT_DIR "${PYTHON_SOURCE_DIR}" - PYTHON_BINDINGS_LIBRARY nanobind SOURCES AsyncPasses.cpp PRIVATE_LINK_LIBS @@ -695,7 +683,6 @@ if(MLIR_ENABLE_EXECUTION_ENGINE) MODULE_NAME _mlirExecutionEngine ADD_TO_PARENT MLIRPythonSources.ExecutionEngine ROOT_DIR "${PYTHON_SOURCE_DIR}" - PYTHON_BINDINGS_LIBRARY nanobind SOURCES 
ExecutionEngineModule.cpp PRIVATE_LINK_LIBS @@ -709,7 +696,6 @@ declare_mlir_python_extension(MLIRPythonExtension.GPUDialectPasses MODULE_NAME _mlirGPUPasses ADD_TO_PARENT MLIRPythonSources.Dialects.gpu ROOT_DIR "${PYTHON_SOURCE_DIR}" - PYTHON_BINDINGS_LIBRARY nanobind SOURCES GPUPasses.cpp PRIVATE_LINK_LIBS @@ -722,7 +708,6 @@ declare_mlir_python_extension(MLIRPythonExtension.LinalgPasses MODULE_NAME _mlirLinalgPasses ADD_TO_PARENT MLIRPythonSources.Dialects.linalg ROOT_DIR "${PYTHON_SOURCE_DIR}" - PYTHON_BINDINGS_LIBRARY nanobind SOURCES LinalgPasses.cpp PRIVATE_LINK_LIBS @@ -731,11 +716,10 @@ declare_mlir_python_extension(MLIRPythonExtension.LinalgPasses MLIRCAPILinalg ) -declare_mlir_python_extension(MLIRPythonExtension.Dialects.SMT.Pybind +declare_mlir_python_extension(MLIRPythonExtension.Dialects.SMT.Nanobind MODULE_NAME _mlirDialectsSMT ADD_TO_PARENT MLIRPythonSources.Dialects.smt ROOT_DIR "${PYTHON_SOURCE_DIR}" - PYTHON_BINDINGS_LIBRARY nanobind SOURCES DialectSMT.cpp # Headers must be included explicitly so they are installed. @@ -752,7 +736,6 @@ declare_mlir_python_extension(MLIRPythonExtension.SparseTensorDialectPasses MODULE_NAME _mlirSparseTensorPasses ADD_TO_PARENT MLIRPythonSources.Dialects.sparse_tensor ROOT_DIR "${PYTHON_SOURCE_DIR}" - PYTHON_BINDINGS_LIBRARY nanobind SOURCES SparseTensorPasses.cpp PRIVATE_LINK_LIBS @@ -765,7 +748,6 @@ declare_mlir_python_extension(MLIRPythonExtension.TransformInterpreter MODULE_NAME _mlirTransformInterpreter ADD_TO_PARENT MLIRPythonSources.Dialects.transform ROOT_DIR "${PYTHON_SOURCE_DIR}" - PYTHON_BINDINGS_LIBRARY nanobind SOURCES TransformInterpreter.cpp PRIVATE_LINK_LIBS @@ -807,23 +789,10 @@ if(MLIR_INCLUDE_TESTS) ADD_TO_PARENT MLIRPythonTestSources.Dialects.PythonTest SOURCES "dialects/_python_test_ops_gen.py") - declare_mlir_python_extension(MLIRPythonTestSources.PythonTestExtensionPybind11 - MODULE_NAME _mlirPythonTestPybind11 - ADD_TO_PARENT MLIRPythonTestSources.Dialects - ROOT_DIR "${MLIR_SOURCE_DIR}/test/python/lib" - PYTHON_BINDINGS_LIBRARY pybind11 - SOURCES - PythonTestModulePybind11.cpp - PRIVATE_LINK_LIBS - LLVMSupport - EMBED_CAPI_LINK_LIBS - MLIRCAPIPythonTestDialect - ) declare_mlir_python_extension(MLIRPythonTestSources.PythonTestExtensionNanobind MODULE_NAME _mlirPythonTestNanobind ADD_TO_PARENT MLIRPythonTestSources.Dialects ROOT_DIR "${MLIR_SOURCE_DIR}/test/python/lib" - PYTHON_BINDINGS_LIBRARY nanobind SOURCES PythonTestModuleNanobind.cpp PRIVATE_LINK_LIBS diff --git a/mlir/python/mlir/dialects/python_test.py b/mlir/python/mlir/dialects/python_test.py index 9380896..56d3c0f 100644 --- a/mlir/python/mlir/dialects/python_test.py +++ b/mlir/python/mlir/dialects/python_test.py @@ -5,12 +5,7 @@ from ._python_test_ops_gen import * -def register_python_test_dialect(registry, use_nanobind): - if use_nanobind: - from .._mlir_libs import _mlirPythonTestNanobind +def register_python_test_dialect(registry): + from .._mlir_libs import _mlirPythonTestNanobind - _mlirPythonTestNanobind.register_dialect(registry) - else: - from .._mlir_libs import _mlirPythonTestPybind11 - - _mlirPythonTestPybind11.register_dialect(registry) + _mlirPythonTestNanobind.register_dialect(registry) diff --git a/mlir/python/requirements.txt b/mlir/python/requirements.txt index abe0925..5ff9500 100644 --- a/mlir/python/requirements.txt +++ b/mlir/python/requirements.txt @@ -1,6 +1,4 @@ -nanobind>=2.9, <3.0 numpy>=1.19.5, <=2.1.2 -pybind11>=2.10.0, <=2.13.6 PyYAML>=5.4.0, <=6.0.1 ml_dtypes>=0.1.0, <=0.6.0; python_version<"3.13" # provides several NumPy 
dtype extensions, including the bf16 ml_dtypes>=0.5.0, <=0.6.0; python_version>="3.13" diff --git a/mlir/test/Dialect/Tosa/invalid.mlir b/mlir/test/Dialect/Tosa/invalid.mlir index 41c3243..e60f1c9b 100644 --- a/mlir/test/Dialect/Tosa/invalid.mlir +++ b/mlir/test/Dialect/Tosa/invalid.mlir @@ -573,64 +573,61 @@ func.func @test_avg_pool2d_zero_dim_input(%arg0: tensor<1x0x?x9xf32>, %arg1: ten // ----- -func.func @test_variable_unranked(%arg0: tensor<2x4x8xi8>) -> () { +module { tosa.variable @stored_var : tensor<*xi8> // expected-error@+1 {{custom op 'tosa.variable' expected ranked type}} - return } // ----- -func.func @test_variable_unranked_initial_value(%arg0: tensor<2x4x8xi8>) -> () { +module { // expected-error@+1 {{elements literal type must have static shape}} tosa.variable @stored_var = dense<0> : tensor<*xi8> // expected-error@+1 {{custom op 'tosa.variable' expected attribute}} - return -} - -// ----- - -func.func @test_variable_duplicates(%arg0: tensor<2x4x8xi8>) -> () { - tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi8> - // expected-error@+1 {{'tosa.variable' op illegal to have multiple declaration of 'stored_var'}} - tosa.variable @stored_var = dense<3> : tensor<1x4x8xi8> - return } // ----- -func.func @test_variable_read_type(%arg0: tensor<2x4x8xi8>) -> () { +module { tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi8> - // expected-error@+1 {{'tosa.variable_read' op require same element type for 'output1' ('i16') and the input tensor ('i8')}} - %0 = tosa.variable_read @stored_var : tensor<2x4x8xi16> - return + func.func @test_variable_read_type(%arg0: tensor<2x4x8xi8>) -> () { + // expected-error@+1 {{'tosa.variable_read' op require same element type for 'output1' ('i16') and the input tensor ('i8')}} + %0 = tosa.variable_read @stored_var : tensor<2x4x8xi16> + return + } } // ----- -func.func @test_variable_read_shape(%arg0: tensor<2x4x8xi8>) -> () { +module { tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi8> - // expected-error@+1 {{'tosa.variable_read' op require same element type for 'output1' ('i32') and the input tensor ('i8'}} - %0 = tosa.variable_read @stored_var : tensor<1x4x8xi32> - return + func.func @test_variable_read_shape(%arg0: tensor<2x4x8xi8>) -> () { + // expected-error@+1 {{'tosa.variable_read' op require same element type for 'output1' ('i32') and the input tensor ('i8'}} + %0 = tosa.variable_read @stored_var : tensor<1x4x8xi32> + return + } } // ----- -func.func @test_variable_write_type(%arg0: tensor<2x4x8xi16>) -> () { +module { tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi8> - // expected-error@+1 {{'tosa.variable_write' op require same element type for 'input1' ('i16') and the input tensor ('i8')}} - tosa.variable_write @stored_var, %arg0 : tensor<2x4x8xi16> - return + func.func @test_variable_write_type(%arg0: tensor<2x4x8xi16>) -> () { + // expected-error@+1 {{'tosa.variable_write' op require same element type for 'input1' ('i16') and the input tensor ('i8')}} + tosa.variable_write @stored_var, %arg0 : tensor<2x4x8xi16> + return + } } // ----- -func.func @test_variable_write_shape(%arg0: tensor<1x4x8xi8>) -> () { +module { tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi8> - // expected-error@+1 {{'tosa.variable_write' op require same shapes for 'input1' ('tensor<1x4x8xi8>') and the input tensor ('tensor<2x4x8xi8>')}} - tosa.variable_write @stored_var, %arg0 : tensor<1x4x8xi8> - return + func.func @test_variable_write_shape(%arg0: tensor<1x4x8xi8>) -> () { + // expected-error@+1 {{'tosa.variable_write' op require same 
shapes for 'input1' ('tensor<1x4x8xi8>') and the input tensor ('tensor<2x4x8xi8>')}} + tosa.variable_write @stored_var, %arg0 : tensor<1x4x8xi8> + return + } } // ----- diff --git a/mlir/test/Dialect/Tosa/invalid_extension.mlir b/mlir/test/Dialect/Tosa/invalid_extension.mlir index 3138ce2..1daabe9 100644 --- a/mlir/test/Dialect/Tosa/invalid_extension.mlir +++ b/mlir/test/Dialect/Tosa/invalid_extension.mlir @@ -310,21 +310,27 @@ func.func @test_identity(%arg0: tensor<13x21x3xi4>) -> tensor<13x21x3xi4> { } // ----- -func.func @test_variable_read_type(%arg0: tensor<2x4x8xi8>) -> () { +module { // expected-error@+1 {{'tosa.variable' op illegal: requires [variable] but not enabled in target}} tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi8> - // expected-error@+1 {{'tosa.variable_read' op illegal: requires [variable]}} - %0 = tosa.variable_read @stored_var : tensor<2x4x8xi8> - return + + func.func @test_variable_read_type(%arg0: tensor<2x4x8xi8>) -> () { + // expected-error@+1 {{'tosa.variable_read' op illegal: requires [variable]}} + %0 = tosa.variable_read @stored_var : tensor<2x4x8xi8> + return + } } // ----- -func.func @test_variable_write_type(%arg0: tensor<2x4x8xi8>) -> () { +module { // expected-error@+1 {{'tosa.variable' op illegal: requires [variable] but not enabled in target}} tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi8> - // expected-error@+1 {{'tosa.variable_write' op illegal: requires [variable]}} - tosa.variable_write @stored_var, %arg0 : tensor<2x4x8xi8> - return + + func.func @test_variable_write_type(%arg0: tensor<2x4x8xi8>) -> () { + // expected-error@+1 {{'tosa.variable_write' op illegal: requires [variable]}} + tosa.variable_write @stored_var, %arg0 : tensor<2x4x8xi8> + return + } } // ----- diff --git a/mlir/test/Dialect/Tosa/level_check.mlir b/mlir/test/Dialect/Tosa/level_check.mlir index 3742adf..5bf2dbb8 100644 --- a/mlir/test/Dialect/Tosa/level_check.mlir +++ b/mlir/test/Dialect/Tosa/level_check.mlir @@ -1097,14 +1097,17 @@ func.func @test_scatter_tensor_size_invalid(%arg0: tensor<13x260000000x3xf32>, % // ----- -func.func @test_variable_read_write_tensor_size_invalid() -> () { +module { // expected-error@+1 {{'tosa.variable' op failed level check: variable type tensor size (in bytes) <= (1 << MAX_LOG2_SIZE - 1)}} tosa.variable @stored_var : tensor<536870912xf32> - // expected-error@+1 {{'tosa.variable_read' op failed level check: result tensor size (in bytes) <= (1 << MAX_LOG2_SIZE - 1)}} - %0 = tosa.variable_read @stored_var : tensor<536870912xf32> - // expected-error@+1 {{'tosa.variable_write' op failed level check: operand tensor size (in bytes) <= (1 << MAX_LOG2_SIZE - 1)}} - tosa.variable_write @stored_var, %0 : tensor<536870912xf32> - return + + func.func @test_variable_read_write_tensor_size_invalid() -> () { + // expected-error@+1 {{'tosa.variable_read' op failed level check: result tensor size (in bytes) <= (1 << MAX_LOG2_SIZE - 1)}} + %0 = tosa.variable_read @stored_var : tensor<536870912xf32> + // expected-error@+1 {{'tosa.variable_write' op failed level check: operand tensor size (in bytes) <= (1 << MAX_LOG2_SIZE - 1)}} + tosa.variable_write @stored_var, %0 : tensor<536870912xf32> + return + } } // ----- @@ -1165,14 +1168,17 @@ func.func @test_cond_if_rank_invalid(%arg0: tensor<1x1x1x1x1x1x1x1xf32>, %arg1: // ----- -func.func @test_variable_read_write_rank_invalid() -> () { +module { // expected-error@+1 {{'tosa.variable' op failed level check: variable type rank(shape) <= MAX_RANK}} tosa.variable @stored_var : 
tensor<1x1x1x1x1x1x1x1xf32> - // expected-error@+1 {{'tosa.variable_read' op failed level check: result rank(shape) <= MAX_RANK}} - %0 = tosa.variable_read @stored_var : tensor<1x1x1x1x1x1x1x1xf32> - // expected-error@+1 {{'tosa.variable_write' op failed level check: operand rank(shape) <= MAX_RANK}} - tosa.variable_write @stored_var, %0 : tensor<1x1x1x1x1x1x1x1xf32> - return + + func.func @test_variable_read_write_rank_invalid() -> () { + // expected-error@+1 {{'tosa.variable_read' op failed level check: result rank(shape) <= MAX_RANK}} + %0 = tosa.variable_read @stored_var : tensor<1x1x1x1x1x1x1x1xf32> + // expected-error@+1 {{'tosa.variable_write' op failed level check: operand rank(shape) <= MAX_RANK}} + tosa.variable_write @stored_var, %0 : tensor<1x1x1x1x1x1x1x1xf32> + return + } } // ----- diff --git a/mlir/test/Dialect/Tosa/variables.mlir b/mlir/test/Dialect/Tosa/variables.mlir index 9953eb3..0c104e8 100644 --- a/mlir/test/Dialect/Tosa/variables.mlir +++ b/mlir/test/Dialect/Tosa/variables.mlir @@ -3,76 +3,98 @@ // ----- -// CHECK-LABEL: @test_variable_scalar( -// CHECK-SAME: %[[ADD_VAL:.*]]: tensor<f32>) { -func.func @test_variable_scalar(%arg0: tensor<f32>) -> () { - // CHECK: tosa.variable @stored_var = dense<3.140000e+00> : tensor<f32> + +module { + // CHECK: tosa.variable @stored_var = dense<3.140000e+00> : tensor<f32> tosa.variable @stored_var = dense<3.14> : tensor<f32> - // CHECK: %[[STORED_VAL:.*]] = tosa.variable_read @stored_var : tensor<f32> - %0 = tosa.variable_read @stored_var : tensor<f32> - // CHECK: %[[RESULT_ADD:.*]] = tosa.add %[[ADD_VAL]], %[[STORED_VAL]] : (tensor<f32>, tensor<f32>) -> tensor<f32> - %1 = "tosa.add"(%arg0, %0) : (tensor<f32>, tensor<f32>) -> tensor<f32> - // CHECK: tosa.variable_write @stored_var, %[[RESULT_ADD]] : tensor<f32> - tosa.variable_write @stored_var, %1 : tensor<f32> - return + + // CHECK-LABEL: @test_variable_scalar( + // CHECK-SAME: %[[ADD_VAL:.*]]: tensor<f32>) { + func.func @test_variable_scalar(%arg0: tensor<f32>) -> () { + // CHECK: %[[STORED_VAL:.*]] = tosa.variable_read @stored_var : tensor<f32> + %0 = tosa.variable_read @stored_var : tensor<f32> + // CHECK: %[[RESULT_ADD:.*]] = tosa.add %[[ADD_VAL]], %[[STORED_VAL]] : (tensor<f32>, tensor<f32>) -> tensor<f32> + %1 = "tosa.add"(%arg0, %0) : (tensor<f32>, tensor<f32>) -> tensor<f32> + // CHECK: tosa.variable_write @stored_var, %[[RESULT_ADD]] : tensor<f32> + tosa.variable_write @stored_var, %1 : tensor<f32> + return + } } + // ----- -// CHECK-LABEL: @test_variable_tensor( -// CHECK-SAME: %[[ADD_VAL:.*]]: tensor<2x4x8xi32>) { -func.func @test_variable_tensor(%arg0: tensor<2x4x8xi32>) -> () { - // CHECK: tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi32> + +module { + // CHECK: tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi32> tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi32> - // CHECK: %[[STORED_VAL:.*]] = tosa.variable_read @stored_var : tensor<2x4x8xi32> - %0 = tosa.variable_read @stored_var : tensor<2x4x8xi32> - // CHECK: %[[RESULT_ADD:.*]] = tosa.add %[[ADD_VAL]], %[[STORED_VAL]] : (tensor<2x4x8xi32>, tensor<2x4x8xi32>) -> tensor<2x4x8xi32> - %1 = "tosa.add"(%arg0, %0) : (tensor<2x4x8xi32>, tensor<2x4x8xi32>) -> tensor<2x4x8xi32> - // CHECK: tosa.variable_write @stored_var, %[[RESULT_ADD]] : tensor<2x4x8xi32> - tosa.variable_write @stored_var, %1 : tensor<2x4x8xi32> - return + + // CHECK-LABEL: @test_variable_tensor( + // CHECK-SAME: %[[ADD_VAL:.*]]: tensor<2x4x8xi32>) { + func.func @test_variable_tensor(%arg0: tensor<2x4x8xi32>) -> () { + // CHECK: 
%[[STORED_VAL:.*]] = tosa.variable_read @stored_var : tensor<2x4x8xi32> + %0 = tosa.variable_read @stored_var : tensor<2x4x8xi32> + // CHECK: %[[RESULT_ADD:.*]] = tosa.add %[[ADD_VAL]], %[[STORED_VAL]] : (tensor<2x4x8xi32>, tensor<2x4x8xi32>) -> tensor<2x4x8xi32> + %1 = "tosa.add"(%arg0, %0) : (tensor<2x4x8xi32>, tensor<2x4x8xi32>) -> tensor<2x4x8xi32> + // CHECK: tosa.variable_write @stored_var, %[[RESULT_ADD]] : tensor<2x4x8xi32> + tosa.variable_write @stored_var, %1 : tensor<2x4x8xi32> + return + } } // ----- -// CHECK-LABEL: @test_variable_scalar_no_initial_value( -// CHECK-SAME: %[[ADD_VAL:.*]]: tensor<f32>) { -func.func @test_variable_scalar_no_initial_value(%arg0: tensor<f32>) -> () { - // CHECK: tosa.variable @stored_var : tensor<f32> + +module { + // CHECK: tosa.variable @stored_var : tensor<f32> tosa.variable @stored_var : tensor<f32> - // CHECK: %[[STORED_VAL:.*]] = tosa.variable_read @stored_var : tensor<f32> - %0 = tosa.variable_read @stored_var : tensor<f32> - // CHECK: %[[RESULT_ADD:.*]] = tosa.add %[[ADD_VAL]], %[[STORED_VAL]] : (tensor<f32>, tensor<f32>) -> tensor<f32> - %1 = "tosa.add"(%arg0, %0) : (tensor<f32>, tensor<f32>) -> tensor<f32> - // CHECK: tosa.variable_write @stored_var, %[[RESULT_ADD]] : tensor<f32> - tosa.variable_write @stored_var, %1 : tensor<f32> - return + + // CHECK-LABEL: @test_variable_scalar_no_initial_value( + // CHECK-SAME: %[[ADD_VAL:.*]]: tensor<f32>) { + func.func @test_variable_scalar_no_initial_value(%arg0: tensor<f32>) -> () { + // CHECK: %[[STORED_VAL:.*]] = tosa.variable_read @stored_var : tensor<f32> + %0 = tosa.variable_read @stored_var : tensor<f32> + // CHECK: %[[RESULT_ADD:.*]] = tosa.add %[[ADD_VAL]], %[[STORED_VAL]] : (tensor<f32>, tensor<f32>) -> tensor<f32> + %1 = "tosa.add"(%arg0, %0) : (tensor<f32>, tensor<f32>) -> tensor<f32> + // CHECK: tosa.variable_write @stored_var, %[[RESULT_ADD]] : tensor<f32> + tosa.variable_write @stored_var, %1 : tensor<f32> + return + } } // ----- -// CHECK-LABEL: @test_variable_tensor_no_initial_value( -// CHECK-SAME: %[[ADD_VAL:.*]]: tensor<2x4x8xi32>) { -func.func @test_variable_tensor_no_initial_value(%arg0: tensor<2x4x8xi32>) -> () { - // CHECK: tosa.variable @stored_var : tensor<2x4x8xi32> + +module { + // CHECK: tosa.variable @stored_var : tensor<2x4x8xi32> tosa.variable @stored_var : tensor<2x4x8xi32> - // CHECK: %[[STORED_VAL:.*]] = tosa.variable_read @stored_var : tensor<2x4x8xi32> - %0 = tosa.variable_read @stored_var : tensor<2x4x8xi32> - // CHECK: %[[RESULT_ADD:.*]] = tosa.add %[[ADD_VAL]], %[[STORED_VAL]] : (tensor<2x4x8xi32>, tensor<2x4x8xi32>) -> tensor<2x4x8xi32> - %1 = "tosa.add"(%arg0, %0) : (tensor<2x4x8xi32>, tensor<2x4x8xi32>) -> tensor<2x4x8xi32> - // CHECK: tosa.variable_write @stored_var, %[[RESULT_ADD]] : tensor<2x4x8xi32> - tosa.variable_write @stored_var, %1 : tensor<2x4x8xi32> - return + + // CHECK-LABEL: @test_variable_tensor_no_initial_value( + // CHECK-SAME: %[[ADD_VAL:.*]]: tensor<2x4x8xi32>) { + func.func @test_variable_tensor_no_initial_value(%arg0: tensor<2x4x8xi32>) -> () { + // CHECK: %[[STORED_VAL:.*]] = tosa.variable_read @stored_var : tensor<2x4x8xi32> + %0 = tosa.variable_read @stored_var : tensor<2x4x8xi32> + // CHECK: %[[RESULT_ADD:.*]] = tosa.add %[[ADD_VAL]], %[[STORED_VAL]] : (tensor<2x4x8xi32>, tensor<2x4x8xi32>) -> tensor<2x4x8xi32> + %1 = "tosa.add"(%arg0, %0) : (tensor<2x4x8xi32>, tensor<2x4x8xi32>) -> tensor<2x4x8xi32> + // CHECK: tosa.variable_write @stored_var, %[[RESULT_ADD]] : tensor<2x4x8xi32> + tosa.variable_write @stored_var, %1 : 
tensor<2x4x8xi32> + return + } } + // ----- -// CHECK-LABEL: @test_variable_tensor_with_unknowns( -// CHECK-SAME: %[[ADD_VAL:.*]]: tensor<2x4x8xi32>) { -func.func @test_variable_tensor_with_unknowns(%arg0: tensor<2x4x8xi32>) -> () { - // CHECK: tosa.variable @stored_var : tensor<2x?x8xi32> + +module { + // CHECK: tosa.variable @stored_var : tensor<2x?x8xi32> tosa.variable @stored_var : tensor<2x?x8xi32> - // CHECK: %[[STORED_VAL:.*]] = tosa.variable_read @stored_var : tensor<2x4x8xi32> - %0 = tosa.variable_read @stored_var : tensor<2x4x8xi32> - // CHECK: %[[RESULT_ADD:.*]] = tosa.add %[[ADD_VAL]], %[[STORED_VAL]] : (tensor<2x4x8xi32>, tensor<2x4x8xi32>) -> tensor<2x4x8xi32> - %1 = "tosa.add"(%arg0, %0) : (tensor<2x4x8xi32>, tensor<2x4x8xi32>) -> tensor<2x4x8xi32> - // CHECK: tosa.variable_write @stored_var, %[[RESULT_ADD]] : tensor<2x4x8xi32> - tosa.variable_write @stored_var, %1 : tensor<2x4x8xi32> - return + + // CHECK-LABEL: @test_variable_tensor_with_unknowns( + // CHECK-SAME: %[[ADD_VAL:.*]]: tensor<2x4x8xi32>) { + func.func @test_variable_tensor_with_unknowns(%arg0: tensor<2x4x8xi32>) -> () { + // CHECK: %[[STORED_VAL:.*]] = tosa.variable_read @stored_var : tensor<2x4x8xi32> + %0 = tosa.variable_read @stored_var : tensor<2x4x8xi32> + // CHECK: %[[RESULT_ADD:.*]] = tosa.add %[[ADD_VAL]], %[[STORED_VAL]] : (tensor<2x4x8xi32>, tensor<2x4x8xi32>) -> tensor<2x4x8xi32> + %1 = "tosa.add"(%arg0, %0) : (tensor<2x4x8xi32>, tensor<2x4x8xi32>) -> tensor<2x4x8xi32> + // CHECK: tosa.variable_write @stored_var, %[[RESULT_ADD]] : tensor<2x4x8xi32> + tosa.variable_write @stored_var, %1 : tensor<2x4x8xi32> + return + } } diff --git a/mlir/test/Dialect/Tosa/verifier.mlir b/mlir/test/Dialect/Tosa/verifier.mlir index 0128da7..430b06a 100644 --- a/mlir/test/Dialect/Tosa/verifier.mlir +++ b/mlir/test/Dialect/Tosa/verifier.mlir @@ -944,29 +944,27 @@ func.func @test_while_loop_cond_output_not_bool(%arg0: tensor<10xi32>, %arg1: te // ----- -func.func @test_variable_multiple_declaration() -> () { +module { + // expected-note@below {{see existing symbol definition here}} tosa.variable @stored_var = dense<-1> : tensor<2x4x8xi32> - // expected-error@+1 {{'tosa.variable' op illegal to have multiple declaration of 'stored_var'}} + // expected-error@+1 {{redefinition of symbol named 'stored_var'}} tosa.variable @stored_var = dense<-3> : tensor<2x4x8xi32> - return } // ----- -func.func @test_variable_shape_mismatch() -> () { +module { // expected-error@+1 {{inferred shape of elements literal ([2]) does not match type ([3])}} tosa.variable @stored_var = dense<[3.14, 2.14]> : tensor<3xf32> // expected-error@+1 {{custom op 'tosa.variable' expected attribute}} - return } // ----- -func.func @test_variable_type_mismatch() -> () { +module { // expected-error@+1 {{expected integer elements, but parsed floating-point}} tosa.variable @stored_var = dense<-1.2> : tensor<2x4x8xi32> // expected-error@+1 {{custom op 'tosa.variable' expected attribute}} - return } // ----- @@ -979,20 +977,26 @@ func.func @test_variable_read_no_declaration() -> () { // ----- -func.func @test_variable_read_type_mismatch() -> () { +module { tosa.variable @stored_var = dense<-1.2> : tensor<2x4x8xf32> - // expected-error@+1 {{'tosa.variable_read' op require same element type for 'output1' ('i32') and the input tensor ('f32')}} - %0 = tosa.variable_read @stored_var : tensor<2x4x8xi32> - return + + func.func @test_variable_read_type_mismatch() -> () { + // expected-error@+1 {{'tosa.variable_read' op require same element type for 'output1' ('i32') and the 
input tensor ('f32')}} + %0 = tosa.variable_read @stored_var : tensor<2x4x8xi32> + return + } } // ----- -func.func @test_variable_read_shape_mismatch() -> () { +module { tosa.variable @stored_var = dense<-1.2> : tensor<8x4x2xf32> - // expected-error@+1 {{'tosa.variable_read' op require same shapes for 'output1' ('tensor<2x4x8xf32>') and the input tensor ('tensor<8x4x2xf32>')}} - %0 = tosa.variable_read @stored_var : tensor<2x4x8xf32> - return + + func.func @test_variable_read_shape_mismatch() -> () { + // expected-error@+1 {{'tosa.variable_read' op require same shapes for 'output1' ('tensor<2x4x8xf32>') and the input tensor ('tensor<8x4x2xf32>')}} + %0 = tosa.variable_read @stored_var : tensor<2x4x8xf32> + return + } } // ----- @@ -1005,20 +1009,26 @@ func.func @test_variable_write_no_declaration(%arg0: tensor<f32>) -> () { // ----- -func.func @test_variable_write_type_mismatch(%arg0: tensor<2x4x8xi32>) -> () { +module { tosa.variable @stored_var = dense<-1.2> : tensor<2x4x8xf32> - // expected-error@+1 {{'tosa.variable_write' op require same element type for 'input1' ('i32') and the input tensor ('f32')}} - tosa.variable_write @stored_var, %arg0 : tensor<2x4x8xi32> - return + + func.func @test_variable_write_type_mismatch(%arg0: tensor<2x4x8xi32>) -> () { + // expected-error@+1 {{'tosa.variable_write' op require same element type for 'input1' ('i32') and the input tensor ('f32')}} + tosa.variable_write @stored_var, %arg0 : tensor<2x4x8xi32> + return + } } // ----- -func.func @test_variable_write_shape_mismatch(%arg0: tensor<2x4x8xf32>) -> () { +module { tosa.variable @stored_var = dense<-1.2> : tensor<8x4x2xf32> - // expected-error@+1 {{'tosa.variable_write' op require same shapes for 'input1' ('tensor<2x4x8xf32>') and the input tensor ('tensor<8x4x2xf32>')}} - tosa.variable_write @stored_var, %arg0 : tensor<2x4x8xf32> - return + + func.func @test_variable_write_shape_mismatch(%arg0: tensor<2x4x8xf32>) -> () { + // expected-error@+1 {{'tosa.variable_write' op require same shapes for 'input1' ('tensor<2x4x8xf32>') and the input tensor ('tensor<8x4x2xf32>')}} + tosa.variable_write @stored_var, %arg0 : tensor<2x4x8xf32> + return + } } // ----- diff --git a/mlir/test/Dialect/Transform/ops-invalid.mlir b/mlir/test/Dialect/Transform/ops-invalid.mlir index 71a260f..68305de 100644 --- a/mlir/test/Dialect/Transform/ops-invalid.mlir +++ b/mlir/test/Dialect/Transform/ops-invalid.mlir @@ -369,6 +369,7 @@ module attributes { transform.with_named_sequence } { // expected-error @below {{recursion not allowed in named sequences}} transform.named_sequence @self_recursion() -> () { transform.include @self_recursion failures(suppress) () : () -> () + transform.yield } } @@ -376,13 +377,13 @@ module attributes { transform.with_named_sequence } { module @mutual_recursion attributes { transform.with_named_sequence } { // expected-note @below {{operation on recursion stack}} - transform.named_sequence @foo(%arg0: !transform.any_op) -> () { + transform.named_sequence @foo(%arg0: !transform.any_op {transform.readonly}) -> () { transform.include @bar failures(suppress) (%arg0) : (!transform.any_op) -> () transform.yield } // expected-error @below {{recursion not allowed in named sequences}} - transform.named_sequence @bar(%arg0: !transform.any_op) -> () { + transform.named_sequence @bar(%arg0: !transform.any_op {transform.readonly}) -> () { transform.include @foo failures(propagate) (%arg0) : (!transform.any_op) -> () transform.yield } @@ -430,7 +431,7 @@ module attributes { transform.with_named_sequence } { // 
----- module attributes { transform.with_named_sequence } { - transform.named_sequence @foo(%arg0: !transform.any_op) -> () { + transform.named_sequence @foo(%arg0: !transform.any_op {transform.readonly}) -> () { transform.yield } @@ -444,7 +445,7 @@ module attributes { transform.with_named_sequence } { // ----- module attributes { transform.with_named_sequence } { - transform.named_sequence @foo(%arg0: !transform.any_op) -> (!transform.any_op) { + transform.named_sequence @foo(%arg0: !transform.any_op {transform.readonly}) -> (!transform.any_op) { transform.yield %arg0 : !transform.any_op } @@ -458,7 +459,7 @@ module attributes { transform.with_named_sequence } { // ----- module attributes { transform.with_named_sequence } { - transform.named_sequence @foo(%arg0: !transform.any_op) -> (!transform.any_op) { + transform.named_sequence @foo(%arg0: !transform.any_op {transform.readonly}) -> (!transform.any_op) { transform.yield %arg0 : !transform.any_op } @@ -543,7 +544,6 @@ module attributes { transform.with_named_sequence } { // ----- module attributes { transform.with_named_sequence } { - // expected-error @below {{must provide consumed/readonly status for arguments of external or called ops}} transform.named_sequence @foo(%op: !transform.any_op) { transform.debug.emit_remark_at %op, "message" : !transform.any_op transform.yield @@ -551,6 +551,8 @@ module attributes { transform.with_named_sequence } { transform.sequence failures(propagate) { ^bb0(%arg0: !transform.any_op): + // expected-error @below {{TransformOpInterface requires memory effects on operands to be specified}} + // expected-note @below {{no effects specified for operand #0}} transform.include @foo failures(propagate) (%arg0) : (!transform.any_op) -> () transform.yield } @@ -908,3 +910,54 @@ module attributes { transform.with_named_sequence } { transform.yield } } + +// ----- + +module attributes { transform.with_named_sequence } { + transform.named_sequence @__transform_main(%arg0: !transform.any_op) -> () { + // Intentionally malformed func with no region. This shouldn't crash the + // verifier of `with_named_sequence` that runs before we get to the + // function. + // expected-error @below {{requires one region}} + "func.func"() : () -> () + transform.yield + } +} + +// ----- + +module attributes { transform.with_named_sequence } { + transform.named_sequence @__transform_main(%arg0: !transform.any_op) -> () { + // Intentionally malformed call with a region. This shouldn't crash the + // verifier of `with_named_sequence` that runs before we get to the call. + // expected-error @below {{requires zero regions}} + "func.call"() <{ + function_type = () -> (), + sym_name = "lambda_function" + }> ({ + ^bb0: + "func.return"() : () -> () + }) : () -> () + transform.yield + } +} + +// ----- + +module attributes { transform.with_named_sequence } { + // Intentionally malformed sequence where the verifier should not crash. 
+ // expected-error @below {{ op expects argument attribute array to have the same number of elements as the number of function arguments, got 1, but expected 3}} + "transform.named_sequence"() <{ + arg_attrs = [{transform.readonly}], + function_type = (i1, tensor<f32>, tensor<f32>) -> (), + sym_name = "print_message" + }> ({}) : () -> () + "transform.named_sequence"() <{ + function_type = (!transform.any_op) -> (), + sym_name = "reference_other_module" + }> ({ + ^bb0(%arg0: !transform.any_op): + "transform.include"(%arg0) <{target = @print_message}> : (!transform.any_op) -> () + "transform.yield"() : () -> () + }) : () -> () +} diff --git a/mlir/test/python/dialects/python_test.py b/mlir/test/python/dialects/python_test.py index 1194e32..5a9acc7 100644 --- a/mlir/test/python/dialects/python_test.py +++ b/mlir/test/python/dialects/python_test.py @@ -1,5 +1,4 @@ -# RUN: %PYTHON %s pybind11 | FileCheck %s -# RUN: %PYTHON %s nanobind | FileCheck %s +# RUN: %PYTHON %s | FileCheck %s import sys import typing from typing import Union, Optional @@ -10,26 +9,14 @@ import mlir.dialects.python_test as test import mlir.dialects.tensor as tensor import mlir.dialects.arith as arith -if sys.argv[1] == "pybind11": - from mlir._mlir_libs._mlirPythonTestPybind11 import ( - TestAttr, - TestType, - TestTensorValue, - TestIntegerRankedTensorType, - ) - - test.register_python_test_dialect(get_dialect_registry(), use_nanobind=False) -elif sys.argv[1] == "nanobind": - from mlir._mlir_libs._mlirPythonTestNanobind import ( - TestAttr, - TestType, - TestTensorValue, - TestIntegerRankedTensorType, - ) - - test.register_python_test_dialect(get_dialect_registry(), use_nanobind=True) -else: - raise ValueError("Expected pybind11 or nanobind as argument") +from mlir._mlir_libs._mlirPythonTestNanobind import ( + TestAttr, + TestType, + TestTensorValue, + TestIntegerRankedTensorType, +) + +test.register_python_test_dialect(get_dialect_registry()) def run(f): diff --git a/mlir/test/python/lib/CMakeLists.txt b/mlir/test/python/lib/CMakeLists.txt index 9a813da..f51a7b4 100644 --- a/mlir/test/python/lib/CMakeLists.txt +++ b/mlir/test/python/lib/CMakeLists.txt @@ -1,7 +1,6 @@ set(LLVM_OPTIONAL_SOURCES PythonTestCAPI.cpp PythonTestDialect.cpp - PythonTestModulePybind11.cpp PythonTestModuleNanobind.cpp ) diff --git a/mlir/test/python/lib/PythonTestModulePybind11.cpp b/mlir/test/python/lib/PythonTestModulePybind11.cpp deleted file mode 100644 index 94a5f51..0000000 --- a/mlir/test/python/lib/PythonTestModulePybind11.cpp +++ /dev/null @@ -1,118 +0,0 @@ -//===- PythonTestModule.cpp - Python extension for the PythonTest dialect -===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// This is the pybind11 edition of the PythonTest dialect module. 
-//===----------------------------------------------------------------------===// - -#include "PythonTestCAPI.h" -#include "mlir-c/BuiltinAttributes.h" -#include "mlir-c/BuiltinTypes.h" -#include "mlir-c/IR.h" -#include "mlir/Bindings/Python/PybindAdaptors.h" - -namespace py = pybind11; -using namespace mlir::python::adaptors; -using namespace pybind11::literals; - -static bool mlirTypeIsARankedIntegerTensor(MlirType t) { - return mlirTypeIsARankedTensor(t) && - mlirTypeIsAInteger(mlirShapedTypeGetElementType(t)); -} - -PYBIND11_MODULE(_mlirPythonTestPybind11, m) { - m.def( - "register_python_test_dialect", - [](MlirContext context, bool load) { - MlirDialectHandle pythonTestDialect = - mlirGetDialectHandle__python_test__(); - mlirDialectHandleRegisterDialect(pythonTestDialect, context); - if (load) { - mlirDialectHandleLoadDialect(pythonTestDialect, context); - } - }, - py::arg("context"), py::arg("load") = true); - - m.def( - "register_dialect", - [](MlirDialectRegistry registry) { - MlirDialectHandle pythonTestDialect = - mlirGetDialectHandle__python_test__(); - mlirDialectHandleInsertDialect(pythonTestDialect, registry); - }, - py::arg("registry")); - - mlir_attribute_subclass(m, "TestAttr", - mlirAttributeIsAPythonTestTestAttribute, - mlirPythonTestTestAttributeGetTypeID) - .def_classmethod( - "get", - [](const py::object &cls, MlirContext ctx) { - return cls(mlirPythonTestTestAttributeGet(ctx)); - }, - py::arg("cls"), py::arg("context") = py::none()); - - mlir_type_subclass(m, "TestType", mlirTypeIsAPythonTestTestType, - mlirPythonTestTestTypeGetTypeID) - .def_classmethod( - "get", - [](const py::object &cls, MlirContext ctx) { - return cls(mlirPythonTestTestTypeGet(ctx)); - }, - py::arg("cls"), py::arg("context") = py::none()); - - auto typeCls = - mlir_type_subclass(m, "TestIntegerRankedTensorType", - mlirTypeIsARankedIntegerTensor, - py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr("RankedTensorType")) - .def_classmethod( - "get", - [](const py::object &cls, std::vector<int64_t> shape, - unsigned width, MlirContext ctx) { - MlirAttribute encoding = mlirAttributeGetNull(); - return cls(mlirRankedTensorTypeGet( - shape.size(), shape.data(), mlirIntegerTypeGet(ctx, width), - encoding)); - }, - "cls"_a, "shape"_a, "width"_a, "context"_a = py::none()); - - assert(py::hasattr(typeCls.get_class(), "static_typeid") && - "TestIntegerRankedTensorType has no static_typeid"); - - MlirTypeID mlirRankedTensorTypeID = mlirRankedTensorTypeGetTypeID(); - - py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr(MLIR_PYTHON_CAPI_TYPE_CASTER_REGISTER_ATTR)(mlirRankedTensorTypeID, - "replace"_a = true)( - pybind11::cpp_function([typeCls](const py::object &mlirType) { - return typeCls.get_class()(mlirType); - })); - - auto valueCls = mlir_value_subclass(m, "TestTensorValue", - mlirTypeIsAPythonTestTestTensorValue) - .def("is_null", [](MlirValue &self) { - return mlirValueIsNull(self); - }); - - py::module::import(MAKE_MLIR_PYTHON_QUALNAME("ir")) - .attr(MLIR_PYTHON_CAPI_VALUE_CASTER_REGISTER_ATTR)( - mlirRankedTensorTypeID)( - pybind11::cpp_function([valueCls](const py::object &valueObj) { - py::object capsule = mlirApiObjectToCapsule(valueObj); - MlirValue v = mlirPythonCapsuleToValue(capsule.ptr()); - MlirType t = mlirValueGetType(v); - // This is hyper-specific in order to exercise/test registering a - // value caster from cpp (but only for a single test case; see - // testTensorValue python_test.py). 
- if (mlirShapedTypeHasStaticShape(t) && - mlirShapedTypeGetDimSize(t, 0) == 1 && - mlirShapedTypeGetDimSize(t, 1) == 2 && - mlirShapedTypeGetDimSize(t, 2) == 3) - return valueCls.get_class()(valueObj); - return valueObj; - })); -} diff --git a/mlir/tools/mlir-linalg-ods-gen/update_core_linalg_named_ops.sh.in b/mlir/tools/mlir-linalg-ods-gen/update_core_linalg_named_ops.sh.in index da4db39..0bb6a20 100755 --- a/mlir/tools/mlir-linalg-ods-gen/update_core_linalg_named_ops.sh.in +++ b/mlir/tools/mlir-linalg-ods-gen/update_core_linalg_named_ops.sh.in @@ -26,7 +26,7 @@ export PYTHONPATH="$python_package_dir" OUTPUT="$( echo "### AUTOGENERATED from core_named_ops.py" && \ echo "### To regenerate, run: bin/update_core_linalg_named_ops.sh" && \ - "$python_exe" -m mlir.dialects.linalg.opdsl.dump_oplib .ops.core_named_ops \ + "$python_exe" -m mlir.dialects.linalg.opdsl.dump_oplib.ops.core_named_ops \ )" echo "$OUTPUT" > "$dest_file" echo "Success." diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp index 06ef396..a580b1b 100644 --- a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp +++ b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp @@ -864,11 +864,8 @@ bool DefGenerator::emitDecls(StringRef selectedDialect) { // Declare all the def classes first (in case they reference each other). for (const AttrOrTypeDef &def : defs) { - std::string comments = tblgen::emitSummaryAndDescComments( - def.getSummary(), def.getDescription()); - if (!comments.empty()) { - os << comments << "\n"; - } + tblgen::emitSummaryAndDescComments(os, def.getSummary(), + def.getDescription()); os << "class " << def.getCppClassName() << ";\n"; } @@ -1166,7 +1163,7 @@ getAllCppAttrConstraints(const RecordKeeper &records) { /// Emit the declarations for the given constraints, of the form: /// `bool <constraintCppFunctionName>(<parameterTypeName> <parameterName>);` -static void emitConstraintDecls(const std::vector<Constraint> &constraints, +static void emitConstraintDecls(ArrayRef<Constraint> constraints, raw_ostream &os, StringRef parameterTypeName, StringRef parameterName) { static const char *const constraintDecl = "bool {0}({1} {2});\n"; @@ -1192,7 +1189,7 @@ static void emitAttrConstraintDecls(const RecordKeeper &records, /// return (<condition>); }` /// where `<condition>` is the condition template with the `self` variable /// replaced with the `selfName` parameter. -static void emitConstraintDefs(const std::vector<Constraint> &constraints, +static void emitConstraintDefs(ArrayRef<Constraint> constraints, raw_ostream &os, StringRef parameterTypeName, StringRef selfName) { static const char *const constraintDef = R"( diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp index 8dd9713..34547e9 100644 --- a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp @@ -89,10 +89,7 @@ static ParameterElement *getEncapsulatedParameterElement(FormatElement *el) { .Case<ParameterElement>([&](auto param) { return param; }) .Case<RefDirective>( [&](auto ref) { return cast<ParameterElement>(ref->getArg()); }) - .Default([&](auto el) { - assert(false && "unexpected struct element type"); - return nullptr; - }); + .DefaultUnreachable("unexpected struct element type"); } /// Shorthand functions that can be used with ranged-based conditions. 
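The AttrOrTypeFormatGen.cpp hunk above replaces a Default callback that asserted and returned a dummy nullptr with TypeSwitch's DefaultUnreachable. A minimal sketch of the same pattern follows; Shape, Circle, Square, and shapeName are invented for illustration and are not part of this patch:

#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Casting.h"

namespace {
// Tiny LLVM-style RTTI hierarchy, invented purely for this sketch.
struct Shape {
  enum Kind { CircleKind, SquareKind };
  Kind kind;
  explicit Shape(Kind k) : kind(k) {}
};
struct Circle : Shape {
  Circle() : Shape(CircleKind) {}
  static bool classof(const Shape *s) { return s->kind == CircleKind; }
};
struct Square : Shape {
  Square() : Shape(SquareKind) {}
  static bool classof(const Shape *s) { return s->kind == SquareKind; }
};
} // namespace

static const char *shapeName(Shape *s) {
  return llvm::TypeSwitch<Shape *, const char *>(s)
      .Case<Circle>([](Circle *) { return "circle"; })
      .Case<Square>([](Square *) { return "square"; })
      // Folds the old `.Default([] { assert(false); return nullptr; })`
      // into one call that hits llvm_unreachable(msg) when no case matches;
      // no dummy return value is needed.
      .DefaultUnreachable("unexpected shape kind");
}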
diff --git a/mlir/tools/mlir-tblgen/CppGenUtilities.cpp b/mlir/tools/mlir-tblgen/CppGenUtilities.cpp
index ebca20c..fddd779 100644
--- a/mlir/tools/mlir-tblgen/CppGenUtilities.cpp
+++ b/mlir/tools/mlir-tblgen/CppGenUtilities.cpp
@@ -14,26 +14,31 @@
 #include "CppGenUtilities.h"
 #include "mlir/Support/IndentedOstream.h"
 
-std::string
-mlir::tblgen::emitSummaryAndDescComments(llvm::StringRef summary,
-                                         llvm::StringRef description) {
+void mlir::tblgen::emitSummaryAndDescComments(llvm::raw_ostream &os,
+                                              llvm::StringRef summary,
+                                              llvm::StringRef description,
+                                              bool terminateComment) {
   std::string comments = "";
   StringRef trimmedSummary = summary.trim();
   StringRef trimmedDesc = description.trim();
-  llvm::raw_string_ostream os(comments);
   raw_indented_ostream ros(os);
 
+  bool empty = true;
   if (!trimmedSummary.empty()) {
     ros.printReindented(trimmedSummary, "/// ");
+    empty = false;
   }
 
   if (!trimmedDesc.empty()) {
-    if (!trimmedSummary.empty()) {
+    if (!empty) {
       // If there is a summary, add a newline after it.
       ros << "\n";
     }
     ros.printReindented(trimmedDesc, "/// ");
+    empty = false;
   }
-  return comments;
+
+  if (!empty && terminateComment)
+    ros << "\n";
 }
diff --git a/mlir/tools/mlir-tblgen/CppGenUtilities.h b/mlir/tools/mlir-tblgen/CppGenUtilities.h
index 231c59a..69d8cd8 100644
--- a/mlir/tools/mlir-tblgen/CppGenUtilities.h
+++ b/mlir/tools/mlir-tblgen/CppGenUtilities.h
@@ -15,14 +15,16 @@
 #define MLIR_TOOLS_MLIRTBLGEN_CPPGENUTILITIES_H_
 
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
 
 namespace mlir {
 namespace tblgen {
 
-// Emit the summary and description as a C++ comment, perperly aligned placed
-// adjacent to the class declaration of generated classes.
-std::string emitSummaryAndDescComments(llvm::StringRef summary,
-                                       llvm::StringRef description);
+// Emit the summary and description as a C++ comment. If `terminateComment` is
+// true, terminates the comment with a `\n`.
+void emitSummaryAndDescComments(llvm::raw_ostream &os, llvm::StringRef summary,
+                                llvm::StringRef description,
+                                bool terminateComment = true);
 } // namespace tblgen
 } // namespace mlir
 
diff --git a/mlir/tools/mlir-tblgen/DialectGen.cpp b/mlir/tools/mlir-tblgen/DialectGen.cpp
index 2e8810d..c2c0c1f 100644
--- a/mlir/tools/mlir-tblgen/DialectGen.cpp
+++ b/mlir/tools/mlir-tblgen/DialectGen.cpp
@@ -109,9 +107,7 @@ tblgen::findDialectToGenerate(ArrayRef<Dialect> dialects) {
 /// {0}: The name of the dialect class.
 /// {1}: The dialect namespace.
 /// {2}: The dialect parent class.
-/// {3}: The summary and description comments.
 static const char *const dialectDeclBeginStr = R"(
-{3}
 class {0} : public ::mlir::{2} {
   explicit {0}(::mlir::MLIRContext *context);
 
@@ -249,10 +247,11 @@ static void emitDialectDecl(Dialect &dialect, raw_ostream &os) {
   StringRef superClassName =
       dialect.isExtensible() ? "ExtensibleDialect" : "Dialect";
 
-  std::string comments = tblgen::emitSummaryAndDescComments(
-      dialect.getSummary(), dialect.getDescription());
+  tblgen::emitSummaryAndDescComments(os, dialect.getSummary(),
+                                     dialect.getDescription(),
+                                     /*terminateComment=*/false);
   os << llvm::formatv(dialectDeclBeginStr, cppName, dialect.getName(),
-                      superClassName, comments);
+                      superClassName);
 
   // If the dialect requested the default attribute printer and parser, emit
   // the declarations for the hooks.
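For reference, a hypothetical call site of the reworked helper declared in CppGenUtilities.h above. Only emitSummaryAndDescComments and its signature are taken from this patch; the surrounding function and the MyAttr name are invented:

#include "CppGenUtilities.h"
#include "llvm/Support/raw_ostream.h"

// Invented caller, in the style of the tblgen emitters updated elsewhere
// in this diff.
static void emitForwardDecl(llvm::raw_ostream &os) {
  // With the default terminateComment=true this streams
  //   /// One-line summary.
  //   /// Longer description.
  // plus a trailing newline directly into `os`, so the declaration below
  // starts on a fresh line. The old version instead returned a std::string
  // that every caller had to test for emptiness and print itself.
  mlir::tblgen::emitSummaryAndDescComments(os, "One-line summary.",
                                           "Longer description.");
  os << "class MyAttr;\n";
}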
diff --git a/mlir/tools/mlir-tblgen/EnumsGen.cpp b/mlir/tools/mlir-tblgen/EnumsGen.cpp index d4d32f5..d55ad482 100644 --- a/mlir/tools/mlir-tblgen/EnumsGen.cpp +++ b/mlir/tools/mlir-tblgen/EnumsGen.cpp @@ -46,8 +46,7 @@ static std::string makeIdentifier(StringRef str) { static void emitEnumClass(const Record &enumDef, StringRef enumName, StringRef underlyingType, StringRef description, - const std::vector<EnumCase> &enumerants, - raw_ostream &os) { + ArrayRef<EnumCase> enumerants, raw_ostream &os) { os << "// " << description << "\n"; os << "enum class " << enumName; @@ -55,14 +54,13 @@ static void emitEnumClass(const Record &enumDef, StringRef enumName, os << " : " << underlyingType; os << " {\n"; - for (const auto &enumerant : enumerants) { + for (const EnumCase &enumerant : enumerants) { auto symbol = makeIdentifier(enumerant.getSymbol()); auto value = enumerant.getValue(); - if (value >= 0) { + if (value >= 0) os << formatv(" {0} = {1},\n", symbol, value); - } else { + else os << formatv(" {0},\n", symbol); - } } os << "};\n\n"; } diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp index c3420d4..2ddb07d 100644 --- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp @@ -4801,11 +4801,9 @@ void OpOperandAdaptorEmitter::emitDef( } /// Emit the class declarations or definitions for the given op defs. -static void -emitOpClasses(const RecordKeeper &records, - const std::vector<const Record *> &defs, raw_ostream &os, - const StaticVerifierFunctionEmitter &staticVerifierEmitter, - bool emitDecl) { +static void emitOpClasses( + const RecordKeeper &records, ArrayRef<const Record *> defs, raw_ostream &os, + const StaticVerifierFunctionEmitter &staticVerifierEmitter, bool emitDecl) { if (defs.empty()) return; @@ -4840,18 +4838,14 @@ emitOpClasses(const RecordKeeper &records, /// Emit the declarations for the provided op classes. static void emitOpClassDecls(const RecordKeeper &records, - const std::vector<const Record *> &defs, - raw_ostream &os) { + ArrayRef<const Record *> defs, raw_ostream &os) { // First emit forward declaration for each class, this allows them to refer // to each others in traits for example. - for (auto *def : defs) { + for (const Record *def : defs) { Operator op(*def); NamespaceEmitter emitter(os, op.getCppNamespace()); - std::string comments = tblgen::emitSummaryAndDescComments( - op.getSummary(), op.getDescription()); - if (!comments.empty()) { - os << comments << "\n"; - } + tblgen::emitSummaryAndDescComments(os, op.getSummary(), + op.getDescription()); os << "class " << op.getCppClassName() << ";\n"; } diff --git a/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp b/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp index 3cc1636..25f160d 100644 --- a/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp +++ b/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp @@ -536,11 +536,8 @@ void InterfaceGenerator::forwardDeclareInterface(const Interface &interface) { // Emit a forward declaration of the interface class so that it becomes usable // in the signature of its methods. 
- std::string comments = tblgen::emitSummaryAndDescComments( - "", interface.getDescription().value_or("")); - if (!comments.empty()) { - os << comments << "\n"; - } + tblgen::emitSummaryAndDescComments(os, "", + interface.getDescription().value_or("")); StringRef interfaceName = interface.getName(); os << "class " << interfaceName << ";\n"; @@ -560,11 +557,8 @@ void InterfaceGenerator::emitInterfaceDecl(const Interface &interface) { // Emit a forward declaration of the interface class so that it becomes usable // in the signature of its methods. - std::string comments = tblgen::emitSummaryAndDescComments( - "", interface.getDescription().value_or("")); - if (!comments.empty()) { - os << comments << "\n"; - } + tblgen::emitSummaryAndDescComments(os, "", + interface.getDescription().value_or("")); // Emit the traits struct containing the concept and model declarations. os << "namespace detail {\n" diff --git a/mlir/tools/mlir-tblgen/TosaUtilsGen.cpp b/mlir/tools/mlir-tblgen/TosaUtilsGen.cpp index c929546..dc8cc58 100644 --- a/mlir/tools/mlir-tblgen/TosaUtilsGen.cpp +++ b/mlir/tools/mlir-tblgen/TosaUtilsGen.cpp @@ -1,4 +1,4 @@ -//===- TosaUtilsGen.cpp - Tosa utility generator -===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 422c29f..0c77a1e 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -1049,32 +1049,6 @@ filegroup( ) cc_library( - name = "MLIRBindingsPythonHeaders", - includes = [ - "include", - ], - textual_hdrs = [":MLIRBindingsPythonHeaderFiles"], - deps = [ - ":CAPIIRHeaders", - "@pybind11", - "@rules_python//python/cc:current_py_cc_headers", - ], -) - -cc_library( - name = "MLIRBindingsPythonHeadersAndDeps", - includes = [ - "include", - ], - textual_hdrs = [":MLIRBindingsPythonHeaderFiles"], - deps = [ - ":CAPIIR", - "@pybind11", - "@rules_python//python/cc:current_py_cc_headers", - ], -) - -cc_library( name = "MLIRBindingsPythonNanobindHeaders", includes = [ "include", @@ -1087,6 +1061,11 @@ cc_library( ], ) +alias( + name = "MLIRBindingsPythonHeaders", + actual = ":MLIRBindingsPythonNanobindHeaders", +) + cc_library( name = "MLIRBindingsPythonNanobindHeadersAndDeps", includes = [ @@ -1100,6 +1079,11 @@ cc_library( ], ) +alias( + name = "MLIRBindingsPythonHeadersAndDeps", + actual = ":MLIRBindingsPythonNanobindHeadersAndDeps", +) + # These flags are needed for pybind11 to work. 
PYBIND11_COPTS = [
     "-fexceptions",
@@ -1147,7 +1131,7 @@ cc_library(
         ":CAPIIR",
         ":CAPIInterfaces",
         ":CAPITransforms",
-        ":MLIRBindingsPythonNanobindHeadersAndDeps",
+        ":MLIRBindingsPythonHeadersAndDeps",
         ":Support",
         ":config",
         "//llvm:Support",
@@ -1170,7 +1154,7 @@ cc_library(
         ":CAPIDebugHeaders",
         ":CAPIIRHeaders",
         ":CAPITransformsHeaders",
-        ":MLIRBindingsPythonNanobindHeaders",
+        ":MLIRBindingsPythonHeaders",
         ":Support",
         ":config",
         "//llvm:Support",
@@ -1220,7 +1204,7 @@ cc_binary(
     linkstatic = 0,
     deps = [
         ":CAPIIR",
-        ":MLIRBindingsPythonNanobindHeadersAndDeps",
+        ":MLIRBindingsPythonHeadersAndDeps",
         "@nanobind",
     ],
 )
@@ -1238,7 +1222,7 @@ cc_binary(
     deps = [
         ":CAPIIR",
         ":CAPILinalg",
-        ":MLIRBindingsPythonNanobindHeadersAndDeps",
+        ":MLIRBindingsPythonHeadersAndDeps",
         "@nanobind",
     ],
 )
@@ -1253,7 +1237,7 @@ cc_binary(
     deps = [
         ":CAPIIR",
         ":CAPILLVM",
-        ":MLIRBindingsPythonNanobindHeadersAndDeps",
+        ":MLIRBindingsPythonHeadersAndDeps",
         "@nanobind",
     ],
 )
@@ -1268,7 +1252,7 @@ cc_binary(
     deps = [
         ":CAPIIR",
         ":CAPIQuant",
-        ":MLIRBindingsPythonNanobindHeadersAndDeps",
+        ":MLIRBindingsPythonHeadersAndDeps",
         "@nanobind",
     ],
 )
@@ -1283,7 +1267,7 @@ cc_binary(
     deps = [
         ":CAPIIR",
         ":CAPISparseTensor",
-        ":MLIRBindingsPythonNanobindHeadersAndDeps",
+        ":MLIRBindingsPythonHeadersAndDeps",
         "@nanobind",
     ],
 )
@@ -1298,7 +1282,7 @@ cc_binary(
     linkstatic = 0,
     deps = [
         ":CAPIExecutionEngine",
-        ":MLIRBindingsPythonNanobindHeadersAndDeps",
+        ":MLIRBindingsPythonHeadersAndDeps",
         "@nanobind",
         "@rules_python//python/cc:current_py_cc_headers",
     ],
@@ -1314,7 +1298,7 @@ cc_binary(
     linkstatic = 0,
     deps = [
         ":CAPILinalg",
-        ":MLIRBindingsPythonNanobindHeadersAndDeps",
+        ":MLIRBindingsPythonHeadersAndDeps",
         "@nanobind",
         "@rules_python//python/cc:current_py_cc_headers",
     ],
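With the pybind11 variant deleted, the nanobind edition is the only native module left for the python_test dialect. A hedged sketch of its registration entry point follows, mirroring the register_dialect function of the deleted pybind11 file and the registry-based call now made in python_test.py; the actual PythonTestModuleNanobind.cpp may differ in detail:

#include "PythonTestCAPI.h"
#include "mlir-c/IR.h"
#include "mlir/Bindings/Python/NanobindAdaptors.h"

namespace nb = nanobind;

NB_MODULE(_mlirPythonTestNanobind, m) {
  // Registry-based registration, consistent with the updated call in
  // python_test.py: test.register_python_test_dialect(get_dialect_registry()).
  m.def(
      "register_dialect",
      [](MlirDialectRegistry registry) {
        MlirDialectHandle handle = mlirGetDialectHandle__python_test__();
        mlirDialectHandleInsertDialect(handle, registry);
      },
      nb::arg("registry"));
}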