diff options
Diffstat (limited to 'clang/lib')
62 files changed, 1408 insertions, 467 deletions
diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index a9c71c7..57cc705 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -3699,7 +3699,7 @@ inline bool CheckDestruction(InterpState &S, CodePtr OpPC) { inline bool CheckArraySize(InterpState &S, CodePtr OpPC, uint64_t NumElems) { uint64_t Limit = S.getLangOpts().ConstexprStepLimit; - if (NumElems > Limit) { + if (Limit != 0 && NumElems > Limit) { S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_new_exceeds_limits) << NumElems << Limit; diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 84c5ecc..2d3cb6a 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2587,6 +2587,82 @@ static bool interp__builtin_ia32_pmul( return true; } +static bool interp_builtin_horizontal_int_binop( + InterpState &S, CodePtr OpPC, const CallExpr *Call, + llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) { + const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>(); + PrimType ElemT = *S.getContext().classify(VT->getElementType()); + bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType(); + + const Pointer &RHS = S.Stk.pop<Pointer>(); + const Pointer &LHS = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + unsigned NumElts = VT->getNumElements(); + unsigned EltBits = S.getASTContext().getIntWidth(VT->getElementType()); + unsigned EltsPerLane = 128 / EltBits; + unsigned Lanes = NumElts * EltBits / 128; + unsigned DestIndex = 0; + + for (unsigned Lane = 0; Lane < Lanes; ++Lane) { + unsigned LaneStart = Lane * EltsPerLane; + for (unsigned I = 0; I < EltsPerLane; I += 2) { + INT_TYPE_SWITCH_NO_BOOL(ElemT, { + APSInt Elem1 = LHS.elem<T>(LaneStart + I).toAPSInt(); + APSInt Elem2 = LHS.elem<T>(LaneStart + I + 1).toAPSInt(); + APSInt ResL = APSInt(Fn(Elem1, Elem2), DestUnsigned); + Dst.elem<T>(DestIndex++) = 
static_cast<T>(ResL); + }); + } + + for (unsigned I = 0; I < EltsPerLane; I += 2) { + INT_TYPE_SWITCH_NO_BOOL(ElemT, { + APSInt Elem1 = RHS.elem<T>(LaneStart + I).toAPSInt(); + APSInt Elem2 = RHS.elem<T>(LaneStart + I + 1).toAPSInt(); + APSInt ResR = APSInt(Fn(Elem1, Elem2), DestUnsigned); + Dst.elem<T>(DestIndex++) = static_cast<T>(ResR); + }); + } + } + Dst.initializeAllElements(); + return true; +} + +static bool interp_builtin_horizontal_fp_binop( + InterpState &S, CodePtr OpPC, const CallExpr *Call, + llvm::function_ref<APFloat(const APFloat &, const APFloat &, + llvm::RoundingMode)> + Fn) { + const Pointer &RHS = S.Stk.pop<Pointer>(); + const Pointer &LHS = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts()); + llvm::RoundingMode RM = getRoundingMode(FPO); + const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>(); + + unsigned NumElts = VT->getNumElements(); + unsigned EltBits = S.getASTContext().getTypeSize(VT->getElementType()); + unsigned NumLanes = NumElts * EltBits / 128; + unsigned NumElemsPerLane = NumElts / NumLanes; + unsigned HalfElemsPerLane = NumElemsPerLane / 2; + + for (unsigned L = 0; L != NumElts; L += NumElemsPerLane) { + using T = PrimConv<PT_Float>::T; + for (unsigned E = 0; E != HalfElemsPerLane; ++E) { + APFloat Elem1 = LHS.elem<T>(L + (2 * E) + 0).getAPFloat(); + APFloat Elem2 = LHS.elem<T>(L + (2 * E) + 1).getAPFloat(); + Dst.elem<T>(L + E) = static_cast<T>(Fn(Elem1, Elem2, RM)); + } + for (unsigned E = 0; E != HalfElemsPerLane; ++E) { + APFloat Elem1 = RHS.elem<T>(L + (2 * E) + 0).getAPFloat(); + APFloat Elem2 = RHS.elem<T>(L + (2 * E) + 1).getAPFloat(); + Dst.elem<T>(L + E + HalfElemsPerLane) = + static_cast<T>(Fn(Elem1, Elem2, RM)); + } + } + Dst.initializeAllElements(); + return true; +} + static bool interp__builtin_elementwise_triop_fp( InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::function_ref<APFloat(const APFloat &, const 
APFloat &, @@ -3665,6 +3741,53 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case Builtin::BI__builtin_elementwise_min: return interp__builtin_elementwise_maxmin(S, OpPC, Call, BuiltinID); + case clang::X86::BI__builtin_ia32_phaddw128: + case clang::X86::BI__builtin_ia32_phaddw256: + case clang::X86::BI__builtin_ia32_phaddd128: + case clang::X86::BI__builtin_ia32_phaddd256: + return interp_builtin_horizontal_int_binop( + S, OpPC, Call, + [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; }); + case clang::X86::BI__builtin_ia32_phaddsw128: + case clang::X86::BI__builtin_ia32_phaddsw256: + return interp_builtin_horizontal_int_binop( + S, OpPC, Call, + [](const APSInt &LHS, const APSInt &RHS) { return LHS.sadd_sat(RHS); }); + case clang::X86::BI__builtin_ia32_phsubw128: + case clang::X86::BI__builtin_ia32_phsubw256: + case clang::X86::BI__builtin_ia32_phsubd128: + case clang::X86::BI__builtin_ia32_phsubd256: + return interp_builtin_horizontal_int_binop( + S, OpPC, Call, + [](const APSInt &LHS, const APSInt &RHS) { return LHS - RHS; }); + case clang::X86::BI__builtin_ia32_phsubsw128: + case clang::X86::BI__builtin_ia32_phsubsw256: + return interp_builtin_horizontal_int_binop( + S, OpPC, Call, + [](const APSInt &LHS, const APSInt &RHS) { return LHS.ssub_sat(RHS); }); + case clang::X86::BI__builtin_ia32_haddpd: + case clang::X86::BI__builtin_ia32_haddps: + case clang::X86::BI__builtin_ia32_haddpd256: + case clang::X86::BI__builtin_ia32_haddps256: + return interp_builtin_horizontal_fp_binop( + S, OpPC, Call, + [](const APFloat &LHS, const APFloat &RHS, llvm::RoundingMode RM) { + APFloat F = LHS; + F.add(RHS, RM); + return F; + }); + case clang::X86::BI__builtin_ia32_hsubpd: + case clang::X86::BI__builtin_ia32_hsubps: + case clang::X86::BI__builtin_ia32_hsubpd256: + case clang::X86::BI__builtin_ia32_hsubps256: + return interp_builtin_horizontal_fp_binop( + S, OpPC, Call, + [](const APFloat &LHS, const APFloat &RHS, 
llvm::RoundingMode RM) { + APFloat F = LHS; + F.subtract(RHS, RM); + return F; + }); + case clang::X86::BI__builtin_ia32_pmuldq128: case clang::X86::BI__builtin_ia32_pmuldq256: case clang::X86::BI__builtin_ia32_pmuldq512: @@ -3695,6 +3818,21 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return F; }); + case X86::BI__builtin_ia32_vpmadd52luq128: + case X86::BI__builtin_ia32_vpmadd52luq256: + case X86::BI__builtin_ia32_vpmadd52luq512: + return interp__builtin_elementwise_triop( + S, OpPC, Call, [](const APSInt &A, const APSInt &B, const APSInt &C) { + return A + (B.trunc(52) * C.trunc(52)).zext(64); + }); + case X86::BI__builtin_ia32_vpmadd52huq128: + case X86::BI__builtin_ia32_vpmadd52huq256: + case X86::BI__builtin_ia32_vpmadd52huq512: + return interp__builtin_elementwise_triop( + S, OpPC, Call, [](const APSInt &A, const APSInt &B, const APSInt &C) { + return A + llvm::APIntOps::mulhu(B.trunc(52), C.trunc(52)).zext(64); + }); + case X86::BI__builtin_ia32_vpshldd128: case X86::BI__builtin_ia32_vpshldd256: case X86::BI__builtin_ia32_vpshldd512: diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index c734155..69cbf6e 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -3316,6 +3316,10 @@ bool FunctionDecl::isImmediateEscalating() const { CD && CD->isInheritingConstructor()) return CD->getInheritedConstructor().getConstructor(); + // Destructors are not immediate escalating. + if (isa<CXXDestructorDecl>(this)) + return false; + // - a function that results from the instantiation of a templated entity // defined with the constexpr specifier. 
TemplatedKind TK = getTemplatedKind(); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index a3037c3f..51c0382 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -990,7 +990,7 @@ namespace { // of arrays to avoid exhausting the system resources, as initialization // of each element is likely to take some number of steps anyway. uint64_t Limit = Ctx.getLangOpts().ConstexprStepLimit; - if (ElemCount > Limit) { + if (Limit != 0 && ElemCount > Limit) { if (Diag) FFDiag(Loc, diag::note_constexpr_new_exceeds_limits) << ElemCount << Limit; @@ -1016,6 +1016,9 @@ namespace { } bool nextStep(const Stmt *S) { + if (Ctx.getLangOpts().ConstexprStepLimit == 0) + return true; + if (!StepsLeft) { FFDiag(S->getBeginLoc(), diag::note_constexpr_step_limit_exceeded); return false; @@ -1186,7 +1189,8 @@ namespace { /// Should we continue evaluation as much as possible after encountering a /// construct which can't be reduced to a value? bool keepEvaluatingAfterFailure() const override { - if (!StepsLeft) + uint64_t Limit = Ctx.getLangOpts().ConstexprStepLimit; + if (Limit != 0 && !StepsLeft) return false; switch (EvalMode) { @@ -11970,6 +11974,54 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + + case X86::BI__builtin_ia32_vpmadd52luq128: + case X86::BI__builtin_ia32_vpmadd52luq256: + case X86::BI__builtin_ia32_vpmadd52luq512: { + APValue A, B, C; + if (!EvaluateAsRValue(Info, E->getArg(0), A) || + !EvaluateAsRValue(Info, E->getArg(1), B) || + !EvaluateAsRValue(Info, E->getArg(2), C)) + return false; + + unsigned ALen = A.getVectorLength(); + SmallVector<APValue, 4> ResultElements; + ResultElements.reserve(ALen); + + for (unsigned EltNum = 0; EltNum < ALen; EltNum += 1) { + APInt AElt = A.getVectorElt(EltNum).getInt(); + APInt BElt = B.getVectorElt(EltNum).getInt().trunc(52); + APInt CElt = C.getVectorElt(EltNum).getInt().trunc(52); 
+ APSInt ResElt(AElt + (BElt * CElt).zext(64), false); + ResultElements.push_back(APValue(ResElt)); + } + + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + case X86::BI__builtin_ia32_vpmadd52huq128: + case X86::BI__builtin_ia32_vpmadd52huq256: + case X86::BI__builtin_ia32_vpmadd52huq512: { + APValue A, B, C; + if (!EvaluateAsRValue(Info, E->getArg(0), A) || + !EvaluateAsRValue(Info, E->getArg(1), B) || + !EvaluateAsRValue(Info, E->getArg(2), C)) + return false; + + unsigned ALen = A.getVectorLength(); + SmallVector<APValue, 4> ResultElements; + ResultElements.reserve(ALen); + + for (unsigned EltNum = 0; EltNum < ALen; EltNum += 1) { + APInt AElt = A.getVectorElt(EltNum).getInt(); + APInt BElt = B.getVectorElt(EltNum).getInt().trunc(52); + APInt CElt = C.getVectorElt(EltNum).getInt().trunc(52); + APSInt ResElt(AElt + llvm::APIntOps::mulhu(BElt, CElt).zext(64), false); + ResultElements.push_back(APValue(ResElt)); + } + + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + case clang::X86::BI__builtin_ia32_vprotbi: case clang::X86::BI__builtin_ia32_vprotdi: case clang::X86::BI__builtin_ia32_vprotqi: @@ -12377,6 +12429,169 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case clang::X86::BI__builtin_ia32_phaddw128: + case clang::X86::BI__builtin_ia32_phaddw256: + case clang::X86::BI__builtin_ia32_phaddd128: + case clang::X86::BI__builtin_ia32_phaddd256: + case clang::X86::BI__builtin_ia32_phaddsw128: + case clang::X86::BI__builtin_ia32_phaddsw256: + + case clang::X86::BI__builtin_ia32_phsubw128: + case clang::X86::BI__builtin_ia32_phsubw256: + case clang::X86::BI__builtin_ia32_phsubd128: + case clang::X86::BI__builtin_ia32_phsubd256: + case clang::X86::BI__builtin_ia32_phsubsw128: + case clang::X86::BI__builtin_ia32_phsubsw256: { + APValue SourceLHS, SourceRHS; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) || 
+ !EvaluateAsRValue(Info, E->getArg(1), SourceRHS)) + return false; + QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); + bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType(); + + unsigned NumElts = SourceLHS.getVectorLength(); + unsigned EltBits = Info.Ctx.getIntWidth(DestEltTy); + unsigned EltsPerLane = 128 / EltBits; + SmallVector<APValue, 4> ResultElements; + ResultElements.reserve(NumElts); + + for (unsigned LaneStart = 0; LaneStart != NumElts; + LaneStart += EltsPerLane) { + for (unsigned I = 0; I != EltsPerLane; I += 2) { + APSInt LHSA = SourceLHS.getVectorElt(LaneStart + I).getInt(); + APSInt LHSB = SourceLHS.getVectorElt(LaneStart + I + 1).getInt(); + switch (E->getBuiltinCallee()) { + case clang::X86::BI__builtin_ia32_phaddw128: + case clang::X86::BI__builtin_ia32_phaddw256: + case clang::X86::BI__builtin_ia32_phaddd128: + case clang::X86::BI__builtin_ia32_phaddd256: { + APSInt Res(LHSA + LHSB, DestUnsigned); + ResultElements.push_back(APValue(Res)); + break; + } + case clang::X86::BI__builtin_ia32_phaddsw128: + case clang::X86::BI__builtin_ia32_phaddsw256: { + APSInt Res(LHSA.sadd_sat(LHSB)); + ResultElements.push_back(APValue(Res)); + break; + } + case clang::X86::BI__builtin_ia32_phsubw128: + case clang::X86::BI__builtin_ia32_phsubw256: + case clang::X86::BI__builtin_ia32_phsubd128: + case clang::X86::BI__builtin_ia32_phsubd256: { + APSInt Res(LHSA - LHSB, DestUnsigned); + ResultElements.push_back(APValue(Res)); + break; + } + case clang::X86::BI__builtin_ia32_phsubsw128: + case clang::X86::BI__builtin_ia32_phsubsw256: { + APSInt Res(LHSA.ssub_sat(LHSB)); + ResultElements.push_back(APValue(Res)); + break; + } + } + } + for (unsigned I = 0; I != EltsPerLane; I += 2) { + APSInt RHSA = SourceRHS.getVectorElt(LaneStart + I).getInt(); + APSInt RHSB = SourceRHS.getVectorElt(LaneStart + I + 1).getInt(); + switch (E->getBuiltinCallee()) { + case clang::X86::BI__builtin_ia32_phaddw128: + case 
clang::X86::BI__builtin_ia32_phaddw256: + case clang::X86::BI__builtin_ia32_phaddd128: + case clang::X86::BI__builtin_ia32_phaddd256: { + APSInt Res(RHSA + RHSB, DestUnsigned); + ResultElements.push_back(APValue(Res)); + break; + } + case clang::X86::BI__builtin_ia32_phaddsw128: + case clang::X86::BI__builtin_ia32_phaddsw256: { + APSInt Res(RHSA.sadd_sat(RHSB)); + ResultElements.push_back(APValue(Res)); + break; + } + case clang::X86::BI__builtin_ia32_phsubw128: + case clang::X86::BI__builtin_ia32_phsubw256: + case clang::X86::BI__builtin_ia32_phsubd128: + case clang::X86::BI__builtin_ia32_phsubd256: { + APSInt Res(RHSA - RHSB, DestUnsigned); + ResultElements.push_back(APValue(Res)); + break; + } + case clang::X86::BI__builtin_ia32_phsubsw128: + case clang::X86::BI__builtin_ia32_phsubsw256: { + APSInt Res(RHSA.ssub_sat(RHSB)); + ResultElements.push_back(APValue(Res)); + break; + } + } + } + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + case clang::X86::BI__builtin_ia32_haddpd: + case clang::X86::BI__builtin_ia32_haddps: + case clang::X86::BI__builtin_ia32_haddps256: + case clang::X86::BI__builtin_ia32_haddpd256: + case clang::X86::BI__builtin_ia32_hsubpd: + case clang::X86::BI__builtin_ia32_hsubps: + case clang::X86::BI__builtin_ia32_hsubps256: + case clang::X86::BI__builtin_ia32_hsubpd256: { + APValue SourceLHS, SourceRHS; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) || + !EvaluateAsRValue(Info, E->getArg(1), SourceRHS)) + return false; + unsigned NumElts = SourceLHS.getVectorLength(); + SmallVector<APValue, 4> ResultElements; + ResultElements.reserve(NumElts); + llvm::RoundingMode RM = getActiveRoundingMode(getEvalInfo(), E); + QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); + unsigned EltBits = Info.Ctx.getTypeSize(DestEltTy); + unsigned NumLanes = NumElts * EltBits / 128; + unsigned NumElemsPerLane = NumElts / NumLanes; + unsigned HalfElemsPerLane = NumElemsPerLane / 2; + + for (unsigned 
L = 0; L != NumElts; L += NumElemsPerLane) { + for (unsigned I = 0; I != HalfElemsPerLane; ++I) { + APFloat LHSA = SourceLHS.getVectorElt(L + (2 * I) + 0).getFloat(); + APFloat LHSB = SourceLHS.getVectorElt(L + (2 * I) + 1).getFloat(); + switch (E->getBuiltinCallee()) { + case clang::X86::BI__builtin_ia32_haddpd: + case clang::X86::BI__builtin_ia32_haddps: + case clang::X86::BI__builtin_ia32_haddps256: + case clang::X86::BI__builtin_ia32_haddpd256: + LHSA.add(LHSB, RM); + break; + case clang::X86::BI__builtin_ia32_hsubpd: + case clang::X86::BI__builtin_ia32_hsubps: + case clang::X86::BI__builtin_ia32_hsubps256: + case clang::X86::BI__builtin_ia32_hsubpd256: + LHSA.subtract(LHSB, RM); + break; + } + ResultElements.push_back(APValue(LHSA)); + } + for (unsigned I = 0; I != HalfElemsPerLane; ++I) { + APFloat RHSA = SourceRHS.getVectorElt(L + (2 * I) + 0).getFloat(); + APFloat RHSB = SourceRHS.getVectorElt(L + (2 * I) + 1).getFloat(); + switch (E->getBuiltinCallee()) { + case clang::X86::BI__builtin_ia32_haddpd: + case clang::X86::BI__builtin_ia32_haddps: + case clang::X86::BI__builtin_ia32_haddps256: + case clang::X86::BI__builtin_ia32_haddpd256: + RHSA.add(RHSB, RM); + break; + case clang::X86::BI__builtin_ia32_hsubpd: + case clang::X86::BI__builtin_ia32_hsubps: + case clang::X86::BI__builtin_ia32_hsubps256: + case clang::X86::BI__builtin_ia32_hsubpd256: + RHSA.subtract(RHSB, RM); + break; + } + ResultElements.push_back(APValue(RHSA)); + } + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } case Builtin::BI__builtin_elementwise_fshl: case Builtin::BI__builtin_elementwise_fshr: { APValue SourceHi, SourceLo, SourceShift; diff --git a/clang/lib/AST/OpenACCClause.cpp b/clang/lib/AST/OpenACCClause.cpp index 6c4bc7c..142c932 100644 --- a/clang/lib/AST/OpenACCClause.cpp +++ b/clang/lib/AST/OpenACCClause.cpp @@ -506,11 +506,17 @@ OpenACCDeviceTypeClause *OpenACCDeviceTypeClause::Create( OpenACCReductionClause *OpenACCReductionClause::Create( 
const ASTContext &C, SourceLocation BeginLoc, SourceLocation LParenLoc, OpenACCReductionOperator Operator, ArrayRef<Expr *> VarList, - ArrayRef<OpenACCReductionRecipe> Recipes, + ArrayRef<OpenACCReductionRecipeWithStorage> Recipes, SourceLocation EndLoc) { - void *Mem = C.Allocate( - OpenACCReductionClause::totalSizeToAlloc<Expr *, OpenACCReductionRecipe>( - VarList.size(), Recipes.size())); + size_t NumCombiners = llvm::accumulate( + Recipes, 0, [](size_t Num, const OpenACCReductionRecipeWithStorage &R) { + return Num + R.CombinerRecipes.size(); + }); + + void *Mem = C.Allocate(OpenACCReductionClause::totalSizeToAlloc< + Expr *, OpenACCReductionRecipe, + OpenACCReductionRecipe::CombinerRecipe>( + VarList.size(), Recipes.size(), NumCombiners)); return new (Mem) OpenACCReductionClause(BeginLoc, LParenLoc, Operator, VarList, Recipes, EndLoc); } diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 2ce4419..791df7e 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -309,6 +309,12 @@ OMPClause::child_range OMPIfClause::used_children() { return child_range(&Condition, &Condition + 1); } +OMPClause::child_range OMPNowaitClause::used_children() { + if (Condition) + return child_range(&Condition, &Condition + 1); + return children(); +} + OMPClause::child_range OMPGrainsizeClause::used_children() { if (Stmt **C = getAddrOfExprAsWritten(getPreInitStmt())) return child_range(C, C + 1); @@ -2113,8 +2119,13 @@ void OMPClausePrinter::VisitOMPOrderedClause(OMPOrderedClause *Node) { } } -void OMPClausePrinter::VisitOMPNowaitClause(OMPNowaitClause *) { +void OMPClausePrinter::VisitOMPNowaitClause(OMPNowaitClause *Node) { OS << "nowait"; + if (auto *Cond = Node->getCondition()) { + OS << "("; + Cond->printPretty(OS, nullptr, Policy, 0); + OS << ")"; + } } void OMPClausePrinter::VisitOMPUntiedClause(OMPUntiedClause *) { diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index 3cd033e..05b64cc 
100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -585,7 +585,10 @@ void OMPClauseProfiler::VisitOMPOrderedClause(const OMPOrderedClause *C) { Profiler->VisitStmt(Num); } -void OMPClauseProfiler::VisitOMPNowaitClause(const OMPNowaitClause *) {} +void OMPClauseProfiler::VisitOMPNowaitClause(const OMPNowaitClause *C) { + if (C->getCondition()) + Profiler->VisitStmt(C->getCondition()); +} void OMPClauseProfiler::VisitOMPUntiedClause(const OMPUntiedClause *) {} diff --git a/clang/lib/AST/TypeLoc.cpp b/clang/lib/AST/TypeLoc.cpp index 55476e2..e952e82 100644 --- a/clang/lib/AST/TypeLoc.cpp +++ b/clang/lib/AST/TypeLoc.cpp @@ -494,39 +494,6 @@ NestedNameSpecifierLoc TypeLoc::getPrefix() const { } } -SourceLocation TypeLoc::getNonPrefixBeginLoc() const { - switch (getTypeLocClass()) { - case TypeLoc::TemplateSpecialization: { - auto TL = castAs<TemplateSpecializationTypeLoc>(); - SourceLocation Loc = TL.getTemplateKeywordLoc(); - if (!Loc.isValid()) - Loc = TL.getTemplateNameLoc(); - return Loc; - } - case TypeLoc::DeducedTemplateSpecialization: { - auto TL = castAs<DeducedTemplateSpecializationTypeLoc>(); - SourceLocation Loc = TL.getTemplateKeywordLoc(); - if (!Loc.isValid()) - Loc = TL.getTemplateNameLoc(); - return Loc; - } - case TypeLoc::DependentName: - return castAs<DependentNameTypeLoc>().getNameLoc(); - case TypeLoc::Enum: - case TypeLoc::Record: - case TypeLoc::InjectedClassName: - return castAs<TagTypeLoc>().getNameLoc(); - case TypeLoc::Typedef: - return castAs<TypedefTypeLoc>().getNameLoc(); - case TypeLoc::UnresolvedUsing: - return castAs<UnresolvedUsingTypeLoc>().getNameLoc(); - case TypeLoc::Using: - return castAs<UsingTypeLoc>().getNameLoc(); - default: - return getBeginLoc(); - } -} - SourceLocation TypeLoc::getNonElaboratedBeginLoc() const { // For elaborated types (e.g. `struct a::A`) we want the portion after the // `struct` but including the namespace qualifier, `a::`. 
diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp index 485308f..9b68de1 100644 --- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp +++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp @@ -9,9 +9,11 @@ #include "clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h" #include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" #include "clang/Analysis/Analyses/PostOrderCFGView.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/TimeProfiler.h" namespace clang::lifetimes::internal { +using llvm::isa_and_present; static bool isGslPointerType(QualType QT) { if (const auto *RD = QT->getAsCXXRecordDecl()) { @@ -108,7 +110,7 @@ void FactsGenerator::VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) { // Specifically for conversion operators, // like `std::string_view p = std::string{};` if (isGslPointerType(MCE->getType()) && - isa<CXXConversionDecl>(MCE->getCalleeDecl())) { + isa_and_present<CXXConversionDecl>(MCE->getCalleeDecl())) { // The argument is the implicit object itself. 
handleFunctionCall(MCE, MCE->getMethodDecl(), {MCE->getImplicitObjectArgument()}, diff --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp index 8ecbd3c..2dec26e 100644 --- a/clang/lib/Basic/Diagnostic.cpp +++ b/clang/lib/Basic/Diagnostic.cpp @@ -525,7 +525,8 @@ std::unique_ptr<WarningsSpecialCaseList> WarningsSpecialCaseList::create(const llvm::MemoryBuffer &Input, std::string &Err) { auto WarningSuppressionList = std::make_unique<WarningsSpecialCaseList>(); - if (!WarningSuppressionList->createInternal(&Input, Err)) + if (!WarningSuppressionList->createInternal(&Input, Err, + /*OrderBySize=*/true)) return nullptr; return WarningSuppressionList; } diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 9e03a08..18641a9 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -1568,6 +1568,7 @@ bool AArch64TargetInfo::validateAsmConstraint( if (const unsigned Len = matchAsmCCConstraint(Name)) { Name += Len - 1; Info.setAllowsRegister(); + Info.setOutputOperandBounds(0, 2); return true; } } diff --git a/clang/lib/Basic/Targets/SystemZ.cpp b/clang/lib/Basic/Targets/SystemZ.cpp index 13b8623..30f846c 100644 --- a/clang/lib/Basic/Targets/SystemZ.cpp +++ b/clang/lib/Basic/Targets/SystemZ.cpp @@ -99,6 +99,16 @@ bool SystemZTargetInfo::validateAsmConstraint( case 'T': // Likewise, plus an index Info.setAllowsMemory(); return true; + case '@': + // CC condition changes. + if (StringRef(Name) == "@cc") { + Name += 2; + Info.setAllowsRegister(); + // SystemZ has 2-bits CC, and hence Interval [0, 4). + Info.setOutputOperandBounds(0, 4); + return true; + } + return false; } } @@ -161,6 +171,9 @@ unsigned SystemZTargetInfo::getMinGlobalAlign(uint64_t Size, void SystemZTargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { + // Inline assembly supports SystemZ flag outputs. 
+ Builder.defineMacro("__GCC_ASM_FLAG_OUTPUTS__"); + Builder.defineMacro("__s390__"); Builder.defineMacro("__s390x__"); Builder.defineMacro("__zarch__"); diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h index dc2185e..4e15d5a 100644 --- a/clang/lib/Basic/Targets/SystemZ.h +++ b/clang/lib/Basic/Targets/SystemZ.h @@ -136,6 +136,12 @@ public: std::string convertConstraint(const char *&Constraint) const override { switch (Constraint[0]) { + case '@': // Flag output operand. + if (llvm::StringRef(Constraint) == "@cc") { + Constraint += 2; + return std::string("{@cc}"); + } + break; case 'p': // Keep 'p' constraint. return std::string("p"); case 'Z': diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 6eb4db5..187815c 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -1516,6 +1516,7 @@ bool X86TargetInfo::validateAsmConstraint( if (auto Len = matchAsmCCConstraint(Name)) { Name += Len - 1; Info.setAllowsRegister(); + Info.setOutputOperandBounds(0, 2); return true; } return false; diff --git a/clang/lib/CIR/CodeGen/Address.h b/clang/lib/CIR/CodeGen/Address.h index fb74aa0..a67cbad 100644 --- a/clang/lib/CIR/CodeGen/Address.h +++ b/clang/lib/CIR/CodeGen/Address.h @@ -17,6 +17,7 @@ #include "mlir/IR/Value.h" #include "clang/AST/CharUnits.h" #include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/MissingFeatures.h" #include "llvm/ADT/PointerIntPair.h" namespace clang::CIRGen { @@ -90,6 +91,13 @@ public: return getPointer(); } + /// Return the pointer contained in this class after authenticating it and + /// adding offset to it if necessary. 
+ mlir::Value emitRawPointer() const { + assert(!cir::MissingFeatures::addressPointerAuthInfo()); + return getBasePointer(); + } + mlir::Type getType() const { assert(mlir::cast<cir::PointerType>( pointerAndKnownNonNull.getPointer().getType()) diff --git a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp index 0f4d6d2..a9983f8 100644 --- a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp @@ -255,7 +255,7 @@ static void emitAtomicCmpXchg(CIRGenFunction &cgf, AtomicExpr *e, bool isWeak, mlir::Value expected = builder.createLoad(loc, val1); mlir::Value desired = builder.createLoad(loc, val2); - auto cmpxchg = cir::AtomicCmpXchg::create( + auto cmpxchg = cir::AtomicCmpXchgOp::create( builder, loc, expected.getType(), builder.getBoolTy(), ptr.getPointer(), expected, desired, cir::MemOrderAttr::get(&cgf.getMLIRContext(), successOrder), @@ -404,7 +404,7 @@ static void emitAtomicOp(CIRGenFunction &cgf, AtomicExpr *expr, Address dest, case AtomicExpr::AO__c11_atomic_exchange: case AtomicExpr::AO__atomic_exchange_n: case AtomicExpr::AO__atomic_exchange: - opName = cir::AtomicXchg::getOperationName(); + opName = cir::AtomicXchgOp::getOperationName(); break; case AtomicExpr::AO__opencl_atomic_init: diff --git a/clang/lib/CIR/CodeGen/CIRGenCXXABI.h b/clang/lib/CIR/CodeGen/CIRGenCXXABI.h index 06f41cd..6d3741c4 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCXXABI.h +++ b/clang/lib/CIR/CodeGen/CIRGenCXXABI.h @@ -191,6 +191,15 @@ public: virtual void emitVTableDefinitions(CIRGenVTables &cgvt, const CXXRecordDecl *rd) = 0; + using DeleteOrMemberCallExpr = + llvm::PointerUnion<const CXXDeleteExpr *, const CXXMemberCallExpr *>; + + virtual mlir::Value emitVirtualDestructorCall(CIRGenFunction &cgf, + const CXXDestructorDecl *dtor, + CXXDtorType dtorType, + Address thisAddr, + DeleteOrMemberCallExpr e) = 0; + /// Emit any tables needed to implement virtual inheritance. For Itanium, /// this emits virtual table tables. 
virtual void emitVirtualInheritanceTables(const CXXRecordDecl *rd) = 0; diff --git a/clang/lib/CIR/CodeGen/CIRGenClass.cpp b/clang/lib/CIR/CodeGen/CIRGenClass.cpp index 485b2c8..dd357ce 100644 --- a/clang/lib/CIR/CodeGen/CIRGenClass.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenClass.cpp @@ -895,6 +895,26 @@ void CIRGenFunction::destroyCXXObject(CIRGenFunction &cgf, Address addr, } namespace { +mlir::Value loadThisForDtorDelete(CIRGenFunction &cgf, + const CXXDestructorDecl *dd) { + if (Expr *thisArg = dd->getOperatorDeleteThisArg()) + return cgf.emitScalarExpr(thisArg); + return cgf.loadCXXThis(); +} + +/// Call the operator delete associated with the current destructor. +struct CallDtorDelete final : EHScopeStack::Cleanup { + CallDtorDelete() {} + + void emit(CIRGenFunction &cgf) override { + const CXXDestructorDecl *dtor = cast<CXXDestructorDecl>(cgf.curFuncDecl); + const CXXRecordDecl *classDecl = dtor->getParent(); + cgf.emitDeleteCall(dtor->getOperatorDelete(), + loadThisForDtorDelete(cgf, dtor), + cgf.getContext().getCanonicalTagType(classDecl)); + } +}; + class DestroyField final : public EHScopeStack::Cleanup { const FieldDecl *field; CIRGenFunction::Destroyer *destroyer; @@ -932,7 +952,18 @@ void CIRGenFunction::enterDtorCleanups(const CXXDestructorDecl *dd, // The deleting-destructor phase just needs to call the appropriate // operator delete that Sema picked up. 
if (dtorType == Dtor_Deleting) { - cgm.errorNYI(dd->getSourceRange(), "deleting destructor cleanups"); + assert(dd->getOperatorDelete() && + "operator delete missing - EnterDtorCleanups"); + if (cxxStructorImplicitParamValue) { + cgm.errorNYI(dd->getSourceRange(), "deleting destructor with vtt"); + } else { + if (dd->getOperatorDelete()->isDestroyingOperatorDelete()) { + cgm.errorNYI(dd->getSourceRange(), + "deleting destructor with destroying operator delete"); + } else { + ehStack.pushCleanup<CallDtorDelete>(NormalAndEHCleanup); + } + } return; } diff --git a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp index 97c0944..b1e9e76 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp @@ -130,13 +130,11 @@ RValue CIRGenFunction::emitCXXMemberOrOperatorMemberCallExpr( const CXXMethodDecl *calleeDecl = devirtualizedMethod ? devirtualizedMethod : md; const CIRGenFunctionInfo *fInfo = nullptr; - if (isa<CXXDestructorDecl>(calleeDecl)) { - cgm.errorNYI(ce->getSourceRange(), - "emitCXXMemberOrOperatorMemberCallExpr: destructor call"); - return RValue::get(nullptr); - } - - fInfo = &cgm.getTypes().arrangeCXXMethodDeclaration(calleeDecl); + if (const auto *dtor = dyn_cast<CXXDestructorDecl>(calleeDecl)) + fInfo = &cgm.getTypes().arrangeCXXStructorDeclaration( + GlobalDecl(dtor, Dtor_Complete)); + else + fInfo = &cgm.getTypes().arrangeCXXMethodDeclaration(calleeDecl); cir::FuncType ty = cgm.getTypes().getFunctionType(*fInfo); @@ -151,9 +149,34 @@ RValue CIRGenFunction::emitCXXMemberOrOperatorMemberCallExpr( // because then we know what the type is. 
bool useVirtualCall = canUseVirtualCall && !devirtualizedMethod; - if (isa<CXXDestructorDecl>(calleeDecl)) { - cgm.errorNYI(ce->getSourceRange(), - "emitCXXMemberOrOperatorMemberCallExpr: destructor call"); + if (const auto *dtor = dyn_cast<CXXDestructorDecl>(calleeDecl)) { + assert(ce->arg_begin() == ce->arg_end() && + "Destructor shouldn't have explicit parameters"); + assert(returnValue.isNull() && "Destructor shouldn't have return value"); + if (useVirtualCall) { + cgm.getCXXABI().emitVirtualDestructorCall(*this, dtor, Dtor_Complete, + thisPtr.getAddress(), + cast<CXXMemberCallExpr>(ce)); + } else { + GlobalDecl globalDecl(dtor, Dtor_Complete); + CIRGenCallee callee; + assert(!cir::MissingFeatures::appleKext()); + if (!devirtualizedMethod) { + callee = CIRGenCallee::forDirect( + cgm.getAddrOfCXXStructor(globalDecl, fInfo, ty), globalDecl); + } else { + cgm.errorNYI(ce->getSourceRange(), "devirtualized destructor call"); + return RValue::get(nullptr); + } + + QualType thisTy = + isArrow ? base->getType()->getPointeeType() : base->getType(); + // CIRGen does not pass CallOrInvoke here (different from OG LLVM codegen) + // because in practice it is always null even in OG. + emitCXXDestructorCall(globalDecl, callee, thisPtr.getPointer(), thisTy, + /*implicitParam=*/nullptr, + /*implicitParamTy=*/QualType(), ce); + } return RValue::get(nullptr); } diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp index 7a774e0..01a43a99 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp @@ -678,7 +678,13 @@ void CIRGenFunction::emitDestructorBody(FunctionArgList &args) { // possible to delegate the destructor body to the complete // destructor. Do so. 
if (dtorType == Dtor_Deleting) { - cgm.errorNYI(dtor->getSourceRange(), "deleting destructor"); + RunCleanupsScope dtorEpilogue(*this); + enterDtorCleanups(dtor, Dtor_Deleting); + if (haveInsertPoint()) { + QualType thisTy = dtor->getFunctionObjectParameterType(); + emitCXXDestructorCall(dtor, Dtor_Complete, /*forVirtualBase=*/false, + /*delegating=*/false, loadCXXThisAddress(), thisTy); + } return; } diff --git a/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp b/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp index 9e490c6d..d30c975 100644 --- a/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp @@ -95,7 +95,10 @@ public: clang::GlobalDecl gd, Address thisAddr, mlir::Type ty, SourceLocation loc) override; - + mlir::Value emitVirtualDestructorCall(CIRGenFunction &cgf, + const CXXDestructorDecl *dtor, + CXXDtorType dtorType, Address thisAddr, + DeleteOrMemberCallExpr e) override; mlir::Value getVTableAddressPoint(BaseSubobject base, const CXXRecordDecl *vtableClass) override; mlir::Value getVTableAddressPointInStructorWithVTT( @@ -465,6 +468,29 @@ void CIRGenItaniumCXXABI::emitVTableDefinitions(CIRGenVTables &cgvt, } } +mlir::Value CIRGenItaniumCXXABI::emitVirtualDestructorCall( + CIRGenFunction &cgf, const CXXDestructorDecl *dtor, CXXDtorType dtorType, + Address thisAddr, DeleteOrMemberCallExpr expr) { + auto *callExpr = dyn_cast<const CXXMemberCallExpr *>(expr); + auto *delExpr = dyn_cast<const CXXDeleteExpr *>(expr); + assert((callExpr != nullptr) ^ (delExpr != nullptr)); + assert(callExpr == nullptr || callExpr->arg_begin() == callExpr->arg_end()); + assert(dtorType == Dtor_Deleting || dtorType == Dtor_Complete); + + GlobalDecl globalDecl(dtor, dtorType); + const CIRGenFunctionInfo *fnInfo = + &cgm.getTypes().arrangeCXXStructorDeclaration(globalDecl); + const cir::FuncType &fnTy = cgm.getTypes().getFunctionType(*fnInfo); + auto callee = CIRGenCallee::forVirtual(callExpr, globalDecl, thisAddr, fnTy); + + QualType thisTy 
= + callExpr ? callExpr->getObjectType() : delExpr->getDestroyedType(); + + cgf.emitCXXDestructorCall(globalDecl, callee, thisAddr.emitRawPointer(), + thisTy, nullptr, QualType(), nullptr); + return nullptr; +} + void CIRGenItaniumCXXABI::emitVirtualInheritanceTables( const CXXRecordDecl *rd) { CIRGenVTables &vtables = cgm.getVTables(); diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index fe1ea56..82b1051 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -451,15 +451,47 @@ void CIRGenModule::emitGlobalFunctionDefinition(clang::GlobalDecl gd, setNonAliasAttributes(gd, funcOp); assert(!cir::MissingFeatures::opFuncAttributesForDefinition()); - if (funcDecl->getAttr<ConstructorAttr>()) - errorNYI(funcDecl->getSourceRange(), "constructor attribute"); - if (funcDecl->getAttr<DestructorAttr>()) - errorNYI(funcDecl->getSourceRange(), "destructor attribute"); + auto getPriority = [this](const auto *attr) -> int { + Expr *e = attr->getPriority(); + if (e) + return e->EvaluateKnownConstInt(this->getASTContext()).getExtValue(); + return attr->DefaultPriority; + }; + + if (const ConstructorAttr *ca = funcDecl->getAttr<ConstructorAttr>()) + addGlobalCtor(funcOp, getPriority(ca)); + if (const DestructorAttr *da = funcDecl->getAttr<DestructorAttr>()) + addGlobalDtor(funcOp, getPriority(da)); if (funcDecl->getAttr<AnnotateAttr>()) errorNYI(funcDecl->getSourceRange(), "deferredAnnotations"); } +/// Track functions to be called before main() runs. +void CIRGenModule::addGlobalCtor(cir::FuncOp ctor, + std::optional<int> priority) { + assert(!cir::MissingFeatures::globalCtorLexOrder()); + assert(!cir::MissingFeatures::globalCtorAssociatedData()); + + // Traditional LLVM codegen directly adds the function to the list of global + // ctors. In CIR we just add a global_ctor attribute to the function. The + // global list is created in LoweringPrepare. 
+ // + // FIXME(from traditional LLVM): Type coercion of void()* types. + ctor.setGlobalCtorPriority(priority); +} + +/// Add a function to the list that will be called when the module is unloaded. +void CIRGenModule::addGlobalDtor(cir::FuncOp dtor, + std::optional<int> priority) { + if (codeGenOpts.RegisterGlobalDtorsWithAtExit && + (!getASTContext().getTargetInfo().getTriple().isOSAIX())) + errorNYI(dtor.getLoc(), "registerGlobalDtorsWithAtExit"); + + // FIXME(from traditional LLVM): Type coercion of void()* types. + dtor.setGlobalDtorPriority(priority); +} + void CIRGenModule::handleCXXStaticMemberVarInstantiation(VarDecl *vd) { VarDecl::DefinitionKind dk = vd->isThisDeclarationADefinition(); if (dk == VarDecl::Definition && vd->hasAttr<DLLImportAttr>()) diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h index f627bae..690f0ed 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.h +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -159,6 +159,13 @@ public: bool isConstant = false, mlir::Operation *insertPoint = nullptr); + /// Add a global constructor or destructor to the module. + /// The priority is optional, if not specified, the default priority is used. 
+ void addGlobalCtor(cir::FuncOp ctor, + std::optional<int> priority = std::nullopt); + void addGlobalDtor(cir::FuncOp dtor, + std::optional<int> priority = std::nullopt); + bool shouldZeroInitPadding() const { // In C23 (N3096) $6.7.10: // """ diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp index 3d86f71..ce4ae7e 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp @@ -1005,7 +1005,7 @@ public: /*temporary=*/nullptr, OpenACCReductionOperator::Invalid, Decl::castToDeclContext(cgf.curFuncDecl), opInfo.origType, opInfo.bounds.size(), opInfo.boundTypes, opInfo.baseType, - privateOp); + privateOp, /*reductionCombinerRecipes=*/{}); // TODO: OpenACC: The dialect is going to change in the near future to // have these be on a different operation, so when that changes, we // probably need to change these here. @@ -1046,7 +1046,7 @@ public: OpenACCReductionOperator::Invalid, Decl::castToDeclContext(cgf.curFuncDecl), opInfo.origType, opInfo.bounds.size(), opInfo.boundTypes, opInfo.baseType, - firstPrivateOp); + firstPrivateOp, /*reductionCombinerRecipes=*/{}); // TODO: OpenACC: The dialect is going to change in the near future to // have these be on a different operation, so when that changes, we @@ -1088,7 +1088,7 @@ public: /*temporary=*/nullptr, clause.getReductionOp(), Decl::castToDeclContext(cgf.curFuncDecl), opInfo.origType, opInfo.bounds.size(), opInfo.boundTypes, opInfo.baseType, - reductionOp); + reductionOp, varRecipe.CombinerRecipes); operation.addReduction(builder.getContext(), reductionOp, recipe); } diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp index 24a5fc2..ce14aa8 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp @@ -527,16 +527,142 @@ void OpenACCRecipeBuilderBase::createFirstprivateRecipeCopy( // doesn't restore it 
aftewards. void OpenACCRecipeBuilderBase::createReductionRecipeCombiner( mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp, - mlir::acc::ReductionRecipeOp recipe, size_t numBounds) { + mlir::acc::ReductionRecipeOp recipe, size_t numBounds, QualType origType, + llvm::ArrayRef<OpenACCReductionRecipe::CombinerRecipe> combinerRecipes) { mlir::Block *block = createRecipeBlock(recipe.getCombinerRegion(), mainOp.getType(), loc, numBounds, /*isInit=*/false); builder.setInsertionPointToEnd(&recipe.getCombinerRegion().back()); CIRGenFunction::LexicalScope ls(cgf, loc, block); - mlir::BlockArgument lhsArg = block->getArgument(0); + mlir::Value lhsArg = block->getArgument(0); + mlir::Value rhsArg = block->getArgument(1); + llvm::MutableArrayRef<mlir::BlockArgument> boundsRange = + block->getArguments().drop_front(2); + + if (llvm::any_of(combinerRecipes, [](auto &r) { return r.Op == nullptr; })) { + cgf.cgm.errorNYI(loc, "OpenACC Reduction combiner not generated"); + mlir::acc::YieldOp::create(builder, locEnd, block->getArgument(0)); + return; + } + + // apply the bounds so that we can get our bounds emitted correctly. + for (mlir::BlockArgument boundArg : llvm::reverse(boundsRange)) + std::tie(lhsArg, rhsArg) = + createBoundsLoop(lhsArg, rhsArg, boundArg, loc, /*inverse=*/false); + + // Emitter for when we know this isn't a struct or array we have to loop + // through. This should work for the 'field' once the get-element call has + // been made. 
+ auto emitSingleCombiner = + [&](mlir::Value lhsArg, mlir::Value rhsArg, + const OpenACCReductionRecipe::CombinerRecipe &combiner) { + mlir::Type elementTy = + mlir::cast<cir::PointerType>(lhsArg.getType()).getPointee(); + CIRGenFunction::DeclMapRevertingRAII declMapRAIILhs{cgf, combiner.LHS}; + cgf.setAddrOfLocalVar( + combiner.LHS, Address{lhsArg, elementTy, + cgf.getContext().getDeclAlign(combiner.LHS)}); + CIRGenFunction::DeclMapRevertingRAII declMapRAIIRhs{cgf, combiner.RHS}; + cgf.setAddrOfLocalVar( + combiner.RHS, Address{rhsArg, elementTy, + cgf.getContext().getDeclAlign(combiner.RHS)}); + + [[maybe_unused]] mlir::LogicalResult stmtRes = + cgf.emitStmt(combiner.Op, /*useCurrentScope=*/true); + }; + + // Emitter for when we know this is either a non-array or element of an array + // (which also shouldn't be an array type?). This function should generate the + // initialization code for an entire 'array-element'/non-array, including + // diving into each element of a struct (if necessary). + auto emitCombiner = [&](mlir::Value lhsArg, mlir::Value rhsArg, QualType ty) { + assert(!ty->isArrayType() && "Array type shouldn't get here"); + if (const auto *rd = ty->getAsRecordDecl()) { + if (combinerRecipes.size() == 1 && + cgf.getContext().hasSameType(ty, combinerRecipes[0].LHS->getType())) { + // If this is a 'top level' operator on the type we can just emit this + // as a simple one. + emitSingleCombiner(lhsArg, rhsArg, combinerRecipes[0]); + } else { + // else we have to handle each individual field after a + // get-element. 
+ for (const auto &[field, combiner] : + llvm::zip_equal(rd->fields(), combinerRecipes)) { + mlir::Type fieldType = cgf.convertType(field->getType()); + auto fieldPtr = cir::PointerType::get(fieldType); + + mlir::Value lhsField = builder.createGetMember( + loc, fieldPtr, lhsArg, field->getName(), field->getFieldIndex()); + mlir::Value rhsField = builder.createGetMember( + loc, fieldPtr, rhsArg, field->getName(), field->getFieldIndex()); + + emitSingleCombiner(lhsField, rhsField, combiner); + } + } + + } else { + // if this is a single-thing (because we should know this isn't an array, + // as Sema wouldn't let us get here), we can just do a normal emit call. + emitSingleCombiner(lhsArg, rhsArg, combinerRecipes[0]); + } + }; + + if (const auto *cat = cgf.getContext().getAsConstantArrayType(origType)) { + // If we're in an array, we have to emit the combiner for each element of + // the array. + auto itrTy = mlir::cast<cir::IntType>(cgf.PtrDiffTy); + auto itrPtrTy = cir::PointerType::get(itrTy); + + mlir::Value zero = + builder.getConstInt(loc, mlir::cast<cir::IntType>(cgf.PtrDiffTy), 0); + mlir::Value itr = + cir::AllocaOp::create(builder, loc, itrPtrTy, itrTy, "itr", + cgf.cgm.getSize(cgf.getPointerAlign())); + builder.CIRBaseBuilderTy::createStore(loc, zero, itr); + + builder.setInsertionPointAfter(builder.createFor( + loc, + /*condBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + auto loadItr = cir::LoadOp::create(builder, loc, {itr}); + mlir::Value arraySize = builder.getConstInt( + loc, mlir::cast<cir::IntType>(cgf.PtrDiffTy), cat->getZExtSize()); + auto cmp = builder.createCompare(loc, cir::CmpOpKind::lt, loadItr, + arraySize); + builder.createCondition(cmp); + }, + /*bodyBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + auto loadItr = cir::LoadOp::create(builder, loc, {itr}); + auto lhsElt = builder.getArrayElement( + loc, loc, lhsArg, cgf.convertType(cat->getElementType()), loadItr, + /*shouldDecay=*/true); + auto rhsElt = 
builder.getArrayElement( + loc, loc, rhsArg, cgf.convertType(cat->getElementType()), loadItr, + /*shouldDecay=*/true); + + emitCombiner(lhsElt, rhsElt, cat->getElementType()); + builder.createYield(loc); + }, + /*stepBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + auto loadItr = cir::LoadOp::create(builder, loc, {itr}); + auto inc = cir::UnaryOp::create(builder, loc, loadItr.getType(), + cir::UnaryOpKind::Inc, loadItr); + builder.CIRBaseBuilderTy::createStore(loc, inc, itr); + builder.createYield(loc); + })); - mlir::acc::YieldOp::create(builder, locEnd, lhsArg); + } else if (origType->isArrayType()) { + cgf.cgm.errorNYI(loc, + "OpenACC Reduction combiner non-constant array recipe"); + } else { + emitCombiner(lhsArg, rhsArg, origType); + } + + builder.setInsertionPointToEnd(&recipe.getCombinerRegion().back()); + mlir::acc::YieldOp::create(builder, locEnd, block->getArgument(0)); } } // namespace clang::CIRGen diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h index a5da744..745d424 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h @@ -64,10 +64,10 @@ protected: // that this function is not 'insertion point' clean, in that it alters the // insertion point to be inside of the 'combiner' section of the recipe, but // doesn't restore it aftewards. 
- void createReductionRecipeCombiner(mlir::Location loc, mlir::Location locEnd, - mlir::Value mainOp, - mlir::acc::ReductionRecipeOp recipe, - size_t numBounds); + void createReductionRecipeCombiner( + mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp, + mlir::acc::ReductionRecipeOp recipe, size_t numBounds, QualType origType, + llvm::ArrayRef<OpenACCReductionRecipe::CombinerRecipe> combinerRecipes); void createInitRecipe(mlir::Location loc, mlir::Location locEnd, SourceRange exprRange, mlir::Value mainOp, @@ -169,7 +169,9 @@ public: const Expr *varRef, const VarDecl *varRecipe, const VarDecl *temporary, OpenACCReductionOperator reductionOp, DeclContext *dc, QualType origType, size_t numBounds, llvm::ArrayRef<QualType> boundTypes, QualType baseType, - mlir::Value mainOp) { + mlir::Value mainOp, + llvm::ArrayRef<OpenACCReductionRecipe::CombinerRecipe> + reductionCombinerRecipes) { assert(!varRecipe->getType()->isSpecificBuiltinType( BuiltinType::ArraySection) && "array section shouldn't make it to recipe creation"); @@ -208,7 +210,8 @@ public: createInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp, recipe.getInitRegion(), numBounds, boundTypes, varRecipe, origType, /*emitInitExpr=*/true); - createReductionRecipeCombiner(loc, locEnd, mainOp, recipe, numBounds); + createReductionRecipeCombiner(loc, locEnd, mainOp, recipe, numBounds, + origType, reductionCombinerRecipes); } else { static_assert(std::is_same_v<RecipeTy, mlir::acc::FirstprivateRecipeOp>); createInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp, diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp index e65896a..2ab1ea0c 100644 --- a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp @@ -619,10 +619,8 @@ const CIRGenFunctionInfo &CIRGenTypes::arrangeGlobalDeclaration(GlobalDecl gd) { const auto *fd = cast<FunctionDecl>(gd.getDecl()); if (isa<CXXConstructorDecl>(gd.getDecl()) || - 
isa<CXXDestructorDecl>(gd.getDecl())) { - cgm.errorNYI(SourceLocation(), - "arrangeGlobalDeclaration for C++ constructor or destructor"); - } + isa<CXXDestructorDecl>(gd.getDecl())) + return arrangeCXXStructorDeclaration(gd); return arrangeFunctionDeclaration(fd); } diff --git a/clang/lib/CIR/CodeGen/CIRGenVTables.cpp b/clang/lib/CIR/CodeGen/CIRGenVTables.cpp index 84f5977..36bab62 100644 --- a/clang/lib/CIR/CodeGen/CIRGenVTables.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenVTables.cpp @@ -120,12 +120,6 @@ mlir::Attribute CIRGenVTables::getVTableComponent( assert(!cir::MissingFeatures::vtableRelativeLayout()); switch (component.getKind()) { - case VTableComponent::CK_CompleteDtorPointer: - cgm.errorNYI("getVTableComponent: CompleteDtorPointer"); - return mlir::Attribute(); - case VTableComponent::CK_DeletingDtorPointer: - cgm.errorNYI("getVTableComponent: DeletingDtorPointer"); - return mlir::Attribute(); case VTableComponent::CK_UnusedFunctionPointer: cgm.errorNYI("getVTableComponent: UnusedFunctionPointer"); return mlir::Attribute(); @@ -148,7 +142,9 @@ mlir::Attribute CIRGenVTables::getVTableComponent( "expected GlobalViewAttr or ConstPtrAttr"); return rtti; - case VTableComponent::CK_FunctionPointer: { + case VTableComponent::CK_FunctionPointer: + case VTableComponent::CK_CompleteDtorPointer: + case VTableComponent::CK_DeletingDtorPointer: { GlobalDecl gd = component.getGlobalDecl(); assert(!cir::MissingFeatures::cudaSupport()); diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index 5f88590..7af3dc1 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -15,6 +15,7 @@ #include "clang/CIR/Dialect/IR/CIROpsEnums.h" #include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "mlir/IR/DialectImplementation.h" #include "mlir/Interfaces/ControlFlowInterfaces.h" #include "mlir/Interfaces/FunctionImplementation.h" #include "mlir/Support/LLVM.h" @@ -1720,6 +1721,43 @@ ParseResult 
cir::FuncOp::parse(OpAsmParser &parser, OperationState &state) { hasAlias = true; } + auto parseGlobalDtorCtor = + [&](StringRef keyword, + llvm::function_ref<void(std::optional<int> prio)> createAttr) + -> mlir::LogicalResult { + if (mlir::succeeded(parser.parseOptionalKeyword(keyword))) { + std::optional<int> priority; + if (mlir::succeeded(parser.parseOptionalLParen())) { + auto parsedPriority = mlir::FieldParser<int>::parse(parser); + if (mlir::failed(parsedPriority)) + return parser.emitError(parser.getCurrentLocation(), + "failed to parse 'priority', of type 'int'"); + priority = parsedPriority.value_or(int()); + // Parse literal ')' + if (parser.parseRParen()) + return failure(); + } + createAttr(priority); + } + return success(); + }; + + if (parseGlobalDtorCtor("global_ctor", [&](std::optional<int> priority) { + mlir::IntegerAttr globalCtorPriorityAttr = + builder.getI32IntegerAttr(priority.value_or(65535)); + state.addAttribute(getGlobalCtorPriorityAttrName(state.name), + globalCtorPriorityAttr); + }).failed()) + return failure(); + + if (parseGlobalDtorCtor("global_dtor", [&](std::optional<int> priority) { + mlir::IntegerAttr globalDtorPriorityAttr = + builder.getI32IntegerAttr(priority.value_or(65535)); + state.addAttribute(getGlobalDtorPriorityAttrName(state.name), + globalDtorPriorityAttr); + }).failed()) + return failure(); + // Parse the optional function body. auto *body = state.addRegion(); OptionalParseResult parseResult = parser.parseOptionalRegion( @@ -1801,6 +1839,18 @@ void cir::FuncOp::print(OpAsmPrinter &p) { p << ")"; } + if (auto globalCtorPriority = getGlobalCtorPriority()) { + p << " global_ctor"; + if (globalCtorPriority.value() != 65535) + p << "(" << globalCtorPriority.value() << ")"; + } + + if (auto globalDtorPriority = getGlobalDtorPriority()) { + p << " global_dtor"; + if (globalDtorPriority.value() != 65535) + p << "(" << globalDtorPriority.value() << ")"; + } + // Print the body if this is not an external function. 
Region &body = getOperation()->getRegion(0); if (!body.empty()) { @@ -2851,20 +2901,6 @@ mlir::LogicalResult cir::ThrowOp::verify() { } //===----------------------------------------------------------------------===// -// AtomicCmpXchg -//===----------------------------------------------------------------------===// - -LogicalResult cir::AtomicCmpXchg::verify() { - mlir::Type pointeeType = getPtr().getType().getPointee(); - - if (pointeeType != getExpected().getType() || - pointeeType != getDesired().getType()) - return emitOpError("ptr, expected and desired types must match"); - - return success(); -} - -//===----------------------------------------------------------------------===// // TypeInfoAttr //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp index dbff0b9..d99c362 100644 --- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp +++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp @@ -105,6 +105,8 @@ struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> { /// List of ctors and their priorities to be called before main() llvm::SmallVector<std::pair<std::string, uint32_t>, 4> globalCtorList; + /// List of dtors and their priorities to be called when unloading module. + llvm::SmallVector<std::pair<std::string, uint32_t>, 4> globalDtorList; void setASTContext(clang::ASTContext *c) { astCtx = c; @@ -823,10 +825,13 @@ void LoweringPreparePass::buildGlobalCtorDtorList() { mlir::ArrayAttr::get(&getContext(), globalCtors)); } - // We will eventual need to populate a global_dtor list, but that's not - // needed for globals with destructors. It will only be needed for functions - // that are marked as global destructors with an attribute. 
- assert(!cir::MissingFeatures::opGlobalDtorList()); + if (!globalDtorList.empty()) { + llvm::SmallVector<mlir::Attribute> globalDtors = + prepareCtorDtorAttrList<cir::GlobalDtorAttr>(&getContext(), + globalDtorList); + mlirModule->setAttr(cir::CIRDialect::getGlobalDtorsAttrName(), + mlir::ArrayAttr::get(&getContext(), globalDtors)); + } } void LoweringPreparePass::buildCXXGlobalInitFunc() { @@ -975,22 +980,28 @@ void LoweringPreparePass::lowerArrayCtor(cir::ArrayCtor op) { } void LoweringPreparePass::runOnOp(mlir::Operation *op) { - if (auto arrayCtor = dyn_cast<ArrayCtor>(op)) + if (auto arrayCtor = dyn_cast<cir::ArrayCtor>(op)) { lowerArrayCtor(arrayCtor); - else if (auto arrayDtor = dyn_cast<cir::ArrayDtor>(op)) + } else if (auto arrayDtor = dyn_cast<cir::ArrayDtor>(op)) { lowerArrayDtor(arrayDtor); - else if (auto cast = mlir::dyn_cast<cir::CastOp>(op)) + } else if (auto cast = mlir::dyn_cast<cir::CastOp>(op)) { lowerCastOp(cast); - else if (auto complexDiv = mlir::dyn_cast<cir::ComplexDivOp>(op)) + } else if (auto complexDiv = mlir::dyn_cast<cir::ComplexDivOp>(op)) { lowerComplexDivOp(complexDiv); - else if (auto complexMul = mlir::dyn_cast<cir::ComplexMulOp>(op)) + } else if (auto complexMul = mlir::dyn_cast<cir::ComplexMulOp>(op)) { lowerComplexMulOp(complexMul); - else if (auto glob = mlir::dyn_cast<cir::GlobalOp>(op)) + } else if (auto glob = mlir::dyn_cast<cir::GlobalOp>(op)) { lowerGlobalOp(glob); - else if (auto dynamicCast = mlir::dyn_cast<cir::DynamicCastOp>(op)) + } else if (auto dynamicCast = mlir::dyn_cast<cir::DynamicCastOp>(op)) { lowerDynamicCastOp(dynamicCast); - else if (auto unary = mlir::dyn_cast<cir::UnaryOp>(op)) + } else if (auto unary = mlir::dyn_cast<cir::UnaryOp>(op)) { lowerUnaryOp(unary); + } else if (auto fnOp = dyn_cast<cir::FuncOp>(op)) { + if (auto globalCtor = fnOp.getGlobalCtorPriority()) + globalCtorList.emplace_back(fnOp.getName(), globalCtor.value()); + else if (auto globalDtor = fnOp.getGlobalDtorPriority()) + 
globalDtorList.emplace_back(fnOp.getName(), globalDtor.value()); + } } void LoweringPreparePass::runOnOperation() { @@ -1003,7 +1014,7 @@ void LoweringPreparePass::runOnOperation() { op->walk([&](mlir::Operation *op) { if (mlir::isa<cir::ArrayCtor, cir::ArrayDtor, cir::CastOp, cir::ComplexMulOp, cir::ComplexDivOp, cir::DynamicCastOp, - cir::GlobalOp, cir::UnaryOp>(op)) + cir::FuncOp, cir::GlobalOp, cir::UnaryOp>(op)) opsToTransform.push_back(op); }); diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 26e0ba9..3abba3d 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -694,8 +694,8 @@ getLLVMMemOrder(std::optional<cir::MemOrder> memorder) { llvm_unreachable("unknown memory order"); } -mlir::LogicalResult CIRToLLVMAtomicCmpXchgLowering::matchAndRewrite( - cir::AtomicCmpXchg op, OpAdaptor adaptor, +mlir::LogicalResult CIRToLLVMAtomicCmpXchgOpLowering::matchAndRewrite( + cir::AtomicCmpXchgOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { mlir::Value expected = adaptor.getExpected(); mlir::Value desired = adaptor.getDesired(); @@ -719,8 +719,8 @@ mlir::LogicalResult CIRToLLVMAtomicCmpXchgLowering::matchAndRewrite( return mlir::success(); } -mlir::LogicalResult CIRToLLVMAtomicXchgLowering::matchAndRewrite( - cir::AtomicXchg op, OpAdaptor adaptor, +mlir::LogicalResult CIRToLLVMAtomicXchgOpLowering::matchAndRewrite( + cir::AtomicXchgOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { assert(!cir::MissingFeatures::atomicSyncScopeID()); mlir::LLVM::AtomicOrdering llvmOrder = getLLVMMemOrder(adaptor.getMemOrder()); @@ -2598,7 +2598,13 @@ void ConvertCIRToLLVMPass::runOnOperation() { return std::make_pair(ctorAttr.getName(), ctorAttr.getPriority()); }); - assert(!cir::MissingFeatures::opGlobalDtorList()); + // Emit the llvm.global_dtors array. 
+ buildCtorDtorList(module, cir::CIRDialect::getGlobalDtorsAttrName(), + "llvm.global_dtors", [](mlir::Attribute attr) { + auto dtorAttr = mlir::cast<cir::GlobalDtorAttr>(attr); + return std::make_pair(dtorAttr.getName(), + dtorAttr.getPriority()); + }); } mlir::LogicalResult CIRToLLVMBrOpLowering::matchAndRewrite( diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index fa94692..1ff2be7 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1762,8 +1762,11 @@ void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD, // access its value. llvm::GlobalValue *Addr = GV; if (CGM.getLangOpts().OpenMPIsTargetDevice) { + llvm::PointerType *FnPtrTy = llvm::PointerType::get( + CGM.getLLVMContext(), + CGM.getModule().getDataLayout().getProgramAddressSpace()); Addr = new llvm::GlobalVariable( - CGM.getModule(), CGM.VoidPtrTy, + CGM.getModule(), FnPtrTy, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name, nullptr, llvm::GlobalValue::NotThreadLocal, CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace()); diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 3613b6a..fddeba9 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // This provides a generalized class for OpenMP runtime code generation -// specialized by GPU targets NVPTX and AMDGCN. +// specialized by GPU targets NVPTX, AMDGCN and SPIR-V. 
// //===----------------------------------------------------------------------===// @@ -1242,12 +1242,13 @@ void CGOpenMPRuntimeGPU::emitParallelCall( CGBuilderTy &Bld = CGF.Builder; llvm::Value *NumThreadsVal = NumThreads; llvm::Function *WFn = WrapperFunctionsMap[OutlinedFn]; - llvm::Value *ID = llvm::ConstantPointerNull::get(CGM.Int8PtrTy); - if (WFn) - ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy); - llvm::Type *FnPtrTy = llvm::PointerType::get( + llvm::PointerType *FnPtrTy = llvm::PointerType::get( CGF.getLLVMContext(), CGM.getDataLayout().getProgramAddressSpace()); + llvm::Value *ID = llvm::ConstantPointerNull::get(FnPtrTy); + if (WFn) + ID = Bld.CreateBitOrPointerCast(WFn, FnPtrTy); + llvm::Value *FnPtr = Bld.CreateBitOrPointerCast(OutlinedFn, FnPtrTy); // Create a private scope that will globalize the arguments diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 92636f2..fdc1a11 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -2674,7 +2674,8 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S, const llvm::ArrayRef<LValue> ResultRegDests, const llvm::ArrayRef<QualType> ResultRegQualTys, const llvm::BitVector &ResultTypeRequiresCast, - const llvm::BitVector &ResultRegIsFlagReg) { + const std::vector<std::optional<std::pair<unsigned, unsigned>>> + &ResultBounds) { CGBuilderTy &Builder = CGF.Builder; CodeGenModule &CGM = CGF.CGM; llvm::LLVMContext &CTX = CGF.getLLVMContext(); @@ -2685,18 +2686,20 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S, // ResultRegDests can be also populated by addReturnRegisterOutputs() above, // in which case its size may grow. 
assert(ResultTypeRequiresCast.size() <= ResultRegDests.size()); - assert(ResultRegIsFlagReg.size() <= ResultRegDests.size()); + assert(ResultBounds.size() <= ResultRegDests.size()); for (unsigned i = 0, e = RegResults.size(); i != e; ++i) { llvm::Value *Tmp = RegResults[i]; llvm::Type *TruncTy = ResultTruncRegTypes[i]; - if ((i < ResultRegIsFlagReg.size()) && ResultRegIsFlagReg[i]) { - // Target must guarantee the Value `Tmp` here is lowered to a boolean - // value. - llvm::Constant *Two = llvm::ConstantInt::get(Tmp->getType(), 2); + if ((i < ResultBounds.size()) && ResultBounds[i].has_value()) { + const auto [LowerBound, UpperBound] = ResultBounds[i].value(); + // FIXME: Support for nonzero lower bounds not yet implemented. + assert(LowerBound == 0 && "Output operand lower bound is not zero."); + llvm::Constant *UpperBoundConst = + llvm::ConstantInt::get(Tmp->getType(), UpperBound); llvm::Value *IsBooleanValue = - Builder.CreateCmp(llvm::CmpInst::ICMP_ULT, Tmp, Two); + Builder.CreateCmp(llvm::CmpInst::ICMP_ULT, Tmp, UpperBoundConst); llvm::Function *FnAssume = CGM.getIntrinsic(llvm::Intrinsic::assume); Builder.CreateCall(FnAssume, IsBooleanValue); } @@ -2825,7 +2828,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { std::vector<llvm::Type *> ArgElemTypes; std::vector<llvm::Value*> Args; llvm::BitVector ResultTypeRequiresCast; - llvm::BitVector ResultRegIsFlagReg; + std::vector<std::optional<std::pair<unsigned, unsigned>>> ResultBounds; // Keep track of inout constraints. 
std::string InOutConstraints; @@ -2883,8 +2886,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { ResultRegQualTys.push_back(QTy); ResultRegDests.push_back(Dest); - bool IsFlagReg = llvm::StringRef(OutputConstraint).starts_with("{@cc"); - ResultRegIsFlagReg.push_back(IsFlagReg); + ResultBounds.emplace_back(Info.getOutputOperandBounds()); llvm::Type *Ty = ConvertTypeForMem(QTy); const bool RequiresCast = Info.allowsRegister() && @@ -3231,7 +3233,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { EmitAsmStores(*this, S, RegResults, ResultRegTypes, ResultTruncRegTypes, ResultRegDests, ResultRegQualTys, ResultTypeRequiresCast, - ResultRegIsFlagReg); + ResultBounds); // If this is an asm goto with outputs, repeat EmitAsmStores, but with a // different insertion point; one for each indirect destination and with @@ -3242,7 +3244,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { Builder.SetInsertPoint(Succ, --(Succ->end())); EmitAsmStores(*this, S, CBRRegResults[Succ], ResultRegTypes, ResultTruncRegTypes, ResultRegDests, ResultRegQualTys, - ResultTypeRequiresCast, ResultRegIsFlagReg); + ResultTypeRequiresCast, ResultBounds); } } } diff --git a/clang/lib/CodeGen/Targets/RISCV.cpp b/clang/lib/CodeGen/Targets/RISCV.cpp index 0ef39b6..0d0941e 100644 --- a/clang/lib/CodeGen/Targets/RISCV.cpp +++ b/clang/lib/CodeGen/Targets/RISCV.cpp @@ -680,22 +680,22 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, if (const auto *ED = Ty->getAsEnumDecl()) Ty = ED->getIntegerType(); - // All integral types are promoted to XLen width - if (Size < XLen && Ty->isIntegralOrEnumerationType()) { - return extendType(Ty, CGT.ConvertType(Ty)); - } - if (const auto *EIT = Ty->getAs<BitIntType>()) { - if (EIT->getNumBits() < XLen) + + if (XLen == 64 && EIT->getNumBits() == 32) return extendType(Ty, CGT.ConvertType(Ty)); - if (EIT->getNumBits() > 128 || - (!getContext().getTargetInfo().hasInt128Type() && - EIT->getNumBits() > 64)) - return 
getNaturalAlignIndirect( - Ty, /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(), - /*ByVal=*/false); + + if (EIT->getNumBits() <= 2 * XLen) + return ABIArgInfo::getExtend(Ty, CGT.ConvertType(Ty)); + return getNaturalAlignIndirect( + Ty, /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(), + /*ByVal=*/false); } + // All integral types are promoted to XLen width + if (Size < XLen && Ty->isIntegralOrEnumerationType()) + return extendType(Ty, CGT.ConvertType(Ty)); + return ABIArgInfo::getDirect(); } diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp index c2956a1..cb3fc1c 100644 --- a/clang/lib/Format/FormatToken.cpp +++ b/clang/lib/Format/FormatToken.cpp @@ -41,8 +41,7 @@ static constexpr std::array<StringRef, 14> QtPropertyKeywords = { bool FormatToken::isQtProperty() const { assert(llvm::is_sorted(QtPropertyKeywords)); - return std::binary_search(QtPropertyKeywords.begin(), - QtPropertyKeywords.end(), TokenText); + return llvm::binary_search(QtPropertyKeywords, TokenText); } // Sorted common C++ non-keyword types. diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index 4aaca2d..e150aa6 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -834,10 +834,9 @@ _mm256_cmpgt_epi64(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hadd_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hadd_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b); } /// Horizontally adds the adjacent pairs of 32-bit integers from two 256-bit @@ -866,10 +865,9 @@ _mm256_hadd_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hadd_epi32(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hadd_epi32(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b); } /// Horizontally adds the adjacent pairs of 16-bit integers from two 256-bit @@ -901,10 +899,9 @@ _mm256_hadd_epi32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hadds_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hadds_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b); } /// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit @@ -937,10 +934,9 @@ _mm256_hadds_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the differences. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hsub_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hsub_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b); } /// Horizontally subtracts adjacent pairs of 32-bit integers from two 256-bit @@ -969,10 +965,9 @@ _mm256_hsub_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hsub_epi32(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hsub_epi32(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b); } /// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit @@ -1005,10 +1000,9 @@ _mm256_hsub_epi32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the differences. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hsubs_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hsubs_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b); } /// Multiplies each unsigned byte from the 256-bit integer vector in \a __a diff --git a/clang/lib/Headers/avx512ifmaintrin.h b/clang/lib/Headers/avx512ifmaintrin.h index f01b322..625a8ff 100644 --- a/clang/lib/Headers/avx512ifmaintrin.h +++ b/clang/lib/Headers/avx512ifmaintrin.h @@ -15,54 +15,53 @@ #define __IFMAINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS \ + constexpr \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"), \ + __min_vector_width__(512))) +#else #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"), \ __min_vector_width__(512))) +#endif static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z) -{ - return (__m512i)__builtin_ia32_vpmadd52huq512((__v8di) __X, (__v8di) __Y, - (__v8di) __Z); +_mm512_madd52hi_epu64(__m512i __X, __m512i __Y, __m512i __Z) { + return (__m512i)__builtin_ia32_vpmadd52huq512((__v8di)__X, (__v8di)__Y, + (__v8di)__Z); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) -{ - return (__m512i)__builtin_ia32_selectq_512(__M, - (__v8di)_mm512_madd52hi_epu64(__W, __X, __Y), - (__v8di)__W); +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_madd52hi_epu64( + __m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { + return (__m512i)__builtin_ia32_selectq_512( + __M, (__v8di)_mm512_madd52hi_epu64(__W, __X, __Y), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS 
-_mm512_maskz_madd52hi_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) -{ - return (__m512i)__builtin_ia32_selectq_512(__M, - (__v8di)_mm512_madd52hi_epu64(__X, __Y, __Z), - (__v8di)_mm512_setzero_si512()); +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_madd52hi_epu64( + __mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) { + return (__m512i)__builtin_ia32_selectq_512( + __M, (__v8di)_mm512_madd52hi_epu64(__X, __Y, __Z), + (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_madd52lo_epu64 (__m512i __X, __m512i __Y, __m512i __Z) -{ - return (__m512i)__builtin_ia32_vpmadd52luq512((__v8di) __X, (__v8di) __Y, - (__v8di) __Z); +_mm512_madd52lo_epu64(__m512i __X, __m512i __Y, __m512i __Z) { + return (__m512i)__builtin_ia32_vpmadd52luq512((__v8di)__X, (__v8di)__Y, + (__v8di)__Z); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_madd52lo_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) -{ - return (__m512i)__builtin_ia32_selectq_512(__M, - (__v8di)_mm512_madd52lo_epu64(__W, __X, __Y), - (__v8di)__W); +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_madd52lo_epu64( + __m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { + return (__m512i)__builtin_ia32_selectq_512( + __M, (__v8di)_mm512_madd52lo_epu64(__W, __X, __Y), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_madd52lo_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) -{ - return (__m512i)__builtin_ia32_selectq_512(__M, - (__v8di)_mm512_madd52lo_epu64(__X, __Y, __Z), - (__v8di)_mm512_setzero_si512()); +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_madd52lo_epu64( + __mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) { + return (__m512i)__builtin_ia32_selectq_512( + __M, (__v8di)_mm512_madd52lo_epu64(__X, __Y, __Z), + (__v8di)_mm512_setzero_si512()); } #undef __DEFAULT_FN_ATTRS diff --git a/clang/lib/Headers/avx512ifmavlintrin.h b/clang/lib/Headers/avx512ifmavlintrin.h index 
a72b561..c4449c7 100644 --- a/clang/lib/Headers/avx512ifmavlintrin.h +++ b/clang/lib/Headers/avx512ifmavlintrin.h @@ -8,13 +8,24 @@ *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H -#error "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead." +#error \ + "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead." #endif #ifndef __IFMAVLINTRIN_H #define __IFMAVLINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128 \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512ifma,avx512vl"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512ifma,avx512vl"), \ + __min_vector_width__(256))) +#else #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512ifma,avx512vl"), \ @@ -24,6 +35,8 @@ __target__("avx512ifma,avx512vl"), \ __min_vector_width__(256))) +#endif + #define _mm_madd52hi_epu64(X, Y, Z) \ ((__m128i)__builtin_ia32_vpmadd52huq128((__v2di)(X), (__v2di)(Y), \ (__v2di)(Z))) @@ -41,70 +54,57 @@ (__v4di)(Z))) static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) -{ - return (__m128i)__builtin_ia32_selectq_128(__M, - (__v2di)_mm_madd52hi_epu64(__W, __X, __Y), - (__v2di)__W); +_mm_mask_madd52hi_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { + return (__m128i)__builtin_ia32_selectq_128( + __M, (__v2di)_mm_madd52hi_epu64(__W, __X, __Y), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) -{ - return (__m128i)__builtin_ia32_selectq_128(__M, - (__v2di)_mm_madd52hi_epu64(__X, __Y, __Z), - (__v2di)_mm_setzero_si128()); 
+_mm_maskz_madd52hi_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) { + return (__m128i)__builtin_ia32_selectq_128( + __M, (__v2di)_mm_madd52hi_epu64(__X, __Y, __Z), + (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) -{ - return (__m256i)__builtin_ia32_selectq_256(__M, - (__v4di)_mm256_madd52hi_epu64(__W, __X, __Y), - (__v4di)__W); +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52hi_epu64( + __m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { + return (__m256i)__builtin_ia32_selectq_256( + __M, (__v4di)_mm256_madd52hi_epu64(__W, __X, __Y), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) -{ - return (__m256i)__builtin_ia32_selectq_256(__M, - (__v4di)_mm256_madd52hi_epu64(__X, __Y, __Z), - (__v4di)_mm256_setzero_si256()); +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52hi_epu64( + __mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) { + return (__m256i)__builtin_ia32_selectq_256( + __M, (__v4di)_mm256_madd52hi_epu64(__X, __Y, __Z), + (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) -{ - return (__m128i)__builtin_ia32_selectq_128(__M, - (__v2di)_mm_madd52lo_epu64(__W, __X, __Y), - (__v2di)__W); +_mm_mask_madd52lo_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { + return (__m128i)__builtin_ia32_selectq_128( + __M, (__v2di)_mm_madd52lo_epu64(__W, __X, __Y), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) -{ - return (__m128i)__builtin_ia32_selectq_128(__M, - (__v2di)_mm_madd52lo_epu64(__X, __Y, __Z), - (__v2di)_mm_setzero_si128()); +_mm_maskz_madd52lo_epu64(__mmask8 __M, __m128i __X, __m128i 
__Y, __m128i __Z) { + return (__m128i)__builtin_ia32_selectq_128( + __M, (__v2di)_mm_madd52lo_epu64(__X, __Y, __Z), + (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) -{ - return (__m256i)__builtin_ia32_selectq_256(__M, - (__v4di)_mm256_madd52lo_epu64(__W, __X, __Y), - (__v4di)__W); +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52lo_epu64( + __m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { + return (__m256i)__builtin_ia32_selectq_256( + __M, (__v4di)_mm256_madd52lo_epu64(__W, __X, __Y), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) -{ - return (__m256i)__builtin_ia32_selectq_256(__M, - (__v4di)_mm256_madd52lo_epu64(__X, __Y, __Z), - (__v4di)_mm256_setzero_si256()); +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52lo_epu64( + __mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) { + return (__m256i)__builtin_ia32_selectq_256( + __M, (__v4di)_mm256_madd52lo_epu64(__X, __Y, __Z), + (__v4di)_mm256_setzero_si256()); } - #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 diff --git a/clang/lib/Headers/avxifmaintrin.h b/clang/lib/Headers/avxifmaintrin.h index 5c782d2a..a2ef601 100644 --- a/clang/lib/Headers/avxifmaintrin.h +++ b/clang/lib/Headers/avxifmaintrin.h @@ -15,12 +15,21 @@ #define __AVXIFMAINTRIN_H /* Define the default attributes for the functions in this file. 
*/ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128 \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avxifma"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avxifma"), __min_vector_width__(256))) +#else #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, __target__("avxifma"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, __target__("avxifma"), \ __min_vector_width__(256))) +#endif // must vex-encoding diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 123fa79..696ec31 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -694,9 +694,8 @@ _mm256_xor_ps(__m256 __a, __m256 __b) /// elements of a vector of [4 x double]. /// \returns A 256-bit vector of [4 x double] containing the horizontal sums of /// both operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_hadd_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_hadd_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b); } @@ -717,9 +716,8 @@ _mm256_hadd_pd(__m256d __a, __m256d __b) /// index 2, 3, 6, 7 of a vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the horizontal sums of /// both operands. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_hadd_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hadd_ps(__m256 __a, + __m256 __b) { return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b); } @@ -740,9 +738,8 @@ _mm256_hadd_ps(__m256 __a, __m256 __b) /// odd-indexed elements of a vector of [4 x double]. /// \returns A 256-bit vector of [4 x double] containing the horizontal /// differences of both operands. 
-static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_hsub_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_hsub_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b); } @@ -763,9 +760,8 @@ _mm256_hsub_pd(__m256d __a, __m256d __b) /// elements with index 2, 3, 6, 7 of a vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the horizontal /// differences of both operands. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_hsub_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hsub_ps(__m256 __a, + __m256 __b) { return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b); } diff --git a/clang/lib/Headers/pmmintrin.h b/clang/lib/Headers/pmmintrin.h index f0c9b2b..42bd343 100644 --- a/clang/lib/Headers/pmmintrin.h +++ b/clang/lib/Headers/pmmintrin.h @@ -83,9 +83,8 @@ _mm_addsub_ps(__m128 __a, __m128 __b) /// destination. /// \returns A 128-bit vector of [4 x float] containing the horizontal sums of /// both operands. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_hadd_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_ps(__m128 __a, + __m128 __b) { return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b); } @@ -106,9 +105,8 @@ _mm_hadd_ps(__m128 __a, __m128 __b) /// bits of the destination. /// \returns A 128-bit vector of [4 x float] containing the horizontal /// differences of both operands. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_hsub_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_ps(__m128 __a, + __m128 __b) { return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b); } @@ -168,9 +166,8 @@ _mm_moveldup_ps(__m128 __a) /// A 128-bit vector of [2 x double] containing the right source operand. /// \returns A 128-bit vector of [2 x double] containing the alternating sums /// and differences of both operands. 
-static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_addsub_pd(__m128d __a, __m128d __b) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_addsub_pd(__m128d __a, __m128d __b) { return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b); } @@ -191,9 +188,8 @@ _mm_addsub_pd(__m128d __a, __m128d __b) /// destination. /// \returns A 128-bit vector of [2 x double] containing the horizontal sums of /// both operands. -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_hadd_pd(__m128d __a, __m128d __b) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hadd_pd(__m128d __a, __m128d __b) { return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b); } @@ -214,9 +210,8 @@ _mm_hadd_pd(__m128d __a, __m128d __b) /// the destination. /// \returns A 128-bit vector of [2 x double] containing the horizontal /// differences of both operands. -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_hsub_pd(__m128d __a, __m128d __b) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hsub_pd(__m128d __a, __m128d __b) { return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b); } diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h index 3fc9f98..9d007c8 100644 --- a/clang/lib/Headers/tmmintrin.h +++ b/clang/lib/Headers/tmmintrin.h @@ -202,10 +202,9 @@ _mm_abs_epi32(__m128i __a) { /// destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of /// both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hadd_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hadd_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally adds the adjacent pairs of values contained in 2 packed @@ -225,10 +224,9 @@ _mm_hadd_epi16(__m128i __a, __m128i __b) /// destination. 
/// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of /// both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hadd_epi32(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hadd_epi32(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b); } /// Horizontally adds the adjacent pairs of values contained in 2 packed @@ -248,11 +246,10 @@ _mm_hadd_epi32(__m128i __a, __m128i __b) /// destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both /// operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hadd_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phaddw128( - (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_pi16(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phaddw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Horizontally adds the adjacent pairs of values contained in 2 packed @@ -272,11 +269,10 @@ _mm_hadd_pi16(__m64 __a, __m64 __b) /// destination. /// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both /// operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hadd_pi32(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phaddd128( - (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_pi32(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phaddd128( + (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){})); } /// Horizontally adds, with saturation, the adjacent pairs of values contained @@ -299,10 +295,9 @@ _mm_hadd_pi32(__m64 __a, __m64 __b) /// destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated /// sums of both operands. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hadds_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hadds_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally adds, with saturation, the adjacent pairs of values contained @@ -325,11 +320,10 @@ _mm_hadds_epi16(__m128i __a, __m128i __b) /// destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated /// sums of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hadds_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phaddsw128( - (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadds_pi16(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phaddsw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -349,10 +343,9 @@ _mm_hadds_pi16(__m64 __a, __m64 __b) /// the destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal differences /// of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hsub_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hsub_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -372,10 +365,9 @@ _mm_hsub_epi16(__m128i __a, __m128i __b) /// the destination. /// \returns A 128-bit vector of [4 x i32] containing the horizontal differences /// of both operands. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hsub_epi32(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hsub_epi32(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b); } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -395,11 +387,10 @@ _mm_hsub_epi32(__m128i __a, __m128i __b) /// the destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal differences /// of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hsub_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phsubw128( - (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_pi16(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phsubw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -419,11 +410,10 @@ _mm_hsub_pi16(__m64 __a, __m64 __b) /// the destination. /// \returns A 64-bit vector of [2 x i32] containing the horizontal differences /// of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hsub_pi32(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phsubd128( - (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_pi32(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phsubd128( + (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){})); } /// Horizontally subtracts, with saturation, the adjacent pairs of values @@ -446,10 +436,9 @@ _mm_hsub_pi32(__m64 __a, __m64 __b) /// the destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated /// differences of both operands. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hsubs_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hsubs_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally subtracts, with saturation, the adjacent pairs of values @@ -472,11 +461,10 @@ _mm_hsubs_epi16(__m128i __a, __m128i __b) /// the destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated /// differences of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hsubs_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phsubsw128( - (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsubs_pi16(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phsubsw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Multiplies corresponding pairs of packed 8-bit unsigned integer @@ -556,10 +544,9 @@ _mm_maddubs_pi16(__m64 __a, __m64 __b) { /// A 128-bit vector of [8 x i16] containing one of the source operands. /// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled /// products of both operands. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mulhrs_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhrs_epi16(__m128i __a, + __m128i __b) { + return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); } /// Multiplies packed 16-bit signed integer values, truncates the 32-bit diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 04f29c8..25199c7 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -3311,7 +3311,11 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, ErrorFound = true; } - Clause = ParseOpenMPClause(CKind, WrongDirective); + if (CKind == OMPC_nowait && PP.LookAhead(/*N=*/0).is(tok::l_paren) && + getLangOpts().OpenMP >= 60) + Clause = ParseOpenMPSingleExprClause(CKind, WrongDirective); + else + Clause = ParseOpenMPClause(CKind, WrongDirective); break; case OMPC_self_maps: // OpenMP [6.0, self_maps clause] diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp index 7c44efd..87dd682 100644 --- a/clang/lib/Sema/SemaConcept.cpp +++ b/clang/lib/Sema/SemaConcept.cpp @@ -606,15 +606,15 @@ ConstraintSatisfactionChecker::SubstitutionInTemplateArguments( Constraint.mappingOccurenceList(); // The empty MLTAL situation should only occur when evaluating non-dependent // constraints. 
- if (!MLTAL.getNumSubstitutedLevels()) - MLTAL.addOuterTemplateArguments(TD, {}, /*Final=*/false); - SubstitutedOuterMost = - llvm::to_vector_of<TemplateArgument>(MLTAL.getOutermost()); + if (MLTAL.getNumSubstitutedLevels()) + SubstitutedOuterMost = + llvm::to_vector_of<TemplateArgument>(MLTAL.getOutermost()); unsigned Offset = 0; for (unsigned I = 0, MappedIndex = 0; I < Used.size(); I++) { TemplateArgument Arg; if (Used[I]) - Arg = CTAI.SugaredConverted[MappedIndex++]; + Arg = S.Context.getCanonicalTemplateArgument( + CTAI.SugaredConverted[MappedIndex++]); if (I < SubstitutedOuterMost.size()) { SubstitutedOuterMost[I] = Arg; Offset = I + 1; @@ -626,8 +626,10 @@ ConstraintSatisfactionChecker::SubstitutionInTemplateArguments( if (Offset < SubstitutedOuterMost.size()) SubstitutedOuterMost.erase(SubstitutedOuterMost.begin() + Offset); - MLTAL.replaceOutermostTemplateArguments(TD, SubstitutedOuterMost); - return std::move(MLTAL); + MultiLevelTemplateArgumentList SubstitutedTemplateArgs; + SubstitutedTemplateArgs.addOuterTemplateArguments(TD, SubstitutedOuterMost, + /*Final=*/false); + return std::move(SubstitutedTemplateArgs); } ExprResult ConstraintSatisfactionChecker::EvaluateSlow( diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 0e83c20..8ac09c4 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -20797,7 +20797,7 @@ Sema::FunctionEmissionStatus Sema::getEmissionStatus(const FunctionDecl *FD, // SYCL functions can be template, so we check if they have appropriate // attribute prior to checking if it is a template. - if (LangOpts.SYCLIsDevice && FD->hasAttr<DeviceKernelAttr>()) + if (LangOpts.SYCLIsDevice && FD->hasAttr<SYCLKernelAttr>()) return FunctionEmissionStatus::Emitted; // Templates are emitted when they're instantiated. 
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 328ccf6..3107876 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -5204,16 +5204,7 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) { static void handleDeviceKernelAttr(Sema &S, Decl *D, const ParsedAttr &AL) { const auto *FD = dyn_cast_or_null<FunctionDecl>(D); bool IsFunctionTemplate = FD && FD->getDescribedFunctionTemplate(); - if (S.getLangOpts().SYCLIsDevice) { - if (!IsFunctionTemplate) { - S.Diag(AL.getLoc(), diag::warn_attribute_wrong_decl_type_str) - << AL << AL.isRegularKeywordAttribute() << "function templates"; - } else { - S.SYCL().handleKernelAttr(D, AL); - } - } else if (DeviceKernelAttr::isSYCLSpelling(AL)) { - S.Diag(AL.getLoc(), diag::warn_attribute_ignored) << AL; - } else if (S.getASTContext().getTargetInfo().getTriple().isNVPTX()) { + if (S.getASTContext().getTargetInfo().getTriple().isNVPTX()) { handleGlobalAttr(S, D, AL); } else { // OpenCL C++ will throw a more specific error. @@ -7100,6 +7091,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_EnumExtensibility: handleEnumExtensibilityAttr(S, D, AL); break; + case ParsedAttr::AT_SYCLKernel: + S.SYCL().handleKernelAttr(D, AL); + break; case ParsedAttr::AT_SYCLExternal: handleSimpleAttribute<SYCLExternalAttr>(S, D, AL); break; diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 17cb1e4..72b2ac9 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -3544,40 +3544,6 @@ bool SemaHLSL::CanPerformScalarCast(QualType SrcTy, QualType DestTy) { llvm_unreachable("Unhandled scalar cast"); } -// Detect if a type contains a bitfield. 
Will be removed when -// bitfield support is added to HLSLElementwiseCast and HLSLAggregateSplatCast -bool SemaHLSL::ContainsBitField(QualType BaseTy) { - llvm::SmallVector<QualType, 16> WorkList; - WorkList.push_back(BaseTy); - while (!WorkList.empty()) { - QualType T = WorkList.pop_back_val(); - T = T.getCanonicalType().getUnqualifiedType(); - // only check aggregate types - if (const auto *AT = dyn_cast<ConstantArrayType>(T)) { - WorkList.push_back(AT->getElementType()); - continue; - } - if (const auto *RT = dyn_cast<RecordType>(T)) { - const RecordDecl *RD = RT->getOriginalDecl()->getDefinitionOrSelf(); - if (RD->isUnion()) - continue; - - const CXXRecordDecl *CXXD = dyn_cast<CXXRecordDecl>(RD); - - if (CXXD && CXXD->isStandardLayout()) - RD = CXXD->getStandardLayoutBaseWithFields(); - - for (const auto *FD : RD->fields()) { - if (FD->isBitField()) - return true; - WorkList.push_back(FD->getType()); - } - continue; - } - } - return false; -} - // Can perform an HLSL Aggregate splat cast if the Dest is an aggregate and the // Src is a scalar or a vector of length 1 // Or if Dest is a vector and Src is a vector of length 1 diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp index f3969a9..ca99834 100644 --- a/clang/lib/Sema/SemaOpenACC.cpp +++ b/clang/lib/Sema/SemaOpenACC.cpp @@ -2883,12 +2883,12 @@ SemaOpenACC::CreateFirstPrivateInitRecipe(const Expr *VarExpr) { return OpenACCFirstPrivateRecipe(AllocaDecl, Temporary); } -OpenACCReductionRecipe SemaOpenACC::CreateReductionInitRecipe( +OpenACCReductionRecipeWithStorage SemaOpenACC::CreateReductionInitRecipe( OpenACCReductionOperator ReductionOperator, const Expr *VarExpr) { // We don't strip bounds here, so that we are doing our recipe init at the // 'lowest' possible level. Codegen is going to have to do its own 'looping'. 
if (!VarExpr || VarExpr->getType()->isDependentType()) - return OpenACCReductionRecipe::Empty(); + return OpenACCReductionRecipeWithStorage::Empty(); QualType VarTy = VarExpr->getType().getNonReferenceType().getUnqualifiedType(); @@ -2898,6 +2898,15 @@ OpenACCReductionRecipe SemaOpenACC::CreateReductionInitRecipe( dyn_cast<ArraySectionExpr>(VarExpr->IgnoreParenImpCasts())) VarTy = ASE->getElementType(); + llvm::SmallVector<OpenACCReductionRecipe::CombinerRecipe, 1> CombinerRecipes; + + // We use the 'set-ness' of the alloca-decl to determine whether the combiner + // is 'set' or not, so we can skip any attempts at it if we're going to fail + // at any of the combiners. + if (CreateReductionCombinerRecipe(VarExpr->getBeginLoc(), ReductionOperator, + VarTy, CombinerRecipes)) + return OpenACCReductionRecipeWithStorage::Empty(); + VarDecl *AllocaDecl = CreateAllocaDecl( getASTContext(), SemaRef.getCurContext(), VarExpr->getBeginLoc(), &getASTContext().Idents.get("openacc.reduction.init"), VarTy); @@ -2946,5 +2955,163 @@ OpenACCReductionRecipe SemaOpenACC::CreateReductionInitRecipe( AllocaDecl->setInit(Init.get()); AllocaDecl->setInitStyle(VarDecl::CallInit); } - return OpenACCReductionRecipe(AllocaDecl, {}); + + return OpenACCReductionRecipeWithStorage(AllocaDecl, CombinerRecipes); +} + +bool SemaOpenACC::CreateReductionCombinerRecipe( + SourceLocation Loc, OpenACCReductionOperator ReductionOperator, + QualType VarTy, + llvm::SmallVectorImpl<OpenACCReductionRecipe::CombinerRecipe> + &CombinerRecipes) { + // Now we can try to generate the 'combiner' recipe. This is a little + // complicated in that if the 'VarTy' is an array type, we want to take its + // element type so we can generate that. Additionally, if this is a struct, + // we have two options: If there is overloaded operators, we want to take + // THOSE, else we want to do the individual elements. 
+ + BinaryOperatorKind BinOp; + switch (ReductionOperator) { + case OpenACCReductionOperator::Invalid: + // This can only happen when there is an error, and since these inits + // are used for code generation, we can just ignore/not bother doing any + // initialization here. + CombinerRecipes.push_back({nullptr, nullptr, nullptr}); + return false; + case OpenACCReductionOperator::Addition: + BinOp = BinaryOperatorKind::BO_AddAssign; + break; + case OpenACCReductionOperator::Multiplication: + BinOp = BinaryOperatorKind::BO_MulAssign; + break; + case OpenACCReductionOperator::BitwiseAnd: + BinOp = BinaryOperatorKind::BO_AndAssign; + break; + case OpenACCReductionOperator::BitwiseOr: + BinOp = BinaryOperatorKind::BO_OrAssign; + break; + case OpenACCReductionOperator::BitwiseXOr: + BinOp = BinaryOperatorKind::BO_XorAssign; + break; + + case OpenACCReductionOperator::Max: + case OpenACCReductionOperator::Min: + case OpenACCReductionOperator::And: + case OpenACCReductionOperator::Or: + // We just want a 'NYI' error in the backend, so leave an empty combiner + // recipe, and claim success. + CombinerRecipes.push_back({nullptr, nullptr, nullptr}); + return false; + } + + // If VarTy is an array type, at the top level only, we want to do our + // compares/decomp/etc at the element level. + if (auto *AT = getASTContext().getAsArrayType(VarTy)) + VarTy = AT->getElementType(); + + assert(!VarTy->isArrayType() && "Only 1 level of array allowed"); + + auto tryCombiner = [&, this](DeclRefExpr *LHSDRE, DeclRefExpr *RHSDRE, + bool IncludeTrap) { + // TODO: OpenACC: we have to figure out based on the bin-op how to do the + // ones that we can't just use compound operators for. So &&, ||, max, and + // min aren't really clear what we could do here. + if (IncludeTrap) { + // Trap all of the errors here, we'll emit our own at the end. 
+ Sema::TentativeAnalysisScope Trap{SemaRef}; + + return SemaRef.BuildBinOp(SemaRef.getCurScope(), Loc, BinOp, LHSDRE, + RHSDRE, + /*ForFoldExpr=*/false); + } else { + return SemaRef.BuildBinOp(SemaRef.getCurScope(), Loc, BinOp, LHSDRE, + RHSDRE, + /*ForFoldExpr=*/false); + } + }; + + struct CombinerAttemptTy { + VarDecl *LHS; + DeclRefExpr *LHSDRE; + VarDecl *RHS; + DeclRefExpr *RHSDRE; + Expr *Op; + }; + + auto formCombiner = [&, this](QualType Ty) -> CombinerAttemptTy { + VarDecl *LHSDecl = CreateAllocaDecl( + getASTContext(), SemaRef.getCurContext(), Loc, + &getASTContext().Idents.get("openacc.reduction.combiner.lhs"), Ty); + auto *LHSDRE = DeclRefExpr::Create( + getASTContext(), NestedNameSpecifierLoc{}, SourceLocation{}, LHSDecl, + /*ReferstoEnclosingVariableOrCapture=*/false, + DeclarationNameInfo{DeclarationName{LHSDecl->getDeclName()}, + LHSDecl->getBeginLoc()}, + Ty, clang::VK_LValue, LHSDecl, nullptr, NOUR_None); + VarDecl *RHSDecl = CreateAllocaDecl( + getASTContext(), SemaRef.getCurContext(), Loc, + &getASTContext().Idents.get("openacc.reduction.combiner.lhs"), Ty); + auto *RHSDRE = DeclRefExpr::Create( + getASTContext(), NestedNameSpecifierLoc{}, SourceLocation{}, RHSDecl, + /*ReferstoEnclosingVariableOrCapture=*/false, + DeclarationNameInfo{DeclarationName{RHSDecl->getDeclName()}, + RHSDecl->getBeginLoc()}, + Ty, clang::VK_LValue, RHSDecl, nullptr, NOUR_None); + + ExprResult BinOpResult = tryCombiner(LHSDRE, RHSDRE, /*IncludeTrap=*/true); + + return {LHSDecl, LHSDRE, RHSDecl, RHSDRE, BinOpResult.get()}; + }; + + CombinerAttemptTy TopLevelCombinerInfo = formCombiner(VarTy); + + if (TopLevelCombinerInfo.Op) { + if (!TopLevelCombinerInfo.Op->containsErrors() && + TopLevelCombinerInfo.Op->isInstantiationDependent()) { + // If this is instantiation dependent, we're just going to 'give up' here + // and count on us to get it right during instantaition. 
+ CombinerRecipes.push_back({nullptr, nullptr, nullptr}); + return false; + } else if (!TopLevelCombinerInfo.Op->containsErrors()) { + // Else, we succeeded, we can just return this combiner. + CombinerRecipes.push_back({TopLevelCombinerInfo.LHS, + TopLevelCombinerInfo.RHS, + TopLevelCombinerInfo.Op}); + return false; + } + } + + // Since the 'root' level didn't fail, the only thing that could be successful + // is a struct that we decompose on its individual fields. + + RecordDecl *RD = VarTy->getAsRecordDecl(); + if (!RD) { + Diag(Loc, diag::err_acc_reduction_recipe_no_op) << VarTy; + tryCombiner(TopLevelCombinerInfo.LHSDRE, TopLevelCombinerInfo.RHSDRE, + /*IncludeTrap=*/false); + return true; + } + + for (const FieldDecl *FD : RD->fields()) { + CombinerAttemptTy FieldCombinerInfo = formCombiner(FD->getType()); + + if (!FieldCombinerInfo.Op || FieldCombinerInfo.Op->containsErrors()) { + Diag(Loc, diag::err_acc_reduction_recipe_no_op) << FD->getType(); + Diag(FD->getBeginLoc(), diag::note_acc_reduction_recipe_noop_field) << RD; + tryCombiner(FieldCombinerInfo.LHSDRE, FieldCombinerInfo.RHSDRE, + /*IncludeTrap=*/false); + return true; + } + + if (FieldCombinerInfo.Op->isInstantiationDependent()) { + // If this is instantiation dependent, we're just going to 'give up' here + // and count on us to get it right during instantaition. 
+ CombinerRecipes.push_back({nullptr, nullptr, nullptr}); + } else { + CombinerRecipes.push_back( + {FieldCombinerInfo.LHS, FieldCombinerInfo.RHS, FieldCombinerInfo.Op}); + } + } + + return false; } diff --git a/clang/lib/Sema/SemaOpenACCClause.cpp b/clang/lib/Sema/SemaOpenACCClause.cpp index 881e960..17078e8 100644 --- a/clang/lib/Sema/SemaOpenACCClause.cpp +++ b/clang/lib/Sema/SemaOpenACCClause.cpp @@ -1772,7 +1772,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitReductionClause( } SmallVector<Expr *> ValidVars; - SmallVector<OpenACCReductionRecipe> Recipes; + SmallVector<OpenACCReductionRecipeWithStorage> Recipes; for (Expr *Var : Clause.getVarList()) { ExprResult Res = SemaRef.CheckReductionVar(Clause.getDirectiveKind(), @@ -1924,7 +1924,7 @@ bool SemaOpenACC::CheckReductionVarType(Expr *VarExpr) { // off here. This will result in CurType being the actual 'type' of the // expression, which is what we are looking to check. QualType CurType = isa<ArraySectionExpr>(VarExpr) - ? ArraySectionExpr::getBaseOriginalType(VarExpr) + ? 
cast<ArraySectionExpr>(VarExpr)->getElementType() : VarExpr->getType(); // This can happen when we have a dependent type in an array element that the @@ -2196,7 +2196,7 @@ OpenACCClause *SemaOpenACC::CheckReductionClause( ArrayRef<const OpenACCClause *> ExistingClauses, OpenACCDirectiveKind DirectiveKind, SourceLocation BeginLoc, SourceLocation LParenLoc, OpenACCReductionOperator ReductionOp, - ArrayRef<Expr *> Vars, ArrayRef<OpenACCReductionRecipe> Recipes, + ArrayRef<Expr *> Vars, ArrayRef<OpenACCReductionRecipeWithStorage> Recipes, SourceLocation EndLoc) { if (DirectiveKind == OpenACCDirectiveKind::Loop || isOpenACCCombinedDirectiveKind(DirectiveKind)) { diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 0fa21e8..5b5b1b6 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -16491,6 +16491,9 @@ OMPClause *SemaOpenMP::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, case OMPC_ordered: Res = ActOnOpenMPOrderedClause(StartLoc, EndLoc, LParenLoc, Expr); break; + case OMPC_nowait: + Res = ActOnOpenMPNowaitClause(StartLoc, EndLoc, LParenLoc, Expr); + break; case OMPC_priority: Res = ActOnOpenMPPriorityClause(Expr, StartLoc, LParenLoc, EndLoc); break; @@ -16546,7 +16549,6 @@ OMPClause *SemaOpenMP::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, case OMPC_aligned: case OMPC_copyin: case OMPC_copyprivate: - case OMPC_nowait: case OMPC_untied: case OMPC_mergeable: case OMPC_threadprivate: @@ -17955,7 +17957,9 @@ OMPClause *SemaOpenMP::ActOnOpenMPClause(OpenMPClauseKind Kind, Res = ActOnOpenMPOrderedClause(StartLoc, EndLoc); break; case OMPC_nowait: - Res = ActOnOpenMPNowaitClause(StartLoc, EndLoc); + Res = ActOnOpenMPNowaitClause(StartLoc, EndLoc, + /*LParenLoc=*/SourceLocation(), + /*Condition=*/nullptr); break; case OMPC_untied: Res = ActOnOpenMPUntiedClause(StartLoc, EndLoc); @@ -18107,9 +18111,24 @@ OMPClause *SemaOpenMP::ActOnOpenMPClause(OpenMPClauseKind Kind, } OMPClause 
*SemaOpenMP::ActOnOpenMPNowaitClause(SourceLocation StartLoc, - SourceLocation EndLoc) { + SourceLocation EndLoc, + SourceLocation LParenLoc, + Expr *Condition) { + Expr *ValExpr = Condition; + if (Condition && LParenLoc.isValid()) { + if (!Condition->isValueDependent() && !Condition->isTypeDependent() && + !Condition->isInstantiationDependent() && + !Condition->containsUnexpandedParameterPack()) { + ExprResult Val = SemaRef.CheckBooleanCondition(StartLoc, Condition); + if (Val.isInvalid()) + return nullptr; + + ValExpr = Val.get(); + } + } DSAStack->setNowaitRegion(); - return new (getASTContext()) OMPNowaitClause(StartLoc, EndLoc); + return new (getASTContext()) + OMPNowaitClause(ValExpr, StartLoc, LParenLoc, EndLoc); } OMPClause *SemaOpenMP::ActOnOpenMPUntiedClause(SourceLocation StartLoc, diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 2f97f62..b981c35 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -199,7 +199,7 @@ void SemaSYCL::handleKernelAttr(Decl *D, const ParsedAttr &AL) { return; } - handleSimpleAttribute<DeviceKernelAttr>(*this, D, AL); + handleSimpleAttribute<SYCLKernelAttr>(*this, D, AL); } void SemaSYCL::handleKernelEntryPointAttr(Decl *D, const ParsedAttr &AL) { diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 3819f77..73fd33a 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -710,9 +710,9 @@ static void instantiateDependentAMDGPUMaxNumWorkGroupsAttr( // This doesn't take any template parameters, but we have a custom action that // needs to happen when the kernel itself is instantiated. We need to run the // ItaniumMangler to mark the names required to name this kernel. 
-static void instantiateDependentDeviceKernelAttr( +static void instantiateDependentSYCLKernelAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, - const DeviceKernelAttr &Attr, Decl *New) { + const SYCLKernelAttr &Attr, Decl *New) { New->addAttr(Attr.clone(S.getASTContext())); } @@ -966,8 +966,8 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs, continue; } - if (auto *A = dyn_cast<DeviceKernelAttr>(TmplAttr)) { - instantiateDependentDeviceKernelAttr(*this, TemplateArgs, *A, New); + if (auto *A = dyn_cast<SYCLKernelAttr>(TmplAttr)) { + instantiateDependentSYCLKernelAttr(*this, TemplateArgs, *A, New); continue; } @@ -5727,7 +5727,7 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation, Function->setDeclarationNameLoc(NameLocPointsToPattern()); EnterExpressionEvaluationContextForFunction EvalContext( - *this, Sema::ExpressionEvaluationContext::PotentiallyEvaluated); + *this, Sema::ExpressionEvaluationContext::PotentiallyEvaluated, Function); Qualifiers ThisTypeQuals; CXXRecordDecl *ThisContext = nullptr; diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index bee613a..a9e7c34 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -3780,12 +3780,10 @@ static CallingConv getCCForDeclaratorChunk( } } } - if (!S.getLangOpts().isSYCL()) { - for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) { - if (AL.getKind() == ParsedAttr::AT_DeviceKernel) { - CC = CC_DeviceKernel; - break; - } + for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) { + if (AL.getKind() == ParsedAttr::AT_DeviceKernel) { + CC = CC_DeviceKernel; + break; } } return CC; diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 940324b..04a5e4b 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -1865,6 +1865,17 @@ public: LParenLoc, Num); } + /// Build a new OpenMP 'nowait' clause. 
+ /// + /// By default, performs semantic analysis to build the new OpenMP clause. + /// Subclasses may override this routine to provide different behavior. + OMPClause *RebuildOMPNowaitClause(Expr *Condition, SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { + return getSema().OpenMP().ActOnOpenMPNowaitClause(StartLoc, EndLoc, + LParenLoc, Condition); + } + /// Build a new OpenMP 'private' clause. /// /// By default, performs semantic analysis to build the new OpenMP clause. @@ -10612,8 +10623,14 @@ TreeTransform<Derived>::TransformOMPDetachClause(OMPDetachClause *C) { template <typename Derived> OMPClause * TreeTransform<Derived>::TransformOMPNowaitClause(OMPNowaitClause *C) { - // No need to rebuild this clause, no template-dependent parameters. - return C; + ExprResult Cond; + if (auto *Condition = C->getCondition()) { + Cond = getDerived().TransformExpr(Condition); + if (Cond.isInvalid()) + return nullptr; + } + return getDerived().RebuildOMPNowaitClause(Cond.get(), C->getBeginLoc(), + C->getLParenLoc(), C->getEndLoc()); } template <typename Derived> @@ -12374,7 +12391,7 @@ void OpenACCClauseTransform<Derived>::VisitReductionClause( const OpenACCReductionClause &C) { SmallVector<Expr *> TransformedVars = VisitVarList(C.getVarList()); SmallVector<Expr *> ValidVars; - llvm::SmallVector<OpenACCReductionRecipe> Recipes; + llvm::SmallVector<OpenACCReductionRecipeWithStorage> Recipes; for (const auto [Var, OrigRecipe] : llvm::zip(TransformedVars, C.getRecipes())) { @@ -12384,7 +12401,7 @@ void OpenACCClauseTransform<Derived>::VisitReductionClause( ValidVars.push_back(Res.get()); if (OrigRecipe.isSet()) - Recipes.push_back(OrigRecipe); + Recipes.emplace_back(OrigRecipe.AllocaDecl, OrigRecipe.CombinerRecipes); else Recipes.push_back(Self.getSema().OpenACC().CreateReductionInitRecipe( C.getReductionOp(), Res.get())); diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 868f0cc..32f7a0e 100644 --- 
a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -3190,6 +3190,10 @@ ASTReader::ReadControlBlock(ModuleFile &F, F.Kind == MK_ImplicitModule) N = ForceValidateUserInputs ? NumUserInputs : 0; + if (N != 0) + Diag(diag::remark_module_validation) + << N << F.ModuleName << F.FileName; + for (unsigned I = 0; I < N; ++I) { InputFile IF = getInputFile(F, I+1, Complain); if (!IF.getFile() || IF.isOutOfDate()) @@ -11684,7 +11688,10 @@ void OMPClauseReader::VisitOMPDetachClause(OMPDetachClause *C) { C->setLParenLoc(Record.readSourceLocation()); } -void OMPClauseReader::VisitOMPNowaitClause(OMPNowaitClause *) {} +void OMPClauseReader::VisitOMPNowaitClause(OMPNowaitClause *C) { + C->setCondition(Record.readSubExpr()); + C->setLParenLoc(Record.readSourceLocation()); +} void OMPClauseReader::VisitOMPUntiedClause(OMPUntiedClause *) {} @@ -13006,7 +13013,7 @@ OpenACCClause *ASTRecordReader::readOpenACCClause() { SourceLocation LParenLoc = readSourceLocation(); OpenACCReductionOperator Op = readEnum<OpenACCReductionOperator>(); llvm::SmallVector<Expr *> VarList = readOpenACCVarList(); - llvm::SmallVector<OpenACCReductionRecipe> RecipeList; + llvm::SmallVector<OpenACCReductionRecipeWithStorage> RecipeList; for (unsigned I = 0; I < VarList.size(); ++I) { VarDecl *Recipe = readDeclAs<VarDecl>(); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 82ccde8..377e396 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -7942,7 +7942,10 @@ void OMPClauseWriter::VisitOMPOrderedClause(OMPOrderedClause *C) { Record.AddSourceLocation(C->getLParenLoc()); } -void OMPClauseWriter::VisitOMPNowaitClause(OMPNowaitClause *) {} +void OMPClauseWriter::VisitOMPNowaitClause(OMPNowaitClause *C) { + Record.AddStmt(C->getCondition()); + Record.AddSourceLocation(C->getLParenLoc()); +} void OMPClauseWriter::VisitOMPUntiedClause(OMPUntiedClause *) {} diff --git 
a/clang/lib/Serialization/ModuleCache.cpp b/clang/lib/Serialization/ModuleCache.cpp index 9668727..9850956 100644 --- a/clang/lib/Serialization/ModuleCache.cpp +++ b/clang/lib/Serialization/ModuleCache.cpp @@ -115,8 +115,10 @@ public: } std::time_t getModuleTimestamp(StringRef ModuleFilename) override { + std::string TimestampFilename = + serialization::ModuleFile::getTimestampFilename(ModuleFilename); llvm::sys::fs::file_status Status; - if (llvm::sys::fs::status(ModuleFilename, Status) != std::error_code{}) + if (llvm::sys::fs::status(TimestampFilename, Status) != std::error_code{}) return 0; return llvm::sys::toTimeT(Status.getLastModificationTime()); } diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp index 0ae784c..1444114 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -251,6 +251,8 @@ public: const Expr *Ex, const MemRegion *MR, bool hypothetical); + static const StringLiteral *getStringLiteralFromRegion(const MemRegion *MR); + SVal getCStringLength(CheckerContext &C, ProgramStateRef &state, const Expr *Ex, @@ -983,6 +985,21 @@ SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C, return strLength; } +const StringLiteral * +CStringChecker::getStringLiteralFromRegion(const MemRegion *MR) { + switch (MR->getKind()) { + case MemRegion::StringRegionKind: + return cast<StringRegion>(MR)->getStringLiteral(); + case MemRegion::NonParamVarRegionKind: + if (const VarDecl *Decl = cast<NonParamVarRegion>(MR)->getDecl(); + Decl->getType().isConstQualified() && Decl->hasGlobalStorage()) + return dyn_cast_or_null<StringLiteral>(Decl->getInit()); + return nullptr; + default: + return nullptr; + } +} + SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state, const Expr *Ex, SVal Buf, bool hypothetical) const { @@ -1013,30 +1030,19 @@ SVal CStringChecker::getCStringLength(CheckerContext &C, 
ProgramStateRef &state, // its length. For anything we can't figure out, just return UnknownVal. MR = MR->StripCasts(); - switch (MR->getKind()) { - case MemRegion::StringRegionKind: { - // Modifying the contents of string regions is undefined [C99 6.4.5p6], - // so we can assume that the byte length is the correct C string length. - SValBuilder &svalBuilder = C.getSValBuilder(); - QualType sizeTy = svalBuilder.getContext().getSizeType(); - const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral(); - return svalBuilder.makeIntVal(strLit->getLength(), sizeTy); - } - case MemRegion::NonParamVarRegionKind: { + if (const StringLiteral *StrLit = getStringLiteralFromRegion(MR)) { // If we have a global constant with a string literal initializer, // compute the initializer's length. - const VarDecl *Decl = cast<NonParamVarRegion>(MR)->getDecl(); - if (Decl->getType().isConstQualified() && Decl->hasGlobalStorage()) { - if (const Expr *Init = Decl->getInit()) { - if (auto *StrLit = dyn_cast<StringLiteral>(Init)) { - SValBuilder &SvalBuilder = C.getSValBuilder(); - QualType SizeTy = SvalBuilder.getContext().getSizeType(); - return SvalBuilder.makeIntVal(StrLit->getLength(), SizeTy); - } - } - } - [[fallthrough]]; + // Modifying the contents of string regions is undefined [C99 6.4.5p6], + // so we can assume that the byte length is the correct C string length. + // FIXME: Embedded null characters are not handled. + SValBuilder &SVB = C.getSValBuilder(); + return SVB.makeIntVal(StrLit->getLength(), SVB.getContext().getSizeType()); } + + switch (MR->getKind()) { + case MemRegion::StringRegionKind: + case MemRegion::NonParamVarRegionKind: case MemRegion::SymbolicRegionKind: case MemRegion::AllocaRegionKind: case MemRegion::ParamVarRegionKind: @@ -1046,10 +1052,28 @@ SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state, case MemRegion::CompoundLiteralRegionKind: // FIXME: Can we track this? Is it necessary? 
return UnknownVal(); - case MemRegion::ElementRegionKind: - // FIXME: How can we handle this? It's not good enough to subtract the - // offset from the base string length; consider "123\x00567" and &a[5]. + case MemRegion::ElementRegionKind: { + // If an offset into the string literal is used, use the original length + // minus the offset. + // FIXME: Embedded null characters are not handled. + const ElementRegion *ER = cast<ElementRegion>(MR); + const SubRegion *SuperReg = + cast<SubRegion>(ER->getSuperRegion()->StripCasts()); + const StringLiteral *StrLit = getStringLiteralFromRegion(SuperReg); + if (!StrLit) + return UnknownVal(); + SValBuilder &SVB = C.getSValBuilder(); + NonLoc Idx = ER->getIndex(); + QualType SizeTy = SVB.getContext().getSizeType(); + NonLoc LengthVal = + SVB.makeIntVal(StrLit->getLength(), SizeTy).castAs<NonLoc>(); + if (state->assume(SVB.evalBinOpNN(state, BO_LE, Idx, LengthVal, + SVB.getConditionType()) + .castAs<DefinedOrUnknownSVal>(), + true)) + return SVB.evalBinOp(state, BO_Sub, LengthVal, Idx, SizeTy); return UnknownVal(); + } default: // Other regions (mostly non-data) can't have a reliable C string length. // In this case, an error is emitted and UndefinedVal is returned. @@ -1074,6 +1098,7 @@ SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state, const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C, ProgramStateRef &state, const Expr *expr, SVal val) const { + // FIXME: use getStringLiteralFromRegion (and remove unused parameters)? // Get the memory region pointed to by the val. 
const MemRegion *bufRegion = val.getAsRegion(); diff --git a/clang/lib/StaticAnalyzer/Core/EntryPointStats.cpp b/clang/lib/StaticAnalyzer/Core/EntryPointStats.cpp index abfb176..c207a7b 100644 --- a/clang/lib/StaticAnalyzer/Core/EntryPointStats.cpp +++ b/clang/lib/StaticAnalyzer/Core/EntryPointStats.cpp @@ -24,15 +24,21 @@ using namespace ento; namespace { struct Registry { + std::vector<UnsignedEPStat *> ExplicitlySetStats; + std::vector<UnsignedMaxEPStat *> MaxStats; std::vector<CounterEPStat *> CounterStats; - std::vector<UnsignedMaxEPStat *> UnsignedMaxStats; - std::vector<UnsignedEPStat *> UnsignedStats; bool IsLocked = false; struct Snapshot { const Decl *EntryPoint; - std::vector<unsigned> UnsignedStatValues; + // Explicitly set statistics may not have a value set, so they are separate + // from other unsigned statistics + std::vector<std::optional<unsigned>> ExplicitlySetStatValues; + // These are counting and maximizing statistics that initialize to 0, which + // is meaningful even if they are never updated, so their value is always + // present. 
+ std::vector<unsigned> MaxOrCountStatValues; void dumpAsCSV(llvm::raw_ostream &OS) const; }; @@ -46,10 +52,16 @@ static llvm::ManagedStatic<Registry> StatsRegistry; namespace { template <typename Callback> void enumerateStatVectors(const Callback &Fn) { + // This order is important, it matches the order of the Snapshot fields: + // - ExplicitlySetStatValues + Fn(StatsRegistry->ExplicitlySetStats); + // - MaxOrCountStatValues + Fn(StatsRegistry->MaxStats); Fn(StatsRegistry->CounterStats); - Fn(StatsRegistry->UnsignedMaxStats); - Fn(StatsRegistry->UnsignedStats); } + +void clearSnapshots(void *) { StatsRegistry->Snapshots.clear(); } + } // namespace static void checkStatName(const EntryPointStat *M) { @@ -69,7 +81,8 @@ static void checkStatName(const EntryPointStat *M) { } } -void EntryPointStat::lockRegistry(llvm::StringRef CPPFileName) { +void EntryPointStat::lockRegistry(llvm::StringRef CPPFileName, + ASTContext &Ctx) { auto CmpByNames = [](const EntryPointStat *L, const EntryPointStat *R) { return L->name() < R->name(); }; @@ -80,6 +93,10 @@ void EntryPointStat::lockRegistry(llvm::StringRef CPPFileName) { StatsRegistry->IsLocked = true; llvm::raw_string_ostream OS(StatsRegistry->EscapedCPPFileName); llvm::printEscapedString(CPPFileName, OS); + // Make sure snapshots (that reference function Decl's) do not persist after + // the AST is destroyed. This is especially relevant in the context of unit + // tests that construct and destruct multiple ASTs in the same process. 
+ Ctx.AddDeallocation(clearSnapshots, nullptr); } [[maybe_unused]] static bool isRegistered(llvm::StringLiteral Name) { @@ -101,30 +118,36 @@ UnsignedMaxEPStat::UnsignedMaxEPStat(llvm::StringLiteral Name) : EntryPointStat(Name) { assert(!StatsRegistry->IsLocked); assert(!isRegistered(Name)); - StatsRegistry->UnsignedMaxStats.push_back(this); + StatsRegistry->MaxStats.push_back(this); } UnsignedEPStat::UnsignedEPStat(llvm::StringLiteral Name) : EntryPointStat(Name) { assert(!StatsRegistry->IsLocked); assert(!isRegistered(Name)); - StatsRegistry->UnsignedStats.push_back(this); + StatsRegistry->ExplicitlySetStats.push_back(this); } -static std::vector<unsigned> consumeUnsignedStats() { - std::vector<unsigned> Result; - Result.reserve(StatsRegistry->CounterStats.size() + - StatsRegistry->UnsignedMaxStats.size() + - StatsRegistry->UnsignedStats.size()); - for (auto *M : StatsRegistry->CounterStats) { +static std::vector<std::optional<unsigned>> consumeExplicitlySetStats() { + std::vector<std::optional<unsigned>> Result; + Result.reserve(StatsRegistry->ExplicitlySetStats.size()); + for (auto *M : StatsRegistry->ExplicitlySetStats) { Result.push_back(M->value()); M->reset(); } - for (auto *M : StatsRegistry->UnsignedMaxStats) { + return Result; +} + +static std::vector<unsigned> consumeMaxAndCounterStats() { + std::vector<unsigned> Result; + Result.reserve(StatsRegistry->CounterStats.size() + + StatsRegistry->MaxStats.size()); + // Order is important, it must match the order in enumerateStatVectors + for (auto *M : StatsRegistry->MaxStats) { Result.push_back(M->value()); M->reset(); } - for (auto *M : StatsRegistry->UnsignedStats) { + for (auto *M : StatsRegistry->CounterStats) { Result.push_back(M->value()); M->reset(); } @@ -150,20 +173,33 @@ static std::string getUSR(const Decl *D) { } void Registry::Snapshot::dumpAsCSV(llvm::raw_ostream &OS) const { + auto PrintAsUnsignOpt = [&OS](std::optional<unsigned> U) { + OS << (U.has_value() ? 
std::to_string(*U) : ""); + }; + auto CommaIfNeeded = [&OS](const auto &Vec1, const auto &Vec2) { + if (!Vec1.empty() && !Vec2.empty()) + OS << ","; + }; + auto PrintAsUnsigned = [&OS](unsigned U) { OS << U; }; + OS << '"'; llvm::printEscapedString(getUSR(EntryPoint), OS); OS << "\",\""; OS << StatsRegistry->EscapedCPPFileName << "\",\""; llvm::printEscapedString( clang::AnalysisDeclContext::getFunctionName(EntryPoint), OS); - OS << "\""; - OS << (UnsignedStatValues.empty() ? "" : ","); - llvm::interleave(UnsignedStatValues, OS, [&OS](unsigned U) { OS << U; }, ","); + OS << "\","; + llvm::interleave(ExplicitlySetStatValues, OS, PrintAsUnsignOpt, ","); + CommaIfNeeded(ExplicitlySetStatValues, MaxOrCountStatValues); + llvm::interleave(MaxOrCountStatValues, OS, PrintAsUnsigned, ","); } void EntryPointStat::takeSnapshot(const Decl *EntryPoint) { - auto UnsignedValues = consumeUnsignedStats(); - StatsRegistry->Snapshots.push_back({EntryPoint, std::move(UnsignedValues)}); + auto ExplicitlySetValues = consumeExplicitlySetStats(); + auto MaxOrCounterValues = consumeMaxAndCounterStats(); + StatsRegistry->Snapshots.push_back({EntryPoint, + std::move(ExplicitlySetValues), + std::move(MaxOrCounterValues)}); } void EntryPointStat::dumpStatsAsCSV(llvm::StringRef FileName) { diff --git a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp index 84a9c43..6108931 100644 --- a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp +++ b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp @@ -1111,6 +1111,10 @@ SVal SimpleSValBuilder::evalBinOpLN(ProgramStateRef state, assert(!BinaryOperator::isComparisonOp(op) && "arguments to comparison ops must be of the same type"); + SVal simplifiedRhs = simplifySVal(state, rhs); + if (auto simplifiedRhsAsNonLoc = simplifiedRhs.getAs<NonLoc>()) + rhs = *simplifiedRhsAsNonLoc; + // Special case: rhs is a zero constant. 
if (rhs.isZeroConstant()) return lhs; diff --git a/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp b/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp index cf01e2f..4efde59 100644 --- a/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp +++ b/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp @@ -39,6 +39,7 @@ #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" +#include <cmath> #include <memory> #include <utility> @@ -125,6 +126,7 @@ public: std::unique_ptr<llvm::Timer> SyntaxCheckTimer; std::unique_ptr<llvm::Timer> ExprEngineTimer; std::unique_ptr<llvm::Timer> BugReporterTimer; + bool ShouldClearTimersToPreventDisplayingThem; /// The information about analyzed functions shared throughout the /// translation unit. @@ -138,11 +140,12 @@ public: Injector(std::move(injector)), CTU(CI), MacroExpansions(CI.getLangOpts()) { - EntryPointStat::lockRegistry(getMainFileName(CI.getInvocation())); + EntryPointStat::lockRegistry(getMainFileName(CI.getInvocation()), + CI.getASTContext()); DigestAnalyzerOptions(); if (Opts.AnalyzerDisplayProgress || Opts.PrintStats || - Opts.ShouldSerializeStats) { + Opts.ShouldSerializeStats || !Opts.DumpEntryPointStatsToCSV.empty()) { AnalyzerTimers = std::make_unique<llvm::TimerGroup>( "analyzer", "Analyzer timers"); SyntaxCheckTimer = std::make_unique<llvm::Timer>( @@ -154,6 +157,12 @@ public: *AnalyzerTimers); } + // Avoid displaying the timers created above in case we only want to record + // per-entry-point stats. 
+ ShouldClearTimersToPreventDisplayingThem = !Opts.AnalyzerDisplayProgress && + !Opts.PrintStats && + !Opts.ShouldSerializeStats; + if (Opts.PrintStats || Opts.ShouldSerializeStats) { llvm::EnableStatistics(/* DoPrintOnExit= */ false); } @@ -276,6 +285,9 @@ public: checkerMgr->runCheckersOnASTDecl(D, *Mgr, *RecVisitorBR); if (SyntaxCheckTimer) SyntaxCheckTimer->stopTimer(); + if (AnalyzerTimers && ShouldClearTimersToPreventDisplayingThem) { + AnalyzerTimers->clear(); + } } return true; } @@ -569,6 +581,9 @@ void AnalysisConsumer::runAnalysisOnTranslationUnit(ASTContext &C) { checkerMgr->runCheckersOnASTDecl(TU, *Mgr, BR); if (SyntaxCheckTimer) SyntaxCheckTimer->stopTimer(); + if (AnalyzerTimers && ShouldClearTimersToPreventDisplayingThem) { + AnalyzerTimers->clear(); + } // Run the AST-only checks using the order in which functions are defined. // If inlining is not turned on, use the simplest function order for path @@ -745,6 +760,9 @@ void AnalysisConsumer::HandleCode(Decl *D, AnalysisMode Mode, llvm::TimeRecord CheckerEndTime = SyntaxCheckTimer->getTotalTime(); CheckerEndTime -= CheckerStartTime; DisplayTime(CheckerEndTime); + if (AnalyzerTimers && ShouldClearTimersToPreventDisplayingThem) { + AnalyzerTimers->clear(); + } } } @@ -788,7 +806,12 @@ void AnalysisConsumer::RunPathSensitiveChecks(Decl *D, ExprEngineTimer->stopTimer(); llvm::TimeRecord ExprEngineEndTime = ExprEngineTimer->getTotalTime(); ExprEngineEndTime -= ExprEngineStartTime; + PathRunningTime.set(static_cast<unsigned>( + std::lround(ExprEngineEndTime.getWallTime() * 1000))); DisplayTime(ExprEngineEndTime); + if (AnalyzerTimers && ShouldClearTimersToPreventDisplayingThem) { + AnalyzerTimers->clear(); + } } if (!Mgr->options.DumpExplodedGraphTo.empty()) @@ -799,6 +822,9 @@ void AnalysisConsumer::RunPathSensitiveChecks(Decl *D, Eng.ViewGraph(Mgr->options.TrimGraph); flushReports(BugReporterTimer.get(), Eng.getBugReporter()); + if (AnalyzerTimers && ShouldClearTimersToPreventDisplayingThem) { + 
AnalyzerTimers->clear(); + } } //===----------------------------------------------------------------------===// |