Diffstat (limited to 'clang/lib')
28 files changed, 794 insertions, 549 deletions
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 056bfe3..a8b41ba 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -330,76 +330,6 @@ void ASTContext::addComment(const RawComment &RC) { Comments.addComment(RC, LangOpts.CommentOpts, BumpAlloc); } -/// If we have a 'templated' declaration for a template, adjust 'D' to -/// refer to the actual template. -/// If we have an implicit instantiation, adjust 'D' to refer to template. -static const Decl &adjustDeclToTemplate(const Decl &D) { - if (const auto *FD = dyn_cast<FunctionDecl>(&D)) { - // Is this function declaration part of a function template? - if (const FunctionTemplateDecl *FTD = FD->getDescribedFunctionTemplate()) - return *FTD; - - // Nothing to do if function is not an implicit instantiation. - if (FD->getTemplateSpecializationKind() != TSK_ImplicitInstantiation) - return D; - - // Function is an implicit instantiation of a function template? - if (const FunctionTemplateDecl *FTD = FD->getPrimaryTemplate()) - return *FTD; - - // Function is instantiated from a member definition of a class template? - if (const FunctionDecl *MemberDecl = - FD->getInstantiatedFromMemberFunction()) - return *MemberDecl; - - return D; - } - if (const auto *VD = dyn_cast<VarDecl>(&D)) { - // Static data member is instantiated from a member definition of a class - // template? - if (VD->isStaticDataMember()) - if (const VarDecl *MemberDecl = VD->getInstantiatedFromStaticDataMember()) - return *MemberDecl; - - return D; - } - if (const auto *CRD = dyn_cast<CXXRecordDecl>(&D)) { - // Is this class declaration part of a class template? - if (const ClassTemplateDecl *CTD = CRD->getDescribedClassTemplate()) - return *CTD; - - // Class is an implicit instantiation of a class template or partial - // specialization? - if (const auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(CRD)) { - if (CTSD->getSpecializationKind() != TSK_ImplicitInstantiation) - return D; - llvm::PointerUnion<ClassTemplateDecl *, - ClassTemplatePartialSpecializationDecl *> - PU = CTSD->getSpecializedTemplateOrPartial(); - return isa<ClassTemplateDecl *>(PU) - ? *static_cast<const Decl *>(cast<ClassTemplateDecl *>(PU)) - : *static_cast<const Decl *>( - cast<ClassTemplatePartialSpecializationDecl *>(PU)); - } - - // Class is instantiated from a member definition of a class template? - if (const MemberSpecializationInfo *Info = - CRD->getMemberSpecializationInfo()) - return *Info->getInstantiatedFrom(); - - return D; - } - if (const auto *ED = dyn_cast<EnumDecl>(&D)) { - // Enum is instantiated from a member definition of a class template? - if (const EnumDecl *MemberDecl = ED->getInstantiatedFromMemberEnum()) - return *MemberDecl; - - return D; - } - // FIXME: Adjust alias templates? 
- return D; -} - const RawComment *ASTContext::getRawCommentForAnyRedecl( const Decl *D, const Decl **OriginalDecl) const { @@ -976,6 +906,9 @@ void ASTContext::cleanup() { for (const auto &Value : ModuleInitializers) Value.second->~PerModuleInitializers(); ModuleInitializers.clear(); + + XRayFilter.reset(); + NoSanitizeL.reset(); } ASTContext::~ASTContext() { cleanup(); } diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index 21af3d6..8904396 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -1638,6 +1638,36 @@ bool Call(InterpState &S, CodePtr OpPC, const Function *Func, return true; } +static bool GetDynamicDecl(InterpState &S, CodePtr OpPC, Pointer TypePtr, + const CXXRecordDecl *&DynamicDecl) { + while (TypePtr.isBaseClass()) + TypePtr = TypePtr.getBase(); + + QualType DynamicType = TypePtr.getType(); + if (TypePtr.isStatic() || TypePtr.isConst()) { + const VarDecl *VD = TypePtr.getDeclDesc()->asVarDecl(); + if (!VD->isConstexpr()) { + const Expr *E = S.Current->getExpr(OpPC); + APValue V = TypePtr.toAPValue(S.getASTContext()); + QualType TT = S.getASTContext().getLValueReferenceType(DynamicType); + S.FFDiag(E, diag::note_constexpr_polymorphic_unknown_dynamic_type) + << AccessKinds::AK_MemberCall << V.getAsString(S.getASTContext(), TT); + return false; + } + } + + if (DynamicType->isPointerType() || DynamicType->isReferenceType()) { + DynamicDecl = DynamicType->getPointeeCXXRecordDecl(); + } else if (DynamicType->isArrayType()) { + const Type *ElemType = DynamicType->getPointeeOrArrayElementType(); + assert(ElemType); + DynamicDecl = ElemType->getAsCXXRecordDecl(); + } else { + DynamicDecl = DynamicType->getAsCXXRecordDecl(); + } + return true; +} + bool CallVirt(InterpState &S, CodePtr OpPC, const Function *Func, uint32_t VarArgSize) { assert(Func->hasThisPointer()); @@ -1662,17 +1692,8 @@ bool CallVirt(InterpState &S, CodePtr OpPC, const Function *Func, } const CXXRecordDecl *DynamicDecl = nullptr; - { - Pointer TypePtr = ThisPtr; - while (TypePtr.isBaseClass()) - TypePtr = TypePtr.getBase(); - - QualType DynamicType = TypePtr.getType(); - if (DynamicType->isPointerType() || DynamicType->isReferenceType()) - DynamicDecl = DynamicType->getPointeeCXXRecordDecl(); - else - DynamicDecl = DynamicType->getAsCXXRecordDecl(); - } + if (!GetDynamicDecl(S, OpPC, ThisPtr, DynamicDecl)) + return false; assert(DynamicDecl); const auto *StaticDecl = cast<CXXRecordDecl>(Func->getParentDecl()); diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 68ebfdf..6af7ef3 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -736,25 +736,6 @@ static bool interp__builtin_expect(InterpState &S, CodePtr OpPC, return true; } -/// rotateleft(value, amount) -static bool interp__builtin_rotate(InterpState &S, CodePtr OpPC, - const InterpFrame *Frame, - const CallExpr *Call, bool Right) { - APSInt Amount = popToAPSInt(S, Call->getArg(1)); - APSInt Value = popToAPSInt(S, Call->getArg(0)); - - APSInt Result; - if (Right) - Result = APSInt(Value.rotr(Amount.urem(Value.getBitWidth())), - /*IsUnsigned=*/true); - else // Left. 
- Result = APSInt(Value.rotl(Amount.urem(Value.getBitWidth())), - /*IsUnsigned=*/true); - - pushInteger(S, Result, Call->getType()); - return true; -} - static bool interp__builtin_ffs(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, const CallExpr *Call) { @@ -2916,7 +2897,49 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC, }); Dst.initializeAllElements(); + return true; +} + +static bool interp__builtin_ia32_pternlog(InterpState &S, CodePtr OpPC, + const CallExpr *Call, bool MaskZ) { + assert(Call->getNumArgs() == 5); + APInt U = popToAPSInt(S, Call->getArg(4)); // Lane mask + APInt Imm = popToAPSInt(S, Call->getArg(3)); // Ternary truth table + const Pointer &C = S.Stk.pop<Pointer>(); + const Pointer &B = S.Stk.pop<Pointer>(); + const Pointer &A = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + + unsigned DstLen = A.getNumElems(); + const QualType ElemQT = getElemType(A); + const OptPrimType ElemPT = S.getContext().classify(ElemQT); + unsigned LaneWidth = S.getASTContext().getTypeSize(ElemQT); + bool DstUnsigned = ElemQT->isUnsignedIntegerOrEnumerationType(); + + INT_TYPE_SWITCH_NO_BOOL(*ElemPT, { + for (unsigned I = 0; I != DstLen; ++I) { + APInt ALane = A.elem<T>(I).toAPSInt(); + APInt BLane = B.elem<T>(I).toAPSInt(); + APInt CLane = C.elem<T>(I).toAPSInt(); + APInt RLane(LaneWidth, 0); + if (U[I]) { // If lane not masked, compute ternary logic. + for (unsigned Bit = 0; Bit != LaneWidth; ++Bit) { + unsigned ABit = ALane[Bit]; + unsigned BBit = BLane[Bit]; + unsigned CBit = CLane[Bit]; + unsigned Idx = (ABit << 2) | (BBit << 1) | (CBit); + RLane.setBitVal(Bit, Imm[Idx]); + } + Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned)); + } else if (MaskZ) { // If zero masked, zero the lane. + Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned)); + } else { // Just masked, put in A lane. 
+ Dst.elem<T>(I) = static_cast<T>(APSInt(ALane, DstUnsigned)); + } + } + }); + Dst.initializeAllElements(); return true; } @@ -3160,7 +3183,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case Builtin::BI_rotl: case Builtin::BI_lrotl: case Builtin::BI_rotl64: - return interp__builtin_rotate(S, OpPC, Frame, Call, /*Right=*/false); + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &Value, const APSInt &Amount) -> APInt { + return Value.rotl(Amount); + }); case Builtin::BI__builtin_rotateright8: case Builtin::BI__builtin_rotateright16: @@ -3171,7 +3197,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case Builtin::BI_rotr: case Builtin::BI_lrotr: case Builtin::BI_rotr64: - return interp__builtin_rotate(S, OpPC, Frame, Call, /*Right=*/true); + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &Value, const APSInt &Amount) -> APInt { + return Value.rotr(Amount); + }); case Builtin::BI__builtin_ffs: case Builtin::BI__builtin_ffsl: @@ -3773,6 +3802,20 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; }); + case X86::BI__builtin_ia32_pternlogd128_mask: + case X86::BI__builtin_ia32_pternlogd256_mask: + case X86::BI__builtin_ia32_pternlogd512_mask: + case X86::BI__builtin_ia32_pternlogq128_mask: + case X86::BI__builtin_ia32_pternlogq256_mask: + case X86::BI__builtin_ia32_pternlogq512_mask: + return interp__builtin_ia32_pternlog(S, OpPC, Call, /*MaskZ=*/false); + case X86::BI__builtin_ia32_pternlogd128_maskz: + case X86::BI__builtin_ia32_pternlogd256_maskz: + case X86::BI__builtin_ia32_pternlogd512_maskz: + case X86::BI__builtin_ia32_pternlogq128_maskz: + case X86::BI__builtin_ia32_pternlogq256_maskz: + case X86::BI__builtin_ia32_pternlogq512_maskz: + return interp__builtin_ia32_pternlog(S, OpPC, Call, /*MaskZ=*/true); case Builtin::BI__builtin_elementwise_fshl: return interp__builtin_elementwise_triop(S, OpPC, Call, llvm::APIntOps::fshl); diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp index b6bb611..e5fba1b 100644 --- a/clang/lib/AST/DeclTemplate.cpp +++ b/clang/lib/AST/DeclTemplate.cpp @@ -1708,3 +1708,70 @@ TemplateParameterList *clang::getReplacedTemplateParameterList(const Decl *D) { llvm_unreachable("Unhandled templated declaration kind"); } } + +const Decl &clang::adjustDeclToTemplate(const Decl &D) { + if (const auto *FD = dyn_cast<FunctionDecl>(&D)) { + // Is this function declaration part of a function template? + if (const FunctionTemplateDecl *FTD = FD->getDescribedFunctionTemplate()) + return *FTD; + + // Nothing to do if function is not an implicit instantiation. + if (FD->getTemplateSpecializationKind() != TSK_ImplicitInstantiation) + return D; + + // Function is an implicit instantiation of a function template? + if (const FunctionTemplateDecl *FTD = FD->getPrimaryTemplate()) + return *FTD; + + // Function is instantiated from a member definition of a class template? + if (const FunctionDecl *MemberDecl = + FD->getInstantiatedFromMemberFunction()) + return *MemberDecl; + + return D; + } + if (const auto *VD = dyn_cast<VarDecl>(&D)) { + // Static data member is instantiated from a member definition of a class + // template? 
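Two notes on the InterpBuiltin.cpp hunks above. First, the rotate builtins can forward to `interp__builtin_elementwise_int_binop` without the old `urem` wrapper because `llvm::APInt::rotl`/`rotr` already reduce the rotate amount modulo the bit width (the same reasoning behind the matching ExprConstant.cpp change further below). Second, the new pternlog interpreter treats the 8-bit immediate as a truth table indexed by the bit triple `(A<<2)|(B<<1)|C`. A minimal scalar model of one lane, illustrative only (not clang code; the function name is made up):

```cpp
#include <cstdint>

// One 32-bit pternlog lane: for each bit position, form a 3-bit index from
// (a, b, c) and look the result bit up in the 8-entry truth table 'imm'.
uint32_t pternlogLane(uint32_t a, uint32_t b, uint32_t c, uint8_t imm) {
  uint32_t r = 0;
  for (unsigned bit = 0; bit != 32; ++bit) {
    unsigned idx = (((a >> bit) & 1) << 2) | (((b >> bit) & 1) << 1) |
                   ((c >> bit) & 1);
    r |= uint32_t((imm >> idx) & 1) << bit;
  }
  return r;
}
// e.g. imm == 0x96 computes a ^ b ^ c; imm == 0xE8 is the majority function.
```

In the `_mask` variants, a clear bit in the lane mask `U` keeps the original `A` lane; in the `_maskz` variants it zeroes the lane instead.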
+ if (VD->isStaticDataMember()) + if (const VarDecl *MemberDecl = VD->getInstantiatedFromStaticDataMember()) + return *MemberDecl; + + return D; + } + if (const auto *CRD = dyn_cast<CXXRecordDecl>(&D)) { + // Is this class declaration part of a class template? + if (const ClassTemplateDecl *CTD = CRD->getDescribedClassTemplate()) + return *CTD; + + // Class is an implicit instantiation of a class template or partial + // specialization? + if (const auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(CRD)) { + if (CTSD->getSpecializationKind() != TSK_ImplicitInstantiation) + return D; + llvm::PointerUnion<ClassTemplateDecl *, + ClassTemplatePartialSpecializationDecl *> + PU = CTSD->getSpecializedTemplateOrPartial(); + return isa<ClassTemplateDecl *>(PU) + ? *static_cast<const Decl *>(cast<ClassTemplateDecl *>(PU)) + : *static_cast<const Decl *>( + cast<ClassTemplatePartialSpecializationDecl *>(PU)); + } + + // Class is instantiated from a member definition of a class template? + if (const MemberSpecializationInfo *Info = + CRD->getMemberSpecializationInfo()) + return *Info->getInstantiatedFrom(); + + return D; + } + if (const auto *ED = dyn_cast<EnumDecl>(&D)) { + // Enum is instantiated from a member definition of a class template? + if (const EnumDecl *MemberDecl = ED->getInstantiatedFromMemberEnum()) + return *MemberDecl; + + return D; + } + // FIXME: Adjust alias templates? + return D; +} diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 7bf28d9..618e163 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12168,6 +12168,97 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(R, E); } + case X86::BI__builtin_ia32_pternlogd128_mask: + case X86::BI__builtin_ia32_pternlogd256_mask: + case X86::BI__builtin_ia32_pternlogd512_mask: + case X86::BI__builtin_ia32_pternlogq128_mask: + case X86::BI__builtin_ia32_pternlogq256_mask: + case X86::BI__builtin_ia32_pternlogq512_mask: { + APValue AValue, BValue, CValue, ImmValue, UValue; + if (!EvaluateAsRValue(Info, E->getArg(0), AValue) || + !EvaluateAsRValue(Info, E->getArg(1), BValue) || + !EvaluateAsRValue(Info, E->getArg(2), CValue) || + !EvaluateAsRValue(Info, E->getArg(3), ImmValue) || + !EvaluateAsRValue(Info, E->getArg(4), UValue)) + return false; + + QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); + bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType(); + APInt Imm = ImmValue.getInt(); + APInt U = UValue.getInt(); + unsigned ResultLen = AValue.getVectorLength(); + SmallVector<APValue, 16> ResultElements; + ResultElements.reserve(ResultLen); + + for (unsigned EltNum = 0; EltNum < ResultLen; ++EltNum) { + APInt ALane = AValue.getVectorElt(EltNum).getInt(); + APInt BLane = BValue.getVectorElt(EltNum).getInt(); + APInt CLane = CValue.getVectorElt(EltNum).getInt(); + + if (U[EltNum]) { + unsigned BitWidth = ALane.getBitWidth(); + APInt ResLane(BitWidth, 0); + + for (unsigned Bit = 0; Bit < BitWidth; ++Bit) { + unsigned ABit = ALane[Bit]; + unsigned BBit = BLane[Bit]; + unsigned CBit = CLane[Bit]; + + unsigned Idx = (ABit << 2) | (BBit << 1) | CBit; + ResLane.setBitVal(Bit, Imm[Idx]); + } + ResultElements.push_back(APValue(APSInt(ResLane, DestUnsigned))); + } else { + ResultElements.push_back(APValue(APSInt(ALane, DestUnsigned))); + } + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + case X86::BI__builtin_ia32_pternlogd128_maskz: + case X86::BI__builtin_ia32_pternlogd256_maskz: + 
case X86::BI__builtin_ia32_pternlogd512_maskz: + case X86::BI__builtin_ia32_pternlogq128_maskz: + case X86::BI__builtin_ia32_pternlogq256_maskz: + case X86::BI__builtin_ia32_pternlogq512_maskz: { + APValue AValue, BValue, CValue, ImmValue, UValue; + if (!EvaluateAsRValue(Info, E->getArg(0), AValue) || + !EvaluateAsRValue(Info, E->getArg(1), BValue) || + !EvaluateAsRValue(Info, E->getArg(2), CValue) || + !EvaluateAsRValue(Info, E->getArg(3), ImmValue) || + !EvaluateAsRValue(Info, E->getArg(4), UValue)) + return false; + + QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); + bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType(); + APInt Imm = ImmValue.getInt(); + APInt U = UValue.getInt(); + unsigned ResultLen = AValue.getVectorLength(); + SmallVector<APValue, 16> ResultElements; + ResultElements.reserve(ResultLen); + + for (unsigned EltNum = 0; EltNum < ResultLen; ++EltNum) { + APInt ALane = AValue.getVectorElt(EltNum).getInt(); + APInt BLane = BValue.getVectorElt(EltNum).getInt(); + APInt CLane = CValue.getVectorElt(EltNum).getInt(); + + unsigned BitWidth = ALane.getBitWidth(); + APInt ResLane(BitWidth, 0); + + if (U[EltNum]) { + for (unsigned Bit = 0; Bit < BitWidth; ++Bit) { + unsigned ABit = ALane[Bit]; + unsigned BBit = BLane[Bit]; + unsigned CBit = CLane[Bit]; + + unsigned Idx = (ABit << 2) | (BBit << 1) | CBit; + ResLane.setBitVal(Bit, Imm[Idx]); + } + } + ResultElements.push_back(APValue(APSInt(ResLane, DestUnsigned))); + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + case Builtin::BI__builtin_elementwise_clzg: case Builtin::BI__builtin_elementwise_ctzg: { APValue SourceLHS; @@ -14265,7 +14356,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, !EvaluateInteger(E->getArg(1), Amt, Info)) return false; - return Success(Val.rotl(Amt.urem(Val.getBitWidth())), E); + return Success(Val.rotl(Amt), E); } case Builtin::BI__builtin_rotateright8: @@ -14282,7 +14373,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, !EvaluateInteger(E->getArg(1), Amt, Info)) return false; - return Success(Val.rotr(Amt.urem(Val.getBitWidth())), E); + return Success(Val.rotr(Amt), E); } case Builtin::BI__builtin_elementwise_add_sat: { diff --git a/clang/lib/Analysis/FlowSensitive/RecordOps.cpp b/clang/lib/Analysis/FlowSensitive/RecordOps.cpp index ed827ac..03d6ed8 100644 --- a/clang/lib/Analysis/FlowSensitive/RecordOps.cpp +++ b/clang/lib/Analysis/FlowSensitive/RecordOps.cpp @@ -14,6 +14,9 @@ #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/Type.h" +#include "clang/Analysis/FlowSensitive/ASTOps.h" +#include "clang/Basic/LLVM.h" +#include "llvm/ADT/StringMap.h" #define DEBUG_TYPE "dataflow" @@ -79,18 +82,41 @@ void copyRecord(RecordStorageLocation &Src, RecordStorageLocation &Dst, if (SrcType == DstType || (SrcDecl != nullptr && DstDecl != nullptr && SrcDecl->isDerivedFrom(DstDecl))) { + // Dst may have children modeled from other derived types than SrcType, e.g. + // after casts of Dst to other types derived from DstType. Only copy the + // children and synthetic fields present in both Dst and SrcType. 
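The guarded copy that the comment above describes is easiest to see with a tiny hierarchy. The types below are hypothetical, only to make the failure mode of the old unguarded loop concrete:

```cpp
// Hypothetical classes (illustration, not from the patch's tests):
struct Base { int b; };
struct Derived1 : Base { int d1; };
struct Derived2 : Base { int d2; };
```

If a `Base` destination location was earlier cast to `Derived2*`, the framework may have modeled a child for `d2` on `Dst`. When the source is a `Derived1`, which models only `{b, d1}`, blindly iterating `Dst.children()` would ask `Src` for `d2`, a field the source never modeled. Filtering through `getModeledFields(SrcType)` (and `getSyntheticFields` for synthetic fields) restricts the copy to the common subset.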
+ const FieldSet FieldsInSrcType = + Env.getDataflowAnalysisContext().getModeledFields(SrcType); for (auto [Field, DstFieldLoc] : Dst.children()) - copyField(*Field, Src.getChild(*Field), DstFieldLoc, Dst, Env); + if (const auto *FieldAsFieldDecl = dyn_cast<FieldDecl>(Field); + FieldAsFieldDecl && FieldsInSrcType.contains(FieldAsFieldDecl)) + copyField(*Field, Src.getChild(*Field), DstFieldLoc, Dst, Env); + const llvm::StringMap<QualType> SyntheticFieldsForSrcType = + Env.getDataflowAnalysisContext().getSyntheticFields(SrcType); for (const auto &[Name, DstFieldLoc] : Dst.synthetic_fields()) - copySyntheticField(DstFieldLoc->getType(), Src.getSyntheticField(Name), - *DstFieldLoc, Env); + if (SyntheticFieldsForSrcType.contains(Name)) + copySyntheticField(DstFieldLoc->getType(), Src.getSyntheticField(Name), + *DstFieldLoc, Env); } else if (SrcDecl != nullptr && DstDecl != nullptr && DstDecl->isDerivedFrom(SrcDecl)) { - for (auto [Field, SrcFieldLoc] : Src.children()) - copyField(*Field, SrcFieldLoc, Dst.getChild(*Field), Dst, Env); - for (const auto &[Name, SrcFieldLoc] : Src.synthetic_fields()) - copySyntheticField(SrcFieldLoc->getType(), *SrcFieldLoc, - Dst.getSyntheticField(Name), Env); + // Src may have children modeled from other derived types than DstType, e.g. + // after other casts of Src to those types (likely in different branches, + // but without flow-condition-dependent field modeling). Only copy the + // children and synthetic fields of Src that are present in DstType. + const FieldSet FieldsInDstType = + Env.getDataflowAnalysisContext().getModeledFields(DstType); + for (auto [Field, SrcFieldLoc] : Src.children()) { + if (const auto *FieldAsFieldDecl = dyn_cast<FieldDecl>(Field); + FieldAsFieldDecl && FieldsInDstType.contains(FieldAsFieldDecl)) + copyField(*Field, SrcFieldLoc, Dst.getChild(*Field), Dst, Env); + } + const llvm::StringMap<QualType> SyntheticFieldsForDstType = + Env.getDataflowAnalysisContext().getSyntheticFields(DstType); + for (const auto &[Name, SrcFieldLoc] : Src.synthetic_fields()) { + if (SyntheticFieldsForDstType.contains(Name)) + copySyntheticField(SrcFieldLoc->getType(), *SrcFieldLoc, + Dst.getSyntheticField(Name), Env); + } } else { for (const FieldDecl *Field : Env.getDataflowAnalysisContext().getModeledFields(TypeToCopy)) { diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index 552698a..dfcc7940 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -319,9 +319,12 @@ public: Opts["__opencl_c_images"] = true; Opts["__opencl_c_3d_image_writes"] = true; Opts["cl_khr_3d_image_writes"] = true; + Opts["__opencl_c_program_scope_global_variables"] = true; - Opts["__opencl_c_generic_address_space"] = - GPUKind >= llvm::AMDGPU::GK_GFX700; + if (GPUKind >= llvm::AMDGPU::GK_GFX700) { + Opts["__opencl_c_generic_address_space"] = true; + Opts["__opencl_c_device_enqueue"] = true; + } } } diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp index bbc45e5..24a5fc2 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp @@ -221,10 +221,9 @@ mlir::Value OpenACCRecipeBuilderBase::makeBoundsAlloca( return initialAlloca; } -mlir::Value -OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue, - mlir::Value bound, - mlir::Location loc, bool inverse) { +std::pair<mlir::Value, mlir::Value> OpenACCRecipeBuilderBase::createBoundsLoop( + mlir::Value subscriptedValue, mlir::Value 
subscriptedValue2, + mlir::Value bound, mlir::Location loc, bool inverse) { mlir::Operation *bodyInsertLoc; mlir::Type itrTy = cgf.cgm.convertType(cgf.getContext().UnsignedLongLongTy); @@ -249,7 +248,6 @@ OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue, return cir::PtrStrideOp::create(builder, loc, eltLoad.getType(), eltLoad, idxLoad); - }; auto forStmtBuilder = [&]() { @@ -303,6 +301,8 @@ OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue, if (subscriptedValue) subscriptedValue = doSubscriptOp(subscriptedValue, load); + if (subscriptedValue2) + subscriptedValue2 = doSubscriptOp(subscriptedValue2, load); bodyInsertLoc = builder.createYield(loc); }, /*stepBuilder=*/ @@ -325,7 +325,7 @@ OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue, // Leave the insertion point to be inside the body, so we can loop over // these things. builder.setInsertionPoint(bodyInsertLoc); - return subscriptedValue; + return {subscriptedValue, subscriptedValue2}; } mlir::acc::ReductionOperator @@ -434,7 +434,7 @@ void OpenACCRecipeBuilderBase::createInitRecipe( mlir::Location loc, mlir::Location locEnd, SourceRange exprRange, mlir::Value mainOp, mlir::Region &recipeInitRegion, size_t numBounds, llvm::ArrayRef<QualType> boundTypes, const VarDecl *allocaDecl, - QualType origType) { + QualType origType, bool emitInitExpr) { assert(allocaDecl && "Required recipe variable not set?"); CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, allocaDecl}; @@ -464,14 +464,15 @@ void OpenACCRecipeBuilderBase::createInitRecipe( // initialize this variable correctly. CIRGenFunction::AutoVarEmission tempDeclEmission = cgf.emitAutoVarAlloca(*allocaDecl, builder.saveInsertionPoint()); - cgf.emitAutoVarInit(tempDeclEmission); + if (emitInitExpr) + cgf.emitAutoVarInit(tempDeclEmission); } else { mlir::Value alloca = makeBoundsAlloca( block, exprRange, loc, allocaDecl->getName(), numBounds, boundTypes); // If the initializer is trivial, there is nothing to do here, so save // ourselves some effort. 
- if (allocaDecl->getInit() && + if (emitInitExpr && allocaDecl->getInit() && (!cgf.isTrivialInitializer(allocaDecl->getInit()) || cgf.getContext().getLangOpts().getTrivialAutoVarInit() != LangOptions::TrivialAutoVarInitKind::Uninitialized)) @@ -484,35 +485,42 @@ void OpenACCRecipeBuilderBase::createInitRecipe( void OpenACCRecipeBuilderBase::createFirstprivateRecipeCopy( mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp, - CIRGenFunction::AutoVarEmission tempDeclEmission, - mlir::acc::FirstprivateRecipeOp recipe, const VarDecl *varRecipe, - const VarDecl *temporary) { - mlir::Block *block = - createRecipeBlock(recipe.getCopyRegion(), mainOp.getType(), loc, - /*numBounds=*/0, /*isInit=*/false); - builder.setInsertionPointToEnd(&recipe.getCopyRegion().back()); + const VarDecl *allocaDecl, const VarDecl *temporary, + mlir::Region ©Region, size_t numBounds) { + mlir::Block *block = createRecipeBlock(copyRegion, mainOp.getType(), loc, + numBounds, /*isInit=*/false); + builder.setInsertionPointToEnd(©Region.back()); CIRGenFunction::LexicalScope ls(cgf, loc, block); - mlir::BlockArgument fromArg = block->getArgument(0); - mlir::BlockArgument toArg = block->getArgument(1); + mlir::Value fromArg = block->getArgument(0); + mlir::Value toArg = block->getArgument(1); - mlir::Type elementTy = - mlir::cast<cir::PointerType>(mainOp.getType()).getPointee(); + llvm::MutableArrayRef<mlir::BlockArgument> boundsRange = + block->getArguments().drop_front(2); - // Set the address of the emission to be the argument, so that we initialize - // that instead of the variable in the other block. - tempDeclEmission.setAllocatedAddress( - Address{toArg, elementTy, cgf.getContext().getDeclAlign(varRecipe)}); + for (mlir::BlockArgument boundArg : llvm::reverse(boundsRange)) + std::tie(fromArg, toArg) = + createBoundsLoop(fromArg, toArg, boundArg, loc, /*inverse=*/false); + + // Set up the 'to' address. + mlir::Type elementTy = + mlir::cast<cir::PointerType>(toArg.getType()).getPointee(); + CIRGenFunction::AutoVarEmission tempDeclEmission(*allocaDecl); tempDeclEmission.emittedAsOffload = true; + tempDeclEmission.setAllocatedAddress( + Address{toArg, elementTy, cgf.getContext().getDeclAlign(allocaDecl)}); + // Set up the 'from' address from the temporary. CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, temporary}; cgf.setAddrOfLocalVar( temporary, - Address{fromArg, elementTy, cgf.getContext().getDeclAlign(varRecipe)}); - + Address{fromArg, elementTy, cgf.getContext().getDeclAlign(allocaDecl)}); cgf.emitAutoVarInit(tempDeclEmission); + + builder.setInsertionPointToEnd(©Region.back()); mlir::acc::YieldOp::create(builder, locEnd); } + // This function generates the 'combiner' section for a reduction recipe. Note // that this function is not 'insertion point' clean, in that it alters the // insertion point to be inside of the 'combiner' section of the recipe, but diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h index 21707ad..a5da744 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h @@ -49,14 +49,16 @@ protected: // Creates a loop through an 'acc.bounds', leaving the 'insertion' point to be // the inside of the loop body. Traverses LB->UB UNLESS `inverse` is set. // Returns the 'subscriptedValue' changed with the new bounds subscript. 
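A conceptual picture of what the pair-returning overload declared below enables: the firstprivate copy region walks the source and the destination with a single induction variable per `acc.bounds` dimension. A sketch in plain C++ (the real code emits `cir.for`/`cir.ptr_stride` ops, and iterates UB->LB when `inverse` is set):

```cpp
#include <cstdint>

// Shape of the generated copy for one bounds dimension (illustrative only).
void copyOneDim(const int *from, int *to, uint64_t lb, uint64_t ub) {
  for (uint64_t i = lb; i < ub; ++i)
    to[i] = from[i]; // both values subscripted by the same induction variable
}
```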
+ std::pair<mlir::Value, mlir::Value> + createBoundsLoop(mlir::Value subscriptedValue, mlir::Value subscriptedValue2, + mlir::Value bound, mlir::Location loc, bool inverse); + mlir::Value createBoundsLoop(mlir::Value subscriptedValue, mlir::Value bound, - mlir::Location loc, bool inverse); + mlir::Location loc, bool inverse) { + return createBoundsLoop(subscriptedValue, {}, bound, loc, inverse).first; + } + mlir::acc::ReductionOperator convertReductionOp(OpenACCReductionOperator op); - void createFirstprivateRecipeCopy( - mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp, - CIRGenFunction::AutoVarEmission tempDeclEmission, - mlir::acc::FirstprivateRecipeOp recipe, const VarDecl *varRecipe, - const VarDecl *temporary); // This function generates the 'combiner' section for a reduction recipe. Note // that this function is not 'insertion point' clean, in that it alters the @@ -66,11 +68,19 @@ protected: mlir::Value mainOp, mlir::acc::ReductionRecipeOp recipe, size_t numBounds); + void createInitRecipe(mlir::Location loc, mlir::Location locEnd, SourceRange exprRange, mlir::Value mainOp, mlir::Region &recipeInitRegion, size_t numBounds, llvm::ArrayRef<QualType> boundTypes, - const VarDecl *allocaDecl, QualType origType); + const VarDecl *allocaDecl, QualType origType, + bool emitInitExpr); + + void createFirstprivateRecipeCopy(mlir::Location loc, mlir::Location locEnd, + mlir::Value mainOp, + const VarDecl *allocaDecl, + const VarDecl *temporary, + mlir::Region ©Region, size_t numBounds); void createRecipeDestroySection(mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp, CharUnits alignment, @@ -150,63 +160,6 @@ class OpenACCRecipeBuilder : OpenACCRecipeBuilderBase { return recipeName; } - // Create the 'init' section of the recipe, including the 'copy' section for - // 'firstprivate'. Note that this function is not 'insertion point' clean, in - // that it alters the insertion point to be inside of the 'destroy' section of - // the recipe, but doesn't restore it aftewards. - void createRecipeInitCopy(mlir::Location loc, mlir::Location locEnd, - SourceRange exprRange, mlir::Value mainOp, - RecipeTy recipe, const VarDecl *varRecipe, - const VarDecl *temporary) { - // TODO: OpenACC: when we get the 'pointer' variants for - // firstprivate/reduction, this probably should be removed/split into - // functions for the BuilderBase. - assert(varRecipe && "Required recipe variable not set?"); - - CIRGenFunction::AutoVarEmission tempDeclEmission{ - CIRGenFunction::AutoVarEmission::invalid()}; - CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, varRecipe}; - - // Do the 'init' section of the recipe IR, which does an alloca, then the - // initialization (except for firstprivate). - mlir::Block *block = - createRecipeBlock(recipe.getInitRegion(), mainOp.getType(), loc, - /*numBounds=*/0, /*isInit=*/true); - builder.setInsertionPointToEnd(&recipe.getInitRegion().back()); - CIRGenFunction::LexicalScope ls(cgf, loc, block); - - tempDeclEmission = - cgf.emitAutoVarAlloca(*varRecipe, builder.saveInsertionPoint()); - - // 'firstprivate' doesn't do its initialization in the 'init' section, - // instead it does it in the 'copy' section. SO, only do 'init' here for - // reduction. - if constexpr (std::is_same_v<RecipeTy, mlir::acc::ReductionRecipeOp>) { - // Unlike Private, the recipe here is always required as it has to do - // init, not just 'default' init. 
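For reference, the reason the removed comment above says firstprivate does its initialization in the 'copy' section: each private copy must begin life as a copy of the original variable's current value, not as a default-initialized object. A minimal OpenACC illustration:

```cpp
int x = 5;
#pragma acc parallel firstprivate(x)
{
  // Every gang/worker starts with x == 5 (copied in); writes to x here
  // do not affect the original x outside the region.
  x += 1;
}
```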
- if (!varRecipe->getInit()) - cgf.cgm.errorNYI(exprRange, "reduction init recipe"); - cgf.emitAutoVarInit(tempDeclEmission); - } - - mlir::acc::YieldOp::create(builder, locEnd); - - if constexpr (std::is_same_v<RecipeTy, mlir::acc::FirstprivateRecipeOp>) { - if (!varRecipe->getInit()) { - // If we don't have any initialization recipe, we failed during Sema to - // initialize this correctly. If we disable the - // Sema::TentativeAnalysisScopes in SemaOpenACC::CreateInitRecipe, it'll - // emit an error to tell us. However, emitting those errors during - // production is a violation of the standard, so we cannot do them. - cgf.cgm.errorNYI( - exprRange, "firstprivate copy-init recipe not properly generated"); - } - - createFirstprivateRecipeCopy(loc, locEnd, mainOp, tempDeclEmission, - recipe, varRecipe, temporary); - } - } - public: OpenACCRecipeBuilder(CIRGen::CIRGenFunction &cgf, CIRGen::CIRGenBuilderTy &builder) @@ -221,19 +174,6 @@ public: BuiltinType::ArraySection) && "array section shouldn't make it to recipe creation"); - // TODO: OpenACC: This is a bit of a hackery to get this to not change for - // the non-private recipes. This will be removed soon, when we get this - // 'right' for firstprivate and reduction. - if constexpr (std::is_same_v<RecipeTy, mlir::acc::FirstprivateRecipeOp>) { - if (numBounds) { - cgf.cgm.errorNYI(varRef->getSourceRange(), - "firstprivate-init with bounds"); - } - boundTypes = {}; - numBounds = 0; - origType = baseType; - } - mlir::ModuleOp mod = builder.getBlock() ->getParent() ->template getParentOfType<mlir::ModuleOp>(); @@ -262,21 +202,20 @@ public: if constexpr (std::is_same_v<RecipeTy, mlir::acc::PrivateRecipeOp>) { createInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp, recipe.getInitRegion(), numBounds, boundTypes, varRecipe, - origType); + origType, /*emitInitExpr=*/true); } else if constexpr (std::is_same_v<RecipeTy, mlir::acc::ReductionRecipeOp>) { createInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp, recipe.getInitRegion(), numBounds, boundTypes, varRecipe, - origType); + origType, /*emitInitExpr=*/true); createReductionRecipeCombiner(loc, locEnd, mainOp, recipe, numBounds); } else { static_assert(std::is_same_v<RecipeTy, mlir::acc::FirstprivateRecipeOp>); - // TODO: OpenACC: we probably want this to call createInitRecipe as well, - // but do so in a way that omits the 'initialization', so that we can do - // it separately, since it belongs in the 'copy' region. It also might - // need a way of getting the tempDeclEmission out of it for that purpose. 
- createRecipeInitCopy(loc, locEnd, varRef->getSourceRange(), mainOp, - recipe, varRecipe, temporary); + createInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp, + recipe.getInitRegion(), numBounds, boundTypes, varRecipe, + origType, /*emitInitExpr=*/false); + createFirstprivateRecipeCopy(loc, locEnd, mainOp, varRecipe, temporary, + recipe.getCopyRegion(), numBounds); } if (origType.isDestructedType()) diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp index c15637d..2eeef81 100644 --- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp +++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp @@ -8,18 +8,39 @@ #include "PassDetail.h" #include "clang/AST/ASTContext.h" +#include "clang/Basic/Module.h" #include "clang/Basic/TargetInfo.h" #include "clang/CIR/Dialect/Builder/CIRBaseBuilder.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" #include "clang/CIR/Dialect/IR/CIROpsEnums.h" #include "clang/CIR/Dialect/Passes.h" #include "clang/CIR/MissingFeatures.h" +#include "llvm/Support/Path.h" #include <memory> using namespace mlir; using namespace cir; +static SmallString<128> getTransformedFileName(mlir::ModuleOp mlirModule) { + SmallString<128> fileName; + + if (mlirModule.getSymName()) + fileName = llvm::sys::path::filename(mlirModule.getSymName()->str()); + + if (fileName.empty()) + fileName = "<null>"; + + for (size_t i = 0; i < fileName.size(); ++i) { + // Replace everything that's not [a-zA-Z0-9._] with a _. This set happens + // to be the set of C preprocessing numbers. + if (!clang::isPreprocessingNumberBody(fileName[i])) + fileName[i] = '_'; + } + + return fileName; +} + namespace { struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> { LoweringPreparePass() = default; @@ -30,9 +51,16 @@ struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> { void lowerComplexDivOp(cir::ComplexDivOp op); void lowerComplexMulOp(cir::ComplexMulOp op); void lowerUnaryOp(cir::UnaryOp op); + void lowerGlobalOp(cir::GlobalOp op); void lowerArrayDtor(cir::ArrayDtor op); void lowerArrayCtor(cir::ArrayCtor op); + /// Build the function that initializes the specified global + cir::FuncOp buildCXXGlobalVarDeclInitFunc(cir::GlobalOp op); + + /// Build a module init function that calls all the dynamic initializers. + void buildCXXGlobalInitFunc(); + cir::FuncOp buildRuntimeFunction( mlir::OpBuilder &builder, llvm::StringRef name, mlir::Location loc, cir::FuncType type, @@ -47,6 +75,10 @@ struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> { /// Tracks current module. mlir::ModuleOp mlirModule; + /// Tracks existing dynamic initializers. + llvm::StringMap<uint32_t> dynamicInitializerNames; + llvm::SmallVector<cir::FuncOp> dynamicInitializers; + void setASTContext(clang::ASTContext *c) { astCtx = c; } }; @@ -589,6 +621,111 @@ void LoweringPreparePass::lowerUnaryOp(cir::UnaryOp op) { op.erase(); } +cir::FuncOp +LoweringPreparePass::buildCXXGlobalVarDeclInitFunc(cir::GlobalOp op) { + // TODO(cir): Store this in the GlobalOp. + // This should come from the MangleContext, but for now I'm hardcoding it. + SmallString<256> fnName("__cxx_global_var_init"); + // Get a unique name + uint32_t cnt = dynamicInitializerNames[fnName]++; + if (cnt) + fnName += "." + llvm::Twine(cnt).str(); + + // Create a variable initialization function. 
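For orientation, the structure this pass produces mirrors classic CodeGen's global-init scheme. A hedged C++-level picture (the helper names follow the hardcoded scheme in this hunk; the actual emission is CIR ops, not C++ source):

```cpp
// Given a global that needs dynamic initialization:
int foo();
int x = foo(); // the cir.global for 'x' carries a ctor region

// LoweringPrepare conceptually produces:
//   static void __cxx_global_var_init(void) { /* runs 'x = foo()' */ }
//   void _GLOBAL__sub_I_<file>(void) { __cxx_global_var_init(); }
// Subsequent initializers get ".1", ".2", ... suffixes via the
// dynamicInitializerNames counter.
```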
+ CIRBaseBuilderTy builder(getContext()); + builder.setInsertionPointAfter(op); + auto fnType = cir::FuncType::get({}, builder.getVoidTy()); + FuncOp f = buildRuntimeFunction(builder, fnName, op.getLoc(), fnType, + cir::GlobalLinkageKind::InternalLinkage); + + // Move over the initialzation code of the ctor region. + mlir::Block *entryBB = f.addEntryBlock(); + if (!op.getCtorRegion().empty()) { + mlir::Block &block = op.getCtorRegion().front(); + entryBB->getOperations().splice(entryBB->begin(), block.getOperations(), + block.begin(), std::prev(block.end())); + } + + // Register the destructor call with __cxa_atexit + mlir::Region &dtorRegion = op.getDtorRegion(); + if (!dtorRegion.empty()) { + assert(!cir::MissingFeatures::opGlobalDtorLowering()); + llvm_unreachable("dtor region lowering is NYI"); + } + + // Replace cir.yield with cir.return + builder.setInsertionPointToEnd(entryBB); + mlir::Operation *yieldOp = nullptr; + if (!op.getCtorRegion().empty()) { + mlir::Block &block = op.getCtorRegion().front(); + yieldOp = &block.getOperations().back(); + } else { + assert(!cir::MissingFeatures::opGlobalDtorLowering()); + llvm_unreachable("dtor region lowering is NYI"); + } + + assert(isa<YieldOp>(*yieldOp)); + cir::ReturnOp::create(builder, yieldOp->getLoc()); + return f; +} + +void LoweringPreparePass::lowerGlobalOp(GlobalOp op) { + mlir::Region &ctorRegion = op.getCtorRegion(); + mlir::Region &dtorRegion = op.getDtorRegion(); + + if (!ctorRegion.empty() || !dtorRegion.empty()) { + // Build a variable initialization function and move the initialzation code + // in the ctor region over. + cir::FuncOp f = buildCXXGlobalVarDeclInitFunc(op); + + // Clear the ctor and dtor region + ctorRegion.getBlocks().clear(); + dtorRegion.getBlocks().clear(); + + assert(!cir::MissingFeatures::astVarDeclInterface()); + dynamicInitializers.push_back(f); + } + + assert(!cir::MissingFeatures::opGlobalAnnotations()); +} + +void LoweringPreparePass::buildCXXGlobalInitFunc() { + if (dynamicInitializers.empty()) + return; + + assert(!cir::MissingFeatures::opGlobalCtorList()); + + SmallString<256> fnName; + // Include the filename in the symbol name. Including "sub_" matches gcc + // and makes sure these symbols appear lexicographically behind the symbols + // with priority (TBD). Module implementation units behave the same + // way as a non-modular TU with imports. 
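On the symbol naming decided just below: a named C++20 module interface uses the Itanium module-initializer mangling (e.g. `_ZGIW3Foo` for module `Foo`; an assumption based on `mangleModuleInitializer`), while everything else falls back to `_GLOBAL__sub_I_` plus the sanitized file name from `getTransformedFileName`. A standalone restatement of that sanitizer, assuming it matches `clang::isPreprocessingNumberBody` (which accepts `[a-zA-Z0-9_.]`):

```cpp
#include <cctype>
#include <string>

// Keep [a-zA-Z0-9_.]; replace every other byte with '_'.
std::string sanitizeFileName(std::string name) {
  for (char &c : name)
    if (!(std::isalnum(static_cast<unsigned char>(c)) || c == '_' || c == '.'))
      c = '_';
  return name;
}
// sanitizeFileName("my-file++.cc") == "my_file__.cc"
//   -> symbol _GLOBAL__sub_I_my_file__.cc
```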
+ // TODO: check CXX20ModuleInits + if (astCtx->getCurrentNamedModule() && + !astCtx->getCurrentNamedModule()->isModuleImplementation()) { + llvm::raw_svector_ostream out(fnName); + std::unique_ptr<clang::MangleContext> mangleCtx( + astCtx->createMangleContext()); + cast<clang::ItaniumMangleContext>(*mangleCtx) + .mangleModuleInitializer(astCtx->getCurrentNamedModule(), out); + } else { + fnName += "_GLOBAL__sub_I_"; + fnName += getTransformedFileName(mlirModule); + } + + CIRBaseBuilderTy builder(getContext()); + builder.setInsertionPointToEnd(&mlirModule.getBodyRegion().back()); + auto fnType = cir::FuncType::get({}, builder.getVoidTy()); + cir::FuncOp f = + buildRuntimeFunction(builder, fnName, mlirModule.getLoc(), fnType, + cir::GlobalLinkageKind::ExternalLinkage); + builder.setInsertionPointToStart(f.addEntryBlock()); + for (cir::FuncOp &f : dynamicInitializers) + builder.createCallOp(f.getLoc(), f, {}); + + cir::ReturnOp::create(builder, f.getLoc()); +} + static void lowerArrayDtorCtorIntoLoop(cir::CIRBaseBuilderTy &builder, clang::ASTContext *astCtx, mlir::Operation *op, mlir::Type eltTy, @@ -691,6 +828,8 @@ void LoweringPreparePass::runOnOp(mlir::Operation *op) { lowerComplexDivOp(complexDiv); else if (auto complexMul = mlir::dyn_cast<cir::ComplexMulOp>(op)) lowerComplexMulOp(complexMul); + else if (auto glob = mlir::dyn_cast<cir::GlobalOp>(op)) + lowerGlobalOp(glob); else if (auto unary = mlir::dyn_cast<cir::UnaryOp>(op)) lowerUnaryOp(unary); } @@ -704,12 +843,15 @@ void LoweringPreparePass::runOnOperation() { op->walk([&](mlir::Operation *op) { if (mlir::isa<cir::ArrayCtor, cir::ArrayDtor, cir::CastOp, - cir::ComplexMulOp, cir::ComplexDivOp, cir::UnaryOp>(op)) + cir::ComplexMulOp, cir::ComplexDivOp, cir::GlobalOp, + cir::UnaryOp>(op)) opsToTransform.push_back(op); }); for (mlir::Operation *o : opsToTransform) runOnOp(o); + + buildCXXGlobalInitFunc(); } std::unique_ptr<Pass> mlir::createLoweringPreparePass() { diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index e6e4947..9f30287 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -6784,29 +6784,26 @@ LValue CodeGenFunction::EmitPseudoObjectLValue(const PseudoObjectExpr *E) { return emitPseudoObjectExpr(*this, E, true, AggValueSlot::ignored()).LV; } -void CodeGenFunction::FlattenAccessAndType( - Address Addr, QualType AddrType, - SmallVectorImpl<std::pair<Address, llvm::Value *>> &AccessList, - SmallVectorImpl<QualType> &FlatTypes) { - // WorkList is list of type we are processing + the Index List to access - // the field of that type in Addr for use in a GEP - llvm::SmallVector<std::pair<QualType, llvm::SmallVector<llvm::Value *, 4>>, - 16> +void CodeGenFunction::FlattenAccessAndTypeLValue( + LValue Val, SmallVectorImpl<LValue> &AccessList) { + + llvm::SmallVector< + std::tuple<LValue, QualType, llvm::SmallVector<llvm::Value *, 4>>, 16> WorkList; llvm::IntegerType *IdxTy = llvm::IntegerType::get(getLLVMContext(), 32); - // Addr should be a pointer so we need to 'dereference' it - WorkList.push_back({AddrType, {llvm::ConstantInt::get(IdxTy, 0)}}); + WorkList.push_back({Val, Val.getType(), {llvm::ConstantInt::get(IdxTy, 0)}}); while (!WorkList.empty()) { - auto [T, IdxList] = WorkList.pop_back_val(); + auto [LVal, T, IdxList] = WorkList.pop_back_val(); T = T.getCanonicalType().getUnqualifiedType(); assert(!isa<MatrixType>(T) && "Matrix types not yet supported in HLSL"); + if (const auto *CAT = dyn_cast<ConstantArrayType>(T)) { uint64_t Size = CAT->getZExtSize(); for (int64_t I = 
Size - 1; I > -1; I--) { llvm::SmallVector<llvm::Value *, 4> IdxListCopy = IdxList; IdxListCopy.push_back(llvm::ConstantInt::get(IdxTy, I)); - WorkList.emplace_back(CAT->getElementType(), IdxListCopy); + WorkList.emplace_back(LVal, CAT->getElementType(), IdxListCopy); } } else if (const auto *RT = dyn_cast<RecordType>(T)) { const RecordDecl *Record = RT->getOriginalDecl()->getDefinitionOrSelf(); @@ -6814,44 +6811,75 @@ void CodeGenFunction::FlattenAccessAndType( const CXXRecordDecl *CXXD = dyn_cast<CXXRecordDecl>(Record); - llvm::SmallVector<QualType, 16> FieldTypes; + llvm::SmallVector< + std::tuple<LValue, QualType, llvm::SmallVector<llvm::Value *, 4>>, 16> + ReverseList; if (CXXD && CXXD->isStandardLayout()) Record = CXXD->getStandardLayoutBaseWithFields(); // deal with potential base classes if (CXXD && !CXXD->isStandardLayout()) { - for (auto &Base : CXXD->bases()) - FieldTypes.push_back(Base.getType()); + if (CXXD->getNumBases() > 0) { + assert(CXXD->getNumBases() == 1 && + "HLSL doesn't support multiple inheritance."); + auto Base = CXXD->bases_begin(); + llvm::SmallVector<llvm::Value *, 4> IdxListCopy = IdxList; + IdxListCopy.push_back(llvm::ConstantInt::get( + IdxTy, 0)); // base struct should be at index zero + ReverseList.emplace_back(LVal, Base->getType(), IdxListCopy); + } } - for (auto *FD : Record->fields()) - FieldTypes.push_back(FD->getType()); + const CGRecordLayout &Layout = CGM.getTypes().getCGRecordLayout(Record); - for (int64_t I = FieldTypes.size() - 1; I > -1; I--) { - llvm::SmallVector<llvm::Value *, 4> IdxListCopy = IdxList; - IdxListCopy.push_back(llvm::ConstantInt::get(IdxTy, I)); - WorkList.insert(WorkList.end(), {FieldTypes[I], IdxListCopy}); + llvm::Type *LLVMT = ConvertTypeForMem(T); + CharUnits Align = getContext().getTypeAlignInChars(T); + LValue RLValue; + bool createdGEP = false; + for (auto *FD : Record->fields()) { + if (FD->isBitField()) { + if (FD->isUnnamedBitField()) + continue; + if (!createdGEP) { + createdGEP = true; + Address GEP = Builder.CreateInBoundsGEP(LVal.getAddress(), IdxList, + LLVMT, Align, "gep"); + RLValue = MakeAddrLValue(GEP, T); + } + LValue FieldLVal = EmitLValueForField(RLValue, FD, true); + ReverseList.push_back({FieldLVal, FD->getType(), {}}); + } else { + llvm::SmallVector<llvm::Value *, 4> IdxListCopy = IdxList; + IdxListCopy.push_back( + llvm::ConstantInt::get(IdxTy, Layout.getLLVMFieldNo(FD))); + ReverseList.emplace_back(LVal, FD->getType(), IdxListCopy); + } } + + std::reverse(ReverseList.begin(), ReverseList.end()); + llvm::append_range(WorkList, ReverseList); } else if (const auto *VT = dyn_cast<VectorType>(T)) { llvm::Type *LLVMT = ConvertTypeForMem(T); CharUnits Align = getContext().getTypeAlignInChars(T); - Address GEP = - Builder.CreateInBoundsGEP(Addr, IdxList, LLVMT, Align, "vector.gep"); + Address GEP = Builder.CreateInBoundsGEP(LVal.getAddress(), IdxList, LLVMT, + Align, "vector.gep"); + LValue Base = MakeAddrLValue(GEP, T); for (unsigned I = 0, E = VT->getNumElements(); I < E; I++) { - llvm::Value *Idx = llvm::ConstantInt::get(IdxTy, I); - // gep on vector fields is not recommended so combine gep with - // extract/insert - AccessList.emplace_back(GEP, Idx); - FlatTypes.push_back(VT->getElementType()); + llvm::Constant *Idx = llvm::ConstantInt::get(IdxTy, I); + LValue LV = + LValue::MakeVectorElt(Base.getAddress(), Idx, VT->getElementType(), + Base.getBaseInfo(), TBAAAccessInfo()); + AccessList.emplace_back(LV); } - } else { - // a scalar/builtin type - llvm::Type *LLVMT = ConvertTypeForMem(T); - CharUnits 
Align = getContext().getTypeAlignInChars(T); - Address GEP = - Builder.CreateInBoundsGEP(Addr, IdxList, LLVMT, Align, "gep"); - AccessList.emplace_back(GEP, nullptr); - FlatTypes.push_back(T); + } else { // a scalar/builtin type + if (!IdxList.empty()) { + llvm::Type *LLVMT = ConvertTypeForMem(T); + CharUnits Align = getContext().getTypeAlignInChars(T); + Address GEP = Builder.CreateInBoundsGEP(LVal.getAddress(), IdxList, + LLVMT, Align, "gep"); + AccessList.emplace_back(MakeAddrLValue(GEP, T)); + } else // must be a bitfield we already created an lvalue for + AccessList.emplace_back(LVal); } } } diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index b8150a2..07b9aeb 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -488,100 +488,62 @@ static bool isTrivialFiller(Expr *E) { return false; } -static void EmitHLSLAggregateSplatCast(CodeGenFunction &CGF, Address DestVal, - QualType DestTy, llvm::Value *SrcVal, - QualType SrcTy, SourceLocation Loc) { +// emit an elementwise cast where the RHS is a scalar or vector +// or emit an aggregate splat cast +static void EmitHLSLScalarElementwiseAndSplatCasts(CodeGenFunction &CGF, + LValue DestVal, + llvm::Value *SrcVal, + QualType SrcTy, + SourceLocation Loc) { // Flatten our destination - SmallVector<QualType> DestTypes; // Flattened type - SmallVector<std::pair<Address, llvm::Value *>, 16> StoreGEPList; - // ^^ Flattened accesses to DestVal we want to store into - CGF.FlattenAccessAndType(DestVal, DestTy, StoreGEPList, DestTypes); - - assert(SrcTy->isScalarType() && "Invalid HLSL Aggregate splat cast."); - for (unsigned I = 0, Size = StoreGEPList.size(); I < Size; ++I) { - llvm::Value *Cast = - CGF.EmitScalarConversion(SrcVal, SrcTy, DestTypes[I], Loc); - - // store back - llvm::Value *Idx = StoreGEPList[I].second; - if (Idx) { - llvm::Value *V = - CGF.Builder.CreateLoad(StoreGEPList[I].first, "load.for.insert"); - Cast = CGF.Builder.CreateInsertElement(V, Cast, Idx); - } - CGF.Builder.CreateStore(Cast, StoreGEPList[I].first); - } -} - -// emit a flat cast where the RHS is a scalar, including vector -static void EmitHLSLScalarFlatCast(CodeGenFunction &CGF, Address DestVal, - QualType DestTy, llvm::Value *SrcVal, - QualType SrcTy, SourceLocation Loc) { - // Flatten our destination - SmallVector<QualType, 16> DestTypes; // Flattened type - SmallVector<std::pair<Address, llvm::Value *>, 16> StoreGEPList; - // ^^ Flattened accesses to DestVal we want to store into - CGF.FlattenAccessAndType(DestVal, DestTy, StoreGEPList, DestTypes); - - assert(SrcTy->isVectorType() && "HLSL Flat cast doesn't handle splatting."); - const VectorType *VT = SrcTy->getAs<VectorType>(); - SrcTy = VT->getElementType(); - assert(StoreGEPList.size() <= VT->getNumElements() && - "Cannot perform HLSL flat cast when vector source \ - object has less elements than flattened destination \ - object."); - for (unsigned I = 0, Size = StoreGEPList.size(); I < Size; I++) { - llvm::Value *Load = CGF.Builder.CreateExtractElement(SrcVal, I, "vec.load"); + SmallVector<LValue, 16> StoreList; + CGF.FlattenAccessAndTypeLValue(DestVal, StoreList); + + bool isVector = false; + if (auto *VT = SrcTy->getAs<VectorType>()) { + isVector = true; + SrcTy = VT->getElementType(); + assert(StoreList.size() <= VT->getNumElements() && + "Cannot perform HLSL flat cast when vector source \ + object has less elements than flattened destination \ + object."); + } + + for (unsigned I = 0, Size = StoreList.size(); I < Size; I++) { + LValue DestLVal = 
StoreList[I]; + llvm::Value *Load = + isVector ? CGF.Builder.CreateExtractElement(SrcVal, I, "vec.load") + : SrcVal; llvm::Value *Cast = - CGF.EmitScalarConversion(Load, SrcTy, DestTypes[I], Loc); - - // store back - llvm::Value *Idx = StoreGEPList[I].second; - if (Idx) { - llvm::Value *V = - CGF.Builder.CreateLoad(StoreGEPList[I].first, "load.for.insert"); - Cast = CGF.Builder.CreateInsertElement(V, Cast, Idx); - } - CGF.Builder.CreateStore(Cast, StoreGEPList[I].first); + CGF.EmitScalarConversion(Load, SrcTy, DestLVal.getType(), Loc); + CGF.EmitStoreThroughLValue(RValue::get(Cast), DestLVal); } } // emit a flat cast where the RHS is an aggregate -static void EmitHLSLElementwiseCast(CodeGenFunction &CGF, Address DestVal, - QualType DestTy, Address SrcVal, - QualType SrcTy, SourceLocation Loc) { +static void EmitHLSLElementwiseCast(CodeGenFunction &CGF, LValue DestVal, + LValue SrcVal, SourceLocation Loc) { // Flatten our destination - SmallVector<QualType, 16> DestTypes; // Flattened type - SmallVector<std::pair<Address, llvm::Value *>, 16> StoreGEPList; - // ^^ Flattened accesses to DestVal we want to store into - CGF.FlattenAccessAndType(DestVal, DestTy, StoreGEPList, DestTypes); + SmallVector<LValue, 16> StoreList; + CGF.FlattenAccessAndTypeLValue(DestVal, StoreList); // Flatten our src - SmallVector<QualType, 16> SrcTypes; // Flattened type - SmallVector<std::pair<Address, llvm::Value *>, 16> LoadGEPList; - // ^^ Flattened accesses to SrcVal we want to load from - CGF.FlattenAccessAndType(SrcVal, SrcTy, LoadGEPList, SrcTypes); + SmallVector<LValue, 16> LoadList; + CGF.FlattenAccessAndTypeLValue(SrcVal, LoadList); - assert(StoreGEPList.size() <= LoadGEPList.size() && - "Cannot perform HLSL flat cast when flattened source object \ + assert(StoreList.size() <= LoadList.size() && + "Cannot perform HLSL elementwise cast when flattened source object \ has less elements than flattened destination object."); - // apply casts to what we load from LoadGEPList + // apply casts to what we load from LoadList // and store result in Dest - for (unsigned I = 0, E = StoreGEPList.size(); I < E; I++) { - llvm::Value *Idx = LoadGEPList[I].second; - llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[I].first, "load"); - Load = - Idx ? 
CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract") : Load; - llvm::Value *Cast = - CGF.EmitScalarConversion(Load, SrcTypes[I], DestTypes[I], Loc); - - // store back - Idx = StoreGEPList[I].second; - if (Idx) { - llvm::Value *V = - CGF.Builder.CreateLoad(StoreGEPList[I].first, "load.for.insert"); - Cast = CGF.Builder.CreateInsertElement(V, Cast, Idx); - } - CGF.Builder.CreateStore(Cast, StoreGEPList[I].first); + for (unsigned I = 0, E = StoreList.size(); I < E; I++) { + LValue DestLVal = StoreList[I]; + LValue SrcLVal = LoadList[I]; + RValue RVal = CGF.EmitLoadOfLValue(SrcLVal, Loc); + assert(RVal.isScalar() && "All flattened source values should be scalars"); + llvm::Value *Val = RVal.getScalarVal(); + llvm::Value *Cast = CGF.EmitScalarConversion(Val, SrcLVal.getType(), + DestLVal.getType(), Loc); + CGF.EmitStoreThroughLValue(RValue::get(Cast), DestLVal); } } @@ -988,31 +950,33 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { Expr *Src = E->getSubExpr(); QualType SrcTy = Src->getType(); RValue RV = CGF.EmitAnyExpr(Src); - QualType DestTy = E->getType(); - Address DestVal = Dest.getAddress(); + LValue DestLVal = CGF.MakeAddrLValue(Dest.getAddress(), E->getType()); SourceLocation Loc = E->getExprLoc(); - assert(RV.isScalar() && "RHS of HLSL splat cast must be a scalar."); + assert(RV.isScalar() && SrcTy->isScalarType() && + "RHS of HLSL splat cast must be a scalar."); llvm::Value *SrcVal = RV.getScalarVal(); - EmitHLSLAggregateSplatCast(CGF, DestVal, DestTy, SrcVal, SrcTy, Loc); + EmitHLSLScalarElementwiseAndSplatCasts(CGF, DestLVal, SrcVal, SrcTy, Loc); break; } case CK_HLSLElementwiseCast: { Expr *Src = E->getSubExpr(); QualType SrcTy = Src->getType(); RValue RV = CGF.EmitAnyExpr(Src); - QualType DestTy = E->getType(); - Address DestVal = Dest.getAddress(); + LValue DestLVal = CGF.MakeAddrLValue(Dest.getAddress(), E->getType()); SourceLocation Loc = E->getExprLoc(); if (RV.isScalar()) { llvm::Value *SrcVal = RV.getScalarVal(); - EmitHLSLScalarFlatCast(CGF, DestVal, DestTy, SrcVal, SrcTy, Loc); + assert(SrcTy->isVectorType() && + "HLSL Elementwise cast doesn't handle splatting."); + EmitHLSLScalarElementwiseAndSplatCasts(CGF, DestLVal, SrcVal, SrcTy, Loc); } else { assert(RV.isAggregate() && "Can't perform HLSL Aggregate cast on a complex type."); Address SrcVal = RV.getAggregateAddress(); - EmitHLSLElementwiseCast(CGF, DestVal, DestTy, SrcVal, SrcTy, Loc); + EmitHLSLElementwiseCast(CGF, DestLVal, CGF.MakeAddrLValue(SrcVal, SrcTy), + Loc); } break; } diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index c961222..06d9d81 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -2397,39 +2397,37 @@ bool CodeGenFunction::ShouldNullCheckClassCastValue(const CastExpr *CE) { } // RHS is an aggregate type -static Value *EmitHLSLElementwiseCast(CodeGenFunction &CGF, Address RHSVal, - QualType RHSTy, QualType LHSTy, - SourceLocation Loc) { - SmallVector<std::pair<Address, llvm::Value *>, 16> LoadGEPList; - SmallVector<QualType, 16> SrcTypes; // Flattened type - CGF.FlattenAccessAndType(RHSVal, RHSTy, LoadGEPList, SrcTypes); - // LHS is either a vector or a builtin? +static Value *EmitHLSLElementwiseCast(CodeGenFunction &CGF, LValue SrcVal, + QualType DestTy, SourceLocation Loc) { + SmallVector<LValue, 16> LoadList; + CGF.FlattenAccessAndTypeLValue(SrcVal, LoadList); + // Dest is either a vector or a builtin? 
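Context for the `FlattenAccessAndTypeLValue` calls in these hunks: flattening decomposes an aggregate into scalar LValues in declaration order, and switching the API from raw GEP/index pairs to LValues is what lets bitfields participate, since a bitfield has no addressable GEP and must go through `EmitLValueForField`/`EmitStoreThroughLValue`. An illustrative record (hypothetical; `ext_vector_type` stands in for an HLSL `float3`):

```cpp
typedef float float3 __attribute__((ext_vector_type(3)));

struct S {
  int A[2];       // flattens to A[0], A[1]
  float3 V;       // flattens to V.x, V.y, V.z as vector-element LValues
  unsigned B : 3; // flattens to a bitfield LValue (no GEP exists for it)
};
// Flattened order: { A[0], A[1], V.x, V.y, V.z, B }
```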
// if its a vector create a temp alloca to store into and return that - if (auto *VecTy = LHSTy->getAs<VectorType>()) { - assert(SrcTypes.size() >= VecTy->getNumElements() && - "Flattened type on RHS must have more elements than vector on LHS."); + if (auto *VecTy = DestTy->getAs<VectorType>()) { + assert(LoadList.size() >= VecTy->getNumElements() && + "Flattened type on RHS must have the same number or more elements " + "than vector on LHS."); llvm::Value *V = - CGF.Builder.CreateLoad(CGF.CreateIRTemp(LHSTy, "flatcast.tmp")); + CGF.Builder.CreateLoad(CGF.CreateIRTemp(DestTy, "flatcast.tmp")); // write to V. for (unsigned I = 0, E = VecTy->getNumElements(); I < E; I++) { - llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[I].first, "load"); - llvm::Value *Idx = LoadGEPList[I].second; - Load = Idx ? CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract") - : Load; - llvm::Value *Cast = CGF.EmitScalarConversion( - Load, SrcTypes[I], VecTy->getElementType(), Loc); + RValue RVal = CGF.EmitLoadOfLValue(LoadList[I], Loc); + assert(RVal.isScalar() && + "All flattened source values should be scalars."); + llvm::Value *Cast = + CGF.EmitScalarConversion(RVal.getScalarVal(), LoadList[I].getType(), + VecTy->getElementType(), Loc); V = CGF.Builder.CreateInsertElement(V, Cast, I); } return V; } - // i its a builtin just do an extract element or load. - assert(LHSTy->isBuiltinType() && + // if its a builtin just do an extract element or load. + assert(DestTy->isBuiltinType() && "Destination type must be a vector or builtin type."); - llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[0].first, "load"); - llvm::Value *Idx = LoadGEPList[0].second; - Load = - Idx ? CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract") : Load; - return CGF.EmitScalarConversion(Load, LHSTy, SrcTypes[0], Loc); + RValue RVal = CGF.EmitLoadOfLValue(LoadList[0], Loc); + assert(RVal.isScalar() && "All flattened source values should be scalars."); + return CGF.EmitScalarConversion(RVal.getScalarVal(), LoadList[0].getType(), + DestTy, Loc); } // VisitCastExpr - Emit code for an explicit or implicit cast. Implicit casts @@ -2954,12 +2952,11 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { case CK_HLSLElementwiseCast: { RValue RV = CGF.EmitAnyExpr(E); SourceLocation Loc = CE->getExprLoc(); - QualType SrcTy = E->getType(); assert(RV.isAggregate() && "Not a valid HLSL Elementwise Cast."); // RHS is an aggregate - Address SrcVal = RV.getAggregateAddress(); - return EmitHLSLElementwiseCast(CGF, SrcVal, SrcTy, DestTy, Loc); + LValue SrcVal = CGF.MakeAddrLValue(RV.getAggregateAddress(), E->getType()); + return EmitHLSLElementwiseCast(CGF, SrcVal, DestTy, Loc); } } // end of switch diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 8cda583..fa94692 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -6808,12 +6808,13 @@ public: /// they were computed by collectAttachPtrExprInfo(), if they are semantically /// different. struct AttachPtrExprComparator { - const MappableExprsHandler *Handler = nullptr; + const MappableExprsHandler &Handler; // Cache of previous equality comparison results. mutable llvm::DenseMap<std::pair<const Expr *, const Expr *>, bool> CachedEqualityComparisons; - AttachPtrExprComparator(const MappableExprsHandler *H) : Handler(H) {} + AttachPtrExprComparator(const MappableExprsHandler &H) : Handler(H) {} + AttachPtrExprComparator() = delete; // Return true iff LHS is "less than" RHS. 
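The `Handler` pointer-to-reference change above is the usual pattern for a dependency that is never null and never reseated; deleting the default constructor makes "no handler, no comparator" explicit. A generic sketch of the same pattern (all names hypothetical):

```cpp
#include <algorithm>
#include <vector>

struct Table { int rank(int v) const { return v % 7; } };

struct RankLess {
  const Table &T;      // reference member: non-null, non-reseatable
  RankLess() = delete; // cannot exist without its table
  explicit RankLess(const Table &t) : T(t) {}
  bool operator()(int a, int b) const { return T.rank(a) < T.rank(b); }
};

void sortByRank(std::vector<int> &v, const Table &t) {
  std::sort(v.begin(), v.end(), RankLess(t));
}
```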
bool operator()(const Expr *LHS, const Expr *RHS) const { @@ -6821,15 +6822,15 @@ public: return false; // First, compare by complexity (depth) - const auto ItLHS = Handler->AttachPtrComponentDepthMap.find(LHS); - const auto ItRHS = Handler->AttachPtrComponentDepthMap.find(RHS); + const auto ItLHS = Handler.AttachPtrComponentDepthMap.find(LHS); + const auto ItRHS = Handler.AttachPtrComponentDepthMap.find(RHS); std::optional<size_t> DepthLHS = - (ItLHS != Handler->AttachPtrComponentDepthMap.end()) ? ItLHS->second - : std::nullopt; + (ItLHS != Handler.AttachPtrComponentDepthMap.end()) ? ItLHS->second + : std::nullopt; std::optional<size_t> DepthRHS = - (ItRHS != Handler->AttachPtrComponentDepthMap.end()) ? ItRHS->second - : std::nullopt; + (ItRHS != Handler.AttachPtrComponentDepthMap.end()) ? ItRHS->second + : std::nullopt; // std::nullopt (no attach pointer) has lowest complexity if (!DepthLHS.has_value() && !DepthRHS.has_value()) { @@ -6877,8 +6878,8 @@ public: /// Returns true iff LHS was computed before RHS by /// collectAttachPtrExprInfo(). bool wasComputedBefore(const Expr *LHS, const Expr *RHS) const { - const size_t &OrderLHS = Handler->AttachPtrComputationOrderMap.at(LHS); - const size_t &OrderRHS = Handler->AttachPtrComputationOrderMap.at(RHS); + const size_t &OrderLHS = Handler.AttachPtrComputationOrderMap.at(LHS); + const size_t &OrderRHS = Handler.AttachPtrComputationOrderMap.at(RHS); return OrderLHS < OrderRHS; } @@ -6897,7 +6898,7 @@ public: if (!LHS || !RHS) return false; - ASTContext &Ctx = Handler->CGF.getContext(); + ASTContext &Ctx = Handler.CGF.getContext(); // Strip away parentheses and no-op casts to get to the core expression LHS = LHS->IgnoreParenNoopCasts(Ctx); RHS = RHS->IgnoreParenNoopCasts(Ctx); @@ -7246,6 +7247,10 @@ private: llvm::DenseMap<const Expr *, size_t> AttachPtrComputationOrderMap = { {nullptr, 0}}; + /// An instance of attach-ptr-expr comparator that can be used throughout the + /// lifetime of this handler. + AttachPtrExprComparator AttachPtrComparator; + llvm::Value *getExprTypeSize(const Expr *E) const { QualType ExprTy = E->getType().getCanonicalType(); @@ -8963,7 +8968,7 @@ private: public: MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) - : CurDir(&Dir), CGF(CGF) { + : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) { // Extract firstprivate clause information. for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) for (const auto *D : C->varlist()) @@ -9009,7 +9014,7 @@ public: /// Constructor for the declare mapper directive. 
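[Editorial note] The comparator refactor here (completed in the constructor hunks just below) replaces a nullable handler pointer with a reference and deletes default construction, so a comparator without a handler becomes unrepresentable rather than merely asserted against; the handler can then carry one comparator instance for its whole lifetime. A minimal sketch of the pattern, with invented names and std::map standing in for llvm::DenseMap:

#include <map>
#include <utility>

struct Handler; // the owning object; a reference to it is enough here

struct Comparator {
  const Handler &H; // never null and never reseated, unlike the old pointer
  // Memoize expensive comparisons, as CachedEqualityComparisons does above.
  mutable std::map<std::pair<const void *, const void *>, bool> Cache;

  explicit Comparator(const Handler &H) : H(H) {}
  Comparator() = delete; // cannot exist without a handler
};

struct Handler {
  Comparator Cmp{*this}; // constructed alongside its owner, as in the patch
};

int main() { Handler Owner; (void)Owner; }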
MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) - : CurDir(&Dir), CGF(CGF) {} + : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {} /// Generate code for the combined entry if we have a partially mapped struct /// and take care of the mapping flags of the arguments corresponding to diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index f0565c1..99de6e1 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4464,10 +4464,8 @@ public: AggValueSlot slot = AggValueSlot::ignored()); LValue EmitPseudoObjectLValue(const PseudoObjectExpr *e); - void FlattenAccessAndType( - Address Addr, QualType AddrTy, - SmallVectorImpl<std::pair<Address, llvm::Value *>> &AccessList, - SmallVectorImpl<QualType> &FlatTypes); + void FlattenAccessAndTypeLValue(LValue LVal, + SmallVectorImpl<LValue> &AccessList); llvm::Value *EmitIvarOffset(const ObjCInterfaceDecl *Interface, const ObjCIvarDecl *Ivar); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index f6f7f22..8d019d4 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -493,10 +493,15 @@ CodeGenModule::CodeGenModule(ASTContext &C, auto ReaderOrErr = llvm::IndexedInstrProfReader::create( CodeGenOpts.ProfileInstrumentUsePath, *FS, CodeGenOpts.ProfileRemappingFile); - // We're checking for profile read errors in CompilerInvocation, so if - // there was an error it should've already been caught. If it hasn't been - // somehow, trip an assertion. - assert(ReaderOrErr); + if (auto E = ReaderOrErr.takeError()) { + unsigned DiagID = Diags.getCustomDiagID( + DiagnosticsEngine::Error, "Error in reading profile %0: %1"); + llvm::handleAllErrors(std::move(E), [&](const llvm::ErrorInfoBase &EI) { + Diags.Report(DiagID) + << CodeGenOpts.ProfileInstrumentUsePath << EI.message(); + }); + return; + } PGOReader = std::move(ReaderOrErr.get()); } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 684cc09..107b9ff 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -41,6 +41,7 @@ #include "llvm/Frontend/Debug/Options.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Option/ArgList.h" +#include "llvm/ProfileData/InstrProfReader.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Compression.h" @@ -485,19 +486,47 @@ static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C, } if (ProfileUseArg) { + SmallString<128> UsePathBuf; + StringRef UsePath; if (ProfileUseArg->getOption().matches(options::OPT_fprofile_instr_use_EQ)) - CmdArgs.push_back(Args.MakeArgString( - Twine("-fprofile-instrument-use-path=") + ProfileUseArg->getValue())); + UsePath = ProfileUseArg->getValue(); else if ((ProfileUseArg->getOption().matches( options::OPT_fprofile_use_EQ) || ProfileUseArg->getOption().matches( options::OPT_fprofile_instr_use))) { - SmallString<128> Path( - ProfileUseArg->getNumValues() == 0 ? "" : ProfileUseArg->getValue()); - if (Path.empty() || llvm::sys::fs::is_directory(Path)) - llvm::sys::path::append(Path, "default.profdata"); + UsePathBuf = + ProfileUseArg->getNumValues() == 0 ? 
"" : ProfileUseArg->getValue(); + if (UsePathBuf.empty() || llvm::sys::fs::is_directory(UsePathBuf)) + llvm::sys::path::append(UsePathBuf, "default.profdata"); + UsePath = UsePathBuf; + } + auto ReaderOrErr = + llvm::IndexedInstrProfReader::create(UsePath, D.getVFS()); + if (auto E = ReaderOrErr.takeError()) { + auto DiagID = D.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, "Error in reading profile %0: %1"); + llvm::handleAllErrors(std::move(E), [&](const llvm::ErrorInfoBase &EI) { + D.Diag(DiagID) << UsePath.str() << EI.message(); + }); + } else { + std::unique_ptr<llvm::IndexedInstrProfReader> PGOReader = + std::move(ReaderOrErr.get()); + StringRef UseKind; + // Currently memprof profiles are only added at the IR level. Mark the + // profile type as IR in that case as well and the subsequent matching + // needs to detect which is available (might be one or both). + if (PGOReader->isIRLevelProfile() || PGOReader->hasMemoryProfile()) { + if (PGOReader->hasCSIRLevelProfile()) + UseKind = "csllvm"; + else + UseKind = "llvm"; + } else + UseKind = "clang"; + + CmdArgs.push_back( + Args.MakeArgString("-fprofile-instrument-use=" + UseKind)); CmdArgs.push_back( - Args.MakeArgString(Twine("-fprofile-instrument-use-path=") + Path)); + Args.MakeArgString("-fprofile-instrument-use-path=" + UsePath)); } } diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 4223752..50fd50a 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -533,9 +533,9 @@ static T extractMaskValue(T KeyPath) { #define PARSE_OPTION_WITH_MARSHALLING( \ ARGS, DIAGS, PREFIX_TYPE, SPELLING_OFFSET, ID, KIND, GROUP, ALIAS, \ ALIASARGS, FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, \ - METAVAR, VALUES, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ - IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, MERGER, EXTRACTOR, \ - TABLE_INDEX) \ + METAVAR, VALUES, SUBCOMMANDIDS_OFFSET, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ + DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ + MERGER, EXTRACTOR, TABLE_INDEX) \ if ((VISIBILITY) & options::CC1Option) { \ KEYPATH = MERGER(KEYPATH, DEFAULT_VALUE); \ if (IMPLIED_CHECK) \ @@ -551,8 +551,9 @@ static T extractMaskValue(T KeyPath) { #define GENERATE_OPTION_WITH_MARSHALLING( \ CONSUMER, PREFIX_TYPE, SPELLING_OFFSET, ID, KIND, GROUP, ALIAS, ALIASARGS, \ FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES, \ - SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, IMPLIED_CHECK, \ - IMPLIED_VALUE, NORMALIZER, DENORMALIZER, MERGER, EXTRACTOR, TABLE_INDEX) \ + SUBCOMMANDIDS_OFFSET, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ + IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, MERGER, EXTRACTOR, \ + TABLE_INDEX) \ if ((VISIBILITY) & options::CC1Option) { \ [&](const auto &Extracted) { \ if (ALWAYS_EMIT || \ @@ -1473,34 +1474,6 @@ static std::string serializeXRayInstrumentationBundle(const XRayInstrSet &S) { return Buffer; } -// Set the profile kind using fprofile-instrument-use-path. 
-static void setPGOUseInstrumentor(CodeGenOptions &Opts, - const Twine &ProfileName, - llvm::vfs::FileSystem &FS, - DiagnosticsEngine &Diags) { - auto ReaderOrErr = llvm::IndexedInstrProfReader::create(ProfileName, FS); - if (auto E = ReaderOrErr.takeError()) { - unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, - "Error in reading profile %0: %1"); - llvm::handleAllErrors(std::move(E), [&](const llvm::ErrorInfoBase &EI) { - Diags.Report(DiagID) << ProfileName.str() << EI.message(); - }); - return; - } - std::unique_ptr<llvm::IndexedInstrProfReader> PGOReader = - std::move(ReaderOrErr.get()); - // Currently memprof profiles are only added at the IR level. Mark the profile - // type as IR in that case as well and the subsequent matching needs to detect - // which is available (might be one or both). - if (PGOReader->isIRLevelProfile() || PGOReader->hasMemoryProfile()) { - if (PGOReader->hasCSIRLevelProfile()) - Opts.setProfileUse(llvm::driver::ProfileInstrKind::ProfileCSIRInstr); - else - Opts.setProfileUse(llvm::driver::ProfileInstrKind::ProfileIRInstr); - } else - Opts.setProfileUse(llvm::driver::ProfileInstrKind::ProfileClangInstr); -} - void CompilerInvocation::setDefaultPointerAuthOptions( PointerAuthOptions &Opts, const LangOptions &LangOpts, const llvm::Triple &Triple) { @@ -5090,16 +5063,10 @@ bool CompilerInvocation::CreateFromArgsImpl( append_range(Res.getCodeGenOpts().CommandLineArgs, CommandLineArgs); } - // Set PGOOptions. Need to create a temporary VFS to read the profile - // to determine the PGO type. - if (!Res.getCodeGenOpts().ProfileInstrumentUsePath.empty()) { - auto FS = - createVFSFromOverlayFiles(Res.getHeaderSearchOpts().VFSOverlayFiles, - Diags, llvm::vfs::getRealFileSystem()); - setPGOUseInstrumentor(Res.getCodeGenOpts(), - Res.getCodeGenOpts().ProfileInstrumentUsePath, *FS, - Diags); - } + if (!Res.getCodeGenOpts().ProfileInstrumentUsePath.empty() && + Res.getCodeGenOpts().getProfileUse() == + llvm::driver::ProfileInstrKind::ProfileNone) + Diags.Report(diag::err_drv_profile_instrument_use_path_with_no_kind); FixupInvocation(Res, Diags, Args, DashX); diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h index 4bd7981..d951ba0 100644 --- a/clang/lib/Headers/avx512fp16intrin.h +++ b/clang/lib/Headers/avx512fp16intrin.h @@ -41,7 +41,8 @@ typedef _Float16 __m512h_u __attribute__((__vector_size__(64), __aligned__(1))); #define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 #endif -static __inline__ _Float16 __DEFAULT_FN_ATTRS512 _mm512_cvtsh_h(__m512h __a) { +static __inline__ _Float16 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtsh_h(__m512h __a) { return __a[0]; } diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 754f43a..965741f 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -7330,9 +7330,8 @@ _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_cvtepi32_epi8 (__m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepi32_epi8(__m128i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); @@ -7360,9 +7359,8 @@ _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, 
__M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi32_epi8 (__m256i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi32_epi8(__m256i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v8si)__A, __v8qi), (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, @@ -7370,8 +7368,7 @@ _mm256_cvtepi32_epi8 (__m256i __A) } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) -{ +_mm256_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, (__v16qi) __O, __M); } @@ -7390,9 +7387,8 @@ _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_cvtepi32_epi16 (__m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepi32_epi16(__m128i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7); @@ -7419,9 +7415,8 @@ _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi32_epi16 (__m256i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi32_epi16(__m256i __A) { return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi); } @@ -7446,9 +7441,8 @@ _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_cvtepi64_epi8 (__m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepi64_epi8(__m128i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); @@ -7475,9 +7469,8 @@ _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi64_epi8 (__m256i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi64_epi8(__m256i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); @@ -7504,9 +7497,8 @@ _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_cvtepi64_epi32 (__m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepi64_epi32(__m128i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3); } @@ -7532,23 +7524,20 @@ _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi64_epi32 (__m256i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi64_epi32(__m256i __A) { return (__m128i)__builtin_convertvector((__v4di)__A, __v4si); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtepi64_epi32 (__m128i 
__O, __mmask8 __M, __m256i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm256_cvtepi64_epi32(__A), (__v4si)__O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_cvtepi64_epi32(__mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm256_cvtepi64_epi32(__A), (__v4si)_mm_setzero_si128()); @@ -7560,9 +7549,8 @@ _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_cvtepi64_epi16 (__m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepi64_epi16(__m128i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3, 3, 3, 3, 3); @@ -7590,9 +7578,8 @@ _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi64_epi16 (__m256i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi64_epi16(__m256i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7); diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp index 11d2d5c..999e302c 100644 --- a/clang/lib/Sema/SemaConcept.cpp +++ b/clang/lib/Sema/SemaConcept.cpp @@ -1049,6 +1049,7 @@ ExprResult ConstraintSatisfactionChecker::Evaluate( case NormalizedConstraint::ConstraintKind::Compound: return Evaluate(static_cast<const CompoundConstraint &>(Constraint), MLTAL); } + llvm_unreachable("Unknown ConstraintKind enum"); } static bool CheckConstraintSatisfaction( @@ -2141,6 +2142,7 @@ bool SubstituteParameterMappings::substitute(NormalizedConstraint &N) { return substitute(Compound.getRHS()); } } + llvm_unreachable("Unknown ConstraintKind enum"); } } // namespace @@ -2561,7 +2563,6 @@ FormulaType SubsumptionChecker::Normalize(const NormalizedConstraint &NC) { }; switch (NC.getKind()) { - case NormalizedConstraint::ConstraintKind::Atomic: return {{find(&static_cast<const AtomicConstraint &>(NC))}}; @@ -2601,6 +2602,7 @@ FormulaType SubsumptionChecker::Normalize(const NormalizedConstraint &NC) { return Res; } } + llvm_unreachable("Unknown ConstraintKind enum"); } void SubsumptionChecker::AddUniqueClauseToFormula(Formula &F, Clause C) { diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 0069b08..6eaf7b9 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -11041,17 +11041,6 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, << CUDA().getConfigureFuncName(); Context.setcudaConfigureCallDecl(NewFD); } - - // Variadic functions, other than a *declaration* of printf, are not allowed - // in device-side CUDA code, unless someone passed - // -fcuda-allow-variadic-functions. 
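[Editorial note] The AVX-512 header hunks above can mark these narrowing conversions constexpr because __builtin_convertvector and __builtin_shufflevector are both constant-evaluable in recent Clang. A freestanding reduction of the same truncate-and-pad pattern, using generic vector types so it needs no target headers (a sketch, assuming a Clang new enough to evaluate these builtins at compile time):

typedef int v4si __attribute__((vector_size(16)));
typedef short v4hi __attribute__((vector_size(8)));
typedef short v8hi __attribute__((vector_size(16)));

// Truncate four i32 lanes to i16 and zero-pad to a full 128-bit vector,
// mirroring the _mm_cvtepi32_epi16 implementation above.
constexpr v8hi cvt_epi32_epi16(v4si A) {
  constexpr v4hi Zero = {0, 0, 0, 0};
  return __builtin_shufflevector(__builtin_convertvector(A, v4hi), Zero,
                                 0, 1, 2, 3, 4, 5, 6, 7);
}

constexpr v4si In = {1, 2, 3, 4};
static_assert(cvt_epi32_epi16(In)[2] == 3, "usable in constant expressions");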
- if (!getLangOpts().CUDAAllowVariadicFunctions && NewFD->isVariadic() && - (NewFD->hasAttr<CUDADeviceAttr>() || - NewFD->hasAttr<CUDAGlobalAttr>()) && - !(II && II->isStr("printf") && NewFD->isExternC() && - !D.isFunctionDefinition())) { - Diag(NewFD->getLocation(), diag::err_variadic_device_fn); - } } MarkUnusedFileScopedDecl(NewFD); diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index fa30c66b..2b375b9 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -3571,9 +3571,6 @@ bool SemaHLSL::CanPerformAggregateSplatCast(Expr *Src, QualType DestTy) { if (SrcVecTy) SrcTy = SrcVecTy->getElementType(); - if (ContainsBitField(DestTy)) - return false; - llvm::SmallVector<QualType> DestTypes; BuildFlattenedTypeList(DestTy, DestTypes); @@ -3600,9 +3597,6 @@ bool SemaHLSL::CanPerformElementwiseCast(Expr *Src, QualType DestTy) { (DestTy->isScalarType() || DestTy->isVectorType())) return false; - if (ContainsBitField(DestTy) || ContainsBitField(SrcTy)) - return false; - llvm::SmallVector<QualType> DestTypes; BuildFlattenedTypeList(DestTy, DestTypes); llvm::SmallVector<QualType> SrcTypes; diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index 922fcac..543db46 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -3920,6 +3920,7 @@ bool InitializationSequence::isAmbiguous() const { case FK_AddressOfUnaddressableFunction: case FK_ParenthesizedListInitFailed: case FK_DesignatedInitForNonAggregate: + case FK_HLSLInitListFlatteningFailed: return false; case FK_ReferenceInitOverloadFailed: @@ -4882,8 +4883,10 @@ static void TryListInitialization(Sema &S, bool TreatUnavailableAsInvalid) { QualType DestType = Entity.getType(); - if (S.getLangOpts().HLSL && !S.HLSL().transformInitList(Entity, InitList)) + if (S.getLangOpts().HLSL && !S.HLSL().transformInitList(Entity, InitList)) { + Sequence.SetFailed(InitializationSequence::FK_HLSLInitListFlatteningFailed); return; + } // C++ doesn't allow scalar initialization with more than one argument. // But C99 complex numbers are scalars and it makes sense there. @@ -6817,33 +6820,18 @@ void InitializationSequence::InitializeFrom(Sema &S, assert(Args.size() >= 1 && "Zero-argument case handled above"); // For HLSL ext vector types we allow list initialization behavior for C++ - // constructor syntax. This is accomplished by converting initialization - // arguments an InitListExpr late. + // functional cast expressions which look like constructor syntax. This is + // accomplished by converting initialization arguments to InitListExpr. 
if (S.getLangOpts().HLSL && Args.size() > 1 && DestType->isExtVectorType() && (SourceType.isNull() || !Context.hasSameUnqualifiedType(SourceType, DestType))) { - - llvm::SmallVector<Expr *> InitArgs; - for (auto *Arg : Args) { - if (Arg->getType()->isExtVectorType()) { - const auto *VTy = Arg->getType()->castAs<ExtVectorType>(); - unsigned Elm = VTy->getNumElements(); - for (unsigned Idx = 0; Idx < Elm; ++Idx) { - InitArgs.emplace_back(new (Context) ArraySubscriptExpr( - Arg, - IntegerLiteral::Create( - Context, llvm::APInt(Context.getIntWidth(Context.IntTy), Idx), - Context.IntTy, SourceLocation()), - VTy->getElementType(), Arg->getValueKind(), Arg->getObjectKind(), - SourceLocation())); - } - } else - InitArgs.emplace_back(Arg); - } - InitListExpr *ILE = new (Context) InitListExpr( - S.getASTContext(), SourceLocation(), InitArgs, SourceLocation()); + InitListExpr *ILE = new (Context) + InitListExpr(S.getASTContext(), Args.front()->getBeginLoc(), Args, + Args.back()->getEndLoc()); + ILE->setType(DestType); Args[0] = ILE; - AddListInitializationStep(DestType); + TryListInitialization(S, Entity, Kind, ILE, *this, + TreatUnavailableAsInvalid); return; } @@ -9301,6 +9289,14 @@ bool InitializationSequence::Diagnose(Sema &S, break; } + case InitializationSequence::FK_HLSLInitListFlatteningFailed: { + // Unlike C/C++ list initialization, there is no fallback if it fails. This + // allows us to diagnose the failure when it happens in the + // TryListInitialization call instead of delaying the diagnosis, which is + // beneficial because the flattening is also expensive. + break; + } + case FK_ExplicitConstructor: { S.Diag(Kind.getLocation(), diag::err_selected_explicit_constructor) << Args[0]->getSourceRange(); @@ -9499,6 +9495,10 @@ void InitializationSequence::dump(raw_ostream &OS) const { case FK_DesignatedInitForNonAggregate: OS << "designated initializer for non-aggregate type"; break; + + case FK_HLSLInitListFlatteningFailed: + OS << "HLSL initialization list flattening failed"; + break; } OS << '\n'; return; diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp index 7ad7049..8471f02 100644 --- a/clang/lib/Sema/SemaOpenACC.cpp +++ b/clang/lib/Sema/SemaOpenACC.cpp @@ -2724,16 +2724,6 @@ Expr *GenerateReductionInitRecipeExpr(ASTContext &Context, return InitExpr; } -const Expr *StripOffBounds(const Expr *VarExpr) { - while (isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(VarExpr)) { - if (const auto *AS = dyn_cast<ArraySectionExpr>(VarExpr)) - VarExpr = AS->getBase()->IgnoreParenImpCasts(); - else if (const auto *Sub = dyn_cast<ArraySubscriptExpr>(VarExpr)) - VarExpr = Sub->getBase()->IgnoreParenImpCasts(); - } - return VarExpr; -} - VarDecl *CreateAllocaDecl(ASTContext &Ctx, DeclContext *DC, SourceLocation BeginLoc, IdentifierInfo *VarName, QualType VarTy) { @@ -2794,17 +2784,18 @@ OpenACCPrivateRecipe SemaOpenACC::CreatePrivateInitRecipe(const Expr *VarExpr) { OpenACCFirstPrivateRecipe SemaOpenACC::CreateFirstPrivateInitRecipe(const Expr *VarExpr) { - // TODO: OpenACC: This shouldn't be necessary, see PrivateInitRecipe - VarExpr = StripOffBounds(VarExpr); - + // We don't strip bounds here, so that we are doing our recipe init at the + // 'lowest' possible level. Codegen is going to have to do its own 'looping'. 
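[Editorial note] For the SemaInit change above: a multi-argument constructor-style spelling on an HLSL vector is now wrapped into a single InitListExpr spanning the original arguments and handed straight to TryListInitialization, instead of being hand-flattened into per-element ArraySubscriptExprs. A hypothetical reduction using Clang's ext_vector_type extension as a stand-in for HLSL vectors (in HLSL proper the input spelling would be float4(ab, c, d)):

typedef float float2 __attribute__((ext_vector_type(2)));
typedef float float4 __attribute__((ext_vector_type(4)));

float4 make(float2 ab, float c, float d) {
  // List initialization flattens the vector argument itself, so the
  // InitListExpr {ab, c, d} built by the patch denotes the same value as:
  return float4{ab.x, ab.y, c, d};
}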
if (!VarExpr || VarExpr->getType()->isDependentType()) return OpenACCFirstPrivateRecipe::Empty(); QualType VarTy = VarExpr->getType().getNonReferenceType().getUnqualifiedType(); - // TODO: OpenACC: for arrays/bounds versions, we're going to have to do a - // different initializer, but for now we can go ahead with this. + // Array sections are special, and we have to treat them that way. + if (const auto *ASE = + dyn_cast<ArraySectionExpr>(VarExpr->IgnoreParenImpCasts())) + VarTy = ArraySectionExpr::getBaseOriginalType(ASE); VarDecl *AllocaDecl = CreateAllocaDecl( getASTContext(), SemaRef.getCurContext(), VarExpr->getBeginLoc(), diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index b870114..5657dfe 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -4413,14 +4413,23 @@ CompareImplicitConversionSequences(Sema &S, SourceLocation Loc, Result = CompareStandardConversionSequences(S, Loc, ICS1.Standard, ICS2.Standard); else if (ICS1.isUserDefined()) { + // With lazy template loading, it is possible to find non-canonical + // FunctionDecls, depending on when redecl chains are completed. Make sure + // to compare the canonical decls of conversion functions. This avoids + // ambiguity problems for templated conversion operators. + const FunctionDecl *ConvFunc1 = ICS1.UserDefined.ConversionFunction; + if (ConvFunc1) + ConvFunc1 = ConvFunc1->getCanonicalDecl(); + const FunctionDecl *ConvFunc2 = ICS2.UserDefined.ConversionFunction; + if (ConvFunc2) + ConvFunc2 = ConvFunc2->getCanonicalDecl(); // User-defined conversion sequence U1 is a better conversion // sequence than another user-defined conversion sequence U2 if // they contain the same user-defined conversion function or // constructor and if the second standard conversion sequence of // U1 is better than the second standard conversion sequence of // U2 (C++ 13.3.3.2p3). - if (ICS1.UserDefined.ConversionFunction == - ICS2.UserDefined.ConversionFunction) + if (ConvFunc1 == ConvFunc2) Result = CompareStandardConversionSequences(S, Loc, ICS1.UserDefined.After, ICS2.UserDefined.After); diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp index e1f4d0d..b0096d8 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp @@ -509,6 +509,8 @@ bool initializeScanCompilerInstance( ScanInstance.getFrontendOpts().DisableFree = false; ScanInstance.getFrontendOpts().GenerateGlobalModuleIndex = false; ScanInstance.getFrontendOpts().UseGlobalModuleIndex = false; + ScanInstance.getFrontendOpts().GenReducedBMI = false; + ScanInstance.getFrontendOpts().ModuleOutputPath.clear(); // This will prevent us compiling individual modules asynchronously since // FileManager is not thread-safe, but it does improve performance for now. ScanInstance.getFrontendOpts().ModulesShareFileManager = true; diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp index d67178c..a117bec 100644 --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -263,6 +263,10 @@ makeCommonInvocationForModuleBuild(CompilerInvocation CI) { // units. 
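[Editorial note] The SemaOverload fix above compares conversion functions by canonical declaration because lazy template loading can leave the two candidate sequences holding different redeclarations of the same function. A plain-C++ analogy of why canonicalizing stabilizes the identity check (all names invented):

#include <cassert>

// Each declaration of an entity is a node on its redeclaration chain; the
// canonical declaration is the first node, shared by all redeclarations.
struct DeclNode {
  const DeclNode *Previous = nullptr; // earlier redeclaration, if any
  const DeclNode *canonical() const {
    const DeclNode *D = this;
    while (D->Previous)
      D = D->Previous;
    return D;
  }
};

int main() {
  DeclNode First;          // e.g. the declaration already in the AST
  DeclNode Second{&First}; // e.g. a lazily deserialized redeclaration
  assert(&First != &Second);                        // distinct Decl nodes...
  assert(First.canonical() == Second.canonical()); // ...but the same entity
}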
CI.getFrontendOpts().Inputs.clear(); CI.getFrontendOpts().OutputFile.clear(); + CI.getFrontendOpts().GenReducedBMI = false; + CI.getFrontendOpts().ModuleOutputPath.clear(); + CI.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = false; + CI.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = false; // LLVM options are not going to affect the AST CI.getFrontendOpts().LLVMArgs.clear();
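[Editorial note] Both dependency-scanning hunks clear the same reduced-BMI options, so neither a scan nor a module build inherits per-TU output settings from the driving command line. Condensed into one helper for illustration, as it might look inside the clang tree (the function name is invented; the fields are exactly those cleared above):

static void stripPerTUModuleOutputs(CompilerInvocation &CI) {
  // Per-TU output settings must not leak into the canonical invocation
  // used to build (and cache) a module.
  CI.getFrontendOpts().GenReducedBMI = false;
  CI.getFrontendOpts().ModuleOutputPath.clear();
  // For module builds, also undo search-path and diagnostic-option pruning
  // so the canonical command line stays self-contained, as in
  // makeCommonInvocationForModuleBuild above.
  CI.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = false;
  CI.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = false;
}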