diff options
Diffstat (limited to 'clang/lib')
104 files changed, 1913 insertions, 1491 deletions
diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 4ab4dee..f656687 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -2063,12 +2063,9 @@ bool Compiler<Emitter>::visitCallArgs(ArrayRef<const Expr *> Args, const FunctionDecl *FuncDecl, bool Activate) { assert(VarScope->getKind() == ScopeKind::Call); - bool HasNonNullAttr = false; llvm::BitVector NonNullArgs; - if (FuncDecl && FuncDecl->hasAttr<NonNullAttr>()) { - HasNonNullAttr = true; + if (FuncDecl && FuncDecl->hasAttr<NonNullAttr>()) NonNullArgs = collectNonNullArgs(FuncDecl, Args); - } unsigned ArgIndex = 0; for (const Expr *Arg : Args) { @@ -2094,7 +2091,7 @@ bool Compiler<Emitter>::visitCallArgs(ArrayRef<const Expr *> Args, return false; } - if (HasNonNullAttr && NonNullArgs[ArgIndex]) { + if (!NonNullArgs.empty() && NonNullArgs[ArgIndex]) { PrimType ArgT = classify(Arg).value_or(PT_Ptr); if (ArgT == PT_Ptr) { if (!this->emitCheckNonNullArg(ArgT, Arg)) @@ -5997,6 +5994,23 @@ bool Compiler<Emitter>::checkLiteralType(const Expr *E) { return this->emitCheckLiteralType(E->getType().getTypePtr(), E); } +static bool initNeedsOverridenLoc(const CXXCtorInitializer *Init) { + const Expr *InitExpr = Init->getInit(); + + if (!Init->isWritten() && !Init->isInClassMemberInitializer() && + !isa<CXXConstructExpr>(InitExpr)) + return true; + + if (const auto *CE = dyn_cast<CXXConstructExpr>(InitExpr)) { + const CXXConstructorDecl *Ctor = CE->getConstructor(); + if (Ctor->isDefaulted() && Ctor->isCopyOrMoveConstructor() && + Ctor->isTrivial()) + return true; + } + + return false; +} + template <class Emitter> bool Compiler<Emitter>::compileConstructor(const CXXConstructorDecl *Ctor) { assert(!ReturnType); @@ -6071,10 +6085,7 @@ bool Compiler<Emitter>::compileConstructor(const CXXConstructorDecl *Ctor) { const Record::Field *F = R->getField(Member); LocOverrideScope<Emitter> LOS(this, SourceInfo{}, - !Init->isWritten() && - !Init->isInClassMemberInitializer() && - (!isa<CXXConstructExpr>(InitExpr) || - Member->isAnonymousStructOrUnion())); + initNeedsOverridenLoc(Init)); if (!emitFieldInitializer(F, F->Offset, InitExpr, IsUnion)) return false; } else if (const Type *Base = Init->getBaseClass()) { @@ -6104,10 +6115,7 @@ bool Compiler<Emitter>::compileConstructor(const CXXConstructorDecl *Ctor) { return false; } else if (const IndirectFieldDecl *IFD = Init->getIndirectMember()) { LocOverrideScope<Emitter> LOS(this, SourceInfo{}, - !Init->isWritten() && - !Init->isInClassMemberInitializer() && - !isa<CXXConstructExpr>(InitExpr)); - + initNeedsOverridenLoc(Init)); assert(IFD->getChainingSize() >= 2); unsigned NestedFieldOffset = 0; diff --git a/clang/lib/AST/ByteCode/Descriptor.cpp b/clang/lib/AST/ByteCode/Descriptor.cpp index 7403e90..629c1ff 100644 --- a/clang/lib/AST/ByteCode/Descriptor.cpp +++ b/clang/lib/AST/ByteCode/Descriptor.cpp @@ -153,28 +153,6 @@ static void dtorArrayDesc(Block *B, std::byte *Ptr, const Descriptor *D) { } } -static void moveArrayDesc(Block *B, std::byte *Src, std::byte *Dst, - const Descriptor *D) { - const unsigned NumElems = D->getNumElems(); - const unsigned ElemSize = - D->ElemDesc->getAllocSize() + sizeof(InlineDescriptor); - - unsigned ElemOffset = 0; - for (unsigned I = 0; I < NumElems; ++I, ElemOffset += ElemSize) { - auto *SrcPtr = Src + ElemOffset; - auto *DstPtr = Dst + ElemOffset; - - auto *SrcDesc = reinterpret_cast<InlineDescriptor *>(SrcPtr); - auto *SrcElemLoc = reinterpret_cast<std::byte *>(SrcDesc + 1); - auto *DstDesc = reinterpret_cast<InlineDescriptor *>(DstPtr); - auto *DstElemLoc = reinterpret_cast<std::byte *>(DstDesc + 1); - - *DstDesc = *SrcDesc; - if (auto Fn = D->ElemDesc->MoveFn) - Fn(B, SrcElemLoc, DstElemLoc, D->ElemDesc); - } -} - static void initField(Block *B, std::byte *Ptr, bool IsConst, bool IsMutable, bool IsVolatile, bool IsActive, bool IsUnionField, bool InUnion, const Descriptor *D, unsigned FieldOffset) { @@ -268,45 +246,6 @@ static void dtorRecord(Block *B, std::byte *Ptr, const Descriptor *D) { destroyBase(B, Ptr, F.Desc, F.Offset); } -static void moveRecord(Block *B, std::byte *Src, std::byte *Dst, - const Descriptor *D) { - assert(D); - assert(D->ElemRecord); - - // FIXME: Code duplication. - for (const auto &F : D->ElemRecord->fields()) { - auto FieldOffset = F.Offset; - const auto *SrcDesc = - reinterpret_cast<const InlineDescriptor *>(Src + FieldOffset) - 1; - auto *DestDesc = - reinterpret_cast<InlineDescriptor *>(Dst + FieldOffset) - 1; - std::memcpy(DestDesc, SrcDesc, sizeof(InlineDescriptor)); - - if (auto Fn = F.Desc->MoveFn) - Fn(B, Src + FieldOffset, Dst + FieldOffset, F.Desc); - } - - for (const auto &Base : D->ElemRecord->bases()) { - auto BaseOffset = Base.Offset; - const auto *SrcDesc = - reinterpret_cast<const InlineDescriptor *>(Src + BaseOffset) - 1; - auto *DestDesc = reinterpret_cast<InlineDescriptor *>(Dst + BaseOffset) - 1; - std::memcpy(DestDesc, SrcDesc, sizeof(InlineDescriptor)); - - if (auto Fn = Base.Desc->MoveFn) - Fn(B, Src + BaseOffset, Dst + BaseOffset, Base.Desc); - } - - for (const auto &VBase : D->ElemRecord->virtual_bases()) { - auto VBaseOffset = VBase.Offset; - const auto *SrcDesc = - reinterpret_cast<const InlineDescriptor *>(Src + VBaseOffset) - 1; - auto *DestDesc = - reinterpret_cast<InlineDescriptor *>(Dst + VBaseOffset) - 1; - std::memcpy(DestDesc, SrcDesc, sizeof(InlineDescriptor)); - } -} - static BlockCtorFn getCtorPrim(PrimType Type) { // Floating types are special. They are primitives, but need their // constructor called. @@ -337,18 +276,6 @@ static BlockDtorFn getDtorPrim(PrimType Type) { COMPOSITE_TYPE_SWITCH(Type, return dtorTy<T>, return nullptr); } -static BlockMoveFn getMovePrim(PrimType Type) { - if (Type == PT_Float) - return moveTy<PrimConv<PT_Float>::T>; - if (Type == PT_IntAP) - return moveTy<PrimConv<PT_IntAP>::T>; - if (Type == PT_IntAPS) - return moveTy<PrimConv<PT_IntAPS>::T>; - if (Type == PT_MemberPtr) - return moveTy<PrimConv<PT_MemberPtr>::T>; - COMPOSITE_TYPE_SWITCH(Type, return moveTy<T>, return nullptr); -} - static BlockCtorFn getCtorArrayPrim(PrimType Type) { TYPE_SWITCH(Type, return ctorArrayTy<T>); llvm_unreachable("unknown Expr"); @@ -359,11 +286,6 @@ static BlockDtorFn getDtorArrayPrim(PrimType Type) { llvm_unreachable("unknown Expr"); } -static BlockMoveFn getMoveArrayPrim(PrimType Type) { - TYPE_SWITCH(Type, return moveArrayTy<T>); - llvm_unreachable("unknown Expr"); -} - /// Primitives. Descriptor::Descriptor(const DeclTy &D, const Type *SourceTy, PrimType Type, MetadataSize MD, bool IsConst, bool IsTemporary, @@ -372,7 +294,7 @@ Descriptor::Descriptor(const DeclTy &D, const Type *SourceTy, PrimType Type, MDSize(MD.value_or(0)), AllocSize(align(Size + MDSize)), PrimT(Type), IsConst(IsConst), IsMutable(IsMutable), IsTemporary(IsTemporary), IsVolatile(IsVolatile), CtorFn(getCtorPrim(Type)), - DtorFn(getDtorPrim(Type)), MoveFn(getMovePrim(Type)) { + DtorFn(getDtorPrim(Type)) { assert(AllocSize >= Size); assert(Source && "Missing source"); } @@ -386,7 +308,7 @@ Descriptor::Descriptor(const DeclTy &D, PrimType Type, MetadataSize MD, AllocSize(align(MDSize) + align(Size) + sizeof(InitMapPtr)), PrimT(Type), IsConst(IsConst), IsMutable(IsMutable), IsTemporary(IsTemporary), IsArray(true), CtorFn(getCtorArrayPrim(Type)), - DtorFn(getDtorArrayPrim(Type)), MoveFn(getMoveArrayPrim(Type)) { + DtorFn(getDtorArrayPrim(Type)) { assert(Source && "Missing source"); assert(NumElems <= (MaxArrayElemBytes / ElemSize)); } @@ -399,7 +321,7 @@ Descriptor::Descriptor(const DeclTy &D, PrimType Type, MetadataSize MD, AllocSize(MDSize + sizeof(InitMapPtr) + alignof(void *)), PrimT(Type), IsConst(IsConst), IsMutable(false), IsTemporary(IsTemporary), IsArray(true), CtorFn(getCtorArrayPrim(Type)), - DtorFn(getDtorArrayPrim(Type)), MoveFn(getMoveArrayPrim(Type)) { + DtorFn(getDtorArrayPrim(Type)) { assert(Source && "Missing source"); } @@ -414,7 +336,7 @@ Descriptor::Descriptor(const DeclTy &D, const Type *SourceTy, AllocSize(std::max<size_t>(alignof(void *), Size) + MDSize), ElemDesc(Elem), IsConst(IsConst), IsMutable(IsMutable), IsTemporary(IsTemporary), IsArray(true), CtorFn(ctorArrayDesc), - DtorFn(dtorArrayDesc), MoveFn(moveArrayDesc) { + DtorFn(dtorArrayDesc) { assert(Source && "Missing source"); } @@ -425,7 +347,7 @@ Descriptor::Descriptor(const DeclTy &D, const Descriptor *Elem, MetadataSize MD, Size(UnknownSizeMark), MDSize(MD.value_or(0)), AllocSize(MDSize + alignof(void *)), ElemDesc(Elem), IsConst(true), IsMutable(false), IsTemporary(IsTemporary), IsArray(true), - CtorFn(ctorArrayDesc), DtorFn(dtorArrayDesc), MoveFn(moveArrayDesc) { + CtorFn(ctorArrayDesc), DtorFn(dtorArrayDesc) { assert(Source && "Missing source"); } @@ -437,7 +359,7 @@ Descriptor::Descriptor(const DeclTy &D, const Record *R, MetadataSize MD, Size(ElemSize), MDSize(MD.value_or(0)), AllocSize(Size + MDSize), ElemRecord(R), IsConst(IsConst), IsMutable(IsMutable), IsTemporary(IsTemporary), IsVolatile(IsVolatile), CtorFn(ctorRecord), - DtorFn(dtorRecord), MoveFn(moveRecord) { + DtorFn(dtorRecord) { assert(Source && "Missing source"); } diff --git a/clang/lib/AST/ByteCode/Descriptor.h b/clang/lib/AST/ByteCode/Descriptor.h index 4c925f6..cd34e11 100644 --- a/clang/lib/AST/ByteCode/Descriptor.h +++ b/clang/lib/AST/ByteCode/Descriptor.h @@ -41,14 +41,6 @@ using BlockCtorFn = void (*)(Block *Storage, std::byte *FieldPtr, bool IsConst, using BlockDtorFn = void (*)(Block *Storage, std::byte *FieldPtr, const Descriptor *FieldDesc); -/// Invoked when a block with pointers referencing it goes out of scope. Such -/// blocks are persisted: the move function copies all inline descriptors and -/// non-trivial fields, as existing pointers might need to reference those -/// descriptors. Data is not copied since it cannot be legally read. -using BlockMoveFn = void (*)(Block *Storage, std::byte *SrcFieldPtr, - std::byte *DstFieldPtr, - const Descriptor *FieldDesc); - enum class GlobalInitState { Initialized, NoInitializer, @@ -181,7 +173,6 @@ public: /// Storage management methods. const BlockCtorFn CtorFn = nullptr; const BlockDtorFn DtorFn = nullptr; - const BlockMoveFn MoveFn = nullptr; /// Allocates a descriptor for a primitive. Descriptor(const DeclTy &D, const Type *SourceTy, PrimType Type, diff --git a/clang/lib/AST/ByteCode/DynamicAllocator.cpp b/clang/lib/AST/ByteCode/DynamicAllocator.cpp index 9b8b664..f38d585 100644 --- a/clang/lib/AST/ByteCode/DynamicAllocator.cpp +++ b/clang/lib/AST/ByteCode/DynamicAllocator.cpp @@ -13,25 +13,6 @@ using namespace clang; using namespace clang::interp; -// FIXME: There is a peculiar problem with the way we track pointers -// to blocks and the way we allocate dynamic memory. -// -// When we have code like this: -// while (true) { -// char *buffer = new char[1024]; -// delete[] buffer; -// } -// -// We have a local variable 'buffer' pointing to the heap allocated memory. -// When deallocating the memory via delete[], that local variable still -// points to the memory, which means we will create a DeadBlock for it and move -// it over to that block, essentially duplicating the allocation. Moving -// the data is also slow. -// -// However, when we actually try to access the allocation after it has been -// freed, we need the block to still exist (alive or dead) so we can tell -// that it's a dynamic allocation. - DynamicAllocator::~DynamicAllocator() { cleanup(); } void DynamicAllocator::cleanup() { @@ -42,8 +23,11 @@ void DynamicAllocator::cleanup() { for (auto &Iter : AllocationSites) { auto &AllocSite = Iter.second; for (auto &Alloc : AllocSite.Allocations) { - Block *B = reinterpret_cast<Block *>(Alloc.Memory.get()); + Block *B = Alloc.block(); + assert(!B->IsDead); + assert(B->isInitialized()); B->invokeDtor(); + if (B->hasPointers()) { while (B->Pointers) { Pointer *Next = B->Pointers->asBlockPointer().Next; @@ -89,6 +73,12 @@ Block *DynamicAllocator::allocate(const Descriptor *D, unsigned EvalID, assert(D); assert(D->asExpr()); + // Garbage collection. Remove all dead allocations that don't have pointers to + // them anymore. + llvm::erase_if(DeadAllocations, [](Allocation &Alloc) -> bool { + return !Alloc.block()->hasPointers(); + }); + auto Memory = std::make_unique<std::byte[]>(sizeof(Block) + D->getAllocSize()); auto *B = new (Memory.get()) Block(EvalID, D, /*isStatic=*/false); @@ -128,23 +118,39 @@ bool DynamicAllocator::deallocate(const Expr *Source, return false; auto &Site = It->second; - assert(Site.size() > 0); + assert(!Site.empty()); // Find the Block to delete. auto AllocIt = llvm::find_if(Site.Allocations, [&](const Allocation &A) { - const Block *B = reinterpret_cast<const Block *>(A.Memory.get()); - return BlockToDelete == B; + return BlockToDelete == A.block(); }); assert(AllocIt != Site.Allocations.end()); - Block *B = reinterpret_cast<Block *>(AllocIt->Memory.get()); + Block *B = AllocIt->block(); + assert(B->isInitialized()); + assert(!B->IsDead); B->invokeDtor(); - S.deallocate(B); - Site.Allocations.erase(AllocIt); + // Almost all our dynamic allocations have a pointer pointing to them + // when we deallocate them, since otherwise we can't call delete() at all. + // This means that we would usually need to create DeadBlocks for all of them. + // To work around that, we instead mark them as dead without moving the data + // over to a DeadBlock and simply keep the block in a separate DeadAllocations + // list. + if (B->hasPointers()) { + B->IsDead = true; + DeadAllocations.push_back(std::move(*AllocIt)); + Site.Allocations.erase(AllocIt); + + if (Site.size() == 0) + AllocationSites.erase(It); + return true; + } - if (Site.size() == 0) + // Get rid of the allocation altogether. + Site.Allocations.erase(AllocIt); + if (Site.empty()) AllocationSites.erase(It); return true; diff --git a/clang/lib/AST/ByteCode/DynamicAllocator.h b/clang/lib/AST/ByteCode/DynamicAllocator.h index cff09bf..31d0e58 100644 --- a/clang/lib/AST/ByteCode/DynamicAllocator.h +++ b/clang/lib/AST/ByteCode/DynamicAllocator.h @@ -43,6 +43,7 @@ private: std::unique_ptr<std::byte[]> Memory; Allocation(std::unique_ptr<std::byte[]> Memory) : Memory(std::move(Memory)) {} + Block *block() const { return reinterpret_cast<Block *>(Memory.get()); } }; struct AllocationSite { @@ -55,6 +56,7 @@ private: } size_t size() const { return Allocations.size(); } + bool empty() const { return Allocations.empty(); } }; public: @@ -65,8 +67,6 @@ public: void cleanup(); - unsigned getNumAllocations() const { return AllocationSites.size(); } - /// Allocate ONE element of the given descriptor. Block *allocate(const Descriptor *D, unsigned EvalID, Form AllocForm); /// Allocate \p NumElements primitive elements of the given type. @@ -96,8 +96,13 @@ public: return llvm::make_range(AllocationSites.begin(), AllocationSites.end()); } + bool hasAllocations() const { return !AllocationSites.empty(); } + private: llvm::DenseMap<const Expr *, AllocationSite> AllocationSites; + // Allocations that have already been deallocated but had pointers + // to them. + llvm::SmallVector<Allocation> DeadAllocations; using PoolAllocTy = llvm::BumpPtrAllocator; PoolAllocTy DescAllocator; diff --git a/clang/lib/AST/ByteCode/EvalEmitter.cpp b/clang/lib/AST/ByteCode/EvalEmitter.cpp index 976b7c0..9ed61c7 100644 --- a/clang/lib/AST/ByteCode/EvalEmitter.cpp +++ b/clang/lib/AST/ByteCode/EvalEmitter.cpp @@ -292,7 +292,7 @@ bool EvalEmitter::emitGetLocal(uint32_t I, const SourceInfo &Info) { Block *B = getLocal(I); - if (!CheckLocalLoad(S, OpPC, Pointer(B))) + if (!CheckLocalLoad(S, OpPC, B)) return false; S.Stk.push<T>(*reinterpret_cast<T *>(B->data())); diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index eb4e480..b5c044c 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -211,25 +211,26 @@ static void diagnoseNonConstVariable(InterpState &S, CodePtr OpPC, S.Note(VD->getLocation(), diag::note_declared_at); } -static bool CheckTemporary(InterpState &S, CodePtr OpPC, const Pointer &Ptr, +static bool CheckTemporary(InterpState &S, CodePtr OpPC, const Block *B, AccessKinds AK) { - if (auto ID = Ptr.getDeclID()) { - if (!Ptr.isStaticTemporary()) + if (B->getDeclID()) { + if (!(B->isStatic() && B->isTemporary())) return true; const auto *MTE = dyn_cast_if_present<MaterializeTemporaryExpr>( - Ptr.getDeclDesc()->asExpr()); + B->getDescriptor()->asExpr()); if (!MTE) return true; // FIXME(perf): Since we do this check on every Load from a static // temporary, it might make sense to cache the value of the // isUsableInConstantExpressions call. - if (!MTE->isUsableInConstantExpressions(S.getASTContext()) && - Ptr.block()->getEvalID() != S.Ctx.getEvalID()) { + if (B->getEvalID() != S.Ctx.getEvalID() && + !MTE->isUsableInConstantExpressions(S.getASTContext())) { const SourceInfo &E = S.Current->getSource(OpPC); S.FFDiag(E, diag::note_constexpr_access_static_temporary, 1) << AK; - S.Note(Ptr.getDeclLoc(), diag::note_constexpr_temporary_here); + S.Note(B->getDescriptor()->getLocation(), + diag::note_constexpr_temporary_here); return false; } } @@ -517,7 +518,7 @@ bool CheckNull(InterpState &S, CodePtr OpPC, const Pointer &Ptr, bool CheckRange(InterpState &S, CodePtr OpPC, const Pointer &Ptr, AccessKinds AK) { - if (!Ptr.isOnePastEnd()) + if (!Ptr.isOnePastEnd() && !Ptr.isZeroSizeArray()) return true; if (S.getLangOpts().CPlusPlus) { const SourceInfo &Loc = S.Current->getSource(OpPC); @@ -658,17 +659,19 @@ static bool CheckVolatile(InterpState &S, CodePtr OpPC, const Pointer &Ptr, return false; } -bool CheckInitialized(InterpState &S, CodePtr OpPC, const Pointer &Ptr, - AccessKinds AK) { +bool DiagnoseUninitialized(InterpState &S, CodePtr OpPC, const Pointer &Ptr, + AccessKinds AK) { assert(Ptr.isLive()); + assert(!Ptr.isInitialized()); + return DiagnoseUninitialized(S, OpPC, Ptr.isExtern(), Ptr.getDeclDesc(), AK); +} - if (Ptr.isInitialized()) - return true; - - if (Ptr.isExtern() && S.checkingPotentialConstantExpression()) +bool DiagnoseUninitialized(InterpState &S, CodePtr OpPC, bool Extern, + const Descriptor *Desc, AccessKinds AK) { + if (Extern && S.checkingPotentialConstantExpression()) return false; - if (const auto *VD = Ptr.getDeclDesc()->asVarDecl(); + if (const auto *VD = Desc->asVarDecl(); VD && (VD->isConstexpr() || VD->hasGlobalStorage())) { if (VD == S.EvaluatingDecl && @@ -703,9 +706,9 @@ bool CheckInitialized(InterpState &S, CodePtr OpPC, const Pointer &Ptr, return false; } -static bool CheckLifetime(InterpState &S, CodePtr OpPC, const Pointer &Ptr, +static bool CheckLifetime(InterpState &S, CodePtr OpPC, Lifetime LT, AccessKinds AK) { - if (Ptr.getLifetime() == Lifetime::Started) + if (LT == Lifetime::Started) return true; if (!S.checkingPotentialConstantExpression()) { @@ -715,11 +718,11 @@ static bool CheckLifetime(InterpState &S, CodePtr OpPC, const Pointer &Ptr, return false; } -static bool CheckWeak(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { - if (!Ptr.isWeak()) +static bool CheckWeak(InterpState &S, CodePtr OpPC, const Block *B) { + if (!B->isWeak()) return true; - const auto *VD = Ptr.getDeclDesc()->asVarDecl(); + const auto *VD = B->getDescriptor()->asVarDecl(); assert(VD); S.FFDiag(S.Current->getLocation(OpPC), diag::note_constexpr_var_init_weak) << VD; @@ -732,32 +735,56 @@ static bool CheckWeak(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { // ones removed that are impossible on primitive global values. // For example, since those can't be members of structs, they also can't // be mutable. -bool CheckGlobalLoad(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { - if (!CheckExtern(S, OpPC, Ptr)) +bool CheckGlobalLoad(InterpState &S, CodePtr OpPC, const Block *B) { + const auto &Desc = + *reinterpret_cast<const GlobalInlineDescriptor *>(B->rawData()); + if (!CheckExtern(S, OpPC, Pointer(const_cast<Block *>(B)))) return false; - if (!CheckConstant(S, OpPC, Ptr)) + if (!CheckConstant(S, OpPC, B->getDescriptor())) return false; - if (!CheckDummy(S, OpPC, Ptr, AK_Read)) + if (!CheckDummy(S, OpPC, B, AK_Read)) return false; - if (!CheckInitialized(S, OpPC, Ptr, AK_Read)) + if (Desc.InitState != GlobalInitState::Initialized) + return DiagnoseUninitialized(S, OpPC, B->isExtern(), B->getDescriptor(), + AK_Read); + if (!CheckTemporary(S, OpPC, B, AK_Read)) return false; - if (!CheckTemporary(S, OpPC, Ptr, AK_Read)) + if (!CheckWeak(S, OpPC, B)) return false; - if (!CheckWeak(S, OpPC, Ptr)) - return false; - if (!CheckVolatile(S, OpPC, Ptr, AK_Read)) + if (B->getDescriptor()->IsVolatile) { + if (!S.getLangOpts().CPlusPlus) + return Invalid(S, OpPC); + + const ValueDecl *D = B->getDescriptor()->asValueDecl(); + S.FFDiag(S.Current->getLocation(OpPC), + diag::note_constexpr_access_volatile_obj, 1) + << AK_Read << 1 << D; + S.Note(D->getLocation(), diag::note_constexpr_volatile_here) << 1; return false; + } return true; } // Similarly, for local loads. -bool CheckLocalLoad(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { - if (!CheckLifetime(S, OpPC, Ptr, AK_Read)) - return false; - if (!CheckInitialized(S, OpPC, Ptr, AK_Read)) - return false; - if (!CheckVolatile(S, OpPC, Ptr, AK_Read)) +bool CheckLocalLoad(InterpState &S, CodePtr OpPC, const Block *B) { + assert(!B->isExtern()); + const auto &Desc = *reinterpret_cast<const InlineDescriptor *>(B->rawData()); + if (!CheckLifetime(S, OpPC, Desc.LifeState, AK_Read)) + return false; + if (!Desc.IsInitialized) + return DiagnoseUninitialized(S, OpPC, /*Extern=*/false, B->getDescriptor(), + AK_Read); + if (B->getDescriptor()->IsVolatile) { + if (!S.getLangOpts().CPlusPlus) + return Invalid(S, OpPC); + + const ValueDecl *D = B->getDescriptor()->asValueDecl(); + S.FFDiag(S.Current->getLocation(OpPC), + diag::note_constexpr_access_volatile_obj, 1) + << AK_Read << 1 << D; + S.Note(D->getLocation(), diag::note_constexpr_volatile_here) << 1; return false; + } return true; } @@ -769,19 +796,19 @@ bool CheckLoad(InterpState &S, CodePtr OpPC, const Pointer &Ptr, return false; if (!CheckConstant(S, OpPC, Ptr)) return false; - if (!CheckDummy(S, OpPC, Ptr, AK)) + if (Ptr.isBlockPointer() && !CheckDummy(S, OpPC, Ptr.block(), AK)) return false; if (!CheckRange(S, OpPC, Ptr, AK)) return false; if (!CheckActive(S, OpPC, Ptr, AK)) return false; - if (!CheckLifetime(S, OpPC, Ptr, AK)) + if (!CheckLifetime(S, OpPC, Ptr.getLifetime(), AK)) return false; - if (!CheckInitialized(S, OpPC, Ptr, AK)) + if (!Ptr.isInitialized()) + return DiagnoseUninitialized(S, OpPC, Ptr, AK); + if (Ptr.isBlockPointer() && !CheckTemporary(S, OpPC, Ptr.block(), AK)) return false; - if (!CheckTemporary(S, OpPC, Ptr, AK)) - return false; - if (!CheckWeak(S, OpPC, Ptr)) + if (Ptr.isBlockPointer() && !CheckWeak(S, OpPC, Ptr.block())) return false; if (!CheckMutable(S, OpPC, Ptr)) return false; @@ -798,21 +825,19 @@ bool CheckFinalLoad(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { if (!CheckConstant(S, OpPC, Ptr)) return false; - if (!CheckDummy(S, OpPC, Ptr, AK_Read)) + if (Ptr.isBlockPointer() && !CheckDummy(S, OpPC, Ptr.block(), AK_Read)) return false; if (!CheckExtern(S, OpPC, Ptr)) return false; - if (!CheckRange(S, OpPC, Ptr, AK_Read)) - return false; if (!CheckActive(S, OpPC, Ptr, AK_Read)) return false; - if (!CheckLifetime(S, OpPC, Ptr, AK_Read)) - return false; - if (!CheckInitialized(S, OpPC, Ptr, AK_Read)) + if (!CheckLifetime(S, OpPC, Ptr.getLifetime(), AK_Read)) return false; - if (!CheckTemporary(S, OpPC, Ptr, AK_Read)) + if (!Ptr.isInitialized()) + return DiagnoseUninitialized(S, OpPC, Ptr, AK_Read); + if (Ptr.isBlockPointer() && !CheckTemporary(S, OpPC, Ptr.block(), AK_Read)) return false; - if (!CheckWeak(S, OpPC, Ptr)) + if (Ptr.isBlockPointer() && !CheckWeak(S, OpPC, Ptr.block())) return false; if (!CheckMutable(S, OpPC, Ptr)) return false; @@ -822,9 +847,9 @@ bool CheckFinalLoad(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { bool CheckStore(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { if (!CheckLive(S, OpPC, Ptr, AK_Assign)) return false; - if (!CheckDummy(S, OpPC, Ptr, AK_Assign)) + if (Ptr.isBlockPointer() && !CheckDummy(S, OpPC, Ptr.block(), AK_Assign)) return false; - if (!CheckLifetime(S, OpPC, Ptr, AK_Assign)) + if (!CheckLifetime(S, OpPC, Ptr.getLifetime(), AK_Assign)) return false; if (!CheckExtern(S, OpPC, Ptr)) return false; @@ -1098,12 +1123,11 @@ bool CheckDeclRef(InterpState &S, CodePtr OpPC, const DeclRefExpr *DR) { return diagnoseUnknownDecl(S, OpPC, D); } -bool CheckDummy(InterpState &S, CodePtr OpPC, const Pointer &Ptr, - AccessKinds AK) { - if (!Ptr.isDummy()) +bool CheckDummy(InterpState &S, CodePtr OpPC, const Block *B, AccessKinds AK) { + const Descriptor *Desc = B->getDescriptor(); + if (!Desc->isDummy()) return true; - const Descriptor *Desc = Ptr.getDeclDesc(); const ValueDecl *D = Desc->asValueDecl(); if (!D) return false; @@ -1426,7 +1450,7 @@ static bool checkConstructor(InterpState &S, CodePtr OpPC, const Function *Func, bool CheckDestructor(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { if (!CheckLive(S, OpPC, Ptr, AK_Destroy)) return false; - if (!CheckTemporary(S, OpPC, Ptr, AK_Destroy)) + if (!CheckTemporary(S, OpPC, Ptr.block(), AK_Destroy)) return false; if (!CheckRange(S, OpPC, Ptr, AK_Destroy)) return false; @@ -1620,8 +1644,17 @@ bool CallVirt(InterpState &S, CodePtr OpPC, const Function *Func, const auto *StaticDecl = cast<CXXRecordDecl>(Func->getParentDecl()); const auto *InitialFunction = cast<CXXMethodDecl>(Callee); - const CXXMethodDecl *Overrider = S.getContext().getOverridingFunction( - DynamicDecl, StaticDecl, InitialFunction); + const CXXMethodDecl *Overrider; + + if (StaticDecl != DynamicDecl) { + if (!DynamicDecl->isDerivedFrom(StaticDecl)) + return false; + Overrider = S.getContext().getOverridingFunction(DynamicDecl, StaticDecl, + InitialFunction); + + } else { + Overrider = InitialFunction; + } if (Overrider != InitialFunction) { // DR1872: An instantiated virtual constexpr function can't be called in a @@ -1749,7 +1782,7 @@ static void startLifetimeRecurse(const Pointer &Ptr) { bool StartLifetime(InterpState &S, CodePtr OpPC) { const auto &Ptr = S.Stk.peek<Pointer>(); - if (!CheckDummy(S, OpPC, Ptr, AK_Destroy)) + if (Ptr.isBlockPointer() && !CheckDummy(S, OpPC, Ptr.block(), AK_Destroy)) return false; startLifetimeRecurse(Ptr.narrow()); return true; @@ -1780,7 +1813,7 @@ static void endLifetimeRecurse(const Pointer &Ptr) { /// Ends the lifetime of the peek'd pointer. bool EndLifetime(InterpState &S, CodePtr OpPC) { const auto &Ptr = S.Stk.peek<Pointer>(); - if (!CheckDummy(S, OpPC, Ptr, AK_Destroy)) + if (Ptr.isBlockPointer() && !CheckDummy(S, OpPC, Ptr.block(), AK_Destroy)) return false; endLifetimeRecurse(Ptr.narrow()); return true; @@ -1789,7 +1822,7 @@ bool EndLifetime(InterpState &S, CodePtr OpPC) { /// Ends the lifetime of the pop'd pointer. bool EndLifetimePop(InterpState &S, CodePtr OpPC) { const auto &Ptr = S.Stk.pop<Pointer>(); - if (!CheckDummy(S, OpPC, Ptr, AK_Destroy)) + if (Ptr.isBlockPointer() && !CheckDummy(S, OpPC, Ptr.block(), AK_Destroy)) return false; endLifetimeRecurse(Ptr.narrow()); return true; @@ -1804,16 +1837,16 @@ bool CheckNewTypeMismatch(InterpState &S, CodePtr OpPC, const Expr *E, // Similar to CheckStore(), but with the additional CheckTemporary() call and // the AccessKinds are different. - if (!CheckTemporary(S, OpPC, Ptr, AK_Construct)) + if (!CheckTemporary(S, OpPC, Ptr.block(), AK_Construct)) return false; if (!CheckLive(S, OpPC, Ptr, AK_Construct)) return false; - if (!CheckDummy(S, OpPC, Ptr, AK_Construct)) + if (!CheckDummy(S, OpPC, Ptr.block(), AK_Construct)) return false; // CheckLifetime for this and all base pointers. for (Pointer P = Ptr;;) { - if (!CheckLifetime(S, OpPC, P, AK_Construct)) + if (!CheckLifetime(S, OpPC, P.getLifetime(), AK_Construct)) return false; if (P.isRoot()) diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index 8a28106..0d3f492 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -51,8 +51,7 @@ bool CheckLive(InterpState &S, CodePtr OpPC, const Pointer &Ptr, AccessKinds AK); /// Checks if a pointer is a dummy pointer. -bool CheckDummy(InterpState &S, CodePtr OpPC, const Pointer &Ptr, - AccessKinds AK); +bool CheckDummy(InterpState &S, CodePtr OpPC, const Block *B, AccessKinds AK); /// Checks if a pointer is null. bool CheckNull(InterpState &S, CodePtr OpPC, const Pointer &Ptr, @@ -89,11 +88,14 @@ bool CheckLoad(InterpState &S, CodePtr OpPC, const Pointer &Ptr, AccessKinds AK = AK_Read); bool CheckFinalLoad(InterpState &S, CodePtr OpPC, const Pointer &Ptr); -bool CheckInitialized(InterpState &S, CodePtr OpPC, const Pointer &Ptr, - AccessKinds AK); +bool DiagnoseUninitialized(InterpState &S, CodePtr OpPC, const Pointer &Ptr, + AccessKinds AK); +bool DiagnoseUninitialized(InterpState &S, CodePtr OpPC, bool Extern, + const Descriptor *Desc, AccessKinds AK); + /// Checks a direct load of a primitive value from a global or local variable. -bool CheckGlobalLoad(InterpState &S, CodePtr OpPC, const Pointer &Ptr); -bool CheckLocalLoad(InterpState &S, CodePtr OpPC, const Pointer &Ptr); +bool CheckGlobalLoad(InterpState &S, CodePtr OpPC, const Block *B); +bool CheckLocalLoad(InterpState &S, CodePtr OpPC, const Block *B); /// Checks if a value can be stored in a block. bool CheckStore(InterpState &S, CodePtr OpPC, const Pointer &Ptr); @@ -1351,10 +1353,10 @@ inline bool ConstFloat(InterpState &S, CodePtr OpPC, const Floating &F) { template <PrimType Name, class T = typename PrimConv<Name>::T> bool GetLocal(InterpState &S, CodePtr OpPC, uint32_t I) { - const Pointer &Ptr = S.Current->getLocalPointer(I); - if (!CheckLocalLoad(S, OpPC, Ptr)) + const Block *B = S.Current->getLocalBlock(I); + if (!CheckLocalLoad(S, OpPC, B)) return false; - S.Stk.push<T>(Ptr.deref<T>()); + S.Stk.push<T>(B->deref<T>()); return true; } @@ -1465,22 +1467,26 @@ bool SetThisField(InterpState &S, CodePtr OpPC, uint32_t I) { template <PrimType Name, class T = typename PrimConv<Name>::T> bool GetGlobal(InterpState &S, CodePtr OpPC, uint32_t I) { - const Pointer &Ptr = S.P.getPtrGlobal(I); + const Block *B = S.P.getGlobal(I); - if (!CheckGlobalLoad(S, OpPC, Ptr)) + if (!CheckGlobalLoad(S, OpPC, B)) return false; - S.Stk.push<T>(Ptr.deref<T>()); + S.Stk.push<T>(B->deref<T>()); return true; } /// Same as GetGlobal, but without the checks. template <PrimType Name, class T = typename PrimConv<Name>::T> bool GetGlobalUnchecked(InterpState &S, CodePtr OpPC, uint32_t I) { - const Pointer &Ptr = S.P.getPtrGlobal(I); - if (!CheckInitialized(S, OpPC, Ptr, AK_Read)) - return false; - S.Stk.push<T>(Ptr.deref<T>()); + const Block *B = S.P.getGlobal(I); + const auto &Desc = + *reinterpret_cast<const GlobalInlineDescriptor *>(B->rawData()); + if (Desc.InitState != GlobalInitState::Initialized) + return DiagnoseUninitialized(S, OpPC, B->isExtern(), B->getDescriptor(), + AK_Read); + + S.Stk.push<T>(B->deref<T>()); return true; } @@ -2351,8 +2357,8 @@ static inline bool IncDecPtrHelper(InterpState &S, CodePtr OpPC, static inline bool IncPtr(InterpState &S, CodePtr OpPC) { const Pointer &Ptr = S.Stk.pop<Pointer>(); - if (!CheckInitialized(S, OpPC, Ptr, AK_Increment)) - return false; + if (!Ptr.isInitialized()) + return DiagnoseUninitialized(S, OpPC, Ptr, AK_Increment); return IncDecPtrHelper<ArithOp::Add>(S, OpPC, Ptr); } @@ -2360,8 +2366,8 @@ static inline bool IncPtr(InterpState &S, CodePtr OpPC) { static inline bool DecPtr(InterpState &S, CodePtr OpPC) { const Pointer &Ptr = S.Stk.pop<Pointer>(); - if (!CheckInitialized(S, OpPC, Ptr, AK_Decrement)) - return false; + if (!Ptr.isInitialized()) + return DiagnoseUninitialized(S, OpPC, Ptr, AK_Decrement); return IncDecPtrHelper<ArithOp::Sub>(S, OpPC, Ptr); } @@ -3195,6 +3201,9 @@ inline bool GetMemberPtr(InterpState &S, CodePtr OpPC, const ValueDecl *D) { inline bool GetMemberPtrBase(InterpState &S, CodePtr OpPC) { const auto &MP = S.Stk.pop<MemberPointer>(); + if (!MP.isBaseCastPossible()) + return false; + S.Stk.push<Pointer>(MP.getBase()); return true; } diff --git a/clang/lib/AST/ByteCode/InterpBlock.cpp b/clang/lib/AST/ByteCode/InterpBlock.cpp index 963b54e..8b7f6a7 100644 --- a/clang/lib/AST/ByteCode/InterpBlock.cpp +++ b/clang/lib/AST/ByteCode/InterpBlock.cpp @@ -64,7 +64,7 @@ void Block::removePointer(Pointer *P) { } void Block::cleanup() { - if (Pointers == nullptr && IsDead) + if (Pointers == nullptr && !IsDynamic && IsDead) (reinterpret_cast<DeadBlock *>(this + 1) - 1)->free(); } @@ -133,8 +133,7 @@ DeadBlock::DeadBlock(DeadBlock *&Root, Block *Blk) } void DeadBlock::free() { - if (B.IsInitialized) - B.invokeDtor(); + assert(!B.isInitialized()); if (Prev) Prev->Next = Next; diff --git a/clang/lib/AST/ByteCode/InterpBlock.h b/clang/lib/AST/ByteCode/InterpBlock.h index 5162223..07194d6 100644 --- a/clang/lib/AST/ByteCode/InterpBlock.h +++ b/clang/lib/AST/ByteCode/InterpBlock.h @@ -103,6 +103,10 @@ public: return reinterpret_cast<const std::byte *>(this) + sizeof(Block); } + template <typename T> T deref() const { + return *reinterpret_cast<const T *>(data()); + } + /// Invokes the constructor. void invokeCtor() { assert(!IsInitialized); diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index f908d02..c835bd4 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -276,7 +276,7 @@ static bool interp__builtin_strlen(InterpState &S, CodePtr OpPC, if (!CheckLive(S, OpPC, StrPtr, AK_Read)) return false; - if (!CheckDummy(S, OpPC, StrPtr, AK_Read)) + if (!CheckDummy(S, OpPC, StrPtr.block(), AK_Read)) return false; assert(StrPtr.getFieldDesc()->isPrimitiveArray()); @@ -2232,7 +2232,7 @@ static bool interp__builtin_is_within_lifetime(InterpState &S, CodePtr OpPC, return false; if (!CheckMutable(S, OpPC, Ptr)) return false; - if (!CheckDummy(S, OpPC, Ptr, AK_Read)) + if (!CheckDummy(S, OpPC, Ptr.block(), AK_Read)) return false; } diff --git a/clang/lib/AST/ByteCode/InterpFrame.cpp b/clang/lib/AST/ByteCode/InterpFrame.cpp index 14f99c7..f2eac86 100644 --- a/clang/lib/AST/ByteCode/InterpFrame.cpp +++ b/clang/lib/AST/ByteCode/InterpFrame.cpp @@ -133,6 +133,11 @@ static bool shouldSkipInBacktrace(const Function *F) { MD && MD->getParent()->isAnonymousStructOrUnion()) return true; + if (const auto *Ctor = dyn_cast<CXXConstructorDecl>(FD); + Ctor && Ctor->isDefaulted() && Ctor->isTrivial() && + Ctor->isCopyOrMoveConstructor() && Ctor->inits().empty()) + return true; + return false; } @@ -226,6 +231,10 @@ Pointer InterpFrame::getLocalPointer(unsigned Offset) const { return Pointer(localBlock(Offset)); } +Block *InterpFrame::getLocalBlock(unsigned Offset) const { + return localBlock(Offset); +} + Pointer InterpFrame::getParamPointer(unsigned Off) { // Return the block if it was created previously. if (auto Pt = Params.find(Off); Pt != Params.end()) diff --git a/clang/lib/AST/ByteCode/InterpFrame.h b/clang/lib/AST/ByteCode/InterpFrame.h index cfebe93..4be5391 100644 --- a/clang/lib/AST/ByteCode/InterpFrame.h +++ b/clang/lib/AST/ByteCode/InterpFrame.h @@ -86,6 +86,7 @@ public: /// Returns a pointer to a local variables. Pointer getLocalPointer(unsigned Offset) const; + Block *getLocalBlock(unsigned Offset) const; /// Returns the value of an argument. template <typename T> const T &getParam(unsigned Offset) const { diff --git a/clang/lib/AST/ByteCode/InterpState.cpp b/clang/lib/AST/ByteCode/InterpState.cpp index a06b125..32f940c 100644 --- a/clang/lib/AST/ByteCode/InterpState.cpp +++ b/clang/lib/AST/ByteCode/InterpState.cpp @@ -76,9 +76,7 @@ bool InterpState::reportOverflow(const Expr *E, const llvm::APSInt &Value) { void InterpState::deallocate(Block *B) { assert(B); - const Descriptor *Desc = B->getDescriptor(); - assert(Desc); - + assert(!B->isDynamic()); // The block might have a pointer saved in a field in its data // that points to the block itself. We call the dtor first, // which will destroy all the data but leave InlineDescriptors @@ -87,6 +85,7 @@ void InterpState::deallocate(Block *B) { if (B->IsInitialized) B->invokeDtor(); + assert(!B->isInitialized()); if (B->hasPointers()) { size_t Size = B->getSize(); // Allocate a new block, transferring over pointers. @@ -95,24 +94,20 @@ void InterpState::deallocate(Block *B) { auto *D = new (Memory) DeadBlock(DeadBlocks, B); // Since the block doesn't hold any actual data anymore, we can just // memcpy() everything over. - std::memcpy(D->rawData(), B->rawData(), Desc->getAllocSize()); - D->B.IsInitialized = B->IsInitialized; - - // We moved the contents over to the DeadBlock. - B->IsInitialized = false; + std::memcpy(D->rawData(), B->rawData(), Size); + D->B.IsInitialized = false; } } bool InterpState::maybeDiagnoseDanglingAllocations() { - bool NoAllocationsLeft = (Alloc.getNumAllocations() == 0); + bool NoAllocationsLeft = !Alloc.hasAllocations(); if (!checkingPotentialConstantExpression()) { - for (const auto &It : Alloc.allocation_sites()) { - assert(It.second.size() > 0); + for (const auto &[Source, Site] : Alloc.allocation_sites()) { + assert(!Site.empty()); - const Expr *Source = It.first; CCEDiag(Source->getExprLoc(), diag::note_constexpr_memory_leak) - << (It.second.size() - 1) << Source->getSourceRange(); + << (Site.size() - 1) << Source->getSourceRange(); } } // Keep evaluating before C++20, since the CXXNewExpr wasn't valid there diff --git a/clang/lib/AST/ByteCode/MemberPointer.h b/clang/lib/AST/ByteCode/MemberPointer.h index b17ce25..8dd75ca 100644 --- a/clang/lib/AST/ByteCode/MemberPointer.h +++ b/clang/lib/AST/ByteCode/MemberPointer.h @@ -51,6 +51,12 @@ public: FunctionPointer toFunctionPointer(const Context &Ctx) const; + bool isBaseCastPossible() const { + if (PtrOffset < 0) + return true; + return static_cast<uint64_t>(PtrOffset) <= Base.getByteOffset(); + } + Pointer getBase() const { if (PtrOffset < 0) return Base.atField(-PtrOffset); diff --git a/clang/lib/AST/CommentParser.cpp b/clang/lib/AST/CommentParser.cpp index e61846d..2e5821a 100644 --- a/clang/lib/AST/CommentParser.cpp +++ b/clang/lib/AST/CommentParser.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang/AST/CommentParser.h" +#include "clang/AST/Comment.h" #include "clang/AST/CommentCommandTraits.h" #include "clang/AST/CommentSema.h" #include "clang/Basic/CharInfo.h" @@ -569,6 +570,8 @@ BlockCommandComment *Parser::parseBlockCommand() { InlineCommandComment *Parser::parseInlineCommand() { assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command)); + CommandMarkerKind CMK = + Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At; const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID()); const Token CommandTok = Tok; @@ -580,7 +583,7 @@ InlineCommandComment *Parser::parseInlineCommand() { InlineCommandComment *IC = S.actOnInlineCommand( CommandTok.getLocation(), CommandTok.getEndLocation(), - CommandTok.getCommandID(), Args); + CommandTok.getCommandID(), CMK, Args); if (Args.size() < Info->NumArgs) { Diag(CommandTok.getEndLocation().getLocWithOffset(1), diff --git a/clang/lib/AST/CommentSema.cpp b/clang/lib/AST/CommentSema.cpp index 88520d7..c02983b 100644 --- a/clang/lib/AST/CommentSema.cpp +++ b/clang/lib/AST/CommentSema.cpp @@ -363,12 +363,13 @@ void Sema::actOnTParamCommandFinish(TParamCommandComment *Command, InlineCommandComment * Sema::actOnInlineCommand(SourceLocation CommandLocBegin, SourceLocation CommandLocEnd, unsigned CommandID, + CommandMarkerKind CommandMarker, ArrayRef<Comment::Argument> Args) { StringRef CommandName = Traits.getCommandInfo(CommandID)->Name; - return new (Allocator) - InlineCommandComment(CommandLocBegin, CommandLocEnd, CommandID, - getInlineCommandRenderKind(CommandName), Args); + return new (Allocator) InlineCommandComment( + CommandLocBegin, CommandLocEnd, CommandID, + getInlineCommandRenderKind(CommandName), CommandMarker, Args); } InlineContentComment *Sema::actOnUnknownCommand(SourceLocation LocBegin, diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 34af9cc..d9c6632 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -9860,11 +9860,15 @@ bool PointerExprEvaluator::VisitCastExpr(const CastExpr *E) { if (Value.isInt()) { unsigned Size = Info.Ctx.getTypeSize(E->getType()); uint64_t N = Value.getInt().extOrTrunc(Size).getZExtValue(); - Result.Base = (Expr*)nullptr; - Result.InvalidBase = false; - Result.Offset = CharUnits::fromQuantity(N); - Result.Designator.setInvalid(); - Result.IsNullPtr = false; + if (N == Info.Ctx.getTargetNullPointerValue(E->getType())) { + Result.setNull(Info.Ctx, E->getType()); + } else { + Result.Base = (Expr *)nullptr; + Result.InvalidBase = false; + Result.Offset = CharUnits::fromQuantity(N); + Result.Designator.setInvalid(); + Result.IsNullPtr = false; + } return true; } else { // In rare instances, the value isn't an lvalue. @@ -11624,7 +11628,13 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } case Builtin::BI__builtin_elementwise_add_sat: - case Builtin::BI__builtin_elementwise_sub_sat: { + case Builtin::BI__builtin_elementwise_sub_sat: + case clang::X86::BI__builtin_ia32_pmulhuw128: + case clang::X86::BI__builtin_ia32_pmulhuw256: + case clang::X86::BI__builtin_ia32_pmulhuw512: + case clang::X86::BI__builtin_ia32_pmulhw128: + case clang::X86::BI__builtin_ia32_pmulhw256: + case clang::X86::BI__builtin_ia32_pmulhw512: { APValue SourceLHS, SourceRHS; if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) || !EvaluateAsRValue(Info, E->getArg(1), SourceRHS)) @@ -11649,6 +11659,18 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { APSInt(LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS), DestEltTy->isUnsignedIntegerOrEnumerationType()))); break; + case clang::X86::BI__builtin_ia32_pmulhuw128: + case clang::X86::BI__builtin_ia32_pmulhuw256: + case clang::X86::BI__builtin_ia32_pmulhuw512: + ResultElements.push_back(APValue(APSInt(llvm::APIntOps::mulhu(LHS, RHS), + /*isUnsigned=*/true))); + break; + case clang::X86::BI__builtin_ia32_pmulhw128: + case clang::X86::BI__builtin_ia32_pmulhw256: + case clang::X86::BI__builtin_ia32_pmulhw512: + ResultElements.push_back(APValue(APSInt(llvm::APIntOps::mulhs(LHS, RHS), + /*isUnsigned=*/false))); + break; } } diff --git a/clang/lib/AST/OpenACCClause.cpp b/clang/lib/AST/OpenACCClause.cpp index f7a98bd..fe20004 100644 --- a/clang/lib/AST/OpenACCClause.cpp +++ b/clang/lib/AST/OpenACCClause.cpp @@ -329,10 +329,11 @@ OpenACCPrivateClause::Create(const ASTContext &C, SourceLocation BeginLoc, OpenACCFirstPrivateClause *OpenACCFirstPrivateClause::Create( const ASTContext &C, SourceLocation BeginLoc, SourceLocation LParenLoc, - ArrayRef<Expr *> VarList, ArrayRef<VarDecl *> InitRecipes, + ArrayRef<Expr *> VarList, ArrayRef<OpenACCFirstPrivateRecipe> InitRecipes, SourceLocation EndLoc) { - void *Mem = - C.Allocate(OpenACCFirstPrivateClause::totalSizeToAlloc<Expr *, VarDecl *>( + void *Mem = C.Allocate( + OpenACCFirstPrivateClause::totalSizeToAlloc<Expr *, + OpenACCFirstPrivateRecipe>( VarList.size(), InitRecipes.size())); return new (Mem) OpenACCFirstPrivateClause(BeginLoc, LParenLoc, VarList, InitRecipes, EndLoc); diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index 4c36f24..0297f9c 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -2645,8 +2645,10 @@ void OpenACCClauseProfiler::VisitFirstPrivateClause( const OpenACCFirstPrivateClause &Clause) { VisitClauseWithVarList(Clause); - for (auto *VD : Clause.getInitRecipes()) - Profiler.VisitDecl(VD); + for (auto &Recipe : Clause.getInitRecipes()) { + Profiler.VisitDecl(Recipe.RecipeDecl); + Profiler.VisitDecl(Recipe.InitFromTemporary); + } } void OpenACCClauseProfiler::VisitAttachClause( diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 141edc8..03d7413 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -5246,6 +5246,15 @@ bool Type::isHLSLResourceRecord() const { return HLSLAttributedResourceType::findHandleTypeOnResource(this) != nullptr; } +bool Type::isHLSLResourceRecordArray() const { + const Type *Ty = getUnqualifiedDesugaredType(); + if (!Ty->isArrayType()) + return false; + while (isa<ConstantArrayType>(Ty)) + Ty = Ty->getArrayElementTypeNoTypeQual(); + return Ty->isHLSLResourceRecord(); +} + bool Type::isHLSLIntangibleType() const { const Type *Ty = getUnqualifiedDesugaredType(); diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 40dff7e..f4ead3d 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -1986,6 +1986,14 @@ public: const auto *FD = dyn_cast<FunctionDecl>(CE->getDirectCallee()); if (!FD) return false; + + bool IsGlobalAndNotInAnyNamespace = + FD->isGlobal() && !FD->getEnclosingNamespaceContext()->isNamespace(); + + // A libc function must either be in the std:: namespace or a global + // function that is not in any namespace: + if (!FD->isInStdNamespace() && !IsGlobalAndNotInAnyNamespace) + return false; auto isSingleStringLiteralArg = false; if (CE->getNumArgs() == 1) { isSingleStringLiteralArg = diff --git a/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp b/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp index be21ce9..b8663eb 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp @@ -16,6 +16,7 @@ // //===----------------------------------------------------------------------===// +#include "CIRGenCleanup.h" #include "CIRGenFunction.h" #include "clang/CIR/MissingFeatures.h" @@ -33,6 +34,52 @@ using namespace clang::CIRGen; void EHScopeStack::Cleanup::anchor() {} +/// Push an entry of the given size onto this protected-scope stack. +char *EHScopeStack::allocate(size_t size) { + size = llvm::alignTo(size, ScopeStackAlignment); + if (!startOfBuffer) { + unsigned capacity = llvm::PowerOf2Ceil(std::max(size, 1024ul)); + startOfBuffer = std::make_unique<char[]>(capacity); + startOfData = endOfBuffer = startOfBuffer.get() + capacity; + } else if (static_cast<size_t>(startOfData - startOfBuffer.get()) < size) { + unsigned currentCapacity = endOfBuffer - startOfBuffer.get(); + unsigned usedCapacity = + currentCapacity - (startOfData - startOfBuffer.get()); + unsigned requiredCapacity = usedCapacity + size; + // We know from the 'else if' condition that requiredCapacity is greater + // than currentCapacity. + unsigned newCapacity = llvm::PowerOf2Ceil(requiredCapacity); + + std::unique_ptr<char[]> newStartOfBuffer = + std::make_unique<char[]>(newCapacity); + char *newEndOfBuffer = newStartOfBuffer.get() + newCapacity; + char *newStartOfData = newEndOfBuffer - usedCapacity; + memcpy(newStartOfData, startOfData, usedCapacity); + startOfBuffer.swap(newStartOfBuffer); + endOfBuffer = newEndOfBuffer; + startOfData = newStartOfData; + } + + assert(startOfBuffer.get() + size <= startOfData); + startOfData -= size; + return startOfData; +} + +void EHScopeStack::deallocate(size_t size) { + startOfData += llvm::alignTo(size, ScopeStackAlignment); +} + +void *EHScopeStack::pushCleanup(CleanupKind kind, size_t size) { + char *buffer = allocate(size); + + // When the full implementation is upstreamed, this will allocate + // extra memory for and construct a wrapper object that is used to + // manage the cleanup generation. + assert(!cir::MissingFeatures::ehCleanupScope()); + + return buffer; +} + static mlir::Block *getCurCleanupBlock(CIRGenFunction &cgf) { mlir::OpBuilder::InsertionGuard guard(cgf.getBuilder()); mlir::Block *cleanup = @@ -44,26 +91,34 @@ static mlir::Block *getCurCleanupBlock(CIRGenFunction &cgf) { /// current insertion point is threaded through the cleanup, as are /// any branch fixups on the cleanup. void CIRGenFunction::popCleanupBlock() { - assert(!ehStack.cleanupStack.empty() && "cleanup stack is empty!"); + assert(!ehStack.empty() && "cleanup stack is empty!"); + + // The memory for the cleanup continues to be owned by the EHScopeStack + // allocator, so we just destroy the object rather than attempting to + // free it. + EHScopeStack::Cleanup &cleanup = *ehStack.begin(); + + // The eventual implementation here will use the EHCleanupScope helper class. + assert(!cir::MissingFeatures::ehCleanupScope()); + mlir::OpBuilder::InsertionGuard guard(builder); - std::unique_ptr<EHScopeStack::Cleanup> cleanup = - ehStack.cleanupStack.pop_back_val(); assert(!cir::MissingFeatures::ehCleanupFlags()); mlir::Block *cleanupEntry = getCurCleanupBlock(*this); builder.setInsertionPointToEnd(cleanupEntry); - cleanup->emit(*this); + cleanup.emit(*this); + + ehStack.deallocate(cleanup.getSize()); } /// Pops cleanup blocks until the given savepoint is reached. -void CIRGenFunction::popCleanupBlocks(size_t oldCleanupStackDepth) { +void CIRGenFunction::popCleanupBlocks( + EHScopeStack::stable_iterator oldCleanupStackDepth) { assert(!cir::MissingFeatures::ehstackBranches()); - assert(ehStack.getStackDepth() >= oldCleanupStackDepth); - // Pop cleanup blocks until we reach the base stack depth for the // current scope. - while (ehStack.getStackDepth() > oldCleanupStackDepth) { + while (ehStack.stable_begin() != oldCleanupStackDepth) { popCleanupBlock(); } } diff --git a/clang/lib/CIR/CodeGen/CIRGenCleanup.h b/clang/lib/CIR/CodeGen/CIRGenCleanup.h new file mode 100644 index 0000000..7361c8c --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenCleanup.h @@ -0,0 +1,43 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// These classes support the generation of CIR for cleanups, initially based +// on LLVM IR cleanup handling, but ought to change as CIR evolves. +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_LIB_CIR_CODEGEN_CIRGENCLEANUP_H +#define CLANG_LIB_CIR_CODEGEN_CIRGENCLEANUP_H + +#include "EHScopeStack.h" + +namespace clang::CIRGen { + +/// A non-stable pointer into the scope stack. +class EHScopeStack::iterator { + char *ptr = nullptr; + + friend class EHScopeStack; + explicit iterator(char *ptr) : ptr(ptr) {} + +public: + iterator() = default; + + EHScopeStack::Cleanup *get() const { + return reinterpret_cast<EHScopeStack::Cleanup *>(ptr); + } + + EHScopeStack::Cleanup &operator*() const { return *get(); } +}; + +inline EHScopeStack::iterator EHScopeStack::begin() const { + return iterator(startOfData); +} + +} // namespace clang::CIRGen +#endif // CLANG_LIB_CIR_CODEGEN_CIRGENCLEANUP_H diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp index 78d375c..715d101 100644 --- a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp @@ -667,6 +667,12 @@ struct DestroyObject final : EHScopeStack::Cleanup { void emit(CIRGenFunction &cgf) override { cgf.emitDestroy(addr, type, destroyer); } + + // This is a placeholder until EHCleanupScope is implemented. + size_t getSize() const override { + assert(!cir::MissingFeatures::ehCleanupScope()); + return sizeof(DestroyObject); + } }; } // namespace diff --git a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp index 3aa170e..cba06a1 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp @@ -116,6 +116,15 @@ public: mlir::Value emitPromotedComplexOperand(const Expr *e, QualType promotionTy); + LValue emitCompoundAssignLValue( + const CompoundAssignOperator *e, + mlir::Value (ComplexExprEmitter::*func)(const BinOpInfo &), + RValue &value); + + mlir::Value emitCompoundAssign( + const CompoundAssignOperator *e, + mlir::Value (ComplexExprEmitter::*func)(const BinOpInfo &)); + mlir::Value emitBinAdd(const BinOpInfo &op); mlir::Value emitBinSub(const BinOpInfo &op); mlir::Value emitBinMul(const BinOpInfo &op); @@ -153,6 +162,19 @@ public: HANDLEBINOP(Sub) HANDLEBINOP(Mul) #undef HANDLEBINOP + + // Compound assignments. + mlir::Value VisitBinAddAssign(const CompoundAssignOperator *e) { + return emitCompoundAssign(e, &ComplexExprEmitter::emitBinAdd); + } + + mlir::Value VisitBinSubAssign(const CompoundAssignOperator *e) { + return emitCompoundAssign(e, &ComplexExprEmitter::emitBinSub); + } + + mlir::Value VisitBinMulAssign(const CompoundAssignOperator *e) { + return emitCompoundAssign(e, &ComplexExprEmitter::emitBinMul); + } }; } // namespace @@ -166,6 +188,12 @@ static const ComplexType *getComplexType(QualType type) { } #endif // NDEBUG +static mlir::Value createComplexFromReal(CIRGenBuilderTy &builder, + mlir::Location loc, mlir::Value real) { + mlir::Value imag = builder.getNullValue(real.getType(), loc); + return builder.createComplexCreate(loc, real, imag); +} + LValue ComplexExprEmitter::emitBinAssignLValue(const BinaryOperator *e, mlir::Value &value) { assert(cgf.getContext().hasSameUnqualifiedType(e->getLHS()->getType(), @@ -602,7 +630,7 @@ mlir::Value ComplexExprEmitter::emitPromoted(const Expr *e, mlir::Value result = Visit(const_cast<Expr *>(e)); if (!promotionTy.isNull()) - cgf.cgm.errorNYI("emitPromoted emitPromotedValue"); + return cgf.emitPromotedValue(result, promotionTy); return result; } @@ -630,6 +658,104 @@ ComplexExprEmitter::emitBinOps(const BinaryOperator *e, QualType promotionTy) { return binOpInfo; } +LValue ComplexExprEmitter::emitCompoundAssignLValue( + const CompoundAssignOperator *e, + mlir::Value (ComplexExprEmitter::*func)(const BinOpInfo &), RValue &value) { + QualType lhsTy = e->getLHS()->getType(); + QualType rhsTy = e->getRHS()->getType(); + SourceLocation exprLoc = e->getExprLoc(); + mlir::Location loc = cgf.getLoc(exprLoc); + + if (lhsTy->getAs<AtomicType>()) { + cgf.cgm.errorNYI("emitCompoundAssignLValue AtmoicType"); + return {}; + } + + BinOpInfo opInfo{loc}; + opInfo.fpFeatures = e->getFPFeaturesInEffect(cgf.getLangOpts()); + + assert(!cir::MissingFeatures::cgFPOptionsRAII()); + + // Load the RHS and LHS operands. + // __block variables need to have the rhs evaluated first, plus this should + // improve codegen a little. + QualType promotionTypeCR = getPromotionType(e->getComputationResultType()); + opInfo.ty = promotionTypeCR.isNull() ? e->getComputationResultType() + : promotionTypeCR; + + QualType complexElementTy = + opInfo.ty->castAs<ComplexType>()->getElementType(); + QualType promotionTypeRHS = getPromotionType(rhsTy); + + // The RHS should have been converted to the computation type. + if (e->getRHS()->getType()->isRealFloatingType()) { + if (!promotionTypeRHS.isNull()) { + opInfo.rhs = createComplexFromReal( + cgf.getBuilder(), loc, + cgf.emitPromotedScalarExpr(e->getRHS(), promotionTypeRHS)); + } else { + assert(cgf.getContext().hasSameUnqualifiedType(complexElementTy, rhsTy)); + opInfo.rhs = createComplexFromReal(cgf.getBuilder(), loc, + cgf.emitScalarExpr(e->getRHS())); + } + } else { + if (!promotionTypeRHS.isNull()) { + opInfo.rhs = cgf.emitPromotedComplexExpr(e->getRHS(), promotionTypeRHS); + } else { + assert(cgf.getContext().hasSameUnqualifiedType(opInfo.ty, rhsTy)); + opInfo.rhs = Visit(e->getRHS()); + } + } + + LValue lhs = cgf.emitLValue(e->getLHS()); + + // Load from the l-value and convert it. + QualType promotionTypeLHS = getPromotionType(e->getComputationLHSType()); + if (lhsTy->isAnyComplexType()) { + mlir::Value lhsValue = emitLoadOfLValue(lhs, exprLoc); + QualType destTy = promotionTypeLHS.isNull() ? opInfo.ty : promotionTypeLHS; + opInfo.lhs = emitComplexToComplexCast(lhsValue, lhsTy, destTy, exprLoc); + } else { + cgf.cgm.errorNYI("emitCompoundAssignLValue emitLoadOfScalar"); + return {}; + } + + // Expand the binary operator. + mlir::Value result = (this->*func)(opInfo); + + // Truncate the result and store it into the LHS lvalue. + if (lhsTy->isAnyComplexType()) { + mlir::Value resultValue = + emitComplexToComplexCast(result, opInfo.ty, lhsTy, exprLoc); + emitStoreOfComplex(loc, resultValue, lhs, /*isInit*/ false); + value = RValue::getComplex(resultValue); + } else { + mlir::Value resultValue = + cgf.emitComplexToScalarConversion(result, opInfo.ty, lhsTy, exprLoc); + cgf.emitStoreOfScalar(resultValue, lhs, /*isInit*/ false); + value = RValue::get(resultValue); + } + + return lhs; +} + +mlir::Value ComplexExprEmitter::emitCompoundAssign( + const CompoundAssignOperator *e, + mlir::Value (ComplexExprEmitter::*func)(const BinOpInfo &)) { + RValue val; + LValue lv = emitCompoundAssignLValue(e, func, val); + + // The result of an assignment in C is the assigned r-value. + if (!cgf.getLangOpts().CPlusPlus) + return val.getComplexValue(); + + // If the lvalue is non-volatile, return the computed value of the assignment. + if (!lv.isVolatileQualified()) + return val.getComplexValue(); + + return emitLoadOfLValue(lv, e->getExprLoc()); +} + mlir::Value ComplexExprEmitter::emitBinAdd(const BinOpInfo &op) { assert(!cir::MissingFeatures::fastMathFlags()); assert(!cir::MissingFeatures::cgFPOptionsRAII()); @@ -654,7 +780,7 @@ getComplexRangeAttr(LangOptions::ComplexRangeKind range) { case LangOptions::CX_Basic: return cir::ComplexRangeKind::Basic; case LangOptions::CX_None: - // The default value for ComplexRangeKind is Full is no option is selected + // The default value for ComplexRangeKind is Full if no option is selected return cir::ComplexRangeKind::Full; } } @@ -685,6 +811,31 @@ mlir::Value CIRGenFunction::emitComplexExpr(const Expr *e) { return ComplexExprEmitter(*this).Visit(const_cast<Expr *>(e)); } +using CompoundFunc = + mlir::Value (ComplexExprEmitter::*)(const ComplexExprEmitter::BinOpInfo &); + +static CompoundFunc getComplexOp(BinaryOperatorKind op) { + switch (op) { + case BO_MulAssign: + return &ComplexExprEmitter::emitBinMul; + case BO_DivAssign: + llvm_unreachable("getComplexOp: BO_DivAssign"); + case BO_SubAssign: + return &ComplexExprEmitter::emitBinSub; + case BO_AddAssign: + return &ComplexExprEmitter::emitBinAdd; + default: + llvm_unreachable("unexpected complex compound assignment"); + } +} + +LValue CIRGenFunction::emitComplexCompoundAssignmentLValue( + const CompoundAssignOperator *e) { + CompoundFunc op = getComplexOp(e->getOpcode()); + RValue val; + return ComplexExprEmitter(*this).emitCompoundAssignLValue(e, op, val); +} + mlir::Value CIRGenFunction::emitComplexPrePostIncDec(const UnaryOperator *e, LValue lv, cir::UnaryOpKind op, @@ -729,3 +880,11 @@ mlir::Value CIRGenFunction::emitPromotedComplexExpr(const Expr *e, QualType promotionType) { return ComplexExprEmitter(*this).emitPromoted(e, promotionType); } + +mlir::Value CIRGenFunction::emitPromotedValue(mlir::Value result, + QualType promotionType) { + assert(!mlir::cast<cir::ComplexType>(result.getType()).isIntegerComplex() && + "integral complex will never be promoted"); + return builder.createCast(cir::CastKind::float_complex, result, + convertType(promotionType)); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index 32c1c1a..3e06513 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -1955,6 +1955,29 @@ mlir::Value CIRGenFunction::emitScalarConversion(mlir::Value src, .emitScalarConversion(src, srcTy, dstTy, loc); } +mlir::Value CIRGenFunction::emitComplexToScalarConversion(mlir::Value src, + QualType srcTy, + QualType dstTy, + SourceLocation loc) { + assert(srcTy->isAnyComplexType() && hasScalarEvaluationKind(dstTy) && + "Invalid complex -> scalar conversion"); + + QualType complexElemTy = srcTy->castAs<ComplexType>()->getElementType(); + if (dstTy->isBooleanType()) { + auto kind = complexElemTy->isFloatingType() + ? cir::CastKind::float_complex_to_bool + : cir::CastKind::int_complex_to_bool; + return builder.createCast(getLoc(loc), kind, src, convertType(dstTy)); + } + + auto kind = complexElemTy->isFloatingType() + ? cir::CastKind::float_complex_to_real + : cir::CastKind::int_complex_to_real; + mlir::Value real = + builder.createCast(getLoc(loc), kind, src, convertType(complexElemTy)); + return emitScalarConversion(real, complexElemTy, dstTy, loc); +} + mlir::Value ScalarExprEmitter::VisitUnaryLNot(const UnaryOperator *e) { // Perform vector logical not on comparison with zero vector. if (e->getType()->isVectorType() && diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp index eb05c93..dedd01c 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp @@ -28,8 +28,6 @@ CIRGenFunction::CIRGenFunction(CIRGenModule &cgm, CIRGenBuilderTy &builder, bool suppressNewContext) : CIRGenTypeCache(cgm), cgm{cgm}, builder(builder) { ehStack.setCGF(this); - currentCleanupStackDepth = 0; - assert(ehStack.getStackDepth() == 0); } CIRGenFunction::~CIRGenFunction() {} @@ -409,6 +407,8 @@ void CIRGenFunction::startFunction(GlobalDecl gd, QualType returnType, const auto *fd = dyn_cast_or_null<FunctionDecl>(d); curFuncDecl = d->getNonClosureContext(); + prologueCleanupDepth = ehStack.stable_begin(); + mlir::Block *entryBB = &fn.getBlocks().front(); builder.setInsertionPointToStart(entryBB); @@ -475,11 +475,11 @@ void CIRGenFunction::finishFunction(SourceLocation endLoc) { // important to do this before we enter the return block or return // edges will be *really* confused. // TODO(cir): Use prologueCleanupDepth here. - bool hasCleanups = ehStack.getStackDepth() != currentCleanupStackDepth; + bool hasCleanups = ehStack.stable_begin() != prologueCleanupDepth; if (hasCleanups) { assert(!cir::MissingFeatures::generateDebugInfo()); // FIXME(cir): should we clearInsertionPoint? breaks many testcases - popCleanupBlocks(currentCleanupStackDepth); + popCleanupBlocks(prologueCleanupDepth); } } @@ -785,9 +785,8 @@ LValue CIRGenFunction::emitLValue(const Expr *e) { } if (!ty->isAnyComplexType()) return emitCompoundAssignmentLValue(cast<CompoundAssignOperator>(e)); - cgm.errorNYI(e->getSourceRange(), - "CompoundAssignOperator with ComplexType"); - return LValue(); + + return emitComplexCompoundAssignmentLValue(cast<CompoundAssignOperator>(e)); } case Expr::CallExprClass: case Expr::CXXMemberCallExprClass: diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index 3d92545..bdbc77c 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -601,9 +601,13 @@ public: FunctionArgList args, clang::SourceLocation loc, clang::SourceLocation startLoc); + /// The cleanup depth enclosing all the cleanups associated with the + /// parameters. + EHScopeStack::stable_iterator prologueCleanupDepth; + /// Takes the old cleanup stack size and emits the cleanup blocks /// that have been added. - void popCleanupBlocks(size_t oldCleanupStackDepth); + void popCleanupBlocks(EHScopeStack::stable_iterator oldCleanupStackDepth); void popCleanupBlock(); /// Push a cleanup to be run at the end of the current full-expression. Safe @@ -622,7 +626,7 @@ public: /// Enters a new scope for capturing cleanups, all of which /// will be executed once the scope is exited. class RunCleanupsScope { - size_t cleanupStackDepth, oldCleanupStackDepth; + EHScopeStack::stable_iterator cleanupStackDepth, oldCleanupStackDepth; protected: bool performCleanup; @@ -638,7 +642,7 @@ public: /// Enter a new cleanup scope. explicit RunCleanupsScope(CIRGenFunction &cgf) : performCleanup(true), cgf(cgf) { - cleanupStackDepth = cgf.ehStack.getStackDepth(); + cleanupStackDepth = cgf.ehStack.stable_begin(); oldCleanupStackDepth = cgf.currentCleanupStackDepth; cgf.currentCleanupStackDepth = cleanupStackDepth; } @@ -663,7 +667,7 @@ public: }; // Cleanup stack depth of the RunCleanupsScope that was pushed most recently. - size_t currentCleanupStackDepth; + EHScopeStack::stable_iterator currentCleanupStackDepth = ehStack.stable_end(); public: /// Represents a scope, including function bodies, compound statements, and @@ -944,6 +948,11 @@ public: /// sanitizer is enabled, a runtime check is also emitted. mlir::Value emitCheckedArgForAssume(const Expr *e); + /// Emit a conversion from the specified complex type to the specified + /// destination type, where the destination type is an LLVM scalar type. + mlir::Value emitComplexToScalarConversion(mlir::Value src, QualType srcTy, + QualType dstTy, SourceLocation loc); + LValue emitCompoundAssignmentLValue(const clang::CompoundAssignOperator *e); LValue emitCompoundLiteralLValue(const CompoundLiteralExpr *e); @@ -1047,6 +1056,8 @@ public: mlir::Value emitPromotedScalarExpr(const Expr *e, QualType promotionType); + mlir::Value emitPromotedValue(mlir::Value result, QualType promotionType); + /// Emit the computation of the specified expression of scalar type. mlir::Value emitScalarExpr(const clang::Expr *e); @@ -1076,6 +1087,7 @@ public: cir::UnaryOpKind op, bool isPre); LValue emitComplexAssignmentLValue(const BinaryOperator *e); + LValue emitComplexCompoundAssignmentLValue(const CompoundAssignOperator *e); void emitCompoundStmt(const clang::CompoundStmt &s); diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index b143682..ff6d293 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -1307,7 +1307,8 @@ void CIRGenModule::emitTopLevelDecl(Decl *decl) { } case Decl::Var: - case Decl::Decomposition: { + case Decl::Decomposition: + case Decl::VarTemplateSpecialization: { auto *vd = cast<VarDecl>(decl); if (isa<DecompositionDecl>(decl)) { errorNYI(decl->getSourceRange(), "global variable decompositions"); @@ -1342,6 +1343,8 @@ void CIRGenModule::emitTopLevelDecl(Decl *decl) { case Decl::StaticAssert: case Decl::TypeAliasTemplate: case Decl::UsingShadow: + case Decl::VarTemplate: + case Decl::VarTemplatePartialSpecialization: break; case Decl::CXXConstructor: @@ -1362,6 +1365,21 @@ void CIRGenModule::emitTopLevelDecl(Decl *decl) { assert(!cir::MissingFeatures::generateDebugInfo()); assert(!cir::MissingFeatures::cxxRecordStaticMembers()); break; + + case Decl::FileScopeAsm: + // File-scope asm is ignored during device-side CUDA compilation. + if (langOpts.CUDA && langOpts.CUDAIsDevice) + break; + // File-scope asm is ignored during device-side OpenMP compilation. + if (langOpts.OpenMPIsTargetDevice) + break; + // File-scope asm is ignored during device-side SYCL compilation. + if (langOpts.SYCLIsDevice) + break; + auto *file_asm = cast<FileScopeAsmDecl>(decl); + std::string line = file_asm->getAsmString(); + globalScopeAsm.push_back(builder.getStringAttr(line)); + break; } } @@ -1975,6 +1993,9 @@ void CIRGenModule::release() { emitDeferred(); applyReplacements(); + theModule->setAttr(cir::CIRDialect::getModuleLevelAsmAttrName(), + builder.getArrayAttr(globalScopeAsm)); + // There's a lot of code that is not implemented yet. assert(!cir::MissingFeatures::cgmRelease()); } diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h index 5d07d38..163a0fc 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.h +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -90,6 +90,8 @@ private: /// for FunctionDecls's. CIRGenFunction *curCGF = nullptr; + llvm::SmallVector<mlir::Attribute> globalScopeAsm; + public: mlir::ModuleOp getModule() const { return theModule; } CIRGenBuilderTy &getBuilder() { return builder; } diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp index 50642e7..332babd 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp @@ -412,7 +412,7 @@ mlir::LogicalResult CIRGenFunction::emitReturnStmt(const ReturnStmt &s) { auto *retBlock = curLexScope->getOrCreateRetBlock(*this, loc); // This should emit a branch through the cleanup block if one exists. builder.create<cir::BrOp>(loc, retBlock); - if (ehStack.getStackDepth() != currentCleanupStackDepth) + if (ehStack.stable_begin() != currentCleanupStackDepth) cgm.errorNYI(s.getSourceRange(), "return with cleanup stack"); builder.createBlock(builder.getBlock()->getParent()); diff --git a/clang/lib/CIR/CodeGen/EHScopeStack.h b/clang/lib/CIR/CodeGen/EHScopeStack.h index 22750ac..47478f6 100644 --- a/clang/lib/CIR/CodeGen/EHScopeStack.h +++ b/clang/lib/CIR/CodeGen/EHScopeStack.h @@ -42,7 +42,47 @@ enum CleanupKind : unsigned { /// A stack of scopes which respond to exceptions, including cleanups /// and catch blocks. class EHScopeStack { + friend class CIRGenFunction; + public: + // TODO(ogcg): Switch to alignof(uint64_t) instead of 8 + enum { ScopeStackAlignment = 8 }; + + /// A saved depth on the scope stack. This is necessary because + /// pushing scopes onto the stack invalidates iterators. + class stable_iterator { + friend class EHScopeStack; + + /// Offset from startOfData to endOfBuffer. + ptrdiff_t size = -1; + + explicit stable_iterator(ptrdiff_t size) : size(size) {} + + public: + static stable_iterator invalid() { return stable_iterator(-1); } + stable_iterator() = default; + + bool isValid() const { return size >= 0; } + + /// Returns true if this scope encloses I. + /// Returns false if I is invalid. + /// This scope must be valid. + bool encloses(stable_iterator other) const { return size <= other.size; } + + /// Returns true if this scope strictly encloses I: that is, + /// if it encloses I and is not I. + /// Returns false is I is invalid. + /// This scope must be valid. + bool strictlyEncloses(stable_iterator I) const { return size < I.size; } + + friend bool operator==(stable_iterator A, stable_iterator B) { + return A.size == B.size; + } + friend bool operator!=(stable_iterator A, stable_iterator B) { + return A.size != B.size; + } + }; + /// Information for lazily generating a cleanup. Subclasses must be /// POD-like: cleanups will not be destructed, and they will be /// allocated on the cleanup stack and freely copied and moved @@ -68,30 +108,75 @@ public: /// // \param flags cleanup kind. virtual void emit(CIRGenFunction &cgf) = 0; - }; - // Classic codegen has a finely tuned custom allocator and a complex stack - // management scheme. We'll probably eventually want to find a way to share - // that implementation. For now, we will use a very simplified implementation - // to get cleanups working. - llvm::SmallVector<std::unique_ptr<Cleanup>, 8> cleanupStack; + // This is a placeholder until EHScope is implemented. + virtual size_t getSize() const = 0; + }; private: + // The implementation for this class is in CIRGenCleanup.h and + // CIRGenCleanup.cpp; the definition is here because it's used as a + // member of CIRGenFunction. + + /// The start of the scope-stack buffer, i.e. the allocated pointer + /// for the buffer. All of these pointers are either simultaneously + /// null or simultaneously valid. + std::unique_ptr<char[]> startOfBuffer; + + /// The end of the buffer. + char *endOfBuffer = nullptr; + + /// The first valid entry in the buffer. + char *startOfData = nullptr; + /// The CGF this Stack belong to CIRGenFunction *cgf = nullptr; + // This class uses a custom allocator for maximum efficiency because cleanups + // are allocated and freed very frequently. It's basically a bump pointer + // allocator, but we can't use LLVM's BumpPtrAllocator because we use offsets + // into the buffer as stable iterators. + char *allocate(size_t size); + void deallocate(size_t size); + + void *pushCleanup(CleanupKind kind, size_t dataSize); + public: EHScopeStack() = default; ~EHScopeStack() = default; /// Push a lazily-created cleanup on the stack. template <class T, class... As> void pushCleanup(CleanupKind kind, As... a) { - cleanupStack.push_back(std::make_unique<T>(a...)); + static_assert(alignof(T) <= ScopeStackAlignment, + "Cleanup's alignment is too large."); + void *buffer = pushCleanup(kind, sizeof(T)); + [[maybe_unused]] Cleanup *obj = new (buffer) T(a...); } void setCGF(CIRGenFunction *inCGF) { cgf = inCGF; } - size_t getStackDepth() const { return cleanupStack.size(); } + /// Pops a cleanup scope off the stack. This is private to CIRGenCleanup.cpp. + void popCleanup(); + + /// Determines whether the exception-scopes stack is empty. + bool empty() const { return startOfData == endOfBuffer; } + + /// An unstable reference to a scope-stack depth. Invalidated by + /// pushes but not pops. + class iterator; + + /// Returns an iterator pointing to the innermost EH scope. + iterator begin() const; + + /// Create a stable reference to the top of the EH stack. The + /// returned reference is valid until that scope is popped off the + /// stack. + stable_iterator stable_begin() const { + return stable_iterator(endOfBuffer - startOfData); + } + + /// Create a stable reference to the bottom of the EH stack. + static stable_iterator stable_end() { return stable_iterator(0); } }; } // namespace clang::CIRGen diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index d3fcac1..53ab04e 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -1444,6 +1444,27 @@ cir::GetGlobalOp::verifySymbolUses(SymbolTableCollection &symbolTable) { } //===----------------------------------------------------------------------===// +// VTableAddrPointOp +//===----------------------------------------------------------------------===// + +LogicalResult +cir::VTableAddrPointOp::verifySymbolUses(SymbolTableCollection &symbolTable) { + StringRef name = getName(); + + // Verify that the result type underlying pointer type matches the type of + // the referenced cir.global or cir.func op. + auto op = symbolTable.lookupNearestSymbolFrom<GlobalOp>(*this, getNameAttr()); + if (!op) + return emitOpError("'") + << name << "' does not reference a valid cir.global"; + std::optional<mlir::Attribute> init = op.getInitialValue(); + if (!init) + return success(); + assert(!cir::MissingFeatures::vtableInitializer()); + return success(); +} + +//===----------------------------------------------------------------------===// // FuncOp //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 7e1c9fb..43a1b51 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -2143,6 +2143,11 @@ void ConvertCIRToLLVMPass::processCIRAttrs(mlir::ModuleOp module) { module->getAttr(cir::CIRDialect::getTripleAttrName())) module->setAttr(mlir::LLVM::LLVMDialect::getTargetTripleAttrName(), tripleAttr); + + if (mlir::Attribute asmAttr = + module->getAttr(cir::CIRDialect::getModuleLevelAsmAttrName())) + module->setAttr(mlir::LLVM::LLVMDialect::getModuleLevelAsmAttrName(), + asmAttr); } void ConvertCIRToLLVMPass::runOnOperation() { diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index a648bde..071667a 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -5985,8 +5985,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // Create a temporary array to hold the sizes of local pointer arguments // for the block. \p First is the position of the first size argument. - auto CreateArrayForSizeVar = [=](unsigned First) - -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> { + auto CreateArrayForSizeVar = + [=](unsigned First) -> std::pair<llvm::Value *, llvm::Value *> { llvm::APInt ArraySize(32, NumArgs - First); QualType SizeArrayTy = getContext().getConstantArrayType( getContext().getSizeType(), ArraySize, nullptr, @@ -5999,9 +5999,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // actually the Alloca ascasted to the default AS, hence the // stripPointerCasts() llvm::Value *Alloca = TmpPtr->stripPointerCasts(); - llvm::Value *TmpSize = EmitLifetimeStart( - CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), Alloca); llvm::Value *ElemPtr; + EmitLifetimeStart(Alloca); // Each of the following arguments specifies the size of the corresponding // argument passed to the enqueued block. auto *Zero = llvm::ConstantInt::get(IntTy, 0); @@ -6018,7 +6017,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } // Return the Alloca itself rather than a potential ascast as this is only // used by the paired EmitLifetimeEnd. - return {ElemPtr, TmpSize, Alloca}; + return {ElemPtr, Alloca}; }; // Could have events and/or varargs. @@ -6030,7 +6029,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::Value *Kernel = Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy); auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); - auto [ElemPtr, TmpSize, TmpPtr] = CreateArrayForSizeVar(4); + auto [ElemPtr, TmpPtr] = CreateArrayForSizeVar(4); // Create a vector of the arguments, as well as a constant value to // express to the runtime the number of variadic arguments. @@ -6045,8 +6044,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false); auto Call = RValue::get( EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args)); - if (TmpSize) - EmitLifetimeEnd(TmpSize, TmpPtr); + EmitLifetimeEnd(TmpPtr); return Call; } // Any calls now have event arguments passed. @@ -6111,15 +6109,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, ArgTys.push_back(Int32Ty); Name = "__enqueue_kernel_events_varargs"; - auto [ElemPtr, TmpSize, TmpPtr] = CreateArrayForSizeVar(7); + auto [ElemPtr, TmpPtr] = CreateArrayForSizeVar(7); Args.push_back(ElemPtr); ArgTys.push_back(ElemPtr->getType()); llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false); auto Call = RValue::get( EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args)); - if (TmpSize) - EmitLifetimeEnd(TmpSize, TmpPtr); + EmitLifetimeEnd(TmpPtr); return Call; } llvm_unreachable("Unexpected enqueue_kernel signature"); diff --git a/clang/lib/CodeGen/CGCXXABI.h b/clang/lib/CodeGen/CGCXXABI.h index 96fe046..2dd320d 100644 --- a/clang/lib/CodeGen/CGCXXABI.h +++ b/clang/lib/CodeGen/CGCXXABI.h @@ -294,14 +294,22 @@ public: Address Value, QualType SrcRecordTy) = 0; + struct ExactDynamicCastInfo { + bool RequiresCastToPrimaryBase; + CharUnits Offset; + }; + + virtual std::optional<ExactDynamicCastInfo> + getExactDynamicCastInfo(QualType SrcRecordTy, QualType DestTy, + QualType DestRecordTy) = 0; + /// Emit a dynamic_cast from SrcRecordTy to DestRecordTy. The cast fails if /// the dynamic type of Value is not exactly DestRecordTy. - virtual llvm::Value *emitExactDynamicCast(CodeGenFunction &CGF, Address Value, - QualType SrcRecordTy, - QualType DestTy, - QualType DestRecordTy, - llvm::BasicBlock *CastSuccess, - llvm::BasicBlock *CastFail) = 0; + virtual llvm::Value *emitExactDynamicCast( + CodeGenFunction &CGF, Address Value, QualType SrcRecordTy, + QualType DestTy, QualType DestRecordTy, + const ExactDynamicCastInfo &CastInfo, llvm::BasicBlock *CastSuccess, + llvm::BasicBlock *CastFail) = 0; virtual bool EmitBadCastCall(CodeGenFunction &CGF) = 0; diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index d9bd443..6e0c2c1 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -4319,10 +4319,7 @@ static void emitWriteback(CodeGenFunction &CGF, if (writeback.WritebackExpr) { CGF.EmitIgnoredExpr(writeback.WritebackExpr); - - if (writeback.LifetimeSz) - CGF.EmitLifetimeEnd(writeback.LifetimeSz, - writeback.Temporary.getBasePointer()); + CGF.EmitLifetimeEnd(writeback.Temporary.getBasePointer()); return; } @@ -5282,7 +5279,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // If the call returns a temporary with struct return, create a temporary // alloca to hold the result, unless one is given to us. Address SRetPtr = Address::invalid(); - llvm::Value *UnusedReturnSizePtr = nullptr; + bool NeedSRetLifetimeEnd = false; if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) { // For virtual function pointer thunks and musttail calls, we must always // forward an incoming SRet pointer to the callee, because a local alloca @@ -5296,11 +5293,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, SRetPtr = ReturnValue.getAddress(); } else { SRetPtr = CreateMemTempWithoutCast(RetTy, "tmp"); - if (HaveInsertPoint() && ReturnValue.isUnused()) { - llvm::TypeSize size = - CGM.getDataLayout().getTypeAllocSize(ConvertTypeForMem(RetTy)); - UnusedReturnSizePtr = EmitLifetimeStart(size, SRetPtr.getBasePointer()); - } + if (HaveInsertPoint() && ReturnValue.isUnused()) + NeedSRetLifetimeEnd = EmitLifetimeStart(SRetPtr.getBasePointer()); } if (IRFunctionArgs.hasSRetArg()) { // A mismatch between the allocated return value's AS and the target's @@ -5484,15 +5478,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Val = Builder.CreateFreeze(Val); IRCallArgs[FirstIRArg] = Val; - // Emit lifetime markers for the temporary alloca. - llvm::TypeSize ByvalTempElementSize = - CGM.getDataLayout().getTypeAllocSize(AI.getElementType()); - llvm::Value *LifetimeSize = - EmitLifetimeStart(ByvalTempElementSize, AI.getPointer()); - - // Add cleanup code to emit the end lifetime marker after the call. - if (LifetimeSize) // In case we disabled lifetime markers. - CallLifetimeEndAfterCall.emplace_back(AI, LifetimeSize); + // Emit lifetime markers for the temporary alloca and add cleanup code to + // emit the end lifetime marker after the call. + if (EmitLifetimeStart(AI.getPointer())) + CallLifetimeEndAfterCall.emplace_back(AI); // Generate the copy. I->copyInto(*this, AI); @@ -5653,9 +5642,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, auto unpaddedCoercionType = ArgInfo.getUnpaddedCoerceAndExpandType(); auto *unpaddedStruct = dyn_cast<llvm::StructType>(unpaddedCoercionType); - llvm::Value *tempSize = nullptr; Address addr = Address::invalid(); RawAddress AllocaAddr = RawAddress::invalid(); + bool NeedLifetimeEnd = false; if (I->isAggregate()) { addr = I->hasLValue() ? I->getKnownLValue().getAddress() : I->getKnownRValue().getAggregateAddress(); @@ -5665,7 +5654,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, assert(RV.isScalar()); // complex should always just be direct llvm::Type *scalarType = RV.getScalarVal()->getType(); - auto scalarSize = CGM.getDataLayout().getTypeAllocSize(scalarType); auto scalarAlign = CGM.getDataLayout().getPrefTypeAlign(scalarType); // Materialize to a temporary. @@ -5674,7 +5662,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, layout->getAlignment(), scalarAlign)), "tmp", /*ArraySize=*/nullptr, &AllocaAddr); - tempSize = EmitLifetimeStart(scalarSize, AllocaAddr.getPointer()); + NeedLifetimeEnd = EmitLifetimeStart(AllocaAddr.getPointer()); Builder.CreateStore(RV.getScalarVal(), addr); } @@ -5699,10 +5687,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } assert(IRArgPos == FirstIRArg + NumIRArgs); - if (tempSize) { - EmitLifetimeEnd(tempSize, AllocaAddr.getPointer()); - } - + if (NeedLifetimeEnd) + EmitLifetimeEnd(AllocaAddr.getPointer()); break; } @@ -5871,9 +5857,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // can't depend on being inside of an ExprWithCleanups, so we need to manually // pop this cleanup later on. Being eager about this is OK, since this // temporary is 'invisible' outside of the callee. - if (UnusedReturnSizePtr) - pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, SRetPtr, - UnusedReturnSizePtr); + if (NeedSRetLifetimeEnd) + pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, SRetPtr); llvm::BasicBlock *InvokeDest = CannotThrow ? nullptr : getInvokeDest(); @@ -6007,7 +5992,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // insertion point; this allows the rest of IRGen to discard // unreachable code. if (CI->doesNotReturn()) { - if (UnusedReturnSizePtr) + if (NeedSRetLifetimeEnd) PopCleanupBlock(); // Strip away the noreturn attribute to better diagnose unreachable UB. @@ -6122,7 +6107,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, case ABIArgInfo::InAlloca: case ABIArgInfo::Indirect: { RValue ret = convertTempToRValue(SRetPtr, RetTy, SourceLocation()); - if (UnusedReturnSizePtr) + if (NeedSRetLifetimeEnd) PopCleanupBlock(); return ret; } diff --git a/clang/lib/CodeGen/CGCall.h b/clang/lib/CodeGen/CGCall.h index 0b4e3f9..3157b7f 100644 --- a/clang/lib/CodeGen/CGCall.h +++ b/clang/lib/CodeGen/CGCall.h @@ -289,9 +289,6 @@ public: /// An Expression (optional) that performs the writeback with any required /// casting. const Expr *WritebackExpr; - - // Size for optional lifetime end on the temporary. - llvm::Value *LifetimeSz; }; struct CallArgCleanup { @@ -321,9 +318,8 @@ public: } void addWriteback(LValue srcLV, Address temporary, llvm::Value *toUse, - const Expr *writebackExpr = nullptr, - llvm::Value *lifetimeSz = nullptr) { - Writeback writeback = {srcLV, temporary, toUse, writebackExpr, lifetimeSz}; + const Expr *writebackExpr = nullptr) { + Writeback writeback = {srcLV, temporary, toUse, writebackExpr}; Writebacks.push_back(writeback); } diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index ff2dada..0cade0d 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -1351,30 +1351,27 @@ void CodeGenFunction::EmitAutoVarDecl(const VarDecl &D) { } /// Emit a lifetime.begin marker if some criteria are satisfied. -/// \return a pointer to the temporary size Value if a marker was emitted, null -/// otherwise -llvm::Value *CodeGenFunction::EmitLifetimeStart(llvm::TypeSize Size, - llvm::Value *Addr) { +/// \return whether the marker was emitted. +bool CodeGenFunction::EmitLifetimeStart(llvm::Value *Addr) { if (!ShouldEmitLifetimeMarkers) - return nullptr; + return false; assert(Addr->getType()->getPointerAddressSpace() == CGM.getDataLayout().getAllocaAddrSpace() && "Pointer should be in alloca address space"); - llvm::Value *SizeV = llvm::ConstantInt::get( - Int64Ty, Size.isScalable() ? -1 : Size.getFixedValue()); - llvm::CallInst *C = - Builder.CreateCall(CGM.getLLVMLifetimeStartFn(), {SizeV, Addr}); + llvm::CallInst *C = Builder.CreateCall(CGM.getLLVMLifetimeStartFn(), {Addr}); C->setDoesNotThrow(); - return SizeV; + return true; } -void CodeGenFunction::EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr) { +void CodeGenFunction::EmitLifetimeEnd(llvm::Value *Addr) { + if (!ShouldEmitLifetimeMarkers) + return; + assert(Addr->getType()->getPointerAddressSpace() == CGM.getDataLayout().getAllocaAddrSpace() && "Pointer should be in alloca address space"); - llvm::CallInst *C = - Builder.CreateCall(CGM.getLLVMLifetimeEndFn(), {Size, Addr}); + llvm::CallInst *C = Builder.CreateCall(CGM.getLLVMLifetimeEndFn(), {Addr}); C->setDoesNotThrow(); } @@ -1632,9 +1629,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // is rare. if (!Bypasses.IsBypassed(&D) && !(!getLangOpts().CPlusPlus && hasLabelBeenSeenInCurrentScope())) { - llvm::TypeSize Size = CGM.getDataLayout().getTypeAllocSize(allocaTy); - emission.SizeForLifetimeMarkers = - EmitLifetimeStart(Size, AllocaAddr.getPointer()); + emission.UseLifetimeMarkers = + EmitLifetimeStart(AllocaAddr.getPointer()); } } else { assert(!emission.useLifetimeMarkers()); @@ -1727,9 +1723,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // Make sure we call @llvm.lifetime.end. if (emission.useLifetimeMarkers()) - EHStack.pushCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, - emission.getOriginalAllocatedAddress(), - emission.getSizeForLifetimeMarkers()); + EHStack.pushCleanup<CallLifetimeEnd>( + NormalEHLifetimeMarker, emission.getOriginalAllocatedAddress()); // Analogous to lifetime markers, we use a 'cleanup' to emit fake.use // calls for local variables. We are exempting volatile variables and diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 5a3d4e4..f1affef 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -588,11 +588,9 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { } else { switch (M->getStorageDuration()) { case SD_Automatic: - if (auto *Size = EmitLifetimeStart( - CGM.getDataLayout().getTypeAllocSize(Alloca.getElementType()), - Alloca.getPointer())) { + if (EmitLifetimeStart(Alloca.getPointer())) { pushCleanupAfterFullExpr<CallLifetimeEnd>(NormalEHLifetimeMarker, - Alloca, Size); + Alloca); } break; @@ -623,11 +621,8 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { Block, llvm::BasicBlock::iterator(Block->back()))); } - if (auto *Size = EmitLifetimeStart( - CGM.getDataLayout().getTypeAllocSize(Alloca.getElementType()), - Alloca.getPointer())) { - pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, Alloca, - Size); + if (EmitLifetimeStart(Alloca.getPointer())) { + pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, Alloca); } if (OldConditional) { @@ -3789,33 +3784,50 @@ void CodeGenFunction::EmitCheck( Branch->setMetadata(llvm::LLVMContext::MD_prof, Node); EmitBlock(Handlers); + // Clear arguments for the MinimalRuntime handler. + if (CGM.getCodeGenOpts().SanitizeMinimalRuntime) { + switch (CheckHandler) { + case SanitizerHandler::TypeMismatch: + // Pass value pointer only. It adds minimal overhead. + StaticArgs = {}; + assert(DynamicArgs.size() == 1); + break; + default: + // No arguments for other checks. + StaticArgs = {}; + DynamicArgs = {}; + break; + } + } + // Handler functions take an i8* pointing to the (handler-specific) static // information block, followed by a sequence of intptr_t arguments // representing operand values. SmallVector<llvm::Value *, 4> Args; SmallVector<llvm::Type *, 4> ArgTypes; - if (!CGM.getCodeGenOpts().SanitizeMinimalRuntime) { - Args.reserve(DynamicArgs.size() + 1); - ArgTypes.reserve(DynamicArgs.size() + 1); - - // Emit handler arguments and create handler function type. - if (!StaticArgs.empty()) { - llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs); - auto *InfoPtr = new llvm::GlobalVariable( - CGM.getModule(), Info->getType(), false, - llvm::GlobalVariable::PrivateLinkage, Info, "", nullptr, - llvm::GlobalVariable::NotThreadLocal, - CGM.getDataLayout().getDefaultGlobalsAddressSpace()); - InfoPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - CGM.getSanitizerMetadata()->disableSanitizerForGlobal(InfoPtr); - Args.push_back(InfoPtr); - ArgTypes.push_back(Args.back()->getType()); - } - for (llvm::Value *DynamicArg : DynamicArgs) { - Args.push_back(EmitCheckValue(DynamicArg)); - ArgTypes.push_back(IntPtrTy); - } + Args.reserve(DynamicArgs.size() + 1); + ArgTypes.reserve(DynamicArgs.size() + 1); + + // Emit handler arguments and create handler function type. + if (!StaticArgs.empty()) { + llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs); + auto *InfoPtr = new llvm::GlobalVariable( + CGM.getModule(), Info->getType(), + // Non-constant global is used in a handler to deduplicate reports. + // TODO: change deduplication logic and make it constant. + /*isConstant=*/false, llvm::GlobalVariable::PrivateLinkage, Info, "", + nullptr, llvm::GlobalVariable::NotThreadLocal, + CGM.getDataLayout().getDefaultGlobalsAddressSpace()); + InfoPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + CGM.getSanitizerMetadata()->disableSanitizerForGlobal(InfoPtr); + Args.push_back(InfoPtr); + ArgTypes.push_back(Args.back()->getType()); + } + + for (llvm::Value *DynamicArg : DynamicArgs) { + Args.push_back(EmitCheckValue(DynamicArg)); + ArgTypes.push_back(IntPtrTy); } llvm::FunctionType *FnType = @@ -5767,13 +5779,10 @@ LValue CodeGenFunction::EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, llvm::Value *Addr = TempLV.getAddress().getBasePointer(); llvm::Type *ElTy = ConvertTypeForMem(TempLV.getType()); - llvm::TypeSize Sz = CGM.getDataLayout().getTypeAllocSize(ElTy); - - llvm::Value *LifetimeSize = EmitLifetimeStart(Sz, Addr); + EmitLifetimeStart(Addr); Address TmpAddr(Addr, ElTy, TempLV.getAlignment()); - Args.addWriteback(BaseLV, TmpAddr, nullptr, E->getWritebackCast(), - LifetimeSize); + Args.addWriteback(BaseLV, TmpAddr, nullptr, E->getWritebackCast()); Args.add(RValue::get(TmpAddr, *this), Ty); return TempLV; } diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index cad6731..e2f11b86 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -300,16 +300,12 @@ void AggExprEmitter::withReturnValueSlot( Address RetAddr = Address::invalid(); EHScopeStack::stable_iterator LifetimeEndBlock; - llvm::Value *LifetimeSizePtr = nullptr; llvm::IntrinsicInst *LifetimeStartInst = nullptr; if (!UseTemp) { RetAddr = Dest.getAddress(); } else { RetAddr = CGF.CreateMemTempWithoutCast(RetTy, "tmp"); - llvm::TypeSize Size = - CGF.CGM.getDataLayout().getTypeAllocSize(CGF.ConvertTypeForMem(RetTy)); - LifetimeSizePtr = CGF.EmitLifetimeStart(Size, RetAddr.getBasePointer()); - if (LifetimeSizePtr) { + if (CGF.EmitLifetimeStart(RetAddr.getBasePointer())) { LifetimeStartInst = cast<llvm::IntrinsicInst>(std::prev(Builder.GetInsertPoint())); assert(LifetimeStartInst->getIntrinsicID() == @@ -317,7 +313,7 @@ void AggExprEmitter::withReturnValueSlot( "Last insertion wasn't a lifetime.start?"); CGF.pushFullExprCleanup<CodeGenFunction::CallLifetimeEnd>( - NormalEHLifetimeMarker, RetAddr, LifetimeSizePtr); + NormalEHLifetimeMarker, RetAddr); LifetimeEndBlock = CGF.EHStack.stable_begin(); } } @@ -338,7 +334,7 @@ void AggExprEmitter::withReturnValueSlot( // Since we're not guaranteed to be in an ExprWithCleanups, clean up // eagerly. CGF.DeactivateCleanupBlock(LifetimeEndBlock, LifetimeStartInst); - CGF.EmitLifetimeEnd(LifetimeSizePtr, RetAddr.getBasePointer()); + CGF.EmitLifetimeEnd(RetAddr.getBasePointer()); } } diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp index fef1baf..49d5d8a 100644 --- a/clang/lib/CodeGen/CGExprCXX.cpp +++ b/clang/lib/CodeGen/CGExprCXX.cpp @@ -2295,6 +2295,18 @@ llvm::Value *CodeGenFunction::EmitDynamicCast(Address ThisAddr, CGM.getCXXABI().shouldEmitExactDynamicCast(DestRecordTy) && !getLangOpts().PointerAuthCalls; + std::optional<CGCXXABI::ExactDynamicCastInfo> ExactCastInfo; + if (IsExact) { + ExactCastInfo = CGM.getCXXABI().getExactDynamicCastInfo(SrcRecordTy, DestTy, + DestRecordTy); + if (!ExactCastInfo) { + llvm::Value *NullValue = EmitDynamicCastToNull(*this, DestTy); + if (!Builder.GetInsertBlock()) + EmitBlock(createBasicBlock("dynamic_cast.unreachable")); + return NullValue; + } + } + // C++ [expr.dynamic.cast]p4: // If the value of v is a null pointer value in the pointer case, the result // is the null pointer value of type T. @@ -2322,7 +2334,8 @@ llvm::Value *CodeGenFunction::EmitDynamicCast(Address ThisAddr, // If the destination type is effectively final, this pointer points to the // right type if and only if its vptr has the right value. Value = CGM.getCXXABI().emitExactDynamicCast( - *this, ThisAddr, SrcRecordTy, DestTy, DestRecordTy, CastEnd, CastNull); + *this, ThisAddr, SrcRecordTy, DestTy, DestRecordTy, *ExactCastInfo, + CastEnd, CastNull); } else { assert(DestRecordTy->isRecordType() && "destination type must be a record type!"); diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 44931d0..7eed4ee 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -4183,9 +4183,8 @@ Value *ScalarExprEmitter::EmitOverflowCheckedBinOp(const BinOpInfo &Ops) { return phi; } -/// Emit pointer + index arithmetic. -static Value *emitPointerArithmetic(CodeGenFunction &CGF, - const BinOpInfo &op, +/// This function is used for BO_Add/BO_Sub/BO_AddAssign/BO_SubAssign. +static Value *emitPointerArithmetic(CodeGenFunction &CGF, const BinOpInfo &op, bool isSubtraction) { // Must have binary (not unary) expr here. Unary pointer // increment/decrement doesn't use this path. @@ -4202,11 +4201,19 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, std::swap(pointerOperand, indexOperand); } + return CGF.EmitPointerArithmetic(expr, pointerOperand, pointer, indexOperand, + index, isSubtraction); +} + +/// Emit pointer + index arithmetic. +llvm::Value *CodeGenFunction::EmitPointerArithmetic( + const BinaryOperator *BO, Expr *pointerOperand, llvm::Value *pointer, + Expr *indexOperand, llvm::Value *index, bool isSubtraction) { bool isSigned = indexOperand->getType()->isSignedIntegerOrEnumerationType(); unsigned width = cast<llvm::IntegerType>(index->getType())->getBitWidth(); - auto &DL = CGF.CGM.getDataLayout(); - auto PtrTy = cast<llvm::PointerType>(pointer->getType()); + auto &DL = CGM.getDataLayout(); + auto *PtrTy = cast<llvm::PointerType>(pointer->getType()); // Some versions of glibc and gcc use idioms (particularly in their malloc // routines) that add a pointer-sized integer (known to be a pointer value) @@ -4227,79 +4234,77 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, // // Note that we do not suppress the pointer overflow check in this case. if (BinaryOperator::isNullPointerArithmeticExtension( - CGF.getContext(), op.Opcode, expr->getLHS(), expr->getRHS())) { - Value *Ptr = CGF.Builder.CreateIntToPtr(index, pointer->getType()); - if (CGF.getLangOpts().PointerOverflowDefined || - !CGF.SanOpts.has(SanitizerKind::PointerOverflow) || - NullPointerIsDefined(CGF.Builder.GetInsertBlock()->getParent(), + getContext(), BO->getOpcode(), pointerOperand, indexOperand)) { + llvm::Value *Ptr = Builder.CreateIntToPtr(index, pointer->getType()); + if (getLangOpts().PointerOverflowDefined || + !SanOpts.has(SanitizerKind::PointerOverflow) || + NullPointerIsDefined(Builder.GetInsertBlock()->getParent(), PtrTy->getPointerAddressSpace())) return Ptr; // The inbounds GEP of null is valid iff the index is zero. auto CheckOrdinal = SanitizerKind::SO_PointerOverflow; auto CheckHandler = SanitizerHandler::PointerOverflow; - SanitizerDebugLocation SanScope(&CGF, {CheckOrdinal}, CheckHandler); - Value *IsZeroIndex = CGF.Builder.CreateIsNull(index); - llvm::Constant *StaticArgs[] = { - CGF.EmitCheckSourceLocation(op.E->getExprLoc())}; + SanitizerDebugLocation SanScope(this, {CheckOrdinal}, CheckHandler); + llvm::Value *IsZeroIndex = Builder.CreateIsNull(index); + llvm::Constant *StaticArgs[] = {EmitCheckSourceLocation(BO->getExprLoc())}; llvm::Type *IntPtrTy = DL.getIntPtrType(PtrTy); - Value *IntPtr = llvm::Constant::getNullValue(IntPtrTy); - Value *ComputedGEP = CGF.Builder.CreateZExtOrTrunc(index, IntPtrTy); - Value *DynamicArgs[] = {IntPtr, ComputedGEP}; - CGF.EmitCheck({{IsZeroIndex, CheckOrdinal}}, CheckHandler, StaticArgs, - DynamicArgs); + llvm::Value *IntPtr = llvm::Constant::getNullValue(IntPtrTy); + llvm::Value *ComputedGEP = Builder.CreateZExtOrTrunc(index, IntPtrTy); + llvm::Value *DynamicArgs[] = {IntPtr, ComputedGEP}; + EmitCheck({{IsZeroIndex, CheckOrdinal}}, CheckHandler, StaticArgs, + DynamicArgs); return Ptr; } if (width != DL.getIndexTypeSizeInBits(PtrTy)) { // Zero-extend or sign-extend the pointer value according to // whether the index is signed or not. - index = CGF.Builder.CreateIntCast(index, DL.getIndexType(PtrTy), isSigned, - "idx.ext"); + index = Builder.CreateIntCast(index, DL.getIndexType(PtrTy), isSigned, + "idx.ext"); } // If this is subtraction, negate the index. if (isSubtraction) - index = CGF.Builder.CreateNeg(index, "idx.neg"); + index = Builder.CreateNeg(index, "idx.neg"); - if (CGF.SanOpts.has(SanitizerKind::ArrayBounds)) - CGF.EmitBoundsCheck(op.E, pointerOperand, index, indexOperand->getType(), - /*Accessed*/ false); + if (SanOpts.has(SanitizerKind::ArrayBounds)) + EmitBoundsCheck(BO, pointerOperand, index, indexOperand->getType(), + /*Accessed*/ false); - const PointerType *pointerType - = pointerOperand->getType()->getAs<PointerType>(); + const PointerType *pointerType = + pointerOperand->getType()->getAs<PointerType>(); if (!pointerType) { QualType objectType = pointerOperand->getType() - ->castAs<ObjCObjectPointerType>() - ->getPointeeType(); - llvm::Value *objectSize - = CGF.CGM.getSize(CGF.getContext().getTypeSizeInChars(objectType)); + ->castAs<ObjCObjectPointerType>() + ->getPointeeType(); + llvm::Value *objectSize = + CGM.getSize(getContext().getTypeSizeInChars(objectType)); - index = CGF.Builder.CreateMul(index, objectSize); + index = Builder.CreateMul(index, objectSize); - Value *result = - CGF.Builder.CreateGEP(CGF.Int8Ty, pointer, index, "add.ptr"); - return CGF.Builder.CreateBitCast(result, pointer->getType()); + llvm::Value *result = Builder.CreateGEP(Int8Ty, pointer, index, "add.ptr"); + return Builder.CreateBitCast(result, pointer->getType()); } QualType elementType = pointerType->getPointeeType(); - if (const VariableArrayType *vla - = CGF.getContext().getAsVariableArrayType(elementType)) { + if (const VariableArrayType *vla = + getContext().getAsVariableArrayType(elementType)) { // The element count here is the total number of non-VLA elements. - llvm::Value *numElements = CGF.getVLASize(vla).NumElts; + llvm::Value *numElements = getVLASize(vla).NumElts; // Effectively, the multiply by the VLA size is part of the GEP. // GEP indexes are signed, and scaling an index isn't permitted to // signed-overflow, so we use the same semantics for our explicit // multiply. We suppress this if overflow is not undefined behavior. - llvm::Type *elemTy = CGF.ConvertTypeForMem(vla->getElementType()); - if (CGF.getLangOpts().PointerOverflowDefined) { - index = CGF.Builder.CreateMul(index, numElements, "vla.index"); - pointer = CGF.Builder.CreateGEP(elemTy, pointer, index, "add.ptr"); + llvm::Type *elemTy = ConvertTypeForMem(vla->getElementType()); + if (getLangOpts().PointerOverflowDefined) { + index = Builder.CreateMul(index, numElements, "vla.index"); + pointer = Builder.CreateGEP(elemTy, pointer, index, "add.ptr"); } else { - index = CGF.Builder.CreateNSWMul(index, numElements, "vla.index"); - pointer = CGF.EmitCheckedInBoundsGEP( - elemTy, pointer, index, isSigned, isSubtraction, op.E->getExprLoc(), - "add.ptr"); + index = Builder.CreateNSWMul(index, numElements, "vla.index"); + pointer = + EmitCheckedInBoundsGEP(elemTy, pointer, index, isSigned, + isSubtraction, BO->getExprLoc(), "add.ptr"); } return pointer; } @@ -4309,16 +4314,15 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, // future proof. llvm::Type *elemTy; if (elementType->isVoidType() || elementType->isFunctionType()) - elemTy = CGF.Int8Ty; + elemTy = Int8Ty; else - elemTy = CGF.ConvertTypeForMem(elementType); + elemTy = ConvertTypeForMem(elementType); - if (CGF.getLangOpts().PointerOverflowDefined) - return CGF.Builder.CreateGEP(elemTy, pointer, index, "add.ptr"); + if (getLangOpts().PointerOverflowDefined) + return Builder.CreateGEP(elemTy, pointer, index, "add.ptr"); - return CGF.EmitCheckedInBoundsGEP( - elemTy, pointer, index, isSigned, isSubtraction, op.E->getExprLoc(), - "add.ptr"); + return EmitCheckedInBoundsGEP(elemTy, pointer, index, isSigned, isSubtraction, + BO->getExprLoc(), "add.ptr"); } // Construct an fmuladd intrinsic to represent a fused mul-add of MulOp and diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index f64ac20..918cb3e 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -103,13 +103,6 @@ llvm::Triple::ArchType CGHLSLRuntime::getArch() { return CGM.getTarget().getTriple().getArch(); } -// Returns true if the type is an HLSL resource class or an array of them -static bool isResourceRecordTypeOrArrayOf(const clang::Type *Ty) { - while (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(Ty)) - Ty = CAT->getArrayElementTypeNoTypeQual(); - return Ty->isHLSLResourceRecord(); -} - // Emits constant global variables for buffer constants declarations // and creates metadata linking the constant globals with the buffer global. void CGHLSLRuntime::emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl, @@ -146,7 +139,7 @@ void CGHLSLRuntime::emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl, if (VDTy.getAddressSpace() != LangAS::hlsl_constant) { if (VD->getStorageClass() == SC_Static || VDTy.getAddressSpace() == LangAS::hlsl_groupshared || - isResourceRecordTypeOrArrayOf(VDTy.getTypePtr())) { + VDTy->isHLSLResourceRecord() || VDTy->isHLSLResourceRecordArray()) { // Emit static and groupshared variables and resource classes inside // cbuffer as regular globals CGM.EmitGlobal(VD); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 6c32c98..bf16d72 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -701,14 +701,12 @@ public: bool isRedundantBeforeReturn() override { return true; } llvm::Value *Addr; - llvm::Value *Size; public: - CallLifetimeEnd(RawAddress addr, llvm::Value *size) - : Addr(addr.getPointer()), Size(size) {} + CallLifetimeEnd(RawAddress addr) : Addr(addr.getPointer()) {} void Emit(CodeGenFunction &CGF, Flags flags) override { - CGF.EmitLifetimeEnd(Size, Addr); + CGF.EmitLifetimeEnd(Addr); } }; @@ -3233,8 +3231,8 @@ public: void EmitSehTryScopeBegin(); void EmitSehTryScopeEnd(); - llvm::Value *EmitLifetimeStart(llvm::TypeSize Size, llvm::Value *Addr); - void EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr); + bool EmitLifetimeStart(llvm::Value *Addr); + void EmitLifetimeEnd(llvm::Value *Addr); llvm::Value *EmitCXXNewExpr(const CXXNewExpr *E); void EmitCXXDeleteExpr(const CXXDeleteExpr *E); @@ -3417,8 +3415,8 @@ public: /// initializer. bool IsConstantAggregate; - /// Non-null if we should use lifetime annotations. - llvm::Value *SizeForLifetimeMarkers; + /// True if lifetime markers should be used. + bool UseLifetimeMarkers; /// Address with original alloca instruction. Invalid if the variable was /// emitted as a global constant. @@ -3432,20 +3430,14 @@ public: AutoVarEmission(const VarDecl &variable) : Variable(&variable), Addr(Address::invalid()), NRVOFlag(nullptr), IsEscapingByRef(false), IsConstantAggregate(false), - SizeForLifetimeMarkers(nullptr), AllocaAddr(RawAddress::invalid()) {} + UseLifetimeMarkers(false), AllocaAddr(RawAddress::invalid()) {} bool wasEmittedAsGlobal() const { return !Addr.isValid(); } public: static AutoVarEmission invalid() { return AutoVarEmission(Invalid()); } - bool useLifetimeMarkers() const { - return SizeForLifetimeMarkers != nullptr; - } - llvm::Value *getSizeForLifetimeMarkers() const { - assert(useLifetimeMarkers()); - return SizeForLifetimeMarkers; - } + bool useLifetimeMarkers() const { return UseLifetimeMarkers; } /// Returns the raw, allocated address, which is not necessarily /// the address of the object itself. It is casted to default @@ -5220,6 +5212,12 @@ public: /// operation is a subtraction. enum { NotSubtraction = false, IsSubtraction = true }; + /// Emit pointer + index arithmetic. + llvm::Value *EmitPointerArithmetic(const BinaryOperator *BO, + Expr *pointerOperand, llvm::Value *pointer, + Expr *indexOperand, llvm::Value *index, + bool isSubtraction); + /// Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to /// detect undefined behavior when the pointer overflow sanitizer is enabled. /// \p SignedIndices indicates whether any of the GEP indices are signed. diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp index 38aaceb..05fb137 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -2269,6 +2269,11 @@ struct CounterCoverageMappingBuilder // Track LHS True/False Decision. const auto DecisionLHS = MCDCBuilder.pop(); + if (auto Gap = + findGapAreaBetween(getEnd(E->getLHS()), getStart(E->getRHS()))) { + fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), getRegionCounter(E)); + } + // Counter tracks the right hand side of a logical and operator. extendRegion(E->getRHS()); propagateCounts(getRegionCounter(E), E->getRHS()); @@ -2330,6 +2335,11 @@ struct CounterCoverageMappingBuilder // Track LHS True/False Decision. const auto DecisionLHS = MCDCBuilder.pop(); + if (auto Gap = + findGapAreaBetween(getEnd(E->getLHS()), getStart(E->getRHS()))) { + fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), getRegionCounter(E)); + } + // Counter tracks the right hand side of a logical or operator. extendRegion(E->getRHS()); propagateCounts(getRegionCounter(E), E->getRHS()); diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index aae1481..5ffc1ed 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -226,6 +226,10 @@ public: return hasUniqueVTablePointer(DestRecordTy); } + std::optional<ExactDynamicCastInfo> + getExactDynamicCastInfo(QualType SrcRecordTy, QualType DestTy, + QualType DestRecordTy) override; + llvm::Value *emitDynamicCastCall(CodeGenFunction &CGF, Address Value, QualType SrcRecordTy, QualType DestTy, QualType DestRecordTy, @@ -234,6 +238,7 @@ public: llvm::Value *emitExactDynamicCast(CodeGenFunction &CGF, Address ThisAddr, QualType SrcRecordTy, QualType DestTy, QualType DestRecordTy, + const ExactDynamicCastInfo &CastInfo, llvm::BasicBlock *CastSuccess, llvm::BasicBlock *CastFail) override; @@ -1681,10 +1686,11 @@ llvm::Value *ItaniumCXXABI::emitDynamicCastCall( return Value; } -llvm::Value *ItaniumCXXABI::emitExactDynamicCast( - CodeGenFunction &CGF, Address ThisAddr, QualType SrcRecordTy, - QualType DestTy, QualType DestRecordTy, llvm::BasicBlock *CastSuccess, - llvm::BasicBlock *CastFail) { +std::optional<CGCXXABI::ExactDynamicCastInfo> +ItaniumCXXABI::getExactDynamicCastInfo(QualType SrcRecordTy, QualType DestTy, + QualType DestRecordTy) { + assert(shouldEmitExactDynamicCast(DestRecordTy)); + ASTContext &Context = getContext(); // Find all the inheritance paths. @@ -1722,41 +1728,56 @@ llvm::Value *ItaniumCXXABI::emitExactDynamicCast( if (!Offset) Offset = PathOffset; else if (Offset != PathOffset) { - // Base appears in at least two different places. Find the most-derived - // object and see if it's a DestDecl. Note that the most-derived object - // must be at least as aligned as this base class subobject, and must - // have a vptr at offset 0. - ThisAddr = Address(emitDynamicCastToVoid(CGF, ThisAddr, SrcRecordTy), - CGF.VoidPtrTy, ThisAddr.getAlignment()); - SrcDecl = DestDecl; - Offset = CharUnits::Zero(); - break; + // Base appears in at least two different places. + return ExactDynamicCastInfo{/*RequiresCastToPrimaryBase=*/true, + CharUnits::Zero()}; } } + if (!Offset) + return std::nullopt; + return ExactDynamicCastInfo{/*RequiresCastToPrimaryBase=*/false, *Offset}; +} - if (!Offset) { - // If there are no public inheritance paths, the cast always fails. - CGF.EmitBranch(CastFail); - return llvm::PoisonValue::get(CGF.VoidPtrTy); - } +llvm::Value *ItaniumCXXABI::emitExactDynamicCast( + CodeGenFunction &CGF, Address ThisAddr, QualType SrcRecordTy, + QualType DestTy, QualType DestRecordTy, + const ExactDynamicCastInfo &ExactCastInfo, llvm::BasicBlock *CastSuccess, + llvm::BasicBlock *CastFail) { + const CXXRecordDecl *SrcDecl = SrcRecordTy->getAsCXXRecordDecl(); + const CXXRecordDecl *DestDecl = DestRecordTy->getAsCXXRecordDecl(); + + llvm::Value *VTable = nullptr; + if (ExactCastInfo.RequiresCastToPrimaryBase) { + // Base appears in at least two different places. Find the most-derived + // object and see if it's a DestDecl. Note that the most-derived object + // must be at least as aligned as this base class subobject, and must + // have a vptr at offset 0. + llvm::Value *PrimaryBase = + emitDynamicCastToVoid(CGF, ThisAddr, SrcRecordTy); + ThisAddr = Address(PrimaryBase, CGF.VoidPtrTy, ThisAddr.getAlignment()); + SrcDecl = DestDecl; + Address VTablePtrPtr = ThisAddr.withElementType(CGF.VoidPtrPtrTy); + VTable = CGF.Builder.CreateLoad(VTablePtrPtr, "vtable"); + } else + VTable = CGF.GetVTablePtr(ThisAddr, CGF.UnqualPtrTy, SrcDecl); // Compare the vptr against the expected vptr for the destination type at - // this offset. Note that we do not know what type ThisAddr points to in - // the case where the derived class multiply inherits from the base class - // so we can't use GetVTablePtr, so we load the vptr directly instead. - llvm::Instruction *VPtr = CGF.Builder.CreateLoad( - ThisAddr.withElementType(CGF.VoidPtrPtrTy), "vtable"); - CGM.DecorateInstructionWithTBAA( - VPtr, CGM.getTBAAVTablePtrAccessInfo(CGF.VoidPtrPtrTy)); - llvm::Value *Success = CGF.Builder.CreateICmpEQ( - VPtr, getVTableAddressPoint(BaseSubobject(SrcDecl, *Offset), DestDecl)); - llvm::Value *Result = ThisAddr.emitRawPointer(CGF); - if (!Offset->isZero()) - Result = CGF.Builder.CreateInBoundsGEP( - CGF.CharTy, Result, - {llvm::ConstantInt::get(CGF.PtrDiffTy, -Offset->getQuantity())}); + // this offset. + llvm::Constant *ExpectedVTable = getVTableAddressPoint( + BaseSubobject(SrcDecl, ExactCastInfo.Offset), DestDecl); + llvm::Value *Success = CGF.Builder.CreateICmpEQ(VTable, ExpectedVTable); + llvm::Value *AdjustedThisPtr = ThisAddr.emitRawPointer(CGF); + + if (!ExactCastInfo.Offset.isZero()) { + CharUnits::QuantityType Offset = ExactCastInfo.Offset.getQuantity(); + llvm::Constant *OffsetConstant = + llvm::ConstantInt::get(CGF.PtrDiffTy, -Offset); + AdjustedThisPtr = CGF.Builder.CreateInBoundsGEP(CGF.CharTy, AdjustedThisPtr, + OffsetConstant); + } + CGF.Builder.CreateCondBr(Success, CastSuccess, CastFail); - return Result; + return AdjustedThisPtr; } llvm::Value *ItaniumCXXABI::emitDynamicCastToVoid(CodeGenFunction &CGF, diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp index 700ffa4..e8d2451 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -158,9 +158,15 @@ public: // TODO: Add support for exact dynamic_casts. return false; } + std::optional<ExactDynamicCastInfo> + getExactDynamicCastInfo(QualType SrcRecordTy, QualType DestTy, + QualType DestRecordTy) override { + llvm_unreachable("unsupported"); + } llvm::Value *emitExactDynamicCast(CodeGenFunction &CGF, Address Value, QualType SrcRecordTy, QualType DestTy, QualType DestRecordTy, + const ExactDynamicCastInfo &CastInfo, llvm::BasicBlock *CastSuccess, llvm::BasicBlock *CastFail) override { llvm_unreachable("unsupported"); diff --git a/clang/lib/CodeGen/TargetBuiltins/WebAssembly.cpp b/clang/lib/CodeGen/TargetBuiltins/WebAssembly.cpp index 33a8d8f..1a1889a 100644 --- a/clang/lib/CodeGen/TargetBuiltins/WebAssembly.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/WebAssembly.cpp @@ -246,35 +246,26 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, llvm::FunctionType *LLVMFuncTy = cast<llvm::FunctionType>(ConvertType(QualType(FuncTy, 0))); + bool VarArg = LLVMFuncTy->isVarArg(); unsigned NParams = LLVMFuncTy->getNumParams(); std::vector<Value *> Args; - Args.reserve(NParams + 3); + Args.reserve(NParams + 3 + VarArg); // The only real argument is the FuncRef Args.push_back(FuncRef); // Add the type information - auto addType = [this, &Args](llvm::Type *T) { - if (T->isVoidTy()) { - // Do nothing - } else if (T->isFloatingPointTy()) { - Args.push_back(ConstantFP::get(T, 0)); - } else if (T->isIntegerTy()) { - Args.push_back(ConstantInt::get(T, 0)); - } else if (T->isPointerTy()) { - Args.push_back(ConstantPointerNull::get(llvm::PointerType::get( - getLLVMContext(), T->getPointerAddressSpace()))); - } else { - // TODO: Handle reference types. For now, we reject them in Sema. - llvm_unreachable("Unhandled type"); - } - }; - - addType(LLVMFuncTy->getReturnType()); + llvm::Type *RetType = LLVMFuncTy->getReturnType(); + if (!RetType->isVoidTy()) { + Args.push_back(PoisonValue::get(RetType)); + } // The token type indicates the boundary between return types and param // types. Args.push_back(PoisonValue::get(llvm::Type::getTokenTy(getLLVMContext()))); for (unsigned i = 0; i < NParams; i++) { - addType(LLVMFuncTy->getParamType(i)); + Args.push_back(PoisonValue::get(LLVMFuncTy->getParamType(i))); + } + if (VarArg) { + Args.push_back(PoisonValue::get(Builder.getPtrTy())); } Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_test_func); return Builder.CreateCall(Callee, Args); diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp index e23d19d..b508709 100644 --- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp @@ -1051,18 +1051,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_vfmsubsd3_mask3: return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2, /*NegAcc*/ true); - case X86::BI__builtin_ia32_vfmaddph: - case X86::BI__builtin_ia32_vfmaddps: - case X86::BI__builtin_ia32_vfmaddpd: - case X86::BI__builtin_ia32_vfmaddph256: - case X86::BI__builtin_ia32_vfmaddps256: - case X86::BI__builtin_ia32_vfmaddpd256: case X86::BI__builtin_ia32_vfmaddph512_mask: case X86::BI__builtin_ia32_vfmaddph512_maskz: case X86::BI__builtin_ia32_vfmaddph512_mask3: - case X86::BI__builtin_ia32_vfmaddbf16128: - case X86::BI__builtin_ia32_vfmaddbf16256: - case X86::BI__builtin_ia32_vfmaddbf16512: case X86::BI__builtin_ia32_vfmaddps512_mask: case X86::BI__builtin_ia32_vfmaddps512_maskz: case X86::BI__builtin_ia32_vfmaddps512_mask3: diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 586f287..8c0bba9 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -1012,6 +1012,7 @@ inferOffloadToolchains(Compilation &C, Action::OffloadKind Kind) { Arg *A = new Arg(Opt, C.getArgs().getArgString(Index), Index, C.getArgs().MakeArgString(Triple.split("-").first), C.getArgs().MakeArgString("--offload-arch=" + Arch)); + A->claim(); C.getArgs().append(A); C.getArgs().AddSynthesizedArg(A); Triples.insert(Triple); diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 25c6b5a..7667dbd 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -855,17 +855,30 @@ void ToolChain::addFortranRuntimeLibs(const ArgList &Args, void ToolChain::addFortranRuntimeLibraryPath(const llvm::opt::ArgList &Args, ArgStringList &CmdArgs) const { - // Default to the <driver-path>/../lib directory. This works fine on the - // platforms that we have tested so far. We will probably have to re-fine - // this in the future. In particular, on some platforms, we may need to use - // lib64 instead of lib. + auto AddLibSearchPathIfExists = [&](const Twine &Path) { + // Linker may emit warnings about non-existing directories + if (!llvm::sys::fs::is_directory(Path)) + return; + + if (getTriple().isKnownWindowsMSVCEnvironment()) + CmdArgs.push_back(Args.MakeArgString("-libpath:" + Path)); + else + CmdArgs.push_back(Args.MakeArgString("-L" + Path)); + }; + + // Search for flang_rt.* at the same location as clang_rt.* with + // LLVM_ENABLE_PER_TARGET_RUNTIME_DIR=0. On most platforms, flang_rt is + // located at the path returned by getRuntimePath() which is already added to + // the library search path. This exception is for Apple-Darwin. + AddLibSearchPathIfExists(getCompilerRTPath()); + + // Fall back to the non-resource directory <driver-path>/../lib. We will + // probably have to refine this in the future. In particular, on some + // platforms, we may need to use lib64 instead of lib. SmallString<256> DefaultLibPath = llvm::sys::path::parent_path(getDriver().Dir); llvm::sys::path::append(DefaultLibPath, "lib"); - if (getTriple().isKnownWindowsMSVCEnvironment()) - CmdArgs.push_back(Args.MakeArgString("-libpath:" + DefaultLibPath)); - else - CmdArgs.push_back(Args.MakeArgString("-L" + DefaultLibPath)); + AddLibSearchPathIfExists(DefaultLibPath); } void ToolChain::addFlangRTLibPath(const ArgList &Args, diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index d64290f..9f99edad 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -1485,6 +1485,14 @@ static bool compileModuleAndReadASTImpl(CompilerInstance &ImportingInstance, return false; } + // The module is built successfully, we can update its timestamp now. + if (ImportingInstance.getPreprocessor() + .getHeaderSearchInfo() + .getHeaderSearchOpts() + .ModulesValidateOncePerBuildSession) { + ImportingInstance.getModuleCache().updateModuleTimestamp(ModuleFileName); + } + return readASTAfterCompileModule(ImportingInstance, ImportLoc, ModuleNameLoc, Module, ModuleFileName, /*OutOfDate=*/nullptr, /*Missing=*/nullptr); diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 9f77e62..ccc3154 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3936,47 +3936,18 @@ void CompilerInvocationBase::GenerateLangArgs(const LangOptions &Opts, GenerateArg(Consumer, OPT_fsanitize_ignorelist_EQ, F); switch (Opts.getClangABICompat()) { - case LangOptions::ClangABI::Ver3_8: - GenerateArg(Consumer, OPT_fclang_abi_compat_EQ, "3.8"); +#define ABI_VER_MAJOR_MINOR(Major, Minor) \ + case LangOptions::ClangABI::Ver##Major##_##Minor: \ + GenerateArg(Consumer, OPT_fclang_abi_compat_EQ, #Major "." #Minor); \ break; - case LangOptions::ClangABI::Ver4: - GenerateArg(Consumer, OPT_fclang_abi_compat_EQ, "4.0"); +#define ABI_VER_MAJOR(Major) \ + case LangOptions::ClangABI::Ver##Major: \ + GenerateArg(Consumer, OPT_fclang_abi_compat_EQ, #Major ".0"); \ break; - case LangOptions::ClangABI::Ver6: - GenerateArg(Consumer, OPT_fclang_abi_compat_EQ, "6.0"); - break; - case LangOptions::ClangABI::Ver7: - GenerateArg(Consumer, OPT_fclang_abi_compat_EQ, "7.0"); - break; - case LangOptions::ClangABI::Ver9: - GenerateArg(Consumer, OPT_fclang_abi_compat_EQ, "9.0"); - break; - case LangOptions::ClangABI::Ver11: - GenerateArg(Consumer, OPT_fclang_abi_compat_EQ, "11.0"); - break; - case LangOptions::ClangABI::Ver12: - GenerateArg(Consumer, OPT_fclang_abi_compat_EQ, "12.0"); - break; - case LangOptions::ClangABI::Ver14: - GenerateArg(Consumer, OPT_fclang_abi_compat_EQ, "14.0"); - break; - case LangOptions::ClangABI::Ver15: - GenerateArg(Consumer, OPT_fclang_abi_compat_EQ, "15.0"); - break; - case LangOptions::ClangABI::Ver17: - GenerateArg(Consumer, OPT_fclang_abi_compat_EQ, "17.0"); - break; - case LangOptions::ClangABI::Ver18: - GenerateArg(Consumer, OPT_fclang_abi_compat_EQ, "18.0"); - break; - case LangOptions::ClangABI::Ver19: - GenerateArg(Consumer, OPT_fclang_abi_compat_EQ, "19.0"); - break; - case LangOptions::ClangABI::Ver20: - GenerateArg(Consumer, OPT_fclang_abi_compat_EQ, "20.0"); - break; - case LangOptions::ClangABI::Latest: +#define ABI_VER_LATEST(Latest) \ + case LangOptions::ClangABI::Latest: \ break; +#include "clang/Basic/ABIVersions.def" } if (Opts.getSignReturnAddressScope() == @@ -4482,32 +4453,18 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, !VerParts.second.getAsInteger(10, Minor) : VerParts.first.size() == Ver.size() || VerParts.second == "0")) { // Got a valid version number. - if (Major == 3 && Minor <= 8) - Opts.setClangABICompat(LangOptions::ClangABI::Ver3_8); - else if (Major <= 4) - Opts.setClangABICompat(LangOptions::ClangABI::Ver4); - else if (Major <= 6) - Opts.setClangABICompat(LangOptions::ClangABI::Ver6); - else if (Major <= 7) - Opts.setClangABICompat(LangOptions::ClangABI::Ver7); - else if (Major <= 9) - Opts.setClangABICompat(LangOptions::ClangABI::Ver9); - else if (Major <= 11) - Opts.setClangABICompat(LangOptions::ClangABI::Ver11); - else if (Major <= 12) - Opts.setClangABICompat(LangOptions::ClangABI::Ver12); - else if (Major <= 14) - Opts.setClangABICompat(LangOptions::ClangABI::Ver14); - else if (Major <= 15) - Opts.setClangABICompat(LangOptions::ClangABI::Ver15); - else if (Major <= 17) - Opts.setClangABICompat(LangOptions::ClangABI::Ver17); - else if (Major <= 18) - Opts.setClangABICompat(LangOptions::ClangABI::Ver18); - else if (Major <= 19) - Opts.setClangABICompat(LangOptions::ClangABI::Ver19); - else if (Major <= 20) - Opts.setClangABICompat(LangOptions::ClangABI::Ver20); +#define ABI_VER_MAJOR_MINOR(Major_, Minor_) \ + if (std::tie(Major, Minor) <= std::tuple(Major_, Minor_)) \ + Opts.setClangABICompat(LangOptions::ClangABI::Ver##Major_##_##Minor_); \ + else +#define ABI_VER_MAJOR(Major_) \ + if (Major <= Major_) \ + Opts.setClangABICompat(LangOptions::ClangABI::Ver##Major_); \ + else +#define ABI_VER_LATEST(Latest) \ + { /* Equivalent to latest version - do nothing */ \ + } +#include "clang/Basic/ABIVersions.def" } else if (Ver != "latest") { Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); diff --git a/clang/lib/Headers/avx10_2_512bf16intrin.h b/clang/lib/Headers/avx10_2_512bf16intrin.h index 75290d2..95e9bd7a 100644 --- a/clang/lib/Headers/avx10_2_512bf16intrin.h +++ b/clang/lib/Headers/avx10_2_512bf16intrin.h @@ -441,8 +441,8 @@ _mm512_maskz_sqrt_pbh(__mmask32 __U, __m512bh __A) { static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_fmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { - return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, (__v32bf)__B, - (__v32bf)__C); + return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, (__v32bf)__B, + (__v32bf)__C); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 @@ -469,8 +469,8 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pbh( static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_fmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { - return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, (__v32bf)__B, - -(__v32bf)__C); + return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, (__v32bf)__B, + -(__v32bf)__C); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 @@ -497,8 +497,8 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pbh( static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_fnmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { - return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, -(__v32bf)__B, - (__v32bf)__C); + return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, -(__v32bf)__B, + (__v32bf)__C); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_pbh( @@ -527,8 +527,8 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pbh( static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_fnmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { - return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, -(__v32bf)__B, - -(__v32bf)__C); + return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, -(__v32bf)__B, + -(__v32bf)__C); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_pbh( diff --git a/clang/lib/Headers/avx10_2bf16intrin.h b/clang/lib/Headers/avx10_2bf16intrin.h index 66797ae..0c7f381 100644 --- a/clang/lib/Headers/avx10_2bf16intrin.h +++ b/clang/lib/Headers/avx10_2bf16intrin.h @@ -852,8 +852,8 @@ _mm_maskz_sqrt_pbh(__mmask8 __U, __m128bh __A) { static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_fmadd_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { - return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, (__v16bf)__B, - (__v16bf)__C); + return (__m256bh)__builtin_elementwise_fma((__v16bf)__A, (__v16bf)__B, + (__v16bf)__C); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 @@ -880,8 +880,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_pbh( static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_fmsub_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { - return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, (__v16bf)__B, - -(__v16bf)__C); + return (__m256bh)__builtin_elementwise_fma((__v16bf)__A, (__v16bf)__B, + -(__v16bf)__C); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 @@ -908,8 +908,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_pbh( static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_fnmadd_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { - return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, -(__v16bf)__B, - (__v16bf)__C); + return (__m256bh)__builtin_elementwise_fma((__v16bf)__A, -(__v16bf)__B, + (__v16bf)__C); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_pbh( @@ -938,8 +938,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_pbh( static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_fnmsub_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { - return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, -(__v16bf)__B, - -(__v16bf)__C); + return (__m256bh)__builtin_elementwise_fma((__v16bf)__A, -(__v16bf)__B, + -(__v16bf)__C); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_pbh( @@ -969,8 +969,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_pbh( static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmadd_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { - return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, (__v8bf)__B, - (__v8bf)__C); + return (__m128bh)__builtin_elementwise_fma((__v8bf)__A, (__v8bf)__B, + (__v8bf)__C); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 @@ -997,8 +997,8 @@ _mm_maskz_fmadd_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmsub_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { - return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, (__v8bf)__B, - -(__v8bf)__C); + return (__m128bh)__builtin_elementwise_fma((__v8bf)__A, (__v8bf)__B, + -(__v8bf)__C); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 @@ -1025,8 +1025,8 @@ _mm_maskz_fmsub_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fnmadd_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { - return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, -(__v8bf)__B, - (__v8bf)__C); + return (__m128bh)__builtin_elementwise_fma((__v8bf)__A, -(__v8bf)__B, + (__v8bf)__C); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 @@ -1053,8 +1053,8 @@ _mm_maskz_fnmadd_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fnmsub_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { - return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, -(__v8bf)__B, - -(__v8bf)__C); + return (__m128bh)__builtin_elementwise_fma((__v8bf)__A, -(__v8bf)__B, + -(__v8bf)__C); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index dc9fc07..55e7102 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -31,6 +31,14 @@ __min_vector_width__(128))) #endif +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr +#else +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 +#endif + /* SSE4 Multiple Packed Sums of Absolute Difference. */ /// Computes sixteen sum of absolute difference (SAD) operations on sets of /// four unsigned 8-bit integers from the 256-bit integer vectors \a X and @@ -460,7 +468,7 @@ _mm256_adds_epu16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_and_si256(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a & (__v4du)__b); @@ -478,7 +486,7 @@ _mm256_and_si256(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_andnot_si256(__m256i __a, __m256i __b) { return (__m256i)(~(__v4du)__a & (__v4du)__b); @@ -1721,10 +1729,10 @@ _mm256_mulhrs_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the products. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mulhi_epu16(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b); + return (__m256i)__builtin_ia32_pmulhuw256((__v16hu)__a, (__v16hu)__b); } /// Multiplies signed 16-bit integer elements of two 256-bit vectors of @@ -1740,7 +1748,7 @@ _mm256_mulhi_epu16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the products. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mulhi_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b); @@ -1759,7 +1767,7 @@ _mm256_mulhi_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the products. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mullo_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hu)__a * (__v16hu)__b); @@ -1822,7 +1830,7 @@ _mm256_mul_epu32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_or_si256(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a | (__v4du)__b); @@ -2974,7 +2982,7 @@ _mm256_unpacklo_epi64(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_xor_si256(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a ^ (__v4du)__b); @@ -5289,5 +5297,7 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y) #undef __DEFAULT_FN_ATTRS256 #undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256_CONSTEXPR +#undef __DEFAULT_FN_ATTRS128_CONSTEXPR #endif /* __AVX2INTRIN_H */ diff --git a/clang/lib/Headers/avx512bitalgintrin.h b/clang/lib/Headers/avx512bitalgintrin.h index 3c446b3..9a1ff8f3 100644 --- a/clang/lib/Headers/avx512bitalgintrin.h +++ b/clang/lib/Headers/avx512bitalgintrin.h @@ -20,7 +20,13 @@ __target__("avx512bitalg,evex512"), \ __min_vector_width__(512))) -static __inline__ __m512i __DEFAULT_FN_ATTRS +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#else +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#endif + +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_popcnt_epi16(__m512i __A) { return (__m512i)__builtin_elementwise_popcount((__v32hu)__A); @@ -42,7 +48,7 @@ _mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_popcnt_epi8(__m512i __A) { return (__m512i)__builtin_elementwise_popcount((__v64qu)__A); @@ -80,7 +86,7 @@ _mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) __B); } - #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index c854720..233d4a6 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -25,6 +25,14 @@ typedef unsigned long long __mmask64; __attribute__((__always_inline__, __nodebug__, \ __target__("avx512bw,no-evex512"))) +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#else +#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#endif + static __inline __mmask32 __DEFAULT_FN_ATTRS _knot_mask32(__mmask32 __M) { @@ -438,7 +446,7 @@ _mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) { (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mullo_epi16 (__m512i __A, __m512i __B) { return (__m512i) ((__v32hu) __A * (__v32hu) __B); } @@ -1082,7 +1090,7 @@ _mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mulhi_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B); @@ -1105,10 +1113,10 @@ _mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mulhi_epu16(__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pmulhuw512((__v32hi) __A, (__v32hi) __B); + return (__m512i)__builtin_ia32_pmulhuw512((__v32hu) __A, (__v32hu) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -2010,5 +2018,7 @@ _mm512_sad_epu8 (__m512i __A, __m512i __B) #undef __DEFAULT_FN_ATTRS512 #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS512_CONSTEXPR +#undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index 88b48e3..62325b9 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -20,6 +20,14 @@ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512dq,no-evex512"))) +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#else +#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#endif + static __inline __mmask8 __DEFAULT_FN_ATTRS _knot_mask8(__mmask8 __M) { @@ -167,7 +175,7 @@ _mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) { (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_pd(__m512d __A, __m512d __B) { return (__m512d)((__v8du)__A ^ (__v8du)__B); } @@ -186,7 +194,7 @@ _mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) { (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_ps (__m512 __A, __m512 __B) { return (__m512)((__v16su)__A ^ (__v16su)__B); } @@ -205,7 +213,7 @@ _mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) { (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_pd(__m512d __A, __m512d __B) { return (__m512d)((__v8du)__A | (__v8du)__B); } @@ -224,7 +232,7 @@ _mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) { (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_ps(__m512 __A, __m512 __B) { return (__m512)((__v16su)__A | (__v16su)__B); } @@ -243,7 +251,7 @@ _mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) { (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_pd(__m512d __A, __m512d __B) { return (__m512d)((__v8du)__A & (__v8du)__B); } @@ -262,7 +270,7 @@ _mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) { (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_ps(__m512 __A, __m512 __B) { return (__m512)((__v16su)__A & (__v16su)__B); } @@ -281,7 +289,7 @@ _mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) { (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_pd(__m512d __A, __m512d __B) { return (__m512d)(~(__v8du)__A & (__v8du)__B); } @@ -300,7 +308,7 @@ _mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) { (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_ps(__m512 __A, __m512 __B) { return (__m512)(~(__v16su)__A & (__v16su)__B); } @@ -1375,5 +1383,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) #undef __DEFAULT_FN_ATTRS512 #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS512_CONSTEXPR +#undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 45e7eeb..95b80cc 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -277,20 +277,20 @@ _mm512_setzero_pd(void) { return __extension__(__m512d){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; } -static __inline __m512 __DEFAULT_FN_ATTRS512 +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_ps(float __w) { return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w }; } -static __inline __m512d __DEFAULT_FN_ATTRS512 +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_pd(double __w) { return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w }; } -static __inline __m512i __DEFAULT_FN_ATTRS512 +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi8(char __w) { return __extension__ (__m512i)(__v64qi){ @@ -304,7 +304,7 @@ _mm512_set1_epi8(char __w) __w, __w, __w, __w, __w, __w, __w, __w }; } -static __inline __m512i __DEFAULT_FN_ATTRS512 +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi16(short __w) { return __extension__ (__m512i)(__v32hi){ @@ -314,7 +314,7 @@ _mm512_set1_epi16(short __w) __w, __w, __w, __w, __w, __w, __w, __w }; } -static __inline __m512i __DEFAULT_FN_ATTRS512 +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi32(int __s) { return __extension__ (__m512i)(__v16si){ @@ -330,7 +330,7 @@ _mm512_maskz_set1_epi32(__mmask16 __M, int __A) (__v16si)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS512 +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi64(long long __d) { return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d }; @@ -645,7 +645,7 @@ _mm512_zextsi256_si512(__m256i __a) } /* Bitwise operators */ -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi32(__m512i __a, __m512i __b) { return (__m512i)((__v16su)__a & (__v16su)__b); @@ -666,7 +666,7 @@ _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b) __k, __a, __b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi64(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a & (__v8du)__b); @@ -687,13 +687,13 @@ _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b) __k, __a, __b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_si512 (__m512i __A, __m512i __B) { return (__m512i)(~(__v8du)__A & (__v8du)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi32 (__m512i __A, __m512i __B) { return (__m512i)(~(__v16su)__A & (__v16su)__B); @@ -714,7 +714,7 @@ _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B) __U, __A, __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi64(__m512i __A, __m512i __B) { return (__m512i)(~(__v8du)__A & (__v8du)__B); @@ -735,7 +735,7 @@ _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B) __U, __A, __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi32(__m512i __a, __m512i __b) { return (__m512i)((__v16su)__a | (__v16su)__b); @@ -755,7 +755,7 @@ _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b) return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi64(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a | (__v8du)__b); @@ -775,7 +775,7 @@ _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b) return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi32(__m512i __a, __m512i __b) { return (__m512i)((__v16su)__a ^ (__v16su)__b); @@ -795,7 +795,7 @@ _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b) return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi64(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a ^ (__v8du)__b); @@ -815,19 +815,19 @@ _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b) return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_si512(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a & (__v8du)__b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_si512(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a | (__v8du)__b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_si512(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a ^ (__v8du)__b); @@ -835,45 +835,38 @@ _mm512_xor_si512(__m512i __a, __m512i __b) /* Arithmetic */ -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_add_pd(__m512d __a, __m512d __b) -{ +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_add_pd(__m512d __a, __m512d __b) { return (__m512d)((__v8df)__a + (__v8df)__b); } -static __inline __m512 __DEFAULT_FN_ATTRS512 -_mm512_add_ps(__m512 __a, __m512 __b) -{ +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_add_ps(__m512 __a, __m512 __b) { return (__m512)((__v16sf)__a + (__v16sf)__b); } -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_mul_pd(__m512d __a, __m512d __b) -{ +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mul_pd(__m512d __a, __m512d __b) { return (__m512d)((__v8df)__a * (__v8df)__b); } -static __inline __m512 __DEFAULT_FN_ATTRS512 -_mm512_mul_ps(__m512 __a, __m512 __b) -{ +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mul_ps(__m512 __a, __m512 __b) { return (__m512)((__v16sf)__a * (__v16sf)__b); } -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_sub_pd(__m512d __a, __m512d __b) -{ +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_sub_pd(__m512d __a, __m512d __b) { return (__m512d)((__v8df)__a - (__v8df)__b); } -static __inline __m512 __DEFAULT_FN_ATTRS512 -_mm512_sub_ps(__m512 __a, __m512 __b) -{ +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_sub_ps(__m512 __a, __m512 __b) { return (__m512)((__v16sf)__a - (__v16sf)__b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_add_epi64 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_add_epi64(__m512i __A, __m512i __B) { return (__m512i) ((__v8du) __A + (__v8du) __B); } @@ -2315,9 +2308,8 @@ _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) { (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R))) -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_div_pd(__m512d __a, __m512d __b) -{ +static __inline __m512d + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_div_pd(__m512d __a, __m512d __b) { return (__m512d)((__v8df)__a/(__v8df)__b); } @@ -2335,9 +2327,8 @@ _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) { (__v8df)_mm512_setzero_pd()); } -static __inline __m512 __DEFAULT_FN_ATTRS512 -_mm512_div_ps(__m512 __a, __m512 __b) -{ +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_div_ps(__m512 __a, __m512 __b) { return (__m512)((__v16sf)__a/(__v16sf)__b); } @@ -4123,9 +4114,8 @@ _mm512_cvtss_f32(__m512 __a) /* Unpack and Interleave */ -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_unpackhi_pd(__m512d __a, __m512d __b) -{ +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_unpackhi_pd(__m512d __a, __m512d __b) { return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); } @@ -4146,9 +4136,8 @@ _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B) (__v8df)_mm512_setzero_pd()); } -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_unpacklo_pd(__m512d __a, __m512d __b) -{ +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_unpacklo_pd(__m512d __a, __m512d __b) { return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); } @@ -4169,9 +4158,8 @@ _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B) (__v8df)_mm512_setzero_pd()); } -static __inline __m512 __DEFAULT_FN_ATTRS512 -_mm512_unpackhi_ps(__m512 __a, __m512 __b) -{ +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_unpackhi_ps(__m512 __a, __m512 __b) { return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b, 2, 18, 3, 19, 2+4, 18+4, 3+4, 19+4, @@ -4195,9 +4183,8 @@ _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B) (__v16sf)_mm512_setzero_ps()); } -static __inline __m512 __DEFAULT_FN_ATTRS512 -_mm512_unpacklo_ps(__m512 __a, __m512 __b) -{ +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_unpacklo_ps(__m512 __a, __m512 __b) { return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b, 0, 16, 1, 17, 0+4, 16+4, 1+4, 17+4, @@ -5303,7 +5290,7 @@ _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A) (__mmask8) __U); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movedup_pd (__m512d __A) { return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A, @@ -8665,7 +8652,7 @@ _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) _mm512_setzero_si512()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movehdup_ps (__m512 __A) { return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A, @@ -8688,7 +8675,7 @@ _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A) (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_moveldup_ps (__m512 __A) { return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A, @@ -9337,19 +9324,23 @@ _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A) * This takes log2(n) steps where n is the number of elements in the vector. */ -static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) { +static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_reduce_add_epi64(__m512i __W) { return __builtin_reduce_add((__v8di)__W); } -static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W) { +static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_reduce_mul_epi64(__m512i __W) { return __builtin_reduce_mul((__v8di)__W); } -static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W) { +static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_reduce_and_epi64(__m512i __W) { return __builtin_reduce_and((__v8di)__W); } -static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W) { +static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_reduce_or_epi64(__m512i __W) { return __builtin_reduce_or((__v8di)__W); } @@ -9400,22 +9391,22 @@ _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) { return __builtin_ia32_reduce_fmul_pd512(1.0, __W); } -static __inline__ int __DEFAULT_FN_ATTRS512 +static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_add_epi32(__m512i __W) { return __builtin_reduce_add((__v16si)__W); } -static __inline__ int __DEFAULT_FN_ATTRS512 +static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_mul_epi32(__m512i __W) { return __builtin_reduce_mul((__v16si)__W); } -static __inline__ int __DEFAULT_FN_ATTRS512 +static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_and_epi32(__m512i __W) { return __builtin_reduce_and((__v16si)__W); } -static __inline__ int __DEFAULT_FN_ATTRS512 +static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_or_epi32(__m512i __W) { return __builtin_reduce_or((__v16si)__W); } @@ -9466,22 +9457,22 @@ _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) { return __builtin_ia32_reduce_fmul_ps512(1.0f, __W); } -static __inline__ long long __DEFAULT_FN_ATTRS512 +static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epi64(__m512i __V) { return __builtin_reduce_max((__v8di)__V); } -static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 +static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epu64(__m512i __V) { return __builtin_reduce_max((__v8du)__V); } -static __inline__ long long __DEFAULT_FN_ATTRS512 +static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epi64(__m512i __V) { return __builtin_reduce_min((__v8di)__V); } -static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 +static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epu64(__m512i __V) { return __builtin_reduce_min((__v8du)__V); } @@ -9509,22 +9500,22 @@ _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) { __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __V); return __builtin_reduce_min((__v8du)__V); } -static __inline__ int __DEFAULT_FN_ATTRS512 +static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epi32(__m512i __V) { return __builtin_reduce_max((__v16si)__V); } -static __inline__ unsigned int __DEFAULT_FN_ATTRS512 +static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epu32(__m512i __V) { return __builtin_reduce_max((__v16su)__V); } -static __inline__ int __DEFAULT_FN_ATTRS512 +static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epi32(__m512i __V) { return __builtin_reduce_min((__v16si)__V); } -static __inline__ unsigned int __DEFAULT_FN_ATTRS512 +static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epu32(__m512i __V) { return __builtin_reduce_min((__v16su)__V); } diff --git a/clang/lib/Headers/avx512vlbitalgintrin.h b/clang/lib/Headers/avx512vlbitalgintrin.h index 1b01fe0..739e78a 100644 --- a/clang/lib/Headers/avx512vlbitalgintrin.h +++ b/clang/lib/Headers/avx512vlbitalgintrin.h @@ -24,7 +24,15 @@ __target__("avx512vl,avx512bitalg,no-evex512"), \ __min_vector_width__(256))) -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr +#else +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 +#endif + +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_popcnt_epi16(__m256i __A) { return (__m256i)__builtin_elementwise_popcount((__v16hu)__A); @@ -46,7 +54,7 @@ _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_popcnt_epi16(__m128i __A) { return (__m128i)__builtin_elementwise_popcount((__v8hu)__A); @@ -68,7 +76,7 @@ _mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_popcnt_epi8(__m256i __A) { return (__m256i)__builtin_elementwise_popcount((__v32qu)__A); @@ -90,7 +98,7 @@ _mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_popcnt_epi8(__m128i __A) { return (__m128i)__builtin_elementwise_popcount((__v16qu)__A); @@ -147,5 +155,7 @@ _mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B) #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 +#undef __DEFAULT_FN_ATTRS128_CONSTEXPR +#undef __DEFAULT_FN_ATTRS256_CONSTEXPR #endif diff --git a/clang/lib/Headers/avx512vlfp16intrin.h b/clang/lib/Headers/avx512vlfp16intrin.h index a12acb7..1f8cca7 100644 --- a/clang/lib/Headers/avx512vlfp16intrin.h +++ b/clang/lib/Headers/avx512vlfp16intrin.h @@ -1419,8 +1419,8 @@ _mm256_maskz_cvtxps_ph(__mmask8 __U, __m256 __A) { static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_ph(__m128h __A, __m128h __B, __m128h __C) { - return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, - (__v8hf)__C); + return (__m128h)__builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, + (__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ph(__m128h __A, @@ -1429,7 +1429,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ph(__m128h __A, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), + __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)__A); } @@ -1437,7 +1437,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), + __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)__C); } @@ -1445,15 +1445,15 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), + __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)_mm_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsub_ph(__m128h __A, __m128h __B, __m128h __C) { - return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, - -(__v8hf)__C); + return (__m128h)__builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, + -(__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ph(__m128h __A, @@ -1476,7 +1476,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C), + __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)__C); } @@ -1484,7 +1484,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C), + __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)_mm_setzero_ph()); } @@ -1492,22 +1492,22 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), + __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), (__v8hf)_mm_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_ph(__m256h __A, __m256h __B, __m256h __C) { - return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, - (__v16hf)__C); + return (__m256h)__builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, + (__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)__A); } @@ -1515,7 +1515,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)__C); } @@ -1523,22 +1523,22 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmsub_ph(__m256h __A, __m256h __B, __m256h __C) { - return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, - -(__v16hf)__C); + return (__m256h)__builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, + -(__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)__A); } @@ -1546,7 +1546,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } @@ -1554,7 +1554,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C), + __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)__C); } @@ -1562,7 +1562,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C), + __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } @@ -1570,7 +1570,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), + __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } @@ -1684,7 +1684,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), + __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), (__v8hf)__C); } @@ -1692,7 +1692,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)__C); } @@ -1715,45 +1715,45 @@ _mm256_mask3_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C) { - return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, - (__v8hf)__C); + return (__m128h)__builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, + (__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C), + __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C), (__v8hf)__A); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmadd_ph(__m256h __A, __m256h __B, __m256h __C) { - return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, - (__v16hf)__C); + return (__m256h)__builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, + (__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, (__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, (__v16hf)__C), (__v16hf)__A); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmsub_ph(__m128h __A, __m128h __B, __m128h __C) { - return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, - -(__v8hf)__C); + return (__m128h)__builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, + -(__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C), + __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C), (__v8hf)__A); } @@ -1761,22 +1761,22 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C), + __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C), (__v8hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmsub_ph(__m256h __A, __m256h __B, __m256h __C) { - return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, - -(__v16hf)__C); + return (__m256h)__builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, + -(__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C), (__v16hf)__A); } @@ -1784,7 +1784,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C), (__v16hf)__C); } diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 2a5f7b4..cbad39a 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -899,321 +899,289 @@ _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd ((__v2df) __A, - (__v2df) __B, - (__v2df) __C), - (__v2df) __A); + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v2df)__A, (__v2df)__B, (__v2df)__C), + (__v2df)__A); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd ((__v2df) __A, - (__v2df) __B, - (__v2df) __C), - (__v2df) __C); + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v2df)__A, (__v2df)__B, (__v2df)__C), + (__v2df)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd ((__v2df) __A, - (__v2df) __B, - (__v2df) __C), - (__v2df)_mm_setzero_pd()); + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v2df)__A, (__v2df)__B, (__v2df)__C), + (__v2df)_mm_setzero_pd()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd ((__v2df) __A, - (__v2df) __B, - -(__v2df) __C), - (__v2df) __A); + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v2df)__A, (__v2df)__B, -(__v2df)__C), + (__v2df)__A); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd ((__v2df) __A, - (__v2df) __B, - -(__v2df) __C), - (__v2df)_mm_setzero_pd()); + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v2df)__A, (__v2df)__B, -(__v2df)__C), + (__v2df)_mm_setzero_pd()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd (-(__v2df) __A, - (__v2df) __B, - (__v2df) __C), - (__v2df) __C); + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, + __builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B, (__v2df)__C), + (__v2df)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd (-(__v2df) __A, - (__v2df) __B, - (__v2df) __C), - (__v2df)_mm_setzero_pd()); + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, + __builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B, (__v2df)__C), + (__v2df)_mm_setzero_pd()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd (-(__v2df) __A, - (__v2df) __B, - -(__v2df) __C), - (__v2df)_mm_setzero_pd()); + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, + __builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B, -(__v2df)__C), + (__v2df)_mm_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 ((__v4df) __A, - (__v4df) __B, - (__v4df) __C), - (__v4df) __A); + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v4df)__A, (__v4df)__B, (__v4df)__C), + (__v4df)__A); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 ((__v4df) __A, - (__v4df) __B, - (__v4df) __C), - (__v4df) __C); + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v4df)__A, (__v4df)__B, (__v4df)__C), + (__v4df)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 ((__v4df) __A, - (__v4df) __B, - (__v4df) __C), - (__v4df)_mm256_setzero_pd()); + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v4df)__A, (__v4df)__B, (__v4df)__C), + (__v4df)_mm256_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 ((__v4df) __A, - (__v4df) __B, - -(__v4df) __C), - (__v4df) __A); + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v4df)__A, (__v4df)__B, -(__v4df)__C), + (__v4df)__A); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 ((__v4df) __A, - (__v4df) __B, - -(__v4df) __C), - (__v4df)_mm256_setzero_pd()); + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v4df)__A, (__v4df)__B, -(__v4df)__C), + (__v4df)_mm256_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 (-(__v4df) __A, - (__v4df) __B, - (__v4df) __C), - (__v4df) __C); + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, + __builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B, (__v4df)__C), + (__v4df)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 (-(__v4df) __A, - (__v4df) __B, - (__v4df) __C), - (__v4df)_mm256_setzero_pd()); + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, + __builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B, (__v4df)__C), + (__v4df)_mm256_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 (-(__v4df) __A, - (__v4df) __B, - -(__v4df) __C), - (__v4df)_mm256_setzero_pd()); + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, + __builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B, -(__v4df)__C), + (__v4df)_mm256_setzero_pd()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) __C), - (__v4sf) __A); + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, (__v4sf)__C), + (__v4sf)__A); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) __C), - (__v4sf) __C); + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, (__v4sf)__C), + (__v4sf)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) __C), - (__v4sf)_mm_setzero_ps()); + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, (__v4sf)__C), + (__v4sf)_mm_setzero_ps()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps ((__v4sf) __A, - (__v4sf) __B, - -(__v4sf) __C), - (__v4sf) __A); + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C), + (__v4sf)__A); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps ((__v4sf) __A, - (__v4sf) __B, - -(__v4sf) __C), - (__v4sf)_mm_setzero_ps()); + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C), + (__v4sf)_mm_setzero_ps()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps (-(__v4sf) __A, - (__v4sf) __B, - (__v4sf) __C), - (__v4sf) __C); + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, + __builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C), + (__v4sf)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps (-(__v4sf) __A, - (__v4sf) __B, - (__v4sf) __C), - (__v4sf)_mm_setzero_ps()); + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, + __builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C), + (__v4sf)_mm_setzero_ps()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps (-(__v4sf) __A, - (__v4sf) __B, - -(__v4sf) __C), - (__v4sf)_mm_setzero_ps()); + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, + __builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C), + (__v4sf)_mm_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 ((__v8sf) __A, - (__v8sf) __B, - (__v8sf) __C), - (__v8sf) __A); + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, (__v8sf)__C), + (__v8sf)__A); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 ((__v8sf) __A, - (__v8sf) __B, - (__v8sf) __C), - (__v8sf) __C); + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, (__v8sf)__C), + (__v8sf)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 ((__v8sf) __A, - (__v8sf) __B, - (__v8sf) __C), - (__v8sf)_mm256_setzero_ps()); + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, (__v8sf)__C), + (__v8sf)_mm256_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 ((__v8sf) __A, - (__v8sf) __B, - -(__v8sf) __C), - (__v8sf) __A); + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C), + (__v8sf)__A); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 ((__v8sf) __A, - (__v8sf) __B, - -(__v8sf) __C), - (__v8sf)_mm256_setzero_ps()); + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C), + (__v8sf)_mm256_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 (-(__v8sf) __A, - (__v8sf) __B, - (__v8sf) __C), - (__v8sf) __C); + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, + __builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C), + (__v8sf)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 (-(__v8sf) __A, - (__v8sf) __B, - (__v8sf) __C), - (__v8sf)_mm256_setzero_ps()); + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, + __builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C), + (__v8sf)_mm256_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 (-(__v8sf) __A, - (__v8sf) __B, - -(__v8sf) __C), - (__v8sf)_mm256_setzero_ps()); + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, + __builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C), + (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 @@ -1420,41 +1388,37 @@ _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd ((__v2df) __A, - (__v2df) __B, - -(__v2df) __C), - (__v2df) __C); + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v2df)__A, (__v2df)__B, -(__v2df)__C), + (__v2df)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 ((__v4df) __A, - (__v4df) __B, - -(__v4df) __C), - (__v4df) __C); + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v4df)__A, (__v4df)__B, -(__v4df)__C), + (__v4df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps ((__v4sf) __A, - (__v4sf) __B, - -(__v4sf) __C), - (__v4sf) __C); + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C), + (__v4sf)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 ((__v8sf) __A, - (__v8sf) __B, - -(__v8sf) __C), - (__v8sf) __C); + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C), + (__v8sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 @@ -1500,121 +1464,109 @@ _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd ((__v2df) __A, - -(__v2df) __B, - (__v2df) __C), - (__v2df) __A); + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v2df)__A, -(__v2df)__B, (__v2df)__C), + (__v2df)__A); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 ((__v4df) __A, - -(__v4df) __B, - (__v4df) __C), - (__v4df) __A); + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v4df)__A, -(__v4df)__B, (__v4df)__C), + (__v4df)__A); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps ((__v4sf) __A, - -(__v4sf) __B, - (__v4sf) __C), - (__v4sf) __A); + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C), + (__v4sf)__A); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 ((__v8sf) __A, - -(__v8sf) __B, - (__v8sf) __C), - (__v8sf) __A); + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v8sf)__A, -(__v8sf)__B, (__v8sf)__C), + (__v8sf)__A); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd ((__v2df) __A, - -(__v2df) __B, - -(__v2df) __C), - (__v2df) __A); + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v2df)__A, -(__v2df)__B, -(__v2df)__C), + (__v2df)__A); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd ((__v2df) __A, - -(__v2df) __B, - -(__v2df) __C), - (__v2df) __C); + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v2df)__A, -(__v2df)__B, -(__v2df)__C), + (__v2df)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 ((__v4df) __A, - -(__v4df) __B, - -(__v4df) __C), - (__v4df) __A); + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v4df)__A, -(__v4df)__B, -(__v4df)__C), + (__v4df)__A); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 ((__v4df) __A, - -(__v4df) __B, - -(__v4df) __C), - (__v4df) __C); + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v4df)__A, -(__v4df)__B, -(__v4df)__C), + (__v4df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps ((__v4sf) __A, - -(__v4sf) __B, - -(__v4sf) __C), - (__v4sf) __A); + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C), + (__v4sf)__A); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps ((__v4sf) __A, - -(__v4sf) __B, - -(__v4sf) __C), - (__v4sf) __C); + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C), + (__v4sf)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 ((__v8sf) __A, - -(__v8sf) __B, - -(__v8sf) __C), - (__v8sf) __A); + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v8sf)__A, -(__v8sf)__B, -(__v8sf)__C), + (__v8sf)__A); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 ((__v8sf) __A, - -(__v8sf) __B, - -(__v8sf) __C), - (__v8sf) __C); + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v8sf)__A, -(__v8sf)__B, -(__v8sf)__C), + (__v8sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 diff --git a/clang/lib/Headers/avx512vpopcntdqintrin.h b/clang/lib/Headers/avx512vpopcntdqintrin.h index e24c2c5..79fc6e1 100644 --- a/clang/lib/Headers/avx512vpopcntdqintrin.h +++ b/clang/lib/Headers/avx512vpopcntdqintrin.h @@ -60,5 +60,6 @@ _mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) { } #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif diff --git a/clang/lib/Headers/avx512vpopcntdqvlintrin.h b/clang/lib/Headers/avx512vpopcntdqvlintrin.h index b6c819b..d14cb1e 100644 --- a/clang/lib/Headers/avx512vpopcntdqvlintrin.h +++ b/clang/lib/Headers/avx512vpopcntdqvlintrin.h @@ -99,5 +99,7 @@ _mm256_maskz_popcnt_epi32(__mmask8 __U, __m256i __A) { #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 +#undef __DEFAULT_FN_ATTRS128_CONSTEXPR +#undef __DEFAULT_FN_ATTRS256_CONSTEXPR #endif diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 8e497a9..5a6d48b 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -87,9 +87,8 @@ typedef __bf16 __m256bh __attribute__((__vector_size__(32), __aligned__(32))); /// A 256-bit vector of [4 x double] containing one of the source operands. /// \returns A 256-bit vector of [4 x double] containing the sums of both /// operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_add_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_add_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4df)__a+(__v4df)__b); } @@ -105,9 +104,8 @@ _mm256_add_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing one of the source operands. /// \returns A 256-bit vector of [8 x float] containing the sums of both /// operands. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_add_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_add_ps(__m256 __a, + __m256 __b) { return (__m256)((__v8sf)__a+(__v8sf)__b); } @@ -123,9 +121,8 @@ _mm256_add_ps(__m256 __a, __m256 __b) /// A 256-bit vector of [4 x double] containing the subtrahend. /// \returns A 256-bit vector of [4 x double] containing the differences between /// both operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_sub_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_sub_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4df)__a-(__v4df)__b); } @@ -141,9 +138,8 @@ _mm256_sub_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing the subtrahend. /// \returns A 256-bit vector of [8 x float] containing the differences between /// both operands. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_sub_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_sub_ps(__m256 __a, + __m256 __b) { return (__m256)((__v8sf)__a-(__v8sf)__b); } @@ -197,9 +193,8 @@ _mm256_addsub_ps(__m256 __a, __m256 __b) /// A 256-bit vector of [4 x double] containing the divisor. /// \returns A 256-bit vector of [4 x double] containing the quotients of both /// operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_div_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_div_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4df)__a/(__v4df)__b); } @@ -215,9 +210,8 @@ _mm256_div_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing the divisor. /// \returns A 256-bit vector of [8 x float] containing the quotients of both /// operands. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_div_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_div_ps(__m256 __a, + __m256 __b) { return (__m256)((__v8sf)__a/(__v8sf)__b); } @@ -317,9 +311,8 @@ _mm256_min_ps(__m256 __a, __m256 __b) /// A 256-bit vector of [4 x double] containing one of the operands. /// \returns A 256-bit vector of [4 x double] containing the products of both /// operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_mul_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_mul_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4df)__a * (__v4df)__b); } @@ -335,9 +328,8 @@ _mm256_mul_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing one of the operands. /// \returns A 256-bit vector of [8 x float] containing the products of both /// operands. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_mul_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_mul_ps(__m256 __a, + __m256 __b) { return (__m256)((__v8sf)__a * (__v8sf)__b); } @@ -555,7 +547,7 @@ _mm256_rcp_ps(__m256 __a) /// A 256-bit vector of [4 x double] containing one of the source operands. /// \returns A 256-bit vector of [4 x double] containing the bitwise AND of the /// values between both operands. -static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_and_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4du)__a & (__v4du)__b); @@ -573,7 +565,7 @@ _mm256_and_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing one of the source operands. /// \returns A 256-bit vector of [8 x float] containing the bitwise AND of the /// values between both operands. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_and_ps(__m256 __a, __m256 __b) { return (__m256)((__v8su)__a & (__v8su)__b); @@ -594,7 +586,7 @@ _mm256_and_ps(__m256 __a, __m256 __b) /// \returns A 256-bit vector of [4 x double] containing the bitwise AND of the /// values of the second operand and the one's complement of the first /// operand. -static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_andnot_pd(__m256d __a, __m256d __b) { return (__m256d)(~(__v4du)__a & (__v4du)__b); @@ -615,7 +607,7 @@ _mm256_andnot_pd(__m256d __a, __m256d __b) /// \returns A 256-bit vector of [8 x float] containing the bitwise AND of the /// values of the second operand and the one's complement of the first /// operand. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_andnot_ps(__m256 __a, __m256 __b) { return (__m256)(~(__v8su)__a & (__v8su)__b); @@ -633,7 +625,7 @@ _mm256_andnot_ps(__m256 __a, __m256 __b) /// A 256-bit vector of [4 x double] containing one of the source operands. /// \returns A 256-bit vector of [4 x double] containing the bitwise OR of the /// values between both operands. -static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_or_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4du)__a | (__v4du)__b); @@ -651,7 +643,7 @@ _mm256_or_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing one of the source operands. /// \returns A 256-bit vector of [8 x float] containing the bitwise OR of the /// values between both operands. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_or_ps(__m256 __a, __m256 __b) { return (__m256)((__v8su)__a | (__v8su)__b); @@ -669,7 +661,7 @@ _mm256_or_ps(__m256 __a, __m256 __b) /// A 256-bit vector of [4 x double] containing one of the source operands. /// \returns A 256-bit vector of [4 x double] containing the bitwise XOR of the /// values between both operands. -static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_xor_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4du)__a ^ (__v4du)__b); @@ -687,7 +679,7 @@ _mm256_xor_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing one of the source operands. /// \returns A 256-bit vector of [8 x float] containing the bitwise XOR of the /// values between both operands. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_xor_ps(__m256 __a, __m256 __b) { return (__m256)((__v8su)__a ^ (__v8su)__b); @@ -2392,7 +2384,7 @@ _mm256_cvtss_f32(__m256 __a) /// return value. /// \returns A 256-bit vector of [8 x float] containing the moved and duplicated /// values. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_movehdup_ps(__m256 __a) { return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 1, 1, 3, 3, 5, 5, 7, 7); @@ -2417,7 +2409,7 @@ _mm256_movehdup_ps(__m256 __a) /// return value. /// \returns A 256-bit vector of [8 x float] containing the moved and duplicated /// values. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_moveldup_ps(__m256 __a) { return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 0, 2, 2, 4, 4, 6, 6); @@ -2439,7 +2431,7 @@ _mm256_moveldup_ps(__m256 __a) /// the return value. /// \returns A 256-bit vector of [4 x double] containing the moved and /// duplicated values. -static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_movedup_pd(__m256d __a) { return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 0, 2, 2); @@ -2462,9 +2454,8 @@ _mm256_movedup_pd(__m256d __a) /// Bits [127:64] are written to bits [127:64] of the return value. \n /// Bits [255:192] are written to bits [255:192] of the return value. \n /// \returns A 256-bit vector of [4 x double] containing the interleaved values. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_unpackhi_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_unpackhi_pd(__m256d __a, __m256d __b) { return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 1, 5, 1+2, 5+2); } @@ -2484,9 +2475,8 @@ _mm256_unpackhi_pd(__m256d __a, __m256d __b) /// Bits [63:0] are written to bits [127:64] of the return value. \n /// Bits [191:128] are written to bits [255:192] of the return value. \n /// \returns A 256-bit vector of [4 x double] containing the interleaved values. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_unpacklo_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_unpacklo_pd(__m256d __a, __m256d __b) { return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 0, 4, 0+2, 4+2); } @@ -2511,9 +2501,8 @@ _mm256_unpacklo_pd(__m256d __a, __m256d __b) /// Bits [223:192] are written to bits [191:160] of the return value. \n /// Bits [255:224] are written to bits [255:224] of the return value. /// \returns A 256-bit vector of [8 x float] containing the interleaved values. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_unpackhi_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_unpackhi_ps(__m256 __a, __m256 __b) { return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1); } @@ -2538,9 +2527,8 @@ _mm256_unpackhi_ps(__m256 __a, __m256 __b) /// Bits [159:128] are written to bits [191:160] of the return value. \n /// Bits [191:160] are written to bits [255:224] of the return value. /// \returns A 256-bit vector of [8 x float] containing the interleaved values. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_unpacklo_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_unpacklo_ps(__m256 __a, __m256 __b) { return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1); } @@ -3777,7 +3765,7 @@ _mm256_set_ps(float __a, float __b, float __c, float __d, /// \param __i7 /// A 32-bit integral value used to initialize bits [31:0] of the result. /// \returns An initialized 256-bit integer vector. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set_epi32(int __i0, int __i1, int __i2, int __i3, int __i4, int __i5, int __i6, int __i7) { @@ -3825,7 +3813,7 @@ _mm256_set_epi32(int __i0, int __i1, int __i2, int __i3, /// \param __w00 /// A 16-bit integral value used to initialize bits [15:0] of the result. /// \returns An initialized 256-bit integer vector. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set_epi16(short __w15, short __w14, short __w13, short __w12, short __w11, short __w10, short __w09, short __w08, short __w07, short __w06, short __w05, short __w04, @@ -3908,7 +3896,7 @@ _mm256_set_epi16(short __w15, short __w14, short __w13, short __w12, /// \param __b00 /// An 8-bit integral value used to initialize bits [7:0] of the result. /// \returns An initialized 256-bit integer vector. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set_epi8(char __b31, char __b30, char __b29, char __b28, char __b27, char __b26, char __b25, char __b24, char __b23, char __b22, char __b21, char __b20, @@ -3943,7 +3931,7 @@ _mm256_set_epi8(char __b31, char __b30, char __b29, char __b28, /// \param __d /// A 64-bit integral value used to initialize bits [63:0] of the result. /// \returns An initialized 256-bit integer vector. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d) { return __extension__ (__m256i)(__v4di){ __d, __c, __b, __a }; @@ -4044,7 +4032,7 @@ _mm256_setr_ps(float __a, float __b, float __c, float __d, /// \param __i7 /// A 32-bit integral value used to initialize bits [255:224] of the result. /// \returns An initialized 256-bit integer vector. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3, int __i4, int __i5, int __i6, int __i7) { @@ -4092,7 +4080,7 @@ _mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3, /// \param __w00 /// A 16-bit integral value used to initialize bits [255:240] of the result. /// \returns An initialized 256-bit integer vector. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12, short __w11, short __w10, short __w09, short __w08, short __w07, short __w06, short __w05, short __w04, @@ -4177,7 +4165,7 @@ _mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12, /// \param __b00 /// An 8-bit integral value used to initialize bits [255:248] of the result. /// \returns An initialized 256-bit integer vector. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28, char __b27, char __b26, char __b25, char __b24, char __b23, char __b22, char __b21, char __b20, @@ -4210,7 +4198,7 @@ _mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28, /// \param __d /// A 64-bit integral value used to initialize bits [255:192] of the result. /// \returns An initialized 256-bit integer vector. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d) { return _mm256_set_epi64x(__d, __c, __b, __a); @@ -4267,7 +4255,7 @@ _mm256_set1_ps(float __w) /// A 32-bit integral value used to initialize each vector element of the /// result. /// \returns An initialized 256-bit integer vector of [8 x i32]. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_epi32(int __i) { return _mm256_set_epi32(__i, __i, __i, __i, __i, __i, __i, __i); @@ -4285,7 +4273,7 @@ _mm256_set1_epi32(int __i) /// A 16-bit integral value used to initialize each vector element of the /// result. /// \returns An initialized 256-bit integer vector of [16 x i16]. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_epi16(short __w) { return _mm256_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w, @@ -4303,7 +4291,7 @@ _mm256_set1_epi16(short __w) /// An 8-bit integral value used to initialize each vector element of the /// result. /// \returns An initialized 256-bit integer vector of [32 x i8]. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_epi8(char __b) { return _mm256_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, @@ -4324,7 +4312,7 @@ _mm256_set1_epi8(char __b) /// A 64-bit integral value used to initialize each vector element of the /// result. /// \returns An initialized 256-bit integer vector of [4 x i64]. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_epi64x(long long __q) { return _mm256_set_epi64x(__q, __q, __q, __q); @@ -4379,7 +4367,7 @@ _mm256_setzero_si256(void) { /// A 256-bit floating-point vector of [4 x double]. /// \returns A 256-bit floating-point vector of [8 x float] containing the same /// bitwise pattern as the parameter. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castpd_ps(__m256d __a) { return (__m256)__a; @@ -4396,7 +4384,7 @@ _mm256_castpd_ps(__m256d __a) /// A 256-bit floating-point vector of [4 x double]. /// \returns A 256-bit integer vector containing the same bitwise pattern as the /// parameter. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castpd_si256(__m256d __a) { return (__m256i)__a; @@ -4413,7 +4401,7 @@ _mm256_castpd_si256(__m256d __a) /// A 256-bit floating-point vector of [8 x float]. /// \returns A 256-bit floating-point vector of [4 x double] containing the same /// bitwise pattern as the parameter. -static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castps_pd(__m256 __a) { return (__m256d)__a; @@ -4430,7 +4418,7 @@ _mm256_castps_pd(__m256 __a) /// A 256-bit floating-point vector of [8 x float]. /// \returns A 256-bit integer vector containing the same bitwise pattern as the /// parameter. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castps_si256(__m256 __a) { return (__m256i)__a; @@ -4447,7 +4435,7 @@ _mm256_castps_si256(__m256 __a) /// A 256-bit integer vector. /// \returns A 256-bit floating-point vector of [8 x float] containing the same /// bitwise pattern as the parameter. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castsi256_ps(__m256i __a) { return (__m256)__a; @@ -4464,7 +4452,7 @@ _mm256_castsi256_ps(__m256i __a) /// A 256-bit integer vector. /// \returns A 256-bit floating-point vector of [4 x double] containing the same /// bitwise pattern as the parameter. -static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castsi256_pd(__m256i __a) { return (__m256d)__a; @@ -4481,7 +4469,7 @@ _mm256_castsi256_pd(__m256i __a) /// A 256-bit floating-point vector of [4 x double]. /// \returns A 128-bit floating-point vector of [2 x double] containing the /// lower 128 bits of the parameter. -static __inline __m128d __DEFAULT_FN_ATTRS +static __inline __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castpd256_pd128(__m256d __a) { return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 1); @@ -4498,7 +4486,7 @@ _mm256_castpd256_pd128(__m256d __a) /// A 256-bit floating-point vector of [8 x float]. /// \returns A 128-bit floating-point vector of [4 x float] containing the /// lower 128 bits of the parameter. -static __inline __m128 __DEFAULT_FN_ATTRS +static __inline __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castps256_ps128(__m256 __a) { return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 1, 2, 3); @@ -4514,7 +4502,7 @@ _mm256_castps256_ps128(__m256 __a) /// A 256-bit integer vector. /// \returns A 128-bit integer vector containing the lower 128 bits of the /// parameter. -static __inline __m128i __DEFAULT_FN_ATTRS +static __inline __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castsi256_si128(__m256i __a) { return __builtin_shufflevector((__v4di)__a, (__v4di)__a, 0, 1); diff --git a/clang/lib/Headers/cpuid.h b/clang/lib/Headers/cpuid.h index 52addb7..ce8c79e 100644 --- a/clang/lib/Headers/cpuid.h +++ b/clang/lib/Headers/cpuid.h @@ -345,10 +345,15 @@ static __inline int __get_cpuid_count (unsigned int __leaf, // In some configurations, __cpuidex is defined as a builtin (primarily // -fms-extensions) which will conflict with the __cpuidex definition below. #if !(__has_builtin(__cpuidex)) +// In some cases, offloading will set the host as the aux triple and define the +// builtin. Given __has_builtin does not detect builtins on aux triples, we need +// to explicitly check for some offloading cases. +#ifndef __NVPTX__ static __inline void __cpuidex(int __cpu_info[4], int __leaf, int __subleaf) { __cpuid_count(__leaf, __subleaf, __cpu_info[0], __cpu_info[1], __cpu_info[2], __cpu_info[3]); } #endif +#endif #endif /* __CPUID_H */ diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 78e8a42..60d2000 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -2127,8 +2127,9 @@ _mm_add_epi32(__m128i __a, __m128i __b) { /// \param __b /// A 64-bit integer. /// \returns A 64-bit integer containing the sum of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_si64(__m64 __a, __m64 __b) { - return (__m64)(((unsigned long long)__a) + ((unsigned long long)__b)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_si64(__m64 __a, + __m64 __b) { + return (__m64)(((__v1du)__a)[0] + ((__v1du)__b)[0]); } /// Adds the corresponding elements of two 128-bit vectors of [2 x i64], @@ -2393,8 +2394,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a, /// A 128-bit signed [8 x i16] vector. /// \returns A 128-bit signed [8 x i16] vector containing the upper 16 bits of /// each of the eight 32-bit products. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_mulhi_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b); } @@ -2412,9 +2413,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a, /// A 128-bit unsigned [8 x i16] vector. /// \returns A 128-bit unsigned [8 x i16] vector containing the upper 16 bits /// of each of the eight 32-bit products. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a, - __m128i __b) { - return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_mulhi_epu16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_pmulhuw128((__v8hu)__a, (__v8hu)__b); } /// Multiplies the corresponding elements of two signed [8 x i16] @@ -2431,8 +2432,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a, /// A 128-bit signed [8 x i16] vector. /// \returns A 128-bit signed [8 x i16] vector containing the lower 16 bits of /// each of the eight 32-bit products. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_mullo_epi16(__m128i __a, __m128i __b) { return (__m128i)((__v8hu)__a * (__v8hu)__b); } @@ -2557,8 +2558,9 @@ _mm_sub_epi32(__m128i __a, __m128i __b) { /// A 64-bit integer vector containing the subtrahend. /// \returns A 64-bit integer vector containing the difference of the values in /// the operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_si64(__m64 __a, __m64 __b) { - return (__m64)((unsigned long long)__a - (unsigned long long)__b); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_si64(__m64 __a, + __m64 __b) { + return (__m64)(((__v1du)__a)[0] - ((__v1du)__b)[0]); } /// Subtracts the corresponding elements of two [2 x i64] vectors. @@ -2676,8 +2678,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a, /// A 128-bit integer vector containing one of the source operands. /// \returns A 128-bit integer vector containing the bitwise AND of the values /// in both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_and_si128(__m128i __a, __m128i __b) { return (__m128i)((__v2du)__a & (__v2du)__b); } @@ -2695,8 +2697,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, /// A 128-bit vector containing the right source operand. /// \returns A 128-bit integer vector containing the bitwise AND of the one's /// complement of the first operand and the values in the second operand. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_andnot_si128(__m128i __a, __m128i __b) { return (__m128i)(~(__v2du)__a & (__v2du)__b); } /// Performs a bitwise OR of two 128-bit integer vectors. @@ -2711,8 +2713,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a, /// A 128-bit integer vector containing one of the source operands. /// \returns A 128-bit integer vector containing the bitwise OR of the values /// in both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_or_si128(__m128i __a, __m128i __b) { return (__m128i)((__v2du)__a | (__v2du)__b); } @@ -2728,8 +2730,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a, /// A 128-bit integer vector containing one of the source operands. /// \returns A 128-bit integer vector containing the bitwise exclusive OR of the /// values in both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_xor_si128(__m128i __a, __m128i __b) { return (__m128i)((__v2du)__a ^ (__v2du)__b); } diff --git a/clang/lib/Headers/fma4intrin.h b/clang/lib/Headers/fma4intrin.h index 694801b..69977fb 100644 --- a/clang/lib/Headers/fma4intrin.h +++ b/clang/lib/Headers/fma4intrin.h @@ -23,13 +23,15 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); + return (__m128)__builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, + (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); + return (__m128d)__builtin_elementwise_fma((__v2df)__A, (__v2df)__B, + (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 @@ -47,13 +49,15 @@ _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_msub_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); + return (__m128)__builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, + -(__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); + return (__m128d)__builtin_elementwise_fma((__v2df)__A, (__v2df)__B, + -(__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 @@ -71,13 +75,15 @@ _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); + return (__m128)__builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B, + (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); + return (__m128d)__builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B, + (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 @@ -95,13 +101,15 @@ _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); + return (__m128)__builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B, + -(__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); + return (__m128d)__builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B, + -(__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 @@ -143,49 +151,57 @@ _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C) static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); + return (__m256)__builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, + (__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); + return (__m256d)__builtin_elementwise_fma((__v4df)__A, (__v4df)__B, + (__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); + return (__m256)__builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, + -(__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); + return (__m256d)__builtin_elementwise_fma((__v4df)__A, (__v4df)__B, + -(__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C); + return (__m256)__builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B, + (__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C); + return (__m256d)__builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B, + (__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); + return (__m256)__builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B, + -(__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C); + return (__m256d)__builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B, + -(__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 diff --git a/clang/lib/Headers/fmaintrin.h b/clang/lib/Headers/fmaintrin.h index 22d1a78..24584a9 100644 --- a/clang/lib/Headers/fmaintrin.h +++ b/clang/lib/Headers/fmaintrin.h @@ -35,7 +35,8 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); + return (__m128)__builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, + (__v4sf)__C); } /// Computes a multiply-add of 128-bit vectors of [2 x double]. @@ -55,7 +56,8 @@ _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); + return (__m128d)__builtin_elementwise_fma((__v2df)__A, (__v2df)__B, + (__v2df)__C); } /// Computes a scalar multiply-add of the single-precision values in the @@ -133,7 +135,8 @@ _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); + return (__m128)__builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, + -(__v4sf)__C); } /// Computes a multiply-subtract of 128-bit vectors of [2 x double]. @@ -153,7 +156,8 @@ _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); + return (__m128d)__builtin_elementwise_fma((__v2df)__A, (__v2df)__B, + -(__v2df)__C); } /// Computes a scalar multiply-subtract of the single-precision values in @@ -231,7 +235,8 @@ _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); + return (__m128)__builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B, + (__v4sf)__C); } /// Computes a negated multiply-add of 128-bit vectors of [2 x double]. @@ -251,7 +256,8 @@ _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); + return (__m128d)__builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B, + (__v2df)__C); } /// Computes a scalar negated multiply-add of the single-precision values in @@ -329,7 +335,8 @@ _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); + return (__m128)__builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B, + -(__v4sf)__C); } /// Computes a negated multiply-subtract of 128-bit vectors of [2 x double]. @@ -349,7 +356,8 @@ _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); + return (__m128d)__builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B, + -(__v2df)__C); } /// Computes a scalar negated multiply-subtract of the single-precision @@ -531,7 +539,8 @@ _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); + return (__m256)__builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, + (__v8sf)__C); } /// Computes a multiply-add of 256-bit vectors of [4 x double]. @@ -551,7 +560,8 @@ _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); + return (__m256d)__builtin_elementwise_fma((__v4df)__A, (__v4df)__B, + (__v4df)__C); } /// Computes a multiply-subtract of 256-bit vectors of [8 x float]. @@ -571,7 +581,8 @@ _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); + return (__m256)__builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, + -(__v8sf)__C); } /// Computes a multiply-subtract of 256-bit vectors of [4 x double]. @@ -591,7 +602,8 @@ _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); + return (__m256d)__builtin_elementwise_fma((__v4df)__A, (__v4df)__B, + -(__v4df)__C); } /// Computes a negated multiply-add of 256-bit vectors of [8 x float]. @@ -611,7 +623,8 @@ _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C); + return (__m256)__builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B, + (__v8sf)__C); } /// Computes a negated multiply-add of 256-bit vectors of [4 x double]. @@ -631,7 +644,8 @@ _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C); + return (__m256d)__builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B, + (__v4df)__C); } /// Computes a negated multiply-subtract of 256-bit vectors of [8 x float]. @@ -651,7 +665,8 @@ _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); + return (__m256)__builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B, + -(__v8sf)__C); } /// Computes a negated multiply-subtract of 256-bit vectors of [4 x double]. @@ -671,7 +686,8 @@ _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C); + return (__m256d)__builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B, + -(__v4df)__C); } /// Computes a multiply with alternating add/subtract of 256-bit vectors of diff --git a/clang/lib/Headers/mmintrin.h b/clang/lib/Headers/mmintrin.h index dc0fa5c..3961b79 100644 --- a/clang/lib/Headers/mmintrin.h +++ b/clang/lib/Headers/mmintrin.h @@ -57,6 +57,9 @@ typedef char __v16qi __attribute__((__vector_size__(16))); #define __trunc64(x) \ (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0) +#define __zext128(x) \ + (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \ + 1, 2, 3) #define __anyext128(x) \ (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \ 1, -1, -1) @@ -85,7 +88,7 @@ _mm_empty(void) { /// A 32-bit integer value. /// \returns A 64-bit integer vector. The lower 32 bits contain the value of the /// parameter. The upper 32 bits are set to 0. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_cvtsi32_si64(int __i) { return __extension__ (__m64)(__v2si){__i, 0}; @@ -102,7 +105,7 @@ _mm_cvtsi32_si64(int __i) /// A 64-bit integer vector. /// \returns A 32-bit signed integer value containing the lower 32 bits of the /// parameter. -static __inline__ int __DEFAULT_FN_ATTRS_SSE2 +static __inline__ int __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_cvtsi64_si32(__m64 __m) { return ((__v2si)__m)[0]; @@ -118,10 +121,10 @@ _mm_cvtsi64_si32(__m64 __m) /// A 64-bit signed integer. /// \returns A 64-bit integer vector containing the same bitwise pattern as the /// parameter. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_cvtsi64_m64(long long __i) { - return (__m64)__i; + return __extension__ (__m64)(__v1di){__i}; } /// Casts a 64-bit integer vector into a 64-bit signed integer value. @@ -134,10 +137,10 @@ _mm_cvtsi64_m64(long long __i) /// A 64-bit integer vector. /// \returns A 64-bit signed integer containing the same bitwise pattern as the /// parameter. -static __inline__ long long __DEFAULT_FN_ATTRS_SSE2 +static __inline__ long long __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_cvtm64_si64(__m64 __m) { - return (long long)__m; + return ((__v1di)__m)[0]; } /// Converts, with saturation, 16-bit signed integers from both 64-bit integer @@ -379,7 +382,7 @@ _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the sums of both /// parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_add_pi8(__m64 __m1, __m64 __m2) { return (__m64)(((__v8qu)__m1) + ((__v8qu)__m2)); @@ -400,7 +403,7 @@ _mm_add_pi8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the sums of both /// parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_add_pi16(__m64 __m1, __m64 __m2) { return (__m64)(((__v4hu)__m1) + ((__v4hu)__m2)); @@ -421,7 +424,7 @@ _mm_add_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [2 x i32]. /// \returns A 64-bit integer vector of [2 x i32] containing the sums of both /// parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_add_pi32(__m64 __m1, __m64 __m2) { return (__m64)(((__v2su)__m1) + ((__v2su)__m2)); @@ -536,7 +539,7 @@ _mm_adds_pu16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8] containing the subtrahends. /// \returns A 64-bit integer vector of [8 x i8] containing the differences of /// both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_sub_pi8(__m64 __m1, __m64 __m2) { return (__m64)(((__v8qu)__m1) - ((__v8qu)__m2)); @@ -557,7 +560,7 @@ _mm_sub_pi8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16] containing the subtrahends. /// \returns A 64-bit integer vector of [4 x i16] containing the differences of /// both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_sub_pi16(__m64 __m1, __m64 __m2) { return (__m64)(((__v4hu)__m1) - ((__v4hu)__m2)); @@ -578,7 +581,7 @@ _mm_sub_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [2 x i32] containing the subtrahends. /// \returns A 64-bit integer vector of [2 x i32] containing the differences of /// both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_sub_pi32(__m64 __m1, __m64 __m2) { return (__m64)(((__v2su)__m1) - ((__v2su)__m2)); @@ -723,11 +726,11 @@ _mm_madd_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits /// of the products of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_mulhi_pi16(__m64 __m1, __m64 __m2) { - return __trunc64(__builtin_ia32_pmulhw128((__v8hi)__anyext128(__m1), - (__v8hi)__anyext128(__m2))); + return __trunc64(__builtin_ia32_pmulhw128((__v8hi)__zext128(__m1), + (__v8hi)__zext128(__m2))); } /// Multiplies each 16-bit signed integer element of the first 64-bit @@ -745,7 +748,7 @@ _mm_mulhi_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits /// of the products of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_mullo_pi16(__m64 __m1, __m64 __m2) { return (__m64)(((__v4hu)__m1) * ((__v4hu)__m2)); @@ -1134,7 +1137,7 @@ _mm_srli_si64(__m64 __m, int __count) /// A 64-bit integer vector. /// \returns A 64-bit integer vector containing the bitwise AND of both /// parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_and_si64(__m64 __m1, __m64 __m2) { return (__m64)(((__v1du)__m1) & ((__v1du)__m2)); @@ -1155,7 +1158,7 @@ _mm_and_si64(__m64 __m1, __m64 __m2) /// A 64-bit integer vector. /// \returns A 64-bit integer vector containing the bitwise AND of the second /// parameter and the one's complement of the first parameter. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_andnot_si64(__m64 __m1, __m64 __m2) { return (__m64)(~((__v1du)__m1) & ((__v1du)__m2)); @@ -1173,7 +1176,7 @@ _mm_andnot_si64(__m64 __m1, __m64 __m2) /// A 64-bit integer vector. /// \returns A 64-bit integer vector containing the bitwise OR of both /// parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_or_si64(__m64 __m1, __m64 __m2) { return (__m64)(((__v1du)__m1) | ((__v1du)__m2)); @@ -1191,7 +1194,7 @@ _mm_or_si64(__m64 __m1, __m64 __m2) /// A 64-bit integer vector. /// \returns A 64-bit integer vector containing the bitwise exclusive OR of both /// parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_xor_si64(__m64 __m1, __m64 __m2) { return (__m64)(((__v1du)__m1) ^ ((__v1du)__m2)); @@ -1213,7 +1216,7 @@ _mm_xor_si64(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the comparison /// results. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) { return (__m64)(((__v8qi)__m1) == ((__v8qi)__m2)); @@ -1235,7 +1238,7 @@ _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the comparison /// results. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) { return (__m64)(((__v4hi)__m1) == ((__v4hi)__m2)); @@ -1257,7 +1260,7 @@ _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [2 x i32]. /// \returns A 64-bit integer vector of [2 x i32] containing the comparison /// results. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) { return (__m64)(((__v2si)__m1) == ((__v2si)__m2)); @@ -1279,7 +1282,7 @@ _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the comparison /// results. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) { /* This function always performs a signed comparison, but __v8qi is a char @@ -1303,7 +1306,7 @@ _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the comparison /// results. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) { return (__m64)((__v4hi)__m1 > (__v4hi)__m2); @@ -1325,7 +1328,7 @@ _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [2 x i32]. /// \returns A 64-bit integer vector of [2 x i32] containing the comparison /// results. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) { return (__m64)((__v2si)__m1 > (__v2si)__m2); diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index 6a64369..7bf6b84 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -24,6 +24,7 @@ typedef float __m128_u __attribute__((__vector_size__(16), __aligned__(1))); /* Unsigned types */ typedef unsigned int __v4su __attribute__((__vector_size__(16))); +typedef unsigned short __v8hu __attribute__((__vector_size__(16))); /* This header should only be included in a hosted environment as it depends on * a standard library to provide allocation routines. */ @@ -2447,11 +2448,11 @@ _mm_movemask_pi8(__m64 __a) /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the products of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_mulhi_pu16(__m64 __a, __m64 __b) { - return __trunc64(__builtin_ia32_pmulhuw128((__v8hi)__anyext128(__a), - (__v8hi)__anyext128(__b))); + return __trunc64(__builtin_ia32_pmulhuw128((__v8hu)__zext128(__a), + (__v8hu)__zext128(__b))); } /// Shuffles the 4 16-bit integers from a 64-bit integer vector to the diff --git a/clang/lib/Interpreter/RemoteJITUtils.cpp b/clang/lib/Interpreter/RemoteJITUtils.cpp index c0e663b..c100f46 100644 --- a/clang/lib/Interpreter/RemoteJITUtils.cpp +++ b/clang/lib/Interpreter/RemoteJITUtils.cpp @@ -33,6 +33,10 @@ using namespace llvm; using namespace llvm::orc; +#if LLVM_ON_UNIX +static std::vector<pid_t> LaunchedExecutorPID; +#endif + Expected<uint64_t> getSlabAllocSize(StringRef SizeString) { SizeString = SizeString.trim(); @@ -89,9 +93,14 @@ createSharedMemoryManager(SimpleRemoteEPC &SREPC, SlabSize, SREPC, SAs); } +// Launches an out-of-process executor for remote JIT. The calling program can +// provide a CustomizeFork callback, which allows it to run custom code in the +// child process before exec. This enables sending custom setup or code to be +// executed in the child (out-of-process) executor. Expected<std::unique_ptr<SimpleRemoteEPC>> launchExecutor(StringRef ExecutablePath, bool UseSharedMemory, - llvm::StringRef SlabAllocateSizeString) { + llvm::StringRef SlabAllocateSizeString, + std::function<void()> CustomizeFork) { #ifndef LLVM_ON_UNIX // FIXME: Add support for Windows. return make_error<StringError>("-" + ExecutablePath + @@ -134,6 +143,9 @@ launchExecutor(StringRef ExecutablePath, bool UseSharedMemory, close(ToExecutor[WriteEnd]); close(FromExecutor[ReadEnd]); + if (CustomizeFork) + CustomizeFork(); + // Execute the child process. std::unique_ptr<char[]> ExecutorPath, FDSpecifier; { @@ -158,6 +170,8 @@ launchExecutor(StringRef ExecutablePath, bool UseSharedMemory, } // else we're the parent... + LaunchedExecutorPID.push_back(ChildPID); + // Close the child ends of the pipes close(ToExecutor[ReadEnd]); close(FromExecutor[WriteEnd]); @@ -265,3 +279,18 @@ connectTCPSocket(StringRef NetworkAddress, bool UseSharedMemory, std::move(S), *SockFD, *SockFD); #endif } + +#if LLVM_ON_UNIX + +pid_t getLastLaunchedExecutorPID() { + if (!LaunchedExecutorPID.size()) + return -1; + return LaunchedExecutorPID.back(); +} + +pid_t getNthLaunchedExecutorPID(int n) { + if (n - 1 < 0 || n - 1 >= static_cast<int>(LaunchedExecutorPID.size())) + return -1; + return LaunchedExecutorPID.at(n - 1); +} +#endif
\ No newline at end of file diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 0edfd60..9c55541 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -3497,6 +3497,19 @@ void Sema::DeclareGlobalAllocationFunction(DeclarationName Name, } auto CreateAllocationFunctionDecl = [&](Attr *ExtraAttr) { + // The MSVC STL has explicit cdecl on its (host-side) allocation function + // specializations for the allocation, so in order to prevent a CC clash + // we use the host's CC, if available, or CC_C as a fallback, for the + // host-side implicit decls, knowing these do not get emitted when compiling + // for device. + if (getLangOpts().CUDAIsDevice && ExtraAttr && + isa<CUDAHostAttr>(ExtraAttr) && + Context.getTargetInfo().getTriple().isSPIRV()) { + if (auto *ATI = Context.getAuxTargetInfo()) + EPI.ExtInfo = EPI.ExtInfo.withCallingConv(ATI->getDefaultCallingConv()); + else + EPI.ExtInfo = EPI.ExtInfo.withCallingConv(CallingConv::CC_C); + } QualType FnType = Context.getFunctionType(Return, Params, EPI); FunctionDecl *Alloc = FunctionDecl::Create( Context, GlobalCtx, SourceLocation(), SourceLocation(), Name, FnType, diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 8536e04..17f17f8 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -337,16 +337,9 @@ static bool isZeroSizedArray(const ConstantArrayType *CAT) { return CAT != nullptr; } -// Returns true if the record type is an HLSL resource class or an array of -// resource classes -static bool isResourceRecordTypeOrArrayOf(const Type *Ty) { - while (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(Ty)) - Ty = CAT->getArrayElementTypeNoTypeQual(); - return HLSLAttributedResourceType::findHandleTypeOnResource(Ty) != nullptr; -} - static bool isResourceRecordTypeOrArrayOf(VarDecl *VD) { - return isResourceRecordTypeOrArrayOf(VD->getType().getTypePtr()); + const Type *Ty = VD->getType().getTypePtr(); + return Ty->isHLSLResourceRecord() || Ty->isHLSLResourceRecordArray(); } // Returns true if the type is a leaf element type that is not valid to be @@ -355,7 +348,7 @@ static bool isResourceRecordTypeOrArrayOf(VarDecl *VD) { // type or if it is a record type that needs to be inspected further. static bool isInvalidConstantBufferLeafElementType(const Type *Ty) { Ty = Ty->getUnqualifiedDesugaredType(); - if (isResourceRecordTypeOrArrayOf(Ty)) + if (Ty->isHLSLResourceRecord() || Ty->isHLSLResourceRecordArray()) return true; if (Ty->isRecordType()) return Ty->getAsCXXRecordDecl()->isEmpty(); @@ -3597,7 +3590,7 @@ void SemaHLSL::deduceAddressSpace(VarDecl *Decl) { return; // Resource handles. - if (isResourceRecordTypeOrArrayOf(Type->getUnqualifiedDesugaredType())) + if (Type->isHLSLResourceRecord() || Type->isHLSLResourceRecordArray()) return; // Only static globals belong to the Private address space. @@ -3637,10 +3630,7 @@ void SemaHLSL::ActOnVariableDeclarator(VarDecl *VD) { if (VD->getType()->isHLSLIntangibleType()) collectResourceBindingsOnVarDecl(VD); - const Type *VarType = VD->getType().getTypePtr(); - while (VarType->isArrayType()) - VarType = VarType->getArrayElementTypeNoTypeQual(); - if (VarType->isHLSLResourceRecord() || + if (isResourceRecordTypeOrArrayOf(VD) || VD->hasAttr<HLSLVkConstantIdAttr>()) { // Make the variable for resources static. The global externally visible // storage is accessed through the handle, which is a member. The variable diff --git a/clang/lib/Sema/SemaObjC.cpp b/clang/lib/Sema/SemaObjC.cpp index 0f39a98..bde00bd 100644 --- a/clang/lib/Sema/SemaObjC.cpp +++ b/clang/lib/Sema/SemaObjC.cpp @@ -691,7 +691,7 @@ static QualType applyObjCTypeArgs(Sema &S, SourceLocation loc, QualType type, if (!anyPackExpansions && finalTypeArgs.size() != numTypeParams) { S.Diag(loc, diag::err_objc_type_args_wrong_arity) << (typeArgs.size() < typeParams->size()) << objcClass->getDeclName() - << (unsigned)finalTypeArgs.size() << (unsigned)numTypeParams; + << (unsigned)finalTypeArgs.size() << numTypeParams; S.Diag(objcClass->getLocation(), diag::note_previous_decl) << objcClass; if (failOnError) diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp index 62fe3d1..4d58b4a 100644 --- a/clang/lib/Sema/SemaOpenACC.cpp +++ b/clang/lib/Sema/SemaOpenACC.cpp @@ -646,8 +646,17 @@ ExprResult CheckVarType(SemaOpenACC &S, OpenACCClauseKind CK, Expr *VarExpr, if (auto *RefTy = InnerTy->getAs<ReferenceType>()) InnerTy = RefTy->getPointeeType(); - if (auto *ArrTy = InnerTy->getAsArrayTypeUnsafe()) + if (auto *ArrTy = InnerTy->getAsArrayTypeUnsafe()) { + // Non constant arrays decay to 'pointer', so warn and return that we're + // successful. + if (!ArrTy->isConstantArrayType()) { + S.Diag(InnerLoc, clang::diag::warn_acc_var_referenced_non_const_array) + << InnerTy << CK; + return VarExpr; + } + return CheckVarType(S, CK, VarExpr, InnerLoc, ArrTy->getElementType()); + } auto *RD = InnerTy->getAsCXXRecordDecl(); @@ -2575,8 +2584,8 @@ SemaOpenACC::ActOnOpenACCAsteriskSizeExpr(SourceLocation AsteriskLoc) { return BuildOpenACCAsteriskSizeExpr(AsteriskLoc); } -VarDecl *SemaOpenACC::CreateInitRecipe(OpenACCClauseKind CK, - const Expr *VarExpr) { +std::pair<VarDecl *, VarDecl *> +SemaOpenACC::CreateInitRecipe(OpenACCClauseKind CK, const Expr *VarExpr) { // Strip off any array subscripts/array section exprs to get to the type of // the variable. while (isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(VarExpr)) { @@ -2590,7 +2599,7 @@ VarDecl *SemaOpenACC::CreateInitRecipe(OpenACCClauseKind CK, // fill in with nullptr. We'll count on TreeTransform to make this if // necessary. if (!VarExpr || VarExpr->getType()->isDependentType()) - return nullptr; + return {nullptr, nullptr}; QualType VarTy = VarExpr->getType().getNonReferenceType().getUnqualifiedType(); @@ -2602,6 +2611,7 @@ VarDecl *SemaOpenACC::CreateInitRecipe(OpenACCClauseKind CK, getASTContext().getTrivialTypeSourceInfo(VarTy), SC_Auto); ExprResult Init; + VarDecl *Temporary = nullptr; if (CK == OpenACCClauseKind::Private) { // Trap errors so we don't get weird ones here. If we can't init, we'll just @@ -2626,5 +2636,5 @@ VarDecl *SemaOpenACC::CreateInitRecipe(OpenACCClauseKind CK, Recipe->setInitStyle(VarDecl::CallInit); } - return Recipe; + return {Recipe, Temporary}; } diff --git a/clang/lib/Sema/SemaOpenACCClause.cpp b/clang/lib/Sema/SemaOpenACCClause.cpp index 88d217f..e8a18243 100644 --- a/clang/lib/Sema/SemaOpenACCClause.cpp +++ b/clang/lib/Sema/SemaOpenACCClause.cpp @@ -800,7 +800,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitPrivateClause( // Assemble the recipes list. for (const Expr *VarExpr : Clause.getVarList()) InitRecipes.push_back( - SemaRef.CreateInitRecipe(OpenACCClauseKind::Private, VarExpr)); + SemaRef.CreateInitRecipe(OpenACCClauseKind::Private, VarExpr).first); return OpenACCPrivateClause::Create( Ctx, Clause.getBeginLoc(), Clause.getLParenLoc(), Clause.getVarList(), @@ -813,7 +813,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitFirstPrivateClause( // really isn't anything to do here. GCC does some duplicate-finding, though // it isn't apparent in the standard where this is justified. - llvm::SmallVector<VarDecl *> InitRecipes; + llvm::SmallVector<OpenACCFirstPrivateRecipe> InitRecipes; // Assemble the recipes list. for (const Expr *VarExpr : Clause.getVarList()) diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index b6b8932..2d8fdb5 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -367,7 +367,7 @@ bool Sema::DiagnoseUnknownTemplateName(const IdentifierInfo &II, // The code is missing a 'template' keyword prior to the dependent template // name. - NestedNameSpecifier *Qualifier = (NestedNameSpecifier *)SS->getScopeRep(); + NestedNameSpecifier *Qualifier = SS->getScopeRep(); SuggestedTemplate = TemplateTy::make(Context.getDependentTemplateName( {Qualifier, &II, /*HasTemplateKeyword=*/false})); Diag(IILoc, diag::err_template_kw_missing) diff --git a/clang/lib/Sema/SemaWasm.cpp b/clang/lib/Sema/SemaWasm.cpp index 8998492..e773113 100644 --- a/clang/lib/Sema/SemaWasm.cpp +++ b/clang/lib/Sema/SemaWasm.cpp @@ -17,6 +17,7 @@ #include "clang/Basic/AddressSpaces.h" #include "clang/Basic/DiagnosticSema.h" #include "clang/Basic/TargetBuiltins.h" +#include "clang/Basic/TargetInfo.h" #include "clang/Sema/Attr.h" #include "clang/Sema/Sema.h" @@ -227,7 +228,8 @@ bool SemaWasm::BuiltinWasmTableCopy(CallExpr *TheCall) { return false; } -bool SemaWasm::BuiltinWasmTestFunctionPointerSignature(CallExpr *TheCall) { +bool SemaWasm::BuiltinWasmTestFunctionPointerSignature(const TargetInfo &TI, + CallExpr *TheCall) { if (SemaRef.checkArgCount(TheCall, 1)) return true; @@ -250,27 +252,31 @@ bool SemaWasm::BuiltinWasmTestFunctionPointerSignature(CallExpr *TheCall) { << ArgType << FuncPtrArg->getSourceRange(); } - // Check that the function pointer doesn't use reference types - if (FuncTy->getReturnType().isWebAssemblyReferenceType()) { - return Diag( - FuncPtrArg->getBeginLoc(), - diag::err_wasm_builtin_test_fp_sig_cannot_include_reference_type) - << 0 << FuncTy->getReturnType() << FuncPtrArg->getSourceRange(); - } - auto NParams = FuncTy->getNumParams(); - for (unsigned I = 0; I < NParams; I++) { - if (FuncTy->getParamType(I).isWebAssemblyReferenceType()) { + if (TI.getABI() == "experimental-mv") { + auto isStructOrUnion = [](QualType T) { + return T->isUnionType() || T->isStructureType(); + }; + if (isStructOrUnion(FuncTy->getReturnType())) { return Diag( FuncPtrArg->getBeginLoc(), diag:: - err_wasm_builtin_test_fp_sig_cannot_include_reference_type) - << 1 << FuncPtrArg->getSourceRange(); + err_wasm_builtin_test_fp_sig_cannot_include_struct_or_union) + << 0 << FuncTy->getReturnType() << FuncPtrArg->getSourceRange(); + } + auto NParams = FuncTy->getNumParams(); + for (unsigned I = 0; I < NParams; I++) { + if (isStructOrUnion(FuncTy->getParamType(I))) { + return Diag( + FuncPtrArg->getBeginLoc(), + diag:: + err_wasm_builtin_test_fp_sig_cannot_include_struct_or_union) + << 1 << FuncPtrArg->getSourceRange(); + } } } // Set return type to int (the result of the test) TheCall->setType(getASTContext().IntTy); - return false; } @@ -297,7 +303,7 @@ bool SemaWasm::CheckWebAssemblyBuiltinFunctionCall(const TargetInfo &TI, case WebAssembly::BI__builtin_wasm_table_copy: return BuiltinWasmTableCopy(TheCall); case WebAssembly::BI__builtin_wasm_test_function_pointer_signature: - return BuiltinWasmTestFunctionPointerSignature(TheCall); + return BuiltinWasmTestFunctionPointerSignature(TI, TheCall); } return false; diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 6ce5535..0030946 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -11901,8 +11901,11 @@ void OpenACCClauseTransform<Derived>::VisitPrivateClause( if (InitRecipe) InitRecipes.push_back(InitRecipe); else - InitRecipes.push_back(Self.getSema().OpenACC().CreateInitRecipe( - OpenACCClauseKind::Private, VarRef.get())); + InitRecipes.push_back( + Self.getSema() + .OpenACC() + .CreateInitRecipe(OpenACCClauseKind::Private, VarRef.get()) + .first); } } ParsedClause.setVarListDetails(InstantiatedVarList, @@ -11942,7 +11945,7 @@ template <typename Derived> void OpenACCClauseTransform<Derived>::VisitFirstPrivateClause( const OpenACCFirstPrivateClause &C) { llvm::SmallVector<Expr *> InstantiatedVarList; - llvm::SmallVector<VarDecl *> InitRecipes; + llvm::SmallVector<OpenACCFirstPrivateRecipe> InitRecipes; for (const auto [RefExpr, InitRecipe] : llvm::zip(C.getVarList(), C.getInitRecipes())) { @@ -11953,7 +11956,7 @@ void OpenACCClauseTransform<Derived>::VisitFirstPrivateClause( // We only have to create a new one if it is dependent, and Sema won't // make one of these unless the type is non-dependent. - if (InitRecipe) + if (InitRecipe.RecipeDecl) InitRecipes.push_back(InitRecipe); else InitRecipes.push_back(Self.getSema().OpenACC().CreateInitRecipe( diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 1402f40..ed0ec9e 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -12877,9 +12877,12 @@ OpenACCClause *ASTRecordReader::readOpenACCClause() { case OpenACCClauseKind::FirstPrivate: { SourceLocation LParenLoc = readSourceLocation(); llvm::SmallVector<Expr *> VarList = readOpenACCVarList(); - llvm::SmallVector<VarDecl *> RecipeList; - for (unsigned I = 0; I < VarList.size(); ++I) - RecipeList.push_back(readDeclAs<VarDecl>()); + llvm::SmallVector<OpenACCFirstPrivateRecipe> RecipeList; + for (unsigned I = 0; I < VarList.size(); ++I) { + VarDecl *Recipe = readDeclAs<VarDecl>(); + VarDecl *RecipeTemp = readDeclAs<VarDecl>(); + RecipeList.push_back({Recipe, RecipeTemp}); + } return OpenACCFirstPrivateClause::Create(getContext(), BeginLoc, LParenLoc, VarList, RecipeList, EndLoc); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index c038d4d..c072acd 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -5461,11 +5461,6 @@ ASTWriter::WriteAST(llvm::PointerUnion<Sema *, Preprocessor *> Subject, WritingAST = false; - if (WritingModule && PPRef.getHeaderSearchInfo() - .getHeaderSearchOpts() - .ModulesValidateOncePerBuildSession) - ModCache.updateModuleTimestamp(OutputFile); - if (ShouldCacheASTInMemory) { // Construct MemoryBuffer and update buffer manager. ModCache.getInMemoryModuleCache().addBuiltPCM( @@ -8762,8 +8757,10 @@ void ASTRecordWriter::writeOpenACCClause(const OpenACCClause *C) { writeSourceLocation(FPC->getLParenLoc()); writeOpenACCVarList(FPC); - for (VarDecl *VD : FPC->getInitRecipes()) - AddDeclRef(VD); + for (const OpenACCFirstPrivateRecipe &R : FPC->getInitRecipes()) { + AddDeclRef(R.RecipeDecl); + AddDeclRef(R.InitFromTemporary); + } return; } case OpenACCClauseKind::Attach: { diff --git a/clang/lib/StaticAnalyzer/Checkers/AnalysisOrderChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/AnalysisOrderChecker.cpp index 3b3def7..e64153d 100644 --- a/clang/lib/StaticAnalyzer/Checkers/AnalysisOrderChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/AnalysisOrderChecker.cpp @@ -183,7 +183,8 @@ public: llvm::errs() << "NewAllocator\n"; } - void checkBind(SVal Loc, SVal Val, const Stmt *S, CheckerContext &C) const { + void checkBind(SVal Loc, SVal Val, const Stmt *S, bool AtDeclInit, + CheckerContext &C) const { if (isCallbackEnabled(C, "Bind")) llvm::errs() << "Bind\n"; } diff --git a/clang/lib/StaticAnalyzer/Checkers/BoolAssignmentChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/BoolAssignmentChecker.cpp index 837cbbc..921114a 100644 --- a/clang/lib/StaticAnalyzer/Checkers/BoolAssignmentChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/BoolAssignmentChecker.cpp @@ -29,7 +29,8 @@ class BoolAssignmentChecker : public Checker<check::Bind> { bool IsTainted = false) const; public: - void checkBind(SVal Loc, SVal Val, const Stmt *S, CheckerContext &C) const; + void checkBind(SVal Loc, SVal Val, const Stmt *S, bool AtDeclInit, + CheckerContext &C) const; }; } // end anonymous namespace @@ -55,6 +56,7 @@ static bool isBooleanType(QualType Ty) { } void BoolAssignmentChecker::checkBind(SVal Loc, SVal Val, const Stmt *S, + bool AtDeclInit, CheckerContext &C) const { // We are only interested in stores into Booleans. diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp index fd0a398..0e5fc0a 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -97,10 +97,6 @@ public: CheckerFrontendWithBugType UninitializedRead{ "Accessing unitialized/garbage values"}; - // FIXME: This bug type should be removed because it is only emitted in a - // situation that is practically impossible. - const BugType AdditionOverflow{&OutOfBounds, "API"}; - StringRef getDebugTag() const override { return "MallocChecker"; } static void *getTag() { static int tag; return &tag; } @@ -330,7 +326,6 @@ public: const Stmt *S, StringRef WarningMsg) const; void emitNotCStringBug(CheckerContext &C, ProgramStateRef State, const Stmt *S, StringRef WarningMsg) const; - void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const; void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State, const Expr *E, const MemRegion *R, StringRef Msg) const; @@ -843,22 +838,6 @@ void CStringChecker::emitNotCStringBug(CheckerContext &C, ProgramStateRef State, } } -void CStringChecker::emitAdditionOverflowBug(CheckerContext &C, - ProgramStateRef State) const { - if (ExplodedNode *N = C.generateErrorNode(State)) { - // This isn't a great error message, but this should never occur in real - // code anyway -- you'd have to create a buffer longer than a size_t can - // represent, which is sort of a contradiction. - const char *WarningMsg = - "This expression will create a string whose length is too big to " - "be represented as a size_t"; - - auto Report = std::make_unique<PathSensitiveBugReport>(AdditionOverflow, - WarningMsg, N); - C.emitReport(std::move(Report)); - } -} - ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C, ProgramStateRef state, NonLoc left, @@ -896,19 +875,22 @@ ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C, SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left, *maxMinusRightNL, cmpTy); - ProgramStateRef stateOverflow, stateOkay; - std::tie(stateOverflow, stateOkay) = - state->assume(willOverflow.castAs<DefinedOrUnknownSVal>()); + auto [StateOverflow, StateOkay] = + state->assume(willOverflow.castAs<DefinedOrUnknownSVal>()); - if (stateOverflow && !stateOkay) { - // We have an overflow. Emit a bug report. - emitAdditionOverflowBug(C, stateOverflow); + if (StateOverflow && !StateOkay) { + // On this path the analyzer is convinced that the addition of these two + // values would overflow `size_t` which must be caused by the inaccuracy + // of our modeling because this method is called in situations where the + // summands are size/length values which are much less than SIZE_MAX. To + // avoid false positives let's just sink this invalid path. + C.addSink(StateOverflow); return nullptr; } // From now on, assume an overflow didn't occur. - assert(stateOkay); - state = stateOkay; + assert(StateOkay); + state = StateOkay; } return state; diff --git a/clang/lib/StaticAnalyzer/Checkers/CheckerDocumentation.cpp b/clang/lib/StaticAnalyzer/Checkers/CheckerDocumentation.cpp index 350db4b..392c7ee 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CheckerDocumentation.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CheckerDocumentation.cpp @@ -175,9 +175,12 @@ public: /// \param Loc The value of the location (pointer). /// \param Val The value which will be stored at the location Loc. /// \param S The bind is performed while processing the statement S. + /// \param AtDeclInit Whether the bind is performed during declaration + /// initialization. /// /// check::Bind - void checkBind(SVal Loc, SVal Val, const Stmt *S, CheckerContext &) const {} + void checkBind(SVal Loc, SVal Val, const Stmt *S, bool AtDeclInit, + CheckerContext &) const {} /// Called after a CFG edge is taken within a function. /// diff --git a/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp index 152129e..395d724 100644 --- a/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp @@ -48,7 +48,8 @@ class DereferenceChecker public: void checkLocation(SVal location, bool isLoad, const Stmt* S, CheckerContext &C) const; - void checkBind(SVal L, SVal V, const Stmt *S, CheckerContext &C) const; + void checkBind(SVal L, SVal V, const Stmt *S, bool AtDeclInit, + CheckerContext &C) const; static void AddDerefSource(raw_ostream &os, SmallVectorImpl<SourceRange> &Ranges, @@ -309,7 +310,7 @@ void DereferenceChecker::checkLocation(SVal l, bool isLoad, const Stmt* S, } void DereferenceChecker::checkBind(SVal L, SVal V, const Stmt *S, - CheckerContext &C) const { + bool AtDeclInit, CheckerContext &C) const { // If we're binding to a reference, check if the value is known to be null. if (V.isUndef()) return; diff --git a/clang/lib/StaticAnalyzer/Checkers/IteratorModeling.cpp b/clang/lib/StaticAnalyzer/Checkers/IteratorModeling.cpp index 7ad54c0..7eb9a1d 100644 --- a/clang/lib/StaticAnalyzer/Checkers/IteratorModeling.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/IteratorModeling.cpp @@ -150,7 +150,8 @@ public: IteratorModeling() = default; void checkPostCall(const CallEvent &Call, CheckerContext &C) const; - void checkBind(SVal Loc, SVal Val, const Stmt *S, CheckerContext &C) const; + void checkBind(SVal Loc, SVal Val, const Stmt *S, bool AtDeclInit, + CheckerContext &C) const; void checkPostStmt(const UnaryOperator *UO, CheckerContext &C) const; void checkPostStmt(const BinaryOperator *BO, CheckerContext &C) const; void checkPostStmt(const MaterializeTemporaryExpr *MTE, @@ -234,7 +235,7 @@ void IteratorModeling::checkPostCall(const CallEvent &Call, } void IteratorModeling::checkBind(SVal Loc, SVal Val, const Stmt *S, - CheckerContext &C) const { + bool AtDeclInit, CheckerContext &C) const { auto State = C.getState(); const auto *Pos = getIteratorPosition(State, Val); if (Pos) { diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp index 369d619..efb9809 100644 --- a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp @@ -3156,7 +3156,7 @@ void MallocChecker::checkPreCall(const CallEvent &Call, for (unsigned I = 0, E = Call.getNumArgs(); I != E; ++I) { SVal ArgSVal = Call.getArgSVal(I); if (isa<Loc>(ArgSVal)) { - SymbolRef Sym = ArgSVal.getAsSymbol(); + SymbolRef Sym = ArgSVal.getAsSymbol(/*IncludeBaseRegions=*/true); if (!Sym) continue; if (checkUseAfterFree(Sym, C, Call.getArgExpr(I))) diff --git a/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp index 9744d1a..eeb6b72 100644 --- a/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp @@ -97,7 +97,8 @@ public: // libraries. bool NoDiagnoseCallsToSystemHeaders = false; - void checkBind(SVal L, SVal V, const Stmt *S, CheckerContext &C) const; + void checkBind(SVal L, SVal V, const Stmt *S, bool AtDeclInit, + CheckerContext &C) const; void checkPostStmt(const ExplicitCastExpr *CE, CheckerContext &C) const; void checkPreStmt(const ReturnStmt *S, CheckerContext &C) const; void checkPostObjCMessage(const ObjCMethodCall &M, CheckerContext &C) const; @@ -1250,7 +1251,7 @@ static bool isARCNilInitializedLocal(CheckerContext &C, const Stmt *S) { /// Propagate the nullability information through binds and warn when nullable /// pointer or null symbol is assigned to a pointer with a nonnull type. void NullabilityChecker::checkBind(SVal L, SVal V, const Stmt *S, - CheckerContext &C) const { + bool AtDeclInit, CheckerContext &C) const { const TypedValueRegion *TVR = dyn_cast_or_null<TypedValueRegion>(L.getAsRegion()); if (!TVR) diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCSelfInitChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCSelfInitChecker.cpp index ace3426..e40b4f8 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ObjCSelfInitChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ObjCSelfInitChecker.cpp @@ -73,7 +73,8 @@ public: void checkPreStmt(const ReturnStmt *S, CheckerContext &C) const; void checkLocation(SVal location, bool isLoad, const Stmt *S, CheckerContext &C) const; - void checkBind(SVal loc, SVal val, const Stmt *S, CheckerContext &C) const; + void checkBind(SVal loc, SVal val, const Stmt *S, bool AtDeclInit, + CheckerContext &C) const; void checkPreCall(const CallEvent &CE, CheckerContext &C) const; void checkPostCall(const CallEvent &CE, CheckerContext &C) const; @@ -311,9 +312,8 @@ void ObjCSelfInitChecker::checkLocation(SVal location, bool isLoad, C); } - void ObjCSelfInitChecker::checkBind(SVal loc, SVal val, const Stmt *S, - CheckerContext &C) const { + bool AtDeclInit, CheckerContext &C) const { // Allow assignment of anything to self. Self is a local variable in the // initializer, so it is legal to assign anything to it, like results of // static functions/method calls. After self is assigned something we cannot diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp index 62bc321..1762505 100644 --- a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp @@ -840,20 +840,27 @@ ProgramStateRef RetainCountChecker::updateSymbol(ProgramStateRef state, const RefCountBug & RetainCountChecker::errorKindToBugKind(RefVal::Kind ErrorKind, SymbolRef Sym) const { + const RefCountFrontend &FE = getPreferredFrontend(); + switch (ErrorKind) { case RefVal::ErrorUseAfterRelease: - return *UseAfterRelease; + return FE.UseAfterRelease; case RefVal::ErrorReleaseNotOwned: - return *ReleaseNotOwned; + return FE.ReleaseNotOwned; case RefVal::ErrorDeallocNotOwned: if (Sym->getType()->getPointeeCXXRecordDecl()) - return *FreeNotOwned; - return *DeallocNotOwned; + return FE.FreeNotOwned; + return FE.DeallocNotOwned; default: llvm_unreachable("Unhandled error."); } } +bool RetainCountChecker::isReleaseUnownedError(RefVal::Kind ErrorKind) const { + return ErrorKind == RefVal::ErrorReleaseNotOwned || + ErrorKind == RefVal::ErrorDeallocNotOwned; +} + void RetainCountChecker::processNonLeakError(ProgramStateRef St, SourceRange ErrorRange, RefVal::Kind ErrorKind, @@ -874,8 +881,8 @@ void RetainCountChecker::processNonLeakError(ProgramStateRef St, return; auto report = std::make_unique<RefCountReport>( - errorKindToBugKind(ErrorKind, Sym), - C.getASTContext().getLangOpts(), N, Sym); + errorKindToBugKind(ErrorKind, Sym), C.getASTContext().getLangOpts(), N, + Sym, /*isLeak=*/false, isReleaseUnownedError(ErrorKind)); report->addRange(ErrorRange); C.emitReport(std::move(report)); } @@ -1090,8 +1097,8 @@ ExplodedNode * RetainCountChecker::checkReturnWithRetEffect(const ReturnStmt *S, ExplodedNode *N = C.addTransition(state, Pred); if (N) { const LangOptions &LOpts = C.getASTContext().getLangOpts(); - auto R = - std::make_unique<RefLeakReport>(*LeakAtReturn, LOpts, N, Sym, C); + auto R = std::make_unique<RefLeakReport>( + getPreferredFrontend().LeakAtReturn, LOpts, N, Sym, C); C.emitReport(std::move(R)); } return N; @@ -1113,7 +1120,8 @@ ExplodedNode * RetainCountChecker::checkReturnWithRetEffect(const ReturnStmt *S, ExplodedNode *N = C.addTransition(state, Pred); if (N) { auto R = std::make_unique<RefCountReport>( - *ReturnNotOwnedForOwned, C.getASTContext().getLangOpts(), N, Sym); + getPreferredFrontend().ReturnNotOwnedForOwned, + C.getASTContext().getLangOpts(), N, Sym); C.emitReport(std::move(R)); } return N; @@ -1128,7 +1136,7 @@ ExplodedNode * RetainCountChecker::checkReturnWithRetEffect(const ReturnStmt *S, //===----------------------------------------------------------------------===// void RetainCountChecker::checkBind(SVal loc, SVal val, const Stmt *S, - CheckerContext &C) const { + bool AtDeclInit, CheckerContext &C) const { ProgramStateRef state = C.getState(); const MemRegion *MR = loc.getAsRegion(); @@ -1261,8 +1269,8 @@ ProgramStateRef RetainCountChecker::handleAutoreleaseCounts( os << "has a +" << V.getCount() << " retain count"; const LangOptions &LOpts = Ctx.getASTContext().getLangOpts(); - auto R = std::make_unique<RefCountReport>(*OverAutorelease, LOpts, N, Sym, - os.str()); + auto R = std::make_unique<RefCountReport>( + getPreferredFrontend().OverAutorelease, LOpts, N, Sym, os.str()); Ctx.emitReport(std::move(R)); } @@ -1307,8 +1315,10 @@ RetainCountChecker::processLeaks(ProgramStateRef state, const LangOptions &LOpts = Ctx.getASTContext().getLangOpts(); if (N) { + const RefCountFrontend &FE = getPreferredFrontend(); + const RefCountBug &BT = Pred ? FE.LeakWithinFunction : FE.LeakAtReturn; + for (SymbolRef L : Leaked) { - const RefCountBug &BT = Pred ? *LeakWithinFunction : *LeakAtReturn; Ctx.emitReport(std::make_unique<RefLeakReport>(BT, LOpts, N, L, Ctx)); } } @@ -1463,44 +1473,31 @@ std::unique_ptr<SimpleProgramPointTag> RetainCountChecker::DeallocSentTag; std::unique_ptr<SimpleProgramPointTag> RetainCountChecker::CastFailTag; void ento::registerRetainCountBase(CheckerManager &Mgr) { - auto *Chk = Mgr.registerChecker<RetainCountChecker>(); + auto *Chk = Mgr.getChecker<RetainCountChecker>(); Chk->DeallocSentTag = std::make_unique<SimpleProgramPointTag>( "RetainCountChecker", "DeallocSent"); Chk->CastFailTag = std::make_unique<SimpleProgramPointTag>( "RetainCountChecker", "DynamicCastFail"); } -bool ento::shouldRegisterRetainCountBase(const CheckerManager &mgr) { +bool ento::shouldRegisterRetainCountBase(const CheckerManager &) { return true; } + void ento::registerRetainCountChecker(CheckerManager &Mgr) { auto *Chk = Mgr.getChecker<RetainCountChecker>(); - Chk->TrackObjCAndCFObjects = true; + Chk->RetainCount.enable(Mgr); Chk->TrackNSCFStartParam = Mgr.getAnalyzerOptions().getCheckerBooleanOption( Mgr.getCurrentCheckerName(), "TrackNSCFStartParam"); - -#define INIT_BUGTYPE(KIND) \ - Chk->KIND = std::make_unique<RefCountBug>(Mgr.getCurrentCheckerName(), \ - RefCountBug::KIND); - // TODO: Ideally, we should have a checker for each of these bug types. - INIT_BUGTYPE(UseAfterRelease) - INIT_BUGTYPE(ReleaseNotOwned) - INIT_BUGTYPE(DeallocNotOwned) - INIT_BUGTYPE(FreeNotOwned) - INIT_BUGTYPE(OverAutorelease) - INIT_BUGTYPE(ReturnNotOwnedForOwned) - INIT_BUGTYPE(LeakWithinFunction) - INIT_BUGTYPE(LeakAtReturn) -#undef INIT_BUGTYPE } -bool ento::shouldRegisterRetainCountChecker(const CheckerManager &mgr) { +bool ento::shouldRegisterRetainCountChecker(const CheckerManager &) { return true; } void ento::registerOSObjectRetainCountChecker(CheckerManager &Mgr) { auto *Chk = Mgr.getChecker<RetainCountChecker>(); - Chk->TrackOSObjects = true; + Chk->OSObjectRetainCount.enable(Mgr); // FIXME: We want bug reports to always have the same checker name associated // with them, yet here, if RetainCountChecker is disabled but @@ -1511,21 +1508,8 @@ void ento::registerOSObjectRetainCountChecker(CheckerManager &Mgr) { // diagnostics, and **hidden checker options** with the fine-tuning of // modeling. Following this logic, OSObjectRetainCountChecker should be the // latter, but we can't just remove it for backward compatibility reasons. -#define LAZY_INIT_BUGTYPE(KIND) \ - if (!Chk->KIND) \ - Chk->KIND = std::make_unique<RefCountBug>(Mgr.getCurrentCheckerName(), \ - RefCountBug::KIND); - LAZY_INIT_BUGTYPE(UseAfterRelease) - LAZY_INIT_BUGTYPE(ReleaseNotOwned) - LAZY_INIT_BUGTYPE(DeallocNotOwned) - LAZY_INIT_BUGTYPE(FreeNotOwned) - LAZY_INIT_BUGTYPE(OverAutorelease) - LAZY_INIT_BUGTYPE(ReturnNotOwnedForOwned) - LAZY_INIT_BUGTYPE(LeakWithinFunction) - LAZY_INIT_BUGTYPE(LeakAtReturn) -#undef LAZY_INIT_BUGTYPE } -bool ento::shouldRegisterOSObjectRetainCountChecker(const CheckerManager &mgr) { +bool ento::shouldRegisterOSObjectRetainCountChecker(const CheckerManager &) { return true; } diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.h b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.h index 0e81143..dc8bad6 100644 --- a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.h +++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.h @@ -235,51 +235,32 @@ public: }; class RetainCountChecker - : public Checker< check::Bind, - check::DeadSymbols, - check::BeginFunction, - check::EndFunction, - check::PostStmt<BlockExpr>, - check::PostStmt<CastExpr>, - check::PostStmt<ObjCArrayLiteral>, - check::PostStmt<ObjCDictionaryLiteral>, - check::PostStmt<ObjCBoxedExpr>, - check::PostStmt<ObjCIvarRefExpr>, - check::PostCall, - check::RegionChanges, - eval::Assume, - eval::Call > { + : public CheckerFamily< + check::Bind, check::DeadSymbols, check::BeginFunction, + check::EndFunction, check::PostStmt<BlockExpr>, + check::PostStmt<CastExpr>, check::PostStmt<ObjCArrayLiteral>, + check::PostStmt<ObjCDictionaryLiteral>, + check::PostStmt<ObjCBoxedExpr>, check::PostStmt<ObjCIvarRefExpr>, + check::PostCall, check::RegionChanges, eval::Assume, eval::Call> { public: - std::unique_ptr<RefCountBug> UseAfterRelease; - std::unique_ptr<RefCountBug> ReleaseNotOwned; - std::unique_ptr<RefCountBug> DeallocNotOwned; - std::unique_ptr<RefCountBug> FreeNotOwned; - std::unique_ptr<RefCountBug> OverAutorelease; - std::unique_ptr<RefCountBug> ReturnNotOwnedForOwned; - std::unique_ptr<RefCountBug> LeakWithinFunction; - std::unique_ptr<RefCountBug> LeakAtReturn; + RefCountFrontend RetainCount; + RefCountFrontend OSObjectRetainCount; mutable std::unique_ptr<RetainSummaryManager> Summaries; static std::unique_ptr<SimpleProgramPointTag> DeallocSentTag; static std::unique_ptr<SimpleProgramPointTag> CastFailTag; - /// Track Objective-C and CoreFoundation objects. - bool TrackObjCAndCFObjects = false; - - /// Track sublcasses of OSObject. - bool TrackOSObjects = false; - /// Track initial parameters (for the entry point) for NS/CF objects. bool TrackNSCFStartParam = false; - RetainCountChecker() {}; + StringRef getDebugTag() const override { return "RetainCountChecker"; } RetainSummaryManager &getSummaryManager(ASTContext &Ctx) const { if (!Summaries) - Summaries.reset( - new RetainSummaryManager(Ctx, TrackObjCAndCFObjects, TrackOSObjects)); + Summaries = std::make_unique<RetainSummaryManager>( + Ctx, RetainCount.isEnabled(), OSObjectRetainCount.isEnabled()); return *Summaries; } @@ -287,10 +268,20 @@ public: return getSummaryManager(C.getASTContext()); } + const RefCountFrontend &getPreferredFrontend() const { + // FIXME: The two frontends of this checker family are in an unusual + // relationship: if they are both enabled, then all bug reports are + // reported by RetainCount (i.e. `osx.cocoa.RetainCount`), even the bugs + // that "belong to" OSObjectRetainCount (i.e. `osx.OSObjectRetainCount`). + // This is counter-intuitive and should be fixed to avoid confusion. + return RetainCount.isEnabled() ? RetainCount : OSObjectRetainCount; + } + void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, const char *Sep) const override; - void checkBind(SVal loc, SVal val, const Stmt *S, CheckerContext &C) const; + void checkBind(SVal loc, SVal val, const Stmt *S, bool AtDeclInit, + CheckerContext &C) const; void checkPostStmt(const BlockExpr *BE, CheckerContext &C) const; void checkPostStmt(const CastExpr *CE, CheckerContext &C) const; @@ -337,6 +328,8 @@ public: const RefCountBug &errorKindToBugKind(RefVal::Kind ErrorKind, SymbolRef Sym) const; + bool isReleaseUnownedError(RefVal::Kind ErrorKind) const; + void processNonLeakError(ProgramStateRef St, SourceRange ErrorRange, RefVal::Kind ErrorKind, SymbolRef Sym, CheckerContext &C) const; diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp index c9f5dc9..cad2c72 100644 --- a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp @@ -21,57 +21,6 @@ using namespace clang; using namespace ento; using namespace retaincountchecker; -StringRef RefCountBug::bugTypeToName(RefCountBug::RefCountBugKind BT) { - switch (BT) { - case UseAfterRelease: - return "Use-after-release"; - case ReleaseNotOwned: - return "Bad release"; - case DeallocNotOwned: - return "-dealloc sent to non-exclusively owned object"; - case FreeNotOwned: - return "freeing non-exclusively owned object"; - case OverAutorelease: - return "Object autoreleased too many times"; - case ReturnNotOwnedForOwned: - return "Method should return an owned object"; - case LeakWithinFunction: - return "Leak"; - case LeakAtReturn: - return "Leak of returned object"; - } - llvm_unreachable("Unknown RefCountBugKind"); -} - -StringRef RefCountBug::getDescription() const { - switch (BT) { - case UseAfterRelease: - return "Reference-counted object is used after it is released"; - case ReleaseNotOwned: - return "Incorrect decrement of the reference count of an object that is " - "not owned at this point by the caller"; - case DeallocNotOwned: - return "-dealloc sent to object that may be referenced elsewhere"; - case FreeNotOwned: - return "'free' called on an object that may be referenced elsewhere"; - case OverAutorelease: - return "Object autoreleased too many times"; - case ReturnNotOwnedForOwned: - return "Object with a +0 retain count returned to caller where a +1 " - "(owning) retain count is expected"; - case LeakWithinFunction: - case LeakAtReturn: - return ""; - } - llvm_unreachable("Unknown RefCountBugKind"); -} - -RefCountBug::RefCountBug(CheckerNameRef Checker, RefCountBugKind BT) - : BugType(Checker, bugTypeToName(BT), categories::MemoryRefCount, - /*SuppressOnSink=*/BT == LeakWithinFunction || - BT == LeakAtReturn), - BT(BT) {} - static bool isNumericLiteralExpression(const Expr *E) { // FIXME: This set of cases was copied from SemaExprObjC. return isa<IntegerLiteral, CharacterLiteral, FloatingLiteral, @@ -312,9 +261,11 @@ namespace retaincountchecker { class RefCountReportVisitor : public BugReporterVisitor { protected: SymbolRef Sym; + bool IsReleaseUnowned; public: - RefCountReportVisitor(SymbolRef sym) : Sym(sym) {} + RefCountReportVisitor(SymbolRef S, bool IRU) + : Sym(S), IsReleaseUnowned(IRU) {} void Profile(llvm::FoldingSetNodeID &ID) const override { static int x = 0; @@ -334,7 +285,8 @@ public: class RefLeakReportVisitor : public RefCountReportVisitor { public: RefLeakReportVisitor(SymbolRef Sym, const MemRegion *LastBinding) - : RefCountReportVisitor(Sym), LastBinding(LastBinding) {} + : RefCountReportVisitor(Sym, /*IsReleaseUnowned=*/false), + LastBinding(LastBinding) {} PathDiagnosticPieceRef getEndPath(BugReporterContext &BRC, const ExplodedNode *N, @@ -452,12 +404,6 @@ annotateStartParameter(const ExplodedNode *N, SymbolRef Sym, PathDiagnosticPieceRef RefCountReportVisitor::VisitNode(const ExplodedNode *N, BugReporterContext &BRC, PathSensitiveBugReport &BR) { - - const auto &BT = static_cast<const RefCountBug&>(BR.getBugType()); - - bool IsFreeUnowned = BT.getBugType() == RefCountBug::FreeNotOwned || - BT.getBugType() == RefCountBug::DeallocNotOwned; - const SourceManager &SM = BRC.getSourceManager(); CallEventManager &CEMgr = BRC.getStateManager().getCallEventManager(); if (auto CE = N->getLocationAs<CallExitBegin>()) @@ -490,7 +436,7 @@ RefCountReportVisitor::VisitNode(const ExplodedNode *N, BugReporterContext &BRC, std::string sbuf; llvm::raw_string_ostream os(sbuf); - if (PrevT && IsFreeUnowned && CurrV.isNotOwned() && PrevT->isOwned()) { + if (PrevT && IsReleaseUnowned && CurrV.isNotOwned() && PrevT->isOwned()) { os << "Object is now not exclusively owned"; auto Pos = PathDiagnosticLocation::create(N->getLocation(), SM); return std::make_shared<PathDiagnosticEventPiece>(Pos, sbuf); @@ -815,10 +761,8 @@ RefLeakReportVisitor::getEndPath(BugReporterContext &BRC, if (K == ObjKind::ObjC || K == ObjKind::CF) { os << "whose name ('" << *FD << "') does not contain 'Copy' or 'Create'. This violates the " - "naming" - " convention rules given in the Memory Management Guide for " - "Core" - " Foundation"; + "naming convention rules given in the Memory Management Guide " + "for Core Foundation"; } else if (RV->getObjKind() == ObjKind::OS) { std::string FuncName = FD->getNameAsString(); os << "whose name ('" << FuncName << "') starts with '" @@ -836,19 +780,20 @@ RefLeakReportVisitor::getEndPath(BugReporterContext &BRC, } RefCountReport::RefCountReport(const RefCountBug &D, const LangOptions &LOpts, - ExplodedNode *n, SymbolRef sym, bool isLeak) - : PathSensitiveBugReport(D, D.getDescription(), n), Sym(sym), + ExplodedNode *n, SymbolRef sym, bool isLeak, + bool IsReleaseUnowned) + : PathSensitiveBugReport(D, D.getReportMessage(), n), Sym(sym), isLeak(isLeak) { if (!isLeak) - addVisitor<RefCountReportVisitor>(sym); + addVisitor<RefCountReportVisitor>(sym, IsReleaseUnowned); } RefCountReport::RefCountReport(const RefCountBug &D, const LangOptions &LOpts, ExplodedNode *n, SymbolRef sym, StringRef endText) - : PathSensitiveBugReport(D, D.getDescription(), endText, n) { + : PathSensitiveBugReport(D, D.getReportMessage(), endText, n) { - addVisitor<RefCountReportVisitor>(sym); + addVisitor<RefCountReportVisitor>(sym, /*IsReleaseUnowned=*/false); } void RefLeakReport::deriveParamLocation(CheckerContext &Ctx) { diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.h b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.h index d059008..6ceb86f 100644 --- a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.h +++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.h @@ -25,25 +25,44 @@ namespace ento { namespace retaincountchecker { class RefCountBug : public BugType { + StringRef ReportMessage; + public: - enum RefCountBugKind { - UseAfterRelease, - ReleaseNotOwned, - DeallocNotOwned, - FreeNotOwned, - OverAutorelease, - ReturnNotOwnedForOwned, - LeakWithinFunction, - LeakAtReturn, - }; - RefCountBug(CheckerNameRef Checker, RefCountBugKind BT); - StringRef getDescription() const; - - RefCountBugKind getBugType() const { return BT; } - -private: - RefCountBugKind BT; - static StringRef bugTypeToName(RefCountBugKind BT); + RefCountBug(const CheckerFrontend *CF, StringRef Desc, StringRef ReportMsg, + bool SuppressOnSink = false) + : BugType(CF, Desc, categories::MemoryRefCount, SuppressOnSink), + ReportMessage(ReportMsg) {} + StringRef getReportMessage() const { return ReportMessage; } +}; + +class RefCountFrontend : public CheckerFrontend { +public: + const RefCountBug UseAfterRelease{ + this, "Use-after-release", + "Reference-counted object is used after it is released"}; + const RefCountBug ReleaseNotOwned{ + this, "Bad release", + "Incorrect decrement of the reference count of an object that is not " + "owned at this point by the caller"}; + const RefCountBug DeallocNotOwned{ + this, "-dealloc sent to non-exclusively owned object", + "-dealloc sent to object that may be referenced elsewhere"}; + const RefCountBug FreeNotOwned{ + this, "freeing non-exclusively owned object", + "'free' called on an object that may be referenced elsewhere"}; + const RefCountBug OverAutorelease{this, "Object autoreleased too many times", + "Object autoreleased too many times"}; + const RefCountBug ReturnNotOwnedForOwned{ + this, "Method should return an owned object", + "Object with a +0 retain count returned to caller where a +1 (owning) " + "retain count is expected"}; + // For these two bug types the report message will be generated dynamically + // by `RefLeakReport::createDescription` so the empty string taken from the + // BugType will be ignored (overwritten). + const RefCountBug LeakWithinFunction{this, "Leak", /*ReportMsg=*/"", + /*SuppressOnSink=*/true}; + const RefCountBug LeakAtReturn{this, "Leak of returned object", + /*ReportMsg=*/"", /*SuppressOnSink=*/true}; }; class RefCountReport : public PathSensitiveBugReport { @@ -53,8 +72,8 @@ protected: public: RefCountReport(const RefCountBug &D, const LangOptions &LOpts, - ExplodedNode *n, SymbolRef sym, - bool isLeak=false); + ExplodedNode *n, SymbolRef sym, bool isLeak = false, + bool IsReleaseUnowned = false); RefCountReport(const RefCountBug &D, const LangOptions &LOpts, ExplodedNode *n, SymbolRef sym, diff --git a/clang/lib/StaticAnalyzer/Checkers/StoreToImmutableChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StoreToImmutableChecker.cpp index afad419..2bb3917 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StoreToImmutableChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StoreToImmutableChecker.cpp @@ -26,53 +26,11 @@ class StoreToImmutableChecker : public Checker<check::Bind> { const BugType BT{this, "Write to immutable memory", "CERT Environment (ENV)"}; public: - void checkBind(SVal Loc, SVal Val, const Stmt *S, CheckerContext &C) const; + void checkBind(SVal Loc, SVal Val, const Stmt *S, bool AtDeclInit, + CheckerContext &C) const; }; } // end anonymous namespace -static bool isInitializationContext(const Stmt *S, CheckerContext &C) { - // Check if this is a DeclStmt (variable declaration) - if (isa<DeclStmt>(S)) - return true; - - // This part is specific for initialization of const lambdas pre-C++17. - // Lets look at the AST of the statement: - // ``` - // const auto lambda = [](){}; - // ``` - // - // The relevant part of the AST for this case prior to C++17 is: - // ... - // `-DeclStmt - // `-VarDecl - // `-ExprWithCleanups - // `-CXXConstructExpr - // ... - // In C++17 and later, the AST is different: - // ... - // `-DeclStmt - // `-VarDecl - // `-ImplicitCastExpr - // `-LambdaExpr - // |-CXXRecordDecl - // `-CXXConstructExpr - // ... - // And even beside this, the statement `S` that is given to the checkBind - // callback is the VarDecl in C++17 and later, and the CXXConstructExpr in - // C++14 and before. So in order to support the C++14 we need the following - // ugly hack to detect whether this construction is used to initialize a - // variable. - // - // FIXME: This should be eliminated by improving the API of checkBind to - // ensure that it consistently passes the `VarDecl` (instead of the - // `CXXConstructExpr`) when the constructor call denotes the initialization - // of a variable with a lambda, or maybe less preferably, try the more - // invasive approach of passing the information forward to the checkers - // whether the current bind is an initialization or an assignment. - const auto *ConstructExp = dyn_cast<CXXConstructExpr>(S); - return ConstructExp && ConstructExp->isElidable(); -} - static bool isEffectivelyConstRegion(const MemRegion *MR, CheckerContext &C) { if (isa<GlobalImmutableSpaceRegion>(MR)) return true; @@ -128,6 +86,7 @@ getInnermostEnclosingConstDeclRegion(const MemRegion *MR, CheckerContext &C) { } void StoreToImmutableChecker::checkBind(SVal Loc, SVal Val, const Stmt *S, + bool AtDeclInit, CheckerContext &C) const { // We are only interested in stores to memory regions const MemRegion *MR = Loc.getAsRegion(); @@ -136,9 +95,7 @@ void StoreToImmutableChecker::checkBind(SVal Loc, SVal Val, const Stmt *S, // Skip variable declarations and initializations - we only want to catch // actual writes - // FIXME: If the API of checkBind would allow to distinguish between - // initialization and assignment, we could use that instead. - if (isInitializationContext(S, C)) + if (AtDeclInit) return; // Check if the region is in the global immutable space diff --git a/clang/lib/StaticAnalyzer/Checkers/UndefinedAssignmentChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UndefinedAssignmentChecker.cpp index e98de33..7f8923c 100644 --- a/clang/lib/StaticAnalyzer/Checkers/UndefinedAssignmentChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/UndefinedAssignmentChecker.cpp @@ -26,13 +26,13 @@ class UndefinedAssignmentChecker const BugType BT{this, "Assigned value is uninitialized"}; public: - void checkBind(SVal location, SVal val, const Stmt *S, + void checkBind(SVal location, SVal val, const Stmt *S, bool AtDeclInit, CheckerContext &C) const; }; } void UndefinedAssignmentChecker::checkBind(SVal location, SVal val, - const Stmt *StoreE, + const Stmt *StoreE, bool AtDeclInit, CheckerContext &C) const { if (!val.isUndef()) return; diff --git a/clang/lib/StaticAnalyzer/Checkers/VforkChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/VforkChecker.cpp index cb73ac6..116dd93 100644 --- a/clang/lib/StaticAnalyzer/Checkers/VforkChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/VforkChecker.cpp @@ -62,7 +62,8 @@ public: void checkPreCall(const CallEvent &Call, CheckerContext &C) const; void checkPostCall(const CallEvent &Call, CheckerContext &C) const; - void checkBind(SVal L, SVal V, const Stmt *S, CheckerContext &C) const; + void checkBind(SVal L, SVal V, const Stmt *S, bool AtDeclInit, + CheckerContext &C) const; void checkPreStmt(const ReturnStmt *RS, CheckerContext &C) const; }; @@ -188,7 +189,7 @@ void VforkChecker::checkPreCall(const CallEvent &Call, } // Prohibit writes in child process (except for vfork's lhs). -void VforkChecker::checkBind(SVal L, SVal V, const Stmt *S, +void VforkChecker::checkBind(SVal L, SVal V, const Stmt *S, bool AtDeclInit, CheckerContext &C) const { ProgramStateRef State = C.getState(); if (!isChildProcess(State)) diff --git a/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp b/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp index 0fe677e..44c6f9f 100644 --- a/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp @@ -376,11 +376,13 @@ namespace { const Stmt *S; ExprEngine &Eng; const ProgramPoint &PP; + bool AtDeclInit; - CheckBindContext(const CheckersTy &checkers, - SVal loc, SVal val, const Stmt *s, ExprEngine &eng, + CheckBindContext(const CheckersTy &checkers, SVal loc, SVal val, + const Stmt *s, bool AtDeclInit, ExprEngine &eng, const ProgramPoint &pp) - : Checkers(checkers), Loc(loc), Val(val), S(s), Eng(eng), PP(pp) {} + : Checkers(checkers), Loc(loc), Val(val), S(s), Eng(eng), PP(pp), + AtDeclInit(AtDeclInit) {} CheckersTy::const_iterator checkers_begin() { return Checkers.begin(); } CheckersTy::const_iterator checkers_end() { return Checkers.end(); } @@ -391,7 +393,7 @@ namespace { const ProgramPoint &L = PP.withTag(checkFn.Checker); CheckerContext C(Bldr, Eng, Pred, L); - checkFn(Loc, Val, S, C); + checkFn(Loc, Val, S, AtDeclInit, C); } }; @@ -408,10 +410,10 @@ namespace { /// Run checkers for binding of a value to a location. void CheckerManager::runCheckersForBind(ExplodedNodeSet &Dst, const ExplodedNodeSet &Src, - SVal location, SVal val, - const Stmt *S, ExprEngine &Eng, + SVal location, SVal val, const Stmt *S, + bool AtDeclInit, ExprEngine &Eng, const ProgramPoint &PP) { - CheckBindContext C(BindCheckers, location, val, S, Eng, PP); + CheckBindContext C(BindCheckers, location, val, S, AtDeclInit, Eng, PP); llvm::TimeTraceScope TimeScope{ "CheckerManager::runCheckersForBind", [&val]() { return getTimeTraceBindMetadata(val); }}; diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index d874844..c853c00 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -3714,9 +3714,8 @@ ExprEngine::notifyCheckersOfPointerEscape(ProgramStateRef State, /// evalBind - Handle the semantics of binding a value to a specific location. /// This method is used by evalStore and (soon) VisitDeclStmt, and others. void ExprEngine::evalBind(ExplodedNodeSet &Dst, const Stmt *StoreE, - ExplodedNode *Pred, - SVal location, SVal Val, - bool atDeclInit, const ProgramPoint *PP) { + ExplodedNode *Pred, SVal location, SVal Val, + bool AtDeclInit, const ProgramPoint *PP) { const LocationContext *LC = Pred->getLocationContext(); PostStmt PS(StoreE, LC); if (!PP) @@ -3725,7 +3724,7 @@ void ExprEngine::evalBind(ExplodedNodeSet &Dst, const Stmt *StoreE, // Do a previsit of the bind. ExplodedNodeSet CheckedSet; getCheckerManager().runCheckersForBind(CheckedSet, Pred, location, Val, - StoreE, *this, *PP); + StoreE, AtDeclInit, *this, *PP); StmtNodeBuilder Bldr(CheckedSet, Dst, *currBldrCtx); @@ -3748,8 +3747,8 @@ void ExprEngine::evalBind(ExplodedNodeSet &Dst, const Stmt *StoreE, // When binding the value, pass on the hint that this is a initialization. // For initializations, we do not need to inform clients of region // changes. - state = state->bindLoc(location.castAs<Loc>(), - Val, LC, /* notifyChanges = */ !atDeclInit); + state = state->bindLoc(location.castAs<Loc>(), Val, LC, + /* notifyChanges = */ !AtDeclInit); const MemRegion *LocReg = nullptr; if (std::optional<loc::MemRegionVal> LocRegVal = diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp index fe70558..c0b28d2 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp @@ -85,7 +85,7 @@ void ExprEngine::performTrivialCopy(NodeBuilder &Bldr, ExplodedNode *Pred, evalLocation(Tmp, CallExpr, VExpr, Pred, Pred->getState(), V, /*isLoad=*/true); for (ExplodedNode *N : Tmp) - evalBind(Dst, CallExpr, N, ThisVal, V, true); + evalBind(Dst, CallExpr, N, ThisVal, V, !AlwaysReturnsLValue); PostStmt PS(CallExpr, LCtx); for (ExplodedNode *N : Dst) { |