Diffstat (limited to 'clang/lib')
55 files changed, 688 insertions, 671 deletions
diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index cc99efa..f656687 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -2063,12 +2063,9 @@ bool Compiler<Emitter>::visitCallArgs(ArrayRef<const Expr *> Args, const FunctionDecl *FuncDecl, bool Activate) { assert(VarScope->getKind() == ScopeKind::Call); - bool HasNonNullAttr = false; llvm::BitVector NonNullArgs; - if (FuncDecl && FuncDecl->hasAttr<NonNullAttr>()) { - HasNonNullAttr = true; + if (FuncDecl && FuncDecl->hasAttr<NonNullAttr>()) NonNullArgs = collectNonNullArgs(FuncDecl, Args); - } unsigned ArgIndex = 0; for (const Expr *Arg : Args) { @@ -2094,7 +2091,7 @@ bool Compiler<Emitter>::visitCallArgs(ArrayRef<const Expr *> Args, return false; } - if (HasNonNullAttr && NonNullArgs[ArgIndex]) { + if (!NonNullArgs.empty() && NonNullArgs[ArgIndex]) { PrimType ArgT = classify(Arg).value_or(PT_Ptr); if (ArgT == PT_Ptr) { if (!this->emitCheckNonNullArg(ArgT, Arg)) diff --git a/clang/lib/AST/ByteCode/DynamicAllocator.cpp b/clang/lib/AST/ByteCode/DynamicAllocator.cpp index bbef941..f38d585 100644 --- a/clang/lib/AST/ByteCode/DynamicAllocator.cpp +++ b/clang/lib/AST/ByteCode/DynamicAllocator.cpp @@ -13,25 +13,6 @@ using namespace clang; using namespace clang::interp; -// FIXME: There is a peculiar problem with the way we track pointers -// to blocks and the way we allocate dynamic memory. -// -// When we have code like this: -// while (true) { -// char *buffer = new char[1024]; -// delete[] buffer; -// } -// -// We have a local variable 'buffer' pointing to the heap allocated memory. -// When deallocating the memory via delete[], that local variable still -// points to the memory, which means we will create a DeadBlock for it and move -// it over to that block, essentially duplicating the allocation. Moving -// the data is also slow. -// -// However, when we actually try to access the allocation after it has been -// freed, we need the block to still exist (alive or dead) so we can tell -// that it's a dynamic allocation. - DynamicAllocator::~DynamicAllocator() { cleanup(); } void DynamicAllocator::cleanup() { @@ -42,8 +23,11 @@ void DynamicAllocator::cleanup() { for (auto &Iter : AllocationSites) { auto &AllocSite = Iter.second; for (auto &Alloc : AllocSite.Allocations) { - Block *B = reinterpret_cast<Block *>(Alloc.Memory.get()); + Block *B = Alloc.block(); + assert(!B->IsDead); + assert(B->isInitialized()); B->invokeDtor(); + if (B->hasPointers()) { while (B->Pointers) { Pointer *Next = B->Pointers->asBlockPointer().Next; @@ -89,6 +73,12 @@ Block *DynamicAllocator::allocate(const Descriptor *D, unsigned EvalID, assert(D); assert(D->asExpr()); + // Garbage collection. Remove all dead allocations that don't have pointers to + // them anymore. + llvm::erase_if(DeadAllocations, [](Allocation &Alloc) -> bool { + return !Alloc.block()->hasPointers(); + }); + auto Memory = std::make_unique<std::byte[]>(sizeof(Block) + D->getAllocSize()); auto *B = new (Memory.get()) Block(EvalID, D, /*isStatic=*/false); @@ -132,18 +122,34 @@ bool DynamicAllocator::deallocate(const Expr *Source, // Find the Block to delete. 
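Aside: the allocate() hunk above now opens with a sweep over DeadAllocations. A minimal sketch of that sweep with stand-in types (the real Block/Allocation live in clang::interp; `sweep` is an illustrative name, and llvm::erase_if is modeled here with the standard erase-remove idiom):

    #include <algorithm>
    #include <vector>

    // Stand-ins for interp::Block and the allocator's Allocation record.
    struct Block {
      void *Pointers = nullptr; // intrusive list of live Pointers
      bool hasPointers() const { return Pointers != nullptr; }
    };
    struct Allocation {
      Block *B;
      Block *block() const { return B; }
    };

    // Equivalent of the llvm::erase_if() call: drop every dead allocation
    // that no Pointer references anymore; its storage is finally reclaimed.
    void sweep(std::vector<Allocation> &DeadAllocations) {
      DeadAllocations.erase(
          std::remove_if(DeadAllocations.begin(), DeadAllocations.end(),
                         [](const Allocation &A) {
                           return !A.block()->hasPointers();
                         }),
          DeadAllocations.end());
    }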
auto AllocIt = llvm::find_if(Site.Allocations, [&](const Allocation &A) { - const Block *B = reinterpret_cast<const Block *>(A.Memory.get()); - return BlockToDelete == B; + return BlockToDelete == A.block(); }); assert(AllocIt != Site.Allocations.end()); - Block *B = reinterpret_cast<Block *>(AllocIt->Memory.get()); + Block *B = AllocIt->block(); + assert(B->isInitialized()); + assert(!B->IsDead); B->invokeDtor(); - S.deallocate(B); - Site.Allocations.erase(AllocIt); + // Almost all our dynamic allocations have a pointer pointing to them + // when we deallocate them, since otherwise we can't call delete() at all. + // This means that we would usually need to create DeadBlocks for all of them. + // To work around that, we instead mark them as dead without moving the data + // over to a DeadBlock and simply keep the block in a separate DeadAllocations + // list. + if (B->hasPointers()) { + B->IsDead = true; + DeadAllocations.push_back(std::move(*AllocIt)); + Site.Allocations.erase(AllocIt); + + if (Site.size() == 0) + AllocationSites.erase(It); + return true; + } + // Get rid of the allocation altogether. + Site.Allocations.erase(AllocIt); if (Site.empty()) AllocationSites.erase(It); diff --git a/clang/lib/AST/ByteCode/DynamicAllocator.h b/clang/lib/AST/ByteCode/DynamicAllocator.h index cba5e34..31d0e58 100644 --- a/clang/lib/AST/ByteCode/DynamicAllocator.h +++ b/clang/lib/AST/ByteCode/DynamicAllocator.h @@ -43,6 +43,7 @@ private: std::unique_ptr<std::byte[]> Memory; Allocation(std::unique_ptr<std::byte[]> Memory) : Memory(std::move(Memory)) {} + Block *block() const { return reinterpret_cast<Block *>(Memory.get()); } }; struct AllocationSite { @@ -99,6 +100,9 @@ public: private: llvm::DenseMap<const Expr *, AllocationSite> AllocationSites; + // Allocations that have already been deallocated but had pointers + // to them. 
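Aside: the deallocate() comment above describes the new policy; reduced to its control flow as a sketch (`removeAllocation`, `Live`, `Dead` are illustrative names, and ownership is simplified to plain vectors):

    #include <cstddef>
    #include <utility>
    #include <vector>

    struct Block {
      bool IsDead = false;
      bool HasPointers = false;
      bool hasPointers() const { return HasPointers; }
    };
    struct Allocation {
      Block *B;
      Block *block() const { return B; }
    };

    // With live pointers, flag the block dead and park the allocation on the
    // side list; without any, free it outright. Either way it leaves the
    // live set.
    void removeAllocation(std::vector<Allocation> &Live, std::size_t Idx,
                          std::vector<Allocation> &Dead) {
      Block *B = Live[Idx].block();
      if (B->hasPointers()) {
        B->IsDead = true; // no DeadBlock, no memcpy of the payload
        Dead.push_back(std::move(Live[Idx]));
      }
      Live.erase(Live.begin() + Idx);
    }

The payoff is that delete-with-a-live-pointer, the common case, no longer duplicates the allocation into a DeadBlock; the sweep in allocate() reclaims the parked entry once the last Pointer unregisters.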
+ llvm::SmallVector<Allocation> DeadAllocations; using PoolAllocTy = llvm::BumpPtrAllocator; PoolAllocTy DescAllocator; diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index bc14bd3d..b5c044c 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -518,7 +518,7 @@ bool CheckNull(InterpState &S, CodePtr OpPC, const Pointer &Ptr, bool CheckRange(InterpState &S, CodePtr OpPC, const Pointer &Ptr, AccessKinds AK) { - if (!Ptr.isOnePastEnd()) + if (!Ptr.isOnePastEnd() && !Ptr.isZeroSizeArray()) return true; if (S.getLangOpts().CPlusPlus) { const SourceInfo &Loc = S.Current->getSource(OpPC); @@ -829,8 +829,6 @@ bool CheckFinalLoad(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { return false; if (!CheckExtern(S, OpPC, Ptr)) return false; - if (!CheckRange(S, OpPC, Ptr, AK_Read)) - return false; if (!CheckActive(S, OpPC, Ptr, AK_Read)) return false; if (!CheckLifetime(S, OpPC, Ptr.getLifetime(), AK_Read)) diff --git a/clang/lib/AST/ByteCode/InterpBlock.cpp b/clang/lib/AST/ByteCode/InterpBlock.cpp index 963b54e..8b7f6a7 100644 --- a/clang/lib/AST/ByteCode/InterpBlock.cpp +++ b/clang/lib/AST/ByteCode/InterpBlock.cpp @@ -64,7 +64,7 @@ void Block::removePointer(Pointer *P) { } void Block::cleanup() { - if (Pointers == nullptr && IsDead) + if (Pointers == nullptr && !IsDynamic && IsDead) (reinterpret_cast<DeadBlock *>(this + 1) - 1)->free(); } @@ -133,8 +133,7 @@ DeadBlock::DeadBlock(DeadBlock *&Root, Block *Blk) } void DeadBlock::free() { - if (B.IsInitialized) - B.invokeDtor(); + assert(!B.isInitialized()); if (Prev) Prev->Next = Next; diff --git a/clang/lib/AST/ByteCode/InterpState.cpp b/clang/lib/AST/ByteCode/InterpState.cpp index 49c9b54..32f940c 100644 --- a/clang/lib/AST/ByteCode/InterpState.cpp +++ b/clang/lib/AST/ByteCode/InterpState.cpp @@ -76,6 +76,7 @@ bool InterpState::reportOverflow(const Expr *E, const llvm::APSInt &Value) { void InterpState::deallocate(Block *B) { assert(B); + assert(!B->isDynamic()); // The block might have a pointer saved in a field in its data // that points to the block itself. We call the dtor first, // which will destroy all the data but leave InlineDescriptors @@ -84,6 +85,7 @@ void InterpState::deallocate(Block *B) { if (B->IsInitialized) B->invokeDtor(); + assert(!B->isInitialized()); if (B->hasPointers()) { size_t Size = B->getSize(); // Allocate a new block, transferring over pointers. @@ -92,11 +94,8 @@ void InterpState::deallocate(Block *B) { auto *D = new (Memory) DeadBlock(DeadBlocks, B); // Since the block doesn't hold any actual data anymore, we can just // memcpy() everything over. - std::memcpy(D->rawData(), B->rawData(), B->getSize()); - D->B.IsInitialized = B->IsInitialized; - - // We moved the contents over to the DeadBlock. 
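Aside: the CheckRange hunk above additionally fails pointers into zero-size arrays. Illustratively (a sketch; `tail[0]` is the GNU zero-length-array extension, and `bad` is a made-up example):

    struct S {
      int n;
      int tail[0]; // GNU zero-length array: no element is accessible
    };

    // Any element access through `tail` is out of range; with this change
    // the constant evaluator diagnoses the read via CheckRange instead of
    // treating the pointer as in bounds.
    // constexpr int bad(const S &s) { return s.tail[0]; } // now rejected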
- B->IsInitialized = false; + std::memcpy(D->rawData(), B->rawData(), Size); + D->B.IsInitialized = false; } } diff --git a/clang/lib/AST/CommentParser.cpp b/clang/lib/AST/CommentParser.cpp index e61846d..2e5821a 100644 --- a/clang/lib/AST/CommentParser.cpp +++ b/clang/lib/AST/CommentParser.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang/AST/CommentParser.h" +#include "clang/AST/Comment.h" #include "clang/AST/CommentCommandTraits.h" #include "clang/AST/CommentSema.h" #include "clang/Basic/CharInfo.h" @@ -569,6 +570,8 @@ BlockCommandComment *Parser::parseBlockCommand() { InlineCommandComment *Parser::parseInlineCommand() { assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command)); + CommandMarkerKind CMK = + Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At; const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID()); const Token CommandTok = Tok; @@ -580,7 +583,7 @@ InlineCommandComment *Parser::parseInlineCommand() { InlineCommandComment *IC = S.actOnInlineCommand( CommandTok.getLocation(), CommandTok.getEndLocation(), - CommandTok.getCommandID(), Args); + CommandTok.getCommandID(), CMK, Args); if (Args.size() < Info->NumArgs) { Diag(CommandTok.getEndLocation().getLocWithOffset(1), diff --git a/clang/lib/AST/CommentSema.cpp b/clang/lib/AST/CommentSema.cpp index 88520d7..c02983b 100644 --- a/clang/lib/AST/CommentSema.cpp +++ b/clang/lib/AST/CommentSema.cpp @@ -363,12 +363,13 @@ void Sema::actOnTParamCommandFinish(TParamCommandComment *Command, InlineCommandComment * Sema::actOnInlineCommand(SourceLocation CommandLocBegin, SourceLocation CommandLocEnd, unsigned CommandID, + CommandMarkerKind CommandMarker, ArrayRef<Comment::Argument> Args) { StringRef CommandName = Traits.getCommandInfo(CommandID)->Name; - return new (Allocator) - InlineCommandComment(CommandLocBegin, CommandLocEnd, CommandID, - getInlineCommandRenderKind(CommandName), Args); + return new (Allocator) InlineCommandComment( + CommandLocBegin, CommandLocEnd, CommandID, + getInlineCommandRenderKind(CommandName), CommandMarker, Args); } InlineContentComment *Sema::actOnUnknownCommand(SourceLocation LocBegin, diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 3679327..d9c6632 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11628,7 +11628,13 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } case Builtin::BI__builtin_elementwise_add_sat: - case Builtin::BI__builtin_elementwise_sub_sat: { + case Builtin::BI__builtin_elementwise_sub_sat: + case clang::X86::BI__builtin_ia32_pmulhuw128: + case clang::X86::BI__builtin_ia32_pmulhuw256: + case clang::X86::BI__builtin_ia32_pmulhuw512: + case clang::X86::BI__builtin_ia32_pmulhw128: + case clang::X86::BI__builtin_ia32_pmulhw256: + case clang::X86::BI__builtin_ia32_pmulhw512: { APValue SourceLHS, SourceRHS; if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) || !EvaluateAsRValue(Info, E->getArg(1), SourceRHS)) @@ -11653,6 +11659,18 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { APSInt(LHS.isSigned() ? 
LHS.ssub_sat(RHS) : LHS.usub_sat(RHS), DestEltTy->isUnsignedIntegerOrEnumerationType()))); break; + case clang::X86::BI__builtin_ia32_pmulhuw128: + case clang::X86::BI__builtin_ia32_pmulhuw256: + case clang::X86::BI__builtin_ia32_pmulhuw512: + ResultElements.push_back(APValue(APSInt(llvm::APIntOps::mulhu(LHS, RHS), + /*isUnsigned=*/true))); + break; + case clang::X86::BI__builtin_ia32_pmulhw128: + case clang::X86::BI__builtin_ia32_pmulhw256: + case clang::X86::BI__builtin_ia32_pmulhw512: + ResultElements.push_back(APValue(APSInt(llvm::APIntOps::mulhs(LHS, RHS), + /*isUnsigned=*/false))); + break; } } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index a648bde..071667a 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -5985,8 +5985,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // Create a temporary array to hold the sizes of local pointer arguments // for the block. \p First is the position of the first size argument. - auto CreateArrayForSizeVar = [=](unsigned First) - -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> { + auto CreateArrayForSizeVar = + [=](unsigned First) -> std::pair<llvm::Value *, llvm::Value *> { llvm::APInt ArraySize(32, NumArgs - First); QualType SizeArrayTy = getContext().getConstantArrayType( getContext().getSizeType(), ArraySize, nullptr, @@ -5999,9 +5999,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // actually the Alloca ascasted to the default AS, hence the // stripPointerCasts() llvm::Value *Alloca = TmpPtr->stripPointerCasts(); - llvm::Value *TmpSize = EmitLifetimeStart( - CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), Alloca); llvm::Value *ElemPtr; + EmitLifetimeStart(Alloca); // Each of the following arguments specifies the size of the corresponding // argument passed to the enqueued block. auto *Zero = llvm::ConstantInt::get(IntTy, 0); @@ -6018,7 +6017,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } // Return the Alloca itself rather than a potential ascast as this is only // used by the paired EmitLifetimeEnd. - return {ElemPtr, TmpSize, Alloca}; + return {ElemPtr, Alloca}; }; // Could have events and/or varargs. @@ -6030,7 +6029,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::Value *Kernel = Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy); auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); - auto [ElemPtr, TmpSize, TmpPtr] = CreateArrayForSizeVar(4); + auto [ElemPtr, TmpPtr] = CreateArrayForSizeVar(4); // Create a vector of the arguments, as well as a constant value to // express to the runtime the number of variadic arguments. @@ -6045,8 +6044,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false); auto Call = RValue::get( EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args)); - if (TmpSize) - EmitLifetimeEnd(TmpSize, TmpPtr); + EmitLifetimeEnd(TmpPtr); return Call; } // Any calls now have event arguments passed. 
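Aside: for the pmulhuw/pmulhw folding added in the ExprConstant.cpp hunk above, the per-lane computation is a widened multiply keeping the high half. A scalar sketch of what llvm::APIntOps::mulhu/mulhs compute on 16-bit lanes (`mulhu16`/`mulhs16` are illustrative names):

    #include <cassert>
    #include <cstdint>

    // High 16 bits of a 16x16 multiply, unsigned (pmulhuw) and signed
    // (pmulhw).
    uint16_t mulhu16(uint16_t a, uint16_t b) {
      return static_cast<uint16_t>((uint32_t(a) * uint32_t(b)) >> 16);
    }
    int16_t mulhs16(int16_t a, int16_t b) {
      return static_cast<int16_t>((int32_t(a) * int32_t(b)) >> 16);
    }

    int main() {
      assert(mulhu16(0xFFFF, 0xFFFF) == 0xFFFE); // 65535*65535 = 0xFFFE0001
      assert(mulhs16(-1, -1) == 0);              // (-1)*(-1) = 1, high half 0
    }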
@@ -6111,15 +6109,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, ArgTys.push_back(Int32Ty); Name = "__enqueue_kernel_events_varargs"; - auto [ElemPtr, TmpSize, TmpPtr] = CreateArrayForSizeVar(7); + auto [ElemPtr, TmpPtr] = CreateArrayForSizeVar(7); Args.push_back(ElemPtr); ArgTys.push_back(ElemPtr->getType()); llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false); auto Call = RValue::get( EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args)); - if (TmpSize) - EmitLifetimeEnd(TmpSize, TmpPtr); + EmitLifetimeEnd(TmpPtr); return Call; } llvm_unreachable("Unexpected enqueue_kernel signature"); diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index d9bd443..6e0c2c1 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -4319,10 +4319,7 @@ static void emitWriteback(CodeGenFunction &CGF, if (writeback.WritebackExpr) { CGF.EmitIgnoredExpr(writeback.WritebackExpr); - - if (writeback.LifetimeSz) - CGF.EmitLifetimeEnd(writeback.LifetimeSz, - writeback.Temporary.getBasePointer()); + CGF.EmitLifetimeEnd(writeback.Temporary.getBasePointer()); return; } @@ -5282,7 +5279,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // If the call returns a temporary with struct return, create a temporary // alloca to hold the result, unless one is given to us. Address SRetPtr = Address::invalid(); - llvm::Value *UnusedReturnSizePtr = nullptr; + bool NeedSRetLifetimeEnd = false; if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) { // For virtual function pointer thunks and musttail calls, we must always // forward an incoming SRet pointer to the callee, because a local alloca @@ -5296,11 +5293,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, SRetPtr = ReturnValue.getAddress(); } else { SRetPtr = CreateMemTempWithoutCast(RetTy, "tmp"); - if (HaveInsertPoint() && ReturnValue.isUnused()) { - llvm::TypeSize size = - CGM.getDataLayout().getTypeAllocSize(ConvertTypeForMem(RetTy)); - UnusedReturnSizePtr = EmitLifetimeStart(size, SRetPtr.getBasePointer()); - } + if (HaveInsertPoint() && ReturnValue.isUnused()) + NeedSRetLifetimeEnd = EmitLifetimeStart(SRetPtr.getBasePointer()); } if (IRFunctionArgs.hasSRetArg()) { // A mismatch between the allocated return value's AS and the target's @@ -5484,15 +5478,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Val = Builder.CreateFreeze(Val); IRCallArgs[FirstIRArg] = Val; - // Emit lifetime markers for the temporary alloca. - llvm::TypeSize ByvalTempElementSize = - CGM.getDataLayout().getTypeAllocSize(AI.getElementType()); - llvm::Value *LifetimeSize = - EmitLifetimeStart(ByvalTempElementSize, AI.getPointer()); - - // Add cleanup code to emit the end lifetime marker after the call. - if (LifetimeSize) // In case we disabled lifetime markers. - CallLifetimeEndAfterCall.emplace_back(AI, LifetimeSize); + // Emit lifetime markers for the temporary alloca and add cleanup code to + // emit the end lifetime marker after the call. + if (EmitLifetimeStart(AI.getPointer())) + CallLifetimeEndAfterCall.emplace_back(AI); // Generate the copy. 
I->copyInto(*this, AI); @@ -5653,9 +5642,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, auto unpaddedCoercionType = ArgInfo.getUnpaddedCoerceAndExpandType(); auto *unpaddedStruct = dyn_cast<llvm::StructType>(unpaddedCoercionType); - llvm::Value *tempSize = nullptr; Address addr = Address::invalid(); RawAddress AllocaAddr = RawAddress::invalid(); + bool NeedLifetimeEnd = false; if (I->isAggregate()) { addr = I->hasLValue() ? I->getKnownLValue().getAddress() : I->getKnownRValue().getAggregateAddress(); @@ -5665,7 +5654,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, assert(RV.isScalar()); // complex should always just be direct llvm::Type *scalarType = RV.getScalarVal()->getType(); - auto scalarSize = CGM.getDataLayout().getTypeAllocSize(scalarType); auto scalarAlign = CGM.getDataLayout().getPrefTypeAlign(scalarType); // Materialize to a temporary. @@ -5674,7 +5662,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, layout->getAlignment(), scalarAlign)), "tmp", /*ArraySize=*/nullptr, &AllocaAddr); - tempSize = EmitLifetimeStart(scalarSize, AllocaAddr.getPointer()); + NeedLifetimeEnd = EmitLifetimeStart(AllocaAddr.getPointer()); Builder.CreateStore(RV.getScalarVal(), addr); } @@ -5699,10 +5687,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } assert(IRArgPos == FirstIRArg + NumIRArgs); - if (tempSize) { - EmitLifetimeEnd(tempSize, AllocaAddr.getPointer()); - } - + if (NeedLifetimeEnd) + EmitLifetimeEnd(AllocaAddr.getPointer()); break; } @@ -5871,9 +5857,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // can't depend on being inside of an ExprWithCleanups, so we need to manually // pop this cleanup later on. Being eager about this is OK, since this // temporary is 'invisible' outside of the callee. - if (UnusedReturnSizePtr) - pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, SRetPtr, - UnusedReturnSizePtr); + if (NeedSRetLifetimeEnd) + pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, SRetPtr); llvm::BasicBlock *InvokeDest = CannotThrow ? nullptr : getInvokeDest(); @@ -6007,7 +5992,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // insertion point; this allows the rest of IRGen to discard // unreachable code. if (CI->doesNotReturn()) { - if (UnusedReturnSizePtr) + if (NeedSRetLifetimeEnd) PopCleanupBlock(); // Strip away the noreturn attribute to better diagnose unreachable UB. @@ -6122,7 +6107,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, case ABIArgInfo::InAlloca: case ABIArgInfo::Indirect: { RValue ret = convertTempToRValue(SRetPtr, RetTy, SourceLocation()); - if (UnusedReturnSizePtr) + if (NeedSRetLifetimeEnd) PopCleanupBlock(); return ret; } diff --git a/clang/lib/CodeGen/CGCall.h b/clang/lib/CodeGen/CGCall.h index 0b4e3f9..3157b7f 100644 --- a/clang/lib/CodeGen/CGCall.h +++ b/clang/lib/CodeGen/CGCall.h @@ -289,9 +289,6 @@ public: /// An Expression (optional) that performs the writeback with any required /// casting. const Expr *WritebackExpr; - - // Size for optional lifetime end on the temporary. 
- llvm::Value *LifetimeSz; }; struct CallArgCleanup { @@ -321,9 +318,8 @@ public: } void addWriteback(LValue srcLV, Address temporary, llvm::Value *toUse, - const Expr *writebackExpr = nullptr, - llvm::Value *lifetimeSz = nullptr) { - Writeback writeback = {srcLV, temporary, toUse, writebackExpr, lifetimeSz}; + const Expr *writebackExpr = nullptr) { + Writeback writeback = {srcLV, temporary, toUse, writebackExpr}; Writebacks.push_back(writeback); } diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index ff2dada..0cade0d 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -1351,30 +1351,27 @@ void CodeGenFunction::EmitAutoVarDecl(const VarDecl &D) { } /// Emit a lifetime.begin marker if some criteria are satisfied. -/// \return a pointer to the temporary size Value if a marker was emitted, null -/// otherwise -llvm::Value *CodeGenFunction::EmitLifetimeStart(llvm::TypeSize Size, - llvm::Value *Addr) { +/// \return whether the marker was emitted. +bool CodeGenFunction::EmitLifetimeStart(llvm::Value *Addr) { if (!ShouldEmitLifetimeMarkers) - return nullptr; + return false; assert(Addr->getType()->getPointerAddressSpace() == CGM.getDataLayout().getAllocaAddrSpace() && "Pointer should be in alloca address space"); - llvm::Value *SizeV = llvm::ConstantInt::get( - Int64Ty, Size.isScalable() ? -1 : Size.getFixedValue()); - llvm::CallInst *C = - Builder.CreateCall(CGM.getLLVMLifetimeStartFn(), {SizeV, Addr}); + llvm::CallInst *C = Builder.CreateCall(CGM.getLLVMLifetimeStartFn(), {Addr}); C->setDoesNotThrow(); - return SizeV; + return true; } -void CodeGenFunction::EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr) { +void CodeGenFunction::EmitLifetimeEnd(llvm::Value *Addr) { + if (!ShouldEmitLifetimeMarkers) + return; + assert(Addr->getType()->getPointerAddressSpace() == CGM.getDataLayout().getAllocaAddrSpace() && "Pointer should be in alloca address space"); - llvm::CallInst *C = - Builder.CreateCall(CGM.getLLVMLifetimeEndFn(), {Size, Addr}); + llvm::CallInst *C = Builder.CreateCall(CGM.getLLVMLifetimeEndFn(), {Addr}); C->setDoesNotThrow(); } @@ -1632,9 +1629,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // is rare. if (!Bypasses.IsBypassed(&D) && !(!getLangOpts().CPlusPlus && hasLabelBeenSeenInCurrentScope())) { - llvm::TypeSize Size = CGM.getDataLayout().getTypeAllocSize(allocaTy); - emission.SizeForLifetimeMarkers = - EmitLifetimeStart(Size, AllocaAddr.getPointer()); + emission.UseLifetimeMarkers = + EmitLifetimeStart(AllocaAddr.getPointer()); } } else { assert(!emission.useLifetimeMarkers()); @@ -1727,9 +1723,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // Make sure we call @llvm.lifetime.end. if (emission.useLifetimeMarkers()) - EHStack.pushCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, - emission.getOriginalAllocatedAddress(), - emission.getSizeForLifetimeMarkers()); + EHStack.pushCleanup<CallLifetimeEnd>( + NormalEHLifetimeMarker, emission.getOriginalAllocatedAddress()); // Analogous to lifetime markers, we use a 'cleanup' to emit fake.use // calls for local variables. 
We are exempting volatile variables and diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index ed35a05..f1affef 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -588,11 +588,9 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { } else { switch (M->getStorageDuration()) { case SD_Automatic: - if (auto *Size = EmitLifetimeStart( - CGM.getDataLayout().getTypeAllocSize(Alloca.getElementType()), - Alloca.getPointer())) { + if (EmitLifetimeStart(Alloca.getPointer())) { pushCleanupAfterFullExpr<CallLifetimeEnd>(NormalEHLifetimeMarker, - Alloca, Size); + Alloca); } break; @@ -623,11 +621,8 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { Block, llvm::BasicBlock::iterator(Block->back()))); } - if (auto *Size = EmitLifetimeStart( - CGM.getDataLayout().getTypeAllocSize(Alloca.getElementType()), - Alloca.getPointer())) { - pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, Alloca, - Size); + if (EmitLifetimeStart(Alloca.getPointer())) { + pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, Alloca); } if (OldConditional) { @@ -5784,13 +5779,10 @@ LValue CodeGenFunction::EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, llvm::Value *Addr = TempLV.getAddress().getBasePointer(); llvm::Type *ElTy = ConvertTypeForMem(TempLV.getType()); - llvm::TypeSize Sz = CGM.getDataLayout().getTypeAllocSize(ElTy); - - llvm::Value *LifetimeSize = EmitLifetimeStart(Sz, Addr); + EmitLifetimeStart(Addr); Address TmpAddr(Addr, ElTy, TempLV.getAlignment()); - Args.addWriteback(BaseLV, TmpAddr, nullptr, E->getWritebackCast(), - LifetimeSize); + Args.addWriteback(BaseLV, TmpAddr, nullptr, E->getWritebackCast()); Args.add(RValue::get(TmpAddr, *this), Ty); return TempLV; } diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index cad6731..e2f11b86 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -300,16 +300,12 @@ void AggExprEmitter::withReturnValueSlot( Address RetAddr = Address::invalid(); EHScopeStack::stable_iterator LifetimeEndBlock; - llvm::Value *LifetimeSizePtr = nullptr; llvm::IntrinsicInst *LifetimeStartInst = nullptr; if (!UseTemp) { RetAddr = Dest.getAddress(); } else { RetAddr = CGF.CreateMemTempWithoutCast(RetTy, "tmp"); - llvm::TypeSize Size = - CGF.CGM.getDataLayout().getTypeAllocSize(CGF.ConvertTypeForMem(RetTy)); - LifetimeSizePtr = CGF.EmitLifetimeStart(Size, RetAddr.getBasePointer()); - if (LifetimeSizePtr) { + if (CGF.EmitLifetimeStart(RetAddr.getBasePointer())) { LifetimeStartInst = cast<llvm::IntrinsicInst>(std::prev(Builder.GetInsertPoint())); assert(LifetimeStartInst->getIntrinsicID() == @@ -317,7 +313,7 @@ void AggExprEmitter::withReturnValueSlot( "Last insertion wasn't a lifetime.start?"); CGF.pushFullExprCleanup<CodeGenFunction::CallLifetimeEnd>( - NormalEHLifetimeMarker, RetAddr, LifetimeSizePtr); + NormalEHLifetimeMarker, RetAddr); LifetimeEndBlock = CGF.EHStack.stable_begin(); } } @@ -338,7 +334,7 @@ void AggExprEmitter::withReturnValueSlot( // Since we're not guaranteed to be in an ExprWithCleanups, clean up // eagerly. 
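Aside: the lifetime-marker changes running through CGBuiltin, CGCall, CGDecl, CGExpr, and CGExprAgg all follow one shape: EmitLifetimeStart no longer returns a size value that must be threaded to EmitLifetimeEnd, only whether a marker was emitted, and the end call takes just the address. A minimal model of that protocol (`Emitter`, `emitLifetimeStart`, `emitLifetimeEnd` are illustrative names, not the CodeGenFunction API):

    #include <cstdio>

    struct Emitter {
      bool ShouldEmitLifetimeMarkers;

      // Start reports only whether a marker was emitted.
      bool emitLifetimeStart(void *Addr) {
        if (!ShouldEmitLifetimeMarkers)
          return false;
        std::printf("lifetime.start(%p)\n", Addr);
        return true;
      }
      // End needs nothing but the address; it re-checks the flag itself.
      void emitLifetimeEnd(void *Addr) {
        if (!ShouldEmitLifetimeMarkers)
          return;
        std::printf("lifetime.end(%p)\n", Addr);
      }
    };

    int main() {
      Emitter E{true};
      int Slot;
      if (E.emitLifetimeStart(&Slot)) // pair start/end without a size token
        E.emitLifetimeEnd(&Slot);
    }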
CGF.DeactivateCleanupBlock(LifetimeEndBlock, LifetimeStartInst); - CGF.EmitLifetimeEnd(LifetimeSizePtr, RetAddr.getBasePointer()); + CGF.EmitLifetimeEnd(RetAddr.getBasePointer()); } } diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 44931d0..7eed4ee 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -4183,9 +4183,8 @@ Value *ScalarExprEmitter::EmitOverflowCheckedBinOp(const BinOpInfo &Ops) { return phi; } -/// Emit pointer + index arithmetic. -static Value *emitPointerArithmetic(CodeGenFunction &CGF, - const BinOpInfo &op, +/// This function is used for BO_Add/BO_Sub/BO_AddAssign/BO_SubAssign. +static Value *emitPointerArithmetic(CodeGenFunction &CGF, const BinOpInfo &op, bool isSubtraction) { // Must have binary (not unary) expr here. Unary pointer // increment/decrement doesn't use this path. @@ -4202,11 +4201,19 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, std::swap(pointerOperand, indexOperand); } + return CGF.EmitPointerArithmetic(expr, pointerOperand, pointer, indexOperand, + index, isSubtraction); +} + +/// Emit pointer + index arithmetic. +llvm::Value *CodeGenFunction::EmitPointerArithmetic( + const BinaryOperator *BO, Expr *pointerOperand, llvm::Value *pointer, + Expr *indexOperand, llvm::Value *index, bool isSubtraction) { bool isSigned = indexOperand->getType()->isSignedIntegerOrEnumerationType(); unsigned width = cast<llvm::IntegerType>(index->getType())->getBitWidth(); - auto &DL = CGF.CGM.getDataLayout(); - auto PtrTy = cast<llvm::PointerType>(pointer->getType()); + auto &DL = CGM.getDataLayout(); + auto *PtrTy = cast<llvm::PointerType>(pointer->getType()); // Some versions of glibc and gcc use idioms (particularly in their malloc // routines) that add a pointer-sized integer (known to be a pointer value) @@ -4227,79 +4234,77 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, // // Note that we do not suppress the pointer overflow check in this case. if (BinaryOperator::isNullPointerArithmeticExtension( - CGF.getContext(), op.Opcode, expr->getLHS(), expr->getRHS())) { - Value *Ptr = CGF.Builder.CreateIntToPtr(index, pointer->getType()); - if (CGF.getLangOpts().PointerOverflowDefined || - !CGF.SanOpts.has(SanitizerKind::PointerOverflow) || - NullPointerIsDefined(CGF.Builder.GetInsertBlock()->getParent(), + getContext(), BO->getOpcode(), pointerOperand, indexOperand)) { + llvm::Value *Ptr = Builder.CreateIntToPtr(index, pointer->getType()); + if (getLangOpts().PointerOverflowDefined || + !SanOpts.has(SanitizerKind::PointerOverflow) || + NullPointerIsDefined(Builder.GetInsertBlock()->getParent(), PtrTy->getPointerAddressSpace())) return Ptr; // The inbounds GEP of null is valid iff the index is zero. 
auto CheckOrdinal = SanitizerKind::SO_PointerOverflow; auto CheckHandler = SanitizerHandler::PointerOverflow; - SanitizerDebugLocation SanScope(&CGF, {CheckOrdinal}, CheckHandler); - Value *IsZeroIndex = CGF.Builder.CreateIsNull(index); - llvm::Constant *StaticArgs[] = { - CGF.EmitCheckSourceLocation(op.E->getExprLoc())}; + SanitizerDebugLocation SanScope(this, {CheckOrdinal}, CheckHandler); + llvm::Value *IsZeroIndex = Builder.CreateIsNull(index); + llvm::Constant *StaticArgs[] = {EmitCheckSourceLocation(BO->getExprLoc())}; llvm::Type *IntPtrTy = DL.getIntPtrType(PtrTy); - Value *IntPtr = llvm::Constant::getNullValue(IntPtrTy); - Value *ComputedGEP = CGF.Builder.CreateZExtOrTrunc(index, IntPtrTy); - Value *DynamicArgs[] = {IntPtr, ComputedGEP}; - CGF.EmitCheck({{IsZeroIndex, CheckOrdinal}}, CheckHandler, StaticArgs, - DynamicArgs); + llvm::Value *IntPtr = llvm::Constant::getNullValue(IntPtrTy); + llvm::Value *ComputedGEP = Builder.CreateZExtOrTrunc(index, IntPtrTy); + llvm::Value *DynamicArgs[] = {IntPtr, ComputedGEP}; + EmitCheck({{IsZeroIndex, CheckOrdinal}}, CheckHandler, StaticArgs, + DynamicArgs); return Ptr; } if (width != DL.getIndexTypeSizeInBits(PtrTy)) { // Zero-extend or sign-extend the pointer value according to // whether the index is signed or not. - index = CGF.Builder.CreateIntCast(index, DL.getIndexType(PtrTy), isSigned, - "idx.ext"); + index = Builder.CreateIntCast(index, DL.getIndexType(PtrTy), isSigned, + "idx.ext"); } // If this is subtraction, negate the index. if (isSubtraction) - index = CGF.Builder.CreateNeg(index, "idx.neg"); + index = Builder.CreateNeg(index, "idx.neg"); - if (CGF.SanOpts.has(SanitizerKind::ArrayBounds)) - CGF.EmitBoundsCheck(op.E, pointerOperand, index, indexOperand->getType(), - /*Accessed*/ false); + if (SanOpts.has(SanitizerKind::ArrayBounds)) + EmitBoundsCheck(BO, pointerOperand, index, indexOperand->getType(), + /*Accessed*/ false); - const PointerType *pointerType - = pointerOperand->getType()->getAs<PointerType>(); + const PointerType *pointerType = + pointerOperand->getType()->getAs<PointerType>(); if (!pointerType) { QualType objectType = pointerOperand->getType() - ->castAs<ObjCObjectPointerType>() - ->getPointeeType(); - llvm::Value *objectSize - = CGF.CGM.getSize(CGF.getContext().getTypeSizeInChars(objectType)); + ->castAs<ObjCObjectPointerType>() + ->getPointeeType(); + llvm::Value *objectSize = + CGM.getSize(getContext().getTypeSizeInChars(objectType)); - index = CGF.Builder.CreateMul(index, objectSize); + index = Builder.CreateMul(index, objectSize); - Value *result = - CGF.Builder.CreateGEP(CGF.Int8Ty, pointer, index, "add.ptr"); - return CGF.Builder.CreateBitCast(result, pointer->getType()); + llvm::Value *result = Builder.CreateGEP(Int8Ty, pointer, index, "add.ptr"); + return Builder.CreateBitCast(result, pointer->getType()); } QualType elementType = pointerType->getPointeeType(); - if (const VariableArrayType *vla - = CGF.getContext().getAsVariableArrayType(elementType)) { + if (const VariableArrayType *vla = + getContext().getAsVariableArrayType(elementType)) { // The element count here is the total number of non-VLA elements. - llvm::Value *numElements = CGF.getVLASize(vla).NumElts; + llvm::Value *numElements = getVLASize(vla).NumElts; // Effectively, the multiply by the VLA size is part of the GEP. // GEP indexes are signed, and scaling an index isn't permitted to // signed-overflow, so we use the same semantics for our explicit // multiply. We suppress this if overflow is not undefined behavior. 
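Aside: the null-pointer special case above matches a concrete source idiom. A sketch of it (`fromBits` is an illustrative name; the construct is formally UB, which is why the pointer-overflow sanitizer keeps checking it):

    #include <cstdint>

    // The glibc/gcc idiom: an integer known to hold a pointer value is added
    // to a null pointer to turn it back into a pointer. Clang recognizes
    // this and lowers it to inttoptr rather than an inbounds GEP on null.
    char *fromBits(uintptr_t bits) {
      return (char *)0 + bits;
    }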
- llvm::Type *elemTy = CGF.ConvertTypeForMem(vla->getElementType()); - if (CGF.getLangOpts().PointerOverflowDefined) { - index = CGF.Builder.CreateMul(index, numElements, "vla.index"); - pointer = CGF.Builder.CreateGEP(elemTy, pointer, index, "add.ptr"); + llvm::Type *elemTy = ConvertTypeForMem(vla->getElementType()); + if (getLangOpts().PointerOverflowDefined) { + index = Builder.CreateMul(index, numElements, "vla.index"); + pointer = Builder.CreateGEP(elemTy, pointer, index, "add.ptr"); } else { - index = CGF.Builder.CreateNSWMul(index, numElements, "vla.index"); - pointer = CGF.EmitCheckedInBoundsGEP( - elemTy, pointer, index, isSigned, isSubtraction, op.E->getExprLoc(), - "add.ptr"); + index = Builder.CreateNSWMul(index, numElements, "vla.index"); + pointer = + EmitCheckedInBoundsGEP(elemTy, pointer, index, isSigned, + isSubtraction, BO->getExprLoc(), "add.ptr"); } return pointer; } @@ -4309,16 +4314,15 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, // future proof. llvm::Type *elemTy; if (elementType->isVoidType() || elementType->isFunctionType()) - elemTy = CGF.Int8Ty; + elemTy = Int8Ty; else - elemTy = CGF.ConvertTypeForMem(elementType); + elemTy = ConvertTypeForMem(elementType); - if (CGF.getLangOpts().PointerOverflowDefined) - return CGF.Builder.CreateGEP(elemTy, pointer, index, "add.ptr"); + if (getLangOpts().PointerOverflowDefined) + return Builder.CreateGEP(elemTy, pointer, index, "add.ptr"); - return CGF.EmitCheckedInBoundsGEP( - elemTy, pointer, index, isSigned, isSubtraction, op.E->getExprLoc(), - "add.ptr"); + return EmitCheckedInBoundsGEP(elemTy, pointer, index, isSigned, isSubtraction, + BO->getExprLoc(), "add.ptr"); } // Construct an fmuladd intrinsic to represent a fused mul-add of MulOp and diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 6c32c98..bf16d72 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -701,14 +701,12 @@ public: bool isRedundantBeforeReturn() override { return true; } llvm::Value *Addr; - llvm::Value *Size; public: - CallLifetimeEnd(RawAddress addr, llvm::Value *size) - : Addr(addr.getPointer()), Size(size) {} + CallLifetimeEnd(RawAddress addr) : Addr(addr.getPointer()) {} void Emit(CodeGenFunction &CGF, Flags flags) override { - CGF.EmitLifetimeEnd(Size, Addr); + CGF.EmitLifetimeEnd(Addr); } }; @@ -3233,8 +3231,8 @@ public: void EmitSehTryScopeBegin(); void EmitSehTryScopeEnd(); - llvm::Value *EmitLifetimeStart(llvm::TypeSize Size, llvm::Value *Addr); - void EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr); + bool EmitLifetimeStart(llvm::Value *Addr); + void EmitLifetimeEnd(llvm::Value *Addr); llvm::Value *EmitCXXNewExpr(const CXXNewExpr *E); void EmitCXXDeleteExpr(const CXXDeleteExpr *E); @@ -3417,8 +3415,8 @@ public: /// initializer. bool IsConstantAggregate; - /// Non-null if we should use lifetime annotations. - llvm::Value *SizeForLifetimeMarkers; + /// True if lifetime markers should be used. + bool UseLifetimeMarkers; /// Address with original alloca instruction. Invalid if the variable was /// emitted as a global constant. 
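Aside: earlier in this hunk, pointer arithmetic on void* and function-pointer types falls back to an i8 element type. That mirrors the GNU extension where such arithmetic moves byte-by-byte, sketched here (`advance` is an illustrative name):

    // GNU extension (accepted by clang and gcc): void* arithmetic advances
    // one byte per unit, exactly as if the pointee type were char.
    void *advance(void *p, long n) {
      return (char *)p + n; // what `p + n` means for void* under the extension
    }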
@@ -3432,20 +3430,14 @@ public: AutoVarEmission(const VarDecl &variable) : Variable(&variable), Addr(Address::invalid()), NRVOFlag(nullptr), IsEscapingByRef(false), IsConstantAggregate(false), - SizeForLifetimeMarkers(nullptr), AllocaAddr(RawAddress::invalid()) {} + UseLifetimeMarkers(false), AllocaAddr(RawAddress::invalid()) {} bool wasEmittedAsGlobal() const { return !Addr.isValid(); } public: static AutoVarEmission invalid() { return AutoVarEmission(Invalid()); } - bool useLifetimeMarkers() const { - return SizeForLifetimeMarkers != nullptr; - } - llvm::Value *getSizeForLifetimeMarkers() const { - assert(useLifetimeMarkers()); - return SizeForLifetimeMarkers; - } + bool useLifetimeMarkers() const { return UseLifetimeMarkers; } /// Returns the raw, allocated address, which is not necessarily /// the address of the object itself. It is casted to default @@ -5220,6 +5212,12 @@ public: /// operation is a subtraction. enum { NotSubtraction = false, IsSubtraction = true }; + /// Emit pointer + index arithmetic. + llvm::Value *EmitPointerArithmetic(const BinaryOperator *BO, + Expr *pointerOperand, llvm::Value *pointer, + Expr *indexOperand, llvm::Value *index, + bool isSubtraction); + /// Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to /// detect undefined behavior when the pointer overflow sanitizer is enabled. /// \p SignedIndices indicates whether any of the GEP indices are signed. diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp index 38aaceb..05fb137 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -2269,6 +2269,11 @@ struct CounterCoverageMappingBuilder // Track LHS True/False Decision. const auto DecisionLHS = MCDCBuilder.pop(); + if (auto Gap = + findGapAreaBetween(getEnd(E->getLHS()), getStart(E->getRHS()))) { + fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), getRegionCounter(E)); + } + // Counter tracks the right hand side of a logical and operator. extendRegion(E->getRHS()); propagateCounts(getRegionCounter(E), E->getRHS()); @@ -2330,6 +2335,11 @@ struct CounterCoverageMappingBuilder // Track LHS True/False Decision. const auto DecisionLHS = MCDCBuilder.pop(); + if (auto Gap = + findGapAreaBetween(getEnd(E->getLHS()), getStart(E->getRHS()))) { + fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), getRegionCounter(E)); + } + // Counter tracks the right hand side of a logical or operator. 
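Aside: both CoverageMappingGen hunks above fill the source gap between a logical operator's operands with the whole expression's counter. Illustratively (`both` is a made-up example):

    // Without the gap region, the whitespace between `a` and `b` (including
    // the operator's line) carried no counter; it is now attributed to the
    // count of the whole `a && b` expression, so short-circuited runs leave
    // no unannotated hole in the report.
    bool both(bool a, bool b) {
      return a
             && b;
    }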
extendRegion(E->getRHS()); propagateCounts(getRegionCounter(E), E->getRHS()); diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp index e23d19d..b508709 100644 --- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp @@ -1051,18 +1051,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_vfmsubsd3_mask3: return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2, /*NegAcc*/ true); - case X86::BI__builtin_ia32_vfmaddph: - case X86::BI__builtin_ia32_vfmaddps: - case X86::BI__builtin_ia32_vfmaddpd: - case X86::BI__builtin_ia32_vfmaddph256: - case X86::BI__builtin_ia32_vfmaddps256: - case X86::BI__builtin_ia32_vfmaddpd256: case X86::BI__builtin_ia32_vfmaddph512_mask: case X86::BI__builtin_ia32_vfmaddph512_maskz: case X86::BI__builtin_ia32_vfmaddph512_mask3: - case X86::BI__builtin_ia32_vfmaddbf16128: - case X86::BI__builtin_ia32_vfmaddbf16256: - case X86::BI__builtin_ia32_vfmaddbf16512: case X86::BI__builtin_ia32_vfmaddps512_mask: case X86::BI__builtin_ia32_vfmaddps512_maskz: case X86::BI__builtin_ia32_vfmaddps512_mask3: diff --git a/clang/lib/Headers/avx10_2_512bf16intrin.h b/clang/lib/Headers/avx10_2_512bf16intrin.h index 75290d2..95e9bd7a 100644 --- a/clang/lib/Headers/avx10_2_512bf16intrin.h +++ b/clang/lib/Headers/avx10_2_512bf16intrin.h @@ -441,8 +441,8 @@ _mm512_maskz_sqrt_pbh(__mmask32 __U, __m512bh __A) { static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_fmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { - return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, (__v32bf)__B, - (__v32bf)__C); + return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, (__v32bf)__B, + (__v32bf)__C); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 @@ -469,8 +469,8 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pbh( static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_fmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { - return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, (__v32bf)__B, - -(__v32bf)__C); + return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, (__v32bf)__B, + -(__v32bf)__C); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 @@ -497,8 +497,8 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pbh( static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_fnmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { - return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, -(__v32bf)__B, - (__v32bf)__C); + return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, -(__v32bf)__B, + (__v32bf)__C); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_pbh( @@ -527,8 +527,8 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pbh( static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_fnmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { - return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, -(__v32bf)__B, - -(__v32bf)__C); + return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, -(__v32bf)__B, + -(__v32bf)__C); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_pbh( diff --git a/clang/lib/Headers/avx10_2bf16intrin.h b/clang/lib/Headers/avx10_2bf16intrin.h index 66797ae..0c7f381 100644 --- a/clang/lib/Headers/avx10_2bf16intrin.h +++ b/clang/lib/Headers/avx10_2bf16intrin.h @@ -852,8 +852,8 @@ _mm_maskz_sqrt_pbh(__mmask8 __U, __m128bh __A) { static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_fmadd_pbh(__m256bh __A, __m256bh __B, 
__m256bh __C) { - return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, (__v16bf)__B, - (__v16bf)__C); + return (__m256bh)__builtin_elementwise_fma((__v16bf)__A, (__v16bf)__B, + (__v16bf)__C); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 @@ -880,8 +880,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_pbh( static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_fmsub_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { - return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, (__v16bf)__B, - -(__v16bf)__C); + return (__m256bh)__builtin_elementwise_fma((__v16bf)__A, (__v16bf)__B, + -(__v16bf)__C); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 @@ -908,8 +908,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_pbh( static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_fnmadd_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { - return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, -(__v16bf)__B, - (__v16bf)__C); + return (__m256bh)__builtin_elementwise_fma((__v16bf)__A, -(__v16bf)__B, + (__v16bf)__C); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_pbh( @@ -938,8 +938,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_pbh( static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_fnmsub_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { - return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, -(__v16bf)__B, - -(__v16bf)__C); + return (__m256bh)__builtin_elementwise_fma((__v16bf)__A, -(__v16bf)__B, + -(__v16bf)__C); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_pbh( @@ -969,8 +969,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_pbh( static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmadd_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { - return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, (__v8bf)__B, - (__v8bf)__C); + return (__m128bh)__builtin_elementwise_fma((__v8bf)__A, (__v8bf)__B, + (__v8bf)__C); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 @@ -997,8 +997,8 @@ _mm_maskz_fmadd_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmsub_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { - return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, (__v8bf)__B, - -(__v8bf)__C); + return (__m128bh)__builtin_elementwise_fma((__v8bf)__A, (__v8bf)__B, + -(__v8bf)__C); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 @@ -1025,8 +1025,8 @@ _mm_maskz_fmsub_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fnmadd_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { - return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, -(__v8bf)__B, - (__v8bf)__C); + return (__m128bh)__builtin_elementwise_fma((__v8bf)__A, -(__v8bf)__B, + (__v8bf)__C); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 @@ -1053,8 +1053,8 @@ _mm_maskz_fnmadd_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fnmsub_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { - return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, -(__v8bf)__B, - -(__v8bf)__C); + return (__m128bh)__builtin_elementwise_fma((__v8bf)__A, -(__v8bf)__B, + -(__v8bf)__C); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index 3c3a3d1..55e7102 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ 
-1729,10 +1729,10 @@ _mm256_mulhrs_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the products. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mulhi_epu16(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b); + return (__m256i)__builtin_ia32_pmulhuw256((__v16hu)__a, (__v16hu)__b); } /// Multiplies signed 16-bit integer elements of two 256-bit vectors of @@ -1748,7 +1748,7 @@ _mm256_mulhi_epu16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the products. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mulhi_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b); @@ -1767,7 +1767,7 @@ _mm256_mulhi_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the products. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mullo_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hu)__a * (__v16hu)__b); diff --git a/clang/lib/Headers/avx512bitalgintrin.h b/clang/lib/Headers/avx512bitalgintrin.h index 3c446b3..9a1ff8f3 100644 --- a/clang/lib/Headers/avx512bitalgintrin.h +++ b/clang/lib/Headers/avx512bitalgintrin.h @@ -20,7 +20,13 @@ __target__("avx512bitalg,evex512"), \ __min_vector_width__(512))) -static __inline__ __m512i __DEFAULT_FN_ATTRS +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#else +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#endif + +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_popcnt_epi16(__m512i __A) { return (__m512i)__builtin_elementwise_popcount((__v32hu)__A); @@ -42,7 +48,7 @@ _mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_popcnt_epi8(__m512i __A) { return (__m512i)__builtin_elementwise_popcount((__v64qu)__A); @@ -80,7 +86,7 @@ _mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) __B); } - #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index c854720..233d4a6 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -25,6 +25,14 @@ typedef unsigned long long __mmask64; __attribute__((__always_inline__, __nodebug__, \ __target__("avx512bw,no-evex512"))) +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#else +#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#endif + static __inline __mmask32 __DEFAULT_FN_ATTRS _knot_mask32(__mmask32 __M) { @@ -438,7 +446,7 @@ _mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) { (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i 
__DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mullo_epi16 (__m512i __A, __m512i __B) { return (__m512i) ((__v32hu) __A * (__v32hu) __B); } @@ -1082,7 +1090,7 @@ _mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mulhi_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B); @@ -1105,10 +1113,10 @@ _mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mulhi_epu16(__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pmulhuw512((__v32hi) __A, (__v32hi) __B); + return (__m512i)__builtin_ia32_pmulhuw512((__v32hu) __A, (__v32hu) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -2010,5 +2018,7 @@ _mm512_sad_epu8 (__m512i __A, __m512i __B) #undef __DEFAULT_FN_ATTRS512 #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS512_CONSTEXPR +#undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 74343c3..95b80cc 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -277,20 +277,20 @@ _mm512_setzero_pd(void) { return __extension__(__m512d){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; } -static __inline __m512 __DEFAULT_FN_ATTRS512 +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_ps(float __w) { return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w }; } -static __inline __m512d __DEFAULT_FN_ATTRS512 +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_pd(double __w) { return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w }; } -static __inline __m512i __DEFAULT_FN_ATTRS512 +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi8(char __w) { return __extension__ (__m512i)(__v64qi){ @@ -304,7 +304,7 @@ _mm512_set1_epi8(char __w) __w, __w, __w, __w, __w, __w, __w, __w }; } -static __inline __m512i __DEFAULT_FN_ATTRS512 +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi16(short __w) { return __extension__ (__m512i)(__v32hi){ @@ -314,7 +314,7 @@ _mm512_set1_epi16(short __w) __w, __w, __w, __w, __w, __w, __w, __w }; } -static __inline __m512i __DEFAULT_FN_ATTRS512 +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi32(int __s) { return __extension__ (__m512i)(__v16si){ @@ -330,7 +330,7 @@ _mm512_maskz_set1_epi32(__mmask16 __M, int __A) (__v16si)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS512 +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi64(long long __d) { return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d }; diff --git a/clang/lib/Headers/avx512vlbitalgintrin.h b/clang/lib/Headers/avx512vlbitalgintrin.h index 1b01fe0..739e78a 100644 --- a/clang/lib/Headers/avx512vlbitalgintrin.h +++ b/clang/lib/Headers/avx512vlbitalgintrin.h @@ -24,7 +24,15 @@ __target__("avx512vl,avx512bitalg,no-evex512"), \ __min_vector_width__(256))) -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr +#else +#define 
__DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 +#endif + +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_popcnt_epi16(__m256i __A) { return (__m256i)__builtin_elementwise_popcount((__v16hu)__A); @@ -46,7 +54,7 @@ _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_popcnt_epi16(__m128i __A) { return (__m128i)__builtin_elementwise_popcount((__v8hu)__A); @@ -68,7 +76,7 @@ _mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_popcnt_epi8(__m256i __A) { return (__m256i)__builtin_elementwise_popcount((__v32qu)__A); @@ -90,7 +98,7 @@ _mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_popcnt_epi8(__m128i __A) { return (__m128i)__builtin_elementwise_popcount((__v16qu)__A); @@ -147,5 +155,7 @@ _mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B) #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 +#undef __DEFAULT_FN_ATTRS128_CONSTEXPR +#undef __DEFAULT_FN_ATTRS256_CONSTEXPR #endif diff --git a/clang/lib/Headers/avx512vlfp16intrin.h b/clang/lib/Headers/avx512vlfp16intrin.h index a12acb7..1f8cca7 100644 --- a/clang/lib/Headers/avx512vlfp16intrin.h +++ b/clang/lib/Headers/avx512vlfp16intrin.h @@ -1419,8 +1419,8 @@ _mm256_maskz_cvtxps_ph(__mmask8 __U, __m256 __A) { static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_ph(__m128h __A, __m128h __B, __m128h __C) { - return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, - (__v8hf)__C); + return (__m128h)__builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, + (__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ph(__m128h __A, @@ -1429,7 +1429,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ph(__m128h __A, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), + __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)__A); } @@ -1437,7 +1437,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), + __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)__C); } @@ -1445,15 +1445,15 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), + __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)_mm_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsub_ph(__m128h __A, __m128h __B, __m128h __C) { - return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, - -(__v8hf)__C); + return (__m128h)__builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, + -(__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ph(__m128h __A, @@ -1476,7 +1476,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return 
(__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C), + __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)__C); } @@ -1484,7 +1484,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C), + __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)_mm_setzero_ph()); } @@ -1492,22 +1492,22 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), + __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), (__v8hf)_mm_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_ph(__m256h __A, __m256h __B, __m256h __C) { - return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, - (__v16hf)__C); + return (__m256h)__builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, + (__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)__A); } @@ -1515,7 +1515,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)__C); } @@ -1523,22 +1523,22 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmsub_ph(__m256h __A, __m256h __B, __m256h __C) { - return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, - -(__v16hf)__C); + return (__m256h)__builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, + -(__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)__A); } @@ -1546,7 +1546,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } @@ -1554,7 +1554,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { return (__m256h)__builtin_ia32_selectph_256( 
(__mmask16)__U, - __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C), + __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)__C); } @@ -1562,7 +1562,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C), + __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } @@ -1570,7 +1570,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), + __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } @@ -1684,7 +1684,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), + __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), (__v8hf)__C); } @@ -1692,7 +1692,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)__C); } @@ -1715,45 +1715,45 @@ _mm256_mask3_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C) { - return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, - (__v8hf)__C); + return (__m128h)__builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, + (__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C), + __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C), (__v8hf)__A); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmadd_ph(__m256h __A, __m256h __B, __m256h __C) { - return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, - (__v16hf)__C); + return (__m256h)__builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, + (__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, (__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, (__v16hf)__C), (__v16hf)__A); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmsub_ph(__m128h __A, __m128h __B, __m128h __C) { - return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, - -(__v8hf)__C); + return (__m128h)__builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, + -(__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, 
-(__v8hf)__C), + __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C), (__v8hf)__A); } @@ -1761,22 +1761,22 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C), + __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C), (__v8hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmsub_ph(__m256h __A, __m256h __B, __m256h __C) { - return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, - -(__v16hf)__C); + return (__m256h)__builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, + -(__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C), (__v16hf)__A); } @@ -1784,7 +1784,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C), (__v16hf)__C); } diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 2a5f7b4..cbad39a 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -899,321 +899,289 @@ _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd ((__v2df) __A, - (__v2df) __B, - (__v2df) __C), - (__v2df) __A); + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v2df)__A, (__v2df)__B, (__v2df)__C), + (__v2df)__A); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd ((__v2df) __A, - (__v2df) __B, - (__v2df) __C), - (__v2df) __C); + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v2df)__A, (__v2df)__B, (__v2df)__C), + (__v2df)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd ((__v2df) __A, - (__v2df) __B, - (__v2df) __C), - (__v2df)_mm_setzero_pd()); + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v2df)__A, (__v2df)__B, (__v2df)__C), + (__v2df)_mm_setzero_pd()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd ((__v2df) __A, - (__v2df) __B, - -(__v2df) __C), - (__v2df) __A); + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v2df)__A, (__v2df)__B, -(__v2df)__C), + (__v2df)__A); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 
_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd ((__v2df) __A, - (__v2df) __B, - -(__v2df) __C), - (__v2df)_mm_setzero_pd()); + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v2df)__A, (__v2df)__B, -(__v2df)__C), + (__v2df)_mm_setzero_pd()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd (-(__v2df) __A, - (__v2df) __B, - (__v2df) __C), - (__v2df) __C); + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, + __builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B, (__v2df)__C), + (__v2df)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd (-(__v2df) __A, - (__v2df) __B, - (__v2df) __C), - (__v2df)_mm_setzero_pd()); + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, + __builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B, (__v2df)__C), + (__v2df)_mm_setzero_pd()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd (-(__v2df) __A, - (__v2df) __B, - -(__v2df) __C), - (__v2df)_mm_setzero_pd()); + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, + __builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B, -(__v2df)__C), + (__v2df)_mm_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 ((__v4df) __A, - (__v4df) __B, - (__v4df) __C), - (__v4df) __A); + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v4df)__A, (__v4df)__B, (__v4df)__C), + (__v4df)__A); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 ((__v4df) __A, - (__v4df) __B, - (__v4df) __C), - (__v4df) __C); + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v4df)__A, (__v4df)__B, (__v4df)__C), + (__v4df)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 ((__v4df) __A, - (__v4df) __B, - (__v4df) __C), - (__v4df)_mm256_setzero_pd()); + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v4df)__A, (__v4df)__B, (__v4df)__C), + (__v4df)_mm256_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 ((__v4df) __A, - (__v4df) __B, - -(__v4df) __C), - (__v4df) __A); + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v4df)__A, (__v4df)__B, -(__v4df)__C), + (__v4df)__A); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 
_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 ((__v4df) __A, - (__v4df) __B, - -(__v4df) __C), - (__v4df)_mm256_setzero_pd()); + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v4df)__A, (__v4df)__B, -(__v4df)__C), + (__v4df)_mm256_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 (-(__v4df) __A, - (__v4df) __B, - (__v4df) __C), - (__v4df) __C); + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, + __builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B, (__v4df)__C), + (__v4df)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 (-(__v4df) __A, - (__v4df) __B, - (__v4df) __C), - (__v4df)_mm256_setzero_pd()); + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, + __builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B, (__v4df)__C), + (__v4df)_mm256_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 (-(__v4df) __A, - (__v4df) __B, - -(__v4df) __C), - (__v4df)_mm256_setzero_pd()); + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, + __builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B, -(__v4df)__C), + (__v4df)_mm256_setzero_pd()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) __C), - (__v4sf) __A); + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, (__v4sf)__C), + (__v4sf)__A); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) __C), - (__v4sf) __C); + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, (__v4sf)__C), + (__v4sf)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) __C), - (__v4sf)_mm_setzero_ps()); + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, (__v4sf)__C), + (__v4sf)_mm_setzero_ps()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps ((__v4sf) __A, - (__v4sf) __B, - -(__v4sf) __C), - (__v4sf) __A); + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C), + (__v4sf)__A); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 
_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps ((__v4sf) __A, - (__v4sf) __B, - -(__v4sf) __C), - (__v4sf)_mm_setzero_ps()); + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C), + (__v4sf)_mm_setzero_ps()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps (-(__v4sf) __A, - (__v4sf) __B, - (__v4sf) __C), - (__v4sf) __C); + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, + __builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C), + (__v4sf)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps (-(__v4sf) __A, - (__v4sf) __B, - (__v4sf) __C), - (__v4sf)_mm_setzero_ps()); + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, + __builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C), + (__v4sf)_mm_setzero_ps()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps (-(__v4sf) __A, - (__v4sf) __B, - -(__v4sf) __C), - (__v4sf)_mm_setzero_ps()); + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, + __builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C), + (__v4sf)_mm_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 ((__v8sf) __A, - (__v8sf) __B, - (__v8sf) __C), - (__v8sf) __A); + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, (__v8sf)__C), + (__v8sf)__A); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 ((__v8sf) __A, - (__v8sf) __B, - (__v8sf) __C), - (__v8sf) __C); + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, (__v8sf)__C), + (__v8sf)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 ((__v8sf) __A, - (__v8sf) __B, - (__v8sf) __C), - (__v8sf)_mm256_setzero_ps()); + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, (__v8sf)__C), + (__v8sf)_mm256_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 ((__v8sf) __A, - (__v8sf) __B, - -(__v8sf) __C), - (__v8sf) __A); + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C), + (__v8sf)__A); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, 
__m256 __B, __m256 __C) { - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 ((__v8sf) __A, - (__v8sf) __B, - -(__v8sf) __C), - (__v8sf)_mm256_setzero_ps()); + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C), + (__v8sf)_mm256_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 (-(__v8sf) __A, - (__v8sf) __B, - (__v8sf) __C), - (__v8sf) __C); + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, + __builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C), + (__v8sf)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 (-(__v8sf) __A, - (__v8sf) __B, - (__v8sf) __C), - (__v8sf)_mm256_setzero_ps()); + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, + __builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C), + (__v8sf)_mm256_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 (-(__v8sf) __A, - (__v8sf) __B, - -(__v8sf) __C), - (__v8sf)_mm256_setzero_ps()); + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, + __builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C), + (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 @@ -1420,41 +1388,37 @@ _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd ((__v2df) __A, - (__v2df) __B, - -(__v2df) __C), - (__v2df) __C); + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v2df)__A, (__v2df)__B, -(__v2df)__C), + (__v2df)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 ((__v4df) __A, - (__v4df) __B, - -(__v4df) __C), - (__v4df) __C); + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v4df)__A, (__v4df)__B, -(__v4df)__C), + (__v4df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps ((__v4sf) __A, - (__v4sf) __B, - -(__v4sf) __C), - (__v4sf) __C); + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C), + (__v4sf)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 ((__v8sf) __A, - (__v8sf) __B, - -(__v8sf) __C), - (__v8sf) __C); + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, 
-(__v8sf)__C), + (__v8sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 @@ -1500,121 +1464,109 @@ _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd ((__v2df) __A, - -(__v2df) __B, - (__v2df) __C), - (__v2df) __A); + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v2df)__A, -(__v2df)__B, (__v2df)__C), + (__v2df)__A); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 ((__v4df) __A, - -(__v4df) __B, - (__v4df) __C), - (__v4df) __A); + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v4df)__A, -(__v4df)__B, (__v4df)__C), + (__v4df)__A); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps ((__v4sf) __A, - -(__v4sf) __B, - (__v4sf) __C), - (__v4sf) __A); + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C), + (__v4sf)__A); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 ((__v8sf) __A, - -(__v8sf) __B, - (__v8sf) __C), - (__v8sf) __A); + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v8sf)__A, -(__v8sf)__B, (__v8sf)__C), + (__v8sf)__A); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd ((__v2df) __A, - -(__v2df) __B, - -(__v2df) __C), - (__v2df) __A); + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v2df)__A, -(__v2df)__B, -(__v2df)__C), + (__v2df)__A); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd ((__v2df) __A, - -(__v2df) __B, - -(__v2df) __C), - (__v2df) __C); + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v2df)__A, -(__v2df)__B, -(__v2df)__C), + (__v2df)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 ((__v4df) __A, - -(__v4df) __B, - -(__v4df) __C), - (__v4df) __A); + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v4df)__A, -(__v4df)__B, -(__v4df)__C), + (__v4df)__A); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 ((__v4df) __A, - -(__v4df) __B, - -(__v4df) __C), - (__v4df) __C); + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, + 
__builtin_elementwise_fma((__v4df)__A, -(__v4df)__B, -(__v4df)__C), + (__v4df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps ((__v4sf) __A, - -(__v4sf) __B, - -(__v4sf) __C), - (__v4sf) __A); + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C), + (__v4sf)__A); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps ((__v4sf) __A, - -(__v4sf) __B, - -(__v4sf) __C), - (__v4sf) __C); + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, + __builtin_elementwise_fma((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C), + (__v4sf)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 ((__v8sf) __A, - -(__v8sf) __B, - -(__v8sf) __C), - (__v8sf) __A); + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v8sf)__A, -(__v8sf)__B, -(__v8sf)__C), + (__v8sf)__A); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 ((__v8sf) __A, - -(__v8sf) __B, - -(__v8sf) __C), - (__v8sf) __C); + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, + __builtin_elementwise_fma((__v8sf)__A, -(__v8sf)__B, -(__v8sf)__C), + (__v8sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 diff --git a/clang/lib/Headers/avx512vpopcntdqintrin.h b/clang/lib/Headers/avx512vpopcntdqintrin.h index e24c2c5..79fc6e1 100644 --- a/clang/lib/Headers/avx512vpopcntdqintrin.h +++ b/clang/lib/Headers/avx512vpopcntdqintrin.h @@ -60,5 +60,6 @@ _mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) { } #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif diff --git a/clang/lib/Headers/avx512vpopcntdqvlintrin.h b/clang/lib/Headers/avx512vpopcntdqvlintrin.h index b6c819b..d14cb1e 100644 --- a/clang/lib/Headers/avx512vpopcntdqvlintrin.h +++ b/clang/lib/Headers/avx512vpopcntdqvlintrin.h @@ -99,5 +99,7 @@ _mm256_maskz_popcnt_epi32(__mmask8 __U, __m256i __A) { #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 +#undef __DEFAULT_FN_ATTRS128_CONSTEXPR +#undef __DEFAULT_FN_ATTRS256_CONSTEXPR #endif diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 2be4f68..5a6d48b 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -4367,7 +4367,7 @@ _mm256_setzero_si256(void) { /// A 256-bit floating-point vector of [4 x double]. /// \returns A 256-bit floating-point vector of [8 x float] containing the same /// bitwise pattern as the parameter. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castpd_ps(__m256d __a) { return (__m256)__a; @@ -4384,7 +4384,7 @@ _mm256_castpd_ps(__m256d __a) /// A 256-bit floating-point vector of [4 x double]. /// \returns A 256-bit integer vector containing the same bitwise pattern as the /// parameter. 
-static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castpd_si256(__m256d __a) { return (__m256i)__a; @@ -4401,7 +4401,7 @@ _mm256_castpd_si256(__m256d __a) /// A 256-bit floating-point vector of [8 x float]. /// \returns A 256-bit floating-point vector of [4 x double] containing the same /// bitwise pattern as the parameter. -static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castps_pd(__m256 __a) { return (__m256d)__a; @@ -4418,7 +4418,7 @@ _mm256_castps_pd(__m256 __a) /// A 256-bit floating-point vector of [8 x float]. /// \returns A 256-bit integer vector containing the same bitwise pattern as the /// parameter. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castps_si256(__m256 __a) { return (__m256i)__a; @@ -4435,7 +4435,7 @@ _mm256_castps_si256(__m256 __a) /// A 256-bit integer vector. /// \returns A 256-bit floating-point vector of [8 x float] containing the same /// bitwise pattern as the parameter. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castsi256_ps(__m256i __a) { return (__m256)__a; @@ -4452,7 +4452,7 @@ _mm256_castsi256_ps(__m256i __a) /// A 256-bit integer vector. /// \returns A 256-bit floating-point vector of [4 x double] containing the same /// bitwise pattern as the parameter. -static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castsi256_pd(__m256i __a) { return (__m256d)__a; @@ -4469,7 +4469,7 @@ _mm256_castsi256_pd(__m256i __a) /// A 256-bit floating-point vector of [4 x double]. /// \returns A 128-bit floating-point vector of [2 x double] containing the /// lower 128 bits of the parameter. -static __inline __m128d __DEFAULT_FN_ATTRS +static __inline __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castpd256_pd128(__m256d __a) { return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 1); @@ -4486,7 +4486,7 @@ _mm256_castpd256_pd128(__m256d __a) /// A 256-bit floating-point vector of [8 x float]. /// \returns A 128-bit floating-point vector of [4 x float] containing the /// lower 128 bits of the parameter. -static __inline __m128 __DEFAULT_FN_ATTRS +static __inline __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castps256_ps128(__m256 __a) { return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 1, 2, 3); @@ -4502,7 +4502,7 @@ _mm256_castps256_ps128(__m256 __a) /// A 256-bit integer vector. /// \returns A 128-bit integer vector containing the lower 128 bits of the /// parameter. -static __inline __m128i __DEFAULT_FN_ATTRS +static __inline __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castsi256_si128(__m256i __a) { return __builtin_shufflevector((__v4di)__a, (__v4di)__a, 0, 1); diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 770bb5c..60d2000 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -2394,8 +2394,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a, /// A 128-bit signed [8 x i16] vector. /// \returns A 128-bit signed [8 x i16] vector containing the upper 16 bits of /// each of the eight 32-bit products. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_mulhi_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b); } @@ -2413,9 +2413,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a, /// A 128-bit unsigned [8 x i16] vector. /// \returns A 128-bit unsigned [8 x i16] vector containing the upper 16 bits /// of each of the eight 32-bit products. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a, - __m128i __b) { - return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_mulhi_epu16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_pmulhuw128((__v8hu)__a, (__v8hu)__b); } /// Multiplies the corresponding elements of two signed [8 x i16] @@ -2432,8 +2432,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a, /// A 128-bit signed [8 x i16] vector. /// \returns A 128-bit signed [8 x i16] vector containing the lower 16 bits of /// each of the eight 32-bit products. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_mullo_epi16(__m128i __a, __m128i __b) { return (__m128i)((__v8hu)__a * (__v8hu)__b); } diff --git a/clang/lib/Headers/fma4intrin.h b/clang/lib/Headers/fma4intrin.h index 694801b..69977fb 100644 --- a/clang/lib/Headers/fma4intrin.h +++ b/clang/lib/Headers/fma4intrin.h @@ -23,13 +23,15 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); + return (__m128)__builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, + (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); + return (__m128d)__builtin_elementwise_fma((__v2df)__A, (__v2df)__B, + (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 @@ -47,13 +49,15 @@ _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_msub_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); + return (__m128)__builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, + -(__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); + return (__m128d)__builtin_elementwise_fma((__v2df)__A, (__v2df)__B, + -(__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 @@ -71,13 +75,15 @@ _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); + return (__m128)__builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B, + (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); + return (__m128d)__builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B, + (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 @@ -95,13 +101,15 @@ 
_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); + return (__m128)__builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B, + -(__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); + return (__m128d)__builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B, + -(__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 @@ -143,49 +151,57 @@ _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C) static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); + return (__m256)__builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, + (__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); + return (__m256d)__builtin_elementwise_fma((__v4df)__A, (__v4df)__B, + (__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); + return (__m256)__builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, + -(__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); + return (__m256d)__builtin_elementwise_fma((__v4df)__A, (__v4df)__B, + -(__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C); + return (__m256)__builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B, + (__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C); + return (__m256d)__builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B, + (__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); + return (__m256)__builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B, + -(__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C); + return (__m256d)__builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B, + -(__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 diff --git a/clang/lib/Headers/fmaintrin.h b/clang/lib/Headers/fmaintrin.h index 22d1a78..24584a9 100644 --- a/clang/lib/Headers/fmaintrin.h +++ b/clang/lib/Headers/fmaintrin.h @@ -35,7 +35,8 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); + return (__m128)__builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, + (__v4sf)__C); } /// Computes a multiply-add of 128-bit vectors of [2 x double]. 
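Illustration only, not from the patch: every fmsub/fnmadd/fnmsub variant rewritten in these headers is composed from the single generic __builtin_elementwise_fma by negating operands. The identities behind that composition can be checked with scalar fma() from <math.h> (values chosen to be exact in binary floating point):

#include <assert.h>
#include <math.h>

int main(void) {
  double a = 2.0, b = 3.0, c = 5.0;
  assert(fma(a, b, -c) == a * b - c);      /* fmsub(a,b,c)  = fma(a, b, -c)  */
  assert(fma(-a, b, c) == c - a * b);      /* fnmadd(a,b,c) = fma(-a, b, c)  */
  assert(fma(-a, b, -c) == -(a * b) - c);  /* fnmsub(a,b,c) = fma(-a, b, -c) */
  return 0;
}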
@@ -55,7 +56,8 @@ _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); + return (__m128d)__builtin_elementwise_fma((__v2df)__A, (__v2df)__B, + (__v2df)__C); } /// Computes a scalar multiply-add of the single-precision values in the @@ -133,7 +135,8 @@ _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); + return (__m128)__builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, + -(__v4sf)__C); } /// Computes a multiply-subtract of 128-bit vectors of [2 x double]. @@ -153,7 +156,8 @@ _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); + return (__m128d)__builtin_elementwise_fma((__v2df)__A, (__v2df)__B, + -(__v2df)__C); } /// Computes a scalar multiply-subtract of the single-precision values in @@ -231,7 +235,8 @@ _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); + return (__m128)__builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B, + (__v4sf)__C); } /// Computes a negated multiply-add of 128-bit vectors of [2 x double]. @@ -251,7 +256,8 @@ _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); + return (__m128d)__builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B, + (__v2df)__C); } /// Computes a scalar negated multiply-add of the single-precision values in @@ -329,7 +335,8 @@ _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); + return (__m128)__builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B, + -(__v4sf)__C); } /// Computes a negated multiply-subtract of 128-bit vectors of [2 x double]. @@ -349,7 +356,8 @@ _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); + return (__m128d)__builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B, + -(__v2df)__C); } /// Computes a scalar negated multiply-subtract of the single-precision @@ -531,7 +539,8 @@ _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); + return (__m256)__builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, + (__v8sf)__C); } /// Computes a multiply-add of 256-bit vectors of [4 x double]. 
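A short aside on why one builtin can replace vfmaddps, vfmaddpd, vfmaddph, and their 256-bit variants: __builtin_elementwise_fma is polymorphic over element type and vector width, so the same call works for any floating-point vector. A minimal sketch, assuming Clang's vector_size extension:

typedef float  __f32x4 __attribute__((__vector_size__(16)));
typedef double __f64x4 __attribute__((__vector_size__(32)));

/* Same builtin, different element types and widths. */
static __f32x4 fma_f32x4(__f32x4 a, __f32x4 b, __f32x4 c) {
  return __builtin_elementwise_fma(a, b, c);
}
static __f64x4 fma_f64x4(__f64x4 a, __f64x4 b, __f64x4 c) {
  return __builtin_elementwise_fma(a, b, c);
}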
@@ -551,7 +560,8 @@ _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); + return (__m256d)__builtin_elementwise_fma((__v4df)__A, (__v4df)__B, + (__v4df)__C); } /// Computes a multiply-subtract of 256-bit vectors of [8 x float]. @@ -571,7 +581,8 @@ _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); + return (__m256)__builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, + -(__v8sf)__C); } /// Computes a multiply-subtract of 256-bit vectors of [4 x double]. @@ -591,7 +602,8 @@ _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); + return (__m256d)__builtin_elementwise_fma((__v4df)__A, (__v4df)__B, + -(__v4df)__C); } /// Computes a negated multiply-add of 256-bit vectors of [8 x float]. @@ -611,7 +623,8 @@ _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C); + return (__m256)__builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B, + (__v8sf)__C); } /// Computes a negated multiply-add of 256-bit vectors of [4 x double]. @@ -631,7 +644,8 @@ _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C); + return (__m256d)__builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B, + (__v4df)__C); } /// Computes a negated multiply-subtract of 256-bit vectors of [8 x float]. @@ -651,7 +665,8 @@ _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); + return (__m256)__builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B, + -(__v8sf)__C); } /// Computes a negated multiply-subtract of 256-bit vectors of [4 x double]. 
@@ -671,7 +686,8 @@ _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C); + return (__m256d)__builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B, + -(__v4df)__C); } /// Computes a multiply with alternating add/subtract of 256-bit vectors of diff --git a/clang/lib/Headers/mmintrin.h b/clang/lib/Headers/mmintrin.h index 5a02a455..3961b79 100644 --- a/clang/lib/Headers/mmintrin.h +++ b/clang/lib/Headers/mmintrin.h @@ -57,6 +57,9 @@ typedef char __v16qi __attribute__((__vector_size__(16))); #define __trunc64(x) \ (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0) +#define __zext128(x) \ + (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \ + 1, 2, 3) #define __anyext128(x) \ (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \ 1, -1, -1) @@ -723,11 +726,11 @@ _mm_madd_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits /// of the products of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_mulhi_pi16(__m64 __m1, __m64 __m2) { - return __trunc64(__builtin_ia32_pmulhw128((__v8hi)__anyext128(__m1), - (__v8hi)__anyext128(__m2))); + return __trunc64(__builtin_ia32_pmulhw128((__v8hi)__zext128(__m1), + (__v8hi)__zext128(__m2))); } /// Multiplies each 16-bit signed integer element of the first 64-bit diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index 6a64369..7bf6b84 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -24,6 +24,7 @@ typedef float __m128_u __attribute__((__vector_size__(16), __aligned__(1))); /* Unsigned types */ typedef unsigned int __v4su __attribute__((__vector_size__(16))); +typedef unsigned short __v8hu __attribute__((__vector_size__(16))); /* This header should only be included in a hosted environment as it depends on * a standard library to provide allocation routines. */ @@ -2447,11 +2448,11 @@ _mm_movemask_pi8(__m64 __a) /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the products of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_mulhi_pu16(__m64 __a, __m64 __b) { - return __trunc64(__builtin_ia32_pmulhuw128((__v8hi)__anyext128(__a), - (__v8hi)__anyext128(__b))); + return __trunc64(__builtin_ia32_pmulhuw128((__v8hu)__zext128(__a), + (__v8hu)__zext128(__b))); } /// Shuffles the 4 16-bit integers from a 64-bit integer vector to the diff --git a/clang/lib/Interpreter/RemoteJITUtils.cpp b/clang/lib/Interpreter/RemoteJITUtils.cpp index c0e663b..c100f46 100644 --- a/clang/lib/Interpreter/RemoteJITUtils.cpp +++ b/clang/lib/Interpreter/RemoteJITUtils.cpp @@ -33,6 +33,10 @@ using namespace llvm; using namespace llvm::orc; +#if LLVM_ON_UNIX +static std::vector<pid_t> LaunchedExecutorPID; +#endif + Expected<uint64_t> getSlabAllocSize(StringRef SizeString) { SizeString = SizeString.trim(); @@ -89,9 +93,14 @@ createSharedMemoryManager(SimpleRemoteEPC &SREPC, SlabSize, SREPC, SAs); } +// Launches an out-of-process executor for remote JIT. 
The calling program can +// provide a CustomizeFork callback, which allows it to run custom code in the +// child process before exec. This enables sending custom setup or code to be +// executed in the child (out-of-process) executor. Expected<std::unique_ptr<SimpleRemoteEPC>> launchExecutor(StringRef ExecutablePath, bool UseSharedMemory, - llvm::StringRef SlabAllocateSizeString) { + llvm::StringRef SlabAllocateSizeString, + std::function<void()> CustomizeFork) { #ifndef LLVM_ON_UNIX // FIXME: Add support for Windows. return make_error<StringError>("-" + ExecutablePath + @@ -134,6 +143,9 @@ launchExecutor(StringRef ExecutablePath, bool UseSharedMemory, close(ToExecutor[WriteEnd]); close(FromExecutor[ReadEnd]); + if (CustomizeFork) + CustomizeFork(); + // Execute the child process. std::unique_ptr<char[]> ExecutorPath, FDSpecifier; { @@ -158,6 +170,8 @@ launchExecutor(StringRef ExecutablePath, bool UseSharedMemory, } // else we're the parent... + LaunchedExecutorPID.push_back(ChildPID); + // Close the child ends of the pipes close(ToExecutor[ReadEnd]); close(FromExecutor[WriteEnd]); @@ -265,3 +279,18 @@ connectTCPSocket(StringRef NetworkAddress, bool UseSharedMemory, std::move(S), *SockFD, *SockFD); #endif } + +#if LLVM_ON_UNIX + +pid_t getLastLaunchedExecutorPID() { + if (!LaunchedExecutorPID.size()) + return -1; + return LaunchedExecutorPID.back(); +} + +pid_t getNthLaunchedExecutorPID(int n) { + if (n - 1 < 0 || n - 1 >= static_cast<int>(LaunchedExecutorPID.size())) + return -1; + return LaunchedExecutorPID.at(n - 1); +} +#endif
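A usage sketch for the API added above (not part of the diff; the executor path, environment variable, and surrounding function are placeholders): the caller passes a callback that runs in the forked child before exec, and can later query the child's PID.

llvm::Error spawnExecutor() {
  auto EPC = launchExecutor(
      "/path/to/llvm-jitlink-executor", /*UseSharedMemory=*/false,
      /*SlabAllocateSizeString=*/"",
      /*CustomizeFork=*/[] {
        // Runs in the forked child, before exec: per-child setup goes here.
        setenv("EXECUTOR_MODE", "remote", /*overwrite=*/1);
      });
  if (!EPC)
    return EPC.takeError();
  // The parent records the child PID, retrievable afterwards:
  pid_t ChildPID = getLastLaunchedExecutorPID(); // -1 if none launched yet
  (void)ChildPID;
  return llvm::Error::success();
}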
\ No newline at end of file diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 0edfd60..9c55541 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -3497,6 +3497,19 @@ void Sema::DeclareGlobalAllocationFunction(DeclarationName Name, } auto CreateAllocationFunctionDecl = [&](Attr *ExtraAttr) { + // The MSVC STL has explicit cdecl on its (host-side) allocation function + // specializations for the allocation, so in order to prevent a CC clash + // we use the host's CC, if available, or CC_C as a fallback, for the + // host-side implicit decls, knowing these do not get emitted when compiling + // for device. + if (getLangOpts().CUDAIsDevice && ExtraAttr && + isa<CUDAHostAttr>(ExtraAttr) && + Context.getTargetInfo().getTriple().isSPIRV()) { + if (auto *ATI = Context.getAuxTargetInfo()) + EPI.ExtInfo = EPI.ExtInfo.withCallingConv(ATI->getDefaultCallingConv()); + else + EPI.ExtInfo = EPI.ExtInfo.withCallingConv(CallingConv::CC_C); + } QualType FnType = Context.getFunctionType(Return, Params, EPI); FunctionDecl *Alloc = FunctionDecl::Create( Context, GlobalCtx, SourceLocation(), SourceLocation(), Name, FnType, diff --git a/clang/lib/Sema/SemaObjC.cpp b/clang/lib/Sema/SemaObjC.cpp index 0f39a98..bde00bd 100644 --- a/clang/lib/Sema/SemaObjC.cpp +++ b/clang/lib/Sema/SemaObjC.cpp @@ -691,7 +691,7 @@ static QualType applyObjCTypeArgs(Sema &S, SourceLocation loc, QualType type, if (!anyPackExpansions && finalTypeArgs.size() != numTypeParams) { S.Diag(loc, diag::err_objc_type_args_wrong_arity) << (typeArgs.size() < typeParams->size()) << objcClass->getDeclName() - << (unsigned)finalTypeArgs.size() << (unsigned)numTypeParams; + << (unsigned)finalTypeArgs.size() << numTypeParams; S.Diag(objcClass->getLocation(), diag::note_previous_decl) << objcClass; if (failOnError) diff --git a/clang/lib/StaticAnalyzer/Checkers/AnalysisOrderChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/AnalysisOrderChecker.cpp index 3b3def7..e64153d 100644 --- a/clang/lib/StaticAnalyzer/Checkers/AnalysisOrderChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/AnalysisOrderChecker.cpp @@ -183,7 +183,8 @@ public: llvm::errs() << "NewAllocator\n"; } - void checkBind(SVal Loc, SVal Val, const Stmt *S, CheckerContext &C) const { + void checkBind(SVal Loc, SVal Val, const Stmt *S, bool AtDeclInit, + CheckerContext &C) const { if (isCallbackEnabled(C, "Bind")) llvm::errs() << "Bind\n"; } diff --git a/clang/lib/StaticAnalyzer/Checkers/BoolAssignmentChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/BoolAssignmentChecker.cpp index 837cbbc..921114a 100644 --- a/clang/lib/StaticAnalyzer/Checkers/BoolAssignmentChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/BoolAssignmentChecker.cpp @@ -29,7 +29,8 @@ class BoolAssignmentChecker : public Checker<check::Bind> { bool IsTainted = false) const; public: - void checkBind(SVal Loc, SVal Val, const Stmt *S, CheckerContext &C) const; + void checkBind(SVal Loc, SVal Val, const Stmt *S, bool AtDeclInit, + CheckerContext &C) const; }; } // end anonymous namespace @@ -55,6 +56,7 @@ static bool isBooleanType(QualType Ty) { } void BoolAssignmentChecker::checkBind(SVal Loc, SVal Val, const Stmt *S, + bool AtDeclInit, CheckerContext &C) const { // We are only interested in stores into Booleans. 
diff --git a/clang/lib/StaticAnalyzer/Checkers/CheckerDocumentation.cpp b/clang/lib/StaticAnalyzer/Checkers/CheckerDocumentation.cpp index 350db4b..392c7ee 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CheckerDocumentation.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CheckerDocumentation.cpp @@ -175,9 +175,12 @@ public: /// \param Loc The value of the location (pointer). /// \param Val The value which will be stored at the location Loc. /// \param S The bind is performed while processing the statement S. + /// \param AtDeclInit Whether the bind is performed during declaration + /// initialization. /// /// check::Bind - void checkBind(SVal Loc, SVal Val, const Stmt *S, CheckerContext &) const {} + void checkBind(SVal Loc, SVal Val, const Stmt *S, bool AtDeclInit, + CheckerContext &) const {} /// Called after a CFG edge is taken within a function. /// diff --git a/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp index 152129e..395d724 100644 --- a/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp @@ -48,7 +48,8 @@ class DereferenceChecker public: void checkLocation(SVal location, bool isLoad, const Stmt* S, CheckerContext &C) const; - void checkBind(SVal L, SVal V, const Stmt *S, CheckerContext &C) const; + void checkBind(SVal L, SVal V, const Stmt *S, bool AtDeclInit, + CheckerContext &C) const; static void AddDerefSource(raw_ostream &os, SmallVectorImpl<SourceRange> &Ranges, @@ -309,7 +310,7 @@ void DereferenceChecker::checkLocation(SVal l, bool isLoad, const Stmt* S, } void DereferenceChecker::checkBind(SVal L, SVal V, const Stmt *S, - CheckerContext &C) const { + bool AtDeclInit, CheckerContext &C) const { // If we're binding to a reference, check if the value is known to be null. 
if (V.isUndef()) return; diff --git a/clang/lib/StaticAnalyzer/Checkers/IteratorModeling.cpp b/clang/lib/StaticAnalyzer/Checkers/IteratorModeling.cpp index 7ad54c0..7eb9a1d 100644 --- a/clang/lib/StaticAnalyzer/Checkers/IteratorModeling.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/IteratorModeling.cpp @@ -150,7 +150,8 @@ public: IteratorModeling() = default; void checkPostCall(const CallEvent &Call, CheckerContext &C) const; - void checkBind(SVal Loc, SVal Val, const Stmt *S, CheckerContext &C) const; + void checkBind(SVal Loc, SVal Val, const Stmt *S, bool AtDeclInit, + CheckerContext &C) const; void checkPostStmt(const UnaryOperator *UO, CheckerContext &C) const; void checkPostStmt(const BinaryOperator *BO, CheckerContext &C) const; void checkPostStmt(const MaterializeTemporaryExpr *MTE, @@ -234,7 +235,7 @@ void IteratorModeling::checkPostCall(const CallEvent &Call, } void IteratorModeling::checkBind(SVal Loc, SVal Val, const Stmt *S, - CheckerContext &C) const { + bool AtDeclInit, CheckerContext &C) const { auto State = C.getState(); const auto *Pos = getIteratorPosition(State, Val); if (Pos) { diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp index 369d619..efb9809 100644 --- a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp @@ -3156,7 +3156,7 @@ void MallocChecker::checkPreCall(const CallEvent &Call, for (unsigned I = 0, E = Call.getNumArgs(); I != E; ++I) { SVal ArgSVal = Call.getArgSVal(I); if (isa<Loc>(ArgSVal)) { - SymbolRef Sym = ArgSVal.getAsSymbol(); + SymbolRef Sym = ArgSVal.getAsSymbol(/*IncludeBaseRegions=*/true); if (!Sym) continue; if (checkUseAfterFree(Sym, C, Call.getArgExpr(I))) diff --git a/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp index 9744d1a..eeb6b72 100644 --- a/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp @@ -97,7 +97,8 @@ public: // libraries. bool NoDiagnoseCallsToSystemHeaders = false; - void checkBind(SVal L, SVal V, const Stmt *S, CheckerContext &C) const; + void checkBind(SVal L, SVal V, const Stmt *S, bool AtDeclInit, + CheckerContext &C) const; void checkPostStmt(const ExplicitCastExpr *CE, CheckerContext &C) const; void checkPreStmt(const ReturnStmt *S, CheckerContext &C) const; void checkPostObjCMessage(const ObjCMethodCall &M, CheckerContext &C) const; @@ -1250,7 +1251,7 @@ static bool isARCNilInitializedLocal(CheckerContext &C, const Stmt *S) { /// Propagate the nullability information through binds and warn when nullable /// pointer or null symbol is assigned to a pointer with a nonnull type. 
diff --git a/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp
index 9744d1a..eeb6b72 100644
--- a/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp
@@ -97,7 +97,8 @@ public:
   // libraries.
   bool NoDiagnoseCallsToSystemHeaders = false;
 
-  void checkBind(SVal L, SVal V, const Stmt *S, CheckerContext &C) const;
+  void checkBind(SVal L, SVal V, const Stmt *S, bool AtDeclInit,
+                 CheckerContext &C) const;
   void checkPostStmt(const ExplicitCastExpr *CE, CheckerContext &C) const;
   void checkPreStmt(const ReturnStmt *S, CheckerContext &C) const;
   void checkPostObjCMessage(const ObjCMethodCall &M, CheckerContext &C) const;
@@ -1250,7 +1251,7 @@ static bool isARCNilInitializedLocal(CheckerContext &C, const Stmt *S) {
 /// Propagate the nullability information through binds and warn when nullable
 /// pointer or null symbol is assigned to a pointer with a nonnull type.
 void NullabilityChecker::checkBind(SVal L, SVal V, const Stmt *S,
-                                   CheckerContext &C) const {
+                                   bool AtDeclInit, CheckerContext &C) const {
   const TypedValueRegion *TVR =
       dyn_cast_or_null<TypedValueRegion>(L.getAsRegion());
   if (!TVR)
diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCSelfInitChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCSelfInitChecker.cpp
index ace3426..e40b4f8 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ObjCSelfInitChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ObjCSelfInitChecker.cpp
@@ -73,7 +73,8 @@ public:
   void checkPreStmt(const ReturnStmt *S, CheckerContext &C) const;
   void checkLocation(SVal location, bool isLoad, const Stmt *S,
                      CheckerContext &C) const;
-  void checkBind(SVal loc, SVal val, const Stmt *S, CheckerContext &C) const;
+  void checkBind(SVal loc, SVal val, const Stmt *S, bool AtDeclInit,
+                 CheckerContext &C) const;
   void checkPreCall(const CallEvent &CE, CheckerContext &C) const;
   void checkPostCall(const CallEvent &CE, CheckerContext &C) const;
 
@@ -311,9 +312,8 @@ void ObjCSelfInitChecker::checkLocation(SVal location, bool isLoad,
                        C);
 }
-
 void ObjCSelfInitChecker::checkBind(SVal loc, SVal val, const Stmt *S,
-                                    CheckerContext &C) const {
+                                    bool AtDeclInit, CheckerContext &C) const {
   // Allow assignment of anything to self. Self is a local variable in the
   // initializer, so it is legal to assign anything to it, like results of
   // static functions/method calls. After self is assigned something we cannot
diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp
index 65ff902..1762505 100644
--- a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp
@@ -1136,7 +1136,7 @@ ExplodedNode * RetainCountChecker::checkReturnWithRetEffect(const ReturnStmt *S,
 //===----------------------------------------------------------------------===//
 
 void RetainCountChecker::checkBind(SVal loc, SVal val, const Stmt *S,
-                                   CheckerContext &C) const {
+                                   bool AtDeclInit, CheckerContext &C) const {
   ProgramStateRef state = C.getState();
   const MemRegion *MR = loc.getAsRegion();
 
diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.h b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.h
index 8854e10..dc8bad6 100644
--- a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.h
+++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.h
@@ -280,7 +280,8 @@ public:
   void printState(raw_ostream &Out, ProgramStateRef State,
                   const char *NL, const char *Sep) const override;
 
-  void checkBind(SVal loc, SVal val, const Stmt *S, CheckerContext &C) const;
+  void checkBind(SVal loc, SVal val, const Stmt *S, bool AtDeclInit,
+                 CheckerContext &C) const;
   void checkPostStmt(const BlockExpr *BE, CheckerContext &C) const;
   void checkPostStmt(const CastExpr *CE, CheckerContext &C) const;
 
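For reference, the kind of bind the nullability callback above inspects; illustrative code only, assuming a configuration where nullability annotations are honored:

// Illustrative input for NullabilityChecker::checkBind.
void consume(int *_Nonnull P);

void caller(int *_Nullable Q) {
  int *_Nonnull R = Q; // bind at declaration init (AtDeclInit == true)
  R = Q;               // ordinary assignment      (AtDeclInit == false)
  consume(R);
}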
diff --git a/clang/lib/StaticAnalyzer/Checkers/StoreToImmutableChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StoreToImmutableChecker.cpp
index afad419..2bb3917 100644
--- a/clang/lib/StaticAnalyzer/Checkers/StoreToImmutableChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/StoreToImmutableChecker.cpp
@@ -26,53 +26,11 @@ class StoreToImmutableChecker : public Checker<check::Bind> {
   const BugType BT{this, "Write to immutable memory", "CERT Environment (ENV)"};
 
 public:
-  void checkBind(SVal Loc, SVal Val, const Stmt *S, CheckerContext &C) const;
+  void checkBind(SVal Loc, SVal Val, const Stmt *S, bool AtDeclInit,
+                 CheckerContext &C) const;
 };
 } // end anonymous namespace
 
-static bool isInitializationContext(const Stmt *S, CheckerContext &C) {
-  // Check if this is a DeclStmt (variable declaration)
-  if (isa<DeclStmt>(S))
-    return true;
-
-  // This part is specific for initialization of const lambdas pre-C++17.
-  // Lets look at the AST of the statement:
-  // ```
-  // const auto lambda = [](){};
-  // ```
-  //
-  // The relevant part of the AST for this case prior to C++17 is:
-  // ...
-  // `-DeclStmt
-  //   `-VarDecl
-  //     `-ExprWithCleanups
-  //       `-CXXConstructExpr
-  // ...
-  // In C++17 and later, the AST is different:
-  // ...
-  // `-DeclStmt
-  //   `-VarDecl
-  //     `-ImplicitCastExpr
-  //       `-LambdaExpr
-  //         |-CXXRecordDecl
-  //         `-CXXConstructExpr
-  // ...
-  // And even beside this, the statement `S` that is given to the checkBind
-  // callback is the VarDecl in C++17 and later, and the CXXConstructExpr in
-  // C++14 and before. So in order to support the C++14 we need the following
-  // ugly hack to detect whether this construction is used to initialize a
-  // variable.
-  //
-  // FIXME: This should be eliminated by improving the API of checkBind to
-  // ensure that it consistently passes the `VarDecl` (instead of the
-  // `CXXConstructExpr`) when the constructor call denotes the initialization
-  // of a variable with a lambda, or maybe less preferably, try the more
-  // invasive approach of passing the information forward to the checkers
-  // whether the current bind is an initialization or an assignment.
-  const auto *ConstructExp = dyn_cast<CXXConstructExpr>(S);
-  return ConstructExp && ConstructExp->isElidable();
-}
-
 static bool isEffectivelyConstRegion(const MemRegion *MR, CheckerContext &C) {
   if (isa<GlobalImmutableSpaceRegion>(MR))
     return true;
@@ -128,6 +86,7 @@ getInnermostEnclosingConstDeclRegion(const MemRegion *MR, CheckerContext &C) {
 }
 
 void StoreToImmutableChecker::checkBind(SVal Loc, SVal Val, const Stmt *S,
+                                        bool AtDeclInit,
                                         CheckerContext &C) const {
   // We are only interested in stores to memory regions
   const MemRegion *MR = Loc.getAsRegion();
@@ -136,9 +95,7 @@ void StoreToImmutableChecker::checkBind(SVal Loc, SVal Val, const Stmt *S,
 
   // Skip variable declarations and initializations - we only want to catch
   // actual writes
-  // FIXME: If the API of checkBind would allow to distinguish between
-  // initialization and assignment, we could use that instead.
-  if (isInitializationContext(S, C))
+  if (AtDeclInit)
    return;
 
   // Check if the region is in the global immutable space
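The deleted isInitializationContext() heuristic existed chiefly for the pre-C++17 lambda case its comment describes; with AtDeclInit supplied by the engine, both language modes classify that bind as an initialization. Illustrative input:

// Illustrative input only. Before C++17 this initialization reaches
// checkBind as the elidable CXXConstructExpr rather than the DeclStmt,
// which is why the removed isa<DeclStmt>/isElidable() hack existed;
// AtDeclInit now marks the bind as an initialization in both modes.
void f() {
  const auto Lambda = [] { return 42; };
  (void)Lambda;
}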
diff --git a/clang/lib/StaticAnalyzer/Checkers/UndefinedAssignmentChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UndefinedAssignmentChecker.cpp
index e98de33..7f8923c 100644
--- a/clang/lib/StaticAnalyzer/Checkers/UndefinedAssignmentChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/UndefinedAssignmentChecker.cpp
@@ -26,13 +26,13 @@ class UndefinedAssignmentChecker
   const BugType BT{this, "Assigned value is uninitialized"};
 
 public:
-  void checkBind(SVal location, SVal val, const Stmt *S,
+  void checkBind(SVal location, SVal val, const Stmt *S, bool AtDeclInit,
                  CheckerContext &C) const;
 };
 }
 
 void UndefinedAssignmentChecker::checkBind(SVal location, SVal val,
-                                           const Stmt *StoreE,
+                                           const Stmt *StoreE, bool AtDeclInit,
                                            CheckerContext &C) const {
   if (!val.isUndef())
     return;
diff --git a/clang/lib/StaticAnalyzer/Checkers/VforkChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/VforkChecker.cpp
index cb73ac6..116dd93 100644
--- a/clang/lib/StaticAnalyzer/Checkers/VforkChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/VforkChecker.cpp
@@ -62,7 +62,8 @@ public:
   void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
-  void checkBind(SVal L, SVal V, const Stmt *S, CheckerContext &C) const;
+  void checkBind(SVal L, SVal V, const Stmt *S, bool AtDeclInit,
+                 CheckerContext &C) const;
   void checkPreStmt(const ReturnStmt *RS, CheckerContext &C) const;
 };
 
@@ -188,7 +189,7 @@ void VforkChecker::checkPreCall(const CallEvent &Call,
 }
 
 // Prohibit writes in child process (except for vfork's lhs).
-void VforkChecker::checkBind(SVal L, SVal V, const Stmt *S,
+void VforkChecker::checkBind(SVal L, SVal V, const Stmt *S, bool AtDeclInit,
                              CheckerContext &C) const {
   ProgramStateRef State = C.getState();
   if (!isChildProcess(State))
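For context, the pattern VforkChecker::checkBind rejects; the checker's semantics are unchanged here, only the parameter is threaded through. Illustrative input:

// Illustrative input for VforkChecker.
#include <unistd.h>

void in_child() {
  int X = 0;
  pid_t Pid = vfork();
  if (Pid == 0) {
    X = 1; // write in the vfork'ed child: reported via checkBind
    _exit(0);
  }
}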
diff --git a/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp b/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp
index 0fe677e..44c6f9f 100644
--- a/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp
+++ b/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp
@@ -376,11 +376,13 @@ namespace {
     const Stmt *S;
     ExprEngine &Eng;
     const ProgramPoint &PP;
+    bool AtDeclInit;
 
-    CheckBindContext(const CheckersTy &checkers,
-                     SVal loc, SVal val, const Stmt *s, ExprEngine &eng,
+    CheckBindContext(const CheckersTy &checkers, SVal loc, SVal val,
+                     const Stmt *s, bool AtDeclInit, ExprEngine &eng,
                      const ProgramPoint &pp)
-      : Checkers(checkers), Loc(loc), Val(val), S(s), Eng(eng), PP(pp) {}
+        : Checkers(checkers), Loc(loc), Val(val), S(s), Eng(eng), PP(pp),
+          AtDeclInit(AtDeclInit) {}
 
     CheckersTy::const_iterator checkers_begin() { return Checkers.begin(); }
     CheckersTy::const_iterator checkers_end() { return Checkers.end(); }
@@ -391,7 +393,7 @@ namespace {
       const ProgramPoint &L = PP.withTag(checkFn.Checker);
       CheckerContext C(Bldr, Eng, Pred, L);
 
-      checkFn(Loc, Val, S, C);
+      checkFn(Loc, Val, S, AtDeclInit, C);
     }
   };
 
@@ -408,10 +410,10 @@ namespace {
 /// Run checkers for binding of a value to a location.
 void CheckerManager::runCheckersForBind(ExplodedNodeSet &Dst,
                                         const ExplodedNodeSet &Src,
-                                        SVal location, SVal val,
-                                        const Stmt *S, ExprEngine &Eng,
+                                        SVal location, SVal val, const Stmt *S,
+                                        bool AtDeclInit, ExprEngine &Eng,
                                         const ProgramPoint &PP) {
-  CheckBindContext C(BindCheckers, location, val, S, Eng, PP);
+  CheckBindContext C(BindCheckers, location, val, S, AtDeclInit, Eng, PP);
   llvm::TimeTraceScope TimeScope{
       "CheckerManager::runCheckersForBind",
       [&val]() { return getTimeTraceBindMetadata(val); }};
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index d874844..c853c00 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -3714,9 +3714,8 @@ ExprEngine::notifyCheckersOfPointerEscape(ProgramStateRef State,
 /// evalBind - Handle the semantics of binding a value to a specific location.
 /// This method is used by evalStore and (soon) VisitDeclStmt, and others.
 void ExprEngine::evalBind(ExplodedNodeSet &Dst, const Stmt *StoreE,
-                          ExplodedNode *Pred,
-                          SVal location, SVal Val,
-                          bool atDeclInit, const ProgramPoint *PP) {
+                          ExplodedNode *Pred, SVal location, SVal Val,
+                          bool AtDeclInit, const ProgramPoint *PP) {
   const LocationContext *LC = Pred->getLocationContext();
   PostStmt PS(StoreE, LC);
   if (!PP)
@@ -3725,7 +3724,7 @@ void ExprEngine::evalBind(ExplodedNodeSet &Dst, const Stmt *StoreE,
   // Do a previsit of the bind.
   ExplodedNodeSet CheckedSet;
   getCheckerManager().runCheckersForBind(CheckedSet, Pred, location, Val,
-                                         StoreE, *this, *PP);
+                                         StoreE, AtDeclInit, *this, *PP);
 
   StmtNodeBuilder Bldr(CheckedSet, Dst, *currBldrCtx);
 
@@ -3748,8 +3747,8 @@ void ExprEngine::evalBind(ExplodedNodeSet &Dst, const Stmt *StoreE,
   // When binding the value, pass on the hint that this is a initialization.
   // For initializations, we do not need to inform clients of region
   // changes.
-  state = state->bindLoc(location.castAs<Loc>(),
-                         Val, LC, /* notifyChanges = */ !atDeclInit);
+  state = state->bindLoc(location.castAs<Loc>(), Val, LC,
+                         /* notifyChanges = */ !AtDeclInit);
 
   const MemRegion *LocReg = nullptr;
   if (std::optional<loc::MemRegionVal> LocRegVal =
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
index fe70558..c0b28d2 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
@@ -85,7 +85,7 @@ void ExprEngine::performTrivialCopy(NodeBuilder &Bldr, ExplodedNode *Pred,
   evalLocation(Tmp, CallExpr, VExpr, Pred, Pred->getState(), V,
                /*isLoad=*/true);
   for (ExplodedNode *N : Tmp)
-    evalBind(Dst, CallExpr, N, ThisVal, V, true);
+    evalBind(Dst, CallExpr, N, ThisVal, V, !AlwaysReturnsLValue);
 
   PostStmt PS(CallExpr, LCtx);
   for (ExplodedNode *N : Dst) {
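The ExprEngineCXX hunk is the one engine-side subtlety: as set up earlier in performTrivialCopy, AlwaysReturnsLValue holds for a trivial copy assignment operator (which returns an lvalue) but not for a trivial copy constructor, so !AlwaysReturnsLValue reports exactly the constructor case as an initialization bind. Roughly, for analyzed code like this sketch:

// Illustrative input for performTrivialCopy.
struct Trivial { int V; };

void copies(Trivial A) {
  Trivial B = A; // trivial copy-ctor: checkBind sees AtDeclInit == true
  B = A;         // trivial copy-assignment: AtDeclInit == false
}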