Diffstat (limited to 'clang/lib/AST/ByteCode')
26 files changed, 1821 insertions, 530 deletions
diff --git a/clang/lib/AST/ByteCode/BitcastBuffer.h b/clang/lib/AST/ByteCode/BitcastBuffer.h
index d1d6ee3..8d32351 100644
--- a/clang/lib/AST/ByteCode/BitcastBuffer.h
+++ b/clang/lib/AST/ByteCode/BitcastBuffer.h
@@ -89,6 +89,12 @@ struct BitcastBuffer {
     Data = std::make_unique<std::byte[]>(ByteSize);
   }
 
+  /// Returns the byte at the given offset.
+  std::byte *atByte(unsigned Offset) {
+    assert(Offset < FinalBitSize.roundToBytes());
+    return Data.get() + Offset;
+  }
+
   /// Returns the buffer size in bits.
   Bits size() const { return FinalBitSize; }
   Bytes byteSize() const { return FinalBitSize.toBytes(); }
@@ -113,6 +119,13 @@ struct BitcastBuffer {
   std::unique_ptr<std::byte[]> copyBits(Bits BitOffset, Bits BitWidth,
                                         Bits FullBitWidth,
                                         Endian TargetEndianness) const;
+
+  /// Dereferences the value at the given offset.
+  template <typename T> T deref(Bytes Offset) const {
+    assert(Offset.getQuantity() < FinalBitSize.roundToBytes());
+    assert((Offset.getQuantity() + sizeof(T)) <= FinalBitSize.roundToBytes());
+    return *reinterpret_cast<T *>(Data.get() + Offset.getQuantity());
+  }
 };
 
 } // namespace interp
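The two helpers added above expose raw byte access and typed reads on the bitcast buffer. Below is a minimal standalone sketch of that access pattern; the ByteBuffer type and names are illustrative, not the clang API. Note that the patch's deref() uses reinterpret_cast, which assumes suitably aligned storage, whereas memcpy stays well-defined for any alignment:

    #include <cassert>
    #include <cstddef>
    #include <cstring>
    #include <memory>

    struct ByteBuffer {
      std::unique_ptr<std::byte[]> Data;
      std::size_t ByteSize;

      // Raw access, mirroring atByte().
      std::byte *atByte(std::size_t Offset) {
        assert(Offset < ByteSize);
        return Data.get() + Offset;
      }

      // Typed read, mirroring deref<T>(); memcpy sidesteps alignment issues.
      template <typename T> T deref(std::size_t Offset) const {
        assert(Offset + sizeof(T) <= ByteSize);
        T Result;
        std::memcpy(&Result, Data.get() + Offset, sizeof(T));
        return Result;
      }
    };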
diff --git a/clang/lib/AST/ByteCode/ByteCodeEmitter.h b/clang/lib/AST/ByteCode/ByteCodeEmitter.h
index ca8dc38..dd18341 100644
--- a/clang/lib/AST/ByteCode/ByteCodeEmitter.h
+++ b/clang/lib/AST/ByteCode/ByteCodeEmitter.h
@@ -25,11 +25,11 @@ enum Opcode : uint32_t;
 /// An emitter which links the program to bytecode for later use.
 class ByteCodeEmitter {
 protected:
-  using LabelTy = uint32_t;
   using AddrTy = uintptr_t;
   using Local = Scope::Local;
 
 public:
+  using LabelTy = uint32_t;
+
   /// Compiles the function into the module.
   void compileFunc(const FunctionDecl *FuncDecl, Function *Func = nullptr);
diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp
index 6c08846..ed5493c 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -16,6 +16,7 @@
 #include "PrimType.h"
 #include "Program.h"
 #include "clang/AST/Attr.h"
+#include "llvm/Support/SaveAndRestore.h"
 
 using namespace clang;
 using namespace clang::interp;
@@ -39,7 +40,7 @@ static std::optional<bool> getBoolValue(const Expr *E) {
 template <class Emitter> class DeclScope final : public LocalScope<Emitter> {
 public:
   DeclScope(Compiler<Emitter> *Ctx, const ValueDecl *VD)
-      : LocalScope<Emitter>(Ctx, VD), Scope(Ctx->P),
+      : LocalScope<Emitter>(Ctx), Scope(Ctx->P),
         OldInitializingDecl(Ctx->InitializingDecl) {
     Ctx->InitializingDecl = VD;
     Ctx->InitStack.push_back(InitLink::Decl(VD));
@@ -476,8 +477,9 @@ bool Compiler<Emitter>::VisitCastExpr(const CastExpr *CE) {
     return this->delegate(SubExpr);
 
   case CK_BitCast: {
+    QualType CETy = CE->getType();
     // Reject bitcasts to atomic types.
-    if (CE->getType()->isAtomicType()) {
+    if (CETy->isAtomicType()) {
       if (!this->discard(SubExpr))
         return false;
       return this->emitInvalidCast(CastKind::Reinterpret, /*Fatal=*/true, CE);
@@ -494,6 +496,7 @@ bool Compiler<Emitter>::VisitCastExpr(const CastExpr *CE) {
     assert(isPtrType(*FromT));
     assert(isPtrType(*ToT));
 
+    bool SrcIsVoidPtr = SubExprTy->isVoidPointerType();
     if (FromT == ToT) {
       if (CE->getType()->isVoidPointerType() &&
           !SubExprTy->isFunctionPointerType()) {
@@ -502,6 +505,10 @@ bool Compiler<Emitter>::VisitCastExpr(const CastExpr *CE) {
       if (!this->visit(SubExpr))
         return false;
 
+      if (!this->emitCheckBitCast(CETy->getPointeeType().getTypePtr(),
+                                  SrcIsVoidPtr, CE))
+        return false;
+
       if (CE->getType()->isFunctionPointerType() ||
           SubExprTy->isFunctionPointerType()) {
         return this->emitFnPtrCast(CE);
@@ -767,6 +774,11 @@ bool Compiler<Emitter>::VisitCastExpr(const CastExpr *CE) {
   case CK_ToVoid:
     return discard(SubExpr);
 
+  case CK_Dynamic:
+    // This initially goes through VisitCXXDynamicCastExpr, where we emit
+    // a diagnostic if appropriate.
+    return this->delegate(SubExpr);
+
   default:
     return this->emitInvalid(CE);
   }
@@ -1033,8 +1045,15 @@ bool Compiler<Emitter>::VisitPointerArithBinOp(const BinaryOperator *E) {
     if (!visitAsPointer(RHS, *RT) || !visitAsPointer(LHS, *LT))
       return false;
 
+    QualType ElemType = LHS->getType()->getPointeeType();
+    CharUnits ElemTypeSize;
+    if (ElemType->isVoidType() || ElemType->isFunctionType())
+      ElemTypeSize = CharUnits::One();
+    else
+      ElemTypeSize = Ctx.getASTContext().getTypeSizeInChars(ElemType);
+
     PrimType IntT = classifyPrim(E->getType());
-    if (!this->emitSubPtr(IntT, E))
+    if (!this->emitSubPtr(IntT, ElemTypeSize.isZero(), E))
       return false;
     return DiscardResult ? this->emitPop(IntT, E) : true;
   }
@@ -1686,6 +1705,9 @@ bool Compiler<Emitter>::VisitFixedPointUnaryOperator(const UnaryOperator *E) {
 template <class Emitter>
 bool Compiler<Emitter>::VisitImplicitValueInitExpr(
    const ImplicitValueInitExpr *E) {
+  if (DiscardResult)
+    return true;
+
   QualType QT = E->getType();
 
   if (OptPrimType T = classify(QT))
@@ -2115,8 +2137,7 @@ bool Compiler<Emitter>::visitCallArgs(ArrayRef<const Expr *> Args,
     }
 
     UnsignedOrNone LocalIndex =
-        allocateLocal(std::move(Source), Arg->getType(),
-                      /*ExtendingDecl=*/nullptr, ScopeKind::Call);
+        allocateLocal(std::move(Source), Arg->getType(), ScopeKind::Call);
     if (!LocalIndex)
       return false;
@@ -2429,7 +2450,7 @@ bool Compiler<Emitter>::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E) {
   // and the RHS is our SubExpr.
   for (size_t I = 0; I != Size; ++I) {
     ArrayIndexScope<Emitter> IndexScope(this, I);
-    LocalScope<Emitter> BS(this);
+    LocalScope<Emitter> BS(this, ScopeKind::FullExpression);
 
     if (!this->visitArrayElemInit(I, SubExpr, SubExprT))
       return false;
@@ -2482,19 +2503,22 @@ bool Compiler<Emitter>::VisitAbstractConditionalOperator(
   const Expr *TrueExpr = E->getTrueExpr();
   const Expr *FalseExpr = E->getFalseExpr();
 
-  auto visitChildExpr = [&](const Expr *E) -> bool {
-    LocalScope<Emitter> S(this);
-    if (!this->delegate(E))
-      return false;
-    return S.destroyLocals();
-  };
-
   if (std::optional<bool> BoolValue = getBoolValue(Condition)) {
-    if (BoolValue)
-      return visitChildExpr(TrueExpr);
-    return visitChildExpr(FalseExpr);
-  }
-
+    if (*BoolValue)
+      return this->delegate(TrueExpr);
+    return this->delegate(FalseExpr);
+  }
+
+  // Force-init the scope, which creates an InitScope op. This is necessary so
+  // the scope is not only initialized in one arm of the conditional operator.
+  this->VarScope->forceInit();
+  // The TrueExpr and FalseExpr of a conditional operator do _not_ create a
+  // scope, which means the local variables created within them always exist.
+  // However, we need to later differentiate which branch was taken and only
+  // destroy the variables of the active branch. This is what the "enabled"
+  // flags on local variables are used for.
+  llvm::SaveAndRestore LAAA(this->VarScope->LocalsAlwaysEnabled,
+                            /*NewValue=*/false);
+
   bool IsBcpCall = false;
   if (const auto *CE = dyn_cast<CallExpr>(Condition->IgnoreParenCasts());
       CE && CE->getBuiltinCallee() == Builtin::BI__builtin_constant_p) {
@@ -2524,13 +2548,15 @@ bool Compiler<Emitter>::VisitAbstractConditionalOperator(
   if (!this->jumpFalse(LabelFalse))
     return false;
 
-  if (!visitChildExpr(TrueExpr))
+  if (!this->delegate(TrueExpr))
     return false;
+
   if (!this->jump(LabelEnd))
     return false;
 
   this->emitLabel(LabelFalse);
 
-  if (!visitChildExpr(FalseExpr))
+  if (!this->delegate(FalseExpr))
     return false;
+
   this->fallthrough(LabelEnd);
   this->emitLabel(LabelEnd);
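For context, this is the shape of code the new "enabled" tracking addresses: both arms of a conditional operator live in the enclosing scope, but only the temporary of the branch actually taken is constructed, so only its destructor may run. A hedged sketch (C++20, since constexpr destructors are required; not taken from the patch's tests):

    struct A { int X = 1; constexpr ~A() {} };
    struct B { int Y = 2; constexpr ~B() {} };

    constexpr int pick(bool C) {
      // Only one of the two temporaries is ever created; the interpreter
      // must skip the cleanup of the branch that was not taken.
      return C ? A().X : B().Y;
    }
    static_assert(pick(true) == 1 && pick(false) == 2);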
@@ -2805,10 +2831,10 @@ bool Compiler<Emitter>::VisitCompoundAssignOperator(
     return false;
   if (!this->emitLoad(*LT, E))
     return false;
-  if (LT != LHSComputationT) {
-    if (!this->emitCast(*LT, *LHSComputationT, E))
-      return false;
-  }
+  if (LT != LHSComputationT &&
+      !this->emitIntegralCast(*LT, *LHSComputationT,
+                              E->getComputationLHSType(), E))
+    return false;
 
   // Get the RHS value on the stack.
   if (!this->emitGetLocal(*RT, TempOffset, E))
@@ -2861,10 +2887,9 @@ bool Compiler<Emitter>::VisitCompoundAssignOperator(
   }
 
   // And now cast from LHSComputationT to ResultT.
-  if (ResultT != LHSComputationT) {
-    if (!this->emitCast(*LHSComputationT, *ResultT, E))
-      return false;
-  }
+  if (ResultT != LHSComputationT &&
+      !this->emitIntegralCast(*LHSComputationT, *ResultT, E->getType(), E))
+    return false;
 
   // And store the result in LHS.
   if (DiscardResult) {
@@ -2879,7 +2904,7 @@ bool Compiler<Emitter>::VisitCompoundAssignOperator(
 
 template <class Emitter>
 bool Compiler<Emitter>::VisitExprWithCleanups(const ExprWithCleanups *E) {
-  LocalScope<Emitter> ES(this);
+  LocalScope<Emitter> ES(this, ScopeKind::FullExpression);
   const Expr *SubExpr = E->getSubExpr();
 
   return this->delegate(SubExpr) && ES.destroyLocals(E);
@@ -2902,9 +2927,7 @@ bool Compiler<Emitter>::VisitMaterializeTemporaryExpr(
   // When we're initializing a global variable *or* the storage duration of
   // the temporary is explicitly static, create a global variable.
   OptPrimType SubExprT = classify(SubExpr);
-  bool IsStatic = E->getStorageDuration() == SD_Static;
-  if (IsStatic) {
-
+  if (E->getStorageDuration() == SD_Static) {
     UnsignedOrNone GlobalIndex = P.createGlobal(E);
     if (!GlobalIndex)
       return false;
@@ -2931,25 +2954,40 @@ bool Compiler<Emitter>::VisitMaterializeTemporaryExpr(
     return this->emitInitGlobalTempComp(TempDecl, E);
   }
 
+  ScopeKind VarScope = E->getStorageDuration() == SD_FullExpression
+                           ? ScopeKind::FullExpression
+                           : ScopeKind::Block;
+
   // For everything else, use local variables.
   if (SubExprT) {
     bool IsConst = SubExpr->getType().isConstQualified();
     bool IsVolatile = SubExpr->getType().isVolatileQualified();
-    unsigned LocalIndex = allocateLocalPrimitive(
-        E, *SubExprT, IsConst, IsVolatile, E->getExtendingDecl());
+    unsigned LocalIndex =
+        allocateLocalPrimitive(E, *SubExprT, IsConst, IsVolatile, VarScope);
+
+    if (!this->VarScope->LocalsAlwaysEnabled &&
+        !this->emitEnableLocal(LocalIndex, E))
+      return false;
+
     if (!this->visit(SubExpr))
       return false;
     if (!this->emitSetLocal(*SubExprT, LocalIndex, E))
       return false;
+
     return this->emitGetPtrLocal(LocalIndex, E);
   }
 
   if (!this->checkLiteralType(SubExpr))
     return false;
+
   const Expr *Inner = E->getSubExpr()->skipRValueSubobjectAdjustments();
   if (UnsignedOrNone LocalIndex =
-          allocateLocal(E, Inner->getType(), E->getExtendingDecl())) {
+          allocateLocal(E, Inner->getType(), VarScope)) {
     InitLinkScope<Emitter> ILS(this, InitLink::Temp(*LocalIndex));
+
+    if (!this->VarScope->LocalsAlwaysEnabled &&
+        !this->emitEnableLocal(*LocalIndex, E))
+      return false;
+
     if (!this->emitGetPtrLocal(*LocalIndex, E))
       return false;
     return this->visitInitializer(SubExpr) && this->emitFinishInit(E);
@@ -3217,7 +3255,8 @@ bool Compiler<Emitter>::VisitCXXConstructExpr(const CXXConstructExpr *E) {
       return this->visitInitializer(E->getArg(0));
 
   // Zero initialization.
-  if (E->requiresZeroInitialization()) {
+  bool ZeroInit = E->requiresZeroInitialization();
+  if (ZeroInit) {
     const Record *R = getRecord(E->getType());
 
     if (!this->visitZeroRecordInitializer(R, E))
@@ -3228,6 +3267,19 @@ bool Compiler<Emitter>::VisitCXXConstructExpr(const CXXConstructExpr *E) {
     return true;
   }
 
+  // Avoid materializing a temporary for an elidable copy/move constructor.
+  if (!ZeroInit && E->isElidable()) {
+    const Expr *SrcObj = E->getArg(0);
+    assert(SrcObj->isTemporaryObject(Ctx.getASTContext(), Ctor->getParent()));
+    assert(Ctx.getASTContext().hasSameUnqualifiedType(E->getType(),
+                                                      SrcObj->getType()));
+    if (const auto *ME = dyn_cast<MaterializeTemporaryExpr>(SrcObj)) {
+      if (!this->emitCheckFunctionDecl(Ctor, E))
+        return false;
+      return this->visitInitializer(ME->getSubExpr());
+    }
+  }
+
   const Function *Func = getFunction(Ctor);
 
   if (!Func)
@@ -4157,7 +4209,7 @@ bool Compiler<Emitter>::VisitStmtExpr(const StmtExpr *E) {
   StmtExprScope<Emitter> SS(this);
 
   const CompoundStmt *CS = E->getSubStmt();
-  const Stmt *Result = CS->getStmtExprResult();
+  const Stmt *Result = CS->body_back();
   for (const Stmt *S : CS->body()) {
     if (S != Result) {
       if (!this->visitStmt(S))
@@ -4221,7 +4273,8 @@ template <class Emitter> bool Compiler<Emitter>::visit(const Expr *E) {
 
   // Create local variable to hold the return value.
   if (!E->isGLValue() && !canClassify(E->getType())) {
-    UnsignedOrNone LocalIndex = allocateLocal(stripDerivedToBaseCasts(E));
+    UnsignedOrNone LocalIndex = allocateLocal(
+        stripDerivedToBaseCasts(E), QualType(), ScopeKind::FullExpression);
     if (!LocalIndex)
       return false;
@@ -4578,9 +4631,11 @@ bool Compiler<Emitter>::emitConst(const APSInt &Value, const Expr *E) {
 }
 
 template <class Emitter>
-unsigned Compiler<Emitter>::allocateLocalPrimitive(
-    DeclTy &&Src, PrimType Ty, bool IsConst, bool IsVolatile,
-    const ValueDecl *ExtendingDecl, ScopeKind SC, bool IsConstexprUnknown) {
+unsigned Compiler<Emitter>::allocateLocalPrimitive(DeclTy &&Src, PrimType Ty,
+                                                   bool IsConst,
+                                                   bool IsVolatile,
+                                                   ScopeKind SC,
+                                                   bool IsConstexprUnknown) {
   // FIXME: There are cases where Src.is<Expr*>() is wrong, e.g.
   //   (int){12} in C. Consider using Expr::isTemporaryObject() instead
  //   or isa<MaterializeTemporaryExpr>().
@@ -4591,16 +4646,12 @@ unsigned Compiler<Emitter>::allocateLocalPrimitive(
   Scope::Local Local = this->createLocal(D);
   if (auto *VD = dyn_cast_if_present<ValueDecl>(Src.dyn_cast<const Decl *>()))
     Locals.insert({VD, Local});
-  if (ExtendingDecl)
-    VarScope->addExtended(Local, ExtendingDecl);
-  else
-    VarScope->addForScopeKind(Local, SC);
+  VarScope->addForScopeKind(Local, SC);
   return Local.Offset;
 }
 
 template <class Emitter>
 UnsignedOrNone Compiler<Emitter>::allocateLocal(DeclTy &&Src, QualType Ty,
-                                                const ValueDecl *ExtendingDecl,
                                                 ScopeKind SC,
                                                 bool IsConstexprUnknown) {
   const ValueDecl *Key = nullptr;
@@ -4628,10 +4679,7 @@ UnsignedOrNone Compiler<Emitter>::allocateLocal(DeclTy &&Src, QualType Ty,
   Scope::Local Local = this->createLocal(D);
   if (Key)
     Locals.insert({Key, Local});
-  if (ExtendingDecl)
-    VarScope->addExtended(Local, ExtendingDecl);
-  else
-    VarScope->addForScopeKind(Local, SC);
+  VarScope->addForScopeKind(Local, SC);
   return Local.Offset;
 }
@@ -4683,7 +4731,7 @@ const Function *Compiler<Emitter>::getFunction(const FunctionDecl *FD) {
 
 template <class Emitter>
 bool Compiler<Emitter>::visitExpr(const Expr *E, bool DestroyToplevelScope) {
-  LocalScope<Emitter> RootScope(this);
+  LocalScope<Emitter> RootScope(this, ScopeKind::FullExpression);
 
   // If we won't destroy the toplevel scope, check for memory leaks first.
   if (!DestroyToplevelScope) {
@@ -4777,7 +4825,7 @@ bool Compiler<Emitter>::visitDeclAndReturn(const VarDecl *VD, const Expr *Init,
            LS.destroyLocals() && this->emitCheckAllocations(VD);
   }
 
-  LocalScope<Emitter> VDScope(this, VD);
+  LocalScope<Emitter> VDScope(this);
   if (!this->visitVarDecl(VD, Init, /*Toplevel=*/true))
     return false;
@@ -4888,7 +4936,7 @@ Compiler<Emitter>::visitVarDecl(const VarDecl *VD, const Expr *Init,
     if (VarT) {
       unsigned Offset = this->allocateLocalPrimitive(
           VD, *VarT, VD->getType().isConstQualified(),
-          VD->getType().isVolatileQualified(), nullptr, ScopeKind::Block,
+          VD->getType().isVolatileQualified(), ScopeKind::Block,
           IsConstexprUnknown);
 
       if (!Init)
@@ -4908,7 +4956,7 @@ Compiler<Emitter>::visitVarDecl(const VarDecl *VD, const Expr *Init,
     }
     // Local composite variables.
     if (UnsignedOrNone Offset = this->allocateLocal(
-            VD, VD->getType(), nullptr, ScopeKind::Block, IsConstexprUnknown)) {
+            VD, VD->getType(), ScopeKind::Block, IsConstexprUnknown)) {
 
       if (!Init)
         return true;
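With the ExtendingDecl parameter gone, every local is now attached to the nearest enclosing scope of the requested ScopeKind. Roughly how the two common kinds differ in destruction timing, as an illustrative sketch (C++20 for the constexpr destructor; not from the patch's tests):

    struct T { constexpr ~T() {} };

    constexpr bool f() {
      T BlockLocal;  // ScopeKind::Block: destroyed when the enclosing
                     // block ends.
      (void)T{};     // ScopeKind::FullExpression: this temporary is
                     // destroyed at the end of the full-expression.
      return true;   // BlockLocal's cleanup runs here.
    }
    static_assert(f());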
@@ -5412,8 +5460,7 @@ bool Compiler<Emitter>::VisitCXXThisExpr(const CXXThisExpr *E) {
   unsigned EndIndex = 0;
   // Find the init list.
   for (StartIndex = InitStack.size() - 1; StartIndex > 0; --StartIndex) {
-    if (InitStack[StartIndex].Kind == InitLink::K_InitList ||
-        InitStack[StartIndex].Kind == InitLink::K_This) {
+    if (InitStack[StartIndex].Kind == InitLink::K_DIE) {
       EndIndex = StartIndex;
       --StartIndex;
       break;
@@ -5426,7 +5473,8 @@ bool Compiler<Emitter>::VisitCXXThisExpr(const CXXThisExpr *E) {
       continue;
 
     if (InitStack[StartIndex].Kind != InitLink::K_Field &&
-        InitStack[StartIndex].Kind != InitLink::K_Elem)
+        InitStack[StartIndex].Kind != InitLink::K_Elem &&
+        InitStack[StartIndex].Kind != InitLink::K_DIE)
       break;
   }
@@ -5437,7 +5485,8 @@ bool Compiler<Emitter>::VisitCXXThisExpr(const CXXThisExpr *E) {
   // Emit the instructions.
   for (unsigned I = StartIndex; I != (EndIndex + 1); ++I) {
-    if (InitStack[I].Kind == InitLink::K_InitList)
+    if (InitStack[I].Kind == InitLink::K_InitList ||
+        InitStack[I].Kind == InitLink::K_DIE)
       continue;
     if (!InitStack[I].template emit<Emitter>(this, E))
       return false;
@@ -5622,19 +5671,24 @@ bool Compiler<Emitter>::visitReturnStmt(const ReturnStmt *RS) {
 }
 
 template <class Emitter> bool Compiler<Emitter>::visitIfStmt(const IfStmt *IS) {
+  LocalScope<Emitter> IfScope(this);
+
   auto visitChildStmt = [&](const Stmt *S) -> bool {
     LocalScope<Emitter> SScope(this);
     if (!visitStmt(S))
       return false;
     return SScope.destroyLocals();
   };
-  if (auto *CondInit = IS->getInit())
+
+  if (auto *CondInit = IS->getInit()) {
     if (!visitStmt(CondInit))
       return false;
+  }
 
-  if (const DeclStmt *CondDecl = IS->getConditionVariableDeclStmt())
+  if (const DeclStmt *CondDecl = IS->getConditionVariableDeclStmt()) {
     if (!visitDeclStmt(CondDecl))
       return false;
+  }
 
   // Save ourselves compiling some code and the jumps, etc. if the condition is
   // statically known to be either true or false. We could look at more cases
@@ -5658,8 +5712,11 @@ template <class Emitter> bool Compiler<Emitter>::visitIfStmt(const IfStmt *IS) {
     if (!this->emitInv(IS))
       return false;
   } else {
+    LocalScope<Emitter> CondScope(this, ScopeKind::FullExpression);
     if (!this->visitBool(IS->getCond()))
       return false;
+    if (!CondScope.destroyLocals())
+      return false;
   }
 
   if (!this->maybeEmitDeferredVarInit(IS->getConditionVariable()))
@@ -5687,6 +5744,9 @@ template <class Emitter> bool Compiler<Emitter>::visitIfStmt(const IfStmt *IS) {
     this->emitLabel(LabelEnd);
   }
 
+  if (!IfScope.destroyLocals())
+    return false;
+
   return true;
 }
@@ -5906,8 +5966,10 @@ bool Compiler<Emitter>::visitBreakStmt(const BreakStmt *S) {
   assert(TargetLabel);
 
   for (VariableScope<Emitter> *C = this->VarScope; C != BreakScope;
-       C = C->getParent())
-    C->emitDestruction();
+       C = C->getParent()) {
+    if (!C->destroyLocals())
+      return false;
+  }
 
   return this->jump(*TargetLabel);
 }
@@ -5941,8 +6003,10 @@ bool Compiler<Emitter>::visitContinueStmt(const ContinueStmt *S) {
   assert(TargetLabel);
 
   for (VariableScope<Emitter> *C = VarScope; C != ContinueScope;
-       C = C->getParent())
-    C->emitDestruction();
+       C = C->getParent()) {
+    if (!C->destroyLocals())
+      return false;
+  }
 
   return this->jump(*TargetLabel);
 }
@@ -5983,12 +6047,41 @@ bool Compiler<Emitter>::visitSwitchStmt(const SwitchStmt *S) {
   for (const SwitchCase *SC = S->getSwitchCaseList(); SC;
        SC = SC->getNextSwitchCase()) {
     if (const auto *CS = dyn_cast<CaseStmt>(SC)) {
-      // FIXME: Implement ranges.
-      if (CS->caseStmtIsGNURange())
-        return false;
       CaseLabels[SC] = this->getLabel();
 
+      if (CS->caseStmtIsGNURange()) {
+        LabelTy EndOfRangeCheck = this->getLabel();
+        const Expr *Low = CS->getLHS();
+        const Expr *High = CS->getRHS();
+        if (Low->isValueDependent() || High->isValueDependent())
+          return false;
+
+        if (!this->emitGetLocal(CondT, CondVar, CS))
+          return false;
+        if (!this->visit(Low))
+          return false;
+        PrimType LT = this->classifyPrim(Low->getType());
+        if (!this->emitGE(LT, S))
+          return false;
+        if (!this->jumpFalse(EndOfRangeCheck))
+          return false;
+
+        if (!this->emitGetLocal(CondT, CondVar, CS))
+          return false;
+        if (!this->visit(High))
+          return false;
+        PrimType HT = this->classifyPrim(High->getType());
+        if (!this->emitLE(HT, S))
+          return false;
+        if (!this->jumpTrue(CaseLabels[CS]))
+          return false;
+        this->emitLabel(EndOfRangeCheck);
+        continue;
+      }
+
       const Expr *Value = CS->getLHS();
+      if (Value->isValueDependent())
+        return false;
       PrimType ValueT = this->classifyPrim(Value->getType());
 
       // Compare the case statement's value to the switch condition.
@@ -6022,6 +6115,7 @@ bool Compiler<Emitter>::visitSwitchStmt(const SwitchStmt *S) {
                                 DefaultLabel);
   if (!this->visitStmt(S->getBody()))
     return false;
+  this->fallthrough(EndLabel);
   this->emitLabel(EndLabel);
 
   return LS.destroyLocals();
@@ -6029,6 +6123,7 @@ bool Compiler<Emitter>::visitSwitchStmt(const SwitchStmt *S) {
 
 template <class Emitter> bool Compiler<Emitter>::visitCaseStmt(const CaseStmt *S) {
+  this->fallthrough(CaseLabels[S]);
   this->emitLabel(CaseLabels[S]);
   return this->visitStmt(S->getSubStmt());
 }
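The new lowering replaces the old "FIXME: Implement ranges" bail-out: a GNU case range becomes two comparisons against the saved switch condition, Cond >= Low and Cond <= High, jumping to the case label when both hold. An example the interpreter previously rejected (GNU extension, accepted by clang with a -Wgnu-case-range warning under -pedantic):

    constexpr int classify(char C) {
      switch (C) {
      case '0' ... '9':
        return 0;
      case 'a' ... 'z':
        return 1;
      default:
        return 2;
      }
    }
    static_assert(classify('5') == 0 && classify('q') == 1 &&
                  classify('!') == 2);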
@@ -6249,7 +6344,7 @@ bool Compiler<Emitter>::compileConstructor(const CXXConstructorDecl *Ctor) {
   InitLinkScope<Emitter> InitScope(this, InitLink::This());
   for (const auto *Init : Ctor->inits()) {
     // Scope needed for the initializers.
-    LocalScope<Emitter> Scope(this);
+    LocalScope<Emitter> Scope(this, ScopeKind::FullExpression);
 
     const Expr *InitExpr = Init->getInit();
     if (const FieldDecl *Member = Init->getMember()) {
@@ -6306,8 +6401,8 @@ bool Compiler<Emitter>::compileConstructor(const CXXConstructorDecl *Ctor) {
       unsigned FirstLinkOffset =
           R->getField(cast<FieldDecl>(IFD->chain()[0]))->Offset;
 
-      InitStackScope<Emitter> ISS(this, isa<CXXDefaultInitExpr>(InitExpr));
       InitLinkScope<Emitter> ILS(this, InitLink::Field(FirstLinkOffset));
+      InitStackScope<Emitter> ISS(this, isa<CXXDefaultInitExpr>(InitExpr));
 
       if (!emitFieldInitializer(NestedField, NestedFieldOffset, InitExpr,
                                 IsUnion))
         return false;
@@ -7095,9 +7190,12 @@ bool Compiler<Emitter>::VisitDeclRefExpr(const DeclRefExpr *E) {
   return this->visitDeclRef(D, E);
 }
 
-template <class Emitter> void Compiler<Emitter>::emitCleanup() {
-  for (VariableScope<Emitter> *C = VarScope; C; C = C->getParent())
-    C->emitDestruction();
+template <class Emitter> bool Compiler<Emitter>::emitCleanup() {
+  for (VariableScope<Emitter> *C = VarScope; C; C = C->getParent()) {
+    if (!C->destroyLocals())
+      return false;
+  }
+  return true;
 }
 
 template <class Emitter>
@@ -7158,6 +7256,19 @@ bool Compiler<Emitter>::emitPrimCast(PrimType FromT, PrimType ToT,
   return false;
 }
 
+template <class Emitter>
+bool Compiler<Emitter>::emitIntegralCast(PrimType FromT, PrimType ToT,
+                                         QualType ToQT, const Expr *E) {
+  assert(FromT != ToT);
+
+  if (ToT == PT_IntAP)
+    return this->emitCastAP(FromT, Ctx.getBitWidth(ToQT), E);
+  if (ToT == PT_IntAPS)
+    return this->emitCastAPS(FromT, Ctx.getBitWidth(ToQT), E);
+
+  return this->emitCast(FromT, ToT, E);
+}
+
 /// Emits __real(SubExpr)
 template <class Emitter>
 bool Compiler<Emitter>::emitComplexReal(const Expr *SubExpr) {
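emitIntegralCast routes casts whose target is an arbitrary-precision integer (PT_IntAP/PT_IntAPS) through CastAP/CastAPS with an explicit bit width, which the plain Cast op cannot express. A hedged sketch of the kind of compound assignment this affects (illustrative, not taken from the patch's tests):

    constexpr int f() {
      int A = 1;
      // The LHS is widened to the _BitInt(128) computation type (an APS
      // cast carrying the 128-bit width), added, then narrowed back to int.
      A += (_BitInt(128))2;
      return A;
    }
    static_assert(f() == 3);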
diff --git a/clang/lib/AST/ByteCode/Compiler.h b/clang/lib/AST/ByteCode/Compiler.h
index 5c46f75..1bd15c3 100644
--- a/clang/lib/AST/ByteCode/Compiler.h
+++ b/clang/lib/AST/ByteCode/Compiler.h
@@ -52,12 +52,14 @@ public:
     K_Decl = 3,
     K_Elem = 5,
     K_RVO = 6,
-    K_InitList = 7
+    K_InitList = 7,
+    K_DIE = 8,
   };
 
   static InitLink This() { return InitLink{K_This}; }
   static InitLink InitList() { return InitLink{K_InitList}; }
   static InitLink RVO() { return InitLink{K_RVO}; }
+  static InitLink DIE() { return InitLink{K_DIE}; }
   static InitLink Field(unsigned Offset) {
     InitLink IL{K_Field};
     IL.Offset = Offset;
@@ -102,7 +104,7 @@ struct VarCreationState {
   bool notCreated() const { return !S; }
 };
 
-enum class ScopeKind { Call, Block };
+enum class ScopeKind { Block, FullExpression, Call };
 
 /// Compilation context for expressions.
 template <class Emitter>
@@ -256,7 +258,7 @@ protected:
 
 protected:
   /// Emits scope cleanup instructions.
-  void emitCleanup();
+  bool emitCleanup();
 
   /// Returns a record type from a record or pointer type.
   const RecordType *getRecordTy(QualType Ty);
@@ -328,13 +330,11 @@ protected:
   /// Creates a local primitive value.
   unsigned allocateLocalPrimitive(DeclTy &&Decl, PrimType Ty, bool IsConst,
                                   bool IsVolatile = false,
-                                  const ValueDecl *ExtendingDecl = nullptr,
                                   ScopeKind SC = ScopeKind::Block,
                                   bool IsConstexprUnknown = false);
 
   /// Allocates a space storing a local given its type.
   UnsignedOrNone allocateLocal(DeclTy &&Decl, QualType Ty = QualType(),
-                               const ValueDecl *ExtendingDecl = nullptr,
                                ScopeKind = ScopeKind::Block,
                                bool IsConstexprUnknown = false);
   UnsignedOrNone allocateTemporary(const Expr *E);
@@ -391,6 +391,8 @@ private:
   }
   bool emitPrimCast(PrimType FromT, PrimType ToT, QualType ToQT,
                     const Expr *E);
+  bool emitIntegralCast(PrimType FromT, PrimType ToT, QualType ToQT,
+                        const Expr *E);
   PrimType classifyComplexElementType(QualType T) const {
     assert(T->isAnyComplexType());
@@ -472,39 +474,18 @@ extern template class Compiler<EvalEmitter>;
 /// Scope chain managing the variable lifetimes.
 template <class Emitter> class VariableScope {
 public:
-  VariableScope(Compiler<Emitter> *Ctx, const ValueDecl *VD,
-                ScopeKind Kind = ScopeKind::Block)
-      : Ctx(Ctx), Parent(Ctx->VarScope), ValDecl(VD), Kind(Kind) {
+  VariableScope(Compiler<Emitter> *Ctx, ScopeKind Kind = ScopeKind::Block)
+      : Ctx(Ctx), Parent(Ctx->VarScope), Kind(Kind) {
+    if (Parent)
+      this->LocalsAlwaysEnabled = Parent->LocalsAlwaysEnabled;
     Ctx->VarScope = this;
   }
 
   virtual ~VariableScope() { Ctx->VarScope = this->Parent; }
 
-  virtual void addLocal(const Scope::Local &Local) {
+  virtual void addLocal(Scope::Local Local) {
     llvm_unreachable("Shouldn't be called");
   }
-
-  void addExtended(const Scope::Local &Local, const ValueDecl *ExtendingDecl) {
-    // Walk up the chain of scopes until we find the one for ExtendingDecl.
-    // If there is no such scope, attach it to the parent one.
-    VariableScope *P = this;
-    while (P) {
-      if (P->ValDecl == ExtendingDecl) {
-        P->addLocal(Local);
-        return;
-      }
-      P = P->Parent;
-      if (!P)
-        break;
-    }
-
-    // Use the parent scope.
-    if (this->Parent)
-      this->Parent->addLocal(Local);
-    else
-      this->addLocal(Local);
-  }
-
   /// Like addExtended, but adds to the nearest scope of the given kind.
   void addForScopeKind(const Scope::Local &Local, ScopeKind Kind) {
     VariableScope *P = this;
@@ -522,18 +503,22 @@ public:
     this->addLocal(Local);
   }
 
-  virtual void emitDestruction() {}
   virtual bool emitDestructors(const Expr *E = nullptr) { return true; }
   virtual bool destroyLocals(const Expr *E = nullptr) { return true; }
+  virtual void forceInit() {}
   VariableScope *getParent() const { return Parent; }
   ScopeKind getKind() const { return Kind; }
 
+  /// Whether locals added to this scope are enabled by default.
+  /// This is almost always true, except for the two branches
+  /// of a conditional operator.
+  bool LocalsAlwaysEnabled = true;
+
 protected:
   /// Compiler instance.
   Compiler<Emitter> *Ctx;
   /// Link to the parent scope.
   VariableScope *Parent;
-  const ValueDecl *ValDecl = nullptr;
   ScopeKind Kind;
 };
 
@@ -541,9 +526,7 @@ protected:
 template <class Emitter> class LocalScope : public VariableScope<Emitter> {
 public:
   LocalScope(Compiler<Emitter> *Ctx, ScopeKind Kind = ScopeKind::Block)
-      : VariableScope<Emitter>(Ctx, nullptr, Kind) {}
-  LocalScope(Compiler<Emitter> *Ctx, const ValueDecl *VD)
-      : VariableScope<Emitter>(Ctx, VD) {}
+      : VariableScope<Emitter>(Ctx, Kind) {}
 
   /// Emit a Destroy op for this scope.
   ~LocalScope() override {
     if (!Idx)
       return;
     this->Ctx->emitDestroy(*Idx, SourceInfo{});
     removeStoredOpaqueValues();
   }
-
-  /// Overriden to support explicit destruction.
-  void emitDestruction() override {
-    if (!Idx)
-      return;
-
-    this->emitDestructors();
-    this->Ctx->emitDestroy(*Idx, SourceInfo{});
-  }
-
   /// Explicit destruction of local variables.
   bool destroyLocals(const Expr *E = nullptr) override {
     if (!Idx)
@@ -574,29 +547,60 @@ public:
     return Success;
   }
 
-  void addLocal(const Scope::Local &Local) override {
+  void addLocal(Scope::Local Local) override {
     if (!Idx) {
       Idx = static_cast<unsigned>(this->Ctx->Descriptors.size());
       this->Ctx->Descriptors.emplace_back();
       this->Ctx->emitInitScope(*Idx, {});
     }
 
+    Local.EnabledByDefault = this->LocalsAlwaysEnabled;
     this->Ctx->Descriptors[*Idx].emplace_back(Local);
   }
 
+  /// Force-initialize this scope. Usually, scopes are lazily initialized when
+  /// the first local variable is created, but in scenarios with conditional
+  /// operators, we need to ensure the scope is initialized just in case one
+  /// of the arms will create a local and the other won't. In such a case, the
+  /// InitScope() op would be part of the arm that created the local.
+  void forceInit() override {
+    if (!Idx) {
+      Idx = static_cast<unsigned>(this->Ctx->Descriptors.size());
+      this->Ctx->Descriptors.emplace_back();
+      this->Ctx->emitInitScope(*Idx, {});
+    }
+  }
+
   bool emitDestructors(const Expr *E = nullptr) override {
     if (!Idx)
       return true;
+
     // Emit destructor calls for local variables of record
     // type with a destructor.
     for (Scope::Local &Local : llvm::reverse(this->Ctx->Descriptors[*Idx])) {
       if (Local.Desc->hasTrivialDtor())
         continue;
-      if (!this->Ctx->emitGetPtrLocal(Local.Offset, E))
-        return false;
-      if (!this->Ctx->emitDestructionPop(Local.Desc, Local.Desc->getLoc()))
-        return false;
+
+      if (!Local.EnabledByDefault) {
+        typename Emitter::LabelTy EndLabel = this->Ctx->getLabel();
+        if (!this->Ctx->emitGetLocalEnabled(Local.Offset, E))
+          return false;
+        if (!this->Ctx->jumpFalse(EndLabel))
+          return false;
+
+        if (!this->Ctx->emitGetPtrLocal(Local.Offset, E))
+          return false;
+
+        if (!this->Ctx->emitDestructionPop(Local.Desc, Local.Desc->getLoc()))
+          return false;
+
+        this->Ctx->emitLabel(EndLabel);
+      } else {
+        if (!this->Ctx->emitGetPtrLocal(Local.Offset, E))
+          return false;
+        if (!this->Ctx->emitDestructionPop(Local.Desc, Local.Desc->getLoc()))
+          return false;
+      }
 
       removeIfStoredOpaqueValue(Local);
     }
@@ -668,22 +672,29 @@ public:
   ~InitLinkScope() { this->Ctx->InitStack.pop_back(); }
 
-private:
+public:
   Compiler<Emitter> *Ctx;
 };
 
 template <class Emitter> class InitStackScope final {
 public:
   InitStackScope(Compiler<Emitter> *Ctx, bool Active)
-      : Ctx(Ctx), OldValue(Ctx->InitStackActive) {
+      : Ctx(Ctx), OldValue(Ctx->InitStackActive), Active(Active) {
     Ctx->InitStackActive = Active;
+    if (Active)
+      Ctx->InitStack.push_back(InitLink::DIE());
   }
 
-  ~InitStackScope() { this->Ctx->InitStackActive = OldValue; }
+  ~InitStackScope() {
+    this->Ctx->InitStackActive = OldValue;
+    if (Active)
+      Ctx->InitStack.pop_back();
+  }
 
 private:
   Compiler<Emitter> *Ctx;
   bool OldValue;
+  bool Active;
 };
 
 } // namespace interp
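The new K_DIE link marks a CXXDefaultInitExpr on the init stack (pushed by InitStackScope when Active), letting VisitCXXThisExpr recover the object under construction while a default member initializer is evaluated. The shape of code involved, as a small sketch:

    struct S {
      int A = 10;
      int B = A + 1; // CXXDefaultInitExpr: evaluating this initializer has
                     // to find 'this' via the init link stack.
      constexpr S() {}
    };
    static_assert(S().B == 11);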
diff --git a/clang/lib/AST/ByteCode/Context.cpp b/clang/lib/AST/ByteCode/Context.cpp
index 12bf3a3..74ec986 100644
--- a/clang/lib/AST/ByteCode/Context.cpp
+++ b/clang/lib/AST/ByteCode/Context.cpp
@@ -21,7 +21,6 @@
 #include "clang/AST/ASTLambda.h"
 #include "clang/AST/Expr.h"
 #include "clang/Basic/TargetInfo.h"
-#include "llvm/Support/SystemZ/zOSSupport.h"
 
 using namespace clang;
 using namespace clang::interp;
diff --git a/clang/lib/AST/ByteCode/Context.h b/clang/lib/AST/ByteCode/Context.h
index f5fa977..a21bb3e 100644
--- a/clang/lib/AST/ByteCode/Context.h
+++ b/clang/lib/AST/ByteCode/Context.h
@@ -98,20 +98,22 @@ public:
     return classify(E->getType());
   }
 
-  bool canClassify(QualType T) {
+  bool canClassify(QualType T) const {
     if (const auto *BT = dyn_cast<BuiltinType>(T)) {
       if (BT->isInteger() || BT->isFloatingPoint())
         return true;
       if (BT->getKind() == BuiltinType::Bool)
         return true;
     }
+    if (T->isPointerOrReferenceType())
+      return true;
     if (T->isArrayType() || T->isRecordType() || T->isAnyComplexType() ||
         T->isVectorType())
       return false;
     return classify(T) != std::nullopt;
   }
 
-  bool canClassify(const Expr *E) {
+  bool canClassify(const Expr *E) const {
     if (E->isGLValue())
       return true;
     return canClassify(E->getType());
diff --git a/clang/lib/AST/ByteCode/Disasm.cpp b/clang/lib/AST/ByteCode/Disasm.cpp
index fd0903f..35937e3 100644
--- a/clang/lib/AST/ByteCode/Disasm.cpp
+++ b/clang/lib/AST/ByteCode/Disasm.cpp
@@ -138,9 +138,16 @@ static size_t getNumDisplayWidth(size_t N) {
   return L;
 }
 
-LLVM_DUMP_METHOD void Function::dump() const { dump(llvm::errs()); }
+LLVM_DUMP_METHOD void Function::dump(CodePtr PC) const {
+  dump(llvm::errs(), PC);
+}
 
-LLVM_DUMP_METHOD void Function::dump(llvm::raw_ostream &OS) const {
+LLVM_DUMP_METHOD void Function::dump(llvm::raw_ostream &OS,
+                                     CodePtr OpPC) const {
+  if (OpPC) {
+    assert(OpPC >= getCodeBegin());
+    assert(OpPC <= getCodeEnd());
+  }
   {
     ColorScope SC(OS, true, {llvm::raw_ostream::BRIGHT_GREEN, true});
     OS << getName() << " " << (const void *)this << "\n";
@@ -154,6 +161,7 @@ LLVM_DUMP_METHOD void Function::dump(llvm::raw_ostream &OS) const {
     size_t Addr;
     std::string Op;
     bool IsJump;
+    bool CurrentOp = false;
     llvm::SmallVector<std::string> Args;
   };
@@ -171,6 +179,7 @@ LLVM_DUMP_METHOD void Function::dump(llvm::raw_ostream &OS) const {
     auto Op = PC.read<Opcode>();
     Text.Addr = Addr;
     Text.IsJump = isJumpOpcode(Op);
+    Text.CurrentOp = (PC == OpPC);
     switch (Op) {
 #define GET_DISASM
 #include "Opcodes.inc"
@@ -198,9 +207,15 @@ LLVM_DUMP_METHOD void Function::dump(llvm::raw_ostream &OS) const {
   Text.reserve(Code.size());
   size_t LongestLine = 0;
   // Print code to a string, one at a time.
-  for (auto C : Code) {
+  for (const auto &C : Code) {
     std::string Line;
     llvm::raw_string_ostream LS(Line);
+    if (OpPC) {
+      if (C.CurrentOp)
+        LS << " * ";
+      else
+        LS << "   ";
+    }
     LS << C.Addr;
     LS.indent(LongestAddr - getNumDisplayWidth(C.Addr) + 4);
     LS << C.Op;
@@ -436,8 +451,28 @@ LLVM_DUMP_METHOD void Descriptor::dumpFull(unsigned Offset,
       FO += ElemDesc->getAllocSize();
     }
+  } else if (isPrimitiveArray()) {
+    OS.indent(Spaces) << "Elements: " << getNumElems() << '\n';
+    OS.indent(Spaces) << "Element type: " << primTypeToString(getPrimType())
+                      << '\n';
+    unsigned FO = Offset + sizeof(InitMapPtr);
+    for (unsigned I = 0; I != getNumElems(); ++I) {
+      OS.indent(Spaces) << "Element " << I << " offset: " << FO << '\n';
+      FO += getElemSize();
+    }
   } else if (isRecord()) {
     ElemRecord->dump(OS, Indent + 1, Offset);
+    unsigned I = 0;
+    for (const Record::Field &F : ElemRecord->fields()) {
+      OS.indent(Spaces) << "- Field " << I << ": ";
+      {
+        ColorScope SC(OS, true, {llvm::raw_ostream::BRIGHT_RED, true});
+        OS << F.Decl->getName();
+      }
+      OS << ". Offset " << (Offset + F.Offset) << "\n";
Offset " << (Offset + F.Offset) << "\n"; + F.Desc->dumpFull(Offset + F.Offset, Indent + 1); + ++I; + } } else if (isPrimitive()) { } else { } @@ -484,8 +519,14 @@ LLVM_DUMP_METHOD void InterpFrame::dump(llvm::raw_ostream &OS, OS << " (" << F->getName() << ")"; } OS << "\n"; - OS.indent(Spaces) << "This: " << getThis() << "\n"; - OS.indent(Spaces) << "RVO: " << getRVOPtr() << "\n"; + if (hasThisPointer()) + OS.indent(Spaces) << "This: " << getThis() << "\n"; + else + OS.indent(Spaces) << "This: -\n"; + if (Func && Func->hasRVO()) + OS.indent(Spaces) << "RVO: " << getRVOPtr() << "\n"; + else + OS.indent(Spaces) << "RVO: -\n"; OS.indent(Spaces) << "Depth: " << Depth << "\n"; OS.indent(Spaces) << "ArgSize: " << ArgSize << "\n"; OS.indent(Spaces) << "Args: " << (void *)Args << "\n"; diff --git a/clang/lib/AST/ByteCode/EvalEmitter.cpp b/clang/lib/AST/ByteCode/EvalEmitter.cpp index 0073217..a2e01ef 100644 --- a/clang/lib/AST/ByteCode/EvalEmitter.cpp +++ b/clang/lib/AST/ByteCode/EvalEmitter.cpp @@ -113,7 +113,7 @@ Scope::Local EvalEmitter::createLocal(Descriptor *D) { InlineDescriptor &Desc = *reinterpret_cast<InlineDescriptor *>(B->rawData()); Desc.Desc = D; Desc.Offset = sizeof(InlineDescriptor); - Desc.IsActive = true; + Desc.IsActive = false; Desc.IsBase = false; Desc.IsFieldMutable = false; Desc.IsConst = false; @@ -322,6 +322,33 @@ bool EvalEmitter::emitDestroy(uint32_t I, SourceInfo Info) { return true; } +bool EvalEmitter::emitGetLocalEnabled(uint32_t I, SourceInfo Info) { + if (!isActive()) + return true; + + Block *B = getLocal(I); + const InlineDescriptor &Desc = + *reinterpret_cast<InlineDescriptor *>(B->rawData()); + + S.Stk.push<bool>(Desc.IsActive); + return true; +} + +bool EvalEmitter::emitEnableLocal(uint32_t I, SourceInfo Info) { + if (!isActive()) + return true; + + // FIXME: This is a little dirty, but to avoid adding a flag to + // InlineDescriptor that's only ever useful on the toplevel of local + // variables, we reuse the IsActive flag for the enabled state. We should + // probably use a different struct than InlineDescriptor for the block-level + // inline descriptor of local varaibles. + Block *B = getLocal(I); + InlineDescriptor &Desc = *reinterpret_cast<InlineDescriptor *>(B->rawData()); + Desc.IsActive = true; + return true; +} + /// Global temporaries (LifetimeExtendedTemporary) carry their value /// around as an APValue, which codegen accesses. /// We set their value once when creating them, but we don't update it diff --git a/clang/lib/AST/ByteCode/Floating.h b/clang/lib/AST/ByteCode/Floating.h index 659892e..cc918dc 100644 --- a/clang/lib/AST/ByteCode/Floating.h +++ b/clang/lib/AST/ByteCode/Floating.h @@ -45,7 +45,8 @@ private: if (singleWord()) return APFloat(getSemantics(), APInt(BitWidth, Val)); unsigned NumWords = numWords(); - return APFloat(getSemantics(), APInt(BitWidth, NumWords, Memory)); + return APFloat(getSemantics(), + APInt(BitWidth, llvm::ArrayRef(Memory, NumWords))); } public: diff --git a/clang/lib/AST/ByteCode/Function.h b/clang/lib/AST/ByteCode/Function.h index 95add58..80283af 100644 --- a/clang/lib/AST/ByteCode/Function.h +++ b/clang/lib/AST/ByteCode/Function.h @@ -41,6 +41,8 @@ public: unsigned Offset; /// Descriptor of the local. Descriptor *Desc; + /// If the cleanup for this local should be emitted. + bool EnabledByDefault = true; }; using LocalVectorTy = llvm::SmallVector<Local, 8>; @@ -310,8 +312,8 @@ private: public: /// Dumps the disassembled bytecode to \c llvm::errs(). 
-  void dump() const;
-  void dump(llvm::raw_ostream &OS) const;
+  void dump(CodePtr PC = {}) const;
+  void dump(llvm::raw_ostream &OS, CodePtr PC = {}) const;
 };
 
 } // namespace interp
diff --git a/clang/lib/AST/ByteCode/Integral.h b/clang/lib/AST/ByteCode/Integral.h
index 1318024..e90f1a9 100644
--- a/clang/lib/AST/ByteCode/Integral.h
+++ b/clang/lib/AST/ByteCode/Integral.h
@@ -202,30 +202,21 @@ public:
   static Integral min(unsigned NumBits) { return Integral(Min); }
   static Integral max(unsigned NumBits) { return Integral(Max); }
+  static Integral zero(unsigned BitWidth = 0) { return from(0); }
 
-  template <typename ValT> static Integral from(ValT Value) {
-    if constexpr (std::is_integral<ValT>::value)
+  template <typename ValT>
+  static Integral from(ValT Value, unsigned NumBits = 0) {
+    if constexpr (std::is_integral_v<ValT>)
       return Integral(Value);
     else
-      return Integral::from(static_cast<Integral::ReprT>(Value));
+      return Integral(static_cast<Integral::ReprT>(Value));
   }
 
   template <unsigned SrcBits, bool SrcSign>
-  static std::enable_if_t<SrcBits != 0, Integral>
-  from(Integral<SrcBits, SrcSign> Value) {
+  static Integral from(Integral<SrcBits, SrcSign> Value) {
     return Integral(Value.V);
   }
 
-  static Integral zero(unsigned BitWidth = 0) { return from(0); }
-
-  template <typename T> static Integral from(T Value, unsigned NumBits) {
-    return Integral(Value);
-  }
-
-  static bool inRange(int64_t Value, unsigned NumBits) {
-    return CheckRange<ReprT, Min, Max>(Value);
-  }
-
   static bool increment(Integral A, Integral *R) {
     return add(A, Integral(ReprT(1)), A.bitWidth(), R);
   }
@@ -328,13 +319,6 @@ private:
       return false;
     }
   }
-  template <typename T, T Min, T Max> static bool CheckRange(int64_t V) {
-    if constexpr (std::is_signed_v<T>) {
-      return Min <= V && V <= Max;
-    } else {
-      return V >= 0 && static_cast<uint64_t>(V) <= Max;
-    }
-  }
 };
 
 template <unsigned Bits, bool Signed>
diff --git a/clang/lib/AST/ByteCode/IntegralAP.h b/clang/lib/AST/ByteCode/IntegralAP.h
index 6683db9..b11e6ee 100644
--- a/clang/lib/AST/ByteCode/IntegralAP.h
+++ b/clang/lib/AST/ByteCode/IntegralAP.h
@@ -63,7 +63,7 @@ public:
     if (singleWord())
       return APInt(BitWidth, Val, Signed);
     unsigned NumWords = llvm::APInt::getNumWords(BitWidth);
-    return llvm::APInt(BitWidth, NumWords, Memory);
+    return llvm::APInt(BitWidth, llvm::ArrayRef(Memory, NumWords));
  }
 
 public:
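Both Floating.h (above) and IntegralAP.h switch from the raw-pointer APInt constructor to the ArrayRef overload, presumably to move off the older (BitWidth, NumWords, Words) signature. The equivalent construction in isolation (function name is illustrative):

    #include "llvm/ADT/APInt.h"
    #include "llvm/ADT/ArrayRef.h"

    llvm::APInt fromWords(unsigned BitWidth, const uint64_t *Words) {
      unsigned NumWords = llvm::APInt::getNumWords(BitWidth);
      // ArrayRef carries the word count together with the pointer.
      return llvm::APInt(BitWidth, llvm::ArrayRef<uint64_t>(Words, NumWords));
    }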
diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp
index a2fb0fb..80ef656 100644
--- a/clang/lib/AST/ByteCode/Interp.cpp
+++ b/clang/lib/AST/ByteCode/Interp.cpp
@@ -919,33 +919,8 @@ bool CheckInit(InterpState &S, CodePtr OpPC, const Pointer &Ptr) {
   return true;
 }
 
-static bool CheckCallable(InterpState &S, CodePtr OpPC, const Function *F) {
-
-  if (F->isVirtual() && !S.getLangOpts().CPlusPlus20) {
-    const SourceLocation &Loc = S.Current->getLocation(OpPC);
-    S.CCEDiag(Loc, diag::note_constexpr_virtual_call);
-    return false;
-  }
-
-  if (S.checkingPotentialConstantExpression() && S.Current->getDepth() != 0)
-    return false;
-
-  if (F->isValid() && F->hasBody() && F->isConstexpr())
-    return true;
-
-  const FunctionDecl *DiagDecl = F->getDecl();
-  const FunctionDecl *Definition = nullptr;
-  DiagDecl->getBody(Definition);
-
-  if (!Definition && S.checkingPotentialConstantExpression() &&
-      DiagDecl->isConstexpr()) {
-    return false;
-  }
-
-  // Implicitly constexpr.
-  if (F->isLambdaStaticInvoker())
-    return true;
-
+static bool diagnoseCallableDecl(InterpState &S, CodePtr OpPC,
+                                 const FunctionDecl *DiagDecl) {
   // Bail out if the function declaration itself is invalid. We will
   // have produced a relevant diagnostic while parsing it, so just
   // note the problematic sub-expression.
@@ -953,11 +928,10 @@ static bool CheckCallable(InterpState &S, CodePtr OpPC, const Function *F) {
     return Invalid(S, OpPC);
 
   // Diagnose failed assertions specially.
-  if (S.Current->getLocation(OpPC).isMacroID() &&
-      F->getDecl()->getIdentifier()) {
+  if (S.Current->getLocation(OpPC).isMacroID() && DiagDecl->getIdentifier()) {
     // FIXME: Instead of checking for an implementation-defined function,
     // check and evaluate the assert() macro.
-    StringRef Name = F->getDecl()->getName();
+    StringRef Name = DiagDecl->getName();
     bool AssertFailed =
         Name == "__assert_rtn" || Name == "__assert_fail" || Name == "_wassert";
     if (AssertFailed) {
@@ -1004,7 +978,7 @@ static bool CheckCallable(InterpState &S, CodePtr OpPC, const Function *F) {
   // for a constant expression. It might be defined at the point we're
   // actually calling it.
   bool IsExtern = DiagDecl->getStorageClass() == SC_Extern;
-  bool IsDefined = F->isDefined();
+  bool IsDefined = DiagDecl->isDefined();
   if (!IsDefined && !IsExtern && DiagDecl->isConstexpr() &&
       S.checkingPotentialConstantExpression())
     return false;
@@ -1027,6 +1001,35 @@ static bool CheckCallable(InterpState &S, CodePtr OpPC, const Function *F) {
   return false;
 }
 
+static bool CheckCallable(InterpState &S, CodePtr OpPC, const Function *F) {
+  if (F->isVirtual() && !S.getLangOpts().CPlusPlus20) {
+    const SourceLocation &Loc = S.Current->getLocation(OpPC);
+    S.CCEDiag(Loc, diag::note_constexpr_virtual_call);
+    return false;
+  }
+
+  if (S.checkingPotentialConstantExpression() && S.Current->getDepth() != 0)
+    return false;
+
+  if (F->isValid() && F->hasBody() && F->isConstexpr())
+    return true;
+
+  const FunctionDecl *DiagDecl = F->getDecl();
+  const FunctionDecl *Definition = nullptr;
+  DiagDecl->getBody(Definition);
+
+  if (!Definition && S.checkingPotentialConstantExpression() &&
+      DiagDecl->isConstexpr()) {
+    return false;
+  }
+
+  // Implicitly constexpr.
+  if (F->isLambdaStaticInvoker())
+    return true;
+
+  return diagnoseCallableDecl(S, OpPC, DiagDecl);
+}
+
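diagnoseCallableDecl now carries the diagnostics shared between CheckCallable and the new CheckFunctionDecl opcode (declared below). A typical failure mode it reports, sketched with illustrative names:

    constexpr int declaredOnly(int X);  // constexpr, but never defined

    constexpr int call(int X) { return declaredOnly(X); }

    // constexpr int R = call(1);
    // error: not a constant expression; note: undefined function
    // 'declaredOnly' cannot be used in a constant expression.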
 static bool CheckCallDepth(InterpState &S, CodePtr OpPC) {
   if ((S.Current->getDepth() + 1) > S.getLangOpts().ConstexprCallDepth) {
     S.FFDiag(S.Current->getSource(OpPC),
@@ -1404,7 +1407,8 @@ bool CheckLiteralType(InterpState &S, CodePtr OpPC, const Type *T) {
   // http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#1677
   // Therefore, we use the C++1y behavior.
-  if (S.Current->getFunction() && S.Current->getFunction()->isConstructor() &&
+  if (!S.Current->isBottomFrame() &&
+      S.Current->getFunction()->isConstructor() &&
       S.Current->getThis().getDeclDesc()->asDecl() == S.EvaluatingDecl) {
     return true;
   }
@@ -1431,8 +1435,12 @@ static bool getField(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
     return false;
 
   if (Ptr.isIntegralPointer()) {
-    S.Stk.push<Pointer>(Ptr.asIntPointer().atOffset(S.getASTContext(), Off));
-    return true;
+    if (std::optional<IntPointer> IntPtr =
+            Ptr.asIntPointer().atOffset(S.getASTContext(), Off)) {
+      S.Stk.push<Pointer>(std::move(*IntPtr));
+      return true;
+    }
+    return false;
   }
 
   if (!Ptr.isBlockPointer()) {
@@ -1445,6 +1453,10 @@ static bool getField(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
     return false;
   }
 
+  // We can't get the field of something that's not a record.
+  if (!Ptr.getFieldDesc()->isRecord())
+    return false;
+
   if ((Ptr.getByteOffset() + Off) >= Ptr.block()->getSize())
     return false;
@@ -1500,6 +1512,21 @@ bool CheckDestructor(InterpState &S, CodePtr OpPC, const Pointer &Ptr) {
   return CheckActive(S, OpPC, Ptr, AK_Destroy);
 }
 
+/// Opcode. Check if the function decl can be called at compile time.
+bool CheckFunctionDecl(InterpState &S, CodePtr OpPC, const FunctionDecl *FD) {
+  if (S.checkingPotentialConstantExpression() && S.Current->getDepth() != 0)
+    return false;
+
+  const FunctionDecl *Definition = nullptr;
+  const Stmt *Body = FD->getBody(Definition);
+
+  if (Definition && Body &&
+      (Definition->isConstexpr() || Definition->hasAttr<MSConstexprAttr>()))
+    return true;
+
+  return diagnoseCallableDecl(S, OpPC, FD);
+}
+
 static void compileFunction(InterpState &S, const Function *Func) {
   const FunctionDecl *Definition = Func->getDecl()->getDefinition();
   if (!Definition)
@@ -2058,15 +2085,15 @@ bool InvalidShuffleVectorIndex(InterpState &S, CodePtr OpPC, uint32_t Index) {
 
 bool CheckPointerToIntegralCast(InterpState &S, CodePtr OpPC,
                                 const Pointer &Ptr, unsigned BitWidth) {
+  const SourceInfo &E = S.Current->getSource(OpPC);
+  S.CCEDiag(E, diag::note_constexpr_invalid_cast)
+      << 2 << S.getLangOpts().CPlusPlus << S.Current->getRange(OpPC);
+
   if (Ptr.isDummy())
     return false;
   if (Ptr.isFunctionPointer())
     return true;
 
-  const SourceInfo &E = S.Current->getSource(OpPC);
-  S.CCEDiag(E, diag::note_constexpr_invalid_cast)
-      << 2 << S.getLangOpts().CPlusPlus << S.Current->getRange(OpPC);
-
   if (Ptr.isBlockPointer() && !Ptr.isZero()) {
     // Only allow based lvalue casts if they are lossless.
     if (S.getASTContext().getTargetInfo().getPointerWidth(LangAS::Default) !=
diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h
index 5ab9c8e..d8b8b20 100644
--- a/clang/lib/AST/ByteCode/Interp.h
+++ b/clang/lib/AST/ByteCode/Interp.h
@@ -117,6 +117,7 @@ bool CheckBitCast(InterpState &S, CodePtr OpPC, bool HasIndeterminateBits,
                   bool TargetIsUCharOrByte);
 bool CheckBCPResult(InterpState &S, const Pointer &Ptr);
 bool CheckDestructor(InterpState &S, CodePtr OpPC, const Pointer &Ptr);
+bool CheckFunctionDecl(InterpState &S, CodePtr OpPC, const FunctionDecl *FD);
 
 bool handleFixedPointOverflow(InterpState &S, CodePtr OpPC,
                               const FixedPoint &FP);
@@ -1915,6 +1916,9 @@ bool Load(InterpState &S, CodePtr OpPC) {
     return false;
   if (!Ptr.isBlockPointer())
     return false;
+  if (const Descriptor *D = Ptr.getFieldDesc();
+      !(D->isPrimitive() || D->isPrimitiveArray()) || D->getPrimType() != Name)
+    return false;
   S.Stk.push<T>(Ptr.deref<T>());
   return true;
 }
@@ -1926,6 +1930,9 @@ bool LoadPop(InterpState &S, CodePtr OpPC) {
     return false;
   if (!Ptr.isBlockPointer())
     return false;
+  if (const Descriptor *D = Ptr.getFieldDesc();
+      !(D->isPrimitive() || D->isPrimitiveArray()) || D->getPrimType() != Name)
+    return false;
   S.Stk.push<T>(Ptr.deref<T>());
   return true;
 }
@@ -2310,13 +2317,11 @@ std::optional<Pointer> OffsetHelper(InterpState &S, CodePtr OpPC,
 template <PrimType Name, class T = typename PrimConv<Name>::T>
 bool AddOffset(InterpState &S, CodePtr OpPC) {
   const T &Offset = S.Stk.pop<T>();
-  Pointer Ptr = S.Stk.pop<Pointer>();
-  if (Ptr.isBlockPointer())
-    Ptr = Ptr.expand();
+  const Pointer &Ptr = S.Stk.pop<Pointer>().expand();
 
   if (std::optional<Pointer> Result = OffsetHelper<T, ArithOp::Add>(
           S, OpPC, Offset, Ptr, /*IsPointerArith=*/true)) {
-    S.Stk.push<Pointer>(*Result);
+    S.Stk.push<Pointer>(Result->narrow());
     return true;
   }
   return false;
@@ -2325,11 +2330,11 @@ bool AddOffset(InterpState &S, CodePtr OpPC) {
 template <PrimType Name, class T = typename PrimConv<Name>::T>
 bool SubOffset(InterpState &S, CodePtr OpPC) {
   const T &Offset = S.Stk.pop<T>();
-  const Pointer &Ptr = S.Stk.pop<Pointer>();
+  const Pointer &Ptr = S.Stk.pop<Pointer>().expand();
 
   if (std::optional<Pointer> Result = OffsetHelper<T, ArithOp::Sub>(
           S, OpPC, Offset, Ptr, /*IsPointerArith=*/true)) {
-    S.Stk.push<Pointer>(*Result);
+    S.Stk.push<Pointer>(Result->narrow());
     return true;
   }
   return false;
@@ -2355,7 +2360,7 @@ static inline bool IncDecPtrHelper(InterpState &S, CodePtr OpPC,
   if (std::optional<Pointer> Result =
           OffsetHelper<OneT, Op>(S, OpPC, One, P, /*IsPointerArith=*/true)) {
     // Store the new value.
-    Ptr.deref<Pointer>() = *Result;
+    Ptr.deref<Pointer>() = Result->narrow();
     return true;
   }
   return false;
@@ -2383,9 +2388,9 @@ static inline bool DecPtr(InterpState &S, CodePtr OpPC) {
 /// 2) Pops another Pointer from the stack.
 /// 3) Pushes the difference of the indices of the two pointers on the stack.
 template <PrimType Name, class T = typename PrimConv<Name>::T>
-inline bool SubPtr(InterpState &S, CodePtr OpPC) {
-  const Pointer &LHS = S.Stk.pop<Pointer>();
-  const Pointer &RHS = S.Stk.pop<Pointer>();
+inline bool SubPtr(InterpState &S, CodePtr OpPC, bool ElemSizeIsZero) {
+  const Pointer &LHS = S.Stk.pop<Pointer>().expand();
+  const Pointer &RHS = S.Stk.pop<Pointer>().expand();
 
   if (!Pointer::hasSameBase(LHS, RHS) && S.getLangOpts().CPlusPlus) {
     S.FFDiag(S.Current->getSource(OpPC),
@@ -2395,25 +2400,23 @@ inline bool SubPtr(InterpState &S, CodePtr OpPC) {
     return false;
   }
 
-  if (LHS == RHS) {
-    S.Stk.push<T>();
-    return true;
-  }
+  if (ElemSizeIsZero) {
+    QualType PtrT = LHS.getType();
+    while (auto *AT = dyn_cast<ArrayType>(PtrT))
+      PtrT = AT->getElementType();
 
-  for (const Pointer &P : {LHS, RHS}) {
-    if (P.isZeroSizeArray()) {
-      QualType PtrT = P.getType();
-      while (auto *AT = dyn_cast<ArrayType>(PtrT))
-        PtrT = AT->getElementType();
+    QualType ArrayTy = S.getASTContext().getConstantArrayType(
+        PtrT, APInt::getZero(1), nullptr, ArraySizeModifier::Normal, 0);
+    S.FFDiag(S.Current->getSource(OpPC),
+             diag::note_constexpr_pointer_subtraction_zero_size)
+        << ArrayTy;
 
-      QualType ArrayTy = S.getASTContext().getConstantArrayType(
-          PtrT, APInt::getZero(1), nullptr, ArraySizeModifier::Normal, 0);
-      S.FFDiag(S.Current->getSource(OpPC),
-               diag::note_constexpr_pointer_subtraction_zero_size)
-          << ArrayTy;
+    return false;
+  }
 
-      return false;
-    }
+  if (LHS == RHS) {
+    S.Stk.push<T>();
+    return true;
   }
 
   int64_t A64 =
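The ElemSizeIsZero operand is computed at compile time in VisitPointerArithBinOp (Compiler.cpp above) from the pointee type, so the interpreter no longer probes the pointers themselves for zero-size arrays. What it rejects, roughly, relies on the GNU zero-length array extension, under which sizeof(int[0]) is 0 and the element-size divisor of a pointer difference would be zero (an illustrative sketch):

    using Z = int[0];  // GNU extension: zero-size array type

    constexpr long diff(Z *A, Z *B) {
      return A - B;    // diagnosed with
                       // note_constexpr_pointer_subtraction_zero_size
    }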
@@ -2471,6 +2474,18 @@ inline bool InitScope(InterpState &S, CodePtr OpPC, uint32_t I) {
   return true;
 }
 
+inline bool EnableLocal(InterpState &S, CodePtr OpPC, uint32_t I) {
+  assert(!S.Current->isLocalEnabled(I));
+  S.Current->enableLocal(I);
+  return true;
+}
+
+inline bool GetLocalEnabled(InterpState &S, CodePtr OpPC, uint32_t I) {
+  assert(S.Current);
+  S.Stk.push<bool>(S.Current->isLocalEnabled(I));
+  return true;
+}
+
 //===----------------------------------------------------------------------===//
 // Cast, CastFP
 //===----------------------------------------------------------------------===//
@@ -2631,10 +2646,6 @@ template <PrimType Name, class T = typename PrimConv<Name>::T>
 bool CastPointerIntegral(InterpState &S, CodePtr OpPC) {
   const Pointer &Ptr = S.Stk.pop<Pointer>();
 
-  S.CCEDiag(S.Current->getSource(OpPC), diag::note_constexpr_invalid_cast)
-      << diag::ConstexprInvalidCastKind::ThisConversionOrReinterpret
-      << S.getLangOpts().CPlusPlus << S.Current->getRange(OpPC);
-
   if (!CheckPointerToIntegralCast(S, OpPC, Ptr, T::bitWidth()))
     return Invalid(S, OpPC);
@@ -3078,7 +3089,7 @@ inline bool ArrayElemPtr(InterpState &S, CodePtr OpPC) {
     S.Stk.push<Pointer>(Ptr.atIndex(0).narrow());
     return true;
   }
-  S.Stk.push<Pointer>(Ptr);
+  S.Stk.push<Pointer>(Ptr.narrow());
   return true;
 }
@@ -3109,7 +3120,7 @@ inline bool ArrayElemPtrPop(InterpState &S, CodePtr OpPC) {
     S.Stk.push<Pointer>(Ptr.atIndex(0).narrow());
     return true;
   }
-  S.Stk.push<Pointer>(Ptr);
+  S.Stk.push<Pointer>(Ptr.narrow());
   return true;
 }
@@ -3184,7 +3195,7 @@ inline bool ArrayDecay(InterpState &S, CodePtr OpPC) {
   }
 
   if (Ptr.isRoot() || !Ptr.isUnknownSizeArray()) {
-    S.Stk.push<Pointer>(Ptr.atIndex(0));
+    S.Stk.push<Pointer>(Ptr.atIndex(0).narrow());
     return true;
   }
@@ -3283,17 +3294,69 @@ inline bool SideEffect(InterpState &S, CodePtr OpPC) {
   return S.noteSideEffect();
 }
 
+inline bool CheckBitCast(InterpState &S, CodePtr OpPC, const Type *TargetType,
+                         bool SrcIsVoidPtr) {
+  const auto &Ptr = S.Stk.peek<Pointer>();
+  if (Ptr.isZero())
+    return true;
+  if (!Ptr.isBlockPointer())
+    return true;
+
+  if (TargetType->isIntegerType())
+    return true;
+
+  if (SrcIsVoidPtr && S.getLangOpts().CPlusPlus) {
+    bool HasValidResult = !Ptr.isZero();
+
+    if (HasValidResult) {
+      if (S.getStdAllocatorCaller("allocate"))
+        return true;
+
+      const auto &E = cast<CastExpr>(S.Current->getExpr(OpPC));
+      if (S.getLangOpts().CPlusPlus26 &&
+          S.getASTContext().hasSimilarType(Ptr.getType(),
+                                           QualType(TargetType, 0)))
+        return true;
+
+      S.CCEDiag(E, diag::note_constexpr_invalid_void_star_cast)
+          << E->getSubExpr()->getType() << S.getLangOpts().CPlusPlus26
+          << Ptr.getType().getCanonicalType()
+          << E->getType()->getPointeeType();
+    } else if (!S.getLangOpts().CPlusPlus26) {
+      const SourceInfo &E = S.Current->getSource(OpPC);
+      S.CCEDiag(E, diag::note_constexpr_invalid_cast)
+          << diag::ConstexprInvalidCastKind::CastFrom << "'void *'"
+          << S.Current->getRange(OpPC);
+    }
+  }
+
+  QualType PtrType = Ptr.getType();
+  if (PtrType->isRecordType() &&
+      PtrType->getAsRecordDecl() != TargetType->getAsRecordDecl()) {
+    S.CCEDiag(S.Current->getSource(OpPC), diag::note_constexpr_invalid_cast)
+        << diag::ConstexprInvalidCastKind::ThisConversionOrReinterpret
+        << S.getLangOpts().CPlusPlus << S.Current->getRange(OpPC);
+    return false;
+  }
+  return true;
+}
+
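In effect, the new CheckBitCast op implements the constant-evaluation rules for pointer bitcasts: casting out of 'void *' is diagnosed before C++26 and allowed in C++26 only when the source and target types are similar, and a record pointer may not be reinterpreted as an unrelated record. A sketch of the void-pointer rule:

    constexpr int I = 12;
    constexpr const void *VP = &I;

    // OK in C++26 (similar types); before C++26 this is diagnosed with
    // note_constexpr_invalid_void_star_cast:
    constexpr const int *IP = static_cast<const int *>(VP);

    // Casting to a dissimilar type, e.g. static_cast<const float *>(VP),
    // remains invalid even in C++26.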
 /// Same here, but only for casts.
 inline bool InvalidCast(InterpState &S, CodePtr OpPC, CastKind Kind,
                         bool Fatal) {
   const SourceLocation &Loc = S.Current->getLocation(OpPC);
 
-  if (Kind == CastKind::Reinterpret) {
+  switch (Kind) {
+  case CastKind::Reinterpret:
     S.CCEDiag(Loc, diag::note_constexpr_invalid_cast)
-        << static_cast<unsigned>(Kind) << S.Current->getRange(OpPC);
+        << diag::ConstexprInvalidCastKind::Reinterpret
+        << S.Current->getRange(OpPC);
     return !Fatal;
-  }
-  if (Kind == CastKind::Volatile) {
+  case CastKind::ReinterpretLike:
+    S.CCEDiag(Loc, diag::note_constexpr_invalid_cast)
+        << diag::ConstexprInvalidCastKind::ThisConversionOrReinterpret
+        << S.getLangOpts().CPlusPlus << S.Current->getRange(OpPC);
+    return !Fatal;
+  case CastKind::Volatile:
     if (!S.checkingPotentialConstantExpression()) {
       const auto *E = cast<CastExpr>(S.Current->getExpr(OpPC));
       if (S.getLangOpts().CPlusPlus)
@@ -3304,14 +3367,13 @@ inline bool InvalidCast(InterpState &S, CodePtr OpPC, CastKind Kind,
     }
 
     return false;
-  }
-  if (Kind == CastKind::Dynamic) {
+  case CastKind::Dynamic:
     assert(!S.getLangOpts().CPlusPlus20);
-    S.CCEDiag(S.Current->getSource(OpPC), diag::note_constexpr_invalid_cast)
+    S.CCEDiag(Loc, diag::note_constexpr_invalid_cast)
         << diag::ConstexprInvalidCastKind::Dynamic;
     return true;
   }
-
+  llvm_unreachable("Unhandled CastKind");
   return false;
 }
diff --git a/clang/lib/AST/ByteCode/InterpBlock.cpp b/clang/lib/AST/ByteCode/InterpBlock.cpp
index 24825ad..dc0178a 100644
--- a/clang/lib/AST/ByteCode/InterpBlock.cpp
+++ b/clang/lib/AST/ByteCode/InterpBlock.cpp
@@ -102,12 +102,21 @@ bool Block::hasPointer(const Pointer *P) const {
 void Block::movePointersTo(Block *B) {
   assert(B != this);
 
+  unsigned MDDiff = static_cast<int>(B->Desc->getMetadataSize()) -
+                    static_cast<int>(Desc->getMetadataSize());
+
   while (Pointers) {
     Pointer *P = Pointers;
 
     this->removePointer(P);
     P->BS.Pointee = B;
+
+    // If the metadata size changed between the two blocks, move the pointer
+    // base/offset. Realistically, this should only happen when we move
+    // pointers from a dummy pointer to a global one.
+    P->BS.Base += MDDiff;
+    P->Offset += MDDiff;
+
     B->addPointer(P);
   }
   assert(!this->hasPointers());
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 8b57b96..59b4896 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -48,6 +48,11 @@ static void discard(InterpStack &Stk, PrimType T) {
   TYPE_SWITCH(T, { Stk.discard<T>(); });
 }
 
+static uint64_t popToUInt64(const InterpState &S, const Expr *E) {
+  INT_TYPE_SWITCH(*S.getContext().classify(E->getType()),
+                  return static_cast<uint64_t>(S.Stk.pop<T>()));
+}
+
 static APSInt popToAPSInt(InterpStack &Stk, PrimType T) {
   INT_TYPE_SWITCH(T, return Stk.pop<T>().toAPSInt());
 }
@@ -167,6 +172,38 @@ static llvm::APSInt convertBoolVectorToInt(const Pointer &Val) {
   return Result;
 }
 
+// Strict double -> float conversion used for X86 PD2PS/cvtsd2ss intrinsics.
+// Reject NaN/Inf/Subnormal inputs and any lossy/inexact conversions.
+static bool convertDoubleToFloatStrict(APFloat Src, Floating &Dst,
+                                       InterpState &S, const Expr *DiagExpr) {
+  if (Src.isInfinity()) {
+    if (S.diagnosing())
+      S.CCEDiag(DiagExpr, diag::note_constexpr_float_arithmetic) << 0;
+    return false;
+  }
+  if (Src.isNaN()) {
+    if (S.diagnosing())
+      S.CCEDiag(DiagExpr, diag::note_constexpr_float_arithmetic) << 1;
+    return false;
+  }
+  APFloat Val = Src;
+  bool LosesInfo = false;
+  APFloat::opStatus Status = Val.convert(
+      APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &LosesInfo);
+  if (LosesInfo || Val.isDenormal()) {
+    if (S.diagnosing())
+      S.CCEDiag(DiagExpr, diag::note_constexpr_float_arithmetic_strict);
+    return false;
+  }
+  if (Status != APFloat::opOK) {
+    if (S.diagnosing())
+      S.CCEDiag(DiagExpr, diag::note_invalid_subexpr_in_const_expr);
+    return false;
+  }
+  Dst.copy(Val);
+  return true;
+}
+
 static bool interp__builtin_is_constant_evaluated(InterpState &S, CodePtr OpPC,
                                                   const InterpFrame *Frame,
                                                   const CallExpr *Call) {
@@ -212,8 +249,7 @@ static bool interp__builtin_strcmp(InterpState &S, CodePtr OpPC,
   uint64_t Limit = ~static_cast<uint64_t>(0);
   if (ID == Builtin::BIstrncmp || ID == Builtin::BI__builtin_strncmp ||
       ID == Builtin::BIwcsncmp || ID == Builtin::BI__builtin_wcsncmp)
-    Limit = popToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(2)))
-                .getZExtValue();
+    Limit = popToUInt64(S, Call->getArg(2));
 
   const Pointer &B = S.Stk.pop<Pointer>();
   const Pointer &A = S.Stk.pop<Pointer>();
@@ -296,7 +332,7 @@ static bool interp__builtin_strcmp(InterpState &S, CodePtr OpPC,
 static bool interp__builtin_strlen(InterpState &S, CodePtr OpPC,
                                    const InterpFrame *Frame,
                                    const CallExpr *Call, unsigned ID) {
-  const Pointer &StrPtr = S.Stk.pop<Pointer>();
+  const Pointer &StrPtr = S.Stk.pop<Pointer>().expand();
 
   if (ID == Builtin::BIstrlen || ID == Builtin::BIwcslen)
     diagnoseNonConstexprBuiltin(S, OpPC, ID);
@@ -972,9 +1008,10 @@ static bool interp__builtin_bswap(InterpState &S, CodePtr OpPC,
                                   const InterpFrame *Frame,
                                   const CallExpr *Call) {
   const APSInt &Val = popToAPSInt(S, Call->getArg(0));
-  assert(Val.getActiveBits() <= 64);
-
-  pushInteger(S, Val.byteSwap(), Call->getType());
+  if (Val.getBitWidth() == 8)
+    pushInteger(S, Val, Call->getType());
+  else
+    pushInteger(S, Val.byteSwap(), Call->getType());
   return true;
 }
@@ -990,7 +1027,7 @@ static bool interp__builtin_atomic_lock_free(InterpState &S, CodePtr OpPC,
   };
 
   const Pointer &Ptr = S.Stk.pop<Pointer>();
-  const APSInt &SizeVal = popToAPSInt(S, Call->getArg(0));
+  uint64_t SizeVal = popToUInt64(S, Call->getArg(0));
 
   // For __atomic_is_lock_free(sizeof(_Atomic(T))), if the size is a power
   // of two less than or equal to the maximum inline atomic width, we know it
@@ -1002,7 +1039,7 @@ static bool interp__builtin_atomic_lock_free(InterpState &S, CodePtr OpPC,
   // x86-64 processors.
 
   // Check power-of-two.
-  CharUnits Size = CharUnits::fromQuantity(SizeVal.getZExtValue());
+  CharUnits Size = CharUnits::fromQuantity(SizeVal);
   if (Size.isPowerOfTwo()) {
     // Check against inlining width.
     unsigned InlineWidthBits =
@@ -1056,9 +1093,9 @@ static bool interp__builtin_c11_atomic_is_lock_free(InterpState &S,
                                                     CodePtr OpPC,
                                                     const InterpFrame *Frame,
                                                     const CallExpr *Call) {
-  const APSInt &SizeVal = popToAPSInt(S, Call->getArg(0));
+  uint64_t SizeVal = popToUInt64(S, Call->getArg(0));
 
-  CharUnits Size = CharUnits::fromQuantity(SizeVal.getZExtValue());
+  CharUnits Size = CharUnits::fromQuantity(SizeVal);
   if (Size.isPowerOfTwo()) {
     // Check against inlining width.
     unsigned InlineWidthBits =
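Both lock-free checks now pop their size argument as a plain uint64_t via popToUInt64. For reference, the folding rule they implement: a power-of-two size no larger than the target's inline atomic width is known to be lock-free at compile time. Whether the assertions below hold depends on the target; assume a typical x86-64 configuration:

    // Folds to true: 4 and 8 are powers of two within the inline width.
    static_assert(__c11_atomic_is_lock_free(4));
    static_assert(__c11_atomic_is_lock_free(8));
    // A non-power-of-two size (e.g. 3) cannot be decided this way and is
    // not usable in a constant expression.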
unsigned InlineWidthBits = @@ -1316,8 +1353,9 @@ static bool interp__builtin_infer_alloc_token(InterpState &S, CodePtr OpPC, uint64_t BitWidth = ASTCtx.getTypeSize(ASTCtx.getSizeType()); auto Mode = ASTCtx.getLangOpts().AllocTokenMode.value_or(llvm::DefaultAllocTokenMode); + auto MaxTokensOpt = ASTCtx.getLangOpts().AllocTokenMax; uint64_t MaxTokens = - ASTCtx.getLangOpts().AllocTokenMax.value_or(~0ULL >> (64 - BitWidth)); + MaxTokensOpt.value_or(0) ? *MaxTokensOpt : (~0ULL >> (64 - BitWidth)); // We do not read any of the arguments; discard them. for (int I = Call->getNumArgs() - 1; I >= 0; --I) @@ -1439,7 +1477,7 @@ static bool interp__builtin_operator_new(InterpState &S, CodePtr OpPC, Allocator.allocate(Desc, NumElems.getZExtValue(), S.Ctx.getEvalID(), DynamicAllocator::Form::Operator); assert(B); - S.Stk.push<Pointer>(Pointer(B).atIndex(0)); + S.Stk.push<Pointer>(Pointer(B).atIndex(0).narrow()); return true; } @@ -1625,51 +1663,6 @@ static bool interp__builtin_elementwise_abs(InterpState &S, CodePtr OpPC, } /// Can be called with an integer or vector as the first and only parameter. -static bool interp__builtin_elementwise_popcount(InterpState &S, CodePtr OpPC, - const InterpFrame *Frame, - const CallExpr *Call, - unsigned BuiltinID) { - assert(Call->getNumArgs() == 1); - if (Call->getArg(0)->getType()->isIntegerType()) { - APSInt Val = popToAPSInt(S, Call->getArg(0)); - - if (BuiltinID == Builtin::BI__builtin_elementwise_popcount) { - pushInteger(S, Val.popcount(), Call->getType()); - } else { - pushInteger(S, Val.reverseBits(), Call->getType()); - } - return true; - } - // Otherwise, the argument must be a vector. - assert(Call->getArg(0)->getType()->isVectorType()); - const Pointer &Arg = S.Stk.pop<Pointer>(); - assert(Arg.getFieldDesc()->isPrimitiveArray()); - const Pointer &Dst = S.Stk.peek<Pointer>(); - assert(Dst.getFieldDesc()->isPrimitiveArray()); - assert(Arg.getFieldDesc()->getNumElems() == - Dst.getFieldDesc()->getNumElems()); - - QualType ElemType = Arg.getFieldDesc()->getElemQualType(); - PrimType ElemT = *S.getContext().classify(ElemType); - unsigned NumElems = Arg.getNumElems(); - - // FIXME: Reading from uninitialized vector elements? - for (unsigned I = 0; I != NumElems; ++I) { - INT_TYPE_SWITCH_NO_BOOL(ElemT, { - if (BuiltinID == Builtin::BI__builtin_elementwise_popcount) { - Dst.elem<T>(I) = T::from(Arg.elem<T>(I).toAPSInt().popcount()); - } else { - Dst.elem<T>(I) = - T::from(Arg.elem<T>(I).toAPSInt().reverseBits().getZExtValue()); - } - }); - } - Dst.initializeAllElements(); - - return true; -} - -/// Can be called with an integer or vector as the first and only parameter. 
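The block deleted above is subsumed by the generic interp__builtin_elementwise_int_unaryop, which this patch extends with vector handling; each builtin now supplies only its APInt transform. A condensed sketch of the resulting dispatch (the lambdas appear verbatim later in this diff):

// popcount:
//   interp__builtin_elementwise_int_unaryop(S, OpPC, Call,
//       [](const APSInt &Src) {
//         return APInt(Src.getBitWidth(), Src.popcount());
//       });
// bitreverse:
//   interp__builtin_elementwise_int_unaryop(S, OpPC, Call,
//       [](const APSInt &Src) { return Src.reverseBits(); });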
static bool interp__builtin_elementwise_countzeroes(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, @@ -1762,11 +1755,9 @@ static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned ID) { assert(Call->getNumArgs() == 3); const ASTContext &ASTCtx = S.getASTContext(); - APSInt Size = popToAPSInt(S, Call->getArg(2)); - const Pointer SrcPtr = S.Stk.pop<Pointer>(); - const Pointer DestPtr = S.Stk.pop<Pointer>(); - - assert(!Size.isSigned() && "memcpy and friends take an unsigned size"); + uint64_t Size = popToUInt64(S, Call->getArg(2)); + Pointer SrcPtr = S.Stk.pop<Pointer>().expand(); + Pointer DestPtr = S.Stk.pop<Pointer>().expand(); if (ID == Builtin::BImemcpy || ID == Builtin::BImemmove) diagnoseNonConstexprBuiltin(S, OpPC, ID); @@ -1779,7 +1770,7 @@ static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC, ID == Builtin::BI__builtin_wmemmove; // If the size is zero, we treat this as always being a valid no-op. - if (Size.isZero()) { + if (Size == 0) { S.Stk.push<Pointer>(DestPtr); return true; } @@ -1841,11 +1832,10 @@ static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC, if (WChar) { uint64_t WCharSize = ASTCtx.getTypeSizeInChars(ASTCtx.getWCharType()).getQuantity(); - Size *= APSInt(APInt(Size.getBitWidth(), WCharSize, /*IsSigned=*/false), - /*IsUnsigend=*/true); + Size *= WCharSize; } - if (Size.urem(DestElemSize) != 0) { + if (Size % DestElemSize != 0) { S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_memcpy_unsupported) << Move << WChar << 0 << DestElemType << Size << DestElemSize; @@ -1878,12 +1868,12 @@ static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC, // Check if we have enough elements to read from and write to. size_t RemainingDestBytes = RemainingDestElems * DestElemSize; size_t RemainingSrcBytes = RemainingSrcElems * SrcElemSize; - if (Size.ugt(RemainingDestBytes) || Size.ugt(RemainingSrcBytes)) { - APInt N = Size.udiv(DestElemSize); + if (Size > RemainingDestBytes || Size > RemainingSrcBytes) { + APInt N = APInt(64, Size / DestElemSize); S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_memcpy_unsupported) - << Move << WChar << (Size.ugt(RemainingSrcBytes) ? 1 : 2) - << DestElemType << toString(N, 10, /*Signed=*/false); + << Move << WChar << (Size > RemainingSrcBytes ? 
1 : 2) << DestElemType + << toString(N, 10, /*Signed=*/false); return false; } @@ -1900,18 +1890,17 @@ static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC, unsigned SrcIndex = SrcP.expand().getIndex() * SrcP.elemSize(); unsigned DstIndex = DestP.expand().getIndex() * DestP.elemSize(); - unsigned N = Size.getZExtValue(); - if ((SrcIndex <= DstIndex && (SrcIndex + N) > DstIndex) || - (DstIndex <= SrcIndex && (DstIndex + N) > SrcIndex)) { + if ((SrcIndex <= DstIndex && (SrcIndex + Size) > DstIndex) || + (DstIndex <= SrcIndex && (DstIndex + Size) > SrcIndex)) { S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_memcpy_overlap) << /*IsWChar=*/false; return false; } } - assert(Size.getZExtValue() % DestElemSize == 0); - if (!DoMemcpy(S, OpPC, SrcPtr, DestPtr, Bytes(Size.getZExtValue()).toBits())) + assert(Size % DestElemSize == 0); + if (!DoMemcpy(S, OpPC, SrcPtr, DestPtr, Bytes(Size).toBits())) return false; S.Stk.push<Pointer>(DestPtr); @@ -1928,7 +1917,7 @@ static bool interp__builtin_memcmp(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, const CallExpr *Call, unsigned ID) { assert(Call->getNumArgs() == 3); - const APSInt &Size = popToAPSInt(S, Call->getArg(2)); + uint64_t Size = popToUInt64(S, Call->getArg(2)); const Pointer &PtrB = S.Stk.pop<Pointer>(); const Pointer &PtrA = S.Stk.pop<Pointer>(); @@ -1936,7 +1925,7 @@ static bool interp__builtin_memcmp(InterpState &S, CodePtr OpPC, ID == Builtin::BIwmemcmp) diagnoseNonConstexprBuiltin(S, OpPC, ID); - if (Size.isZero()) { + if (Size == 0) { pushInteger(S, 0, Call->getType()); return true; } @@ -1964,6 +1953,10 @@ static bool interp__builtin_memcmp(InterpState &S, CodePtr OpPC, if (PtrA.isDummy() || PtrB.isDummy()) return false; + if (!CheckRange(S, OpPC, PtrA, AK_Read) || + !CheckRange(S, OpPC, PtrB, AK_Read)) + return false; + // Now, read both pointers to a buffer and compare those. BitcastBuffer BufferA( Bits(ASTCtx.getTypeSize(ElemTypeA) * PtrA.getNumElems())); @@ -1989,14 +1982,14 @@ static bool interp__builtin_memcmp(InterpState &S, CodePtr OpPC, ElemSize = ASTCtx.getTypeSizeInChars(ASTCtx.getWCharType()).getQuantity(); // The Size given for the wide variants is in wide-char units. Convert it // to bytes. - size_t ByteSize = Size.getZExtValue() * ElemSize; + size_t ByteSize = Size * ElemSize; size_t CmpSize = std::min(MinBufferSize, ByteSize); for (size_t I = 0; I != CmpSize; I += ElemSize) { if (IsWide) { INT_TYPE_SWITCH(*S.getContext().classify(ASTCtx.getWCharType()), { - T A = *reinterpret_cast<T *>(BufferA.Data.get() + I); - T B = *reinterpret_cast<T *>(BufferB.Data.get() + I); + T A = *reinterpret_cast<T *>(BufferA.atByte(I)); + T B = *reinterpret_cast<T *>(BufferB.atByte(I)); if (A < B) { pushInteger(S, -1, Call->getType()); return true; @@ -2007,8 +2000,8 @@ static bool interp__builtin_memcmp(InterpState &S, CodePtr OpPC, } }); } else { - std::byte A = BufferA.Data[I]; - std::byte B = BufferB.Data[I]; + std::byte A = BufferA.deref<std::byte>(Bytes(I)); + std::byte B = BufferB.deref<std::byte>(Bytes(I)); if (A < B) { pushInteger(S, -1, Call->getType()); @@ -2277,7 +2270,7 @@ static bool interp__builtin_object_size(InterpState &S, CodePtr OpPC, // clear, objects are whole variables. If it is set, a closest surrounding // subobject is considered the object a pointer points to. The second bit // determines if maximum or minimum of remaining bytes is computed. 
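// Spelled out, per the documented __builtin_object_size semantics:
//   Kind 0: whole object,      maximum remaining bytes
//   Kind 1: closest subobject, maximum remaining bytes
//   Kind 2: whole object,      minimum remaining bytes
//   Kind 3: closest subobject, minimum remaining bytes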
- unsigned Kind = popToAPSInt(S, Call->getArg(1)).getZExtValue(); + unsigned Kind = popToUInt64(S, Call->getArg(1)); assert(Kind <= 3 && "unexpected kind"); bool UseFieldDesc = (Kind & 1u); bool ReportMinimum = (Kind & 2u); @@ -2405,18 +2398,39 @@ static bool interp__builtin_elementwise_int_unaryop( InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::function_ref<APInt(const APSInt &)> Fn) { assert(Call->getNumArgs() == 1); - assert(Call->getType()->isIntegerType()); // Single integer case. if (!Call->getArg(0)->getType()->isVectorType()) { + assert(Call->getType()->isIntegerType()); APSInt Src = popToAPSInt(S, Call->getArg(0)); APInt Result = Fn(Src); pushInteger(S, APSInt(std::move(Result), !Src.isSigned()), Call->getType()); return true; } - // TODO: Add vector integer handling. - return false; + // Vector case. + const Pointer &Arg = S.Stk.pop<Pointer>(); + assert(Arg.getFieldDesc()->isPrimitiveArray()); + const Pointer &Dst = S.Stk.peek<Pointer>(); + assert(Dst.getFieldDesc()->isPrimitiveArray()); + assert(Arg.getFieldDesc()->getNumElems() == + Dst.getFieldDesc()->getNumElems()); + + QualType ElemType = Arg.getFieldDesc()->getElemQualType(); + PrimType ElemT = *S.getContext().classify(ElemType); + unsigned NumElems = Arg.getNumElems(); + bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType(); + + for (unsigned I = 0; I != NumElems; ++I) { + INT_TYPE_SWITCH_NO_BOOL(ElemT, { + APSInt Src = Arg.elem<T>(I).toAPSInt(); + APInt Result = Fn(Src); + Dst.elem<T>(I) = static_cast<T>(APSInt(std::move(Result), DestUnsigned)); + }); + } + Dst.initializeAllElements(); + + return true; } static bool interp__builtin_elementwise_int_binop( @@ -2714,6 +2728,35 @@ static bool interp_builtin_horizontal_fp_binop( return true; } +static bool interp__builtin_ia32_addsub(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + // Addsub: alternates between subtraction and addition + // Result[i] = (i % 2 == 0) ? 
(a[i] - b[i]) : (a[i] + b[i]) + const Pointer &RHS = S.Stk.pop<Pointer>(); + const Pointer &LHS = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts()); + llvm::RoundingMode RM = getRoundingMode(FPO); + const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>(); + unsigned NumElems = VT->getNumElements(); + + using T = PrimConv<PT_Float>::T; + for (unsigned I = 0; I != NumElems; ++I) { + APFloat LElem = LHS.elem<T>(I).getAPFloat(); + APFloat RElem = RHS.elem<T>(I).getAPFloat(); + if (I % 2 == 0) { + // Even indices: subtract + LElem.subtract(RElem, RM); + } else { + // Odd indices: add + LElem.add(RElem, RM); + } + Dst.elem<T>(I) = static_cast<T>(LElem); + } + Dst.initializeAllElements(); + return true; +} + static bool interp__builtin_elementwise_triop_fp( InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::function_ref<APFloat(const APFloat &, const APFloat &, @@ -2808,105 +2851,26 @@ static bool interp__builtin_select(InterpState &S, CodePtr OpPC, return true; } -static bool interp__builtin_blend(InterpState &S, CodePtr OpPC, - const CallExpr *Call) { - APSInt Mask = popToAPSInt(S, Call->getArg(2)); - const Pointer &TrueVec = S.Stk.pop<Pointer>(); - const Pointer &FalseVec = S.Stk.pop<Pointer>(); - const Pointer &Dst = S.Stk.peek<Pointer>(); - - assert(FalseVec.getNumElems() == TrueVec.getNumElems()); - assert(FalseVec.getNumElems() == Dst.getNumElems()); - unsigned NumElems = FalseVec.getNumElems(); - PrimType ElemT = FalseVec.getFieldDesc()->getPrimType(); - PrimType DstElemT = Dst.getFieldDesc()->getPrimType(); - - for (unsigned I = 0; I != NumElems; ++I) { - bool MaskBit = Mask[I % 8]; - if (ElemT == PT_Float) { - assert(DstElemT == PT_Float); - Dst.elem<Floating>(I) = - MaskBit ? TrueVec.elem<Floating>(I) : FalseVec.elem<Floating>(I); - } else { - assert(DstElemT == ElemT); - INT_TYPE_SWITCH_NO_BOOL(DstElemT, { - Dst.elem<T>(I) = - static_cast<T>(MaskBit ? TrueVec.elem<T>(I).toAPSInt() - : FalseVec.elem<T>(I).toAPSInt()); - }); - } - } - Dst.initializeAllElements(); - - return true; -} - -static bool interp__builtin_ia32_pshufb(InterpState &S, CodePtr OpPC, - const CallExpr *Call) { - assert(Call->getNumArgs() == 2 && "masked forms handled via select*"); - const Pointer &Control = S.Stk.pop<Pointer>(); - const Pointer &Src = S.Stk.pop<Pointer>(); - const Pointer &Dst = S.Stk.peek<Pointer>(); - - unsigned NumElems = Dst.getNumElems(); - assert(NumElems == Control.getNumElems()); - assert(NumElems == Dst.getNumElems()); - - for (unsigned Idx = 0; Idx != NumElems; ++Idx) { - uint8_t Ctlb = static_cast<uint8_t>(Control.elem<int8_t>(Idx)); - - if (Ctlb & 0x80) { - Dst.elem<int8_t>(Idx) = 0; - } else { - unsigned LaneBase = (Idx / 16) * 16; - unsigned SrcOffset = Ctlb & 0x0F; - unsigned SrcIdx = LaneBase + SrcOffset; - - Dst.elem<int8_t>(Idx) = Src.elem<int8_t>(SrcIdx); - } - } - Dst.initializeAllElements(); - return true; -} +/// Scalar variant of AVX512 predicated select: +/// Result[i] = (Mask bit 0) ? LHS[i] : RHS[i], but only element 0 may change. +/// All other elements are taken from RHS. 
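/// For example, for the ss/128 form (illustrative gloss, using the A/W/U
/// names from the body below):
///   U & 1 == 1:  Result = { A[0], W[1], W[2], W[3] }
///   U & 1 == 0:  Result = { W[0], W[1], W[2], W[3] }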
+static bool interp__builtin_select_scalar(InterpState &S, + const CallExpr *Call) { + unsigned N = + Call->getArg(1)->getType()->getAs<VectorType>()->getNumElements(); -static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC, - const CallExpr *Call, bool IsShufHW) { - assert(Call->getNumArgs() == 2 && "masked forms handled via select*"); - APSInt ControlImm = popToAPSInt(S, Call->getArg(1)); - const Pointer &Src = S.Stk.pop<Pointer>(); + const Pointer &W = S.Stk.pop<Pointer>(); + const Pointer &A = S.Stk.pop<Pointer>(); + APSInt U = popToAPSInt(S, Call->getArg(0)); const Pointer &Dst = S.Stk.peek<Pointer>(); - unsigned NumElems = Dst.getNumElems(); - PrimType ElemT = Dst.getFieldDesc()->getPrimType(); + bool TakeA0 = U.getZExtValue() & 1ULL; - unsigned ElemBits = static_cast<unsigned>(primSize(ElemT) * 8); - if (ElemBits != 16 && ElemBits != 32) - return false; + for (unsigned I = TakeA0; I != N; ++I) + Dst.elem<Floating>(I) = W.elem<Floating>(I); + if (TakeA0) + Dst.elem<Floating>(0) = A.elem<Floating>(0); - unsigned LaneElts = 128u / ElemBits; - assert(LaneElts && (NumElems % LaneElts == 0)); - - uint8_t Ctl = static_cast<uint8_t>(ControlImm.getZExtValue()); - - for (unsigned Idx = 0; Idx != NumElems; Idx++) { - unsigned LaneBase = (Idx / LaneElts) * LaneElts; - unsigned LaneIdx = Idx % LaneElts; - unsigned SrcIdx = Idx; - unsigned Sel = (Ctl >> (2 * (LaneIdx & 0x3))) & 0x3; - if (ElemBits == 32) { - SrcIdx = LaneBase + Sel; - } else { - constexpr unsigned HalfSize = 4; - bool InHigh = LaneIdx >= HalfSize; - if (!IsShufHW && !InHigh) { - SrcIdx = LaneBase + Sel; - } else if (IsShufHW && InHigh) { - SrcIdx = LaneBase + HalfSize + Sel; - } - } - - INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(Idx) = Src.elem<T>(SrcIdx); }); - } Dst.initializeAllElements(); return true; } @@ -3377,35 +3341,141 @@ static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC, return true; } -static bool interp__builtin_x86_byteshift( - InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned ID, - llvm::function_ref<APInt(const Pointer &, unsigned Lane, unsigned I, - unsigned Shift)> - Fn) { - assert(Call->getNumArgs() == 2); +static bool interp__builtin_ia32_cvt_vec2mask(InterpState &S, CodePtr OpPC, + const CallExpr *Call, + unsigned ID) { + assert(Call->getNumArgs() == 1); - APSInt ImmAPS = popToAPSInt(S, Call->getArg(1)); - uint64_t Shift = ImmAPS.getZExtValue() & 0xff; + const Pointer &Vec = S.Stk.pop<Pointer>(); + unsigned RetWidth = S.getASTContext().getIntWidth(Call->getType()); + APInt RetMask(RetWidth, 0); - const Pointer &Src = S.Stk.pop<Pointer>(); - if (!Src.getFieldDesc()->isPrimitiveArray()) - return false; + unsigned VectorLen = Vec.getNumElems(); + PrimType ElemT = Vec.getFieldDesc()->getPrimType(); + + for (unsigned ElemNum = 0; ElemNum != VectorLen; ++ElemNum) { + APSInt A; + INT_TYPE_SWITCH_NO_BOOL(ElemT, { A = Vec.elem<T>(ElemNum).toAPSInt(); }); + unsigned MSB = A[A.getBitWidth() - 1]; + RetMask.setBitVal(ElemNum, MSB); + } + pushInteger(S, RetMask, Call->getType()); + return true; +} +static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC, + const CallExpr *Call, + bool HasRoundingMask) { + APSInt Rounding, MaskInt; + Pointer Src, B, A; + + if (HasRoundingMask) { + assert(Call->getNumArgs() == 5); + Rounding = popToAPSInt(S, Call->getArg(4)); + MaskInt = popToAPSInt(S, Call->getArg(3)); + Src = S.Stk.pop<Pointer>(); + B = S.Stk.pop<Pointer>(); + A = S.Stk.pop<Pointer>(); + if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B) || + !CheckLoad(S, 
OpPC, Src)) + return false; + } else { + assert(Call->getNumArgs() == 2); + B = S.Stk.pop<Pointer>(); + A = S.Stk.pop<Pointer>(); + if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B)) + return false; + } - unsigned NumElems = Src.getNumElems(); + const auto *DstVTy = Call->getType()->castAs<VectorType>(); + unsigned NumElems = DstVTy->getNumElements(); const Pointer &Dst = S.Stk.peek<Pointer>(); - PrimType ElemT = Src.getFieldDesc()->getPrimType(); - for (unsigned Lane = 0; Lane != NumElems; Lane += 16) { - for (unsigned I = 0; I != 16; ++I) { - unsigned Base = Lane + I; - APSInt Result = APSInt(Fn(Src, Lane, I, Shift)); - INT_TYPE_SWITCH_NO_BOOL(ElemT, - { Dst.elem<T>(Base) = static_cast<T>(Result); }); - } + // Copy all elements except lane 0 (overwritten below) from A to Dst. + for (unsigned I = 1; I != NumElems; ++I) + Dst.elem<Floating>(I) = A.elem<Floating>(I); + + // Convert element 0 from double to float, or use Src if masked off. + if (!HasRoundingMask || (MaskInt.getZExtValue() & 0x1)) { + assert(S.getASTContext().FloatTy == DstVTy->getElementType() && + "cvtsd2ss requires float element type in destination vector"); + + Floating Conv = S.allocFloat( + S.getASTContext().getFloatTypeSemantics(DstVTy->getElementType())); + APFloat SrcVal = B.elem<Floating>(0).getAPFloat(); + if (!convertDoubleToFloatStrict(SrcVal, Conv, S, Call)) + return false; + Dst.elem<Floating>(0) = Conv; + } else { + Dst.elem<Floating>(0) = Src.elem<Floating>(0); } Dst.initializeAllElements(); + return true; +} + +static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC, + const CallExpr *Call, bool IsMasked, + bool HasRounding) { + + APSInt MaskVal; + Pointer PassThrough; + Pointer Src; + APSInt Rounding; + + if (IsMasked) { + // Pop in reverse order. + if (HasRounding) { + Rounding = popToAPSInt(S, Call->getArg(3)); + MaskVal = popToAPSInt(S, Call->getArg(2)); + PassThrough = S.Stk.pop<Pointer>(); + Src = S.Stk.pop<Pointer>(); + } else { + MaskVal = popToAPSInt(S, Call->getArg(2)); + PassThrough = S.Stk.pop<Pointer>(); + Src = S.Stk.pop<Pointer>(); + } + + if (!CheckLoad(S, OpPC, PassThrough)) + return false; + } else { + // Pop source only. + Src = S.Stk.pop<Pointer>(); + } + + if (!CheckLoad(S, OpPC, Src)) + return false; + + const auto *RetVTy = Call->getType()->castAs<VectorType>(); + unsigned RetElems = RetVTy->getNumElements(); + unsigned SrcElems = Src.getNumElems(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + + // Initialize destination with passthrough or zeros. + for (unsigned I = 0; I != RetElems; ++I) + if (IsMasked) + Dst.elem<Floating>(I) = PassThrough.elem<Floating>(I); + else + Dst.elem<Floating>(I) = Floating(APFloat(0.0f)); + + assert(S.getASTContext().FloatTy == RetVTy->getElementType() && + "cvtpd2ps requires float element type in return vector"); + + // Convert double to float for enabled elements (only process source elements + // that exist). 
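// For the 128-bit masked form this reduces to the following per-lane rule
// (illustrative; conversions remain subject to the strict checks above):
//   Result[i] = MaskVal[i] ? (float)Src[i] : PassThrough[i]  for i < SrcElems
//   Result[i] = PassThrough[i]                               for upper lanes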
+ for (unsigned I = 0; I != SrcElems; ++I) { + if (IsMasked && !MaskVal[I]) + continue; + + APFloat SrcVal = Src.elem<Floating>(I).getAPFloat(); + + Floating Conv = S.allocFloat( + S.getASTContext().getFloatTypeSemantics(RetVTy->getElementType())); + if (!convertDoubleToFloatStrict(SrcVal, Conv, S, Call)) + return false; + Dst.elem<Floating>(I) = Conv; + } + Dst.initializeAllElements(); return true; } @@ -3414,19 +3484,65 @@ static bool interp__builtin_ia32_shuffle_generic( llvm::function_ref<std::pair<unsigned, int>(unsigned, unsigned)> GetSourceIndex) { - assert(Call->getNumArgs() == 3); - unsigned ShuffleMask = popToAPSInt(S, Call->getArg(2)).getZExtValue(); + assert(Call->getNumArgs() == 2 || Call->getNumArgs() == 3); + + unsigned ShuffleMask = 0; + Pointer A, MaskVector, B; + bool IsVectorMask = false; + bool IsSingleOperand = (Call->getNumArgs() == 2); + + if (IsSingleOperand) { + QualType MaskType = Call->getArg(1)->getType(); + if (MaskType->isVectorType()) { + IsVectorMask = true; + MaskVector = S.Stk.pop<Pointer>(); + A = S.Stk.pop<Pointer>(); + B = A; + } else if (MaskType->isIntegerType()) { + ShuffleMask = popToAPSInt(S, Call->getArg(1)).getZExtValue(); + A = S.Stk.pop<Pointer>(); + B = A; + } else { + return false; + } + } else { + QualType Arg2Type = Call->getArg(2)->getType(); + if (Arg2Type->isVectorType()) { + IsVectorMask = true; + B = S.Stk.pop<Pointer>(); + MaskVector = S.Stk.pop<Pointer>(); + A = S.Stk.pop<Pointer>(); + } else if (Arg2Type->isIntegerType()) { + ShuffleMask = popToAPSInt(S, Call->getArg(2)).getZExtValue(); + B = S.Stk.pop<Pointer>(); + A = S.Stk.pop<Pointer>(); + } else { + return false; + } + } QualType Arg0Type = Call->getArg(0)->getType(); const auto *VecT = Arg0Type->castAs<VectorType>(); PrimType ElemT = *S.getContext().classify(VecT->getElementType()); unsigned NumElems = VecT->getNumElements(); - const Pointer &B = S.Stk.pop<Pointer>(); - const Pointer &A = S.Stk.pop<Pointer>(); const Pointer &Dst = S.Stk.peek<Pointer>(); + PrimType MaskElemT = PT_Uint32; + if (IsVectorMask) { + QualType Arg1Type = Call->getArg(1)->getType(); + const auto *MaskVecT = Arg1Type->castAs<VectorType>(); + QualType MaskElemType = MaskVecT->getElementType(); + MaskElemT = *S.getContext().classify(MaskElemType); + } + for (unsigned DstIdx = 0; DstIdx != NumElems; ++DstIdx) { + if (IsVectorMask) { + INT_TYPE_SWITCH(MaskElemT, { + ShuffleMask = static_cast<unsigned>(MaskVector.elem<T>(DstIdx)); + }); + } + auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask); if (SrcIdx < 0) { @@ -3447,6 +3563,367 @@ static bool interp__builtin_ia32_shuffle_generic( return true; } +static bool interp__builtin_ia32_shift_with_count( + InterpState &S, CodePtr OpPC, const CallExpr *Call, + llvm::function_ref<APInt(const APInt &, uint64_t)> ShiftOp, + llvm::function_ref<APInt(const APInt &, unsigned)> OverflowOp) { + + assert(Call->getNumArgs() == 2); + + const Pointer &Count = S.Stk.pop<Pointer>(); + const Pointer &Source = S.Stk.pop<Pointer>(); + + QualType SourceType = Call->getArg(0)->getType(); + QualType CountType = Call->getArg(1)->getType(); + assert(SourceType->isVectorType() && CountType->isVectorType()); + + const auto *SourceVecT = SourceType->castAs<VectorType>(); + const auto *CountVecT = CountType->castAs<VectorType>(); + PrimType SourceElemT = *S.getContext().classify(SourceVecT->getElementType()); + PrimType CountElemT = *S.getContext().classify(CountVecT->getElementType()); + + const Pointer &Dst = S.Stk.peek<Pointer>(); + + unsigned DestEltWidth = + 
S.getASTContext().getTypeSize(SourceVecT->getElementType()); + bool IsDestUnsigned = SourceVecT->getElementType()->isUnsignedIntegerType(); + unsigned DestLen = SourceVecT->getNumElements(); + unsigned CountEltWidth = + S.getASTContext().getTypeSize(CountVecT->getElementType()); + unsigned NumBitsInQWord = 64; + unsigned NumCountElts = NumBitsInQWord / CountEltWidth; + + uint64_t CountLQWord = 0; + for (unsigned EltIdx = 0; EltIdx != NumCountElts; ++EltIdx) { + uint64_t Elt = 0; + INT_TYPE_SWITCH(CountElemT, + { Elt = static_cast<uint64_t>(Count.elem<T>(EltIdx)); }); + CountLQWord |= (Elt << (EltIdx * CountEltWidth)); + } + + for (unsigned EltIdx = 0; EltIdx != DestLen; ++EltIdx) { + APSInt Elt; + INT_TYPE_SWITCH(SourceElemT, { Elt = Source.elem<T>(EltIdx).toAPSInt(); }); + + APInt Result; + if (CountLQWord < DestEltWidth) { + Result = ShiftOp(Elt, CountLQWord); + } else { + Result = OverflowOp(Elt, DestEltWidth); + } + if (IsDestUnsigned) { + INT_TYPE_SWITCH(SourceElemT, { + Dst.elem<T>(EltIdx) = T::from(Result.getZExtValue()); + }); + } else { + INT_TYPE_SWITCH(SourceElemT, { + Dst.elem<T>(EltIdx) = T::from(Result.getSExtValue()); + }); + } + } + + Dst.initializeAllElements(); + return true; +} + +static bool interp__builtin_ia32_shufbitqmb_mask(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + + assert(Call->getNumArgs() == 3); + + QualType SourceType = Call->getArg(0)->getType(); + QualType ShuffleMaskType = Call->getArg(1)->getType(); + QualType ZeroMaskType = Call->getArg(2)->getType(); + if (!SourceType->isVectorType() || !ShuffleMaskType->isVectorType() || + !ZeroMaskType->isIntegerType()) { + return false; + } + + Pointer Source, ShuffleMask; + APSInt ZeroMask = popToAPSInt(S, Call->getArg(2)); + ShuffleMask = S.Stk.pop<Pointer>(); + Source = S.Stk.pop<Pointer>(); + + const auto *SourceVecT = SourceType->castAs<VectorType>(); + const auto *ShuffleMaskVecT = ShuffleMaskType->castAs<VectorType>(); + assert(SourceVecT->getNumElements() == ShuffleMaskVecT->getNumElements()); + assert(ZeroMask.getBitWidth() == SourceVecT->getNumElements()); + + PrimType SourceElemT = *S.getContext().classify(SourceVecT->getElementType()); + PrimType ShuffleMaskElemT = + *S.getContext().classify(ShuffleMaskVecT->getElementType()); + + unsigned NumBytesInQWord = 8; + unsigned NumBitsInByte = 8; + unsigned NumBytes = SourceVecT->getNumElements(); + unsigned NumQWords = NumBytes / NumBytesInQWord; + unsigned RetWidth = ZeroMask.getBitWidth(); + APSInt RetMask(llvm::APInt(RetWidth, 0), /*isUnsigned=*/true); + + for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) { + APInt SourceQWord(64, 0); + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + uint64_t Byte = 0; + INT_TYPE_SWITCH(SourceElemT, { + Byte = static_cast<uint64_t>( + Source.elem<T>(QWordId * NumBytesInQWord + ByteIdx)); + }); + SourceQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte); + } + + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + unsigned SelIdx = QWordId * NumBytesInQWord + ByteIdx; + unsigned M = 0; + INT_TYPE_SWITCH(ShuffleMaskElemT, { + M = static_cast<unsigned>(ShuffleMask.elem<T>(SelIdx)) & 0x3F; + }); + + if (ZeroMask[SelIdx]) { + RetMask.setBitVal(SelIdx, SourceQWord[M]); + } + } + } + + pushInteger(S, RetMask, Call->getType()); + return true; +} + +static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + // Arguments are: vector of floats, rounding immediate + assert(Call->getNumArgs() == 2); + + APSInt Imm = 
popToAPSInt(S, Call->getArg(1)); + const Pointer &Src = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + + assert(Src.getFieldDesc()->isPrimitiveArray()); + assert(Dst.getFieldDesc()->isPrimitiveArray()); + + const auto *SrcVTy = Call->getArg(0)->getType()->castAs<VectorType>(); + unsigned SrcNumElems = SrcVTy->getNumElements(); + const auto *DstVTy = Call->getType()->castAs<VectorType>(); + unsigned DstNumElems = DstVTy->getNumElements(); + + const llvm::fltSemantics &HalfSem = + S.getASTContext().getFloatTypeSemantics(S.getASTContext().HalfTy); + + // imm[2] == 1 means use MXCSR rounding mode. + // In that case, we can only evaluate if the conversion is exact. + int ImmVal = Imm.getZExtValue(); + bool UseMXCSR = (ImmVal & 4) != 0; + bool IsFPConstrained = + Call->getFPFeaturesInEffect(S.getASTContext().getLangOpts()) + .isFPConstrained(); + + llvm::RoundingMode RM; + if (!UseMXCSR) { + switch (ImmVal & 3) { + case 0: + RM = llvm::RoundingMode::NearestTiesToEven; + break; + case 1: + RM = llvm::RoundingMode::TowardNegative; + break; + case 2: + RM = llvm::RoundingMode::TowardPositive; + break; + case 3: + RM = llvm::RoundingMode::TowardZero; + break; + default: + llvm_unreachable("Invalid immediate rounding mode"); + } + } else { + // For MXCSR, we must check for exactness. We can use any rounding mode + // for the trial conversion since the result is the same if it's exact. + RM = llvm::RoundingMode::NearestTiesToEven; + } + + QualType DstElemQT = Dst.getFieldDesc()->getElemQualType(); + PrimType DstElemT = *S.getContext().classify(DstElemQT); + + for (unsigned I = 0; I != SrcNumElems; ++I) { + Floating SrcVal = Src.elem<Floating>(I); + APFloat DstVal = SrcVal.getAPFloat(); + + bool LostInfo; + APFloat::opStatus St = DstVal.convert(HalfSem, RM, &LostInfo); + + if (UseMXCSR && IsFPConstrained && St != APFloat::opOK) { + S.FFDiag(S.Current->getSource(OpPC), + diag::note_constexpr_dynamic_rounding); + return false; + } + + INT_TYPE_SWITCH_NO_BOOL(DstElemT, { + // Convert the destination value's bit pattern to an unsigned integer, + // then reconstruct the element using the target type's 'from' method. + uint64_t RawBits = DstVal.bitcastToAPInt().getZExtValue(); + Dst.elem<T>(I) = T::from(RawBits); + }); + } + + // Zero out remaining elements if the destination has more elements + // (e.g., vcvtps2ph converting 4 floats to 8 shorts). 
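// For the 128-bit intrinsic this matches the familiar shape (illustrative):
// _mm_cvtps_ph yields eight 16-bit lanes, of which lanes 0..3 hold the
// converted half values and lanes 4..7 are zero-filled below.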
+ if (DstNumElems > SrcNumElems) { + for (unsigned I = SrcNumElems; I != DstNumElems; ++I) { + INT_TYPE_SWITCH_NO_BOOL(DstElemT, { Dst.elem<T>(I) = T::from(0); }); + } + } + + Dst.initializeAllElements(); + return true; +} + +static bool interp__builtin_ia32_multishiftqb(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + assert(Call->getNumArgs() == 2); + + QualType ATy = Call->getArg(0)->getType(); + QualType BTy = Call->getArg(1)->getType(); + if (!ATy->isVectorType() || !BTy->isVectorType()) { + return false; + } + + const Pointer &BPtr = S.Stk.pop<Pointer>(); + const Pointer &APtr = S.Stk.pop<Pointer>(); + const auto *AVecT = ATy->castAs<VectorType>(); + assert(AVecT->getNumElements() == + BTy->castAs<VectorType>()->getNumElements()); + + PrimType ElemT = *S.getContext().classify(AVecT->getElementType()); + + unsigned NumBytesInQWord = 8; + unsigned NumBitsInByte = 8; + unsigned NumBytes = AVecT->getNumElements(); + unsigned NumQWords = NumBytes / NumBytesInQWord; + const Pointer &Dst = S.Stk.peek<Pointer>(); + + for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) { + APInt BQWord(64, 0); + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + unsigned Idx = QWordId * NumBytesInQWord + ByteIdx; + INT_TYPE_SWITCH(ElemT, { + uint64_t Byte = static_cast<uint64_t>(BPtr.elem<T>(Idx)); + BQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte); + }); + } + + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + unsigned Idx = QWordId * NumBytesInQWord + ByteIdx; + uint64_t Ctrl = 0; + INT_TYPE_SWITCH( + ElemT, { Ctrl = static_cast<uint64_t>(APtr.elem<T>(Idx)) & 0x3F; }); + + APInt Byte(8, 0); + for (unsigned BitIdx = 0; BitIdx != NumBitsInByte; ++BitIdx) { + Byte.setBitVal(BitIdx, BQWord[(Ctrl + BitIdx) & 0x3F]); + } + INT_TYPE_SWITCH(ElemT, + { Dst.elem<T>(Idx) = T::from(Byte.getZExtValue()); }); + } + } + + Dst.initializeAllElements(); + + return true; +} + +static bool interp_builtin_ia32_gfni_affine(InterpState &S, CodePtr OpPC, + const CallExpr *Call, + bool Inverse) { + assert(Call->getNumArgs() == 3); + QualType XType = Call->getArg(0)->getType(); + QualType AType = Call->getArg(1)->getType(); + QualType ImmType = Call->getArg(2)->getType(); + if (!XType->isVectorType() || !AType->isVectorType() || + !ImmType->isIntegerType()) { + return false; + } + + Pointer X, A; + APSInt Imm = popToAPSInt(S, Call->getArg(2)); + A = S.Stk.pop<Pointer>(); + X = S.Stk.pop<Pointer>(); + + const Pointer &Dst = S.Stk.peek<Pointer>(); + const auto *AVecT = AType->castAs<VectorType>(); + assert(XType->castAs<VectorType>()->getNumElements() == + AVecT->getNumElements()); + unsigned NumBytesInQWord = 8; + unsigned NumBytes = AVecT->getNumElements(); + unsigned NumBitsInQWord = 64; + unsigned NumQWords = NumBytes / NumBytesInQWord; + unsigned NumBitsInByte = 8; + PrimType AElemT = *S.getContext().classify(AVecT->getElementType()); + + // computing A*X + Imm + for (unsigned QWordIdx = 0; QWordIdx != NumQWords; ++QWordIdx) { + // Extract the QWords from X, A + APInt XQWord(NumBitsInQWord, 0); + APInt AQWord(NumBitsInQWord, 0); + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + unsigned Idx = QWordIdx * NumBytesInQWord + ByteIdx; + uint8_t XByte; + uint8_t AByte; + INT_TYPE_SWITCH(AElemT, { + XByte = static_cast<uint8_t>(X.elem<T>(Idx)); + AByte = static_cast<uint8_t>(A.elem<T>(Idx)); + }); + + XQWord.insertBits(APInt(NumBitsInByte, XByte), ByteIdx * NumBitsInByte); + AQWord.insertBits(APInt(NumBitsInByte, AByte), ByteIdx * 
NumBitsInByte); + } + + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + unsigned Idx = QWordIdx * NumBytesInQWord + ByteIdx; + uint8_t XByte = + XQWord.lshr(ByteIdx * NumBitsInByte).getLoBits(8).getZExtValue(); + INT_TYPE_SWITCH(AElemT, { + Dst.elem<T>(Idx) = T::from(GFNIAffine(XByte, AQWord, Imm, Inverse)); + }); + } + } + Dst.initializeAllElements(); + return true; +} + +static bool interp__builtin_ia32_gfni_mul(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + assert(Call->getNumArgs() == 2); + + QualType AType = Call->getArg(0)->getType(); + QualType BType = Call->getArg(1)->getType(); + if (!AType->isVectorType() || !BType->isVectorType()) { + return false; + } + + Pointer A, B; + B = S.Stk.pop<Pointer>(); + A = S.Stk.pop<Pointer>(); + + const Pointer &Dst = S.Stk.peek<Pointer>(); + const auto *AVecT = AType->castAs<VectorType>(); + assert(AVecT->getNumElements() == + BType->castAs<VectorType>()->getNumElements()); + + PrimType AElemT = *S.getContext().classify(AVecT->getElementType()); + unsigned NumBytes = A.getNumElems(); + + for (unsigned ByteIdx = 0; ByteIdx != NumBytes; ++ByteIdx) { + uint8_t AByte, BByte; + INT_TYPE_SWITCH(AElemT, { + AByte = static_cast<uint8_t>(A.elem<T>(ByteIdx)); + BByte = static_cast<uint8_t>(B.elem<T>(ByteIdx)); + Dst.elem<T>(ByteIdx) = T::from(GFNIMul(AByte, BByte)); + }); + } + + Dst.initializeAllElements(); + return true; +} + bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, uint32_t BuiltinID) { if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID)) @@ -3743,7 +4220,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case Builtin::BI__builtin_elementwise_ctzg: return interp__builtin_elementwise_countzeroes(S, OpPC, Frame, Call, BuiltinID); - + case Builtin::BI__builtin_bswapg: case Builtin::BI__builtin_bswap16: case Builtin::BI__builtin_bswap32: case Builtin::BI__builtin_bswap64: @@ -3803,6 +4280,66 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return Result; }); + case clang::X86::BI__builtin_ia32_ktestcqi: + case clang::X86::BI__builtin_ia32_ktestchi: + case clang::X86::BI__builtin_ia32_ktestcsi: + case clang::X86::BI__builtin_ia32_ktestcdi: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &A, const APSInt &B) { + return APInt(sizeof(unsigned char) * 8, (~A & B) == 0); + }); + + case clang::X86::BI__builtin_ia32_ktestzqi: + case clang::X86::BI__builtin_ia32_ktestzhi: + case clang::X86::BI__builtin_ia32_ktestzsi: + case clang::X86::BI__builtin_ia32_ktestzdi: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &A, const APSInt &B) { + return APInt(sizeof(unsigned char) * 8, (A & B) == 0); + }); + + case clang::X86::BI__builtin_ia32_kortestcqi: + case clang::X86::BI__builtin_ia32_kortestchi: + case clang::X86::BI__builtin_ia32_kortestcsi: + case clang::X86::BI__builtin_ia32_kortestcdi: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &A, const APSInt &B) { + return APInt(sizeof(unsigned char) * 8, ~(A | B) == 0); + }); + + case clang::X86::BI__builtin_ia32_kortestzqi: + case clang::X86::BI__builtin_ia32_kortestzhi: + case clang::X86::BI__builtin_ia32_kortestzsi: + case clang::X86::BI__builtin_ia32_kortestzdi: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &A, const APSInt &B) { + return APInt(sizeof(unsigned char) * 8, (A | B) == 0); + }); + + case clang::X86::BI__builtin_ia32_kshiftliqi: + case 
clang::X86::BI__builtin_ia32_kshiftlihi: + case clang::X86::BI__builtin_ia32_kshiftlisi: + case clang::X86::BI__builtin_ia32_kshiftlidi: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { + unsigned Amt = RHS.getZExtValue() & 0xFF; + if (Amt >= LHS.getBitWidth()) + return APInt::getZero(LHS.getBitWidth()); + return LHS.shl(Amt); + }); + + case clang::X86::BI__builtin_ia32_kshiftriqi: + case clang::X86::BI__builtin_ia32_kshiftrihi: + case clang::X86::BI__builtin_ia32_kshiftrisi: + case clang::X86::BI__builtin_ia32_kshiftridi: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { + unsigned Amt = RHS.getZExtValue() & 0xFF; + if (Amt >= LHS.getBitWidth()) + return APInt::getZero(LHS.getBitWidth()); + return LHS.lshr(Amt); + }); + case clang::X86::BI__builtin_ia32_lzcnt_u16: case clang::X86::BI__builtin_ia32_lzcnt_u32: case clang::X86::BI__builtin_ia32_lzcnt_u64: @@ -3888,9 +4425,13 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return interp__builtin_vector_reduce(S, OpPC, Call, BuiltinID); case Builtin::BI__builtin_elementwise_popcount: + return interp__builtin_elementwise_int_unaryop( + S, OpPC, Call, [](const APSInt &Src) { + return APInt(Src.getBitWidth(), Src.popcount()); + }); case Builtin::BI__builtin_elementwise_bitreverse: - return interp__builtin_elementwise_popcount(S, OpPC, Frame, Call, - BuiltinID); + return interp__builtin_elementwise_int_unaryop( + S, OpPC, Call, [](const APSInt &Src) { return Src.reverseBits(); }); case Builtin::BI__builtin_elementwise_abs: return interp__builtin_elementwise_abs(S, OpPC, Frame, Call, BuiltinID); @@ -4141,6 +4682,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return APInt::getAllOnes(DstBits); }); + case clang::X86::BI__builtin_ia32_selectss_128: + case clang::X86::BI__builtin_ia32_selectsd_128: + case clang::X86::BI__builtin_ia32_selectsh_128: + case clang::X86::BI__builtin_ia32_selectsbf_128: + return interp__builtin_select_scalar(S, Call); case clang::X86::BI__builtin_ia32_vprotbi: case clang::X86::BI__builtin_ia32_vprotdi: case clang::X86::BI__builtin_ia32_vprotqi: @@ -4215,6 +4761,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, F.subtract(RHS, RM); return F; }); + case clang::X86::BI__builtin_ia32_addsubpd: + case clang::X86::BI__builtin_ia32_addsubps: + case clang::X86::BI__builtin_ia32_addsubpd256: + case clang::X86::BI__builtin_ia32_addsubps256: + return interp__builtin_ia32_addsub(S, OpPC, Call); case clang::X86::BI__builtin_ia32_pmuldq128: case clang::X86::BI__builtin_ia32_pmuldq256: @@ -4306,7 +4857,15 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_pblendw256: case clang::X86::BI__builtin_ia32_pblendd128: case clang::X86::BI__builtin_ia32_pblendd256: - return interp__builtin_blend(S, OpPC, Call); + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + // Bit index for mask. + unsigned MaskBit = (ShuffleMask >> (DstIdx % 8)) & 0x1; + unsigned SrcVecIdx = MaskBit ? 
1 : 0; // 1 = TrueVec, 0 = FalseVec + return std::pair<unsigned, int>{SrcVecIdx, static_cast<int>(DstIdx)}; + }); + + case clang::X86::BI__builtin_ia32_blendvpd: case clang::X86::BI__builtin_ia32_blendvpd256: @@ -4415,6 +4974,21 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return std::pair<unsigned, int>{SrcIdx, static_cast<int>(LaneOffset + Index)}; }); + + case X86::BI__builtin_ia32_vgf2p8affineinvqb_v16qi: + case X86::BI__builtin_ia32_vgf2p8affineinvqb_v32qi: + case X86::BI__builtin_ia32_vgf2p8affineinvqb_v64qi: + return interp_builtin_ia32_gfni_affine(S, OpPC, Call, true); + case X86::BI__builtin_ia32_vgf2p8affineqb_v16qi: + case X86::BI__builtin_ia32_vgf2p8affineqb_v32qi: + case X86::BI__builtin_ia32_vgf2p8affineqb_v64qi: + return interp_builtin_ia32_gfni_affine(S, OpPC, Call, false); + + case X86::BI__builtin_ia32_vgf2p8mulb_v16qi: + case X86::BI__builtin_ia32_vgf2p8mulb_v32qi: + case X86::BI__builtin_ia32_vgf2p8mulb_v64qi: + return interp__builtin_ia32_gfni_mul(S, OpPC, Call); + case X86::BI__builtin_ia32_insertps128: return interp__builtin_ia32_shuffle_generic( S, OpPC, Call, [](unsigned DstIdx, unsigned Mask) { @@ -4434,26 +5008,212 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return std::pair<unsigned, int>{0, static_cast<int>(DstIdx)}; } }); + case X86::BI__builtin_ia32_permvarsi256: + case X86::BI__builtin_ia32_permvarsf256: + case X86::BI__builtin_ia32_permvardf512: + case X86::BI__builtin_ia32_permvardi512: + case X86::BI__builtin_ia32_permvarhi128: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x7; + return std::pair<unsigned, int>{0, Offset}; + }); + case X86::BI__builtin_ia32_permvarqi128: + case X86::BI__builtin_ia32_permvarhi256: + case X86::BI__builtin_ia32_permvarsi512: + case X86::BI__builtin_ia32_permvarsf512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0xF; + return std::pair<unsigned, int>{0, Offset}; + }); + case X86::BI__builtin_ia32_permvardi256: + case X86::BI__builtin_ia32_permvardf256: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x3; + return std::pair<unsigned, int>{0, Offset}; + }); + case X86::BI__builtin_ia32_permvarqi256: + case X86::BI__builtin_ia32_permvarhi512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x1F; + return std::pair<unsigned, int>{0, Offset}; + }); + case X86::BI__builtin_ia32_permvarqi512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x3F; + return std::pair<unsigned, int>{0, Offset}; + }); + case X86::BI__builtin_ia32_vpermi2varq128: + case X86::BI__builtin_ia32_vpermi2varpd128: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x1; + unsigned SrcIdx = (ShuffleMask >> 1) & 0x1; + return std::pair<unsigned, int>{SrcIdx, Offset}; + }); + case X86::BI__builtin_ia32_vpermi2vard128: + case X86::BI__builtin_ia32_vpermi2varps128: + case X86::BI__builtin_ia32_vpermi2varq256: + case X86::BI__builtin_ia32_vpermi2varpd256: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = 
ShuffleMask & 0x3; + unsigned SrcIdx = (ShuffleMask >> 2) & 0x1; + return std::pair<unsigned, int>{SrcIdx, Offset}; + }); + case X86::BI__builtin_ia32_vpermi2varhi128: + case X86::BI__builtin_ia32_vpermi2vard256: + case X86::BI__builtin_ia32_vpermi2varps256: + case X86::BI__builtin_ia32_vpermi2varq512: + case X86::BI__builtin_ia32_vpermi2varpd512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x7; + unsigned SrcIdx = (ShuffleMask >> 3) & 0x1; + return std::pair<unsigned, int>{SrcIdx, Offset}; + }); + case X86::BI__builtin_ia32_vpermi2varqi128: + case X86::BI__builtin_ia32_vpermi2varhi256: + case X86::BI__builtin_ia32_vpermi2vard512: + case X86::BI__builtin_ia32_vpermi2varps512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0xF; + unsigned SrcIdx = (ShuffleMask >> 4) & 0x1; + return std::pair<unsigned, int>{SrcIdx, Offset}; + }); + case X86::BI__builtin_ia32_vpermi2varqi256: + case X86::BI__builtin_ia32_vpermi2varhi512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x1F; + unsigned SrcIdx = (ShuffleMask >> 5) & 0x1; + return std::pair<unsigned, int>{SrcIdx, Offset}; + }); + case X86::BI__builtin_ia32_vpermi2varqi512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x3F; + unsigned SrcIdx = (ShuffleMask >> 6) & 0x1; + return std::pair<unsigned, int>{SrcIdx, Offset}; + }); case X86::BI__builtin_ia32_pshufb128: case X86::BI__builtin_ia32_pshufb256: case X86::BI__builtin_ia32_pshufb512: - return interp__builtin_ia32_pshufb(S, OpPC, Call); + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + uint8_t Ctlb = static_cast<uint8_t>(ShuffleMask); + if (Ctlb & 0x80) + return std::make_pair(0, -1); + + unsigned LaneBase = (DstIdx / 16) * 16; + unsigned SrcOffset = Ctlb & 0x0F; + unsigned SrcIdx = LaneBase + SrcOffset; + return std::make_pair(0, static_cast<int>(SrcIdx)); + }); case X86::BI__builtin_ia32_pshuflw: case X86::BI__builtin_ia32_pshuflw256: case X86::BI__builtin_ia32_pshuflw512: - return interp__builtin_ia32_pshuf(S, OpPC, Call, false); + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + unsigned LaneBase = (DstIdx / 8) * 8; + unsigned LaneIdx = DstIdx % 8; + if (LaneIdx < 4) { + unsigned Sel = (ShuffleMask >> (2 * LaneIdx)) & 0x3; + return std::make_pair(0, static_cast<int>(LaneBase + Sel)); + } + + return std::make_pair(0, static_cast<int>(DstIdx)); + }); case X86::BI__builtin_ia32_pshufhw: case X86::BI__builtin_ia32_pshufhw256: case X86::BI__builtin_ia32_pshufhw512: - return interp__builtin_ia32_pshuf(S, OpPC, Call, true); + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + unsigned LaneBase = (DstIdx / 8) * 8; + unsigned LaneIdx = DstIdx % 8; + if (LaneIdx >= 4) { + unsigned Sel = (ShuffleMask >> (2 * (LaneIdx - 4))) & 0x3; + return std::make_pair(0, static_cast<int>(LaneBase + 4 + Sel)); + } + + return std::make_pair(0, static_cast<int>(DstIdx)); + }); case X86::BI__builtin_ia32_pshufd: case X86::BI__builtin_ia32_pshufd256: case X86::BI__builtin_ia32_pshufd512: - return interp__builtin_ia32_pshuf(S, OpPC, Call, false); + case X86::BI__builtin_ia32_vpermilps: + 
case X86::BI__builtin_ia32_vpermilps256: + case X86::BI__builtin_ia32_vpermilps512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + unsigned LaneBase = (DstIdx / 4) * 4; + unsigned LaneIdx = DstIdx % 4; + unsigned Sel = (ShuffleMask >> (2 * LaneIdx)) & 0x3; + return std::make_pair(0, static_cast<int>(LaneBase + Sel)); + }); + + case X86::BI__builtin_ia32_vpermilvarpd: + case X86::BI__builtin_ia32_vpermilvarpd256: + case X86::BI__builtin_ia32_vpermilvarpd512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + unsigned NumElemPerLane = 2; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned Offset = ShuffleMask & 0b10 ? 1 : 0; + return std::make_pair( + 0, static_cast<int>(Lane * NumElemPerLane + Offset)); + }); + + case X86::BI__builtin_ia32_vpermilvarps: + case X86::BI__builtin_ia32_vpermilvarps256: + case X86::BI__builtin_ia32_vpermilvarps512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + unsigned NumElemPerLane = 4; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned Offset = ShuffleMask & 0b11; + return std::make_pair( + 0, static_cast<int>(Lane * NumElemPerLane + Offset)); + }); + case X86::BI__builtin_ia32_vpermilpd: + case X86::BI__builtin_ia32_vpermilpd256: + case X86::BI__builtin_ia32_vpermilpd512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned Control) { + unsigned NumElemPerLane = 2; + unsigned BitsPerElem = 1; + unsigned MaskBits = 8; + unsigned IndexMask = 0x1; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned LaneOffset = Lane * NumElemPerLane; + unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; + unsigned Index = (Control >> BitIndex) & IndexMask; + return std::make_pair(0, static_cast<int>(LaneOffset + Index)); + }); + + case X86::BI__builtin_ia32_permdf256: + case X86::BI__builtin_ia32_permdi256: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned Control) { + // permute4x64 operates on 4 64-bit elements + // For element i (0-3), extract bits [2*i+1:2*i] from Control + unsigned Index = (Control >> (2 * DstIdx)) & 0x3; + return std::make_pair(0, static_cast<int>(Index)); + }); + + case X86::BI__builtin_ia32_vpmultishiftqb128: + case X86::BI__builtin_ia32_vpmultishiftqb256: + case X86::BI__builtin_ia32_vpmultishiftqb512: + return interp__builtin_ia32_multishiftqb(S, OpPC, Call); case X86::BI__builtin_ia32_kandqi: case X86::BI__builtin_ia32_kandhi: case X86::BI__builtin_ia32_kandsi: @@ -4509,9 +5269,70 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; }); + case X86::BI__builtin_ia32_kmovb: + case X86::BI__builtin_ia32_kmovw: + case X86::BI__builtin_ia32_kmovd: + case X86::BI__builtin_ia32_kmovq: + return interp__builtin_elementwise_int_unaryop( + S, OpPC, Call, [](const APSInt &Src) { return Src; }); + + case X86::BI__builtin_ia32_kunpckhi: + case X86::BI__builtin_ia32_kunpckdi: + case X86::BI__builtin_ia32_kunpcksi: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &A, const APSInt &B) { + // Generic kunpack: extract lower half of each operand and concatenate + // Result = A[HalfWidth-1:0] concat B[HalfWidth-1:0] + unsigned BW = A.getBitWidth(); + return APSInt(A.trunc(BW / 2).concat(B.trunc(BW / 2)), + A.isUnsigned()); + }); + case 
X86::BI__builtin_ia32_phminposuw128: return interp__builtin_ia32_phminposuw(S, OpPC, Call); + case X86::BI__builtin_ia32_psraq128: + case X86::BI__builtin_ia32_psraq256: + case X86::BI__builtin_ia32_psraq512: + case X86::BI__builtin_ia32_psrad128: + case X86::BI__builtin_ia32_psrad256: + case X86::BI__builtin_ia32_psrad512: + case X86::BI__builtin_ia32_psraw128: + case X86::BI__builtin_ia32_psraw256: + case X86::BI__builtin_ia32_psraw512: + return interp__builtin_ia32_shift_with_count( + S, OpPC, Call, + [](const APInt &Elt, uint64_t Count) { return Elt.ashr(Count); }, + [](const APInt &Elt, unsigned Width) { return Elt.ashr(Width - 1); }); + + case X86::BI__builtin_ia32_psllq128: + case X86::BI__builtin_ia32_psllq256: + case X86::BI__builtin_ia32_psllq512: + case X86::BI__builtin_ia32_pslld128: + case X86::BI__builtin_ia32_pslld256: + case X86::BI__builtin_ia32_pslld512: + case X86::BI__builtin_ia32_psllw128: + case X86::BI__builtin_ia32_psllw256: + case X86::BI__builtin_ia32_psllw512: + return interp__builtin_ia32_shift_with_count( + S, OpPC, Call, + [](const APInt &Elt, uint64_t Count) { return Elt.shl(Count); }, + [](const APInt &Elt, unsigned Width) { return APInt::getZero(Width); }); + + case X86::BI__builtin_ia32_psrlq128: + case X86::BI__builtin_ia32_psrlq256: + case X86::BI__builtin_ia32_psrlq512: + case X86::BI__builtin_ia32_psrld128: + case X86::BI__builtin_ia32_psrld256: + case X86::BI__builtin_ia32_psrld512: + case X86::BI__builtin_ia32_psrlw128: + case X86::BI__builtin_ia32_psrlw256: + case X86::BI__builtin_ia32_psrlw512: + return interp__builtin_ia32_shift_with_count( + S, OpPC, Call, + [](const APInt &Elt, uint64_t Count) { return Elt.lshr(Count); }, + [](const APInt &Elt, unsigned Width) { return APInt::getZero(Width); }); + case X86::BI__builtin_ia32_pternlogd128_mask: case X86::BI__builtin_ia32_pternlogd256_mask: case X86::BI__builtin_ia32_pternlogd512_mask: @@ -4533,6 +5354,39 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return interp__builtin_elementwise_triop(S, OpPC, Call, llvm::APIntOps::fshr); + case X86::BI__builtin_ia32_shuf_f32x4_256: + case X86::BI__builtin_ia32_shuf_i32x4_256: + case X86::BI__builtin_ia32_shuf_f64x2_256: + case X86::BI__builtin_ia32_shuf_i64x2_256: + case X86::BI__builtin_ia32_shuf_f32x4: + case X86::BI__builtin_ia32_shuf_i32x4: + case X86::BI__builtin_ia32_shuf_f64x2: + case X86::BI__builtin_ia32_shuf_i64x2: { + // Destination and sources A, B all have the same type. + QualType VecQT = Call->getArg(0)->getType(); + const auto *VecT = VecQT->castAs<VectorType>(); + unsigned NumElems = VecT->getNumElements(); + unsigned ElemBits = S.getASTContext().getTypeSize(VecT->getElementType()); + unsigned LaneBits = 128u; + unsigned NumLanes = (NumElems * ElemBits) / LaneBits; + unsigned NumElemsPerLane = LaneBits / ElemBits; + + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, + [NumLanes, NumElemsPerLane](unsigned DstIdx, unsigned ShuffleMask) { + // DstIdx determines source. ShuffleMask selects lane in source. + unsigned BitsPerElem = NumLanes / 2; + unsigned IndexMask = (1u << BitsPerElem) - 1; + unsigned Lane = DstIdx / NumElemsPerLane; + unsigned SrcIdx = (Lane < NumLanes / 2) ? 
0 : 1; + unsigned BitIdx = BitsPerElem * Lane; + unsigned SrcLaneIdx = (ShuffleMask >> BitIdx) & IndexMask; + unsigned ElemInLane = DstIdx % NumElemsPerLane; + unsigned IdxToPick = SrcLaneIdx * NumElemsPerLane + ElemInLane; + return std::pair<unsigned, int>{SrcIdx, IdxToPick}; + }); + } + case X86::BI__builtin_ia32_insertf32x4_256: case X86::BI__builtin_ia32_inserti32x4_256: case X86::BI__builtin_ia32_insertf64x2_256: @@ -4551,6 +5405,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_insert128i256: return interp__builtin_x86_insert_subvector(S, OpPC, Call, BuiltinID); + case clang::X86::BI__builtin_ia32_vcvtps2ph: + case clang::X86::BI__builtin_ia32_vcvtps2ph256: + return interp__builtin_ia32_vcvtps2ph(S, OpPC, Call); + case X86::BI__builtin_ia32_vec_ext_v4hi: case X86::BI__builtin_ia32_vec_ext_v16qi: case X86::BI__builtin_ia32_vec_ext_v8hi: @@ -4574,6 +5432,34 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_vec_set_v4di: return interp__builtin_vec_set(S, OpPC, Call, BuiltinID); + case X86::BI__builtin_ia32_cvtb2mask128: + case X86::BI__builtin_ia32_cvtb2mask256: + case X86::BI__builtin_ia32_cvtb2mask512: + case X86::BI__builtin_ia32_cvtw2mask128: + case X86::BI__builtin_ia32_cvtw2mask256: + case X86::BI__builtin_ia32_cvtw2mask512: + case X86::BI__builtin_ia32_cvtd2mask128: + case X86::BI__builtin_ia32_cvtd2mask256: + case X86::BI__builtin_ia32_cvtd2mask512: + case X86::BI__builtin_ia32_cvtq2mask128: + case X86::BI__builtin_ia32_cvtq2mask256: + case X86::BI__builtin_ia32_cvtq2mask512: + return interp__builtin_ia32_cvt_vec2mask(S, OpPC, Call, BuiltinID); + + case X86::BI__builtin_ia32_cvtsd2ss: + return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, false); + + case X86::BI__builtin_ia32_cvtsd2ss_round_mask: + return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, true); + + case X86::BI__builtin_ia32_cvtpd2ps: + case X86::BI__builtin_ia32_cvtpd2ps256: + return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, false, false); + case X86::BI__builtin_ia32_cvtpd2ps_mask: + return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, true, false); + case X86::BI__builtin_ia32_cvtpd2ps512_mask: + return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, true, true); + case X86::BI__builtin_ia32_cmpb128_mask: case X86::BI__builtin_ia32_cmpw128_mask: case X86::BI__builtin_ia32_cmpd128_mask: @@ -4603,6 +5489,12 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_ucmpq512_mask: return interp__builtin_ia32_cmp_mask(S, OpPC, Call, BuiltinID, /*IsUnsigned=*/true); + + case X86::BI__builtin_ia32_vpshufbitqmb128_mask: + case X86::BI__builtin_ia32_vpshufbitqmb256_mask: + case X86::BI__builtin_ia32_vpshufbitqmb512_mask: + return interp__builtin_ia32_shufbitqmb_mask(S, OpPC, Call); + case X86::BI__builtin_ia32_pslldqi128_byteshift: case X86::BI__builtin_ia32_pslldqi256_byteshift: case X86::BI__builtin_ia32_pslldqi512_byteshift: @@ -4610,13 +5502,16 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, // The lane width is hardcoded to 16 to match the SIMD register size, // but the algorithm processes one byte per iteration, // so APInt(8, ...) is correct and intentional. 
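The rewrite that follows recasts both byte shifts as pure index remappings; returning -1 as the element index tells the generic shuffle helper to zero-fill that destination byte. Per 16-byte lane (editorial sketch):

// pslldq: Dst[base + i] = (i < Shift)      ? 0 : Src[base + i - Shift]
// psrldq: Dst[base + i] = (i + Shift < 16) ? Src[base + i + Shift] : 0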
- return interp__builtin_x86_byteshift( - S, OpPC, Call, BuiltinID, - [](const Pointer &Src, unsigned Lane, unsigned I, unsigned Shift) { - if (I < Shift) { - return APInt(8, 0); - } - return APInt(8, Src.elem<uint8_t>(Lane + I - Shift)); + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, + [](unsigned DstIdx, unsigned Shift) -> std::pair<unsigned, int> { + unsigned LaneBase = (DstIdx / 16) * 16; + unsigned LaneIdx = DstIdx % 16; + if (LaneIdx < Shift) + return std::make_pair(0, -1); + + return std::make_pair(0, + static_cast<int>(LaneBase + LaneIdx - Shift)); }); case X86::BI__builtin_ia32_psrldqi128_byteshift: @@ -4626,16 +5521,60 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, // The lane width is hardcoded to 16 to match the SIMD register size, // but the algorithm processes one byte per iteration, // so APInt(8, ...) is correct and intentional. - return interp__builtin_x86_byteshift( - S, OpPC, Call, BuiltinID, - [](const Pointer &Src, unsigned Lane, unsigned I, unsigned Shift) { - if (I + Shift < 16) { - return APInt(8, Src.elem<uint8_t>(Lane + I + Shift)); + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, + [](unsigned DstIdx, unsigned Shift) -> std::pair<unsigned, int> { + unsigned LaneBase = (DstIdx / 16) * 16; + unsigned LaneIdx = DstIdx % 16; + if (LaneIdx + Shift < 16) + return std::make_pair(0, + static_cast<int>(LaneBase + LaneIdx + Shift)); + + return std::make_pair(0, -1); + }); + + case X86::BI__builtin_ia32_palignr128: + case X86::BI__builtin_ia32_palignr256: + case X86::BI__builtin_ia32_palignr512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned Shift) { + // Default to -1 → zero-fill this destination element + unsigned VecIdx = 1; + int ElemIdx = -1; + + int Lane = DstIdx / 16; + int Offset = DstIdx % 16; + + // Elements come from VecB first, then VecA after the shift boundary + unsigned ShiftedIdx = Offset + (Shift & 0xFF); + if (ShiftedIdx < 16) { // from VecB + ElemIdx = ShiftedIdx + (Lane * 16); + } else if (ShiftedIdx < 32) { // from VecA + VecIdx = 0; + ElemIdx = (ShiftedIdx - 16) + (Lane * 16); } - return APInt(8, 0); + return std::pair<unsigned, int>{VecIdx, ElemIdx}; }); + case X86::BI__builtin_ia32_alignd128: + case X86::BI__builtin_ia32_alignd256: + case X86::BI__builtin_ia32_alignd512: + case X86::BI__builtin_ia32_alignq128: + case X86::BI__builtin_ia32_alignq256: + case X86::BI__builtin_ia32_alignq512: { + unsigned NumElems = Call->getType()->castAs<VectorType>()->getNumElements(); + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [NumElems](unsigned DstIdx, unsigned Shift) { + unsigned Imm = Shift & 0xFF; + unsigned EffectiveShift = Imm & (NumElems - 1); + unsigned SourcePos = DstIdx + EffectiveShift; + unsigned VecIdx = SourcePos < NumElems ? 
+  case X86::BI__builtin_ia32_alignd128:
+  case X86::BI__builtin_ia32_alignd256:
+  case X86::BI__builtin_ia32_alignd512:
+  case X86::BI__builtin_ia32_alignq128:
+  case X86::BI__builtin_ia32_alignq256:
+  case X86::BI__builtin_ia32_alignq512: {
+    unsigned NumElems = Call->getType()->castAs<VectorType>()->getNumElements();
+    return interp__builtin_ia32_shuffle_generic(
+        S, OpPC, Call, [NumElems](unsigned DstIdx, unsigned Shift) {
+          unsigned Imm = Shift & 0xFF;
+          unsigned EffectiveShift = Imm & (NumElems - 1);
+          unsigned SourcePos = DstIdx + EffectiveShift;
+          unsigned VecIdx = SourcePos < NumElems ? 1u : 0u;
+          unsigned ElemIdx = SourcePos & (NumElems - 1);
+          return std::pair<unsigned, int>{VecIdx, static_cast<int>(ElemIdx)};
+        });
+  }
+
   default:
     S.FFDiag(S.Current->getLocation(OpPC),
              diag::note_invalid_subexpr_in_const_expr)
diff --git a/clang/lib/AST/ByteCode/InterpFrame.cpp b/clang/lib/AST/ByteCode/InterpFrame.cpp
index 039acb5..3b88376 100644
--- a/clang/lib/AST/ByteCode/InterpFrame.cpp
+++ b/clang/lib/AST/ByteCode/InterpFrame.cpp
@@ -89,11 +89,23 @@ void InterpFrame::destroyScopes() {
 void InterpFrame::initScope(unsigned Idx) {
   if (!Func)
     return;
+
   for (auto &Local : Func->getScope(Idx).locals()) {
     localBlock(Local.Offset)->invokeCtor();
   }
 }
 
+void InterpFrame::enableLocal(unsigned Idx) {
+  assert(Func);
+
+  // FIXME: This is a little dirty, but to avoid adding a flag to
+  // InlineDescriptor that's only ever useful at the top level of local
+  // variables, we reuse the IsActive flag for the enabled state. We should
+  // probably use a different struct than InlineDescriptor for the block-level
+  // inline descriptor of local variables.
+  localInlineDesc(Idx)->IsActive = true;
+}
+
 void InterpFrame::destroy(unsigned Idx) {
   for (auto &Local : Func->getScope(Idx).locals_reverse()) {
     S.deallocate(localBlock(Local.Offset));
diff --git a/clang/lib/AST/ByteCode/InterpFrame.h b/clang/lib/AST/ByteCode/InterpFrame.h
index fa9de2e..e150e92 100644
--- a/clang/lib/AST/ByteCode/InterpFrame.h
+++ b/clang/lib/AST/ByteCode/InterpFrame.h
@@ -55,6 +55,10 @@ public:
   void destroy(unsigned Idx);
   void initScope(unsigned Idx);
   void destroyScopes();
+  void enableLocal(unsigned Idx);
+  bool isLocalEnabled(unsigned Idx) const {
+    return localInlineDesc(Idx)->IsActive;
+  }
 
   /// Describes the frame with arguments for diagnostic purposes.
   void describe(llvm::raw_ostream &OS) const override;
@@ -109,6 +113,7 @@ public:
   /// Returns the 'this' pointer.
   const Pointer &getThis() const {
     assert(hasThisPointer());
+    assert(!isBottomFrame());
     return stackRef<Pointer>(ThisPointerOffset);
   }
 
@@ -116,6 +121,7 @@ public:
   const Pointer &getRVOPtr() const {
     assert(Func);
     assert(Func->hasRVO());
+    assert(!isBottomFrame());
     return stackRef<Pointer>(0);
   }
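enableLocal and isLocalEnabled give each local a "has actually come into scope on the executed path" bit on top of initScope's construction. A reduced constant-evaluation scenario where per-path scope tracking matters (an illustrative guess at the class of inputs, not a test case from the patch):

  // Each arm of ?: owns a scope with its own lifetime-extended temporary;
  // only the locals of the arm actually taken are enabled, and only enabled
  // locals may be destroyed when the enclosing scope ends.
  constexpr int pick(bool B) {
    const int &R = B ? 10 : 20;
    return R;
  }
  static_assert(pick(true) == 10 && pick(false) == 20);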
diff --git a/clang/lib/AST/ByteCode/MemberPointer.cpp b/clang/lib/AST/ByteCode/MemberPointer.cpp
index dfc8583..8b1b018 100644
--- a/clang/lib/AST/ByteCode/MemberPointer.cpp
+++ b/clang/lib/AST/ByteCode/MemberPointer.cpp
@@ -23,6 +23,15 @@ std::optional<Pointer> MemberPointer::toPointer(const Context &Ctx) const {
   if (!Base.isBlockPointer())
     return std::nullopt;
 
+  unsigned BlockMDSize = Base.block()->getDescriptor()->getMetadataSize();
+
+  if (PtrOffset >= 0) {
+    // If the resulting base would be too small, return nullopt.
+    if (Base.BS.Base < static_cast<unsigned>(PtrOffset) ||
+        (Base.BS.Base - PtrOffset < BlockMDSize))
+      return std::nullopt;
+  }
+
   Pointer CastedBase =
       (PtrOffset < 0 ? Base.atField(-PtrOffset) : Base.atFieldSub(PtrOffset));
 
@@ -31,7 +40,7 @@ std::optional<Pointer> MemberPointer::toPointer(const Context &Ctx) const {
     return std::nullopt;
 
   unsigned Offset = 0;
-  Offset += CastedBase.block()->getDescriptor()->getMetadataSize();
+  Offset += BlockMDSize;
 
   if (const auto *FD = dyn_cast<FieldDecl>(Dcl)) {
     if (FD->getParent() == BaseRecord->getDecl())
diff --git a/clang/lib/AST/ByteCode/Opcodes.td b/clang/lib/AST/ByteCode/Opcodes.td
index 1c17ad9e..6e76879 100644
--- a/clang/lib/AST/ByteCode/Opcodes.td
+++ b/clang/lib/AST/ByteCode/Opcodes.td
@@ -53,6 +53,7 @@
 def ArgBool : ArgType { let Name = "bool"; }
 def ArgFixedPoint : ArgType { let Name = "FixedPoint"; let AsRef = true; }
 def ArgFunction : ArgType { let Name = "const Function *"; }
+def ArgFunctionDecl : ArgType { let Name = "const FunctionDecl *"; }
 def ArgRecordDecl : ArgType { let Name = "const RecordDecl *"; }
 def ArgRecordField : ArgType { let Name = "const Record::Field *"; }
 def ArgFltSemantics : ArgType { let Name = "const llvm::fltSemantics *"; }
@@ -250,6 +251,16 @@
 def InitScope : Opcode {
   let Args = [ArgUint32];
 }
 
+def GetLocalEnabled : Opcode {
+  let Args = [ArgUint32];
+  let HasCustomEval = 1;
+}
+
+def EnableLocal : Opcode {
+  let Args = [ArgUint32];
+  let HasCustomEval = 1;
+}
+
 //===----------------------------------------------------------------------===//
 // Constants
 //===----------------------------------------------------------------------===//
@@ -360,8 +371,14 @@ def NarrowPtr : Opcode;
 // [Pointer] -> [Pointer]
 def ExpandPtr : Opcode;
 // [Pointer, Offset] -> [Pointer]
-def ArrayElemPtr : AluOpcode;
-def ArrayElemPtrPop : AluOpcode;
+def ArrayElemPtr : Opcode {
+  let Types = [IntegralTypeClass];
+  let HasGroup = 1;
+}
+def ArrayElemPtrPop : Opcode {
+  let Types = [IntegralTypeClass];
+  let HasGroup = 1;
+}
 
 def ArrayElemPop : Opcode {
   let Args = [ArgUint32];
@@ -421,6 +438,8 @@ def CheckLiteralType : Opcode {
 }
 
 def CheckArraySize : Opcode { let Args = [ArgUint64]; }
+def CheckFunctionDecl : Opcode { let Args = [ArgFunctionDecl]; }
+def CheckBitCast : Opcode { let Args = [ArgTypePtr, ArgBool]; }
 
 // [] -> [Value]
 def GetGlobal : AccessOpcode;
@@ -533,13 +552,20 @@ def InitElemPop : Opcode {
 //===----------------------------------------------------------------------===//
 
 // [Pointer, Integral] -> [Pointer]
-def AddOffset : AluOpcode;
+def AddOffset : Opcode {
+  let Types = [IntegralTypeClass];
+  let HasGroup = 1;
+}
 // [Pointer, Integral] -> [Pointer]
-def SubOffset : AluOpcode;
+def SubOffset : Opcode {
+  let Types = [IntegralTypeClass];
+  let HasGroup = 1;
+}
 
 // [Pointer, Pointer] -> [Integral]
 def SubPtr : Opcode {
   let Types = [IntegerTypeClass];
+  let Args = [ArgBool];
   let HasGroup = 1;
 }
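SubPtr now carries a boolean immediate recording whether the element type is zero-sized, letting pointer subtraction bail out instead of dividing by zero. A sketch of the rule the flag enables (subPtr here is a hypothetical free function; the real opcode consumes two interpreter Pointers):

  #include <cstdint>
  #include <optional>

  // Pointer difference in elements = byte distance / element size. For
  // zero-sized elements (possible under GNU extensions, e.g. arrays of
  // empty structs in C) the division is meaningless, so bail out so the
  // caller can diagnose.
  std::optional<int64_t> subPtr(uint64_t ByteA, uint64_t ByteB,
                                uint64_t ElemSize, bool ElemSizeIsZero) {
    if (ElemSizeIsZero)
      return std::nullopt;
    return (static_cast<int64_t>(ByteA) - static_cast<int64_t>(ByteB)) /
           static_cast<int64_t>(ElemSize);
  }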
diff --git a/clang/lib/AST/ByteCode/Pointer.cpp b/clang/lib/AST/ByteCode/Pointer.cpp
index e417bdf..00e74db 100644
--- a/clang/lib/AST/ByteCode/Pointer.cpp
+++ b/clang/lib/AST/ByteCode/Pointer.cpp
@@ -33,6 +33,7 @@ Pointer::Pointer(Block *Pointee, uint64_t BaseAndOffset)
 Pointer::Pointer(Block *Pointee, unsigned Base, uint64_t Offset)
     : Offset(Offset), StorageKind(Storage::Block) {
   assert((Base == RootPtrMark || Base % alignof(void *) == 0) && "wrong base");
+  assert(Base >= Pointee->getDescriptor()->getMetadataSize());
 
   BS = {Pointee, Base, nullptr, nullptr};
 
@@ -894,8 +895,8 @@ std::optional<APValue> Pointer::toRValue(const Context &Ctx,
   return Result;
 }
 
-IntPointer IntPointer::atOffset(const ASTContext &ASTCtx,
-                                unsigned Offset) const {
+std::optional<IntPointer> IntPointer::atOffset(const ASTContext &ASTCtx,
+                                               unsigned Offset) const {
   if (!this->Desc)
     return *this;
   const Record *R = this->Desc->ElemRecord;
@@ -913,6 +914,9 @@ IntPointer IntPointer::atOffset(const ASTContext &ASTCtx,
     return *this;
 
   const FieldDecl *FD = F->Decl;
+  if (FD->getParent()->isInvalidDecl())
+    return std::nullopt;
+
   const ASTRecordLayout &Layout = ASTCtx.getASTRecordLayout(FD->getParent());
   unsigned FieldIndex = FD->getFieldIndex();
   uint64_t FieldOffset =
diff --git a/clang/lib/AST/ByteCode/Pointer.h b/clang/lib/AST/ByteCode/Pointer.h
index cd738ce..0978090 100644
--- a/clang/lib/AST/ByteCode/Pointer.h
+++ b/clang/lib/AST/ByteCode/Pointer.h
@@ -47,7 +48,8 @@ struct IntPointer {
   const Descriptor *Desc;
   uint64_t Value;
 
-  IntPointer atOffset(const ASTContext &ASTCtx, unsigned Offset) const;
+  std::optional<IntPointer> atOffset(const ASTContext &ASTCtx,
+                                     unsigned Offset) const;
   IntPointer baseCast(const ASTContext &ASTCtx, unsigned BaseOffset) const;
 };
 
@@ -199,17 +200,19 @@ public:
       return Pointer(BS.Pointee, sizeof(InlineDescriptor),
                      Offset == 0 ? Offset : PastEndMark);
 
-    // Pointer is one past end - magic offset marks that.
-    if (isOnePastEnd())
-      return Pointer(BS.Pointee, Base, PastEndMark);
-
-    if (Offset != Base) {
-      // If we're pointing to a primitive array element, there's nothing to do.
-      if (inPrimitiveArray())
-        return *this;
-      // Pointer is to a composite array element - enter it.
-      if (Offset != Base)
+    if (inArray()) {
+      // Pointer is one past end - magic offset marks that.
+      if (isOnePastEnd())
+        return Pointer(BS.Pointee, Base, PastEndMark);
+
+      if (Offset != Base) {
+        // If we're pointing to a primitive array element, there's nothing to
+        // do.
+        if (inPrimitiveArray())
+          return *this;
+        // Pointer is to a composite array element - enter it.
         return Pointer(BS.Pointee, Offset, Offset);
+      }
     }
 
     // Otherwise, we're pointing to a non-array element or
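narrow() selects the subobject a pointer designates after an index step, and the rewrite above restricts the one-past-the-end and element handling to pointers that really are inside an array. At the source level this machinery models ordinary subscripting; a small example of the chained narrowing involved (illustrative of the semantics only, not of the internal API):

  // Each subscript narrows one level: &a -> row a[1] -> element a[1][2];
  // expand() undoes a level, e.g. when forming a row's one-past-the-end
  // pointer.
  constexpr int element() {
    int a[2][3] = {{1, 2, 3}, {4, 5, 6}};
    return a[1][2];
  }
  static_assert(element() == 6);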
@@ -219,6 +222,8 @@
   /// Expands a pointer to the containing array, undoing narrowing.
   [[nodiscard]] Pointer expand() const {
+    if (!isBlockPointer())
+      return *this;
     assert(isBlockPointer());
 
     Block *Pointee = BS.Pointee;
 
@@ -830,6 +835,9 @@ private:
 
 inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Pointer &P) {
   P.print(OS);
+  OS << ' ';
+  if (const Descriptor *D = P.getFieldDesc())
+    D->dump(OS);
   return OS;
 }
diff --git a/clang/lib/AST/ByteCode/PrimType.h b/clang/lib/AST/ByteCode/PrimType.h
index 54fd39a..f0454b4 100644
--- a/clang/lib/AST/ByteCode/PrimType.h
+++ b/clang/lib/AST/ByteCode/PrimType.h
@@ -101,6 +101,7 @@ inline constexpr bool isSignedType(PrimType T) {
 
 enum class CastKind : uint8_t {
   Reinterpret,
+  ReinterpretLike,
   Volatile,
   Dynamic,
 };
 
@@ -111,6 +112,9 @@ inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
   case interp::CastKind::Reinterpret:
     OS << "reinterpret_cast";
     break;
+  case interp::CastKind::ReinterpretLike:
+    OS << "reinterpret_like";
+    break;
   case interp::CastKind::Volatile:
     OS << "volatile";
     break;
diff --git a/clang/lib/AST/ByteCode/Program.cpp b/clang/lib/AST/ByteCode/Program.cpp
index e0b2852..d9693407 100644
--- a/clang/lib/AST/ByteCode/Program.cpp
+++ b/clang/lib/AST/ByteCode/Program.cpp
@@ -27,7 +27,7 @@ unsigned Program::getOrCreateNativePointer(const void *Ptr) {
   return It->second;
 }
 
-const void *Program::getNativePointer(unsigned Idx) {
+const void *Program::getNativePointer(unsigned Idx) const {
   return NativePointers[Idx];
 }
 
@@ -36,30 +36,19 @@ unsigned Program::createGlobalString(const StringLiteral *S, const Expr *Base) {
   const size_t BitWidth = CharWidth * Ctx.getCharBit();
   unsigned StringLength = S->getLength();
 
-  PrimType CharType;
-  switch (CharWidth) {
-  case 1:
-    CharType = PT_Sint8;
-    break;
-  case 2:
-    CharType = PT_Uint16;
-    break;
-  case 4:
-    CharType = PT_Uint32;
-    break;
-  default:
-    llvm_unreachable("unsupported character width");
-  }
+  OptPrimType CharType =
+      Ctx.classify(S->getType()->castAsArrayTypeUnsafe()->getElementType());
+  assert(CharType);
 
   if (!Base)
     Base = S;
 
   // Create a descriptor for the string.
-  Descriptor *Desc =
-      allocateDescriptor(Base, CharType, Descriptor::GlobalMD, StringLength + 1,
-                         /*isConst=*/true,
-                         /*isTemporary=*/false,
-                         /*isMutable=*/false);
+  Descriptor *Desc = allocateDescriptor(Base, *CharType, Descriptor::GlobalMD,
+                                        StringLength + 1,
+                                        /*isConst=*/true,
+                                        /*isTemporary=*/false,
+                                        /*isMutable=*/false);
 
   // Allocate storage for the string.
   // The byte length does not include the null terminator.
@@ -79,26 +68,9 @@ unsigned Program::createGlobalString(const StringLiteral *S, const Expr *Base) {
   } else {
     // Construct the string in storage.
     for (unsigned I = 0; I <= StringLength; ++I) {
-      const uint32_t CodePoint = I == StringLength ? 0 : S->getCodeUnit(I);
-      switch (CharType) {
-      case PT_Sint8: {
-        using T = PrimConv<PT_Sint8>::T;
-        Ptr.elem<T>(I) = T::from(CodePoint, BitWidth);
-        break;
-      }
-      case PT_Uint16: {
-        using T = PrimConv<PT_Uint16>::T;
-        Ptr.elem<T>(I) = T::from(CodePoint, BitWidth);
-        break;
-      }
-      case PT_Uint32: {
-        using T = PrimConv<PT_Uint32>::T;
-        Ptr.elem<T>(I) = T::from(CodePoint, BitWidth);
-        break;
-      }
-      default:
-        llvm_unreachable("unsupported character type");
-      }
+      uint32_t CodePoint = I == StringLength ? 0 : S->getCodeUnit(I);
+      INT_TYPE_SWITCH_NO_BOOL(*CharType,
+                              Ptr.elem<T>(I) = T::from(CodePoint, BitWidth););
     }
   }
   Ptr.initializeAllElements();
@@ -218,21 +190,43 @@ UnsignedOrNone Program::createGlobal(const ValueDecl *VD, const Expr *Init) {
     return std::nullopt;
 
   Global *NewGlobal = Globals[*Idx];
+  // Note that this loop has one iteration where Redecl == VD.
   for (const Decl *Redecl : VD->redecls()) {
-    unsigned &PIdx = GlobalIndices[Redecl];
+
+    // If this redecl was registered as a dummy variable, it is now a proper
+    // global variable and points to the block we just created.
+    if (auto DummyIt = DummyVariables.find(Redecl);
+        DummyIt != DummyVariables.end()) {
+      Global *Dummy = Globals[DummyIt->second];
+      Dummy->block()->movePointersTo(NewGlobal->block());
+      Globals[DummyIt->second] = NewGlobal;
+      DummyVariables.erase(DummyIt);
+    }
+
+    // If the redeclaration hasn't been registered at all yet, we just set its
+    // global index to Idx. If it has already been registered, it might have
+    // pointers pointing to it, and we need to transfer those pointers to the
+    // new block.
+    auto [Iter, Inserted] = GlobalIndices.try_emplace(Redecl);
+    if (Inserted) {
+      GlobalIndices[Redecl] = *Idx;
+      continue;
+    }
+
     if (Redecl != VD) {
-      if (Block *RedeclBlock = Globals[PIdx]->block();
+      if (Block *RedeclBlock = Globals[Iter->second]->block();
           RedeclBlock->isExtern()) {
-        Globals[PIdx] = NewGlobal;
+        // All pointers pointing to the previous extern decl now point to the
         // new decl.
         // A previous iteration might've already fixed up the pointers for this
        // global.
         if (RedeclBlock != NewGlobal->block())
           RedeclBlock->movePointersTo(NewGlobal->block());
+
+        Globals[Iter->second] = NewGlobal;
       }
     }
-    PIdx = *Idx;
+    Iter->second = *Idx;
   }
 
   return *Idx;
diff --git a/clang/lib/AST/ByteCode/Program.h b/clang/lib/AST/ByteCode/Program.h
index 28fcc97..c879550 100644
--- a/clang/lib/AST/ByteCode/Program.h
+++ b/clang/lib/AST/ByteCode/Program.h
@@ -58,7 +58,7 @@ public:
   unsigned getOrCreateNativePointer(const void *Ptr);
 
   /// Returns the value of a marshalled native pointer.
-  const void *getNativePointer(unsigned Idx);
+  const void *getNativePointer(unsigned Idx) const;
 
   /// Emits a string literal among global data.
   unsigned createGlobalString(const StringLiteral *S,
@@ -205,7 +205,6 @@ private:
     const Block *block() const { return &B; }
 
   private:
-    /// Required metadata - does not actually track pointers.
     Block B;
   };
diff --git a/clang/lib/AST/ByteCode/Source.h b/clang/lib/AST/ByteCode/Source.h
index f355d14..56ca197 100644
--- a/clang/lib/AST/ByteCode/Source.h
+++ b/clang/lib/AST/ByteCode/Source.h
@@ -51,6 +51,7 @@ public:
   explicit operator bool() const { return Ptr; }
   bool operator<=(const CodePtr &RHS) const { return Ptr <= RHS.Ptr; }
   bool operator>=(const CodePtr &RHS) const { return Ptr >= RHS.Ptr; }
+  bool operator==(const CodePtr RHS) const { return Ptr == RHS.Ptr; }
 
   /// Reads data and advances the pointer.
   template <typename T> std::enable_if_t<!std::is_pointer<T>::value, T> read() {
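The createGlobal loop above handles redeclaration chains: a variable may first be seen as an extern declaration or a dummy, collect pointers into that placeholder block, and only later be defined, at which point every such pointer must migrate to the definition's block. A reduced source-level illustration of the situation being handled (hypothetical example, not taken from the patch's tests):

  // The address of N is taken while only the extern declaration is known;
  // once the definition is compiled, pointers into the placeholder block
  // are moved to the new block so that *P reads 42.
  extern const int N;
  constexpr const int *P = &N; // points into the placeholder block
  const int N = 42;            // definition: pointers migrate
  static_assert(*P == 42);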
