Diffstat (limited to 'clang/lib/AST')
44 files changed, 4074 insertions, 848 deletions
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 687cd46..13018ba 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -3108,9 +3108,9 @@ TypeSourceInfo *ASTContext::CreateTypeSourceInfo(QualType T,
 TypeSourceInfo *
 ASTContext::getTrivialTypeSourceInfo(QualType T, SourceLocation L) const {
-  TypeSourceInfo *DI = CreateTypeSourceInfo(T);
-  DI->getTypeLoc().initialize(const_cast<ASTContext &>(*this), L);
-  return DI;
+  TypeSourceInfo *TSI = CreateTypeSourceInfo(T);
+  TSI->getTypeLoc().initialize(const_cast<ASTContext &>(*this), L);
+  return TSI;
 }
 
 const ASTRecordLayout &
@@ -5891,11 +5891,11 @@ TypeSourceInfo *ASTContext::getTemplateSpecializationTypeInfo(
   QualType TST = getTemplateSpecializationType(
       Keyword, Name, SpecifiedArgs.arguments(), CanonicalArgs, Underlying);
 
-  TypeSourceInfo *DI = CreateTypeSourceInfo(TST);
-  DI->getTypeLoc().castAs<TemplateSpecializationTypeLoc>().set(
+  TypeSourceInfo *TSI = CreateTypeSourceInfo(TST);
+  TSI->getTypeLoc().castAs<TemplateSpecializationTypeLoc>().set(
       ElaboratedKeywordLoc, QualifierLoc, TemplateKeywordLoc, NameLoc,
       SpecifiedArgs);
-  return DI;
+  return TSI;
 }
 
 QualType ASTContext::getTemplateSpecializationType(
@@ -10527,6 +10527,21 @@ bool ASTContext::areCompatibleVectorTypes(QualType FirstVec,
       Second->getVectorKind() != VectorKind::RVVFixedLengthMask_4)
     return true;
 
+  // In OpenCL, treat half and _Float16 vector types as compatible.
+  if (getLangOpts().OpenCL &&
+      First->getNumElements() == Second->getNumElements()) {
+    QualType FirstElt = First->getElementType();
+    QualType SecondElt = Second->getElementType();
+
+    if ((FirstElt->isFloat16Type() && SecondElt->isHalfType()) ||
+        (FirstElt->isHalfType() && SecondElt->isFloat16Type())) {
+      if (First->getVectorKind() != VectorKind::AltiVecPixel &&
+          First->getVectorKind() != VectorKind::AltiVecBool &&
+          Second->getVectorKind() != VectorKind::AltiVecPixel &&
+          Second->getVectorKind() != VectorKind::AltiVecBool)
+        return true;
+    }
+  }
   return false;
 }
 
@@ -12040,7 +12055,7 @@ bool ASTContext::mergeExtParameterInfo(
 void ASTContext::ResetObjCLayout(const ObjCInterfaceDecl *D) {
   if (auto It = ObjCLayouts.find(D); It != ObjCLayouts.end()) {
     It->second = nullptr;
-    for (auto *SubClass : ObjCSubClasses[D])
+    for (auto *SubClass : ObjCSubClasses.lookup(D))
       ResetObjCLayout(SubClass);
   }
 }
@@ -12403,6 +12418,11 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
   // Read the base type.
   switch (*Str++) {
   default: llvm_unreachable("Unknown builtin type letter!");
+  case 'e':
+    assert(HowLong == 0 && !Signed && !Unsigned &&
+           "Bad modifiers used with 'e'!");
+    Type = Context.getLangOpts().OpenCL ? Context.HalfTy : Context.Float16Ty;
+    break;
   case 'x':
     assert(HowLong == 0 && !Signed && !Unsigned &&
            "Bad modifiers used with 'x'!");
@@ -12535,6 +12555,10 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
     Type = Context.AMDGPUTextureTy;
     break;
   }
+  case 'r': {
+    Type = Context.HLSLResourceTy;
+    break;
+  }
   default:
     llvm_unreachable("Unexpected target builtin type");
   }
diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp
index bf51c3e..c144174 100644
--- a/clang/lib/AST/ASTImporter.cpp
+++ b/clang/lib/AST/ASTImporter.cpp
@@ -696,6 +696,10 @@ namespace clang {
     ExpectedStmt VisitCXXFoldExpr(CXXFoldExpr *E);
     ExpectedStmt VisitRequiresExpr(RequiresExpr* E);
     ExpectedStmt VisitConceptSpecializationExpr(ConceptSpecializationExpr* E);
+    ExpectedStmt
+    VisitSubstNonTypeTemplateParmPackExpr(SubstNonTypeTemplateParmPackExpr *E);
+    ExpectedStmt VisitPseudoObjectExpr(PseudoObjectExpr *E);
+    ExpectedStmt VisitCXXParenListInitExpr(CXXParenListInitExpr *E);
 
     // Helper for chaining together multiple imports. If an error is detected,
     // subsequent imports will return default constructed nodes, so that failure
@@ -1287,6 +1291,26 @@ bool ASTNodeImporter::hasSameVisibilityContextAndLinkage(TypedefNameDecl *Found,
 
 using namespace clang;
 
+auto ASTImporter::FunctionDeclImportCycleDetector::makeScopedCycleDetection(
+    const FunctionDecl *D) {
+  const FunctionDecl *LambdaD = nullptr;
+  if (!isCycle(D) && D) {
+    FunctionDeclsWithImportInProgress.insert(D);
+    LambdaD = D;
+  }
+  return llvm::make_scope_exit([this, LambdaD]() {
+    if (LambdaD) {
+      FunctionDeclsWithImportInProgress.erase(LambdaD);
+    }
+  });
+}
+
+bool ASTImporter::FunctionDeclImportCycleDetector::isCycle(
+    const FunctionDecl *D) const {
+  return FunctionDeclsWithImportInProgress.find(D) !=
+         FunctionDeclsWithImportInProgress.end();
+}
+
 ExpectedType ASTNodeImporter::VisitType(const Type *T) {
   Importer.FromDiag(SourceLocation(), diag::err_unsupported_ast_node)
       << T->getTypeClassName();
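The cycle detector above guards VisitFunctionDecl against re-entrant imports of the same FunctionDecl. A hedged sketch of the kind of source that can trigger such recursion; the first pattern is the one already cited in the existing comment, the second is a hypothetical reproducer for the new template-parameter case, not taken from this diff:

    // The deduced return type is declared inside the function itself, so
    // importing the return type requires importing the function again.
    auto make() { struct Local { int v; }; return Local{1}; }

    // A deduced return type written in terms of a template parameter can
    // recurse the same way during import (illustrative only).
    template <typename T> auto unwrap(T t) { return t.value; }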
@@ -4034,7 +4058,10 @@ ExpectedDecl ASTNodeImporter::VisitFunctionDecl(FunctionDecl *D) {
   // E.g.: auto foo() { struct X{}; return X(); }
   // To avoid an infinite recursion when importing, create the FunctionDecl
   // with a simplified return type.
-  if (hasReturnTypeDeclaredInside(D)) {
+  // Reuse this approach for auto return types declared as typenames from
+  // template params, tracked in FindFunctionDeclImportCycle.
+  if (hasReturnTypeDeclaredInside(D) ||
+      Importer.FindFunctionDeclImportCycle.isCycle(D)) {
     FromReturnTy = Importer.getFromContext().VoidTy;
     UsedDifferentProtoType = true;
   }
@@ -4057,6 +4084,8 @@ ExpectedDecl ASTNodeImporter::VisitFunctionDecl(FunctionDecl *D) {
   }
 
   Error Err = Error::success();
+  auto ScopedReturnTypeDeclCycleDetector =
+      Importer.FindFunctionDeclImportCycle.makeScopedCycleDetection(D);
   auto T = importChecked(Err, FromTy);
   auto TInfo = importChecked(Err, FromTSI);
   auto ToInnerLocStart = importChecked(Err, D->getInnerLocStart());
@@ -9273,6 +9302,50 @@ ASTNodeImporter::VisitConceptSpecializationExpr(ConceptSpecializationExpr *E) {
       const_cast<ImplicitConceptSpecializationDecl *>(CSD), &Satisfaction);
 }
 
+ExpectedStmt ASTNodeImporter::VisitSubstNonTypeTemplateParmPackExpr(
+    SubstNonTypeTemplateParmPackExpr *E) {
+  Error Err = Error::success();
+  auto ToType = importChecked(Err, E->getType());
+  auto ToPackLoc = importChecked(Err, E->getParameterPackLocation());
+  auto ToArgPack = importChecked(Err, E->getArgumentPack());
+  auto ToAssociatedDecl = importChecked(Err, E->getAssociatedDecl());
+  if (Err)
+    return std::move(Err);
+
+  return new (Importer.getToContext()) SubstNonTypeTemplateParmPackExpr(
+      ToType, E->getValueKind(), ToPackLoc, ToArgPack, ToAssociatedDecl,
+      E->getIndex(), E->getFinal());
+}
+
+ExpectedStmt ASTNodeImporter::VisitPseudoObjectExpr(PseudoObjectExpr *E) {
+  SmallVector<Expr *, 4> ToSemantics(E->getNumSemanticExprs());
+  if (Error Err = ImportContainerChecked(E->semantics(), ToSemantics))
+    return std::move(Err);
+  auto ToSyntOrErr = import(E->getSyntacticForm());
+  if (!ToSyntOrErr)
+    return ToSyntOrErr.takeError();
+  return PseudoObjectExpr::Create(Importer.getToContext(), *ToSyntOrErr,
+                                  ToSemantics, E->getResultExprIndex());
+}
+
+ExpectedStmt
+ASTNodeImporter::VisitCXXParenListInitExpr(CXXParenListInitExpr *E) {
+  Error Err = Error::success();
+  auto ToType = importChecked(Err, E->getType());
+  auto ToInitLoc = importChecked(Err, E->getInitLoc());
+  auto ToBeginLoc = importChecked(Err, E->getBeginLoc());
+  auto ToEndLoc = importChecked(Err, E->getEndLoc());
+  if (Err)
+    return std::move(Err);
+
+  SmallVector<Expr *, 4> ToArgs(E->getInitExprs().size());
+  if (Error Err = ImportContainerChecked(E->getInitExprs(), ToArgs))
+    return std::move(Err);
+  return CXXParenListInitExpr::Create(Importer.getToContext(), ToArgs, ToType,
+                                      E->getUserSpecifiedInitExprs().size(),
+                                      ToInitLoc, ToBeginLoc, ToEndLoc);
+}
+
 Error ASTNodeImporter::ImportOverriddenMethods(CXXMethodDecl *ToMethod,
                                                CXXMethodDecl *FromMethod) {
   Error ImportErrors = Error::success();
diff --git a/clang/lib/AST/ByteCode/BitcastBuffer.h b/clang/lib/AST/ByteCode/BitcastBuffer.h
index d1d6ee3..8d32351 100644
--- a/clang/lib/AST/ByteCode/BitcastBuffer.h
+++ b/clang/lib/AST/ByteCode/BitcastBuffer.h
@@ -89,6 +89,12 @@ struct BitcastBuffer {
     Data = std::make_unique<std::byte[]>(ByteSize);
   }
 
+  /// Returns the byte at the given offset.
+  std::byte *atByte(unsigned Offset) {
+    assert(Offset < FinalBitSize.roundToBytes());
+    return Data.get() + Offset;
+  }
+
   /// Returns the buffer size in bits.
   Bits size() const { return FinalBitSize; }
   Bytes byteSize() const { return FinalBitSize.toBytes(); }
@@ -113,6 +119,13 @@ struct BitcastBuffer {
   std::unique_ptr<std::byte[]> copyBits(Bits BitOffset, Bits BitWidth,
                                         Bits FullBitWidth,
                                         Endian TargetEndianness) const;
+
+  /// Dereferences the value at the given offset.
+  template <typename T> T deref(Bytes Offset) const {
+    assert(Offset.getQuantity() < FinalBitSize.roundToBytes());
+    assert((Offset.getQuantity() + sizeof(T)) <= FinalBitSize.roundToBytes());
+    return *reinterpret_cast<T *>(Data.get() + Offset.getQuantity());
+  }
 };
 
 } // namespace interp
diff --git a/clang/lib/AST/ByteCode/ByteCodeEmitter.h b/clang/lib/AST/ByteCode/ByteCodeEmitter.h
index ca8dc38..dd18341 100644
--- a/clang/lib/AST/ByteCode/ByteCodeEmitter.h
+++ b/clang/lib/AST/ByteCode/ByteCodeEmitter.h
@@ -25,11 +25,11 @@ enum Opcode : uint32_t;
 /// An emitter which links the program to bytecode for later use.
 class ByteCodeEmitter {
 protected:
-  using LabelTy = uint32_t;
   using AddrTy = uintptr_t;
   using Local = Scope::Local;
 
 public:
+  using LabelTy = uint32_t;
   /// Compiles the function into the module.
   void compileFunc(const FunctionDecl *FuncDecl, Function *Func = nullptr);
diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp
index 6c08846..ed5493c 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -16,6 +16,7 @@
 #include "PrimType.h"
 #include "Program.h"
 #include "clang/AST/Attr.h"
+#include "llvm/Support/SaveAndRestore.h"
 
 using namespace clang;
 using namespace clang::interp;
@@ -39,7 +40,7 @@ static std::optional<bool> getBoolValue(const Expr *E) {
 template <class Emitter> class DeclScope final : public LocalScope<Emitter> {
 public:
   DeclScope(Compiler<Emitter> *Ctx, const ValueDecl *VD)
-      : LocalScope<Emitter>(Ctx, VD), Scope(Ctx->P),
+      : LocalScope<Emitter>(Ctx), Scope(Ctx->P),
         OldInitializingDecl(Ctx->InitializingDecl) {
     Ctx->InitializingDecl = VD;
     Ctx->InitStack.push_back(InitLink::Decl(VD));
@@ -476,8 +477,9 @@ bool Compiler<Emitter>::VisitCastExpr(const CastExpr *CE) {
     return this->delegate(SubExpr);
 
   case CK_BitCast: {
+    QualType CETy = CE->getType();
     // Reject bitcasts to atomic types.
-    if (CE->getType()->isAtomicType()) {
+    if (CETy->isAtomicType()) {
       if (!this->discard(SubExpr))
         return false;
       return this->emitInvalidCast(CastKind::Reinterpret, /*Fatal=*/true, CE);
@@ -494,6 +496,7 @@ bool Compiler<Emitter>::VisitCastExpr(const CastExpr *CE) {
     assert(isPtrType(*FromT));
     assert(isPtrType(*ToT));
+    bool SrcIsVoidPtr = SubExprTy->isVoidPointerType();
     if (FromT == ToT) {
       if (CE->getType()->isVoidPointerType() &&
           !SubExprTy->isFunctionPointerType()) {
@@ -502,6 +505,10 @@ bool Compiler<Emitter>::VisitCastExpr(const CastExpr *CE) {
       if (!this->visit(SubExpr))
         return false;
 
+      if (!this->emitCheckBitCast(CETy->getPointeeType().getTypePtr(),
+                                  SrcIsVoidPtr, CE))
+        return false;
+
       if (CE->getType()->isFunctionPointerType() ||
           SubExprTy->isFunctionPointerType()) {
         return this->emitFnPtrCast(CE);
@@ -767,6 +774,11 @@ bool Compiler<Emitter>::VisitCastExpr(const CastExpr *CE) {
   case CK_ToVoid:
     return discard(SubExpr);
 
+  case CK_Dynamic:
+    // This initially goes through VisitCXXDynamicCastExpr, where we emit
+    // a diagnostic if appropriate.
+    return this->delegate(SubExpr);
+
   default:
     return this->emitInvalid(CE);
   }
@@ -1033,8 +1045,15 @@ bool Compiler<Emitter>::VisitPointerArithBinOp(const BinaryOperator *E) {
     if (!visitAsPointer(RHS, *RT) || !visitAsPointer(LHS, *LT))
       return false;
 
+    QualType ElemType = LHS->getType()->getPointeeType();
+    CharUnits ElemTypeSize;
+    if (ElemType->isVoidType() || ElemType->isFunctionType())
+      ElemTypeSize = CharUnits::One();
+    else
+      ElemTypeSize = Ctx.getASTContext().getTypeSizeInChars(ElemType);
+
     PrimType IntT = classifyPrim(E->getType());
-    if (!this->emitSubPtr(IntT, E))
+    if (!this->emitSubPtr(IntT, ElemTypeSize.isZero(), E))
       return false;
     return DiscardResult ? this->emitPop(IntT, E) : true;
   }
@@ -1686,6 +1705,9 @@ bool Compiler<Emitter>::VisitFixedPointUnaryOperator(const UnaryOperator *E) {
 template <class Emitter>
 bool Compiler<Emitter>::VisitImplicitValueInitExpr(
     const ImplicitValueInitExpr *E) {
+  if (DiscardResult)
+    return true;
+
   QualType QT = E->getType();
 
   if (OptPrimType T = classify(QT))
@@ -2115,8 +2137,7 @@ bool Compiler<Emitter>::visitCallArgs(ArrayRef<const Expr *> Args,
     }
 
     UnsignedOrNone LocalIndex =
-        allocateLocal(std::move(Source), Arg->getType(),
-                      /*ExtendingDecl=*/nullptr, ScopeKind::Call);
+        allocateLocal(std::move(Source), Arg->getType(), ScopeKind::Call);
     if (!LocalIndex)
       return false;
@@ -2429,7 +2450,7 @@ bool Compiler<Emitter>::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E) {
   // and the RHS is our SubExpr.
   for (size_t I = 0; I != Size; ++I) {
     ArrayIndexScope<Emitter> IndexScope(this, I);
-    LocalScope<Emitter> BS(this);
+    LocalScope<Emitter> BS(this, ScopeKind::FullExpression);
 
     if (!this->visitArrayElemInit(I, SubExpr, SubExprT))
       return false;
@@ -2482,19 +2503,22 @@ bool Compiler<Emitter>::VisitAbstractConditionalOperator(
   const Expr *TrueExpr = E->getTrueExpr();
   const Expr *FalseExpr = E->getFalseExpr();
 
-  auto visitChildExpr = [&](const Expr *E) -> bool {
-    LocalScope<Emitter> S(this);
-    if (!this->delegate(E))
-      return false;
-    return S.destroyLocals();
-  };
-
   if (std::optional<bool> BoolValue = getBoolValue(Condition)) {
-    if (BoolValue)
-      return visitChildExpr(TrueExpr);
-    return visitChildExpr(FalseExpr);
-  }
-
+    if (*BoolValue)
+      return this->delegate(TrueExpr);
+    return this->delegate(FalseExpr);
+  }
+
+  // Force-init the scope, which creates an InitScope op. This is necessary so
+  // the scope is not only initialized in one arm of the conditional operator.
+  this->VarScope->forceInit();
+  // The TrueExpr and FalseExpr of a conditional operator do _not_ create a
+  // scope, which means the local variables created within them always exist.
+  // However, we need to later differentiate which branch was taken and only
+  // destroy the variables of the active branch. This is what the "enabled"
+  // flags on local variables are used for.
+  llvm::SaveAndRestore LAAA(this->VarScope->LocalsAlwaysEnabled,
+                            /*NewValue=*/false);
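A hedged illustration of the situation the comment above describes (not taken from the patch's test suite): both arms can materialize a destructible temporary, but a given evaluation only constructs, and must only destroy, the temporary of the taken arm.

    // Illustrative only; requires C++20 for the constexpr destructor.
    struct S {
      int v;
      constexpr S(int v) : v(v) {}
      constexpr ~S() {}
    };
    constexpr int pick(bool b) { return (b ? S(1) : S(2)).v; }
    static_assert(pick(true) == 1 && pick(false) == 2);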
   bool IsBcpCall = false;
   if (const auto *CE = dyn_cast<CallExpr>(Condition->IgnoreParenCasts());
       CE && CE->getBuiltinCallee() == Builtin::BI__builtin_constant_p) {
@@ -2524,13 +2548,15 @@ bool Compiler<Emitter>::VisitAbstractConditionalOperator(
   if (!this->jumpFalse(LabelFalse))
     return false;
 
-  if (!visitChildExpr(TrueExpr))
+  if (!this->delegate(TrueExpr))
     return false;
+
   if (!this->jump(LabelEnd))
     return false;
 
   this->emitLabel(LabelFalse);
 
-  if (!visitChildExpr(FalseExpr))
+  if (!this->delegate(FalseExpr))
     return false;
+
   this->fallthrough(LabelEnd);
   this->emitLabel(LabelEnd);
@@ -2805,10 +2831,10 @@ bool Compiler<Emitter>::VisitCompoundAssignOperator(
     return false;
   if (!this->emitLoad(*LT, E))
     return false;
-  if (LT != LHSComputationT) {
-    if (!this->emitCast(*LT, *LHSComputationT, E))
-      return false;
-  }
+  if (LT != LHSComputationT &&
+      !this->emitIntegralCast(*LT, *LHSComputationT,
+                              E->getComputationLHSType(), E))
+    return false;
 
   // Get the RHS value on the stack.
   if (!this->emitGetLocal(*RT, TempOffset, E))
@@ -2861,10 +2887,9 @@ bool Compiler<Emitter>::VisitCompoundAssignOperator(
   }
 
   // And now cast from LHSComputationT to ResultT.
-  if (ResultT != LHSComputationT) {
-    if (!this->emitCast(*LHSComputationT, *ResultT, E))
-      return false;
-  }
+  if (ResultT != LHSComputationT &&
+      !this->emitIntegralCast(*LHSComputationT, *ResultT, E->getType(), E))
+    return false;
 
   // And store the result in LHS.
   if (DiscardResult) {
@@ -2879,7 +2904,7 @@ bool Compiler<Emitter>::VisitCompoundAssignOperator(
 template <class Emitter>
 bool Compiler<Emitter>::VisitExprWithCleanups(const ExprWithCleanups *E) {
-  LocalScope<Emitter> ES(this);
+  LocalScope<Emitter> ES(this, ScopeKind::FullExpression);
   const Expr *SubExpr = E->getSubExpr();
 
   return this->delegate(SubExpr) && ES.destroyLocals(E);
@@ -2902,9 +2927,7 @@ bool Compiler<Emitter>::VisitMaterializeTemporaryExpr(
   // When we're initializing a global variable *or* the storage duration of
   // the temporary is explicitly static, create a global variable.
   OptPrimType SubExprT = classify(SubExpr);
-  bool IsStatic = E->getStorageDuration() == SD_Static;
-  if (IsStatic) {
-
+  if (E->getStorageDuration() == SD_Static) {
     UnsignedOrNone GlobalIndex = P.createGlobal(E);
     if (!GlobalIndex)
       return false;
@@ -2931,25 +2954,40 @@ bool Compiler<Emitter>::VisitMaterializeTemporaryExpr(
     return this->emitInitGlobalTempComp(TempDecl, E);
   }
 
+  ScopeKind VarScope = E->getStorageDuration() == SD_FullExpression
+                           ? ScopeKind::FullExpression
+                           : ScopeKind::Block;
+
   // For everything else, use local variables.
   if (SubExprT) {
     bool IsConst = SubExpr->getType().isConstQualified();
     bool IsVolatile = SubExpr->getType().isVolatileQualified();
-    unsigned LocalIndex = allocateLocalPrimitive(
-        E, *SubExprT, IsConst, IsVolatile, E->getExtendingDecl());
+    unsigned LocalIndex =
+        allocateLocalPrimitive(E, *SubExprT, IsConst, IsVolatile, VarScope);
+    if (!this->VarScope->LocalsAlwaysEnabled &&
+        !this->emitEnableLocal(LocalIndex, E))
+      return false;
+
     if (!this->visit(SubExpr))
       return false;
     if (!this->emitSetLocal(*SubExprT, LocalIndex, E))
       return false;
+
     return this->emitGetPtrLocal(LocalIndex, E);
   }
 
   if (!this->checkLiteralType(SubExpr))
     return false;
+
   const Expr *Inner = E->getSubExpr()->skipRValueSubobjectAdjustments();
   if (UnsignedOrNone LocalIndex =
-          allocateLocal(E, Inner->getType(), E->getExtendingDecl())) {
+          allocateLocal(E, Inner->getType(), VarScope)) {
     InitLinkScope<Emitter> ILS(this, InitLink::Temp(*LocalIndex));
+
+    if (!this->VarScope->LocalsAlwaysEnabled &&
+        !this->emitEnableLocal(*LocalIndex, E))
+      return false;
+
     if (!this->emitGetPtrLocal(*LocalIndex, E))
       return false;
     return this->visitInitializer(SubExpr) && this->emitFinishInit(E);
@@ -3217,7 +3255,8 @@ bool Compiler<Emitter>::VisitCXXConstructExpr(const CXXConstructExpr *E) {
     return this->visitInitializer(E->getArg(0));
 
   // Zero initialization.
-  if (E->requiresZeroInitialization()) {
+  bool ZeroInit = E->requiresZeroInitialization();
+  if (ZeroInit) {
     const Record *R = getRecord(E->getType());
 
     if (!this->visitZeroRecordInitializer(R, E))
@@ -3228,6 +3267,19 @@ bool Compiler<Emitter>::VisitCXXConstructExpr(const CXXConstructExpr *E) {
     return true;
   }
 
+  // Avoid materializing a temporary for an elidable copy/move constructor.
+  if (!ZeroInit && E->isElidable()) {
+    const Expr *SrcObj = E->getArg(0);
+    assert(SrcObj->isTemporaryObject(Ctx.getASTContext(), Ctor->getParent()));
+    assert(Ctx.getASTContext().hasSameUnqualifiedType(E->getType(),
+                                                      SrcObj->getType()));
+    if (const auto *ME = dyn_cast<MaterializeTemporaryExpr>(SrcObj)) {
+      if (!this->emitCheckFunctionDecl(Ctor, E))
+        return false;
+      return this->visitInitializer(ME->getSubExpr());
+    }
+  }
+
   const Function *Func = getFunction(Ctor);
 
   if (!Func)
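The elidable-constructor shortcut above initializes the result directly from the temporary's initializer instead of constructing and copying. A sketch of the kind of code it affects, assuming a pre-C++17 mode where the copy is elidable rather than guaranteed-elided (illustrative, not from the patch):

    // Under -std=c++14 this copy-initialization contains an elidable copy
    // constructor call whose argument is a materialized temporary.
    struct Widget {
      int v;
      constexpr Widget(int v) : v(v) {}
      constexpr Widget(const Widget &) = default;
    };
    constexpr Widget w = Widget(3);
    static_assert(w.v == 3, "");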
@@ -4157,7 +4209,7 @@ bool Compiler<Emitter>::VisitStmtExpr(const StmtExpr *E) {
   StmtExprScope<Emitter> SS(this);
 
   const CompoundStmt *CS = E->getSubStmt();
-  const Stmt *Result = CS->getStmtExprResult();
+  const Stmt *Result = CS->body_back();
   for (const Stmt *S : CS->body()) {
     if (S != Result) {
       if (!this->visitStmt(S))
@@ -4221,7 +4273,8 @@ template <class Emitter> bool Compiler<Emitter>::visit(const Expr *E) {
   // Create local variable to hold the return value.
   if (!E->isGLValue() && !canClassify(E->getType())) {
-    UnsignedOrNone LocalIndex = allocateLocal(stripDerivedToBaseCasts(E));
+    UnsignedOrNone LocalIndex = allocateLocal(
+        stripDerivedToBaseCasts(E), QualType(), ScopeKind::FullExpression);
     if (!LocalIndex)
       return false;
@@ -4578,9 +4631,11 @@ bool Compiler<Emitter>::emitConst(const APSInt &Value, const Expr *E) {
 }
 
 template <class Emitter>
-unsigned Compiler<Emitter>::allocateLocalPrimitive(
-    DeclTy &&Src, PrimType Ty, bool IsConst, bool IsVolatile,
-    const ValueDecl *ExtendingDecl, ScopeKind SC, bool IsConstexprUnknown) {
+unsigned Compiler<Emitter>::allocateLocalPrimitive(DeclTy &&Src, PrimType Ty,
+                                                   bool IsConst,
+                                                   bool IsVolatile,
+                                                   ScopeKind SC,
+                                                   bool IsConstexprUnknown) {
   // FIXME: There are cases where Src.is<Expr*>() is wrong, e.g.
   // (int){12} in C. Consider using Expr::isTemporaryObject() instead
   // or isa<MaterializeTemporaryExpr>().
@@ -4591,16 +4646,12 @@ unsigned Compiler<Emitter>::allocateLocalPrimitive(
   Scope::Local Local = this->createLocal(D);
   if (auto *VD = dyn_cast_if_present<ValueDecl>(Src.dyn_cast<const Decl *>()))
     Locals.insert({VD, Local});
-  if (ExtendingDecl)
-    VarScope->addExtended(Local, ExtendingDecl);
-  else
-    VarScope->addForScopeKind(Local, SC);
+  VarScope->addForScopeKind(Local, SC);
   return Local.Offset;
 }
 
 template <class Emitter>
 UnsignedOrNone Compiler<Emitter>::allocateLocal(DeclTy &&Src, QualType Ty,
-                                                const ValueDecl *ExtendingDecl,
                                                 ScopeKind SC,
                                                 bool IsConstexprUnknown) {
   const ValueDecl *Key = nullptr;
@@ -4628,10 +4679,7 @@ UnsignedOrNone Compiler<Emitter>::allocateLocal(DeclTy &&Src, QualType Ty,
   Scope::Local Local = this->createLocal(D);
   if (Key)
     Locals.insert({Key, Local});
-  if (ExtendingDecl)
-    VarScope->addExtended(Local, ExtendingDecl);
-  else
-    VarScope->addForScopeKind(Local, SC);
+  VarScope->addForScopeKind(Local, SC);
   return Local.Offset;
 }
@@ -4683,7 +4731,7 @@ const Function *Compiler<Emitter>::getFunction(const FunctionDecl *FD) {
 template <class Emitter>
 bool Compiler<Emitter>::visitExpr(const Expr *E, bool DestroyToplevelScope) {
-  LocalScope<Emitter> RootScope(this);
+  LocalScope<Emitter> RootScope(this, ScopeKind::FullExpression);
 
   // If we won't destroy the toplevel scope, check for memory leaks first.
   if (!DestroyToplevelScope) {
@@ -4777,7 +4825,7 @@ bool Compiler<Emitter>::visitDeclAndReturn(const VarDecl *VD, const Expr *Init,
            LS.destroyLocals() && this->emitCheckAllocations(VD);
   }
 
-  LocalScope<Emitter> VDScope(this, VD);
+  LocalScope<Emitter> VDScope(this);
   if (!this->visitVarDecl(VD, Init, /*Toplevel=*/true))
     return false;
@@ -4888,7 +4936,7 @@ Compiler<Emitter>::visitVarDecl(const VarDecl *VD, const Expr *Init,
   if (VarT) {
     unsigned Offset = this->allocateLocalPrimitive(
         VD, *VarT, VD->getType().isConstQualified(),
-        VD->getType().isVolatileQualified(), nullptr, ScopeKind::Block,
+        VD->getType().isVolatileQualified(), ScopeKind::Block,
         IsConstexprUnknown);
 
     if (!Init)
@@ -4908,7 +4956,7 @@ Compiler<Emitter>::visitVarDecl(const VarDecl *VD, const Expr *Init,
   }
   // Local composite variables.
   if (UnsignedOrNone Offset = this->allocateLocal(
-          VD, VD->getType(), nullptr, ScopeKind::Block, IsConstexprUnknown)) {
+          VD, VD->getType(), ScopeKind::Block, IsConstexprUnknown)) {
     if (!Init)
       return true;
@@ -5412,8 +5460,7 @@ bool Compiler<Emitter>::VisitCXXThisExpr(const CXXThisExpr *E) {
   unsigned EndIndex = 0;
   // Find the init list.
   for (StartIndex = InitStack.size() - 1; StartIndex > 0; --StartIndex) {
-    if (InitStack[StartIndex].Kind == InitLink::K_InitList ||
-        InitStack[StartIndex].Kind == InitLink::K_This) {
+    if (InitStack[StartIndex].Kind == InitLink::K_DIE) {
       EndIndex = StartIndex;
       --StartIndex;
       break;
@@ -5426,7 +5473,8 @@ bool Compiler<Emitter>::VisitCXXThisExpr(const CXXThisExpr *E) {
       continue;
 
     if (InitStack[StartIndex].Kind != InitLink::K_Field &&
-        InitStack[StartIndex].Kind != InitLink::K_Elem)
+        InitStack[StartIndex].Kind != InitLink::K_Elem &&
+        InitStack[StartIndex].Kind != InitLink::K_DIE)
       break;
   }
@@ -5437,7 +5485,8 @@ bool Compiler<Emitter>::VisitCXXThisExpr(const CXXThisExpr *E) {
   // Emit the instructions.
   for (unsigned I = StartIndex; I != (EndIndex + 1); ++I) {
-    if (InitStack[I].Kind == InitLink::K_InitList)
+    if (InitStack[I].Kind == InitLink::K_InitList ||
+        InitStack[I].Kind == InitLink::K_DIE)
       continue;
     if (!InitStack[I].template emit<Emitter>(this, E))
       return false;
@@ -5622,19 +5671,24 @@ bool Compiler<Emitter>::visitReturnStmt(const ReturnStmt *RS) {
 }
 
 template <class Emitter> bool Compiler<Emitter>::visitIfStmt(const IfStmt *IS) {
+  LocalScope<Emitter> IfScope(this);
+
   auto visitChildStmt = [&](const Stmt *S) -> bool {
     LocalScope<Emitter> SScope(this);
     if (!visitStmt(S))
       return false;
     return SScope.destroyLocals();
   };
-  if (auto *CondInit = IS->getInit())
+
+  if (auto *CondInit = IS->getInit()) {
     if (!visitStmt(CondInit))
       return false;
+  }
 
-  if (const DeclStmt *CondDecl = IS->getConditionVariableDeclStmt())
+  if (const DeclStmt *CondDecl = IS->getConditionVariableDeclStmt()) {
     if (!visitDeclStmt(CondDecl))
       return false;
+  }
 
   // Save ourselves compiling some code and the jumps, etc. if the condition is
   // statically known to be either true or false. We could look at more cases
@@ -5658,8 +5712,11 @@ template <class Emitter> bool Compiler<Emitter>::visitIfStmt(const IfStmt *IS) {
     if (!this->emitInv(IS))
       return false;
   } else {
+    LocalScope<Emitter> CondScope(this, ScopeKind::FullExpression);
     if (!this->visitBool(IS->getCond()))
       return false;
+    if (!CondScope.destroyLocals())
+      return false;
   }
 
   if (!this->maybeEmitDeferredVarInit(IS->getConditionVariable()))
@@ -5687,6 +5744,9 @@ template <class Emitter> bool Compiler<Emitter>::visitIfStmt(const IfStmt *IS) {
     this->emitLabel(LabelEnd);
   }
 
+  if (!IfScope.destroyLocals())
+    return false;
+
   return true;
 }
@@ -5906,8 +5966,10 @@ bool Compiler<Emitter>::visitBreakStmt(const BreakStmt *S) {
   assert(TargetLabel);
 
   for (VariableScope<Emitter> *C = this->VarScope; C != BreakScope;
-       C = C->getParent())
-    C->emitDestruction();
+       C = C->getParent()) {
+    if (!C->destroyLocals())
+      return false;
+  }
 
   return this->jump(*TargetLabel);
 }
@@ -5941,8 +6003,10 @@ bool Compiler<Emitter>::visitContinueStmt(const ContinueStmt *S) {
   assert(TargetLabel);
 
   for (VariableScope<Emitter> *C = VarScope; C != ContinueScope;
-       C = C->getParent())
-    C->emitDestruction();
+       C = C->getParent()) {
+    if (!C->destroyLocals())
+      return false;
+  }
 
   return this->jump(*TargetLabel);
 }
@@ -5983,12 +6047,41 @@ bool Compiler<Emitter>::visitSwitchStmt(const SwitchStmt *S) {
   for (const SwitchCase *SC = S->getSwitchCaseList(); SC;
        SC = SC->getNextSwitchCase()) {
     if (const auto *CS = dyn_cast<CaseStmt>(SC)) {
-      // FIXME: Implement ranges.
-      if (CS->caseStmtIsGNURange())
-        return false;
       CaseLabels[SC] = this->getLabel();
 
+      if (CS->caseStmtIsGNURange()) {
+        LabelTy EndOfRangeCheck = this->getLabel();
+        const Expr *Low = CS->getLHS();
+        const Expr *High = CS->getRHS();
+        if (Low->isValueDependent() || High->isValueDependent())
+          return false;
+
+        if (!this->emitGetLocal(CondT, CondVar, CS))
+          return false;
+        if (!this->visit(Low))
+          return false;
+        PrimType LT = this->classifyPrim(Low->getType());
+        if (!this->emitGE(LT, S))
+          return false;
+        if (!this->jumpFalse(EndOfRangeCheck))
+          return false;
+
+        if (!this->emitGetLocal(CondT, CondVar, CS))
+          return false;
+        if (!this->visit(High))
+          return false;
+        PrimType HT = this->classifyPrim(High->getType());
+        if (!this->emitLE(HT, S))
+          return false;
+        if (!this->jumpTrue(CaseLabels[CS]))
+          return false;
+        this->emitLabel(EndOfRangeCheck);
+        continue;
+      }
+
       const Expr *Value = CS->getLHS();
+      if (Value->isValueDependent())
+        return false;
       PrimType ValueT = this->classifyPrim(Value->getType());
 
       // Compare the case statement's value to the switch condition.
@@ -6022,6 +6115,7 @@ bool Compiler<Emitter>::visitSwitchStmt(const SwitchStmt *S) {
                          DefaultLabel);
   if (!this->visitStmt(S->getBody()))
     return false;
+  this->fallthrough(EndLabel);
   this->emitLabel(EndLabel);
 
   return LS.destroyLocals();
@@ -6029,6 +6123,7 @@ bool Compiler<Emitter>::visitSwitchStmt(const SwitchStmt *S) {
 
 template <class Emitter> bool Compiler<Emitter>::visitCaseStmt(const CaseStmt *S) {
+  this->fallthrough(CaseLabels[S]);
   this->emitLabel(CaseLabels[S]);
   return this->visitStmt(S->getSubStmt());
 }
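The case-range lowering in visitSwitchStmt above compares the switch condition against both bounds and jumps to the case label when it falls inside the range. A small example of the GNU extension this enables under constant evaluation (illustrative):

    // GNU case ranges (a GCC/Clang extension) in a constexpr switch.
    constexpr int classify(int c) {
      switch (c) {
      case '0' ... '9': return 0;
      case 'a' ... 'z': return 1;
      default:          return 2;
      }
    }
    static_assert(classify('5') == 0 && classify('q') == 1);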
@@ -6249,7 +6344,7 @@ bool Compiler<Emitter>::compileConstructor(const CXXConstructorDecl *Ctor) {
   InitLinkScope<Emitter> InitScope(this, InitLink::This());
   for (const auto *Init : Ctor->inits()) {
     // Scope needed for the initializers.
-    LocalScope<Emitter> Scope(this);
+    LocalScope<Emitter> Scope(this, ScopeKind::FullExpression);
 
     const Expr *InitExpr = Init->getInit();
     if (const FieldDecl *Member = Init->getMember()) {
@@ -6306,8 +6401,8 @@ bool Compiler<Emitter>::compileConstructor(const CXXConstructorDecl *Ctor) {
       unsigned FirstLinkOffset =
           R->getField(cast<FieldDecl>(IFD->chain()[0]))->Offset;
 
-      InitStackScope<Emitter> ISS(this, isa<CXXDefaultInitExpr>(InitExpr));
       InitLinkScope<Emitter> ILS(this, InitLink::Field(FirstLinkOffset));
+      InitStackScope<Emitter> ISS(this, isa<CXXDefaultInitExpr>(InitExpr));
 
       if (!emitFieldInitializer(NestedField, NestedFieldOffset, InitExpr,
                                 IsUnion))
         return false;
@@ -7095,9 +7190,12 @@ bool Compiler<Emitter>::VisitDeclRefExpr(const DeclRefExpr *E) {
   return this->visitDeclRef(D, E);
 }
 
-template <class Emitter> void Compiler<Emitter>::emitCleanup() {
-  for (VariableScope<Emitter> *C = VarScope; C; C = C->getParent())
-    C->emitDestruction();
+template <class Emitter> bool Compiler<Emitter>::emitCleanup() {
+  for (VariableScope<Emitter> *C = VarScope; C; C = C->getParent()) {
+    if (!C->destroyLocals())
+      return false;
+  }
+  return true;
 }
 
 template <class Emitter>
@@ -7158,6 +7256,19 @@ bool Compiler<Emitter>::emitPrimCast(PrimType FromT, PrimType ToT,
   return false;
 }
 
+template <class Emitter>
+bool Compiler<Emitter>::emitIntegralCast(PrimType FromT, PrimType ToT,
+                                         QualType ToQT, const Expr *E) {
+  assert(FromT != ToT);
+
+  if (ToT == PT_IntAP)
+    return this->emitCastAP(FromT, Ctx.getBitWidth(ToQT), E);
+  if (ToT == PT_IntAPS)
+    return this->emitCastAPS(FromT, Ctx.getBitWidth(ToQT), E);
+
+  return this->emitCast(FromT, ToT, E);
+}
+
 /// Emits __real(SubExpr)
 template <class Emitter>
 bool Compiler<Emitter>::emitComplexReal(const Expr *SubExpr) {
diff --git a/clang/lib/AST/ByteCode/Compiler.h b/clang/lib/AST/ByteCode/Compiler.h
index 5c46f75..1bd15c3 100644
--- a/clang/lib/AST/ByteCode/Compiler.h
+++ b/clang/lib/AST/ByteCode/Compiler.h
@@ -52,12 +52,14 @@ public:
     K_Decl = 3,
     K_Elem = 5,
     K_RVO = 6,
-    K_InitList = 7
+    K_InitList = 7,
+    K_DIE = 8,
   };
 
   static InitLink This() { return InitLink{K_This}; }
   static InitLink InitList() { return InitLink{K_InitList}; }
   static InitLink RVO() { return InitLink{K_RVO}; }
+  static InitLink DIE() { return InitLink{K_DIE}; }
   static InitLink Field(unsigned Offset) {
     InitLink IL{K_Field};
     IL.Offset = Offset;
@@ -102,7 +104,7 @@ struct VarCreationState {
   bool notCreated() const { return !S; }
 };
 
-enum class ScopeKind { Call, Block };
+enum class ScopeKind { Block, FullExpression, Call };
 
 /// Compilation context for expressions.
 template <class Emitter>
@@ -256,7 +258,7 @@ protected:
 protected:
   /// Emits scope cleanup instructions.
-  void emitCleanup();
+  bool emitCleanup();
 
   /// Returns a record type from a record or pointer type.
   const RecordType *getRecordTy(QualType Ty);
@@ -328,13 +330,11 @@ protected:
   /// Creates a local primitive value.
   unsigned allocateLocalPrimitive(DeclTy &&Decl, PrimType Ty, bool IsConst,
                                   bool IsVolatile = false,
-                                  const ValueDecl *ExtendingDecl = nullptr,
                                   ScopeKind SC = ScopeKind::Block,
                                   bool IsConstexprUnknown = false);
 
   /// Allocates a space storing a local given its type.
   UnsignedOrNone allocateLocal(DeclTy &&Decl, QualType Ty = QualType(),
-                               const ValueDecl *ExtendingDecl = nullptr,
                                ScopeKind = ScopeKind::Block,
                                bool IsConstexprUnknown = false);
   UnsignedOrNone allocateTemporary(const Expr *E);
@@ -391,6 +391,8 @@ private:
   }
   bool emitPrimCast(PrimType FromT, PrimType ToT, QualType ToQT, const Expr *E);
+  bool emitIntegralCast(PrimType FromT, PrimType ToT, QualType ToQT,
+                        const Expr *E);
   PrimType classifyComplexElementType(QualType T) const {
     assert(T->isAnyComplexType());
@@ -472,39 +474,18 @@ extern template class Compiler<EvalEmitter>;
 /// Scope chain managing the variable lifetimes.
 template <class Emitter> class VariableScope {
 public:
-  VariableScope(Compiler<Emitter> *Ctx, const ValueDecl *VD,
-                ScopeKind Kind = ScopeKind::Block)
-      : Ctx(Ctx), Parent(Ctx->VarScope), ValDecl(VD), Kind(Kind) {
+  VariableScope(Compiler<Emitter> *Ctx, ScopeKind Kind = ScopeKind::Block)
+      : Ctx(Ctx), Parent(Ctx->VarScope), Kind(Kind) {
+    if (Parent)
+      this->LocalsAlwaysEnabled = Parent->LocalsAlwaysEnabled;
     Ctx->VarScope = this;
   }
 
   virtual ~VariableScope() { Ctx->VarScope = this->Parent; }
 
-  virtual void addLocal(const Scope::Local &Local) {
+  virtual void addLocal(Scope::Local Local) {
     llvm_unreachable("Shouldn't be called");
   }
-
-  void addExtended(const Scope::Local &Local, const ValueDecl *ExtendingDecl) {
-    // Walk up the chain of scopes until we find the one for ExtendingDecl.
-    // If there is no such scope, attach it to the parent one.
-    VariableScope *P = this;
-    while (P) {
-      if (P->ValDecl == ExtendingDecl) {
-        P->addLocal(Local);
-        return;
-      }
-      P = P->Parent;
-      if (!P)
-        break;
-    }
-
-    // Use the parent scope.
-    if (this->Parent)
-      this->Parent->addLocal(Local);
-    else
-      this->addLocal(Local);
-  }
-
   /// Like addExtended, but adds to the nearest scope of the given kind.
   void addForScopeKind(const Scope::Local &Local, ScopeKind Kind) {
     VariableScope *P = this;
@@ -522,18 +503,22 @@ public:
     this->addLocal(Local);
   }
 
-  virtual void emitDestruction() {}
   virtual bool emitDestructors(const Expr *E = nullptr) { return true; }
   virtual bool destroyLocals(const Expr *E = nullptr) { return true; }
+  virtual void forceInit() {}
 
   VariableScope *getParent() const { return Parent; }
   ScopeKind getKind() const { return Kind; }
 
+  /// Whether locals added to this scope are enabled by default.
+  /// This is almost always true, except for the two branches
+  /// of a conditional operator.
+  bool LocalsAlwaysEnabled = true;
+
 protected:
   /// Compiler instance.
   Compiler<Emitter> *Ctx;
   /// Link to the parent scope.
   VariableScope *Parent;
-  const ValueDecl *ValDecl = nullptr;
   ScopeKind Kind;
 };
 
@@ -541,9 +526,7 @@ protected:
 template <class Emitter> class LocalScope : public VariableScope<Emitter> {
 public:
   LocalScope(Compiler<Emitter> *Ctx, ScopeKind Kind = ScopeKind::Block)
-      : VariableScope<Emitter>(Ctx, nullptr, Kind) {}
-  LocalScope(Compiler<Emitter> *Ctx, const ValueDecl *VD)
-      : VariableScope<Emitter>(Ctx, VD) {}
+      : VariableScope<Emitter>(Ctx, Kind) {}
 
   /// Emit a Destroy op for this scope.
   ~LocalScope() override {
@@ -552,16 +535,6 @@ public:
     this->Ctx->emitDestroy(*Idx, SourceInfo{});
     removeStoredOpaqueValues();
   }
-
-  /// Overriden to support explicit destruction.
-  void emitDestruction() override {
-    if (!Idx)
-      return;
-
-    this->emitDestructors();
-    this->Ctx->emitDestroy(*Idx, SourceInfo{});
-  }
-
   /// Explicit destruction of local variables.
   bool destroyLocals(const Expr *E = nullptr) override {
     if (!Idx)
@@ -574,29 +547,60 @@ public:
     return Success;
   }
 
-  void addLocal(const Scope::Local &Local) override {
+  void addLocal(Scope::Local Local) override {
     if (!Idx) {
       Idx = static_cast<unsigned>(this->Ctx->Descriptors.size());
       this->Ctx->Descriptors.emplace_back();
       this->Ctx->emitInitScope(*Idx, {});
     }
 
+    Local.EnabledByDefault = this->LocalsAlwaysEnabled;
     this->Ctx->Descriptors[*Idx].emplace_back(Local);
   }
 
+  /// Force-initialize this scope. Usually, scopes are lazily initialized when
+  /// the first local variable is created, but in scenarios with conditional
+  /// operators, we need to ensure the scope is initialized just in case one
+  /// of the arms will create a local and the other won't. In such a case, the
+  /// InitScope() op would be part of the arm that created the local.
+  void forceInit() override {
+    if (!Idx) {
+      Idx = static_cast<unsigned>(this->Ctx->Descriptors.size());
+      this->Ctx->Descriptors.emplace_back();
+      this->Ctx->emitInitScope(*Idx, {});
+    }
+  }
+
   bool emitDestructors(const Expr *E = nullptr) override {
     if (!Idx)
       return true;
+
     // Emit destructor calls for local variables of record
     // type with a destructor.
     for (Scope::Local &Local : llvm::reverse(this->Ctx->Descriptors[*Idx])) {
       if (Local.Desc->hasTrivialDtor())
         continue;
-      if (!this->Ctx->emitGetPtrLocal(Local.Offset, E))
-        return false;
-      if (!this->Ctx->emitDestructionPop(Local.Desc, Local.Desc->getLoc()))
-        return false;
+      if (!Local.EnabledByDefault) {
+        typename Emitter::LabelTy EndLabel = this->Ctx->getLabel();
+        if (!this->Ctx->emitGetLocalEnabled(Local.Offset, E))
+          return false;
+        if (!this->Ctx->jumpFalse(EndLabel))
+          return false;
+
+        if (!this->Ctx->emitGetPtrLocal(Local.Offset, E))
+          return false;
+
+        if (!this->Ctx->emitDestructionPop(Local.Desc, Local.Desc->getLoc()))
+          return false;
+
+        this->Ctx->emitLabel(EndLabel);
+      } else {
+        if (!this->Ctx->emitGetPtrLocal(Local.Offset, E))
+          return false;
+        if (!this->Ctx->emitDestructionPop(Local.Desc, Local.Desc->getLoc()))
+          return false;
+      }
 
       removeIfStoredOpaqueValue(Local);
     }
@@ -668,22 +672,29 @@ public:
 
   ~InitLinkScope() { this->Ctx->InitStack.pop_back(); }
 
-private:
+public:
   Compiler<Emitter> *Ctx;
 };
 
 template <class Emitter> class InitStackScope final {
 public:
   InitStackScope(Compiler<Emitter> *Ctx, bool Active)
-      : Ctx(Ctx), OldValue(Ctx->InitStackActive) {
+      : Ctx(Ctx), OldValue(Ctx->InitStackActive), Active(Active) {
     Ctx->InitStackActive = Active;
+    if (Active)
+      Ctx->InitStack.push_back(InitLink::DIE());
   }
 
-  ~InitStackScope() { this->Ctx->InitStackActive = OldValue; }
+  ~InitStackScope() {
+    this->Ctx->InitStackActive = OldValue;
+    if (Active)
+      Ctx->InitStack.pop_back();
+  }
 
 private:
   Compiler<Emitter> *Ctx;
   bool OldValue;
+  bool Active;
 };
 
 } // namespace interp
diff --git a/clang/lib/AST/ByteCode/Context.cpp b/clang/lib/AST/ByteCode/Context.cpp
index 12bf3a3..74ec986 100644
--- a/clang/lib/AST/ByteCode/Context.cpp
+++ b/clang/lib/AST/ByteCode/Context.cpp
@@ -21,7 +21,6 @@
 #include "clang/AST/ASTLambda.h"
 #include "clang/AST/Expr.h"
 #include "clang/Basic/TargetInfo.h"
-#include "llvm/Support/SystemZ/zOSSupport.h"
 
 using namespace clang;
 using namespace clang::interp;
diff --git a/clang/lib/AST/ByteCode/Context.h b/clang/lib/AST/ByteCode/Context.h
index f5fa977..a21bb3e 100644
--- a/clang/lib/AST/ByteCode/Context.h
+++ b/clang/lib/AST/ByteCode/Context.h
@@ -98,20 +98,22 @@ public:
     return classify(E->getType());
   }
 
-  bool canClassify(QualType T) {
+  bool canClassify(QualType T) const {
     if (const auto *BT = dyn_cast<BuiltinType>(T)) {
       if (BT->isInteger() || BT->isFloatingPoint())
         return true;
       if (BT->getKind() == BuiltinType::Bool)
         return true;
     }
+    if (T->isPointerOrReferenceType())
+      return true;
 
     if (T->isArrayType() || T->isRecordType() || T->isAnyComplexType() ||
         T->isVectorType())
       return false;
 
     return classify(T) != std::nullopt;
   }
-  bool canClassify(const Expr *E) {
+  bool canClassify(const Expr *E) const {
     if (E->isGLValue())
       return true;
     return canClassify(E->getType());
diff --git a/clang/lib/AST/ByteCode/Disasm.cpp b/clang/lib/AST/ByteCode/Disasm.cpp
index fd0903f..35937e3 100644
--- a/clang/lib/AST/ByteCode/Disasm.cpp
+++ b/clang/lib/AST/ByteCode/Disasm.cpp
@@ -138,9 +138,16 @@ static size_t getNumDisplayWidth(size_t N) {
   return L;
 }
 
-LLVM_DUMP_METHOD void Function::dump() const { dump(llvm::errs()); }
+LLVM_DUMP_METHOD void Function::dump(CodePtr PC) const {
+  dump(llvm::errs(), PC);
+}
 
-LLVM_DUMP_METHOD void Function::dump(llvm::raw_ostream &OS) const {
+LLVM_DUMP_METHOD void Function::dump(llvm::raw_ostream &OS,
+                                     CodePtr OpPC) const {
+  if (OpPC) {
+    assert(OpPC >= getCodeBegin());
+    assert(OpPC <= getCodeEnd());
+  }
   {
     ColorScope SC(OS, true, {llvm::raw_ostream::BRIGHT_GREEN, true});
     OS << getName() << " " << (const void *)this << "\n";
@@ -154,6 +161,7 @@ LLVM_DUMP_METHOD void Function::dump(llvm::raw_ostream &OS) const {
     size_t Addr;
     std::string Op;
     bool IsJump;
+    bool CurrentOp = false;
     llvm::SmallVector<std::string> Args;
   };
@@ -171,6 +179,7 @@ LLVM_DUMP_METHOD void Function::dump(llvm::raw_ostream &OS) const {
     auto Op = PC.read<Opcode>();
     Text.Addr = Addr;
     Text.IsJump = isJumpOpcode(Op);
+    Text.CurrentOp = (PC == OpPC);
     switch (Op) {
 #define GET_DISASM
 #include "Opcodes.inc"
@@ -198,9 +207,15 @@ LLVM_DUMP_METHOD void Function::dump(llvm::raw_ostream &OS) const {
   Text.reserve(Code.size());
   size_t LongestLine = 0;
   // Print code to a string, one at a time.
-  for (auto C : Code) {
+  for (const auto &C : Code) {
     std::string Line;
     llvm::raw_string_ostream LS(Line);
+    if (OpPC) {
+      if (C.CurrentOp)
+        LS << " * ";
+      else
+        LS << "   ";
+    }
     LS << C.Addr;
     LS.indent(LongestAddr - getNumDisplayWidth(C.Addr) + 4);
     LS << C.Op;
@@ -436,8 +451,28 @@ LLVM_DUMP_METHOD void Descriptor::dumpFull(unsigned Offset,
       FO += ElemDesc->getAllocSize();
     }
+  } else if (isPrimitiveArray()) {
+    OS.indent(Spaces) << "Elements: " << getNumElems() << '\n';
+    OS.indent(Spaces) << "Element type: " << primTypeToString(getPrimType())
+                      << '\n';
+    unsigned FO = Offset + sizeof(InitMapPtr);
+    for (unsigned I = 0; I != getNumElems(); ++I) {
+      OS.indent(Spaces) << "Element " << I << " offset: " << FO << '\n';
+      FO += getElemSize();
+    }
   } else if (isRecord()) {
     ElemRecord->dump(OS, Indent + 1, Offset);
+    unsigned I = 0;
+    for (const Record::Field &F : ElemRecord->fields()) {
+      OS.indent(Spaces) << "- Field " << I << ": ";
+      {
+        ColorScope SC(OS, true, {llvm::raw_ostream::BRIGHT_RED, true});
+        OS << F.Decl->getName();
+      }
+      OS << ". Offset " << (Offset + F.Offset) << "\n";
+      F.Desc->dumpFull(Offset + F.Offset, Indent + 1);
+      ++I;
+    }
   } else if (isPrimitive()) {
   } else {
   }
@@ -484,8 +519,14 @@ LLVM_DUMP_METHOD void InterpFrame::dump(llvm::raw_ostream &OS,
     OS << " (" << F->getName() << ")";
   }
   OS << "\n";
-  OS.indent(Spaces) << "This: " << getThis() << "\n";
-  OS.indent(Spaces) << "RVO: " << getRVOPtr() << "\n";
+  if (hasThisPointer())
+    OS.indent(Spaces) << "This: " << getThis() << "\n";
+  else
+    OS.indent(Spaces) << "This: -\n";
+  if (Func && Func->hasRVO())
+    OS.indent(Spaces) << "RVO: " << getRVOPtr() << "\n";
+  else
+    OS.indent(Spaces) << "RVO: -\n";
   OS.indent(Spaces) << "Depth: " << Depth << "\n";
   OS.indent(Spaces) << "ArgSize: " << ArgSize << "\n";
   OS.indent(Spaces) << "Args: " << (void *)Args << "\n";
diff --git a/clang/lib/AST/ByteCode/EvalEmitter.cpp b/clang/lib/AST/ByteCode/EvalEmitter.cpp
index 0073217..a2e01ef 100644
--- a/clang/lib/AST/ByteCode/EvalEmitter.cpp
+++ b/clang/lib/AST/ByteCode/EvalEmitter.cpp
@@ -113,7 +113,7 @@ Scope::Local EvalEmitter::createLocal(Descriptor *D) {
   InlineDescriptor &Desc = *reinterpret_cast<InlineDescriptor *>(B->rawData());
   Desc.Desc = D;
   Desc.Offset = sizeof(InlineDescriptor);
-  Desc.IsActive = true;
+  Desc.IsActive = false;
   Desc.IsBase = false;
   Desc.IsFieldMutable = false;
   Desc.IsConst = false;
@@ -322,6 +322,33 @@ bool EvalEmitter::emitDestroy(uint32_t I, SourceInfo Info) {
   return true;
 }
 
+bool EvalEmitter::emitGetLocalEnabled(uint32_t I, SourceInfo Info) {
+  if (!isActive())
+    return true;
+
+  Block *B = getLocal(I);
+  const InlineDescriptor &Desc =
+      *reinterpret_cast<InlineDescriptor *>(B->rawData());
+
+  S.Stk.push<bool>(Desc.IsActive);
+  return true;
+}
+
+bool EvalEmitter::emitEnableLocal(uint32_t I, SourceInfo Info) {
+  if (!isActive())
+    return true;
+
+  // FIXME: This is a little dirty, but to avoid adding a flag to
+  // InlineDescriptor that's only ever useful on the toplevel of local
+  // variables, we reuse the IsActive flag for the enabled state. We should
+  // probably use a different struct than InlineDescriptor for the block-level
+  // inline descriptor of local variables.
+  Block *B = getLocal(I);
+  InlineDescriptor &Desc = *reinterpret_cast<InlineDescriptor *>(B->rawData());
+  Desc.IsActive = true;
+  return true;
+}
+
 /// Global temporaries (LifetimeExtendedTemporary) carry their value
 /// around as an APValue, which codegen accesses.
 /// We set their value once when creating them, but we don't update it
diff --git a/clang/lib/AST/ByteCode/Floating.h b/clang/lib/AST/ByteCode/Floating.h
index 659892e..cc918dc 100644
--- a/clang/lib/AST/ByteCode/Floating.h
+++ b/clang/lib/AST/ByteCode/Floating.h
@@ -45,7 +45,8 @@ private:
     if (singleWord())
       return APFloat(getSemantics(), APInt(BitWidth, Val));
     unsigned NumWords = numWords();
-    return APFloat(getSemantics(), APInt(BitWidth, NumWords, Memory));
+    return APFloat(getSemantics(),
+                   APInt(BitWidth, llvm::ArrayRef(Memory, NumWords)));
  }
 
 public:
diff --git a/clang/lib/AST/ByteCode/Function.h b/clang/lib/AST/ByteCode/Function.h
index 95add58..80283af 100644
--- a/clang/lib/AST/ByteCode/Function.h
+++ b/clang/lib/AST/ByteCode/Function.h
@@ -41,6 +41,8 @@ public:
     unsigned Offset;
     /// Descriptor of the local.
     Descriptor *Desc;
+    /// If the cleanup for this local should be emitted.
+    bool EnabledByDefault = true;
   };
 
   using LocalVectorTy = llvm::SmallVector<Local, 8>;
@@ -310,8 +312,8 @@ private:
 public:
   /// Dumps the disassembled bytecode to \c llvm::errs().
-  void dump() const;
-  void dump(llvm::raw_ostream &OS) const;
+  void dump(CodePtr PC = {}) const;
+  void dump(llvm::raw_ostream &OS, CodePtr PC = {}) const;
 };
 
 } // namespace interp
diff --git a/clang/lib/AST/ByteCode/Integral.h b/clang/lib/AST/ByteCode/Integral.h
index 1318024..e90f1a9 100644
--- a/clang/lib/AST/ByteCode/Integral.h
+++ b/clang/lib/AST/ByteCode/Integral.h
@@ -202,30 +202,21 @@ public:
   static Integral min(unsigned NumBits) { return Integral(Min); }
   static Integral max(unsigned NumBits) { return Integral(Max); }
+  static Integral zero(unsigned BitWidth = 0) { return from(0); }
 
-  template <typename ValT> static Integral from(ValT Value) {
-    if constexpr (std::is_integral<ValT>::value)
+  template <typename ValT>
+  static Integral from(ValT Value, unsigned NumBits = 0) {
+    if constexpr (std::is_integral_v<ValT>)
       return Integral(Value);
     else
-      return Integral::from(static_cast<Integral::ReprT>(Value));
+      return Integral(static_cast<Integral::ReprT>(Value));
   }
 
   template <unsigned SrcBits, bool SrcSign>
-  static std::enable_if_t<SrcBits != 0, Integral>
-  from(Integral<SrcBits, SrcSign> Value) {
+  static Integral from(Integral<SrcBits, SrcSign> Value) {
     return Integral(Value.V);
   }
 
-  static Integral zero(unsigned BitWidth = 0) { return from(0); }
-
-  template <typename T> static Integral from(T Value, unsigned NumBits) {
-    return Integral(Value);
-  }
-
-  static bool inRange(int64_t Value, unsigned NumBits) {
-    return CheckRange<ReprT, Min, Max>(Value);
-  }
-
   static bool increment(Integral A, Integral *R) {
     return add(A, Integral(ReprT(1)), A.bitWidth(), R);
   }
@@ -328,13 +319,6 @@ private:
       return false;
     }
   }
-  template <typename T, T Min, T Max> static bool CheckRange(int64_t V) {
-    if constexpr (std::is_signed_v<T>) {
-      return Min <= V && V <= Max;
-    } else {
-      return V >= 0 && static_cast<uint64_t>(V) <= Max;
-    }
-  }
 };
 
 template <unsigned Bits, bool Signed>
diff --git a/clang/lib/AST/ByteCode/IntegralAP.h b/clang/lib/AST/ByteCode/IntegralAP.h
index 6683db9..b11e6ee 100644
--- a/clang/lib/AST/ByteCode/IntegralAP.h
+++ b/clang/lib/AST/ByteCode/IntegralAP.h
@@ -63,7 +63,7 @@ public:
     if (singleWord())
       return APInt(BitWidth, Val, Signed);
     unsigned NumWords = llvm::APInt::getNumWords(BitWidth);
-    return llvm::APInt(BitWidth, NumWords, Memory);
+    return llvm::APInt(BitWidth, llvm::ArrayRef(Memory, NumWords));
   }
 
 public:
diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp
index a2fb0fb..80ef656 100644
--- a/clang/lib/AST/ByteCode/Interp.cpp
+++ b/clang/lib/AST/ByteCode/Interp.cpp
@@ -919,33 +919,8 @@ bool CheckInit(InterpState &S, CodePtr OpPC, const Pointer &Ptr) {
   return true;
 }
 
-static bool CheckCallable(InterpState &S, CodePtr OpPC, const Function *F) {
-
-  if (F->isVirtual() && !S.getLangOpts().CPlusPlus20) {
-    const SourceLocation &Loc = S.Current->getLocation(OpPC);
-    S.CCEDiag(Loc, diag::note_constexpr_virtual_call);
-    return false;
-  }
-
-  if (S.checkingPotentialConstantExpression() && S.Current->getDepth() != 0)
-    return false;
-
-  if (F->isValid() && F->hasBody() && F->isConstexpr())
-    return true;
-
-  const FunctionDecl *DiagDecl = F->getDecl();
-  const FunctionDecl *Definition = nullptr;
-  DiagDecl->getBody(Definition);
-
-  if (!Definition && S.checkingPotentialConstantExpression() &&
-      DiagDecl->isConstexpr()) {
-    return false;
-  }
-
-  // Implicitly constexpr.
-  if (F->isLambdaStaticInvoker())
-    return true;
-
+static bool diagnoseCallableDecl(InterpState &S, CodePtr OpPC,
+                                 const FunctionDecl *DiagDecl) {
   // Bail out if the function declaration itself is invalid. We will
   // have produced a relevant diagnostic while parsing it, so just
   // note the problematic sub-expression.
@@ -953,11 +928,10 @@ static bool CheckCallable(InterpState &S, CodePtr OpPC, const Function *F) {
     return Invalid(S, OpPC);
 
   // Diagnose failed assertions specially.
-  if (S.Current->getLocation(OpPC).isMacroID() &&
-      F->getDecl()->getIdentifier()) {
+  if (S.Current->getLocation(OpPC).isMacroID() && DiagDecl->getIdentifier()) {
     // FIXME: Instead of checking for an implementation-defined function,
     // check and evaluate the assert() macro.
-    StringRef Name = F->getDecl()->getName();
+    StringRef Name = DiagDecl->getName();
     bool AssertFailed =
         Name == "__assert_rtn" || Name == "__assert_fail" || Name == "_wassert";
     if (AssertFailed) {
@@ -1004,7 +978,7 @@ static bool CheckCallable(InterpState &S, CodePtr OpPC, const Function *F) {
   // for a constant expression. It might be defined at the point we're
   // actually calling it.
   bool IsExtern = DiagDecl->getStorageClass() == SC_Extern;
-  bool IsDefined = F->isDefined();
+  bool IsDefined = DiagDecl->isDefined();
   if (!IsDefined && !IsExtern && DiagDecl->isConstexpr() &&
       S.checkingPotentialConstantExpression())
     return false;
@@ -1027,6 +1001,35 @@ static bool CheckCallable(InterpState &S, CodePtr OpPC, const Function *F) {
   return false;
 }
 
+static bool CheckCallable(InterpState &S, CodePtr OpPC, const Function *F) {
+  if (F->isVirtual() && !S.getLangOpts().CPlusPlus20) {
+    const SourceLocation &Loc = S.Current->getLocation(OpPC);
+    S.CCEDiag(Loc, diag::note_constexpr_virtual_call);
+    return false;
+  }
+
+  if (S.checkingPotentialConstantExpression() && S.Current->getDepth() != 0)
+    return false;
+
+  if (F->isValid() && F->hasBody() && F->isConstexpr())
+    return true;
+
+  const FunctionDecl *DiagDecl = F->getDecl();
+  const FunctionDecl *Definition = nullptr;
+  DiagDecl->getBody(Definition);
+
+  if (!Definition && S.checkingPotentialConstantExpression() &&
+      DiagDecl->isConstexpr()) {
+    return false;
+  }
+
+  // Implicitly constexpr.
+  if (F->isLambdaStaticInvoker())
+    return true;
+
+  return diagnoseCallableDecl(S, OpPC, DiagDecl);
+}
+
 static bool CheckCallDepth(InterpState &S, CodePtr OpPC) {
   if ((S.Current->getDepth() + 1) > S.getLangOpts().ConstexprCallDepth) {
     S.FFDiag(S.Current->getSource(OpPC),
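The diagnoseCallableDecl helper above centralizes the notes emitted when a callee cannot be evaluated at compile time; CheckCallable and the new CheckFunctionDecl opcode (below) both funnel into it. A typical trigger, sketched for illustration (names are invented):

    int runtime_only();                             // no definition visible
    constexpr int wrapped() { return runtime_only(); }
    // Evaluating wrapped() in a constant expression fails, and the
    // interpreter notes runtime_only() as the non-constexpr callee:
    //   constexpr int x = wrapped();   // error, with a note on the call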
@@ -1404,7 +1407,8 @@ bool CheckLiteralType(InterpState &S, CodePtr OpPC, const Type *T) {
   // http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#1677
   // Therefore, we use the C++1y behavior.
-  if (S.Current->getFunction() && S.Current->getFunction()->isConstructor() &&
+  if (!S.Current->isBottomFrame() &&
+      S.Current->getFunction()->isConstructor() &&
       S.Current->getThis().getDeclDesc()->asDecl() == S.EvaluatingDecl) {
     return true;
   }
@@ -1431,8 +1435,12 @@ static bool getField(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
     return false;
 
   if (Ptr.isIntegralPointer()) {
-    S.Stk.push<Pointer>(Ptr.asIntPointer().atOffset(S.getASTContext(), Off));
-    return true;
+    if (std::optional<IntPointer> IntPtr =
+            Ptr.asIntPointer().atOffset(S.getASTContext(), Off)) {
+      S.Stk.push<Pointer>(std::move(*IntPtr));
+      return true;
+    }
+    return false;
   }
 
   if (!Ptr.isBlockPointer()) {
@@ -1445,6 +1453,10 @@ static bool getField(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
     return false;
   }
 
+  // We can't get the field of something that's not a record.
+  if (!Ptr.getFieldDesc()->isRecord())
+    return false;
+
   if ((Ptr.getByteOffset() + Off) >= Ptr.block()->getSize())
     return false;
 
@@ -1500,6 +1512,21 @@ bool CheckDestructor(InterpState &S, CodePtr OpPC, const Pointer &Ptr) {
   return CheckActive(S, OpPC, Ptr, AK_Destroy);
 }
 
+/// Opcode. Check if the function decl can be called at compile time.
+bool CheckFunctionDecl(InterpState &S, CodePtr OpPC, const FunctionDecl *FD) {
+  if (S.checkingPotentialConstantExpression() && S.Current->getDepth() != 0)
+    return false;
+
+  const FunctionDecl *Definition = nullptr;
+  const Stmt *Body = FD->getBody(Definition);
+
+  if (Definition && Body &&
+      (Definition->isConstexpr() || Definition->hasAttr<MSConstexprAttr>()))
+    return true;
+
+  return diagnoseCallableDecl(S, OpPC, FD);
+}
+
 static void compileFunction(InterpState &S, const Function *Func) {
   const FunctionDecl *Definition = Func->getDecl()->getDefinition();
   if (!Definition)
@@ -2058,15 +2085,15 @@ bool InvalidShuffleVectorIndex(InterpState &S, CodePtr OpPC, uint32_t Index) {
 
 bool CheckPointerToIntegralCast(InterpState &S, CodePtr OpPC,
                                 const Pointer &Ptr, unsigned BitWidth) {
+  const SourceInfo &E = S.Current->getSource(OpPC);
+  S.CCEDiag(E, diag::note_constexpr_invalid_cast)
+      << 2 << S.getLangOpts().CPlusPlus << S.Current->getRange(OpPC);
+
   if (Ptr.isDummy())
     return false;
   if (Ptr.isFunctionPointer())
     return true;
 
-  const SourceInfo &E = S.Current->getSource(OpPC);
-  S.CCEDiag(E, diag::note_constexpr_invalid_cast)
-      << 2 << S.getLangOpts().CPlusPlus << S.Current->getRange(OpPC);
-
   if (Ptr.isBlockPointer() && !Ptr.isZero()) {
     // Only allow based lvalue casts if they are lossless.
     if (S.getASTContext().getTargetInfo().getPointerWidth(LangAS::Default) !=
diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h
index 5ab9c8e..d8b8b20 100644
--- a/clang/lib/AST/ByteCode/Interp.h
+++ b/clang/lib/AST/ByteCode/Interp.h
@@ -117,6 +117,7 @@ bool CheckBitCast(InterpState &S, CodePtr OpPC, bool HasIndeterminateBits,
                   bool TargetIsUCharOrByte);
 bool CheckBCPResult(InterpState &S, const Pointer &Ptr);
 bool CheckDestructor(InterpState &S, CodePtr OpPC, const Pointer &Ptr);
+bool CheckFunctionDecl(InterpState &S, CodePtr OpPC, const FunctionDecl *FD);
 
 bool handleFixedPointOverflow(InterpState &S, CodePtr OpPC,
                               const FixedPoint &FP);
@@ -1915,6 +1916,9 @@ bool Load(InterpState &S, CodePtr OpPC) {
     return false;
   if (!Ptr.isBlockPointer())
     return false;
+  if (const Descriptor *D = Ptr.getFieldDesc();
+      !(D->isPrimitive() || D->isPrimitiveArray()) || D->getPrimType() != Name)
+    return false;
   S.Stk.push<T>(Ptr.deref<T>());
   return true;
 }
@@ -1926,6 +1930,9 @@ bool LoadPop(InterpState &S, CodePtr OpPC) {
     return false;
   if (!Ptr.isBlockPointer())
     return false;
+  if (const Descriptor *D = Ptr.getFieldDesc();
+      !(D->isPrimitive() || D->isPrimitiveArray()) || D->getPrimType() != Name)
+    return false;
   S.Stk.push<T>(Ptr.deref<T>());
   return true;
 }
@@ -2310,13 +2317,11 @@ std::optional<Pointer> OffsetHelper(InterpState &S, CodePtr OpPC,
 template <PrimType Name, class T = typename PrimConv<Name>::T>
 bool AddOffset(InterpState &S, CodePtr OpPC) {
   const T &Offset = S.Stk.pop<T>();
-  Pointer Ptr = S.Stk.pop<Pointer>();
-  if (Ptr.isBlockPointer())
-    Ptr = Ptr.expand();
+  const Pointer &Ptr = S.Stk.pop<Pointer>().expand();
 
   if (std::optional<Pointer> Result = OffsetHelper<T, ArithOp::Add>(
           S, OpPC, Offset, Ptr, /*IsPointerArith=*/true)) {
-    S.Stk.push<Pointer>(*Result);
+    S.Stk.push<Pointer>(Result->narrow());
     return true;
   }
   return false;
@@ -2325,11 +2330,11 @@ bool AddOffset(InterpState &S, CodePtr OpPC) {
 template <PrimType Name, class T = typename PrimConv<Name>::T>
 bool SubOffset(InterpState &S, CodePtr OpPC) {
   const T &Offset = S.Stk.pop<T>();
-  const Pointer &Ptr = S.Stk.pop<Pointer>();
+  const Pointer &Ptr = S.Stk.pop<Pointer>().expand();
 
   if (std::optional<Pointer> Result = OffsetHelper<T, ArithOp::Sub>(
           S, OpPC, Offset, Ptr, /*IsPointerArith=*/true)) {
-    S.Stk.push<Pointer>(*Result);
+    S.Stk.push<Pointer>(Result->narrow());
     return true;
   }
   return false;
@@ -2355,7 +2360,7 @@ static inline bool IncDecPtrHelper(InterpState &S, CodePtr OpPC,
   if (std::optional<Pointer> Result =
           OffsetHelper<OneT, Op>(S, OpPC, One, P, /*IsPointerArith=*/true)) {
     // Store the new value.
-    Ptr.deref<Pointer>() = *Result;
+    Ptr.deref<Pointer>() = Result->narrow();
     return true;
   }
   return false;
@@ -2383,9 +2388,9 @@ static inline bool DecPtr(InterpState &S, CodePtr OpPC) {
 /// 2) Pops another Pointer from the stack.
 /// 3) Pushes the difference of the indices of the two pointers on the stack.
 template <PrimType Name, class T = typename PrimConv<Name>::T>
-inline bool SubPtr(InterpState &S, CodePtr OpPC) {
-  const Pointer &LHS = S.Stk.pop<Pointer>();
-  const Pointer &RHS = S.Stk.pop<Pointer>();
+inline bool SubPtr(InterpState &S, CodePtr OpPC, bool ElemSizeIsZero) {
+  const Pointer &LHS = S.Stk.pop<Pointer>().expand();
+  const Pointer &RHS = S.Stk.pop<Pointer>().expand();
 
   if (!Pointer::hasSameBase(LHS, RHS) && S.getLangOpts().CPlusPlus) {
     S.FFDiag(S.Current->getSource(OpPC),
@@ -2395,25 +2400,23 @@ inline bool SubPtr(InterpState &S, CodePtr OpPC) {
     return false;
   }
 
-  if (LHS == RHS) {
-    S.Stk.push<T>();
-    return true;
-  }
+  if (ElemSizeIsZero) {
+    QualType PtrT = LHS.getType();
+    while (auto *AT = dyn_cast<ArrayType>(PtrT))
+      PtrT = AT->getElementType();
 
-  for (const Pointer &P : {LHS, RHS}) {
-    if (P.isZeroSizeArray()) {
-      QualType PtrT = P.getType();
-      while (auto *AT = dyn_cast<ArrayType>(PtrT))
-        PtrT = AT->getElementType();
+    QualType ArrayTy = S.getASTContext().getConstantArrayType(
+        PtrT, APInt::getZero(1), nullptr, ArraySizeModifier::Normal, 0);
+    S.FFDiag(S.Current->getSource(OpPC),
+             diag::note_constexpr_pointer_subtraction_zero_size)
+        << ArrayTy;
 
-      QualType ArrayTy = S.getASTContext().getConstantArrayType(
-          PtrT, APInt::getZero(1), nullptr, ArraySizeModifier::Normal, 0);
-      S.FFDiag(S.Current->getSource(OpPC),
-               diag::note_constexpr_pointer_subtraction_zero_size)
-          << ArrayTy;
+    return false;
+  }
 
-      return false;
-    }
+  if (LHS == RHS) {
+    S.Stk.push<T>();
+    return true;
   }
 
   int64_t A64 =
@@ -2471,6 +2474,18 @@ inline bool InitScope(InterpState &S, CodePtr OpPC, uint32_t I) {
   return true;
 }
 
+inline bool EnableLocal(InterpState &S, CodePtr OpPC, uint32_t I) {
+  assert(!S.Current->isLocalEnabled(I));
+  S.Current->enableLocal(I);
+  return true;
+}
+
+inline bool GetLocalEnabled(InterpState &S, CodePtr OpPC, uint32_t I) {
+  assert(S.Current);
+  S.Stk.push<bool>(S.Current->isLocalEnabled(I));
+  return true;
+}
+
 //===----------------------------------------------------------------------===//
 // Cast, CastFP
 //===----------------------------------------------------------------------===//
@@ -2631,10 +2646,6 @@ template <PrimType Name, class T = typename PrimConv<Name>::T>
 bool CastPointerIntegral(InterpState &S, CodePtr OpPC) {
   const Pointer &Ptr = S.Stk.pop<Pointer>();
 
-  S.CCEDiag(S.Current->getSource(OpPC), diag::note_constexpr_invalid_cast)
-      << diag::ConstexprInvalidCastKind::ThisConversionOrReinterpret
-      << S.getLangOpts().CPlusPlus << S.Current->getRange(OpPC);
-
   if (!CheckPointerToIntegralCast(S, OpPC, Ptr, T::bitWidth()))
     return Invalid(S, OpPC);
 
@@ -3078,7 +3089,7 @@ inline bool ArrayElemPtr(InterpState &S, CodePtr OpPC) {
       S.Stk.push<Pointer>(Ptr.atIndex(0).narrow());
       return true;
     }
-    S.Stk.push<Pointer>(Ptr);
+    S.Stk.push<Pointer>(Ptr.narrow());
     return true;
   }
 
@@ -3109,7 +3120,7 @@ inline bool ArrayElemPtrPop(InterpState &S, CodePtr OpPC) {
       S.Stk.push<Pointer>(Ptr.atIndex(0).narrow());
       return true;
     }
-    S.Stk.push<Pointer>(Ptr);
+    S.Stk.push<Pointer>(Ptr.narrow());
    return true;
  }
 
@@ -3184,7 +3195,7 @@ inline bool ArrayDecay(InterpState &S, CodePtr OpPC) {
   }
 
   if (Ptr.isRoot() || !Ptr.isUnknownSizeArray()) {
-    S.Stk.push<Pointer>(Ptr.atIndex(0));
+    S.Stk.push<Pointer>(Ptr.atIndex(0).narrow());
     return true;
   }
 
@@ -3283,17 +3294,69 @@ inline bool SideEffect(InterpState &S, CodePtr OpPC) {
   return S.noteSideEffect();
 }
 
+inline bool CheckBitCast(InterpState &S, CodePtr OpPC, const Type *TargetType,
+                         bool SrcIsVoidPtr) {
+  const auto &Ptr = S.Stk.peek<Pointer>();
+  if (Ptr.isZero())
+    return true;
+  if (!Ptr.isBlockPointer())
+    return true;
+
+  if (TargetType->isIntegerType())
+    return true;
+
+  if (SrcIsVoidPtr && S.getLangOpts().CPlusPlus) {
+    bool HasValidResult = !Ptr.isZero();
+
+    if (HasValidResult) {
+      if (S.getStdAllocatorCaller("allocate"))
+        return true;
+
+      const auto &E = cast<CastExpr>(S.Current->getExpr(OpPC));
+      if (S.getLangOpts().CPlusPlus26 &&
+          S.getASTContext().hasSimilarType(Ptr.getType(),
+                                           QualType(TargetType, 0)))
+        return true;
+
+      S.CCEDiag(E, diag::note_constexpr_invalid_void_star_cast)
+          << E->getSubExpr()->getType() << S.getLangOpts().CPlusPlus26
+          << Ptr.getType().getCanonicalType() << E->getType()->getPointeeType();
+    } else if (!S.getLangOpts().CPlusPlus26) {
+      const SourceInfo &E = S.Current->getSource(OpPC);
+      S.CCEDiag(E, diag::note_constexpr_invalid_cast)
+          << diag::ConstexprInvalidCastKind::CastFrom << "'void *'"
+          << S.Current->getRange(OpPC);
+    }
+  }
+
+  QualType PtrType = Ptr.getType();
+  if (PtrType->isRecordType() &&
+      PtrType->getAsRecordDecl() != TargetType->getAsRecordDecl()) {
+    S.CCEDiag(S.Current->getSource(OpPC), diag::note_constexpr_invalid_cast)
+        << diag::ConstexprInvalidCastKind::ThisConversionOrReinterpret
+        << S.getLangOpts().CPlusPlus << S.Current->getRange(OpPC);
+    return false;
+  }
+  return true;
+}
+
inline bool InvalidCast(InterpState &S, CodePtr OpPC, CastKind Kind, bool Fatal) { const SourceLocation &Loc = S.Current->getLocation(OpPC); - if (Kind == CastKind::Reinterpret) { + switch (Kind) { + case CastKind::Reinterpret: S.CCEDiag(Loc, diag::note_constexpr_invalid_cast) - << static_cast<unsigned>(Kind) << S.Current->getRange(OpPC); + << diag::ConstexprInvalidCastKind::Reinterpret + << S.Current->getRange(OpPC); return !Fatal; - } - if (Kind == CastKind::Volatile) { + case CastKind::ReinterpretLike: + S.CCEDiag(Loc, diag::note_constexpr_invalid_cast) + << diag::ConstexprInvalidCastKind::ThisConversionOrReinterpret + << S.getLangOpts().CPlusPlus << S.Current->getRange(OpPC); + return !Fatal; + case CastKind::Volatile: if (!S.checkingPotentialConstantExpression()) { const auto *E = cast<CastExpr>(S.Current->getExpr(OpPC)); if (S.getLangOpts().CPlusPlus) @@ -3304,14 +3367,13 @@ inline bool InvalidCast(InterpState &S, CodePtr OpPC, CastKind Kind, } return false; - } - if (Kind == CastKind::Dynamic) { + case CastKind::Dynamic: assert(!S.getLangOpts().CPlusPlus20); - S.CCEDiag(S.Current->getSource(OpPC), diag::note_constexpr_invalid_cast) + S.CCEDiag(Loc, diag::note_constexpr_invalid_cast) << diag::ConstexprInvalidCastKind::Dynamic; return true; } - + llvm_unreachable("Unhandled CastKind"); return false; } diff --git a/clang/lib/AST/ByteCode/InterpBlock.cpp b/clang/lib/AST/ByteCode/InterpBlock.cpp index 24825ad..dc0178a 100644 --- a/clang/lib/AST/ByteCode/InterpBlock.cpp +++ b/clang/lib/AST/ByteCode/InterpBlock.cpp @@ -102,12 +102,21 @@ bool Block::hasPointer(const Pointer *P) const { void Block::movePointersTo(Block *B) { assert(B != this); + int MDDiff = static_cast<int>(B->Desc->getMetadataSize()) - + static_cast<int>(Desc->getMetadataSize()); while (Pointers) { Pointer *P = Pointers; this->removePointer(P); P->BS.Pointee = B; + + // If the metadata size changed between the two blocks, move the pointer + // base/offset. Realistically, this should only happen when we move pointers + // from a dummy block to a global one. + P->BS.Base += MDDiff; + P->Offset += MDDiff; + B->addPointer(P); } assert(!this->hasPointers()); diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index b3ab82d..59b4896 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -48,6 +48,11 @@ static void discard(InterpStack &Stk, PrimType T) { TYPE_SWITCH(T, { Stk.discard<T>(); }); } +static uint64_t popToUInt64(const InterpState &S, const Expr *E) { + INT_TYPE_SWITCH(*S.getContext().classify(E->getType()), + return static_cast<uint64_t>(S.Stk.pop<T>())); +} + static APSInt popToAPSInt(InterpStack &Stk, PrimType T) { INT_TYPE_SWITCH(T, return Stk.pop<T>().toAPSInt()); } @@ -167,6 +172,38 @@ static llvm::APSInt convertBoolVectorToInt(const Pointer &Val) { return Result; } +// Strict double -> float conversion used for X86 PD2PS/cvtsd2ss intrinsics. +// Reject NaN/Inf/Subnormal inputs and any lossy/inexact conversions. 
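The acceptance policy the comment describes can be exercised in isolation. A minimal sketch, assuming only llvm::APFloat (editorial, not part of the patch):

#include "llvm/ADT/APFloat.h"

// True iff D passes the strict policy: finite, non-NaN, and exactly
// representable as a non-denormal IEEE single.
static bool convertsStrictly(double D) {
  llvm::APFloat V(D);
  if (V.isInfinity() || V.isNaN())
    return false;
  bool LosesInfo = false;
  llvm::APFloat::opStatus St = V.convert(llvm::APFloat::IEEEsingle(),
                                         llvm::APFloat::rmNearestTiesToEven,
                                         &LosesInfo);
  return St == llvm::APFloat::opOK && !LosesInfo && !V.isDenormal();
}

// convertsStrictly(1.5) -> true (exact in float); convertsStrictly(0.1) -> false (inexact).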
+static bool convertDoubleToFloatStrict(APFloat Src, Floating &Dst, + InterpState &S, const Expr *DiagExpr) { + if (Src.isInfinity()) { + if (S.diagnosing()) + S.CCEDiag(DiagExpr, diag::note_constexpr_float_arithmetic) << 0; + return false; + } + if (Src.isNaN()) { + if (S.diagnosing()) + S.CCEDiag(DiagExpr, diag::note_constexpr_float_arithmetic) << 1; + return false; + } + APFloat Val = Src; + bool LosesInfo = false; + APFloat::opStatus Status = Val.convert( + APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &LosesInfo); + if (LosesInfo || Val.isDenormal()) { + if (S.diagnosing()) + S.CCEDiag(DiagExpr, diag::note_constexpr_float_arithmetic_strict); + return false; + } + if (Status != APFloat::opOK) { + if (S.diagnosing()) + S.CCEDiag(DiagExpr, diag::note_invalid_subexpr_in_const_expr); + return false; + } + Dst.copy(Val); + return true; +} + static bool interp__builtin_is_constant_evaluated(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, const CallExpr *Call) { @@ -212,8 +249,7 @@ static bool interp__builtin_strcmp(InterpState &S, CodePtr OpPC, uint64_t Limit = ~static_cast<uint64_t>(0); if (ID == Builtin::BIstrncmp || ID == Builtin::BI__builtin_strncmp || ID == Builtin::BIwcsncmp || ID == Builtin::BI__builtin_wcsncmp) - Limit = popToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(2))) - .getZExtValue(); + Limit = popToUInt64(S, Call->getArg(2)); const Pointer &B = S.Stk.pop<Pointer>(); const Pointer &A = S.Stk.pop<Pointer>(); @@ -296,7 +332,7 @@ static bool interp__builtin_strcmp(InterpState &S, CodePtr OpPC, static bool interp__builtin_strlen(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, const CallExpr *Call, unsigned ID) { - const Pointer &StrPtr = S.Stk.pop<Pointer>(); + const Pointer &StrPtr = S.Stk.pop<Pointer>().expand(); if (ID == Builtin::BIstrlen || ID == Builtin::BIwcslen) diagnoseNonConstexprBuiltin(S, OpPC, ID); @@ -972,9 +1008,10 @@ static bool interp__builtin_bswap(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, const CallExpr *Call) { const APSInt &Val = popToAPSInt(S, Call->getArg(0)); - assert(Val.getActiveBits() <= 64); - - pushInteger(S, Val.byteSwap(), Call->getType()); + if (Val.getBitWidth() == 8) + pushInteger(S, Val, Call->getType()); + else + pushInteger(S, Val.byteSwap(), Call->getType()); return true; } @@ -990,7 +1027,7 @@ static bool interp__builtin_atomic_lock_free(InterpState &S, CodePtr OpPC, }; const Pointer &Ptr = S.Stk.pop<Pointer>(); - const APSInt &SizeVal = popToAPSInt(S, Call->getArg(0)); + uint64_t SizeVal = popToUInt64(S, Call->getArg(0)); // For __atomic_is_lock_free(sizeof(_Atomic(T))), if the size is a power // of two less than or equal to the maximum inline atomic width, we know it @@ -1002,7 +1039,7 @@ static bool interp__builtin_atomic_lock_free(InterpState &S, CodePtr OpPC, // x86-64 processors. // Check power-of-two. - CharUnits Size = CharUnits::fromQuantity(SizeVal.getZExtValue()); + CharUnits Size = CharUnits::fromQuantity(SizeVal); if (Size.isPowerOfTwo()) { // Check against inlining width. unsigned InlineWidthBits = @@ -1056,9 +1093,9 @@ static bool interp__builtin_c11_atomic_is_lock_free(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, const CallExpr *Call) { - const APSInt &SizeVal = popToAPSInt(S, Call->getArg(0)); + uint64_t SizeVal = popToUInt64(S, Call->getArg(0)); - CharUnits Size = CharUnits::fromQuantity(SizeVal.getZExtValue()); + CharUnits Size = CharUnits::fromQuantity(SizeVal); if (Size.isPowerOfTwo()) { // Check against inlining width. 
unsigned InlineWidthBits = @@ -1316,8 +1353,9 @@ static bool interp__builtin_infer_alloc_token(InterpState &S, CodePtr OpPC, uint64_t BitWidth = ASTCtx.getTypeSize(ASTCtx.getSizeType()); auto Mode = ASTCtx.getLangOpts().AllocTokenMode.value_or(llvm::DefaultAllocTokenMode); + auto MaxTokensOpt = ASTCtx.getLangOpts().AllocTokenMax; uint64_t MaxTokens = - ASTCtx.getLangOpts().AllocTokenMax.value_or(~0ULL >> (64 - BitWidth)); + MaxTokensOpt.value_or(0) ? *MaxTokensOpt : (~0ULL >> (64 - BitWidth)); // We do not read any of the arguments; discard them. for (int I = Call->getNumArgs() - 1; I >= 0; --I) @@ -1439,7 +1477,7 @@ static bool interp__builtin_operator_new(InterpState &S, CodePtr OpPC, Allocator.allocate(Desc, NumElems.getZExtValue(), S.Ctx.getEvalID(), DynamicAllocator::Form::Operator); assert(B); - S.Stk.push<Pointer>(Pointer(B).atIndex(0)); + S.Stk.push<Pointer>(Pointer(B).atIndex(0).narrow()); return true; } @@ -1625,51 +1663,6 @@ static bool interp__builtin_elementwise_abs(InterpState &S, CodePtr OpPC, } /// Can be called with an integer or vector as the first and only parameter. -static bool interp__builtin_elementwise_popcount(InterpState &S, CodePtr OpPC, - const InterpFrame *Frame, - const CallExpr *Call, - unsigned BuiltinID) { - assert(Call->getNumArgs() == 1); - if (Call->getArg(0)->getType()->isIntegerType()) { - APSInt Val = popToAPSInt(S, Call->getArg(0)); - - if (BuiltinID == Builtin::BI__builtin_elementwise_popcount) { - pushInteger(S, Val.popcount(), Call->getType()); - } else { - pushInteger(S, Val.reverseBits(), Call->getType()); - } - return true; - } - // Otherwise, the argument must be a vector. - assert(Call->getArg(0)->getType()->isVectorType()); - const Pointer &Arg = S.Stk.pop<Pointer>(); - assert(Arg.getFieldDesc()->isPrimitiveArray()); - const Pointer &Dst = S.Stk.peek<Pointer>(); - assert(Dst.getFieldDesc()->isPrimitiveArray()); - assert(Arg.getFieldDesc()->getNumElems() == - Dst.getFieldDesc()->getNumElems()); - - QualType ElemType = Arg.getFieldDesc()->getElemQualType(); - PrimType ElemT = *S.getContext().classify(ElemType); - unsigned NumElems = Arg.getNumElems(); - - // FIXME: Reading from uninitialized vector elements? - for (unsigned I = 0; I != NumElems; ++I) { - INT_TYPE_SWITCH_NO_BOOL(ElemT, { - if (BuiltinID == Builtin::BI__builtin_elementwise_popcount) { - Dst.elem<T>(I) = T::from(Arg.elem<T>(I).toAPSInt().popcount()); - } else { - Dst.elem<T>(I) = - T::from(Arg.elem<T>(I).toAPSInt().reverseBits().getZExtValue()); - } - }); - } - Dst.initializeAllElements(); - - return true; -} - -/// Can be called with an integer or vector as the first and only parameter. 
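User-level usage of these dual-mode elementwise builtins, for reference (illustrative, relying on Clang's vector extension):

typedef int v4si __attribute__((vector_size(16)));

void demo() {
  int Scalar = __builtin_elementwise_popcount(0xF0); // 4
  v4si Vec = __builtin_elementwise_popcount((v4si){1, 3, 7, 15});
  // Vec == {1, 2, 3, 4}; the same scalar-or-vector dispatch applies to
  // __builtin_elementwise_bitreverse and the count-zero variants below.
  (void)Scalar;
  (void)Vec;
}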
static bool interp__builtin_elementwise_countzeroes(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, @@ -1762,11 +1755,9 @@ static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned ID) { assert(Call->getNumArgs() == 3); const ASTContext &ASTCtx = S.getASTContext(); - APSInt Size = popToAPSInt(S, Call->getArg(2)); - const Pointer SrcPtr = S.Stk.pop<Pointer>(); - const Pointer DestPtr = S.Stk.pop<Pointer>(); - - assert(!Size.isSigned() && "memcpy and friends take an unsigned size"); + uint64_t Size = popToUInt64(S, Call->getArg(2)); + Pointer SrcPtr = S.Stk.pop<Pointer>().expand(); + Pointer DestPtr = S.Stk.pop<Pointer>().expand(); if (ID == Builtin::BImemcpy || ID == Builtin::BImemmove) diagnoseNonConstexprBuiltin(S, OpPC, ID); @@ -1779,7 +1770,7 @@ static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC, ID == Builtin::BI__builtin_wmemmove; // If the size is zero, we treat this as always being a valid no-op. - if (Size.isZero()) { + if (Size == 0) { S.Stk.push<Pointer>(DestPtr); return true; } @@ -1841,11 +1832,10 @@ static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC, if (WChar) { uint64_t WCharSize = ASTCtx.getTypeSizeInChars(ASTCtx.getWCharType()).getQuantity(); - Size *= APSInt(APInt(Size.getBitWidth(), WCharSize, /*IsSigned=*/false), - /*IsUnsigend=*/true); + Size *= WCharSize; } - if (Size.urem(DestElemSize) != 0) { + if (Size % DestElemSize != 0) { S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_memcpy_unsupported) << Move << WChar << 0 << DestElemType << Size << DestElemSize; @@ -1878,12 +1868,12 @@ static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC, // Check if we have enough elements to read from and write to. size_t RemainingDestBytes = RemainingDestElems * DestElemSize; size_t RemainingSrcBytes = RemainingSrcElems * SrcElemSize; - if (Size.ugt(RemainingDestBytes) || Size.ugt(RemainingSrcBytes)) { - APInt N = Size.udiv(DestElemSize); + if (Size > RemainingDestBytes || Size > RemainingSrcBytes) { + APInt N = APInt(64, Size / DestElemSize); S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_memcpy_unsupported) - << Move << WChar << (Size.ugt(RemainingSrcBytes) ? 1 : 2) - << DestElemType << toString(N, 10, /*Signed=*/false); + << Move << WChar << (Size > RemainingSrcBytes ? 
1 : 2) << DestElemType + << toString(N, 10, /*Signed=*/false); return false; } @@ -1900,18 +1890,17 @@ static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC, unsigned SrcIndex = SrcP.expand().getIndex() * SrcP.elemSize(); unsigned DstIndex = DestP.expand().getIndex() * DestP.elemSize(); - unsigned N = Size.getZExtValue(); - if ((SrcIndex <= DstIndex && (SrcIndex + N) > DstIndex) || - (DstIndex <= SrcIndex && (DstIndex + N) > SrcIndex)) { + if ((SrcIndex <= DstIndex && (SrcIndex + Size) > DstIndex) || + (DstIndex <= SrcIndex && (DstIndex + Size) > SrcIndex)) { S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_memcpy_overlap) << /*IsWChar=*/false; return false; } } - assert(Size.getZExtValue() % DestElemSize == 0); - if (!DoMemcpy(S, OpPC, SrcPtr, DestPtr, Bytes(Size.getZExtValue()).toBits())) + assert(Size % DestElemSize == 0); + if (!DoMemcpy(S, OpPC, SrcPtr, DestPtr, Bytes(Size).toBits())) return false; S.Stk.push<Pointer>(DestPtr); @@ -1928,7 +1917,7 @@ static bool interp__builtin_memcmp(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, const CallExpr *Call, unsigned ID) { assert(Call->getNumArgs() == 3); - const APSInt &Size = popToAPSInt(S, Call->getArg(2)); + uint64_t Size = popToUInt64(S, Call->getArg(2)); const Pointer &PtrB = S.Stk.pop<Pointer>(); const Pointer &PtrA = S.Stk.pop<Pointer>(); @@ -1936,7 +1925,7 @@ static bool interp__builtin_memcmp(InterpState &S, CodePtr OpPC, ID == Builtin::BIwmemcmp) diagnoseNonConstexprBuiltin(S, OpPC, ID); - if (Size.isZero()) { + if (Size == 0) { pushInteger(S, 0, Call->getType()); return true; } @@ -1964,6 +1953,10 @@ static bool interp__builtin_memcmp(InterpState &S, CodePtr OpPC, if (PtrA.isDummy() || PtrB.isDummy()) return false; + if (!CheckRange(S, OpPC, PtrA, AK_Read) || + !CheckRange(S, OpPC, PtrB, AK_Read)) + return false; + // Now, read both pointers to a buffer and compare those. BitcastBuffer BufferA( Bits(ASTCtx.getTypeSize(ElemTypeA) * PtrA.getNumElems())); @@ -1989,14 +1982,14 @@ static bool interp__builtin_memcmp(InterpState &S, CodePtr OpPC, ElemSize = ASTCtx.getTypeSizeInChars(ASTCtx.getWCharType()).getQuantity(); // The Size given for the wide variants is in wide-char units. Convert it // to bytes. - size_t ByteSize = Size.getZExtValue() * ElemSize; + size_t ByteSize = Size * ElemSize; size_t CmpSize = std::min(MinBufferSize, ByteSize); for (size_t I = 0; I != CmpSize; I += ElemSize) { if (IsWide) { INT_TYPE_SWITCH(*S.getContext().classify(ASTCtx.getWCharType()), { - T A = *reinterpret_cast<T *>(BufferA.Data.get() + I); - T B = *reinterpret_cast<T *>(BufferB.Data.get() + I); + T A = *reinterpret_cast<T *>(BufferA.atByte(I)); + T B = *reinterpret_cast<T *>(BufferB.atByte(I)); if (A < B) { pushInteger(S, -1, Call->getType()); return true; @@ -2007,8 +2000,8 @@ static bool interp__builtin_memcmp(InterpState &S, CodePtr OpPC, } }); } else { - std::byte A = BufferA.Data[I]; - std::byte B = BufferB.Data[I]; + std::byte A = BufferA.deref<std::byte>(Bytes(I)); + std::byte B = BufferB.deref<std::byte>(Bytes(I)); if (A < B) { pushInteger(S, -1, Call->getType()); @@ -2277,7 +2270,7 @@ static bool interp__builtin_object_size(InterpState &S, CodePtr OpPC, // clear, objects are whole variables. If it is set, a closest surrounding // subobject is considered the object a pointer points to. The second bit // determines if maximum or minimum of remaining bytes is computed. 
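A concrete reading of that encoding, assuming Clang's documented __builtin_object_size semantics (hypothetical example, not from the patch):

struct S { char A[4]; char B[12]; } Obj;

// Kind bit 0 clear: measure from the whole variable.
static_assert(__builtin_object_size(Obj.A, 0) == 16, "whole object");
// Kind bit 0 set: measure from the closest enclosing subobject.
static_assert(__builtin_object_size(Obj.A, 1) == 4, "subobject only");
// Kind bit 1 set: report the minimum; for an exactly known pointer the
// minimum and maximum coincide.
static_assert(__builtin_object_size(Obj.A, 2) == 16, "min == max here");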
- unsigned Kind = popToAPSInt(S, Call->getArg(1)).getZExtValue(); + unsigned Kind = popToUInt64(S, Call->getArg(1)); assert(Kind <= 3 && "unexpected kind"); bool UseFieldDesc = (Kind & 1u); bool ReportMinimum = (Kind & 2u); @@ -2405,18 +2398,39 @@ static bool interp__builtin_elementwise_int_unaryop( InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::function_ref<APInt(const APSInt &)> Fn) { assert(Call->getNumArgs() == 1); - assert(Call->getType()->isIntegerType()); // Single integer case. if (!Call->getArg(0)->getType()->isVectorType()) { + assert(Call->getType()->isIntegerType()); APSInt Src = popToAPSInt(S, Call->getArg(0)); APInt Result = Fn(Src); pushInteger(S, APSInt(std::move(Result), !Src.isSigned()), Call->getType()); return true; } - // TODO: Add vector integer handling. - return false; + // Vector case. + const Pointer &Arg = S.Stk.pop<Pointer>(); + assert(Arg.getFieldDesc()->isPrimitiveArray()); + const Pointer &Dst = S.Stk.peek<Pointer>(); + assert(Dst.getFieldDesc()->isPrimitiveArray()); + assert(Arg.getFieldDesc()->getNumElems() == + Dst.getFieldDesc()->getNumElems()); + + QualType ElemType = Arg.getFieldDesc()->getElemQualType(); + PrimType ElemT = *S.getContext().classify(ElemType); + unsigned NumElems = Arg.getNumElems(); + bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType(); + + for (unsigned I = 0; I != NumElems; ++I) { + INT_TYPE_SWITCH_NO_BOOL(ElemT, { + APSInt Src = Arg.elem<T>(I).toAPSInt(); + APInt Result = Fn(Src); + Dst.elem<T>(I) = static_cast<T>(APSInt(std::move(Result), DestUnsigned)); + }); + } + Dst.initializeAllElements(); + + return true; } static bool interp__builtin_elementwise_int_binop( @@ -2714,6 +2728,35 @@ static bool interp_builtin_horizontal_fp_binop( return true; } +static bool interp__builtin_ia32_addsub(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + // Addsub: alternates between subtraction and addition + // Result[i] = (i % 2 == 0) ? 
(a[i] - b[i]) : (a[i] + b[i]) + const Pointer &RHS = S.Stk.pop<Pointer>(); + const Pointer &LHS = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts()); + llvm::RoundingMode RM = getRoundingMode(FPO); + const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>(); + unsigned NumElems = VT->getNumElements(); + + using T = PrimConv<PT_Float>::T; + for (unsigned I = 0; I != NumElems; ++I) { + APFloat LElem = LHS.elem<T>(I).getAPFloat(); + APFloat RElem = RHS.elem<T>(I).getAPFloat(); + if (I % 2 == 0) { + // Even indices: subtract + LElem.subtract(RElem, RM); + } else { + // Odd indices: add + LElem.add(RElem, RM); + } + Dst.elem<T>(I) = static_cast<T>(LElem); + } + Dst.initializeAllElements(); + return true; +} + static bool interp__builtin_elementwise_triop_fp( InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::function_ref<APFloat(const APFloat &, const APFloat &, @@ -2808,105 +2851,26 @@ static bool interp__builtin_select(InterpState &S, CodePtr OpPC, return true; } -static bool interp__builtin_blend(InterpState &S, CodePtr OpPC, - const CallExpr *Call) { - APSInt Mask = popToAPSInt(S, Call->getArg(2)); - const Pointer &TrueVec = S.Stk.pop<Pointer>(); - const Pointer &FalseVec = S.Stk.pop<Pointer>(); - const Pointer &Dst = S.Stk.peek<Pointer>(); - - assert(FalseVec.getNumElems() == TrueVec.getNumElems()); - assert(FalseVec.getNumElems() == Dst.getNumElems()); - unsigned NumElems = FalseVec.getNumElems(); - PrimType ElemT = FalseVec.getFieldDesc()->getPrimType(); - PrimType DstElemT = Dst.getFieldDesc()->getPrimType(); - - for (unsigned I = 0; I != NumElems; ++I) { - bool MaskBit = Mask[I % 8]; - if (ElemT == PT_Float) { - assert(DstElemT == PT_Float); - Dst.elem<Floating>(I) = - MaskBit ? TrueVec.elem<Floating>(I) : FalseVec.elem<Floating>(I); - } else { - assert(DstElemT == ElemT); - INT_TYPE_SWITCH_NO_BOOL(DstElemT, { - Dst.elem<T>(I) = - static_cast<T>(MaskBit ? TrueVec.elem<T>(I).toAPSInt() - : FalseVec.elem<T>(I).toAPSInt()); - }); - } - } - Dst.initializeAllElements(); - - return true; -} - -static bool interp__builtin_ia32_pshufb(InterpState &S, CodePtr OpPC, - const CallExpr *Call) { - assert(Call->getNumArgs() == 2 && "masked forms handled via select*"); - const Pointer &Control = S.Stk.pop<Pointer>(); - const Pointer &Src = S.Stk.pop<Pointer>(); - const Pointer &Dst = S.Stk.peek<Pointer>(); - - unsigned NumElems = Dst.getNumElems(); - assert(NumElems == Control.getNumElems()); - assert(NumElems == Dst.getNumElems()); - - for (unsigned Idx = 0; Idx != NumElems; ++Idx) { - uint8_t Ctlb = static_cast<uint8_t>(Control.elem<int8_t>(Idx)); - - if (Ctlb & 0x80) { - Dst.elem<int8_t>(Idx) = 0; - } else { - unsigned LaneBase = (Idx / 16) * 16; - unsigned SrcOffset = Ctlb & 0x0F; - unsigned SrcIdx = LaneBase + SrcOffset; - - Dst.elem<int8_t>(Idx) = Src.elem<int8_t>(SrcIdx); - } - } - Dst.initializeAllElements(); - return true; -} +/// Scalar variant of AVX512 predicated select: +/// Result[i] = (Mask bit 0) ? LHS[i] : RHS[i], but only element 0 may change. +/// All other elements are taken from RHS. 
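A plain-C++ model of those semantics (editorial sketch; W is the fallback vector and A supplies the candidate element 0, mirroring the operands popped below):

static void selectScalarModel(unsigned Mask, const float A[4],
                              const float W[4], float Dst[4]) {
  for (unsigned I = 0; I != 4; ++I)
    Dst[I] = W[I];   // every element defaults to W
  if (Mask & 1u)     // only mask bit 0 is consulted
    Dst[0] = A[0];   // element 0 may be replaced by A
}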
+static bool interp__builtin_select_scalar(InterpState &S, + const CallExpr *Call) { + unsigned N = + Call->getArg(1)->getType()->getAs<VectorType>()->getNumElements(); -static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC, - const CallExpr *Call, bool IsShufHW) { - assert(Call->getNumArgs() == 2 && "masked forms handled via select*"); - APSInt ControlImm = popToAPSInt(S, Call->getArg(1)); - const Pointer &Src = S.Stk.pop<Pointer>(); + const Pointer &W = S.Stk.pop<Pointer>(); + const Pointer &A = S.Stk.pop<Pointer>(); + APSInt U = popToAPSInt(S, Call->getArg(0)); const Pointer &Dst = S.Stk.peek<Pointer>(); - unsigned NumElems = Dst.getNumElems(); - PrimType ElemT = Dst.getFieldDesc()->getPrimType(); - - unsigned ElemBits = static_cast<unsigned>(primSize(ElemT) * 8); - if (ElemBits != 16 && ElemBits != 32) - return false; - - unsigned LaneElts = 128u / ElemBits; - assert(LaneElts && (NumElems % LaneElts == 0)); + bool TakeA0 = U.getZExtValue() & 1ULL; - uint8_t Ctl = static_cast<uint8_t>(ControlImm.getZExtValue()); + for (unsigned I = TakeA0; I != N; ++I) + Dst.elem<Floating>(I) = W.elem<Floating>(I); + if (TakeA0) + Dst.elem<Floating>(0) = A.elem<Floating>(0); - for (unsigned Idx = 0; Idx != NumElems; Idx++) { - unsigned LaneBase = (Idx / LaneElts) * LaneElts; - unsigned LaneIdx = Idx % LaneElts; - unsigned SrcIdx = Idx; - unsigned Sel = (Ctl >> (2 * (LaneIdx & 0x3))) & 0x3; - if (ElemBits == 32) { - SrcIdx = LaneBase + Sel; - } else { - constexpr unsigned HalfSize = 4; - bool InHigh = LaneIdx >= HalfSize; - if (!IsShufHW && !InHigh) { - SrcIdx = LaneBase + Sel; - } else if (IsShufHW && InHigh) { - SrcIdx = LaneBase + HalfSize + Sel; - } - } - - INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(Idx) = Src.elem<T>(SrcIdx); }); - } Dst.initializeAllElements(); return true; } @@ -3377,65 +3341,589 @@ static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC, return true; } -static bool interp__builtin_x86_byteshift( - InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned ID, - llvm::function_ref<APInt(const Pointer &, unsigned Lane, unsigned I, - unsigned Shift)> - Fn) { - assert(Call->getNumArgs() == 2); +static bool interp__builtin_ia32_cvt_vec2mask(InterpState &S, CodePtr OpPC, + const CallExpr *Call, + unsigned ID) { + assert(Call->getNumArgs() == 1); - APSInt ImmAPS = popToAPSInt(S, Call->getArg(1)); - uint64_t Shift = ImmAPS.getZExtValue() & 0xff; + const Pointer &Vec = S.Stk.pop<Pointer>(); + unsigned RetWidth = S.getASTContext().getIntWidth(Call->getType()); + APInt RetMask(RetWidth, 0); - const Pointer &Src = S.Stk.pop<Pointer>(); - if (!Src.getFieldDesc()->isPrimitiveArray()) - return false; + unsigned VectorLen = Vec.getNumElems(); + PrimType ElemT = Vec.getFieldDesc()->getPrimType(); + + for (unsigned ElemNum = 0; ElemNum != VectorLen; ++ElemNum) { + APSInt A; + INT_TYPE_SWITCH_NO_BOOL(ElemT, { A = Vec.elem<T>(ElemNum).toAPSInt(); }); + unsigned MSB = A[A.getBitWidth() - 1]; + RetMask.setBitVal(ElemNum, MSB); + } + pushInteger(S, RetMask, Call->getType()); + return true; +} +static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC, + const CallExpr *Call, + bool HasRoundingMask) { + APSInt Rounding, MaskInt; + Pointer Src, B, A; + + if (HasRoundingMask) { + assert(Call->getNumArgs() == 5); + Rounding = popToAPSInt(S, Call->getArg(4)); + MaskInt = popToAPSInt(S, Call->getArg(3)); + Src = S.Stk.pop<Pointer>(); + B = S.Stk.pop<Pointer>(); + A = S.Stk.pop<Pointer>(); + if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B) || + !CheckLoad(S, 
OpPC, Src)) + return false; + } else { + assert(Call->getNumArgs() == 2); + B = S.Stk.pop<Pointer>(); + A = S.Stk.pop<Pointer>(); + if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B)) + return false; + } - unsigned NumElems = Src.getNumElems(); + const auto *DstVTy = Call->getType()->castAs<VectorType>(); + unsigned NumElems = DstVTy->getNumElements(); const Pointer &Dst = S.Stk.peek<Pointer>(); - PrimType ElemT = Src.getFieldDesc()->getPrimType(); - for (unsigned Lane = 0; Lane != NumElems; Lane += 16) { - for (unsigned I = 0; I != 16; ++I) { - unsigned Base = Lane + I; - APSInt Result = APSInt(Fn(Src, Lane, I, Shift)); - INT_TYPE_SWITCH_NO_BOOL(ElemT, - { Dst.elem<T>(Base) = static_cast<T>(Result); }); - } + // Copy all elements except lane 0 (overwritten below) from A to Dst. + for (unsigned I = 1; I != NumElems; ++I) + Dst.elem<Floating>(I) = A.elem<Floating>(I); + + // Convert element 0 from double to float, or use Src if masked off. + if (!HasRoundingMask || (MaskInt.getZExtValue() & 0x1)) { + assert(S.getASTContext().FloatTy == DstVTy->getElementType() && + "cvtsd2ss requires float element type in destination vector"); + + Floating Conv = S.allocFloat( + S.getASTContext().getFloatTypeSemantics(DstVTy->getElementType())); + APFloat SrcVal = B.elem<Floating>(0).getAPFloat(); + if (!convertDoubleToFloatStrict(SrcVal, Conv, S, Call)) + return false; + Dst.elem<Floating>(0) = Conv; + } else { + Dst.elem<Floating>(0) = Src.elem<Floating>(0); } Dst.initializeAllElements(); + return true; +} + +static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC, + const CallExpr *Call, bool IsMasked, + bool HasRounding) { + + APSInt MaskVal; + Pointer PassThrough; + Pointer Src; + APSInt Rounding; + + if (IsMasked) { + // Pop in reverse order. + if (HasRounding) { + Rounding = popToAPSInt(S, Call->getArg(3)); + MaskVal = popToAPSInt(S, Call->getArg(2)); + PassThrough = S.Stk.pop<Pointer>(); + Src = S.Stk.pop<Pointer>(); + } else { + MaskVal = popToAPSInt(S, Call->getArg(2)); + PassThrough = S.Stk.pop<Pointer>(); + Src = S.Stk.pop<Pointer>(); + } + + if (!CheckLoad(S, OpPC, PassThrough)) + return false; + } else { + // Pop source only. + Src = S.Stk.pop<Pointer>(); + } + if (!CheckLoad(S, OpPC, Src)) + return false; + + const auto *RetVTy = Call->getType()->castAs<VectorType>(); + unsigned RetElems = RetVTy->getNumElements(); + unsigned SrcElems = Src.getNumElems(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + + // Initialize destination with passthrough or zeros. + for (unsigned I = 0; I != RetElems; ++I) + if (IsMasked) + Dst.elem<Floating>(I) = PassThrough.elem<Floating>(I); + else + Dst.elem<Floating>(I) = Floating(APFloat(0.0f)); + + assert(S.getASTContext().FloatTy == RetVTy->getElementType() && + "cvtpd2ps requires float element type in return vector"); + + // Convert double to float for enabled elements (only process source elements + // that exist). 
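A standalone model of the loop that follows (editorial; assumes the 128-bit masked variant, exact inputs, and ignores the strict-conversion failure path):

static void cvtpd2psMaskModel(unsigned Mask, const double Src[2],
                              const float Pass[4], float Dst[4]) {
  for (unsigned I = 0; I != 4; ++I)
    Dst[I] = Pass[I];                 // start from the pass-through vector
  for (unsigned I = 0; I != 2; ++I)   // only the source lanes exist
    if (Mask & (1u << I))
      Dst[I] = static_cast<float>(Src[I]);
}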
+ for (unsigned I = 0; I != SrcElems; ++I) { + if (IsMasked && !MaskVal[I]) + continue; + + APFloat SrcVal = Src.elem<Floating>(I).getAPFloat(); + + Floating Conv = S.allocFloat( + S.getASTContext().getFloatTypeSemantics(RetVTy->getElementType())); + if (!convertDoubleToFloatStrict(SrcVal, Conv, S, Call)) + return false; + Dst.elem<Floating>(I) = Conv; + } + + Dst.initializeAllElements(); return true; } static bool interp__builtin_ia32_shuffle_generic( InterpState &S, CodePtr OpPC, const CallExpr *Call, - llvm::function_ref<std::pair<unsigned, unsigned>(unsigned, unsigned)> + llvm::function_ref<std::pair<unsigned, int>(unsigned, unsigned)> GetSourceIndex) { - assert(Call->getNumArgs() == 3); - unsigned ShuffleMask = popToAPSInt(S, Call->getArg(2)).getZExtValue(); + assert(Call->getNumArgs() == 2 || Call->getNumArgs() == 3); + + unsigned ShuffleMask = 0; + Pointer A, MaskVector, B; + bool IsVectorMask = false; + bool IsSingleOperand = (Call->getNumArgs() == 2); + + if (IsSingleOperand) { + QualType MaskType = Call->getArg(1)->getType(); + if (MaskType->isVectorType()) { + IsVectorMask = true; + MaskVector = S.Stk.pop<Pointer>(); + A = S.Stk.pop<Pointer>(); + B = A; + } else if (MaskType->isIntegerType()) { + ShuffleMask = popToAPSInt(S, Call->getArg(1)).getZExtValue(); + A = S.Stk.pop<Pointer>(); + B = A; + } else { + return false; + } + } else { + QualType Arg2Type = Call->getArg(2)->getType(); + if (Arg2Type->isVectorType()) { + IsVectorMask = true; + B = S.Stk.pop<Pointer>(); + MaskVector = S.Stk.pop<Pointer>(); + A = S.Stk.pop<Pointer>(); + } else if (Arg2Type->isIntegerType()) { + ShuffleMask = popToAPSInt(S, Call->getArg(2)).getZExtValue(); + B = S.Stk.pop<Pointer>(); + A = S.Stk.pop<Pointer>(); + } else { + return false; + } + } QualType Arg0Type = Call->getArg(0)->getType(); const auto *VecT = Arg0Type->castAs<VectorType>(); PrimType ElemT = *S.getContext().classify(VecT->getElementType()); unsigned NumElems = VecT->getNumElements(); - const Pointer &B = S.Stk.pop<Pointer>(); - const Pointer &A = S.Stk.pop<Pointer>(); const Pointer &Dst = S.Stk.peek<Pointer>(); + PrimType MaskElemT = PT_Uint32; + if (IsVectorMask) { + QualType Arg1Type = Call->getArg(1)->getType(); + const auto *MaskVecT = Arg1Type->castAs<VectorType>(); + QualType MaskElemType = MaskVecT->getElementType(); + MaskElemT = *S.getContext().classify(MaskElemType); + } + for (unsigned DstIdx = 0; DstIdx != NumElems; ++DstIdx) { + if (IsVectorMask) { + INT_TYPE_SWITCH(MaskElemT, { + ShuffleMask = static_cast<unsigned>(MaskVector.elem<T>(DstIdx)); + }); + } + auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask); - const Pointer &Src = (SrcVecIdx == 0) ? A : B; - TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); }); + + if (SrcIdx < 0) { + // Zero out this element + if (ElemT == PT_Float) { + Dst.elem<Floating>(DstIdx) = Floating( + S.getASTContext().getFloatTypeSemantics(VecT->getElementType())); + } else { + INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(DstIdx) = T::from(0); }); + } + } else { + const Pointer &Src = (SrcVecIdx == 0) ? 
A : B; + TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); }); + } + } + Dst.initializeAllElements(); + + return true; +} + +static bool interp__builtin_ia32_shift_with_count( + InterpState &S, CodePtr OpPC, const CallExpr *Call, + llvm::function_ref<APInt(const APInt &, uint64_t)> ShiftOp, + llvm::function_ref<APInt(const APInt &, unsigned)> OverflowOp) { + + assert(Call->getNumArgs() == 2); + + const Pointer &Count = S.Stk.pop<Pointer>(); + const Pointer &Source = S.Stk.pop<Pointer>(); + + QualType SourceType = Call->getArg(0)->getType(); + QualType CountType = Call->getArg(1)->getType(); + assert(SourceType->isVectorType() && CountType->isVectorType()); + + const auto *SourceVecT = SourceType->castAs<VectorType>(); + const auto *CountVecT = CountType->castAs<VectorType>(); + PrimType SourceElemT = *S.getContext().classify(SourceVecT->getElementType()); + PrimType CountElemT = *S.getContext().classify(CountVecT->getElementType()); + + const Pointer &Dst = S.Stk.peek<Pointer>(); + + unsigned DestEltWidth = + S.getASTContext().getTypeSize(SourceVecT->getElementType()); + bool IsDestUnsigned = SourceVecT->getElementType()->isUnsignedIntegerType(); + unsigned DestLen = SourceVecT->getNumElements(); + unsigned CountEltWidth = + S.getASTContext().getTypeSize(CountVecT->getElementType()); + unsigned NumBitsInQWord = 64; + unsigned NumCountElts = NumBitsInQWord / CountEltWidth; + + uint64_t CountLQWord = 0; + for (unsigned EltIdx = 0; EltIdx != NumCountElts; ++EltIdx) { + uint64_t Elt = 0; + INT_TYPE_SWITCH(CountElemT, + { Elt = static_cast<uint64_t>(Count.elem<T>(EltIdx)); }); + CountLQWord |= (Elt << (EltIdx * CountEltWidth)); + } + + for (unsigned EltIdx = 0; EltIdx != DestLen; ++EltIdx) { + APSInt Elt; + INT_TYPE_SWITCH(SourceElemT, { Elt = Source.elem<T>(EltIdx).toAPSInt(); }); + + APInt Result; + if (CountLQWord < DestEltWidth) { + Result = ShiftOp(Elt, CountLQWord); + } else { + Result = OverflowOp(Elt, DestEltWidth); + } + if (IsDestUnsigned) { + INT_TYPE_SWITCH(SourceElemT, { + Dst.elem<T>(EltIdx) = T::from(Result.getZExtValue()); + }); + } else { + INT_TYPE_SWITCH(SourceElemT, { + Dst.elem<T>(EltIdx) = T::from(Result.getSExtValue()); + }); + } + } + + Dst.initializeAllElements(); + return true; +} + +static bool interp__builtin_ia32_shufbitqmb_mask(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + + assert(Call->getNumArgs() == 3); + + QualType SourceType = Call->getArg(0)->getType(); + QualType ShuffleMaskType = Call->getArg(1)->getType(); + QualType ZeroMaskType = Call->getArg(2)->getType(); + if (!SourceType->isVectorType() || !ShuffleMaskType->isVectorType() || + !ZeroMaskType->isIntegerType()) { + return false; } + + Pointer Source, ShuffleMask; + APSInt ZeroMask = popToAPSInt(S, Call->getArg(2)); + ShuffleMask = S.Stk.pop<Pointer>(); + Source = S.Stk.pop<Pointer>(); + + const auto *SourceVecT = SourceType->castAs<VectorType>(); + const auto *ShuffleMaskVecT = ShuffleMaskType->castAs<VectorType>(); + assert(SourceVecT->getNumElements() == ShuffleMaskVecT->getNumElements()); + assert(ZeroMask.getBitWidth() == SourceVecT->getNumElements()); + + PrimType SourceElemT = *S.getContext().classify(SourceVecT->getElementType()); + PrimType ShuffleMaskElemT = + *S.getContext().classify(ShuffleMaskVecT->getElementType()); + + unsigned NumBytesInQWord = 8; + unsigned NumBitsInByte = 8; + unsigned NumBytes = SourceVecT->getNumElements(); + unsigned NumQWords = NumBytes / NumBytesInQWord; + unsigned RetWidth = ZeroMask.getBitWidth(); + APSInt RetMask(llvm::APInt(RetWidth, 
0), /*isUnsigned=*/true); + + for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) { + APInt SourceQWord(64, 0); + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + uint64_t Byte = 0; + INT_TYPE_SWITCH(SourceElemT, { + Byte = static_cast<uint64_t>( + Source.elem<T>(QWordId * NumBytesInQWord + ByteIdx)); + }); + SourceQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte); + } + + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + unsigned SelIdx = QWordId * NumBytesInQWord + ByteIdx; + unsigned M = 0; + INT_TYPE_SWITCH(ShuffleMaskElemT, { + M = static_cast<unsigned>(ShuffleMask.elem<T>(SelIdx)) & 0x3F; + }); + + if (ZeroMask[SelIdx]) { + RetMask.setBitVal(SelIdx, SourceQWord[M]); + } + } + } + + pushInteger(S, RetMask, Call->getType()); + return true; +} + +static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + // Arguments are: vector of floats, rounding immediate + assert(Call->getNumArgs() == 2); + + APSInt Imm = popToAPSInt(S, Call->getArg(1)); + const Pointer &Src = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + + assert(Src.getFieldDesc()->isPrimitiveArray()); + assert(Dst.getFieldDesc()->isPrimitiveArray()); + + const auto *SrcVTy = Call->getArg(0)->getType()->castAs<VectorType>(); + unsigned SrcNumElems = SrcVTy->getNumElements(); + const auto *DstVTy = Call->getType()->castAs<VectorType>(); + unsigned DstNumElems = DstVTy->getNumElements(); + + const llvm::fltSemantics &HalfSem = + S.getASTContext().getFloatTypeSemantics(S.getASTContext().HalfTy); + + // imm[2] == 1 means use MXCSR rounding mode. + // In that case, we can only evaluate if the conversion is exact. + int ImmVal = Imm.getZExtValue(); + bool UseMXCSR = (ImmVal & 4) != 0; + bool IsFPConstrained = + Call->getFPFeaturesInEffect(S.getASTContext().getLangOpts()) + .isFPConstrained(); + + llvm::RoundingMode RM; + if (!UseMXCSR) { + switch (ImmVal & 3) { + case 0: + RM = llvm::RoundingMode::NearestTiesToEven; + break; + case 1: + RM = llvm::RoundingMode::TowardNegative; + break; + case 2: + RM = llvm::RoundingMode::TowardPositive; + break; + case 3: + RM = llvm::RoundingMode::TowardZero; + break; + default: + llvm_unreachable("Invalid immediate rounding mode"); + } + } else { + // For MXCSR, we must check for exactness. We can use any rounding mode + // for the trial conversion since the result is the same if it's exact. + RM = llvm::RoundingMode::NearestTiesToEven; + } + + QualType DstElemQT = Dst.getFieldDesc()->getElemQualType(); + PrimType DstElemT = *S.getContext().classify(DstElemQT); + + for (unsigned I = 0; I != SrcNumElems; ++I) { + Floating SrcVal = Src.elem<Floating>(I); + APFloat DstVal = SrcVal.getAPFloat(); + + bool LostInfo; + APFloat::opStatus St = DstVal.convert(HalfSem, RM, &LostInfo); + + if (UseMXCSR && IsFPConstrained && St != APFloat::opOK) { + S.FFDiag(S.Current->getSource(OpPC), + diag::note_constexpr_dynamic_rounding); + return false; + } + + INT_TYPE_SWITCH_NO_BOOL(DstElemT, { + // Convert the destination value's bit pattern to an unsigned integer, + // then reconstruct the element using the target type's 'from' method. + uint64_t RawBits = DstVal.bitcastToAPInt().getZExtValue(); + Dst.elem<T>(I) = T::from(RawBits); + }); + } + + // Zero out remaining elements if the destination has more elements + // (e.g., vcvtps2ph converting 4 floats to 8 shorts). 
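// Editorial worked example (not part of the patch) of the rounding control
// decoded above: converting the float 65520.0f to half falls exactly halfway
// between the largest finite half (65504) and the overflow boundary (65536):
//   imm = 0 (NearestTiesToEven) -> +inf (ties round up, overflowing)
//   imm = 3 (TowardZero)        -> 65504.0
//   imm = 4 (MXCSR)             -> rejected when FP is constrained, since
//                                  the conversion is inexact.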
+ if (DstNumElems > SrcNumElems) { + for (unsigned I = SrcNumElems; I != DstNumElems; ++I) { + INT_TYPE_SWITCH_NO_BOOL(DstElemT, { Dst.elem<T>(I) = T::from(0); }); + } + } + + Dst.initializeAllElements(); + return true; +} + +static bool interp__builtin_ia32_multishiftqb(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + assert(Call->getNumArgs() == 2); + + QualType ATy = Call->getArg(0)->getType(); + QualType BTy = Call->getArg(1)->getType(); + if (!ATy->isVectorType() || !BTy->isVectorType()) { + return false; + } + + const Pointer &BPtr = S.Stk.pop<Pointer>(); + const Pointer &APtr = S.Stk.pop<Pointer>(); + const auto *AVecT = ATy->castAs<VectorType>(); + assert(AVecT->getNumElements() == + BTy->castAs<VectorType>()->getNumElements()); + + PrimType ElemT = *S.getContext().classify(AVecT->getElementType()); + + unsigned NumBytesInQWord = 8; + unsigned NumBitsInByte = 8; + unsigned NumBytes = AVecT->getNumElements(); + unsigned NumQWords = NumBytes / NumBytesInQWord; + const Pointer &Dst = S.Stk.peek<Pointer>(); + + for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) { + APInt BQWord(64, 0); + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + unsigned Idx = QWordId * NumBytesInQWord + ByteIdx; + INT_TYPE_SWITCH(ElemT, { + uint64_t Byte = static_cast<uint64_t>(BPtr.elem<T>(Idx)); + BQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte); + }); + } + + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + unsigned Idx = QWordId * NumBytesInQWord + ByteIdx; + uint64_t Ctrl = 0; + INT_TYPE_SWITCH( + ElemT, { Ctrl = static_cast<uint64_t>(APtr.elem<T>(Idx)) & 0x3F; }); + + APInt Byte(8, 0); + for (unsigned BitIdx = 0; BitIdx != NumBitsInByte; ++BitIdx) { + Byte.setBitVal(BitIdx, BQWord[(Ctrl + BitIdx) & 0x3F]); + } + INT_TYPE_SWITCH(ElemT, + { Dst.elem<T>(Idx) = T::from(Byte.getZExtValue()); }); + } + } + Dst.initializeAllElements(); return true; } +static bool interp_builtin_ia32_gfni_affine(InterpState &S, CodePtr OpPC, + const CallExpr *Call, + bool Inverse) { + assert(Call->getNumArgs() == 3); + QualType XType = Call->getArg(0)->getType(); + QualType AType = Call->getArg(1)->getType(); + QualType ImmType = Call->getArg(2)->getType(); + if (!XType->isVectorType() || !AType->isVectorType() || + !ImmType->isIntegerType()) { + return false; + } + + Pointer X, A; + APSInt Imm = popToAPSInt(S, Call->getArg(2)); + A = S.Stk.pop<Pointer>(); + X = S.Stk.pop<Pointer>(); + + const Pointer &Dst = S.Stk.peek<Pointer>(); + const auto *AVecT = AType->castAs<VectorType>(); + assert(XType->castAs<VectorType>()->getNumElements() == + AVecT->getNumElements()); + unsigned NumBytesInQWord = 8; + unsigned NumBytes = AVecT->getNumElements(); + unsigned NumBitsInQWord = 64; + unsigned NumQWords = NumBytes / NumBytesInQWord; + unsigned NumBitsInByte = 8; + PrimType AElemT = *S.getContext().classify(AVecT->getElementType()); + + // computing A*X + Imm + for (unsigned QWordIdx = 0; QWordIdx != NumQWords; ++QWordIdx) { + // Extract the QWords from X, A + APInt XQWord(NumBitsInQWord, 0); + APInt AQWord(NumBitsInQWord, 0); + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + unsigned Idx = QWordIdx * NumBytesInQWord + ByteIdx; + uint8_t XByte; + uint8_t AByte; + INT_TYPE_SWITCH(AElemT, { + XByte = static_cast<uint8_t>(X.elem<T>(Idx)); + AByte = static_cast<uint8_t>(A.elem<T>(Idx)); + }); + + XQWord.insertBits(APInt(NumBitsInByte, XByte), ByteIdx * NumBitsInByte); + AQWord.insertBits(APInt(NumBitsInByte, AByte), ByteIdx * NumBitsInByte); + } 
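// Editorial note (not part of the patch): per the GFNI specification, each
// output byte is an affine transform over GF(2): bit j of Dst[Idx] is the
// parity of one 8-bit matrix row packed in AQWord ANDed with XByte, XORed
// with bit j of Imm; for the INV forms, XByte is first replaced by its
// inverse in GF(2^8) (reduction polynomial 0x11B). GFNIAffine, presumably
// defined earlier in this file, implements that bit loop.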
+ + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + unsigned Idx = QWordIdx * NumBytesInQWord + ByteIdx; + uint8_t XByte = + XQWord.lshr(ByteIdx * NumBitsInByte).getLoBits(8).getZExtValue(); + INT_TYPE_SWITCH(AElemT, { + Dst.elem<T>(Idx) = T::from(GFNIAffine(XByte, AQWord, Imm, Inverse)); + }); + } + } + Dst.initializeAllElements(); + return true; +} + +static bool interp__builtin_ia32_gfni_mul(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + assert(Call->getNumArgs() == 2); + + QualType AType = Call->getArg(0)->getType(); + QualType BType = Call->getArg(1)->getType(); + if (!AType->isVectorType() || !BType->isVectorType()) { + return false; + } + + Pointer A, B; + B = S.Stk.pop<Pointer>(); + A = S.Stk.pop<Pointer>(); + + const Pointer &Dst = S.Stk.peek<Pointer>(); + const auto *AVecT = AType->castAs<VectorType>(); + assert(AVecT->getNumElements() == + BType->castAs<VectorType>()->getNumElements()); + + PrimType AElemT = *S.getContext().classify(AVecT->getElementType()); + unsigned NumBytes = A.getNumElems(); + + for (unsigned ByteIdx = 0; ByteIdx != NumBytes; ++ByteIdx) { + uint8_t AByte, BByte; + INT_TYPE_SWITCH(AElemT, { + AByte = static_cast<uint8_t>(A.elem<T>(ByteIdx)); + BByte = static_cast<uint8_t>(B.elem<T>(ByteIdx)); + Dst.elem<T>(ByteIdx) = T::from(GFNIMul(AByte, BByte)); + }); + } + + Dst.initializeAllElements(); + return true; +} + bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, uint32_t BuiltinID) { if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID)) @@ -3732,7 +4220,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case Builtin::BI__builtin_elementwise_ctzg: return interp__builtin_elementwise_countzeroes(S, OpPC, Frame, Call, BuiltinID); - + case Builtin::BI__builtin_bswapg: case Builtin::BI__builtin_bswap16: case Builtin::BI__builtin_bswap32: case Builtin::BI__builtin_bswap64: @@ -3792,6 +4280,66 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return Result; }); + case clang::X86::BI__builtin_ia32_ktestcqi: + case clang::X86::BI__builtin_ia32_ktestchi: + case clang::X86::BI__builtin_ia32_ktestcsi: + case clang::X86::BI__builtin_ia32_ktestcdi: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &A, const APSInt &B) { + return APInt(sizeof(unsigned char) * 8, (~A & B) == 0); + }); + + case clang::X86::BI__builtin_ia32_ktestzqi: + case clang::X86::BI__builtin_ia32_ktestzhi: + case clang::X86::BI__builtin_ia32_ktestzsi: + case clang::X86::BI__builtin_ia32_ktestzdi: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &A, const APSInt &B) { + return APInt(sizeof(unsigned char) * 8, (A & B) == 0); + }); + + case clang::X86::BI__builtin_ia32_kortestcqi: + case clang::X86::BI__builtin_ia32_kortestchi: + case clang::X86::BI__builtin_ia32_kortestcsi: + case clang::X86::BI__builtin_ia32_kortestcdi: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &A, const APSInt &B) { + return APInt(sizeof(unsigned char) * 8, ~(A | B) == 0); + }); + + case clang::X86::BI__builtin_ia32_kortestzqi: + case clang::X86::BI__builtin_ia32_kortestzhi: + case clang::X86::BI__builtin_ia32_kortestzsi: + case clang::X86::BI__builtin_ia32_kortestzdi: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &A, const APSInt &B) { + return APInt(sizeof(unsigned char) * 8, (A | B) == 0); + }); + + case clang::X86::BI__builtin_ia32_kshiftliqi: + case 
clang::X86::BI__builtin_ia32_kshiftlihi: + case clang::X86::BI__builtin_ia32_kshiftlisi: + case clang::X86::BI__builtin_ia32_kshiftlidi: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { + unsigned Amt = RHS.getZExtValue() & 0xFF; + if (Amt >= LHS.getBitWidth()) + return APInt::getZero(LHS.getBitWidth()); + return LHS.shl(Amt); + }); + + case clang::X86::BI__builtin_ia32_kshiftriqi: + case clang::X86::BI__builtin_ia32_kshiftrihi: + case clang::X86::BI__builtin_ia32_kshiftrisi: + case clang::X86::BI__builtin_ia32_kshiftridi: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { + unsigned Amt = RHS.getZExtValue() & 0xFF; + if (Amt >= LHS.getBitWidth()) + return APInt::getZero(LHS.getBitWidth()); + return LHS.lshr(Amt); + }); + case clang::X86::BI__builtin_ia32_lzcnt_u16: case clang::X86::BI__builtin_ia32_lzcnt_u32: case clang::X86::BI__builtin_ia32_lzcnt_u64: @@ -3877,9 +4425,13 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return interp__builtin_vector_reduce(S, OpPC, Call, BuiltinID); case Builtin::BI__builtin_elementwise_popcount: + return interp__builtin_elementwise_int_unaryop( + S, OpPC, Call, [](const APSInt &Src) { + return APInt(Src.getBitWidth(), Src.popcount()); + }); case Builtin::BI__builtin_elementwise_bitreverse: - return interp__builtin_elementwise_popcount(S, OpPC, Frame, Call, - BuiltinID); + return interp__builtin_elementwise_int_unaryop( + S, OpPC, Call, [](const APSInt &Src) { return Src.reverseBits(); }); case Builtin::BI__builtin_elementwise_abs: return interp__builtin_elementwise_abs(S, OpPC, Frame, Call, BuiltinID); @@ -4130,6 +4682,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return APInt::getAllOnes(DstBits); }); + case clang::X86::BI__builtin_ia32_selectss_128: + case clang::X86::BI__builtin_ia32_selectsd_128: + case clang::X86::BI__builtin_ia32_selectsh_128: + case clang::X86::BI__builtin_ia32_selectsbf_128: + return interp__builtin_select_scalar(S, Call); case clang::X86::BI__builtin_ia32_vprotbi: case clang::X86::BI__builtin_ia32_vprotdi: case clang::X86::BI__builtin_ia32_vprotqi: @@ -4204,6 +4761,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, F.subtract(RHS, RM); return F; }); + case clang::X86::BI__builtin_ia32_addsubpd: + case clang::X86::BI__builtin_ia32_addsubps: + case clang::X86::BI__builtin_ia32_addsubpd256: + case clang::X86::BI__builtin_ia32_addsubps256: + return interp__builtin_ia32_addsub(S, OpPC, Call); case clang::X86::BI__builtin_ia32_pmuldq128: case clang::X86::BI__builtin_ia32_pmuldq256: @@ -4295,7 +4857,15 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_pblendw256: case clang::X86::BI__builtin_ia32_pblendd128: case clang::X86::BI__builtin_ia32_pblendd256: - return interp__builtin_blend(S, OpPC, Call); + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + // Bit index for mask. + unsigned MaskBit = (ShuffleMask >> (DstIdx % 8)) & 0x1; + unsigned SrcVecIdx = MaskBit ? 1 : 0; // 1 = TrueVec, 0 = FalseVec + return std::pair<unsigned, int>{SrcVecIdx, static_cast<int>(DstIdx)}; + }); + + case clang::X86::BI__builtin_ia32_blendvpd: case clang::X86::BI__builtin_ia32_blendvpd256: @@ -4382,7 +4952,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 
1 : 0; unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; - return std::pair<unsigned, unsigned>{SrcIdx, LaneOffset + Index}; + return std::pair<unsigned, int>{SrcIdx, + static_cast<int>(LaneOffset + Index)}; }); case X86::BI__builtin_ia32_shufpd: case X86::BI__builtin_ia32_shufpd256: @@ -4400,28 +4971,249 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0; unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; - return std::pair<unsigned, unsigned>{SrcIdx, LaneOffset + Index}; + return std::pair<unsigned, int>{SrcIdx, + static_cast<int>(LaneOffset + Index)}; + }); + + case X86::BI__builtin_ia32_vgf2p8affineinvqb_v16qi: + case X86::BI__builtin_ia32_vgf2p8affineinvqb_v32qi: + case X86::BI__builtin_ia32_vgf2p8affineinvqb_v64qi: + return interp_builtin_ia32_gfni_affine(S, OpPC, Call, true); + case X86::BI__builtin_ia32_vgf2p8affineqb_v16qi: + case X86::BI__builtin_ia32_vgf2p8affineqb_v32qi: + case X86::BI__builtin_ia32_vgf2p8affineqb_v64qi: + return interp_builtin_ia32_gfni_affine(S, OpPC, Call, false); + + case X86::BI__builtin_ia32_vgf2p8mulb_v16qi: + case X86::BI__builtin_ia32_vgf2p8mulb_v32qi: + case X86::BI__builtin_ia32_vgf2p8mulb_v64qi: + return interp__builtin_ia32_gfni_mul(S, OpPC, Call); + + case X86::BI__builtin_ia32_insertps128: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned Mask) { + // Bits [3:0]: zero mask - if bit is set, zero this element + if ((Mask & (1 << DstIdx)) != 0) { + return std::pair<unsigned, int>{0, -1}; + } + // Bits [7:6]: select element from source vector Y (0-3) + // Bits [5:4]: select destination position (0-3) + unsigned SrcElem = (Mask >> 6) & 0x3; + unsigned DstElem = (Mask >> 4) & 0x3; + if (DstIdx == DstElem) { + // Insert element from source vector (B) at this position + return std::pair<unsigned, int>{1, static_cast<int>(SrcElem)}; + } else { + // Copy from destination vector (A) + return std::pair<unsigned, int>{0, static_cast<int>(DstIdx)}; + } + }); + case X86::BI__builtin_ia32_permvarsi256: + case X86::BI__builtin_ia32_permvarsf256: + case X86::BI__builtin_ia32_permvardf512: + case X86::BI__builtin_ia32_permvardi512: + case X86::BI__builtin_ia32_permvarhi128: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x7; + return std::pair<unsigned, int>{0, Offset}; + }); + case X86::BI__builtin_ia32_permvarqi128: + case X86::BI__builtin_ia32_permvarhi256: + case X86::BI__builtin_ia32_permvarsi512: + case X86::BI__builtin_ia32_permvarsf512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0xF; + return std::pair<unsigned, int>{0, Offset}; + }); + case X86::BI__builtin_ia32_permvardi256: + case X86::BI__builtin_ia32_permvardf256: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x3; + return std::pair<unsigned, int>{0, Offset}; + }); + case X86::BI__builtin_ia32_permvarqi256: + case X86::BI__builtin_ia32_permvarhi512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x1F; + return std::pair<unsigned, int>{0, Offset}; + }); + case X86::BI__builtin_ia32_permvarqi512: + 
return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x3F; + return std::pair<unsigned, int>{0, Offset}; + }); + case X86::BI__builtin_ia32_vpermi2varq128: + case X86::BI__builtin_ia32_vpermi2varpd128: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x1; + unsigned SrcIdx = (ShuffleMask >> 1) & 0x1; + return std::pair<unsigned, int>{SrcIdx, Offset}; + }); + case X86::BI__builtin_ia32_vpermi2vard128: + case X86::BI__builtin_ia32_vpermi2varps128: + case X86::BI__builtin_ia32_vpermi2varq256: + case X86::BI__builtin_ia32_vpermi2varpd256: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x3; + unsigned SrcIdx = (ShuffleMask >> 2) & 0x1; + return std::pair<unsigned, int>{SrcIdx, Offset}; + }); + case X86::BI__builtin_ia32_vpermi2varhi128: + case X86::BI__builtin_ia32_vpermi2vard256: + case X86::BI__builtin_ia32_vpermi2varps256: + case X86::BI__builtin_ia32_vpermi2varq512: + case X86::BI__builtin_ia32_vpermi2varpd512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x7; + unsigned SrcIdx = (ShuffleMask >> 3) & 0x1; + return std::pair<unsigned, int>{SrcIdx, Offset}; + }); + case X86::BI__builtin_ia32_vpermi2varqi128: + case X86::BI__builtin_ia32_vpermi2varhi256: + case X86::BI__builtin_ia32_vpermi2vard512: + case X86::BI__builtin_ia32_vpermi2varps512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0xF; + unsigned SrcIdx = (ShuffleMask >> 4) & 0x1; + return std::pair<unsigned, int>{SrcIdx, Offset}; + }); + case X86::BI__builtin_ia32_vpermi2varqi256: + case X86::BI__builtin_ia32_vpermi2varhi512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x1F; + unsigned SrcIdx = (ShuffleMask >> 5) & 0x1; + return std::pair<unsigned, int>{SrcIdx, Offset}; + }); + case X86::BI__builtin_ia32_vpermi2varqi512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x3F; + unsigned SrcIdx = (ShuffleMask >> 6) & 0x1; + return std::pair<unsigned, int>{SrcIdx, Offset}; }); case X86::BI__builtin_ia32_pshufb128: case X86::BI__builtin_ia32_pshufb256: case X86::BI__builtin_ia32_pshufb512: - return interp__builtin_ia32_pshufb(S, OpPC, Call); + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + uint8_t Ctlb = static_cast<uint8_t>(ShuffleMask); + if (Ctlb & 0x80) + return std::make_pair(0, -1); + + unsigned LaneBase = (DstIdx / 16) * 16; + unsigned SrcOffset = Ctlb & 0x0F; + unsigned SrcIdx = LaneBase + SrcOffset; + return std::make_pair(0, static_cast<int>(SrcIdx)); + }); case X86::BI__builtin_ia32_pshuflw: case X86::BI__builtin_ia32_pshuflw256: case X86::BI__builtin_ia32_pshuflw512: - return interp__builtin_ia32_pshuf(S, OpPC, Call, false); + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + unsigned LaneBase = (DstIdx / 8) * 8; + unsigned LaneIdx = DstIdx % 8; + if (LaneIdx < 4) { + unsigned Sel = (ShuffleMask >> (2 * LaneIdx)) & 0x3; + return std::make_pair(0, static_cast<int>(LaneBase + Sel)); + } + + return 
std::make_pair(0, static_cast<int>(DstIdx)); + }); case X86::BI__builtin_ia32_pshufhw: case X86::BI__builtin_ia32_pshufhw256: case X86::BI__builtin_ia32_pshufhw512: - return interp__builtin_ia32_pshuf(S, OpPC, Call, true); + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + unsigned LaneBase = (DstIdx / 8) * 8; + unsigned LaneIdx = DstIdx % 8; + if (LaneIdx >= 4) { + unsigned Sel = (ShuffleMask >> (2 * (LaneIdx - 4))) & 0x3; + return std::make_pair(0, static_cast<int>(LaneBase + 4 + Sel)); + } + + return std::make_pair(0, static_cast<int>(DstIdx)); + }); case X86::BI__builtin_ia32_pshufd: case X86::BI__builtin_ia32_pshufd256: case X86::BI__builtin_ia32_pshufd512: - return interp__builtin_ia32_pshuf(S, OpPC, Call, false); + case X86::BI__builtin_ia32_vpermilps: + case X86::BI__builtin_ia32_vpermilps256: + case X86::BI__builtin_ia32_vpermilps512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + unsigned LaneBase = (DstIdx / 4) * 4; + unsigned LaneIdx = DstIdx % 4; + unsigned Sel = (ShuffleMask >> (2 * LaneIdx)) & 0x3; + return std::make_pair(0, static_cast<int>(LaneBase + Sel)); + }); + + case X86::BI__builtin_ia32_vpermilvarpd: + case X86::BI__builtin_ia32_vpermilvarpd256: + case X86::BI__builtin_ia32_vpermilvarpd512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + unsigned NumElemPerLane = 2; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned Offset = ShuffleMask & 0b10 ? 1 : 0; + return std::make_pair( + 0, static_cast<int>(Lane * NumElemPerLane + Offset)); + }); + + case X86::BI__builtin_ia32_vpermilvarps: + case X86::BI__builtin_ia32_vpermilvarps256: + case X86::BI__builtin_ia32_vpermilvarps512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + unsigned NumElemPerLane = 4; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned Offset = ShuffleMask & 0b11; + return std::make_pair( + 0, static_cast<int>(Lane * NumElemPerLane + Offset)); + }); + case X86::BI__builtin_ia32_vpermilpd: + case X86::BI__builtin_ia32_vpermilpd256: + case X86::BI__builtin_ia32_vpermilpd512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned Control) { + unsigned NumElemPerLane = 2; + unsigned BitsPerElem = 1; + unsigned MaskBits = 8; + unsigned IndexMask = 0x1; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned LaneOffset = Lane * NumElemPerLane; + unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; + unsigned Index = (Control >> BitIndex) & IndexMask; + return std::make_pair(0, static_cast<int>(LaneOffset + Index)); + }); + + case X86::BI__builtin_ia32_permdf256: + case X86::BI__builtin_ia32_permdi256: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned Control) { + // permute4x64 operates on 4 64-bit elements + // For element i (0-3), extract bits [2*i+1:2*i] from Control + unsigned Index = (Control >> (2 * DstIdx)) & 0x3; + return std::make_pair(0, static_cast<int>(Index)); + }); + + case X86::BI__builtin_ia32_vpmultishiftqb128: + case X86::BI__builtin_ia32_vpmultishiftqb256: + case X86::BI__builtin_ia32_vpmultishiftqb512: + return interp__builtin_ia32_multishiftqb(S, OpPC, Call); case X86::BI__builtin_ia32_kandqi: case X86::BI__builtin_ia32_kandhi: case X86::BI__builtin_ia32_kandsi: @@ -4477,9 +5269,70 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, S, 
OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; }); + case X86::BI__builtin_ia32_kmovb: + case X86::BI__builtin_ia32_kmovw: + case X86::BI__builtin_ia32_kmovd: + case X86::BI__builtin_ia32_kmovq: + return interp__builtin_elementwise_int_unaryop( + S, OpPC, Call, [](const APSInt &Src) { return Src; }); + + case X86::BI__builtin_ia32_kunpckhi: + case X86::BI__builtin_ia32_kunpckdi: + case X86::BI__builtin_ia32_kunpcksi: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &A, const APSInt &B) { + // Generic kunpack: extract lower half of each operand and concatenate + // Result = A[HalfWidth-1:0] concat B[HalfWidth-1:0] + unsigned BW = A.getBitWidth(); + return APSInt(A.trunc(BW / 2).concat(B.trunc(BW / 2)), + A.isUnsigned()); + }); + case X86::BI__builtin_ia32_phminposuw128: return interp__builtin_ia32_phminposuw(S, OpPC, Call); + case X86::BI__builtin_ia32_psraq128: + case X86::BI__builtin_ia32_psraq256: + case X86::BI__builtin_ia32_psraq512: + case X86::BI__builtin_ia32_psrad128: + case X86::BI__builtin_ia32_psrad256: + case X86::BI__builtin_ia32_psrad512: + case X86::BI__builtin_ia32_psraw128: + case X86::BI__builtin_ia32_psraw256: + case X86::BI__builtin_ia32_psraw512: + return interp__builtin_ia32_shift_with_count( + S, OpPC, Call, + [](const APInt &Elt, uint64_t Count) { return Elt.ashr(Count); }, + [](const APInt &Elt, unsigned Width) { return Elt.ashr(Width - 1); }); + + case X86::BI__builtin_ia32_psllq128: + case X86::BI__builtin_ia32_psllq256: + case X86::BI__builtin_ia32_psllq512: + case X86::BI__builtin_ia32_pslld128: + case X86::BI__builtin_ia32_pslld256: + case X86::BI__builtin_ia32_pslld512: + case X86::BI__builtin_ia32_psllw128: + case X86::BI__builtin_ia32_psllw256: + case X86::BI__builtin_ia32_psllw512: + return interp__builtin_ia32_shift_with_count( + S, OpPC, Call, + [](const APInt &Elt, uint64_t Count) { return Elt.shl(Count); }, + [](const APInt &Elt, unsigned Width) { return APInt::getZero(Width); }); + + case X86::BI__builtin_ia32_psrlq128: + case X86::BI__builtin_ia32_psrlq256: + case X86::BI__builtin_ia32_psrlq512: + case X86::BI__builtin_ia32_psrld128: + case X86::BI__builtin_ia32_psrld256: + case X86::BI__builtin_ia32_psrld512: + case X86::BI__builtin_ia32_psrlw128: + case X86::BI__builtin_ia32_psrlw256: + case X86::BI__builtin_ia32_psrlw512: + return interp__builtin_ia32_shift_with_count( + S, OpPC, Call, + [](const APInt &Elt, uint64_t Count) { return Elt.lshr(Count); }, + [](const APInt &Elt, unsigned Width) { return APInt::getZero(Width); }); + case X86::BI__builtin_ia32_pternlogd128_mask: case X86::BI__builtin_ia32_pternlogd256_mask: case X86::BI__builtin_ia32_pternlogd512_mask: @@ -4501,6 +5354,39 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return interp__builtin_elementwise_triop(S, OpPC, Call, llvm::APIntOps::fshr); + case X86::BI__builtin_ia32_shuf_f32x4_256: + case X86::BI__builtin_ia32_shuf_i32x4_256: + case X86::BI__builtin_ia32_shuf_f64x2_256: + case X86::BI__builtin_ia32_shuf_i64x2_256: + case X86::BI__builtin_ia32_shuf_f32x4: + case X86::BI__builtin_ia32_shuf_i32x4: + case X86::BI__builtin_ia32_shuf_f64x2: + case X86::BI__builtin_ia32_shuf_i64x2: { + // Destination and sources A, B all have the same type. 
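+ // Worked example (illustrative, not from the source): for
+ // __builtin_ia32_shuf_f64x2_256, NumElems = 4 and ElemBits = 64, so
+ // NumLanes = 2 and NumElemsPerLane = 2, and one mask bit selects the
+ // source lane per destination lane. With ShuffleMask = 0b10, destination
+ // lane 0 (elements 0-1) copies lane 0 of A, and destination lane 1
+ // (elements 2-3) copies lane 1 of B.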
+ QualType VecQT = Call->getArg(0)->getType(); + const auto *VecT = VecQT->castAs<VectorType>(); + unsigned NumElems = VecT->getNumElements(); + unsigned ElemBits = S.getASTContext().getTypeSize(VecT->getElementType()); + unsigned LaneBits = 128u; + unsigned NumLanes = (NumElems * ElemBits) / LaneBits; + unsigned NumElemsPerLane = LaneBits / ElemBits; + + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, + [NumLanes, NumElemsPerLane](unsigned DstIdx, unsigned ShuffleMask) { + // DstIdx determines source. ShuffleMask selects lane in source. + unsigned BitsPerElem = NumLanes / 2; + unsigned IndexMask = (1u << BitsPerElem) - 1; + unsigned Lane = DstIdx / NumElemsPerLane; + unsigned SrcIdx = (Lane < NumLanes / 2) ? 0 : 1; + unsigned BitIdx = BitsPerElem * Lane; + unsigned SrcLaneIdx = (ShuffleMask >> BitIdx) & IndexMask; + unsigned ElemInLane = DstIdx % NumElemsPerLane; + unsigned IdxToPick = SrcLaneIdx * NumElemsPerLane + ElemInLane; + return std::pair<unsigned, int>{SrcIdx, IdxToPick}; + }); + } + case X86::BI__builtin_ia32_insertf32x4_256: case X86::BI__builtin_ia32_inserti32x4_256: case X86::BI__builtin_ia32_insertf64x2_256: @@ -4519,6 +5405,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_insert128i256: return interp__builtin_x86_insert_subvector(S, OpPC, Call, BuiltinID); + case clang::X86::BI__builtin_ia32_vcvtps2ph: + case clang::X86::BI__builtin_ia32_vcvtps2ph256: + return interp__builtin_ia32_vcvtps2ph(S, OpPC, Call); + case X86::BI__builtin_ia32_vec_ext_v4hi: case X86::BI__builtin_ia32_vec_ext_v16qi: case X86::BI__builtin_ia32_vec_ext_v8hi: @@ -4542,6 +5432,34 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_vec_set_v4di: return interp__builtin_vec_set(S, OpPC, Call, BuiltinID); + case X86::BI__builtin_ia32_cvtb2mask128: + case X86::BI__builtin_ia32_cvtb2mask256: + case X86::BI__builtin_ia32_cvtb2mask512: + case X86::BI__builtin_ia32_cvtw2mask128: + case X86::BI__builtin_ia32_cvtw2mask256: + case X86::BI__builtin_ia32_cvtw2mask512: + case X86::BI__builtin_ia32_cvtd2mask128: + case X86::BI__builtin_ia32_cvtd2mask256: + case X86::BI__builtin_ia32_cvtd2mask512: + case X86::BI__builtin_ia32_cvtq2mask128: + case X86::BI__builtin_ia32_cvtq2mask256: + case X86::BI__builtin_ia32_cvtq2mask512: + return interp__builtin_ia32_cvt_vec2mask(S, OpPC, Call, BuiltinID); + + case X86::BI__builtin_ia32_cvtsd2ss: + return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, false); + + case X86::BI__builtin_ia32_cvtsd2ss_round_mask: + return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, true); + + case X86::BI__builtin_ia32_cvtpd2ps: + case X86::BI__builtin_ia32_cvtpd2ps256: + return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, false, false); + case X86::BI__builtin_ia32_cvtpd2ps_mask: + return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, true, false); + case X86::BI__builtin_ia32_cvtpd2ps512_mask: + return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, true, true); + case X86::BI__builtin_ia32_cmpb128_mask: case X86::BI__builtin_ia32_cmpw128_mask: case X86::BI__builtin_ia32_cmpd128_mask: @@ -4571,6 +5489,12 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_ucmpq512_mask: return interp__builtin_ia32_cmp_mask(S, OpPC, Call, BuiltinID, /*IsUnsigned=*/true); + + case X86::BI__builtin_ia32_vpshufbitqmb128_mask: + case X86::BI__builtin_ia32_vpshufbitqmb256_mask: + case X86::BI__builtin_ia32_vpshufbitqmb512_mask: + return 
interp__builtin_ia32_shufbitqmb_mask(S, OpPC, Call); + case X86::BI__builtin_ia32_pslldqi128_byteshift: case X86::BI__builtin_ia32_pslldqi256_byteshift: case X86::BI__builtin_ia32_pslldqi512_byteshift: @@ -4578,13 +5502,16 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, // The lane width is hardcoded to 16 to match the SIMD register size, // but the algorithm processes one byte per iteration, // so APInt(8, ...) is correct and intentional. - return interp__builtin_x86_byteshift( - S, OpPC, Call, BuiltinID, - [](const Pointer &Src, unsigned Lane, unsigned I, unsigned Shift) { - if (I < Shift) { - return APInt(8, 0); - } - return APInt(8, Src.elem<uint8_t>(Lane + I - Shift)); + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, + [](unsigned DstIdx, unsigned Shift) -> std::pair<unsigned, int> { + unsigned LaneBase = (DstIdx / 16) * 16; + unsigned LaneIdx = DstIdx % 16; + if (LaneIdx < Shift) + return std::make_pair(0, -1); + + return std::make_pair(0, + static_cast<int>(LaneBase + LaneIdx - Shift)); }); case X86::BI__builtin_ia32_psrldqi128_byteshift: @@ -4594,16 +5521,60 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, // The lane width is hardcoded to 16 to match the SIMD register size, // but the algorithm processes one byte per iteration, // so APInt(8, ...) is correct and intentional. - return interp__builtin_x86_byteshift( - S, OpPC, Call, BuiltinID, - [](const Pointer &Src, unsigned Lane, unsigned I, unsigned Shift) { - if (I + Shift < 16) { - return APInt(8, Src.elem<uint8_t>(Lane + I + Shift)); + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, + [](unsigned DstIdx, unsigned Shift) -> std::pair<unsigned, int> { + unsigned LaneBase = (DstIdx / 16) * 16; + unsigned LaneIdx = DstIdx % 16; + if (LaneIdx + Shift < 16) + return std::make_pair(0, + static_cast<int>(LaneBase + LaneIdx + Shift)); + + return std::make_pair(0, -1); + }); + + case X86::BI__builtin_ia32_palignr128: + case X86::BI__builtin_ia32_palignr256: + case X86::BI__builtin_ia32_palignr512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned Shift) { + // Default to -1 → zero-fill this destination element + unsigned VecIdx = 1; + int ElemIdx = -1; + + int Lane = DstIdx / 16; + int Offset = DstIdx % 16; + + // Elements come from VecB first, then VecA after the shift boundary + unsigned ShiftedIdx = Offset + (Shift & 0xFF); + if (ShiftedIdx < 16) { // from VecB + ElemIdx = ShiftedIdx + (Lane * 16); + } else if (ShiftedIdx < 32) { // from VecA + VecIdx = 0; + ElemIdx = (ShiftedIdx - 16) + (Lane * 16); } - return APInt(8, 0); + return std::pair<unsigned, int>{VecIdx, ElemIdx}; }); + case X86::BI__builtin_ia32_alignd128: + case X86::BI__builtin_ia32_alignd256: + case X86::BI__builtin_ia32_alignd512: + case X86::BI__builtin_ia32_alignq128: + case X86::BI__builtin_ia32_alignq256: + case X86::BI__builtin_ia32_alignq512: { + unsigned NumElems = Call->getType()->castAs<VectorType>()->getNumElements(); + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [NumElems](unsigned DstIdx, unsigned Shift) { + unsigned Imm = Shift & 0xFF; + unsigned EffectiveShift = Imm & (NumElems - 1); + unsigned SourcePos = DstIdx + EffectiveShift; + unsigned VecIdx = SourcePos < NumElems ? 
1u : 0u; + unsigned ElemIdx = SourcePos & (NumElems - 1); + return std::pair<unsigned, int>{VecIdx, static_cast<int>(ElemIdx)}; + }); + } + default: S.FFDiag(S.Current->getLocation(OpPC), diag::note_invalid_subexpr_in_const_expr) diff --git a/clang/lib/AST/ByteCode/InterpFrame.cpp b/clang/lib/AST/ByteCode/InterpFrame.cpp index 039acb5..3b88376 100644 --- a/clang/lib/AST/ByteCode/InterpFrame.cpp +++ b/clang/lib/AST/ByteCode/InterpFrame.cpp @@ -89,11 +89,23 @@ void InterpFrame::destroyScopes() { void InterpFrame::initScope(unsigned Idx) { if (!Func) return; + for (auto &Local : Func->getScope(Idx).locals()) { localBlock(Local.Offset)->invokeCtor(); } } +void InterpFrame::enableLocal(unsigned Idx) { + assert(Func); + + // FIXME: This is a little dirty, but to avoid adding a flag to + // InlineDescriptor that's only ever useful on the toplevel of local + // variables, we reuse the IsActive flag for the enabled state. We should + // probably use a different struct than InlineDescriptor for the block-level + // inline descriptor of local variables. + localInlineDesc(Idx)->IsActive = true; +} + void InterpFrame::destroy(unsigned Idx) { for (auto &Local : Func->getScope(Idx).locals_reverse()) { S.deallocate(localBlock(Local.Offset)); diff --git a/clang/lib/AST/ByteCode/InterpFrame.h b/clang/lib/AST/ByteCode/InterpFrame.h index fa9de2e..e150e92 100644 --- a/clang/lib/AST/ByteCode/InterpFrame.h +++ b/clang/lib/AST/ByteCode/InterpFrame.h @@ -55,6 +55,10 @@ public: void destroy(unsigned Idx); void initScope(unsigned Idx); void destroyScopes(); + void enableLocal(unsigned Idx); + bool isLocalEnabled(unsigned Idx) const { + return localInlineDesc(Idx)->IsActive; + } /// Describes the frame with arguments for diagnostic purposes. void describe(llvm::raw_ostream &OS) const override; @@ -109,6 +113,7 @@ public: /// Returns the 'this' pointer. const Pointer &getThis() const { assert(hasThisPointer()); + assert(!isBottomFrame()); return stackRef<Pointer>(ThisPointerOffset); } @@ -116,6 +121,7 @@ public: const Pointer &getRVOPtr() const { assert(Func); assert(Func->hasRVO()); + assert(!isBottomFrame()); return stackRef<Pointer>(0); } diff --git a/clang/lib/AST/ByteCode/MemberPointer.cpp b/clang/lib/AST/ByteCode/MemberPointer.cpp index dfc8583..8b1b018 100644 --- a/clang/lib/AST/ByteCode/MemberPointer.cpp +++ b/clang/lib/AST/ByteCode/MemberPointer.cpp @@ -23,6 +23,15 @@ std::optional<Pointer> MemberPointer::toPointer(const Context &Ctx) const { if (!Base.isBlockPointer()) return std::nullopt; + unsigned BlockMDSize = Base.block()->getDescriptor()->getMetadataSize(); + + if (PtrOffset >= 0) { + // If the resulting base would be too small, return nullopt. + if (Base.BS.Base < static_cast<unsigned>(PtrOffset) || + (Base.BS.Base - PtrOffset < BlockMDSize)) + return std::nullopt; + } + Pointer CastedBase = (PtrOffset < 0 ?
Base.atField(-PtrOffset) : Base.atFieldSub(PtrOffset)); @@ -31,7 +40,7 @@ std::optional<Pointer> MemberPointer::toPointer(const Context &Ctx) const { return std::nullopt; unsigned Offset = 0; - Offset += CastedBase.block()->getDescriptor()->getMetadataSize(); + Offset += BlockMDSize; if (const auto *FD = dyn_cast<FieldDecl>(Dcl)) { if (FD->getParent() == BaseRecord->getDecl()) diff --git a/clang/lib/AST/ByteCode/Opcodes.td b/clang/lib/AST/ByteCode/Opcodes.td index 1c17ad9e..6e76879 100644 --- a/clang/lib/AST/ByteCode/Opcodes.td +++ b/clang/lib/AST/ByteCode/Opcodes.td @@ -53,6 +53,7 @@ def ArgBool : ArgType { let Name = "bool"; } def ArgFixedPoint : ArgType { let Name = "FixedPoint"; let AsRef = true; } def ArgFunction : ArgType { let Name = "const Function *"; } +def ArgFunctionDecl : ArgType { let Name = "const FunctionDecl *"; } def ArgRecordDecl : ArgType { let Name = "const RecordDecl *"; } def ArgRecordField : ArgType { let Name = "const Record::Field *"; } def ArgFltSemantics : ArgType { let Name = "const llvm::fltSemantics *"; } @@ -250,6 +251,16 @@ def InitScope : Opcode { let Args = [ArgUint32]; } +def GetLocalEnabled : Opcode { + let Args = [ArgUint32]; + let HasCustomEval = 1; +} + +def EnableLocal : Opcode { + let Args = [ArgUint32]; + let HasCustomEval = 1; +} + //===----------------------------------------------------------------------===// // Constants //===----------------------------------------------------------------------===// @@ -360,8 +371,14 @@ def NarrowPtr : Opcode; // [Pointer] -> [Pointer] def ExpandPtr : Opcode; // [Pointer, Offset] -> [Pointer] -def ArrayElemPtr : AluOpcode; -def ArrayElemPtrPop : AluOpcode; +def ArrayElemPtr : Opcode { + let Types = [IntegralTypeClass]; + let HasGroup = 1; +} +def ArrayElemPtrPop : Opcode { + let Types = [IntegralTypeClass]; + let HasGroup = 1; +} def ArrayElemPop : Opcode { let Args = [ArgUint32]; @@ -421,6 +438,8 @@ def CheckLiteralType : Opcode { } def CheckArraySize : Opcode { let Args = [ArgUint64]; } +def CheckFunctionDecl : Opcode { let Args = [ArgFunctionDecl]; } +def CheckBitCast : Opcode { let Args = [ArgTypePtr, ArgBool]; } // [] -> [Value] def GetGlobal : AccessOpcode; @@ -533,13 +552,20 @@ def InitElemPop : Opcode { //===----------------------------------------------------------------------===// // [Pointer, Integral] -> [Pointer] -def AddOffset : AluOpcode; +def AddOffset : Opcode { + let Types = [IntegralTypeClass]; + let HasGroup = 1; +} // [Pointer, Integral] -> [Pointer] -def SubOffset : AluOpcode; +def SubOffset : Opcode { + let Types = [IntegralTypeClass]; + let HasGroup = 1; +} // [Pointer, Pointer] -> [Integral] def SubPtr : Opcode { let Types = [IntegerTypeClass]; + let Args = [ArgBool]; let HasGroup = 1; } diff --git a/clang/lib/AST/ByteCode/Pointer.cpp b/clang/lib/AST/ByteCode/Pointer.cpp index e417bdf..00e74db 100644 --- a/clang/lib/AST/ByteCode/Pointer.cpp +++ b/clang/lib/AST/ByteCode/Pointer.cpp @@ -33,6 +33,7 @@ Pointer::Pointer(Block *Pointee, uint64_t BaseAndOffset) Pointer::Pointer(Block *Pointee, unsigned Base, uint64_t Offset) : Offset(Offset), StorageKind(Storage::Block) { assert((Base == RootPtrMark || Base % alignof(void *) == 0) && "wrong base"); + assert(Base >= Pointee->getDescriptor()->getMetadataSize()); BS = {Pointee, Base, nullptr, nullptr}; @@ -894,8 +895,8 @@ std::optional<APValue> Pointer::toRValue(const Context &Ctx, return Result; } -IntPointer IntPointer::atOffset(const ASTContext &ASTCtx, - unsigned Offset) const { +std::optional<IntPointer> IntPointer::atOffset(const 
ASTContext &ASTCtx, + unsigned Offset) const { if (!this->Desc) return *this; const Record *R = this->Desc->ElemRecord; @@ -913,6 +914,9 @@ IntPointer IntPointer::atOffset(const ASTContext &ASTCtx, return *this; const FieldDecl *FD = F->Decl; + if (FD->getParent()->isInvalidDecl()) + return std::nullopt; + const ASTRecordLayout &Layout = ASTCtx.getASTRecordLayout(FD->getParent()); unsigned FieldIndex = FD->getFieldIndex(); uint64_t FieldOffset = diff --git a/clang/lib/AST/ByteCode/Pointer.h b/clang/lib/AST/ByteCode/Pointer.h index cd738ce..0978090 100644 --- a/clang/lib/AST/ByteCode/Pointer.h +++ b/clang/lib/AST/ByteCode/Pointer.h @@ -47,7 +47,8 @@ struct IntPointer { const Descriptor *Desc; uint64_t Value; - IntPointer atOffset(const ASTContext &ASTCtx, unsigned Offset) const; + std::optional<IntPointer> atOffset(const ASTContext &ASTCtx, + unsigned Offset) const; IntPointer baseCast(const ASTContext &ASTCtx, unsigned BaseOffset) const; }; @@ -199,17 +200,19 @@ public: return Pointer(BS.Pointee, sizeof(InlineDescriptor), Offset == 0 ? Offset : PastEndMark); - // Pointer is one past end - magic offset marks that. - if (isOnePastEnd()) - return Pointer(BS.Pointee, Base, PastEndMark); - - if (Offset != Base) { - // If we're pointing to a primitive array element, there's nothing to do. - if (inPrimitiveArray()) - return *this; - // Pointer is to a composite array element - enter it. - if (Offset != Base) + if (inArray()) { + // Pointer is one past end - magic offset marks that. + if (isOnePastEnd()) + return Pointer(BS.Pointee, Base, PastEndMark); + + if (Offset != Base) { + // If we're pointing to a primitive array element, there's nothing to + // do. + if (inPrimitiveArray()) + return *this; + // Pointer is to a composite array element - enter it. return Pointer(BS.Pointee, Offset, Offset); + } } // Otherwise, we're pointing to a non-array element or @@ -219,6 +222,8 @@ public: /// Expands a pointer to the containing array, undoing narrowing. 
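/// For instance (illustrative), a pointer narrowed to a single element of
/// an int[4] block expands back to a pointer to the whole array; non-block
/// pointers are simply returned unchanged.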
[[nodiscard]] Pointer expand() const { + if (!isBlockPointer()) + return *this; assert(isBlockPointer()); Block *Pointee = BS.Pointee; @@ -830,6 +835,9 @@ private: inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Pointer &P) { P.print(OS); + OS << ' '; + if (const Descriptor *D = P.getFieldDesc()) + D->dump(OS); return OS; } diff --git a/clang/lib/AST/ByteCode/PrimType.h b/clang/lib/AST/ByteCode/PrimType.h index 54fd39a..f0454b4 100644 --- a/clang/lib/AST/ByteCode/PrimType.h +++ b/clang/lib/AST/ByteCode/PrimType.h @@ -101,6 +101,7 @@ inline constexpr bool isSignedType(PrimType T) { enum class CastKind : uint8_t { Reinterpret, + ReinterpretLike, Volatile, Dynamic, }; @@ -111,6 +112,9 @@ inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, case interp::CastKind::Reinterpret: OS << "reinterpret_cast"; break; + case interp::CastKind::ReinterpretLike: + OS << "reinterpret_like"; + break; case interp::CastKind::Volatile: OS << "volatile"; break; diff --git a/clang/lib/AST/ByteCode/Program.cpp b/clang/lib/AST/ByteCode/Program.cpp index e0b2852..d9693407 100644 --- a/clang/lib/AST/ByteCode/Program.cpp +++ b/clang/lib/AST/ByteCode/Program.cpp @@ -27,7 +27,7 @@ unsigned Program::getOrCreateNativePointer(const void *Ptr) { return It->second; } -const void *Program::getNativePointer(unsigned Idx) { +const void *Program::getNativePointer(unsigned Idx) const { return NativePointers[Idx]; } @@ -36,30 +36,19 @@ unsigned Program::createGlobalString(const StringLiteral *S, const Expr *Base) { const size_t BitWidth = CharWidth * Ctx.getCharBit(); unsigned StringLength = S->getLength(); - PrimType CharType; - switch (CharWidth) { - case 1: - CharType = PT_Sint8; - break; - case 2: - CharType = PT_Uint16; - break; - case 4: - CharType = PT_Uint32; - break; - default: - llvm_unreachable("unsupported character width"); - } + OptPrimType CharType = + Ctx.classify(S->getType()->castAsArrayTypeUnsafe()->getElementType()); + assert(CharType); if (!Base) Base = S; // Create a descriptor for the string. - Descriptor *Desc = - allocateDescriptor(Base, CharType, Descriptor::GlobalMD, StringLength + 1, - /*isConst=*/true, - /*isTemporary=*/false, - /*isMutable=*/false); + Descriptor *Desc = allocateDescriptor(Base, *CharType, Descriptor::GlobalMD, + StringLength + 1, + /*isConst=*/true, + /*isTemporary=*/false, + /*isMutable=*/false); // Allocate storage for the string. // The byte length does not include the null terminator. @@ -79,26 +68,9 @@ unsigned Program::createGlobalString(const StringLiteral *S, const Expr *Base) { } else { // Construct the string in storage. for (unsigned I = 0; I <= StringLength; ++I) { - const uint32_t CodePoint = I == StringLength ? 0 : S->getCodeUnit(I); - switch (CharType) { - case PT_Sint8: { - using T = PrimConv<PT_Sint8>::T; - Ptr.elem<T>(I) = T::from(CodePoint, BitWidth); - break; - } - case PT_Uint16: { - using T = PrimConv<PT_Uint16>::T; - Ptr.elem<T>(I) = T::from(CodePoint, BitWidth); - break; - } - case PT_Uint32: { - using T = PrimConv<PT_Uint32>::T; - Ptr.elem<T>(I) = T::from(CodePoint, BitWidth); - break; - } - default: - llvm_unreachable("unsupported character type"); - } + uint32_t CodePoint = I == StringLength ? 
0 : S->getCodeUnit(I); + INT_TYPE_SWITCH_NO_BOOL(*CharType, + Ptr.elem<T>(I) = T::from(CodePoint, BitWidth);); } } Ptr.initializeAllElements(); @@ -218,21 +190,43 @@ UnsignedOrNone Program::createGlobal(const ValueDecl *VD, const Expr *Init) { return std::nullopt; Global *NewGlobal = Globals[*Idx]; + // Note that this loop has one iteration where Redecl == VD. for (const Decl *Redecl : VD->redecls()) { - unsigned &PIdx = GlobalIndices[Redecl]; + + // If this redecl was registered as a dummy variable, it is now a proper + // global variable and points to the block we just created. + if (auto DummyIt = DummyVariables.find(Redecl); + DummyIt != DummyVariables.end()) { + Global *Dummy = Globals[DummyIt->second]; + Dummy->block()->movePointersTo(NewGlobal->block()); + Globals[DummyIt->second] = NewGlobal; + DummyVariables.erase(DummyIt); + } + // If the redeclaration hasn't been registered yet at all, we just set its + // global index to Idx. If it has already been registered, it might have + // pointers pointing to it and we need to transfer those pointers to the new + // block. + auto [Iter, Inserted] = GlobalIndices.try_emplace(Redecl); + if (Inserted) { + GlobalIndices[Redecl] = *Idx; + continue; + } + if (Redecl != VD) { - if (Block *RedeclBlock = Globals[PIdx]->block(); + if (Block *RedeclBlock = Globals[Iter->second]->block(); RedeclBlock->isExtern()) { - Globals[PIdx] = NewGlobal; + + // All pointers pointing to the previous extern decl now point to the // new decl. // A previous iteration might've already fixed up the pointers for this // global. if (RedeclBlock != NewGlobal->block()) RedeclBlock->movePointersTo(NewGlobal->block()); + + Globals[Iter->second] = NewGlobal; } } - PIdx = *Idx; + Iter->second = *Idx; } return *Idx; diff --git a/clang/lib/AST/ByteCode/Program.h b/clang/lib/AST/ByteCode/Program.h index 28fcc97..c879550 100644 --- a/clang/lib/AST/ByteCode/Program.h +++ b/clang/lib/AST/ByteCode/Program.h @@ -58,7 +58,7 @@ public: unsigned getOrCreateNativePointer(const void *Ptr); /// Returns the value of a marshalled native pointer. - const void *getNativePointer(unsigned Idx); + const void *getNativePointer(unsigned Idx) const; /// Emits a string literal among global data. unsigned createGlobalString(const StringLiteral *S, @@ -205,7 +205,6 @@ private: const Block *block() const { return &B; } private: - /// Required metadata - does not actually track pointers. Block B; }; diff --git a/clang/lib/AST/ByteCode/Source.h b/clang/lib/AST/ByteCode/Source.h index f355d14..56ca197 100644 --- a/clang/lib/AST/ByteCode/Source.h +++ b/clang/lib/AST/ByteCode/Source.h @@ -51,6 +51,7 @@ public: explicit operator bool() const { return Ptr; } bool operator<=(const CodePtr &RHS) const { return Ptr <= RHS.Ptr; } bool operator>=(const CodePtr &RHS) const { return Ptr >= RHS.Ptr; } + bool operator==(const CodePtr RHS) const { return Ptr == RHS.Ptr; } /// Reads data and advances the pointer. template <typename T> std::enable_if_t<!std::is_pointer<T>::value, T> read() { diff --git a/clang/lib/AST/CXXInheritance.cpp b/clang/lib/AST/CXXInheritance.cpp index 7a3e7ea..29f5916 100644 --- a/clang/lib/AST/CXXInheritance.cpp +++ b/clang/lib/AST/CXXInheritance.cpp @@ -34,9 +34,9 @@ using namespace clang; /// ambiguous, i.e., there are two or more paths that refer to /// different base class subobjects of the same type. BaseType must be /// an unqualified, canonical class type.
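/// (Illustrative: given struct A {}; struct B1 : A {}; struct B2 : A {};
/// struct D : B1, B2 {};, D contains two distinct non-virtual A subobjects,
/// so the paths from D to A are ambiguous.)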
-bool CXXBasePaths::isAmbiguous(CanQualType BaseType) { +bool CXXBasePaths::isAmbiguous(CanQualType BaseType) const { BaseType = BaseType.getUnqualifiedType(); - IsVirtBaseAndNumberNonVirtBases Subobjects = ClassSubobjects[BaseType]; + IsVirtBaseAndNumberNonVirtBases Subobjects = ClassSubobjects.lookup(BaseType); return Subobjects.NumberOfNonVirtBases + (Subobjects.IsVirtBase ? 1 : 0) > 1; } diff --git a/clang/lib/AST/CommentSema.cpp b/clang/lib/AST/CommentSema.cpp index 27ff5ab..d5ba240 100644 --- a/clang/lib/AST/CommentSema.cpp +++ b/clang/lib/AST/CommentSema.cpp @@ -225,7 +225,7 @@ static ParamCommandPassDirection getParamPassDirection(StringRef Arg) { return llvm::StringSwitch<ParamCommandPassDirection>(Arg) .Case("[in]", ParamCommandPassDirection::In) .Case("[out]", ParamCommandPassDirection::Out) - .Cases("[in,out]", "[out,in]", ParamCommandPassDirection::InOut) + .Cases({"[in,out]", "[out,in]"}, ParamCommandPassDirection::InOut) .Default(static_cast<ParamCommandPassDirection>(-1)); } diff --git a/clang/lib/AST/ComparisonCategories.cpp b/clang/lib/AST/ComparisonCategories.cpp index 0c7a7f4..1b9c938 100644 --- a/clang/lib/AST/ComparisonCategories.cpp +++ b/clang/lib/AST/ComparisonCategories.cpp @@ -49,7 +49,7 @@ bool ComparisonCategoryInfo::ValueInfo::hasValidIntValue() const { // Before we attempt to get the value of the first field, ensure that we // actually have one (and only one) field. const auto *Record = VD->getType()->getAsCXXRecordDecl(); - if (std::distance(Record->field_begin(), Record->field_end()) != 1 || + if (Record->getNumFields() != 1 || !Record->field_begin()->getType()->isIntegralOrEnumerationType()) return false; diff --git a/clang/lib/AST/ComputeDependence.cpp b/clang/lib/AST/ComputeDependence.cpp index e0cf0de..638080e 100644 --- a/clang/lib/AST/ComputeDependence.cpp +++ b/clang/lib/AST/ComputeDependence.cpp @@ -178,7 +178,7 @@ ExprDependence clang::computeDependence(StmtExpr *E, unsigned TemplateDepth) { auto D = toExprDependenceForImpliedType(E->getType()->getDependence()); // Propagate dependence of the result. if (const auto *CompoundExprResult = - dyn_cast_or_null<ValueStmt>(E->getSubStmt()->getStmtExprResult())) + dyn_cast_or_null<ValueStmt>(E->getSubStmt()->body_back())) if (const Expr *ResultExpr = CompoundExprResult->getExprStmt()) D |= ResultExpr->getDependence(); // Note: we treat a statement-expression in a dependent context as always diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 8579e51..4444b10 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -1742,6 +1742,9 @@ void NamedDecl::printNestedNameSpecifier(raw_ostream &OS, // Collect named contexts. DeclarationName NameInScope = getDeclName(); for (; Ctx; Ctx = Ctx->getParent()) { + if (P.Callbacks && P.Callbacks->isScopeVisible(Ctx)) + continue; + // Suppress anonymous namespace if requested. if (P.SuppressUnwrittenScope && isa<NamespaceDecl>(Ctx) && cast<NamespaceDecl>(Ctx)->isAnonymousNamespace()) @@ -1750,9 +1753,11 @@ void NamedDecl::printNestedNameSpecifier(raw_ostream &OS, // Suppress inline namespace if it doesn't make the result ambiguous. 
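// (Illustrative: with libc++'s inline namespace, "std::__1::vector" can
// print as just "std::vector", since the inline qualifier adds nothing.)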
if (Ctx->isInlineNamespace() && NameInScope) { if (P.SuppressInlineNamespace == - PrintingPolicy::SuppressInlineNamespaceMode::All || + llvm::to_underlying( + PrintingPolicy::SuppressInlineNamespaceMode::All) || (P.SuppressInlineNamespace == - PrintingPolicy::SuppressInlineNamespaceMode::Redundant && + llvm::to_underlying( + PrintingPolicy::SuppressInlineNamespaceMode::Redundant) && cast<NamespaceDecl>(Ctx)->isRedundantInlineQualifierFor( NameInScope))) { continue; @@ -1787,7 +1792,9 @@ void NamedDecl::printNestedNameSpecifier(raw_ostream &OS, else OS << *ND; } else if (const auto *RD = dyn_cast<RecordDecl>(DC)) { - if (!RD->getIdentifier()) + if (TypedefNameDecl *TD = RD->getTypedefNameForAnonDecl()) + OS << *TD; + else if (!RD->getIdentifier()) OS << "(anonymous " << RD->getKindName() << ')'; else OS << *RD; @@ -3180,7 +3187,7 @@ void FunctionDecl::DefaultedOrDeletedFunctionInfo::setDeletedMessage( } FunctionDecl::DefaultedOrDeletedFunctionInfo * -FunctionDecl::getDefalutedOrDeletedInfo() const { +FunctionDecl::getDefaultedOrDeletedInfo() const { return FunctionDeclBits.HasDefaultedOrDeletedInfo ? DefaultedOrDeletedInfo : nullptr; } diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 340bb4b..ca7f3e1 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -1934,6 +1934,7 @@ bool CastExpr::CastConsistency() const { case CK_FixedPointToBoolean: case CK_HLSLArrayRValue: case CK_HLSLVectorTruncation: + case CK_HLSLMatrixTruncation: case CK_HLSLElementwiseCast: case CK_HLSLAggregateSplatCast: CheckNoBasePath: @@ -5213,6 +5214,8 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) { case AO__scoped_atomic_fetch_min: case AO__scoped_atomic_fetch_max: case AO__scoped_atomic_exchange_n: + case AO__scoped_atomic_uinc_wrap: + case AO__scoped_atomic_udec_wrap: case AO__hip_atomic_exchange: case AO__hip_atomic_fetch_add: case AO__hip_atomic_fetch_sub: diff --git a/clang/lib/AST/ExprConstShared.h b/clang/lib/AST/ExprConstShared.h index 401ae62..def5766 100644 --- a/clang/lib/AST/ExprConstShared.h +++ b/clang/lib/AST/ExprConstShared.h @@ -15,9 +15,12 @@ #define LLVM_CLANG_LIB_AST_EXPRCONSTSHARED_H #include "clang/Basic/TypeTraits.h" +#include <cstdint> namespace llvm { class APFloat; +class APInt; +class APSInt; } namespace clang { class QualType; @@ -74,4 +77,9 @@ void HandleComplexComplexDiv(llvm::APFloat A, llvm::APFloat B, llvm::APFloat C, CharUnits GetAlignOfExpr(const ASTContext &Ctx, const Expr *E, UnaryExprOrTypeTrait ExprKind); +uint8_t GFNIMultiplicativeInverse(uint8_t Byte); +uint8_t GFNIMul(uint8_t AByte, uint8_t BByte); +uint8_t GFNIAffine(uint8_t XByte, const llvm::APInt &AQword, + const llvm::APSInt &Imm, bool Inverse = false); + #endif diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index d0404b9..d81496f 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -3829,6 +3829,350 @@ static bool CheckArraySize(EvalInfo &Info, const ConstantArrayType *CAT, /*Diag=*/true); } +static bool handleScalarCast(EvalInfo &Info, const FPOptions FPO, const Expr *E, + QualType SourceTy, QualType DestTy, + APValue const &Original, APValue &Result) { + // Boolean must be checked before integer, + // since isIntegerType() is true for bool. + if (SourceTy->isBooleanType()) { + if (DestTy->isBooleanType()) { + Result = Original; + return true; + } + if (DestTy->isIntegerType() || DestTy->isRealFloatingType()) { + bool BoolResult; + if (!HandleConversionToBool(Original, BoolResult)) + return false; + uint64_t IntResult =
BoolResult; + QualType IntType = DestTy->isIntegerType() + ? DestTy + : Info.Ctx.getIntTypeForBitwidth(64, false); + Result = APValue(Info.Ctx.MakeIntValue(IntResult, IntType)); + } + if (DestTy->isRealFloatingType()) { + APValue Result2 = APValue(APFloat(0.0)); + if (!HandleIntToFloatCast(Info, E, FPO, + Info.Ctx.getIntTypeForBitwidth(64, false), + Result.getInt(), DestTy, Result2.getFloat())) + return false; + Result = Result2; + } + return true; + } + if (SourceTy->isIntegerType()) { + if (DestTy->isRealFloatingType()) { + Result = APValue(APFloat(0.0)); + return HandleIntToFloatCast(Info, E, FPO, SourceTy, Original.getInt(), + DestTy, Result.getFloat()); + } + if (DestTy->isBooleanType()) { + bool BoolResult; + if (!HandleConversionToBool(Original, BoolResult)) + return false; + uint64_t IntResult = BoolResult; + Result = APValue(Info.Ctx.MakeIntValue(IntResult, DestTy)); + return true; + } + if (DestTy->isIntegerType()) { + Result = APValue( + HandleIntToIntCast(Info, E, DestTy, SourceTy, Original.getInt())); + return true; + } + } else if (SourceTy->isRealFloatingType()) { + if (DestTy->isRealFloatingType()) { + Result = Original; + return HandleFloatToFloatCast(Info, E, SourceTy, DestTy, + Result.getFloat()); + } + if (DestTy->isBooleanType()) { + bool BoolResult; + if (!HandleConversionToBool(Original, BoolResult)) + return false; + uint64_t IntResult = BoolResult; + Result = APValue(Info.Ctx.MakeIntValue(IntResult, DestTy)); + return true; + } + if (DestTy->isIntegerType()) { + Result = APValue(APSInt()); + return HandleFloatToIntCast(Info, E, SourceTy, Original.getFloat(), + DestTy, Result.getInt()); + } + } + + Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr); + return false; +} + +// Do the heavy lifting for casting to aggregate types, +// because we have to deal with bitfields specially. +static bool constructAggregate(EvalInfo &Info, const FPOptions FPO, + const Expr *E, APValue &Result, + QualType ResultType, + SmallVectorImpl<APValue> &Elements, + SmallVectorImpl<QualType> &ElTypes) { + + SmallVector<std::tuple<APValue *, QualType, unsigned>> WorkList = { + {&Result, ResultType, 0}}; + + unsigned ElI = 0; + while (!WorkList.empty() && ElI < Elements.size()) { + auto [Res, Type, BitWidth] = WorkList.pop_back_val(); + + if (Type->isRealFloatingType()) { + if (!handleScalarCast(Info, FPO, E, ElTypes[ElI], Type, Elements[ElI], + *Res)) + return false; + ElI++; + continue; + } + if (Type->isIntegerType()) { + if (!handleScalarCast(Info, FPO, E, ElTypes[ElI], Type, Elements[ElI], + *Res)) + return false; + if (BitWidth > 0) { + if (!Res->isInt()) + return false; + APSInt &Int = Res->getInt(); + unsigned OldBitWidth = Int.getBitWidth(); + unsigned NewBitWidth = BitWidth; + if (NewBitWidth < OldBitWidth) + Int = Int.trunc(NewBitWidth).extend(OldBitWidth); + } + ElI++; + continue; + } + if (Type->isVectorType()) { + QualType ElTy = Type->castAs<VectorType>()->getElementType(); + unsigned NumEl = Type->castAs<VectorType>()->getNumElements(); + SmallVector<APValue> Vals(NumEl); + for (unsigned I = 0; I < NumEl; ++I) { + if (!handleScalarCast(Info, FPO, E, ElTypes[ElI], ElTy, Elements[ElI], + Vals[I])) + return false; + ElI++; + } + *Res = APValue(Vals.data(), NumEl); + continue; + } + if (Type->isConstantArrayType()) { + QualType ElTy = cast<ConstantArrayType>(Info.Ctx.getAsArrayType(Type)) + ->getElementType(); + uint64_t Size = + cast<ConstantArrayType>(Info.Ctx.getAsArrayType(Type))->getZExtSize(); + *Res = APValue(APValue::UninitArray(), Size, Size); + for (int64_t I = Size
- 1; I > -1; --I) + WorkList.emplace_back(&Res->getArrayInitializedElt(I), ElTy, 0u); + continue; + } + if (Type->isRecordType()) { + const RecordDecl *RD = Type->getAsRecordDecl(); + + unsigned NumBases = 0; + if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) + NumBases = CXXRD->getNumBases(); + + *Res = APValue(APValue::UninitStruct(), NumBases, RD->getNumFields()); + + SmallVector<std::tuple<APValue *, QualType, unsigned>> ReverseList; + // We need to traverse backwards. + // Visit the base classes. + if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { + if (CXXRD->getNumBases() > 0) { + assert(CXXRD->getNumBases() == 1); + const CXXBaseSpecifier &BS = CXXRD->bases_begin()[0]; + ReverseList.emplace_back(&Res->getStructBase(0), BS.getType(), 0u); + } + } + + // Visit the fields. + for (FieldDecl *FD : RD->fields()) { + unsigned FDBW = 0; + if (FD->isUnnamedBitField()) + continue; + if (FD->isBitField()) { + FDBW = FD->getBitWidthValue(); + } + + ReverseList.emplace_back(&Res->getStructField(FD->getFieldIndex()), + FD->getType(), FDBW); + } + + std::reverse(ReverseList.begin(), ReverseList.end()); + llvm::append_range(WorkList, ReverseList); + continue; + } + Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr); + return false; + } + return true; +} + +static bool handleElementwiseCast(EvalInfo &Info, const Expr *E, + const FPOptions FPO, + SmallVectorImpl<APValue> &Elements, + SmallVectorImpl<QualType> &SrcTypes, + SmallVectorImpl<QualType> &DestTypes, + SmallVectorImpl<APValue> &Results) { + + assert((Elements.size() == SrcTypes.size()) && + (Elements.size() == DestTypes.size())); + + for (unsigned I = 0, ESz = Elements.size(); I < ESz; ++I) { + APValue Original = Elements[I]; + QualType SourceTy = SrcTypes[I]; + QualType DestTy = DestTypes[I]; + + if (!handleScalarCast(Info, FPO, E, SourceTy, DestTy, Original, Results[I])) + return false; + } + return true; +} + +static unsigned elementwiseSize(EvalInfo &Info, QualType BaseTy) { + + SmallVector<QualType> WorkList = {BaseTy}; + + unsigned Size = 0; + while (!WorkList.empty()) { + QualType Type = WorkList.pop_back_val(); + if (Type->isRealFloatingType() || Type->isIntegerType() || + Type->isBooleanType()) { + ++Size; + continue; + } + if (Type->isVectorType()) { + unsigned NumEl = Type->castAs<VectorType>()->getNumElements(); + Size += NumEl; + continue; + } + if (Type->isConstantArrayType()) { + QualType ElTy = cast<ConstantArrayType>(Info.Ctx.getAsArrayType(Type)) + ->getElementType(); + uint64_t ArrSize = + cast<ConstantArrayType>(Info.Ctx.getAsArrayType(Type))->getZExtSize(); + for (uint64_t I = 0; I < ArrSize; ++I) { + WorkList.push_back(ElTy); + } + continue; + } + if (Type->isRecordType()) { + const RecordDecl *RD = Type->getAsRecordDecl(); + + // Visit the base classes. + if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { + if (CXXRD->getNumBases() > 0) { + assert(CXXRD->getNumBases() == 1); + const CXXBaseSpecifier &BS = CXXRD->bases_begin()[0]; + WorkList.push_back(BS.getType()); + } + } + + // Visit the fields.
+ for (FieldDecl *FD : RD->fields()) { + if (FD->isUnnamedBitField()) + continue; + WorkList.push_back(FD->getType()); + } + continue; + } + } + return Size; +} + +static bool hlslAggSplatHelper(EvalInfo &Info, const Expr *E, APValue &SrcVal, + QualType &SrcTy) { + SrcTy = E->getType(); + + if (!Evaluate(SrcVal, Info, E)) + return false; + + assert((SrcVal.isFloat() || SrcVal.isInt() || + (SrcVal.isVector() && SrcVal.getVectorLength() == 1)) && + "Not a valid HLSLAggregateSplatCast."); + + if (SrcVal.isVector()) { + assert(SrcTy->isVectorType() && "Type mismatch."); + SrcTy = SrcTy->castAs<VectorType>()->getElementType(); + SrcVal = SrcVal.getVectorElt(0); + } + return true; +} + +static bool flattenAPValue(EvalInfo &Info, const Expr *E, APValue Value, + QualType BaseTy, SmallVectorImpl<APValue> &Elements, + SmallVectorImpl<QualType> &Types, unsigned Size) { + + SmallVector<std::pair<APValue, QualType>> WorkList = {{Value, BaseTy}}; + unsigned Populated = 0; + while (!WorkList.empty() && Populated < Size) { + auto [Work, Type] = WorkList.pop_back_val(); + + if (Work.isFloat() || Work.isInt()) { + Elements.push_back(Work); + Types.push_back(Type); + Populated++; + continue; + } + if (Work.isVector()) { + assert(Type->isVectorType() && "Type mismatch."); + QualType ElTy = Type->castAs<VectorType>()->getElementType(); + for (unsigned I = 0; I < Work.getVectorLength() && Populated < Size; + I++) { + Elements.push_back(Work.getVectorElt(I)); + Types.push_back(ElTy); + Populated++; + } + continue; + } + if (Work.isArray()) { + assert(Type->isConstantArrayType() && "Type mismatch."); + QualType ElTy = cast<ConstantArrayType>(Info.Ctx.getAsArrayType(Type)) + ->getElementType(); + for (int64_t I = Work.getArraySize() - 1; I > -1; --I) { + WorkList.emplace_back(Work.getArrayInitializedElt(I), ElTy); + } + continue; + } + + if (Work.isStruct()) { + assert(Type->isRecordType() && "Type mismatch."); + + const RecordDecl *RD = Type->getAsRecordDecl(); + + SmallVector<std::pair<APValue, QualType>> ReverseList; + // Visit the fields. + for (FieldDecl *FD : RD->fields()) { + if (FD->isUnnamedBitField()) + continue; + ReverseList.emplace_back(Work.getStructField(FD->getFieldIndex()), + FD->getType()); + } + + std::reverse(ReverseList.begin(), ReverseList.end()); + llvm::append_range(WorkList, ReverseList); + + // Visit the base classes. + if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { + if (CXXRD->getNumBases() > 0) { + assert(CXXRD->getNumBases() == 1); + const CXXBaseSpecifier &BS = CXXRD->bases_begin()[0]; + const APValue &Base = Work.getStructBase(0); + + // Can happen in error cases. + if (!Base.isStruct()) + return false; + + WorkList.emplace_back(Base, BS.getType()); + } + } + continue; + } + Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr); + return false; + } + return true; +} + namespace { /// A handle to a complete object (an object that is not a subobject of /// another object). 
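Both constructAggregate and flattenAPValue above follow the same worklist discipline: entries come off the back via pop_back_val(), so children are appended in reverse source order to be visited front-to-back. A minimal standalone sketch of that ordering trick, using hypothetical names and plain standard-library types rather than the clang/LLVM ones above:

#include <algorithm>
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

int main() {
  // One record "S" whose fields, in declaration order, are a, b, c.
  std::vector<std::pair<std::string, std::vector<std::string>>> Tree = {
      {"S", {"a", "b", "c"}}};
  std::vector<std::string> WorkList = {"S"};
  while (!WorkList.empty()) {
    std::string Node = WorkList.back(); // pop_back_val() analogue
    WorkList.pop_back();
    std::printf("%s ", Node.c_str());
    for (const auto &[Name, Kids] : Tree)
      if (Name == Node) {
        // Push the children reversed so the leftmost field is popped first.
        std::vector<std::string> Reverse(Kids.rbegin(), Kids.rend());
        WorkList.insert(WorkList.end(), Reverse.begin(), Reverse.end());
      }
  }
  return 0; // prints: S a b c
}

This mirrors why both helpers fill a ReverseList in source order, std::reverse it, and only then append it to the worklist.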
@@ -4639,6 +4983,30 @@ handleLValueToRValueConversion(EvalInfo &Info, const Expr *Conv, QualType Type, return Obj && extractSubobject(Info, Conv, Obj, LVal.Designator, RVal, AK); } +static bool hlslElementwiseCastHelper(EvalInfo &Info, const Expr *E, + QualType DestTy, + SmallVectorImpl<APValue> &SrcVals, + SmallVectorImpl<QualType> &SrcTypes) { + APValue Val; + if (!Evaluate(Val, Info, E)) + return false; + + // We must be dealing with a record. + if (Val.isLValue()) { + LValue LVal; + LVal.setFrom(Info.Ctx, Val); + if (!handleLValueToRValueConversion(Info, E, E->getType(), LVal, Val)) + return false; + } + + unsigned NEls = elementwiseSize(Info, DestTy); + // Flatten the source. + if (!flattenAPValue(Info, E, Val, E->getType(), SrcVals, SrcTypes, NEls)) + return false; + + return true; +} + /// Perform an assignment of Val to LVal. Takes ownership of Val. static bool handleAssignment(EvalInfo &Info, const Expr *E, const LValue &LVal, QualType LValType, APValue &Val) { @@ -5160,8 +5528,8 @@ static bool handleDefaultInitValue(QualType T, APValue &Result) { Result = APValue((const FieldDecl *)nullptr); return true; } - Result = APValue(APValue::UninitStruct(), RD->getNumBases(), - std::distance(RD->field_begin(), RD->field_end())); + Result = + APValue(APValue::UninitStruct(), RD->getNumBases(), RD->getNumFields()); unsigned Index = 0; for (CXXRecordDecl::base_class_const_iterator I = RD->bases_begin(), @@ -5452,10 +5820,13 @@ static EvalStmtResult EvaluateSwitch(StmtResult &Result, EvalInfo &Info, } const CaseStmt *CS = cast<CaseStmt>(SC); - APSInt LHS = CS->getLHS()->EvaluateKnownConstInt(Info.Ctx); - APSInt RHS = CS->getRHS() ? CS->getRHS()->EvaluateKnownConstInt(Info.Ctx) : LHS; - if (LHS <= Value && Value <= RHS) { + const Expr *LHS = CS->getLHS(); + const Expr *RHS = CS->getRHS(); + if (LHS->isValueDependent() || (RHS && RHS->isValueDependent())) + return ESR_Failed; + APSInt LHSValue = LHS->EvaluateKnownConstInt(Info.Ctx); + APSInt RHSValue = RHS ? RHS->EvaluateKnownConstInt(Info.Ctx) : LHSValue; + if (LHSValue <= Value && Value <= RHSValue) { Found = SC; break; } @@ -6812,7 +7183,7 @@ static bool HandleConstructorCall(const Expr *E, const LValue &This, if (!Result.hasValue()) { if (!RD->isUnion()) Result = APValue(APValue::UninitStruct(), RD->getNumBases(), - std::distance(RD->field_begin(), RD->field_end())); + RD->getNumFields()); else // A union starts with no active member. Result = APValue((const FieldDecl*)nullptr); @@ -7763,8 +8134,7 @@ class BufferToAPValueConverter { if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) NumBases = CXXRD->getNumBases(); - APValue ResultVal(APValue::UninitStruct(), NumBases, - std::distance(RD->field_begin(), RD->field_end())); + APValue ResultVal(APValue::UninitStruct(), NumBases, RD->getNumFields()); // Visit the base classes. if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { @@ -8667,6 +9037,25 @@ public: case CK_UserDefinedConversion: return StmtVisitorTy::Visit(E->getSubExpr()); + case CK_HLSLArrayRValue: { + const Expr *SubExpr = E->getSubExpr(); + if (!SubExpr->isGLValue()) { + APValue Val; + if (!Evaluate(Val, Info, SubExpr)) + return false; + return DerivedSuccess(Val, E); + } + + LValue LVal; + if (!EvaluateLValue(SubExpr, LVal, Info)) + return false; + APValue RVal; + // Note: we use the subexpression's type in order to retain cv-qualifiers.
+ if (!handleLValueToRValueConversion(Info, E, SubExpr->getType(), LVal, + RVal)) + return false; + return DerivedSuccess(RVal, E); + } case CK_LValueToRValue: { LValue LVal; if (!EvaluateLValue(E->getSubExpr(), LVal, Info)) @@ -10755,7 +11144,7 @@ static bool HandleClassZeroInitialization(EvalInfo &Info, const Expr *E, assert(!RD->isUnion() && "Expected non-union class type"); const CXXRecordDecl *CD = dyn_cast<CXXRecordDecl>(RD); Result = APValue(APValue::UninitStruct(), CD ? CD->getNumBases() : 0, - std::distance(RD->field_begin(), RD->field_end())); + RD->getNumFields()); if (RD->isInvalidDecl()) return false; const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(RD); @@ -10851,6 +11240,42 @@ bool RecordExprEvaluator::VisitCastExpr(const CastExpr *E) { Result = *Value; return true; } + case CK_HLSLAggregateSplatCast: { + APValue Val; + QualType ValTy; + + if (!hlslAggSplatHelper(Info, E->getSubExpr(), Val, ValTy)) + return false; + + unsigned NEls = elementwiseSize(Info, E->getType()); + // Splat our Val. + SmallVector<APValue> SplatEls(NEls, Val); + SmallVector<QualType> SplatType(NEls, ValTy); + + // Cast the elements and construct our struct result. + const FPOptions FPO = E->getFPFeaturesInEffect(Info.Ctx.getLangOpts()); + if (!constructAggregate(Info, FPO, E, Result, E->getType(), SplatEls, + SplatType)) + return false; + + return true; + } + case CK_HLSLElementwiseCast: { + SmallVector<APValue> SrcEls; + SmallVector<QualType> SrcTypes; + + if (!hlslElementwiseCastHelper(Info, E->getSubExpr(), E->getType(), SrcEls, + SrcTypes)) + return false; + + // Cast the elements and construct our struct result. + const FPOptions FPO = E->getFPFeaturesInEffect(Info.Ctx.getLangOpts()); + if (!constructAggregate(Info, FPO, E, Result, E->getType(), SrcEls, + SrcTypes)) + return false; + + return true; + } } } @@ -10915,7 +11340,7 @@ bool RecordExprEvaluator::VisitCXXParenListOrInitListExpr( if (!Result.hasValue()) Result = APValue(APValue::UninitStruct(), CXXRD ? CXXRD->getNumBases() : 0, - std::distance(RD->field_begin(), RD->field_end())); + RD->getNumFields()); unsigned ElementNo = 0; bool Success = true; @@ -11122,8 +11547,7 @@ bool RecordExprEvaluator::VisitLambdaExpr(const LambdaExpr *E) { if (ClosureClass->isInvalidDecl()) return false; - const size_t NumFields = - std::distance(ClosureClass->field_begin(), ClosureClass->field_end()); + const size_t NumFields = ClosureClass->getNumFields(); assert(NumFields == (size_t)std::distance(E->capture_init_begin(), E->capture_init_end()) && @@ -11346,6 +11770,42 @@ bool VectorExprEvaluator::VisitCastExpr(const CastExpr *E) { Elements.push_back(Val.getVectorElt(I)); return Success(Elements, E); } + case CK_HLSLMatrixTruncation: { + // TODO: See #168935. Add matrix truncation support to expr constant. + return Error(E); + } + case CK_HLSLAggregateSplatCast: { + APValue Val; + QualType ValTy; + + if (!hlslAggSplatHelper(Info, SE, Val, ValTy)) + return false; + + // Cast our Val once.
+ APValue Result; + const FPOptions FPO = E->getFPFeaturesInEffect(Info.Ctx.getLangOpts()); + if (!handleScalarCast(Info, FPO, E, ValTy, VTy->getElementType(), Val, + Result)) + return false; + + SmallVector<APValue, 4> SplatEls(NElts, Result); + return Success(SplatEls, E); + } + case CK_HLSLElementwiseCast: { + SmallVector<APValue> SrcVals; + SmallVector<QualType> SrcTypes; + + if (!hlslElementwiseCastHelper(Info, SE, E->getType(), SrcVals, SrcTypes)) + return false; + + const FPOptions FPO = E->getFPFeaturesInEffect(Info.Ctx.getLangOpts()); + SmallVector<QualType, 4> DestTypes(NElts, VTy->getElementType()); + SmallVector<APValue, 4> ResultEls(NElts); + if (!handleElementwiseCast(Info, E, FPO, SrcVals, SrcTypes, DestTypes, + ResultEls)) + return false; + return Success(ResultEls, E); + } default: return ExprEvaluatorBaseTy::VisitCastExpr(E); } @@ -11621,126 +12081,164 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result, static bool evalShuffleGeneric( EvalInfo &Info, const CallExpr *Call, APValue &Out, - llvm::function_ref<std::pair<unsigned, unsigned>(unsigned, unsigned)> + llvm::function_ref<std::pair<unsigned, int>(unsigned, unsigned)> GetSourceIndex) { const auto *VT = Call->getType()->getAs<VectorType>(); if (!VT) return false; - APSInt MaskImm; - if (!EvaluateInteger(Call->getArg(2), MaskImm, Info)) - return false; - unsigned ShuffleMask = static_cast<unsigned>(MaskImm.getZExtValue()); + unsigned ShuffleMask = 0; + APValue A, MaskVector, B; + bool IsVectorMask = false; + bool IsSingleOperand = (Call->getNumArgs() == 2); - APValue A, B; - if (!EvaluateAsRValue(Info, Call->getArg(0), A) || - !EvaluateAsRValue(Info, Call->getArg(1), B)) - return false; + if (IsSingleOperand) { + QualType MaskType = Call->getArg(1)->getType(); + if (MaskType->isVectorType()) { + IsVectorMask = true; + if (!EvaluateAsRValue(Info, Call->getArg(0), A) || + !EvaluateAsRValue(Info, Call->getArg(1), MaskVector)) + return false; + B = A; + } else if (MaskType->isIntegerType()) { + APSInt MaskImm; + if (!EvaluateInteger(Call->getArg(1), MaskImm, Info)) + return false; + ShuffleMask = static_cast<unsigned>(MaskImm.getZExtValue()); + if (!EvaluateAsRValue(Info, Call->getArg(0), A)) + return false; + B = A; + } else { + return false; + } + } else { + QualType Arg2Type = Call->getArg(2)->getType(); + if (Arg2Type->isVectorType()) { + IsVectorMask = true; + if (!EvaluateAsRValue(Info, Call->getArg(0), A) || + !EvaluateAsRValue(Info, Call->getArg(1), MaskVector) || + !EvaluateAsRValue(Info, Call->getArg(2), B)) + return false; + } else if (Arg2Type->isIntegerType()) { + APSInt MaskImm; + if (!EvaluateInteger(Call->getArg(2), MaskImm, Info)) + return false; + ShuffleMask = static_cast<unsigned>(MaskImm.getZExtValue()); + if (!EvaluateAsRValue(Info, Call->getArg(0), A) || + !EvaluateAsRValue(Info, Call->getArg(1), B)) + return false; + } else { + return false; + } + } unsigned NumElts = VT->getNumElements(); - SmallVector<APValue, 16> ResultElements; + SmallVector<APValue, 64> ResultElements; ResultElements.reserve(NumElts); for (unsigned DstIdx = 0; DstIdx != NumElts; ++DstIdx) { + if (IsVectorMask) { + ShuffleMask = static_cast<unsigned>( + MaskVector.getVectorElt(DstIdx).getInt().getZExtValue()); + } auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask); - const APValue &Src = (SrcVecIdx == 0) ? 
A : B; - ResultElements.push_back(Src.getVectorElt(SrcIdx)); + + if (SrcIdx < 0) { + // Zero out this element + QualType ElemTy = VT->getElementType(); + if (ElemTy->isRealFloatingType()) { + ResultElements.push_back( + APValue(APFloat::getZero(Info.Ctx.getFloatTypeSemantics(ElemTy)))); + } else if (ElemTy->isIntegerType()) { + APValue Zero(Info.Ctx.MakeIntValue(0, ElemTy)); + ResultElements.push_back(APValue(Zero)); + } else { + // Fall back for other element types. + ResultElements.push_back(APValue()); + } + } else { + const APValue &Src = (SrcVecIdx == 0) ? A : B; + ResultElements.push_back(Src.getVectorElt(SrcIdx)); + } } Out = APValue(ResultElements.data(), ResultElements.size()); return true; } +static bool ConvertDoubleToFloatStrict(EvalInfo &Info, const Expr *E, + APFloat OrigVal, APValue &Result) { -static bool evalPshufbBuiltin(EvalInfo &Info, const CallExpr *Call, - APValue &Out) { - APValue SrcVec, ControlVec; - if (!EvaluateAsRValue(Info, Call->getArg(0), SrcVec)) - return false; - if (!EvaluateAsRValue(Info, Call->getArg(1), ControlVec)) + if (OrigVal.isInfinity()) { + Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << 0; return false; - - const auto *VT = Call->getType()->getAs<VectorType>(); - if (!VT) + } + if (OrigVal.isNaN()) { + Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << 1; return false; + } - QualType ElemT = VT->getElementType(); - unsigned NumElts = VT->getNumElements(); - - SmallVector<APValue, 64> ResultElements; - ResultElements.reserve(NumElts); - - for (unsigned Idx = 0; Idx != NumElts; ++Idx) { - APValue CtlVal = ControlVec.getVectorElt(Idx); - APSInt CtlByte = CtlVal.getInt(); - uint8_t Ctl = static_cast<uint8_t>(CtlByte.getZExtValue()); + APFloat Val = OrigVal; + bool LosesInfo = false; + APFloat::opStatus Status = Val.convert( + APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &LosesInfo); - if (Ctl & 0x80) { - APValue Zero(Info.Ctx.MakeIntValue(0, ElemT)); - ResultElements.push_back(Zero); - } else { - unsigned LaneBase = (Idx / 16) * 16; - unsigned SrcOffset = Ctl & 0x0F; - unsigned SrcIdx = LaneBase + SrcOffset; + if (LosesInfo || Val.isDenormal()) { + Info.CCEDiag(E, diag::note_constexpr_float_arithmetic_strict); + return false; + } - ResultElements.push_back(SrcVec.getVectorElt(SrcIdx)); - } + if (Status != APFloat::opOK) { + Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr); + return false; } - Out = APValue(ResultElements.data(), ResultElements.size()); + + Result = APValue(Val); return true; } +static bool evalShiftWithCount( + EvalInfo &Info, const CallExpr *Call, APValue &Out, + llvm::function_ref<APInt(const APInt &, uint64_t)> ShiftOp, + llvm::function_ref<APInt(const APInt &, unsigned)> OverflowOp) { -static bool evalPshufBuiltin(EvalInfo &Info, const CallExpr *Call, - bool IsShufHW, APValue &Out) { - APValue Vec; - APSInt Imm; - if (!EvaluateAsRValue(Info, Call->getArg(0), Vec)) - return false; - if (!EvaluateInteger(Call->getArg(1), Imm, Info)) - return false; - - const auto *VT = Call->getType()->getAs<VectorType>(); - if (!VT) + APValue Source, Count; + if (!EvaluateAsRValue(Info, Call->getArg(0), Source) || + !EvaluateAsRValue(Info, Call->getArg(1), Count)) return false; - QualType ElemT = VT->getElementType(); - unsigned ElemBits = Info.Ctx.getTypeSize(ElemT); - unsigned NumElts = VT->getNumElements(); - - unsigned LaneBits = 128u; - unsigned LaneElts = LaneBits / ElemBits; - if (!LaneElts || (NumElts % LaneElts) != 0) - return false; + assert(Call->getNumArgs() == 2); - uint8_t Ctl =
static_cast<uint8_t>(Imm.getZExtValue()); + QualType SourceTy = Call->getArg(0)->getType(); + assert(SourceTy->isVectorType() && + Call->getArg(1)->getType()->isVectorType()); - SmallVector<APValue, 32> ResultElements; - ResultElements.reserve(NumElts); + QualType DestEltTy = SourceTy->castAs<VectorType>()->getElementType(); + unsigned DestEltWidth = Source.getVectorElt(0).getInt().getBitWidth(); + unsigned DestLen = Source.getVectorLength(); + bool IsDestUnsigned = DestEltTy->isUnsignedIntegerType(); + unsigned CountEltWidth = Count.getVectorElt(0).getInt().getBitWidth(); + unsigned NumBitsInQWord = 64; + unsigned NumCountElts = NumBitsInQWord / CountEltWidth; + SmallVector<APValue, 64> Result; + Result.reserve(DestLen); - for (unsigned Idx = 0; Idx != NumElts; Idx++) { - unsigned LaneBase = (Idx / LaneElts) * LaneElts; - unsigned LaneIdx = Idx % LaneElts; - unsigned SrcIdx = Idx; - unsigned Sel = (Ctl >> (2 * LaneIdx)) & 0x3; + uint64_t CountLQWord = 0; + for (unsigned EltIdx = 0; EltIdx != NumCountElts; ++EltIdx) { + uint64_t Elt = Count.getVectorElt(EltIdx).getInt().getZExtValue(); + CountLQWord |= (Elt << (EltIdx * CountEltWidth)); + } - if (ElemBits == 32) { - SrcIdx = LaneBase + Sel; + for (unsigned EltIdx = 0; EltIdx != DestLen; ++EltIdx) { + APInt Elt = Source.getVectorElt(EltIdx).getInt(); + if (CountLQWord < DestEltWidth) { + Result.push_back( + APValue(APSInt(ShiftOp(Elt, CountLQWord), IsDestUnsigned))); } else { - constexpr unsigned HalfSize = 4; - bool InHigh = LaneIdx >= HalfSize; - if (!IsShufHW && !InHigh) { - SrcIdx = LaneBase + Sel; - } else if (IsShufHW && InHigh) { - unsigned Rel = LaneIdx - HalfSize; - Sel = (Ctl >> (2 * Rel)) & 0x3; - SrcIdx = LaneBase + HalfSize + Sel; - } + Result.push_back( + APValue(APSInt(OverflowOp(Elt, DestEltWidth), IsDestUnsigned))); } - - ResultElements.push_back(Vec.getVectorElt(SrcIdx)); } - - Out = APValue(ResultElements.data(), ResultElements.size()); + Out = APValue(Result.data(), Result.size()); return true; } @@ -11780,6 +12278,24 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), SourceLen), E); }; + auto EvalSelectScalar = [&](unsigned Len) -> bool { + APSInt Mask; + APValue AVal, WVal; + if (!EvaluateInteger(E->getArg(0), Mask, Info) || + !EvaluateAsRValue(Info, E->getArg(1), AVal) || + !EvaluateAsRValue(Info, E->getArg(2), WVal)) + return false; + + bool TakeA0 = (Mask.getZExtValue() & 1u) != 0; + SmallVector<APValue, 4> Res; + Res.reserve(Len); + Res.push_back(TakeA0 ? 
AVal.getVectorElt(0) : WVal.getVectorElt(0)); + for (unsigned I = 1; I < Len; ++I) + Res.push_back(WVal.getVectorElt(I)); + APValue V(Res.data(), Res.size()); + return Success(V, E); + }; + switch (E->getBuiltinCallee()) { default: return false; @@ -12083,6 +12599,13 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return APInt((Src).trunc(DstBits)); return APInt::getAllOnes(DstBits); }); + case clang::X86::BI__builtin_ia32_selectss_128: + return EvalSelectScalar(4); + case clang::X86::BI__builtin_ia32_selectsd_128: + return EvalSelectScalar(2); + case clang::X86::BI__builtin_ia32_selectsh_128: + case clang::X86::BI__builtin_ia32_selectsbf_128: + return EvalSelectScalar(8); case clang::X86::BI__builtin_ia32_pmuldq128: case clang::X86::BI__builtin_ia32_pmuldq256: case clang::X86::BI__builtin_ia32_pmuldq512: @@ -12431,6 +12954,120 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + + case X86::BI__builtin_ia32_cvtsd2ss: { + APValue VecA, VecB; + if (!EvaluateAsRValue(Info, E->getArg(0), VecA) || + !EvaluateAsRValue(Info, E->getArg(1), VecB)) + return false; + + SmallVector<APValue, 4> Elements; + + APValue ResultVal; + if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(), + ResultVal)) + return false; + + Elements.push_back(ResultVal); + + unsigned NumEltsA = VecA.getVectorLength(); + for (unsigned I = 1; I < NumEltsA; ++I) { + Elements.push_back(VecA.getVectorElt(I)); + } + + return Success(Elements, E); + } + case X86::BI__builtin_ia32_cvtsd2ss_round_mask: { + APValue VecA, VecB, VecSrc, MaskValue; + + if (!EvaluateAsRValue(Info, E->getArg(0), VecA) || + !EvaluateAsRValue(Info, E->getArg(1), VecB) || + !EvaluateAsRValue(Info, E->getArg(2), VecSrc) || + !EvaluateAsRValue(Info, E->getArg(3), MaskValue)) + return false; + + unsigned Mask = MaskValue.getInt().getZExtValue(); + SmallVector<APValue, 4> Elements; + + if (Mask & 1) { + APValue ResultVal; + if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(), + ResultVal)) + return false; + Elements.push_back(ResultVal); + } else { + Elements.push_back(VecSrc.getVectorElt(0)); + } + + unsigned NumEltsA = VecA.getVectorLength(); + for (unsigned I = 1; I < NumEltsA; ++I) { + Elements.push_back(VecA.getVectorElt(I)); + } + + return Success(Elements, E); + } + case X86::BI__builtin_ia32_cvtpd2ps: + case X86::BI__builtin_ia32_cvtpd2ps256: + case X86::BI__builtin_ia32_cvtpd2ps_mask: + case X86::BI__builtin_ia32_cvtpd2ps512_mask: { + + const auto BuiltinID = E->getBuiltinCallee(); + bool IsMasked = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps_mask || + BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask); + + APValue InputValue; + if (!EvaluateAsRValue(Info, E->getArg(0), InputValue)) + return false; + + APValue MergeValue; + unsigned Mask = 0xFFFFFFFF; + bool NeedsMerge = false; + if (IsMasked) { + APValue MaskValue; + if (!EvaluateAsRValue(Info, E->getArg(2), MaskValue)) + return false; + Mask = MaskValue.getInt().getZExtValue(); + auto NumEltsResult = E->getType()->getAs<VectorType>()->getNumElements(); + for (unsigned I = 0; I < NumEltsResult; ++I) { + if (!((Mask >> I) & 1)) { + NeedsMerge = true; + break; + } + } + if (NeedsMerge) { + if (!EvaluateAsRValue(Info, E->getArg(1), MergeValue)) + return false; + } + } + + unsigned NumEltsResult = + E->getType()->getAs<VectorType>()->getNumElements(); + unsigned NumEltsInput = InputValue.getVectorLength(); + SmallVector<APValue, 8> Elements; + for (unsigned I = 
0; I < NumEltsResult; ++I) { + if (IsMasked && !((Mask >> I) & 1)) { + if (!NeedsMerge) { + return false; + } + Elements.push_back(MergeValue.getVectorElt(I)); + continue; + } + + if (I >= NumEltsInput) { + Elements.push_back(APValue(APFloat::getZero(APFloat::IEEEsingle()))); + continue; + } + + APValue ResultVal; + if (!ConvertDoubleToFloatStrict( + Info, E, InputValue.getVectorElt(I).getFloat(), ResultVal)) + return false; + + Elements.push_back(ResultVal); + } + return Success(Elements, E); + } + case X86::BI__builtin_ia32_shufps: case X86::BI__builtin_ia32_shufps256: case X86::BI__builtin_ia32_shufps512: { @@ -12438,7 +13075,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { if (!evalShuffleGeneric( Info, E, R, [](unsigned DstIdx, - unsigned ShuffleMask) -> std::pair<unsigned, unsigned> { + unsigned ShuffleMask) -> std::pair<unsigned, int> { constexpr unsigned LaneBits = 128u; unsigned NumElemPerLane = LaneBits / 32; unsigned NumSelectableElems = NumElemPerLane / 2; @@ -12451,7 +13088,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; unsigned SrcIdx = (ElemInLane < NumSelectableElems) ? 0 : 1; unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; - return {SrcIdx, LaneOffset + Index}; + return {SrcIdx, static_cast<int>(LaneOffset + Index)}; })) return false; return Success(R, E); @@ -12463,7 +13100,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { if (!evalShuffleGeneric( Info, E, R, [](unsigned DstIdx, - unsigned ShuffleMask) -> std::pair<unsigned, unsigned> { + unsigned ShuffleMask) -> std::pair<unsigned, int> { constexpr unsigned LaneBits = 128u; unsigned NumElemPerLane = LaneBits / 64; unsigned NumSelectableElems = NumElemPerLane / 2; @@ -12476,7 +13113,31 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; unsigned SrcIdx = (ElemInLane < NumSelectableElems) ? 
0 : 1; unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; - return {SrcIdx, LaneOffset + Index}; + return {SrcIdx, static_cast<int>(LaneOffset + Index)}; + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_insertps128: { + APValue R; + if (!evalShuffleGeneric( + Info, E, R, + [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> { + // Bits [3:0]: zero mask - if bit is set, zero this element + if ((Mask & (1 << DstIdx)) != 0) { + return {0, -1}; + } + // Bits [7:6]: select element from source vector Y (0-3) + // Bits [5:4]: select destination position (0-3) + unsigned SrcElem = (Mask >> 6) & 0x3; + unsigned DstElem = (Mask >> 4) & 0x3; + if (DstIdx == DstElem) { + // Insert element from source vector (B) at this position + return {1, static_cast<int>(SrcElem)}; + } else { + // Copy from destination vector (A) + return {0, static_cast<int>(DstIdx)}; + } })) return false; return Success(R, E); @@ -12485,7 +13146,19 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_pshufb256: case X86::BI__builtin_ia32_pshufb512: { APValue R; - if (!evalPshufbBuiltin(Info, E, R)) + if (!evalShuffleGeneric( + Info, E, R, + [](unsigned DstIdx, + unsigned ShuffleMask) -> std::pair<unsigned, int> { + uint8_t Ctlb = static_cast<uint8_t>(ShuffleMask); + if (Ctlb & 0x80) + return std::make_pair(0, -1); + + unsigned LaneBase = (DstIdx / 16) * 16; + unsigned SrcOffset = Ctlb & 0x0F; + unsigned SrcIdx = LaneBase + SrcOffset; + return std::make_pair(0, static_cast<int>(SrcIdx)); + })) return false; return Success(R, E); } @@ -12494,7 +13167,21 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_pshuflw256: case X86::BI__builtin_ia32_pshuflw512: { APValue R; - if (!evalPshufBuiltin(Info, E, false, R)) + if (!evalShuffleGeneric( + Info, E, R, + [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> { + constexpr unsigned LaneBits = 128u; + constexpr unsigned ElemBits = 16u; + constexpr unsigned LaneElts = LaneBits / ElemBits; + constexpr unsigned HalfSize = 4; + unsigned LaneBase = (DstIdx / LaneElts) * LaneElts; + unsigned LaneIdx = DstIdx % LaneElts; + if (LaneIdx < HalfSize) { + unsigned Sel = (Mask >> (2 * LaneIdx)) & 0x3; + return std::make_pair(0, static_cast<int>(LaneBase + Sel)); + } + return std::make_pair(0, static_cast<int>(DstIdx)); + })) return false; return Success(R, E); } @@ -12503,20 +13190,154 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_pshufhw256: case X86::BI__builtin_ia32_pshufhw512: { APValue R; - if (!evalPshufBuiltin(Info, E, true, R)) + if (!evalShuffleGeneric( + Info, E, R, + [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> { + constexpr unsigned LaneBits = 128u; + constexpr unsigned ElemBits = 16u; + constexpr unsigned LaneElts = LaneBits / ElemBits; + constexpr unsigned HalfSize = 4; + unsigned LaneBase = (DstIdx / LaneElts) * LaneElts; + unsigned LaneIdx = DstIdx % LaneElts; + if (LaneIdx >= HalfSize) { + unsigned Rel = LaneIdx - HalfSize; + unsigned Sel = (Mask >> (2 * Rel)) & 0x3; + return std::make_pair( + 0, static_cast<int>(LaneBase + HalfSize + Sel)); + } + return std::make_pair(0, static_cast<int>(DstIdx)); + })) return false; return Success(R, E); } case X86::BI__builtin_ia32_pshufd: case X86::BI__builtin_ia32_pshufd256: - case X86::BI__builtin_ia32_pshufd512: { + case X86::BI__builtin_ia32_pshufd512: + case X86::BI__builtin_ia32_vpermilps: + case X86::BI__builtin_ia32_vpermilps256: + case 
X86::BI__builtin_ia32_vpermilps512: { + APValue R; + if (!evalShuffleGeneric( + Info, E, R, + [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> { + constexpr unsigned LaneBits = 128u; + constexpr unsigned ElemBits = 32u; + constexpr unsigned LaneElts = LaneBits / ElemBits; + unsigned LaneBase = (DstIdx / LaneElts) * LaneElts; + unsigned LaneIdx = DstIdx % LaneElts; + unsigned Sel = (Mask >> (2 * LaneIdx)) & 0x3; + return std::make_pair(0, static_cast<int>(LaneBase + Sel)); + })) + return false; + return Success(R, E); + } + + case X86::BI__builtin_ia32_vpermilvarpd: + case X86::BI__builtin_ia32_vpermilvarpd256: + case X86::BI__builtin_ia32_vpermilvarpd512: { + APValue R; + if (!evalShuffleGeneric( + Info, E, R, + [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> { + unsigned NumElemPerLane = 2; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned Offset = Mask & 0b10 ? 1 : 0; + return std::make_pair( + 0, static_cast<int>(Lane * NumElemPerLane + Offset)); + })) + return false; + return Success(R, E); + } + + case X86::BI__builtin_ia32_vpermilpd: + case X86::BI__builtin_ia32_vpermilpd256: + case X86::BI__builtin_ia32_vpermilpd512: { + APValue R; + if (!evalShuffleGeneric(Info, E, R, [](unsigned DstIdx, unsigned Control) { + unsigned NumElemPerLane = 2; + unsigned BitsPerElem = 1; + unsigned MaskBits = 8; + unsigned IndexMask = 0x1; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned LaneOffset = Lane * NumElemPerLane; + unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; + unsigned Index = (Control >> BitIndex) & IndexMask; + return std::make_pair(0, static_cast<int>(LaneOffset + Index)); + })) + return false; + return Success(R, E); + } + + case X86::BI__builtin_ia32_permdf256: + case X86::BI__builtin_ia32_permdi256: { APValue R; - if (!evalPshufBuiltin(Info, E, false, R)) + if (!evalShuffleGeneric(Info, E, R, [](unsigned DstIdx, unsigned Control) { + // permute4x64 operates on 4 64-bit elements + // For element i (0-3), extract bits [2*i+1:2*i] from Control + unsigned Index = (Control >> (2 * DstIdx)) & 0x3; + return std::make_pair(0, static_cast<int>(Index)); + })) return false; return Success(R, E); } + case X86::BI__builtin_ia32_vpermilvarps: + case X86::BI__builtin_ia32_vpermilvarps256: + case X86::BI__builtin_ia32_vpermilvarps512: { + APValue R; + if (!evalShuffleGeneric( + Info, E, R, + [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> { + unsigned NumElemPerLane = 4; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned Offset = Mask & 0b11; + return std::make_pair( + 0, static_cast<int>(Lane * NumElemPerLane + Offset)); + })) + return false; + return Success(R, E); + } + + case X86::BI__builtin_ia32_vpmultishiftqb128: + case X86::BI__builtin_ia32_vpmultishiftqb256: + case X86::BI__builtin_ia32_vpmultishiftqb512: { + assert(E->getNumArgs() == 2); + + APValue A, B; + if (!Evaluate(A, Info, E->getArg(0)) || !Evaluate(B, Info, E->getArg(1))) + return false; + + assert(A.getVectorLength() == B.getVectorLength()); + unsigned NumBytesInQWord = 8; + unsigned NumBitsInByte = 8; + unsigned NumBytes = A.getVectorLength(); + unsigned NumQWords = NumBytes / NumBytesInQWord; + SmallVector<APValue, 64> Result; + Result.reserve(NumBytes); + + for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) { + APInt BQWord(64, 0); + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + unsigned Idx = QWordId * NumBytesInQWord + ByteIdx; + uint64_t Byte = B.getVectorElt(Idx).getInt().getZExtValue(); + BQWord.insertBits(APInt(8, Byte & 0xFF), 
ByteIdx * NumBitsInByte); + } + + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + unsigned Idx = QWordId * NumBytesInQWord + ByteIdx; + uint64_t Ctrl = A.getVectorElt(Idx).getInt().getZExtValue() & 0x3F; + + APInt Byte(8, 0); + for (unsigned BitIdx = 0; BitIdx != NumBitsInByte; ++BitIdx) { + Byte.setBitVal(BitIdx, BQWord[(Ctrl + BitIdx) & 0x3F]); + } + Result.push_back(APValue(APSInt(Byte, /*isUnsigned*/ true))); + } + } + return Success(APValue(Result.data(), Result.size()), E); + } + case X86::BI__builtin_ia32_phminposuw128: { APValue Source; if (!Evaluate(Source, Info, E->getArg(0))) @@ -12551,6 +13372,66 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(Result.data(), Result.size()), E); } + case X86::BI__builtin_ia32_psraq128: + case X86::BI__builtin_ia32_psraq256: + case X86::BI__builtin_ia32_psraq512: + case X86::BI__builtin_ia32_psrad128: + case X86::BI__builtin_ia32_psrad256: + case X86::BI__builtin_ia32_psrad512: + case X86::BI__builtin_ia32_psraw128: + case X86::BI__builtin_ia32_psraw256: + case X86::BI__builtin_ia32_psraw512: { + APValue R; + if (!evalShiftWithCount( + Info, E, R, + [](const APInt &Elt, uint64_t Count) { return Elt.ashr(Count); }, + [](const APInt &Elt, unsigned Width) { + return Elt.ashr(Width - 1); + })) + return false; + return Success(R, E); + } + + case X86::BI__builtin_ia32_psllq128: + case X86::BI__builtin_ia32_psllq256: + case X86::BI__builtin_ia32_psllq512: + case X86::BI__builtin_ia32_pslld128: + case X86::BI__builtin_ia32_pslld256: + case X86::BI__builtin_ia32_pslld512: + case X86::BI__builtin_ia32_psllw128: + case X86::BI__builtin_ia32_psllw256: + case X86::BI__builtin_ia32_psllw512: { + APValue R; + if (!evalShiftWithCount( + Info, E, R, + [](const APInt &Elt, uint64_t Count) { return Elt.shl(Count); }, + [](const APInt &Elt, unsigned Width) { + return APInt::getZero(Width); + })) + return false; + return Success(R, E); + } + + case X86::BI__builtin_ia32_psrlq128: + case X86::BI__builtin_ia32_psrlq256: + case X86::BI__builtin_ia32_psrlq512: + case X86::BI__builtin_ia32_psrld128: + case X86::BI__builtin_ia32_psrld256: + case X86::BI__builtin_ia32_psrld512: + case X86::BI__builtin_ia32_psrlw128: + case X86::BI__builtin_ia32_psrlw256: + case X86::BI__builtin_ia32_psrlw512: { + APValue R; + if (!evalShiftWithCount( + Info, E, R, + [](const APInt &Elt, uint64_t Count) { return Elt.lshr(Count); }, + [](const APInt &Elt, unsigned Width) { + return APInt::getZero(Width); + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_pternlogd128_mask: case X86::BI__builtin_ia32_pternlogd256_mask: case X86::BI__builtin_ia32_pternlogd512_mask: @@ -12875,6 +13756,35 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { } return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case clang::X86::BI__builtin_ia32_addsubpd: + case clang::X86::BI__builtin_ia32_addsubps: + case clang::X86::BI__builtin_ia32_addsubpd256: + case clang::X86::BI__builtin_ia32_addsubps256: { + // Addsub: alternates between subtraction and addition + // Result[i] = (i % 2 == 0) ? 
(a[i] - b[i]) : (a[i] + b[i]) + APValue SourceLHS, SourceRHS; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) || + !EvaluateAsRValue(Info, E->getArg(1), SourceRHS)) + return false; + unsigned NumElems = SourceLHS.getVectorLength(); + SmallVector<APValue, 8> ResultElements; + ResultElements.reserve(NumElems); + llvm::RoundingMode RM = getActiveRoundingMode(getEvalInfo(), E); + + for (unsigned I = 0; I != NumElems; ++I) { + APFloat LHS = SourceLHS.getVectorElt(I).getFloat(); + APFloat RHS = SourceRHS.getVectorElt(I).getFloat(); + if (I % 2 == 0) { + // Even indices: subtract + LHS.subtract(RHS, RM); + } else { + // Odd indices: add + LHS.add(RHS, RM); + } + ResultElements.push_back(APValue(LHS)); + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } case Builtin::BI__builtin_elementwise_fshl: case Builtin::BI__builtin_elementwise_fshr: { APValue SourceHi, SourceLo, SourceShift; @@ -12909,6 +13819,139 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case X86::BI__builtin_ia32_shuf_f32x4_256: + case X86::BI__builtin_ia32_shuf_i32x4_256: + case X86::BI__builtin_ia32_shuf_f64x2_256: + case X86::BI__builtin_ia32_shuf_i64x2_256: + case X86::BI__builtin_ia32_shuf_f32x4: + case X86::BI__builtin_ia32_shuf_i32x4: + case X86::BI__builtin_ia32_shuf_f64x2: + case X86::BI__builtin_ia32_shuf_i64x2: { + APValue SourceA, SourceB; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceA) || + !EvaluateAsRValue(Info, E->getArg(1), SourceB)) + return false; + + APSInt Imm; + if (!EvaluateInteger(E->getArg(2), Imm, Info)) + return false; + + // Destination and sources A, B all have the same type. + unsigned NumElems = SourceA.getVectorLength(); + const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>(); + QualType ElemQT = VT->getElementType(); + unsigned ElemBits = Info.Ctx.getTypeSize(ElemQT); + unsigned LaneBits = 128u; + unsigned NumLanes = (NumElems * ElemBits) / LaneBits; + unsigned NumElemsPerLane = LaneBits / ElemBits; + + APValue R; + if (!evalShuffleGeneric( + Info, E, R, + [NumLanes, NumElemsPerLane](unsigned DstIdx, unsigned ShuffleMask) + -> std::pair<unsigned, int> { + // DstIdx determines source. ShuffleMask selects lane in source. + unsigned BitsPerElem = NumLanes / 2; + unsigned IndexMask = (1u << BitsPerElem) - 1; + unsigned Lane = DstIdx / NumElemsPerLane; + unsigned SrcIdx = (Lane < NumLanes / 2) ? 
0 : 1; + unsigned BitIdx = BitsPerElem * Lane; + unsigned SrcLaneIdx = (ShuffleMask >> BitIdx) & IndexMask; + unsigned ElemInLane = DstIdx % NumElemsPerLane; + unsigned IdxToPick = SrcLaneIdx * NumElemsPerLane + ElemInLane; + return {SrcIdx, IdxToPick}; + })) + return false; + return Success(R, E); + } + + case X86::BI__builtin_ia32_vgf2p8affineinvqb_v16qi: + case X86::BI__builtin_ia32_vgf2p8affineinvqb_v32qi: + case X86::BI__builtin_ia32_vgf2p8affineinvqb_v64qi: + case X86::BI__builtin_ia32_vgf2p8affineqb_v16qi: + case X86::BI__builtin_ia32_vgf2p8affineqb_v32qi: + case X86::BI__builtin_ia32_vgf2p8affineqb_v64qi: { + + APValue X, A; + APSInt Imm; + if (!EvaluateAsRValue(Info, E->getArg(0), X) || + !EvaluateAsRValue(Info, E->getArg(1), A) || + !EvaluateInteger(E->getArg(2), Imm, Info)) + return false; + + assert(X.isVector() && A.isVector()); + assert(X.getVectorLength() == A.getVectorLength()); + + bool IsInverse = false; + switch (E->getBuiltinCallee()) { + case X86::BI__builtin_ia32_vgf2p8affineinvqb_v16qi: + case X86::BI__builtin_ia32_vgf2p8affineinvqb_v32qi: + case X86::BI__builtin_ia32_vgf2p8affineinvqb_v64qi: { + IsInverse = true; + } + } + + unsigned NumBitsInByte = 8; + unsigned NumBytesInQWord = 8; + unsigned NumBitsInQWord = 64; + unsigned NumBytes = A.getVectorLength(); + unsigned NumQWords = NumBytes / NumBytesInQWord; + SmallVector<APValue, 64> Result; + Result.reserve(NumBytes); + + // computing A*X + Imm + for (unsigned QWordIdx = 0; QWordIdx != NumQWords; ++QWordIdx) { + // Extract the QWords from X, A + APInt XQWord(NumBitsInQWord, 0); + APInt AQWord(NumBitsInQWord, 0); + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + unsigned Idx = QWordIdx * NumBytesInQWord + ByteIdx; + APInt XByte = X.getVectorElt(Idx).getInt(); + APInt AByte = A.getVectorElt(Idx).getInt(); + XQWord.insertBits(XByte, ByteIdx * NumBitsInByte); + AQWord.insertBits(AByte, ByteIdx * NumBitsInByte); + } + + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + uint8_t XByte = + XQWord.lshr(ByteIdx * NumBitsInByte).getLoBits(8).getZExtValue(); + Result.push_back(APValue(APSInt( + APInt(8, GFNIAffine(XByte, AQWord, Imm, IsInverse)), false))); + } + } + + return Success(APValue(Result.data(), Result.size()), E); + } + + case X86::BI__builtin_ia32_vgf2p8mulb_v16qi: + case X86::BI__builtin_ia32_vgf2p8mulb_v32qi: + case X86::BI__builtin_ia32_vgf2p8mulb_v64qi: { + APValue A, B; + if (!EvaluateAsRValue(Info, E->getArg(0), A) || + !EvaluateAsRValue(Info, E->getArg(1), B)) + return false; + + assert(A.isVector() && B.isVector()); + assert(A.getVectorLength() == B.getVectorLength()); + + unsigned NumBytes = A.getVectorLength(); + SmallVector<APValue, 64> Result; + Result.reserve(NumBytes); + + for (unsigned ByteIdx = 0; ByteIdx != NumBytes; ++ByteIdx) { + uint8_t AByte = A.getVectorElt(ByteIdx).getInt().getZExtValue(); + uint8_t BByte = B.getVectorElt(ByteIdx).getInt().getZExtValue(); + Result.push_back(APValue( + APSInt(APInt(8, GFNIMul(AByte, BByte)), /*IsUnsigned=*/false))); + } + + return Success(APValue(Result.data(), Result.size()), E); + } + case X86::BI__builtin_ia32_insertf32x4_256: case X86::BI__builtin_ia32_inserti32x4_256: case X86::BI__builtin_ia32_insertf64x2_256: @@ -12992,61 +14035,300 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_pslldqi128_byteshift: case X86::BI__builtin_ia32_pslldqi256_byteshift: case X86::BI__builtin_ia32_pslldqi512_byteshift: { - assert(E->getNumArgs() == 2); + APValue R; + if (!evalShuffleGeneric( 
+ Info, E, R, + [](unsigned DstIdx, unsigned Shift) -> std::pair<unsigned, int> { + unsigned LaneBase = (DstIdx / 16) * 16; + unsigned LaneIdx = DstIdx % 16; + if (LaneIdx < Shift) + return std::make_pair(0, -1); + + return std::make_pair( + 0, static_cast<int>(LaneBase + LaneIdx - Shift)); + })) + return false; + return Success(R, E); + } + + case X86::BI__builtin_ia32_psrldqi128_byteshift: + case X86::BI__builtin_ia32_psrldqi256_byteshift: + case X86::BI__builtin_ia32_psrldqi512_byteshift: { + APValue R; + if (!evalShuffleGeneric( + Info, E, R, + [](unsigned DstIdx, unsigned Shift) -> std::pair<unsigned, int> { + unsigned LaneBase = (DstIdx / 16) * 16; + unsigned LaneIdx = DstIdx % 16; + if (LaneIdx + Shift < 16) + return std::make_pair( + 0, static_cast<int>(LaneBase + LaneIdx + Shift)); + + return std::make_pair(0, -1); + })) + return false; + return Success(R, E); + } + + case X86::BI__builtin_ia32_palignr128: + case X86::BI__builtin_ia32_palignr256: + case X86::BI__builtin_ia32_palignr512: { + APValue R; + if (!evalShuffleGeneric(Info, E, R, [](unsigned DstIdx, unsigned Shift) { + // Default to -1 → zero-fill this destination element + unsigned VecIdx = 1; + int ElemIdx = -1; + + int Lane = DstIdx / 16; + int Offset = DstIdx % 16; + + // Elements come from VecB first, then VecA after the shift boundary + unsigned ShiftedIdx = Offset + (Shift & 0xFF); + if (ShiftedIdx < 16) { // from VecB + ElemIdx = ShiftedIdx + (Lane * 16); + } else if (ShiftedIdx < 32) { // from VecA + VecIdx = 0; + ElemIdx = (ShiftedIdx - 16) + (Lane * 16); + } + + return std::pair<unsigned, int>{VecIdx, ElemIdx}; + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_alignd128: + case X86::BI__builtin_ia32_alignd256: + case X86::BI__builtin_ia32_alignd512: + case X86::BI__builtin_ia32_alignq128: + case X86::BI__builtin_ia32_alignq256: + case X86::BI__builtin_ia32_alignq512: { + APValue R; + unsigned NumElems = E->getType()->castAs<VectorType>()->getNumElements(); + if (!evalShuffleGeneric(Info, E, R, + [NumElems](unsigned DstIdx, unsigned Shift) { + unsigned Imm = Shift & 0xFF; + unsigned EffectiveShift = Imm & (NumElems - 1); + unsigned SourcePos = DstIdx + EffectiveShift; + unsigned VecIdx = SourcePos < NumElems ? 
1 : 0; + unsigned ElemIdx = SourcePos & (NumElems - 1); + + return std::pair<unsigned, int>{ + VecIdx, static_cast<int>(ElemIdx)}; + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_permvarsi256: + case X86::BI__builtin_ia32_permvarsf256: + case X86::BI__builtin_ia32_permvardf512: + case X86::BI__builtin_ia32_permvardi512: + case X86::BI__builtin_ia32_permvarhi128: { + APValue R; + if (!evalShuffleGeneric(Info, E, R, + [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x7; + return std::pair<unsigned, int>{0, Offset}; + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_permvarqi128: + case X86::BI__builtin_ia32_permvarhi256: + case X86::BI__builtin_ia32_permvarsi512: + case X86::BI__builtin_ia32_permvarsf512: { + APValue R; + if (!evalShuffleGeneric(Info, E, R, + [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0xF; + return std::pair<unsigned, int>{0, Offset}; + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_permvardi256: + case X86::BI__builtin_ia32_permvardf256: { + APValue R; + if (!evalShuffleGeneric(Info, E, R, + [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x3; + return std::pair<unsigned, int>{0, Offset}; + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_permvarqi256: + case X86::BI__builtin_ia32_permvarhi512: { + APValue R; + if (!evalShuffleGeneric(Info, E, R, + [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x1F; + return std::pair<unsigned, int>{0, Offset}; + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_permvarqi512: { + APValue R; + if (!evalShuffleGeneric(Info, E, R, + [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x3F; + return std::pair<unsigned, int>{0, Offset}; + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_vpermi2varq128: + case X86::BI__builtin_ia32_vpermi2varpd128: { + APValue R; + if (!evalShuffleGeneric(Info, E, R, + [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x1; + unsigned SrcIdx = (ShuffleMask >> 1) & 0x1; + return std::pair<unsigned, int>{SrcIdx, Offset}; + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_vpermi2vard128: + case X86::BI__builtin_ia32_vpermi2varps128: + case X86::BI__builtin_ia32_vpermi2varq256: + case X86::BI__builtin_ia32_vpermi2varpd256: { + APValue R; + if (!evalShuffleGeneric(Info, E, R, + [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x3; + unsigned SrcIdx = (ShuffleMask >> 2) & 0x1; + return std::pair<unsigned, int>{SrcIdx, Offset}; + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_vpermi2varhi128: + case X86::BI__builtin_ia32_vpermi2vard256: + case X86::BI__builtin_ia32_vpermi2varps256: + case X86::BI__builtin_ia32_vpermi2varq512: + case X86::BI__builtin_ia32_vpermi2varpd512: { + APValue R; + if (!evalShuffleGeneric(Info, E, R, + [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x7; + unsigned SrcIdx = (ShuffleMask >> 3) & 0x1; + return std::pair<unsigned, int>{SrcIdx, Offset}; + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_vpermi2varqi128: + case X86::BI__builtin_ia32_vpermi2varhi256: + case X86::BI__builtin_ia32_vpermi2vard512: + case X86::BI__builtin_ia32_vpermi2varps512: { + APValue R; + if (!evalShuffleGeneric(Info, E, R, + [](unsigned DstIdx, unsigned ShuffleMask) { + 
int Offset = ShuffleMask & 0xF; + unsigned SrcIdx = (ShuffleMask >> 4) & 0x1; + return std::pair<unsigned, int>{SrcIdx, Offset}; + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_vpermi2varqi256: + case X86::BI__builtin_ia32_vpermi2varhi512: { + APValue R; + if (!evalShuffleGeneric(Info, E, R, + [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x1F; + unsigned SrcIdx = (ShuffleMask >> 5) & 0x1; + return std::pair<unsigned, int>{SrcIdx, Offset}; + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_vpermi2varqi512: { + APValue R; + if (!evalShuffleGeneric(Info, E, R, + [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x3F; + unsigned SrcIdx = (ShuffleMask >> 6) & 0x1; + return std::pair<unsigned, int>{SrcIdx, Offset}; + })) + return false; + return Success(R, E); + } + + case clang::X86::BI__builtin_ia32_vcvtps2ph: + case clang::X86::BI__builtin_ia32_vcvtps2ph256: { + APValue SrcVec; + if (!EvaluateAsRValue(Info, E->getArg(0), SrcVec)) + return false; - APValue Src; APSInt Imm; - if (!EvaluateAsRValue(Info, E->getArg(0), Src) || - !EvaluateInteger(E->getArg(1), Imm, Info)) + if (!EvaluateInteger(E->getArg(1), Imm, Info)) return false; - unsigned VecLen = Src.getVectorLength(); - unsigned Shift = Imm.getZExtValue() & 0xff; + const auto *SrcVTy = E->getArg(0)->getType()->castAs<VectorType>(); + unsigned SrcNumElems = SrcVTy->getNumElements(); + const auto *DstVTy = E->getType()->castAs<VectorType>(); + unsigned DstNumElems = DstVTy->getNumElements(); + QualType DstElemTy = DstVTy->getElementType(); - SmallVector<APValue> ResultElements; - for (unsigned Lane = 0; Lane != VecLen; Lane += 16) { - for (unsigned I = 0; I != 16; ++I) { - if (I < Shift) { - APSInt Zero(8, /*isUnsigned=*/true); - Zero = 0; - ResultElements.push_back(APValue(Zero)); - } else { - ResultElements.push_back(Src.getVectorElt(Lane + I - Shift)); - } + const llvm::fltSemantics &HalfSem = + Info.Ctx.getFloatTypeSemantics(Info.Ctx.HalfTy); + + int ImmVal = Imm.getZExtValue(); + bool UseMXCSR = (ImmVal & 4) != 0; + bool IsFPConstrained = + E->getFPFeaturesInEffect(Info.Ctx.getLangOpts()).isFPConstrained(); + + llvm::RoundingMode RM; + if (!UseMXCSR) { + switch (ImmVal & 3) { + case 0: + RM = llvm::RoundingMode::NearestTiesToEven; + break; + case 1: + RM = llvm::RoundingMode::TowardNegative; + break; + case 2: + RM = llvm::RoundingMode::TowardPositive; + break; + case 3: + RM = llvm::RoundingMode::TowardZero; + break; + default: + llvm_unreachable("Invalid immediate rounding mode"); } + } else { + RM = llvm::RoundingMode::NearestTiesToEven; } - return Success(APValue(ResultElements.data(), ResultElements.size()), E); - } + SmallVector<APValue, 8> ResultElements; + ResultElements.reserve(DstNumElems); - case X86::BI__builtin_ia32_psrldqi128_byteshift: - case X86::BI__builtin_ia32_psrldqi256_byteshift: - case X86::BI__builtin_ia32_psrldqi512_byteshift: { - assert(E->getNumArgs() == 2); + for (unsigned I = 0; I < SrcNumElems; ++I) { + APFloat SrcVal = SrcVec.getVectorElt(I).getFloat(); - APValue Src; - APSInt Imm; - if (!EvaluateAsRValue(Info, E->getArg(0), Src) || - !EvaluateInteger(E->getArg(1), Imm, Info)) - return false; + bool LostInfo; + APFloat::opStatus St = SrcVal.convert(HalfSem, RM, &LostInfo); - unsigned VecLen = Src.getVectorLength(); - unsigned Shift = Imm.getZExtValue() & 0xff; + if (UseMXCSR && IsFPConstrained && St != APFloat::opOK) { + Info.FFDiag(E, diag::note_constexpr_dynamic_rounding); + return false; + } - 
SmallVector<APValue> ResultElements; - for (unsigned Lane = 0; Lane != VecLen; Lane += 16) { - for (unsigned I = 0; I != 16; ++I) { - if (I + Shift < 16) { - ResultElements.push_back(Src.getVectorElt(Lane + I + Shift)); - } else { - APSInt Zero(8, /*isUnsigned=*/true); - Zero = 0; - ResultElements.push_back(APValue(Zero)); - } + APSInt DstInt(SrcVal.bitcastToAPInt(), + DstElemTy->isUnsignedIntegerOrEnumerationType()); + ResultElements.push_back(APValue(DstInt)); + } + + if (DstNumElems > SrcNumElems) { + APSInt Zero = Info.Ctx.MakeIntValue(0, DstElemTy); + for (unsigned I = SrcNumElems; I < DstNumElems; ++I) { + ResultElements.push_back(APValue(Zero)); } } - return Success(APValue(ResultElements.data(), ResultElements.size()), E); + return Success(ResultElements, E); } } } @@ -13186,6 +14468,7 @@ namespace { bool VisitCallExpr(const CallExpr *E) { return handleCallExpr(E, Result, &This); } + bool VisitCastExpr(const CastExpr *E); bool VisitInitListExpr(const InitListExpr *E, QualType AllocType = QualType()); bool VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E); @@ -13256,6 +14539,49 @@ static bool MaybeElementDependentArrayFiller(const Expr *FillerExpr) { return true; } +bool ArrayExprEvaluator::VisitCastExpr(const CastExpr *E) { + const Expr *SE = E->getSubExpr(); + + switch (E->getCastKind()) { + default: + return ExprEvaluatorBaseTy::VisitCastExpr(E); + case CK_HLSLAggregateSplatCast: { + APValue Val; + QualType ValTy; + + if (!hlslAggSplatHelper(Info, SE, Val, ValTy)) + return false; + + unsigned NEls = elementwiseSize(Info, E->getType()); + + SmallVector<APValue> SplatEls(NEls, Val); + SmallVector<QualType> SplatType(NEls, ValTy); + + // cast the elements + const FPOptions FPO = E->getFPFeaturesInEffect(Info.Ctx.getLangOpts()); + if (!constructAggregate(Info, FPO, E, Result, E->getType(), SplatEls, + SplatType)) + return false; + + return true; + } + case CK_HLSLElementwiseCast: { + SmallVector<APValue> SrcEls; + SmallVector<QualType> SrcTypes; + + if (!hlslElementwiseCastHelper(Info, SE, E->getType(), SrcEls, SrcTypes)) + return false; + + // cast the elements + const FPOptions FPO = E->getFPFeaturesInEffect(Info.Ctx.getLangOpts()); + if (!constructAggregate(Info, FPO, E, Result, E->getType(), SrcEls, + SrcTypes)) + return false; + return true; + } + } +} + bool ArrayExprEvaluator::VisitInitListExpr(const InitListExpr *E, QualType AllocType) { const ConstantArrayType *CAT = Info.Ctx.getAsConstantArrayType( @@ -14622,13 +15948,15 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return Success(Val.reverseBits(), E); } - + case Builtin::BI__builtin_bswapg: case Builtin::BI__builtin_bswap16: case Builtin::BI__builtin_bswap32: case Builtin::BI__builtin_bswap64: { APSInt Val; if (!EvaluateInteger(E->getArg(0), Val, Info)) return false; + if (Val.getBitWidth() == 8) + return Success(Val, E); return Success(Val.byteSwap(), E); } @@ -14819,8 +16147,9 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, auto Mode = Info.getLangOpts().AllocTokenMode.value_or(llvm::DefaultAllocTokenMode); uint64_t BitWidth = Info.Ctx.getTypeSize(Info.Ctx.getSizeType()); + auto MaxTokensOpt = Info.getLangOpts().AllocTokenMax; uint64_t MaxTokens = - Info.getLangOpts().AllocTokenMax.value_or(~0ULL >> (64 - BitWidth)); + MaxTokensOpt.value_or(0) ? 
*MaxTokensOpt : (~0ULL >> (64 - BitWidth)); auto MaybeToken = llvm::getAllocToken(Mode, *ATMD, MaxTokens); if (!MaybeToken) return Error(E, diag::note_constexpr_infer_alloc_token_stateful_mode); @@ -15614,6 +16943,69 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return Success(Val, E); } + case clang::X86::BI__builtin_ia32_ktestcqi: + case clang::X86::BI__builtin_ia32_ktestchi: + case clang::X86::BI__builtin_ia32_ktestcsi: + case clang::X86::BI__builtin_ia32_ktestcdi: { + APSInt A, B; + if (!EvaluateInteger(E->getArg(0), A, Info) || + !EvaluateInteger(E->getArg(1), B, Info)) + return false; + + return Success((~A & B) == 0, E); + } + + case clang::X86::BI__builtin_ia32_ktestzqi: + case clang::X86::BI__builtin_ia32_ktestzhi: + case clang::X86::BI__builtin_ia32_ktestzsi: + case clang::X86::BI__builtin_ia32_ktestzdi: { + APSInt A, B; + if (!EvaluateInteger(E->getArg(0), A, Info) || + !EvaluateInteger(E->getArg(1), B, Info)) + return false; + + return Success((A & B) == 0, E); + } + + case clang::X86::BI__builtin_ia32_kortestcqi: + case clang::X86::BI__builtin_ia32_kortestchi: + case clang::X86::BI__builtin_ia32_kortestcsi: + case clang::X86::BI__builtin_ia32_kortestcdi: { + APSInt A, B; + if (!EvaluateInteger(E->getArg(0), A, Info) || + !EvaluateInteger(E->getArg(1), B, Info)) + return false; + + return Success(~(A | B) == 0, E); + } + + case clang::X86::BI__builtin_ia32_kortestzqi: + case clang::X86::BI__builtin_ia32_kortestzhi: + case clang::X86::BI__builtin_ia32_kortestzsi: + case clang::X86::BI__builtin_ia32_kortestzdi: { + APSInt A, B; + if (!EvaluateInteger(E->getArg(0), A, Info) || + !EvaluateInteger(E->getArg(1), B, Info)) + return false; + + return Success((A | B) == 0, E); + } + + case clang::X86::BI__builtin_ia32_kunpckhi: + case clang::X86::BI__builtin_ia32_kunpckdi: + case clang::X86::BI__builtin_ia32_kunpcksi: { + APSInt A, B; + if (!EvaluateInteger(E->getArg(0), A, Info) || + !EvaluateInteger(E->getArg(1), B, Info)) + return false; + + // Generic kunpack: extract lower half of each operand and concatenate + // Result = A[HalfWidth-1:0] concat B[HalfWidth-1:0] + unsigned BW = A.getBitWidth(); + APSInt Result(A.trunc(BW / 2).concat(B.trunc(BW / 2)), A.isUnsigned()); + return Success(Result, E); + } + case clang::X86::BI__builtin_ia32_lzcnt_u16: case clang::X86::BI__builtin_ia32_lzcnt_u32: case clang::X86::BI__builtin_ia32_lzcnt_u64: { @@ -15748,6 +17140,40 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; }); } + case X86::BI__builtin_ia32_kmovb: + case X86::BI__builtin_ia32_kmovw: + case X86::BI__builtin_ia32_kmovd: + case X86::BI__builtin_ia32_kmovq: { + APSInt Val; + if (!EvaluateInteger(E->getArg(0), Val, Info)) + return false; + return Success(Val, E); + } + + case X86::BI__builtin_ia32_kshiftliqi: + case X86::BI__builtin_ia32_kshiftlihi: + case X86::BI__builtin_ia32_kshiftlisi: + case X86::BI__builtin_ia32_kshiftlidi: { + return HandleMaskBinOp([](const APSInt &LHS, const APSInt &RHS) { + unsigned Amt = RHS.getZExtValue() & 0xFF; + if (Amt >= LHS.getBitWidth()) + return APSInt(APInt::getZero(LHS.getBitWidth()), LHS.isUnsigned()); + return APSInt(LHS.shl(Amt), LHS.isUnsigned()); + }); + } + + case X86::BI__builtin_ia32_kshiftriqi: + case X86::BI__builtin_ia32_kshiftrihi: + case X86::BI__builtin_ia32_kshiftrisi: + case X86::BI__builtin_ia32_kshiftridi: { + return HandleMaskBinOp([](const APSInt &LHS, const APSInt &RHS) { + unsigned Amt = RHS.getZExtValue() & 0xFF; + if (Amt >= 
LHS.getBitWidth()) + return APSInt(APInt::getZero(LHS.getBitWidth()), LHS.isUnsigned()); + return APSInt(LHS.lshr(Amt), LHS.isUnsigned()); + }); + } + case clang::X86::BI__builtin_ia32_vec_ext_v4hi: case clang::X86::BI__builtin_ia32_vec_ext_v16qi: case clang::X86::BI__builtin_ia32_vec_ext_v8hi: @@ -15767,6 +17193,37 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return Success(Vec.getVectorElt(Idx).getInt(), E); } + case clang::X86::BI__builtin_ia32_cvtb2mask128: + case clang::X86::BI__builtin_ia32_cvtb2mask256: + case clang::X86::BI__builtin_ia32_cvtb2mask512: + case clang::X86::BI__builtin_ia32_cvtw2mask128: + case clang::X86::BI__builtin_ia32_cvtw2mask256: + case clang::X86::BI__builtin_ia32_cvtw2mask512: + case clang::X86::BI__builtin_ia32_cvtd2mask128: + case clang::X86::BI__builtin_ia32_cvtd2mask256: + case clang::X86::BI__builtin_ia32_cvtd2mask512: + case clang::X86::BI__builtin_ia32_cvtq2mask128: + case clang::X86::BI__builtin_ia32_cvtq2mask256: + case clang::X86::BI__builtin_ia32_cvtq2mask512: { + assert(E->getNumArgs() == 1); + APValue Vec; + if (!EvaluateVector(E->getArg(0), Vec, Info)) + return false; + + unsigned VectorLen = Vec.getVectorLength(); + unsigned RetWidth = Info.Ctx.getIntWidth(E->getType()); + llvm::APInt Bits(RetWidth, 0); + + for (unsigned ElemNum = 0; ElemNum != VectorLen; ++ElemNum) { + const APSInt &A = Vec.getVectorElt(ElemNum).getInt(); + unsigned MSB = A[A.getBitWidth() - 1]; + Bits.setBitVal(ElemNum, MSB); + } + + APSInt RetMask(Bits, /*isUnsigned=*/true); + return Success(APValue(RetMask), E); + } + case clang::X86::BI__builtin_ia32_cmpb128_mask: case clang::X86::BI__builtin_ia32_cmpw128_mask: case clang::X86::BI__builtin_ia32_cmpd128_mask: @@ -15795,7 +17252,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, bool IsUnsigned = (BuiltinOp >= clang::X86::BI__builtin_ia32_ucmpb128_mask && - BuiltinOp <= clang::X86::BI__builtin_ia32_ucmpq512_mask); + BuiltinOp <= clang::X86::BI__builtin_ia32_ucmpw512_mask); APValue LHS, RHS; APSInt Mask, Opcode; @@ -15849,6 +17306,48 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return Success(APValue(RetMask), E); } + case X86::BI__builtin_ia32_vpshufbitqmb128_mask: + case X86::BI__builtin_ia32_vpshufbitqmb256_mask: + case X86::BI__builtin_ia32_vpshufbitqmb512_mask: { + assert(E->getNumArgs() == 3); + + APValue Source, ShuffleMask; + APSInt ZeroMask; + if (!EvaluateVector(E->getArg(0), Source, Info) || + !EvaluateVector(E->getArg(1), ShuffleMask, Info) || + !EvaluateInteger(E->getArg(2), ZeroMask, Info)) + return false; + + assert(Source.getVectorLength() == ShuffleMask.getVectorLength()); + assert(ZeroMask.getBitWidth() == Source.getVectorLength()); + + unsigned NumBytesInQWord = 8; + unsigned NumBitsInByte = 8; + unsigned NumBytes = Source.getVectorLength(); + unsigned NumQWords = NumBytes / NumBytesInQWord; + unsigned RetWidth = ZeroMask.getBitWidth(); + APSInt RetMask(llvm::APInt(RetWidth, 0), /*isUnsigned=*/true); + + for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) { + APInt SourceQWord(64, 0); + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + uint64_t Byte = Source.getVectorElt(QWordId * NumBytesInQWord + ByteIdx) + .getInt() + .getZExtValue(); + SourceQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte); + } + + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + unsigned SelIdx = QWordId * NumBytesInQWord + ByteIdx; + unsigned M = + ShuffleMask.getVectorElt(SelIdx).getInt().getZExtValue() & 0x3F; + 
if (ZeroMask[SelIdx]) { + RetMask.setBitVal(SelIdx, SourceQWord[M]); + } + } + } + return Success(APValue(RetMask), E); + } } } @@ -17062,7 +18561,6 @@ bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) { case CK_NoOp: case CK_LValueToRValueBitCast: case CK_HLSLArrayRValue: - case CK_HLSLElementwiseCast: return ExprEvaluatorBaseTy::VisitCastExpr(E); case CK_MemberPointerToBoolean: @@ -17209,6 +18707,25 @@ bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) { return Error(E); return Success(Val.getVectorElt(0), E); } + case CK_HLSLMatrixTruncation: { + // TODO: See #168935. Add matrix truncation support to expr constant. + return Error(E); + } + case CK_HLSLElementwiseCast: { + SmallVector<APValue> SrcVals; + SmallVector<QualType> SrcTypes; + + if (!hlslElementwiseCastHelper(Info, SubExpr, DestType, SrcVals, SrcTypes)) + return false; + + // cast our single element + const FPOptions FPO = E->getFPFeaturesInEffect(Info.Ctx.getLangOpts()); + APValue ResultVal; + if (!handleScalarCast(Info, FPO, E, SrcTypes[0], DestType, SrcVals[0], + ResultVal)) + return false; + return Success(ResultVal, E); + } } llvm_unreachable("unknown cast resulting in integral value"); @@ -17746,6 +19263,9 @@ bool FloatExprEvaluator::VisitCastExpr(const CastExpr *E) { default: return ExprEvaluatorBaseTy::VisitCastExpr(E); + case CK_HLSLAggregateSplatCast: + llvm_unreachable("invalid cast kind for floating value"); + case CK_IntegralToFloating: { APSInt IntResult; const FPOptions FPO = E->getFPFeaturesInEffect( @@ -17784,6 +19304,27 @@ bool FloatExprEvaluator::VisitCastExpr(const CastExpr *E) { return Error(E); return Success(Val.getVectorElt(0), E); } + case CK_HLSLMatrixTruncation: { + // TODO: See #168935. Add matrix truncation support to expr constant. + return Error(E); + } + case CK_HLSLElementwiseCast: { + SmallVector<APValue> SrcVals; + SmallVector<QualType> SrcTypes; + + if (!hlslElementwiseCastHelper(Info, SubExpr, E->getType(), SrcVals, + SrcTypes)) + return false; + APValue Val; + + // cast our single element + const FPOptions FPO = E->getFPFeaturesInEffect(Info.Ctx.getLangOpts()); + APValue ResultVal; + if (!handleScalarCast(Info, FPO, E, SrcTypes[0], E->getType(), SrcVals[0], + ResultVal)) + return false; + return Success(ResultVal, E); + } } } @@ -17924,6 +19465,7 @@ bool ComplexExprEvaluator::VisitCastExpr(const CastExpr *E) { case CK_IntegralToFixedPoint: case CK_MatrixCast: case CK_HLSLVectorTruncation: + case CK_HLSLMatrixTruncation: case CK_HLSLElementwiseCast: case CK_HLSLAggregateSplatCast: llvm_unreachable("invalid cast kind for complex value"); @@ -18019,6 +19561,88 @@ bool ComplexExprEvaluator::VisitCastExpr(const CastExpr *E) { llvm_unreachable("unknown cast resulting in complex value"); } +uint8_t GFNIMultiplicativeInverse(uint8_t Byte) { + // Lookup Table for Multiplicative Inverse in GF(2^8) + const uint8_t GFInv[256] = { + 0x00, 0x01, 0x8d, 0xf6, 0xcb, 0x52, 0x7b, 0xd1, 0xe8, 0x4f, 0x29, 0xc0, + 0xb0, 0xe1, 0xe5, 0xc7, 0x74, 0xb4, 0xaa, 0x4b, 0x99, 0x2b, 0x60, 0x5f, + 0x58, 0x3f, 0xfd, 0xcc, 0xff, 0x40, 0xee, 0xb2, 0x3a, 0x6e, 0x5a, 0xf1, + 0x55, 0x4d, 0xa8, 0xc9, 0xc1, 0x0a, 0x98, 0x15, 0x30, 0x44, 0xa2, 0xc2, + 0x2c, 0x45, 0x92, 0x6c, 0xf3, 0x39, 0x66, 0x42, 0xf2, 0x35, 0x20, 0x6f, + 0x77, 0xbb, 0x59, 0x19, 0x1d, 0xfe, 0x37, 0x67, 0x2d, 0x31, 0xf5, 0x69, + 0xa7, 0x64, 0xab, 0x13, 0x54, 0x25, 0xe9, 0x09, 0xed, 0x5c, 0x05, 0xca, + 0x4c, 0x24, 0x87, 0xbf, 0x18, 0x3e, 0x22, 0xf0, 0x51, 0xec, 0x61, 0x17, + 0x16, 0x5e, 0xaf, 0xd3, 0x49, 0xa6, 0x36, 0x43, 0xf4, 0x47, 0x91, 0xdf, + 0x33, 
0x93, 0x21, 0x3b, 0x79, 0xb7, 0x97, 0x85, 0x10, 0xb5, 0xba, 0x3c, + 0xb6, 0x70, 0xd0, 0x06, 0xa1, 0xfa, 0x81, 0x82, 0x83, 0x7e, 0x7f, 0x80, + 0x96, 0x73, 0xbe, 0x56, 0x9b, 0x9e, 0x95, 0xd9, 0xf7, 0x02, 0xb9, 0xa4, + 0xde, 0x6a, 0x32, 0x6d, 0xd8, 0x8a, 0x84, 0x72, 0x2a, 0x14, 0x9f, 0x88, + 0xf9, 0xdc, 0x89, 0x9a, 0xfb, 0x7c, 0x2e, 0xc3, 0x8f, 0xb8, 0x65, 0x48, + 0x26, 0xc8, 0x12, 0x4a, 0xce, 0xe7, 0xd2, 0x62, 0x0c, 0xe0, 0x1f, 0xef, + 0x11, 0x75, 0x78, 0x71, 0xa5, 0x8e, 0x76, 0x3d, 0xbd, 0xbc, 0x86, 0x57, + 0x0b, 0x28, 0x2f, 0xa3, 0xda, 0xd4, 0xe4, 0x0f, 0xa9, 0x27, 0x53, 0x04, + 0x1b, 0xfc, 0xac, 0xe6, 0x7a, 0x07, 0xae, 0x63, 0xc5, 0xdb, 0xe2, 0xea, + 0x94, 0x8b, 0xc4, 0xd5, 0x9d, 0xf8, 0x90, 0x6b, 0xb1, 0x0d, 0xd6, 0xeb, + 0xc6, 0x0e, 0xcf, 0xad, 0x08, 0x4e, 0xd7, 0xe3, 0x5d, 0x50, 0x1e, 0xb3, + 0x5b, 0x23, 0x38, 0x34, 0x68, 0x46, 0x03, 0x8c, 0xdd, 0x9c, 0x7d, 0xa0, + 0xcd, 0x1a, 0x41, 0x1c}; + + return GFInv[Byte]; +} + +uint8_t GFNIAffine(uint8_t XByte, const APInt &AQword, const APSInt &Imm, + bool Inverse) { + unsigned NumBitsInByte = 8; + // Computing the affine transformation + uint8_t RetByte = 0; + for (uint32_t BitIdx = 0; BitIdx != NumBitsInByte; ++BitIdx) { + uint8_t AByte = + AQword.lshr((7 - static_cast<int32_t>(BitIdx)) * NumBitsInByte) + .getLoBits(8) + .getZExtValue(); + uint8_t Product; + if (Inverse) { + Product = AByte & GFNIMultiplicativeInverse(XByte); + } else { + Product = AByte & XByte; + } + uint8_t Parity = 0; + + // Dot product in GF(2) uses XOR instead of addition + for (unsigned PBitIdx = 0; PBitIdx != NumBitsInByte; ++PBitIdx) { + Parity = Parity ^ ((Product >> PBitIdx) & 0x1); + } + + uint8_t Temp = Imm[BitIdx] ? 1 : 0; + RetByte |= (Temp ^ Parity) << BitIdx; + } + return RetByte; +} + +uint8_t GFNIMul(uint8_t AByte, uint8_t BByte) { + // Multiplying two polynomials of degree 7 + // Polynomial of degree 7 + // x^7 + x^6 + x^5 + x^4 + x^3 + x^2 + x + 1 + uint16_t TWord = 0; + unsigned NumBitsInByte = 8; + for (unsigned BitIdx = 0; BitIdx != NumBitsInByte; ++BitIdx) { + if ((BByte >> BitIdx) & 0x1) { + TWord = TWord ^ (AByte << BitIdx); + } + } + + // When multiplying two polynomials of degree 7 + // results in a polynomial of degree 14 + // so the result has to be reduced to 7 + // Reduction polynomial is x^8 + x^4 + x^3 + x + 1 i.e. 
0x11B + for (int32_t BitIdx = 14; BitIdx > 7; --BitIdx) { + if ((TWord >> BitIdx) & 0x1) { + TWord = TWord ^ (0x11B << (BitIdx - 8)); + } + } + return (TWord & 0xFF); +} + void HandleComplexComplexMul(APFloat A, APFloat B, APFloat C, APFloat D, APFloat &ResR, APFloat &ResI) { // This is an implementation of complex multiplication according to the diff --git a/clang/lib/AST/ExprObjC.cpp b/clang/lib/AST/ExprObjC.cpp index 83419a1..3509182 100644 --- a/clang/lib/AST/ExprObjC.cpp +++ b/clang/lib/AST/ExprObjC.cpp @@ -330,8 +330,7 @@ Stmt::child_range ObjCMessageExpr::children() { } Stmt::const_child_range ObjCMessageExpr::children() const { - auto Children = const_cast<ObjCMessageExpr *>(this)->children(); - return const_child_range(Children.begin(), Children.end()); + return const_cast<ObjCMessageExpr *>(this)->children(); } StringRef ObjCBridgedCastExpr::getBridgeKindName() const { diff --git a/clang/lib/AST/JSONNodeDumper.cpp b/clang/lib/AST/JSONNodeDumper.cpp index 9f4dba9..715e1e0 100644 --- a/clang/lib/AST/JSONNodeDumper.cpp +++ b/clang/lib/AST/JSONNodeDumper.cpp @@ -272,15 +272,13 @@ void JSONNodeDumper::writeIncludeStack(PresumedLoc Loc, bool JustFirst) { JOS.attributeEnd(); } -void JSONNodeDumper::writeBareSourceLocation(SourceLocation Loc, - bool IsSpelling) { +void JSONNodeDumper::writeBareSourceLocation(SourceLocation Loc) { PresumedLoc Presumed = SM.getPresumedLoc(Loc); - unsigned ActualLine = IsSpelling ? SM.getSpellingLineNumber(Loc) - : SM.getExpansionLineNumber(Loc); - StringRef ActualFile = SM.getBufferName(Loc); - if (Presumed.isValid()) { - JOS.attribute("offset", SM.getDecomposedLoc(Loc).second); + StringRef ActualFile = SM.getBufferName(Loc); + auto [FID, FilePos] = SM.getDecomposedLoc(Loc); + unsigned ActualLine = SM.getLineNumber(FID, FilePos); + JOS.attribute("offset", FilePos); if (LastLocFilename != ActualFile) { JOS.attribute("file", ActualFile); JOS.attribute("line", ActualLine); @@ -318,18 +316,17 @@ void JSONNodeDumper::writeSourceLocation(SourceLocation Loc) { if (Expansion != Spelling) { // If the expansion and the spelling are different, output subobjects // describing both locations. - JOS.attributeObject("spellingLoc", [Spelling, this] { - writeBareSourceLocation(Spelling, /*IsSpelling*/ true); - }); + JOS.attributeObject( + "spellingLoc", [Spelling, this] { writeBareSourceLocation(Spelling); }); JOS.attributeObject("expansionLoc", [Expansion, Loc, this] { - writeBareSourceLocation(Expansion, /*IsSpelling*/ false); + writeBareSourceLocation(Expansion); // If there is a macro expansion, add extra information if the interesting // bit is the macro arg expansion. 
if (SM.isMacroArgExpansion(Loc)) JOS.attribute("isMacroArgExpansion", true); }); } else - writeBareSourceLocation(Spelling, /*IsSpelling*/ true); + writeBareSourceLocation(Spelling); } void JSONNodeDumper::writeSourceRange(SourceRange R) { @@ -1610,6 +1607,10 @@ void JSONNodeDumper::VisitCXXDefaultInitExpr(const CXXDefaultInitExpr *Node) { attributeOnlyIfTrue("hasRewrittenInit", Node->hasRewrittenInit()); } +void JSONNodeDumper::VisitLambdaExpr(const LambdaExpr *LE) { + JOS.attribute("hasExplicitParameters", LE->hasExplicitParameters()); +} + void JSONNodeDumper::VisitCXXDependentScopeMemberExpr( const CXXDependentScopeMemberExpr *DSME) { JOS.attribute("isArrow", DSME->isArrow()); diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 791df7e..2183d77 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -105,6 +105,8 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) { return static_cast<const OMPFilterClause *>(C); case OMPC_ompx_dyn_cgroup_mem: return static_cast<const OMPXDynCGroupMemClause *>(C); + case OMPC_dyn_groupprivate: + return static_cast<const OMPDynGroupprivateClause *>(C); case OMPC_message: return static_cast<const OMPMessageClause *>(C); case OMPC_default: @@ -124,6 +126,7 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) { case OMPC_nowait: case OMPC_untied: case OMPC_mergeable: + case OMPC_threadset: case OMPC_threadprivate: case OMPC_groupprivate: case OMPC_flush: @@ -1318,7 +1321,7 @@ OMPToClause *OMPToClause::Create( const ASTContext &C, const OMPVarListLocTy &Locs, ArrayRef<Expr *> Vars, ArrayRef<ValueDecl *> Declarations, MappableExprComponentListsRef ComponentLists, ArrayRef<Expr *> UDMapperRefs, - ArrayRef<OpenMPMotionModifierKind> MotionModifiers, + Expr *IteratorModifier, ArrayRef<OpenMPMotionModifierKind> MotionModifiers, ArrayRef<SourceLocation> MotionModifiersLoc, NestedNameSpecifierLoc UDMQualifierLoc, DeclarationNameInfo MapperId) { OMPMappableExprListSizeTy Sizes; @@ -1340,7 +1343,7 @@ OMPToClause *OMPToClause::Create( void *Mem = C.Allocate( totalSizeToAlloc<Expr *, ValueDecl *, unsigned, OMPClauseMappableExprCommon::MappableComponent>( - 2 * Sizes.NumVars, Sizes.NumUniqueDeclarations, + 2 * Sizes.NumVars + 1, Sizes.NumUniqueDeclarations, Sizes.NumUniqueDeclarations + Sizes.NumComponentLists, Sizes.NumComponents)); @@ -1350,6 +1353,7 @@ OMPToClause *OMPToClause::Create( Clause->setVarRefs(Vars); Clause->setUDMapperRefs(UDMapperRefs); Clause->setClauseInfo(Declarations, ComponentLists); + Clause->setIteratorModifier(IteratorModifier); return Clause; } @@ -1358,17 +1362,19 @@ OMPToClause *OMPToClause::CreateEmpty(const ASTContext &C, void *Mem = C.Allocate( totalSizeToAlloc<Expr *, ValueDecl *, unsigned, OMPClauseMappableExprCommon::MappableComponent>( - 2 * Sizes.NumVars, Sizes.NumUniqueDeclarations, + 2 * Sizes.NumVars + 1, Sizes.NumUniqueDeclarations, Sizes.NumUniqueDeclarations + Sizes.NumComponentLists, Sizes.NumComponents)); - return new (Mem) OMPToClause(Sizes); + OMPToClause *Clause = new (Mem) OMPToClause(Sizes); + Clause->setIteratorModifier(nullptr); + return Clause; } OMPFromClause *OMPFromClause::Create( const ASTContext &C, const OMPVarListLocTy &Locs, ArrayRef<Expr *> Vars, ArrayRef<ValueDecl *> Declarations, MappableExprComponentListsRef ComponentLists, ArrayRef<Expr *> UDMapperRefs, - ArrayRef<OpenMPMotionModifierKind> MotionModifiers, + Expr *IteratorModifier, ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 
ArrayRef<SourceLocation> MotionModifiersLoc, NestedNameSpecifierLoc UDMQualifierLoc, DeclarationNameInfo MapperId) { OMPMappableExprListSizeTy Sizes; @@ -1390,7 +1396,7 @@ OMPFromClause *OMPFromClause::Create( void *Mem = C.Allocate( totalSizeToAlloc<Expr *, ValueDecl *, unsigned, OMPClauseMappableExprCommon::MappableComponent>( - 2 * Sizes.NumVars, Sizes.NumUniqueDeclarations, + 2 * Sizes.NumVars + 1, Sizes.NumUniqueDeclarations, Sizes.NumUniqueDeclarations + Sizes.NumComponentLists, Sizes.NumComponents)); @@ -1401,6 +1407,7 @@ OMPFromClause *OMPFromClause::Create( Clause->setVarRefs(Vars); Clause->setUDMapperRefs(UDMapperRefs); Clause->setClauseInfo(Declarations, ComponentLists); + Clause->setIteratorModifier(IteratorModifier); return Clause; } @@ -1410,10 +1417,12 @@ OMPFromClause::CreateEmpty(const ASTContext &C, void *Mem = C.Allocate( totalSizeToAlloc<Expr *, ValueDecl *, unsigned, OMPClauseMappableExprCommon::MappableComponent>( - 2 * Sizes.NumVars, Sizes.NumUniqueDeclarations, + 2 * Sizes.NumVars + 1, Sizes.NumUniqueDeclarations, Sizes.NumUniqueDeclarations + Sizes.NumComponentLists, Sizes.NumComponents)); - return new (Mem) OMPFromClause(Sizes); + OMPFromClause *Clause = new (Mem) OMPFromClause(Sizes); + Clause->setIteratorModifier(nullptr); + return Clause; } void OMPUseDevicePtrClause::setPrivateCopies(ArrayRef<Expr *> VL) { @@ -2035,6 +2044,13 @@ void OMPClausePrinter::VisitOMPDefaultClause(OMPDefaultClause *Node) { OS << ")"; } +void OMPClausePrinter::VisitOMPThreadsetClause(OMPThreadsetClause *Node) { + OS << "threadset(" + << getOpenMPSimpleClauseTypeName(OMPC_threadset, + unsigned(Node->getThreadsetKind())) + << ")"; +} + void OMPClausePrinter::VisitOMPProcBindClause(OMPProcBindClause *Node) { OS << "proc_bind(" << getOpenMPSimpleClauseTypeName(OMPC_proc_bind, @@ -2684,12 +2700,16 @@ template <typename T> void OMPClausePrinter::VisitOMPMotionClause(T *Node) { OS << '('; for (unsigned I = 0; I < NumberOfOMPMotionModifiers; ++I) { if (Node->getMotionModifier(I) != OMPC_MOTION_MODIFIER_unknown) { - OS << getOpenMPSimpleClauseTypeName(Node->getClauseKind(), - Node->getMotionModifier(I)); - if (Node->getMotionModifier(I) == OMPC_MOTION_MODIFIER_mapper) - PrintMapper(OS, Node, Policy); - if (I < ModifierCount - 1) - OS << ", "; + if (Node->getMotionModifier(I) == OMPC_MOTION_MODIFIER_iterator) { + PrintIterator(OS, Node, Policy); + } else { + OS << getOpenMPSimpleClauseTypeName(Node->getClauseKind(), + Node->getMotionModifier(I)); + if (Node->getMotionModifier(I) == OMPC_MOTION_MODIFIER_mapper) + PrintMapper(OS, Node, Policy); + if (I < ModifierCount - 1) + OS << ", "; + } } } OS << ':'; @@ -2849,6 +2869,24 @@ void OMPClausePrinter::VisitOMPXDynCGroupMemClause( OS << ")"; } +void OMPClausePrinter::VisitOMPDynGroupprivateClause( + OMPDynGroupprivateClause *Node) { + OS << "dyn_groupprivate("; + if (Node->getDynGroupprivateModifier() != OMPC_DYN_GROUPPRIVATE_unknown) { + OS << getOpenMPSimpleClauseTypeName(OMPC_dyn_groupprivate, + Node->getDynGroupprivateModifier()); + if (Node->getDynGroupprivateFallbackModifier() != + OMPC_DYN_GROUPPRIVATE_FALLBACK_unknown) { + OS << ", "; + OS << getOpenMPSimpleClauseTypeName( + OMPC_dyn_groupprivate, Node->getDynGroupprivateFallbackModifier()); + } + OS << ": "; + } + Node->getSize()->printPretty(OS, nullptr, Policy, 0); + OS << ')'; +} + void OMPClausePrinter::VisitOMPDoacrossClause(OMPDoacrossClause *Node) { OS << "doacross("; OpenMPDoacrossClauseModifier DepType = Node->getDependenceType(); diff --git a/clang/lib/AST/RecordLayoutBuilder.cpp 
diff --git a/clang/lib/AST/RecordLayoutBuilder.cpp b/clang/lib/AST/RecordLayoutBuilder.cpp
index ac18d4d..5d8f54f 100644
--- a/clang/lib/AST/RecordLayoutBuilder.cpp
+++ b/clang/lib/AST/RecordLayoutBuilder.cpp
@@ -3363,16 +3363,15 @@ void MicrosoftRecordLayoutBuilder::computeVtorDispSet(
 /// position information.
 const ASTRecordLayout &
 ASTContext::getASTRecordLayout(const RecordDecl *D) const {
-  // These asserts test different things. A record has a definition
-  // as soon as we begin to parse the definition. That definition is
-  // not a complete definition (which is what isDefinition() tests)
-  // until we *finish* parsing the definition.
-
   if (D->hasExternalLexicalStorage() && !D->getDefinition())
     getExternalSource()->CompleteType(const_cast<RecordDecl*>(D));
 
   // Complete the redecl chain (if necessary).
   (void)D->getMostRecentDecl();
 
+  // These asserts test different things. A record has a definition
+  // as soon as we begin to parse the definition. That definition is
+  // not a complete definition (which is what isCompleteDefinition() tests)
+  // until we *finish* parsing the definition.
   D = D->getDefinition();
   assert(D && "Cannot get layout of forward declarations!");
   assert(!D->isInvalidDecl() && "Cannot get layout of invalid decl!");
diff --git a/clang/lib/AST/StmtOpenACC.cpp b/clang/lib/AST/StmtOpenACC.cpp
index 39dfa19..ec8ceb9 100644
--- a/clang/lib/AST/StmtOpenACC.cpp
+++ b/clang/lib/AST/StmtOpenACC.cpp
@@ -324,30 +324,221 @@ OpenACCAtomicConstruct *OpenACCAtomicConstruct::Create(
   return Inst;
 }
 
-static std::pair<const Expr *, const Expr *> getBinaryOpArgs(const Expr *Op) {
+static std::optional<std::pair<const Expr *, const Expr *>>
+getBinaryAssignOpArgs(const Expr *Op, bool &IsCompoundAssign) {
   if (const auto *BO = dyn_cast<BinaryOperator>(Op)) {
-    assert(BO->isAssignmentOp());
-    return {BO->getLHS(), BO->getRHS()};
+    if (!BO->isAssignmentOp())
+      return std::nullopt;
+    IsCompoundAssign = BO->isCompoundAssignmentOp();
+    return std::pair<const Expr *, const Expr *>(BO->getLHS(), BO->getRHS());
   }
 
-  const auto *OO = cast<CXXOperatorCallExpr>(Op);
-  assert(OO->isAssignmentOp());
-  return {OO->getArg(0), OO->getArg(1)};
+  if (const auto *OO = dyn_cast<CXXOperatorCallExpr>(Op)) {
+    if (!OO->isAssignmentOp())
+      return std::nullopt;
+    IsCompoundAssign = OO->getOperator() != OO_Equal;
+    return std::pair<const Expr *, const Expr *>(OO->getArg(0), OO->getArg(1));
+  }
+  return std::nullopt;
+}
+
+static std::optional<std::pair<const Expr *, const Expr *>>
+getBinaryAssignOpArgs(const Expr *Op) {
+  bool IsCompoundAssign;
+  return getBinaryAssignOpArgs(Op, IsCompoundAssign);
 }
 
-static std::pair<bool, const Expr *> getUnaryOpArgs(const Expr *Op) {
+static std::optional<std::pair<const Expr *, bool>>
+getUnaryOpArgs(const Expr *Op) {
   if (const auto *UO = dyn_cast<UnaryOperator>(Op))
-    return {true, UO->getSubExpr()};
+    return {{UO->getSubExpr(), UO->isPostfix()}};
 
   if (const auto *OpCall = dyn_cast<CXXOperatorCallExpr>(Op)) {
     // Post-inc/dec have a second unused argument to differentiate it, so we
     // accept -- or ++ as unary, or any operator call with only 1 arg.
-    if (OpCall->getNumArgs() == 1 || OpCall->getOperator() != OO_PlusPlus ||
-        OpCall->getOperator() != OO_MinusMinus)
-      return {true, OpCall->getArg(0)};
+    if (OpCall->getNumArgs() == 1 || OpCall->getOperator() == OO_PlusPlus ||
+        OpCall->getOperator() == OO_MinusMinus)
+      return {{OpCall->getArg(0), /*IsPostfix=*/OpCall->getNumArgs() == 1}};
   }
 
-  return {false, nullptr};
+  return std::nullopt;
+}
+
+// Read is of the form `v = x;`, where both sides are scalar l-values. This is
+// a BinaryOperator or CXXOperatorCallExpr.
+static std::optional<OpenACCAtomicConstruct::SingleStmtInfo>
+getReadStmtInfo(const Expr *E, bool ForAtomicComputeSingleStmt = false) {
+  std::optional<std::pair<const Expr *, const Expr *>> BinaryArgs =
+      getBinaryAssignOpArgs(E);
+
+  if (!BinaryArgs)
+    return std::nullopt;
+
+  // We want the l-value for each side, so we ignore implicit casts.
+  auto Res = OpenACCAtomicConstruct::SingleStmtInfo::createRead(
+      E, BinaryArgs->first->IgnoreImpCasts(),
+      BinaryArgs->second->IgnoreImpCasts());
+
+  // The atomic compute single-stmt variant has to do a 'fixup' step for the
+  // 'X' value, since it is dependent on the RHS. So if we're in that version,
+  // we skip the checks on X.
+  if ((!ForAtomicComputeSingleStmt &&
+       (!Res.X->isLValue() || !Res.X->getType()->isScalarType())) ||
+      !Res.V->isLValue() || !Res.V->getType()->isScalarType())
+    return std::nullopt;
+
+  return Res;
+}
+
+// Write supports only the format 'x = expr', where the expression is of
+// scalar type and 'x' is a scalar l-value. As above, this can come in 2
+// forms: BinaryOperator or CXXOperatorCallExpr.
+static std::optional<OpenACCAtomicConstruct::SingleStmtInfo>
+getWriteStmtInfo(const Expr *E) {
+  std::optional<std::pair<const Expr *, const Expr *>> BinaryArgs =
+      getBinaryAssignOpArgs(E);
+  if (!BinaryArgs)
+    return std::nullopt;
+
+  // We want the l-value for ONLY the X side, so we ignore implicit casts. For
+  // the right side (the expr), we emit it as an r-value, so we need to
+  // maintain implicit casts.
+  auto Res = OpenACCAtomicConstruct::SingleStmtInfo::createWrite(
+      E, BinaryArgs->first->IgnoreImpCasts(), BinaryArgs->second);
+
+  if (!Res.X->isLValue() || !Res.X->getType()->isScalarType())
+    return std::nullopt;
+  return Res;
+}
+
+static std::optional<OpenACCAtomicConstruct::SingleStmtInfo>
+getUpdateStmtInfo(const Expr *E) {
+  std::optional<std::pair<const Expr *, bool>> UnaryArgs = getUnaryOpArgs(E);
+  if (UnaryArgs) {
+    auto Res = OpenACCAtomicConstruct::SingleStmtInfo::createUpdate(
+        E, UnaryArgs->first->IgnoreImpCasts(), UnaryArgs->second);
+
+    if (!Res.X->isLValue() || !Res.X->getType()->isScalarType())
+      return std::nullopt;
+
+    return Res;
+  }
+
+  bool IsRHSCompoundAssign = false;
+  std::optional<std::pair<const Expr *, const Expr *>> BinaryArgs =
+      getBinaryAssignOpArgs(E, IsRHSCompoundAssign);
+  if (!BinaryArgs)
+    return std::nullopt;
+
+  auto Res = OpenACCAtomicConstruct::SingleStmtInfo::createUpdate(
+      E, BinaryArgs->first->IgnoreImpCasts(), /*PostFixIncDec=*/false);
+
+  if (!Res.X->isLValue() || !Res.X->getType()->isScalarType())
+    return std::nullopt;
+
+  // 'update' has to be either a compound-assignment operation or an
+  // assignment-to-a-binary-op. Return nullopt if neither is the case.
+  // If we are already compound-assign, we're done!
+  if (IsRHSCompoundAssign)
+    return Res;
+
+  // Else we have to check that we have a binary operator.
+  const Expr *RHS = BinaryArgs->second->IgnoreImpCasts();
+
+  if (isa<BinaryOperator>(RHS)) {
+    return Res;
+  } else if (const auto *OO = dyn_cast<CXXOperatorCallExpr>(RHS)) {
+    if (OO->isInfixBinaryOp())
+      return Res;
+  }
+
+  return std::nullopt;
+}
+
+/// The statement associated with an atomic capture comes in one of two forms:
+/// a compound statement containing two statements, or a single statement. In
+/// either case, the compound/single statement is decomposed into 2 separate
+/// operations, either a read/write, read/update, or update/read. This
+/// function figures out that information in the form listed in the standard
+/// (filling in V, X, or Expr) for each of these operations.
+static OpenACCAtomicConstruct::StmtInfo
+getCaptureStmtInfo(const Stmt *AssocStmt) {
+
+  if (const auto *CmpdStmt = dyn_cast<CompoundStmt>(AssocStmt)) {
+    // We checked during Sema to ensure we only have 2 statements here, and
+    // that both are expressions, so we can look at these to see what the
+    // valid options are.
+    const Expr *Stmt1 = cast<Expr>(*CmpdStmt->body().begin())->IgnoreImpCasts();
+    const Expr *Stmt2 =
+        cast<Expr>(*(CmpdStmt->body().begin() + 1))->IgnoreImpCasts();
+
+    // The compound statement form allows read/write, read/update, or
+    // update/read. First we get the information for a 'Read' to see if this
+    // is one of the former two.
+    std::optional<OpenACCAtomicConstruct::SingleStmtInfo> Read =
+        getReadStmtInfo(Stmt1);
+
+    if (Read) {
+      // READ : WRITE
+      //   v = x; x = expr
+      // READ : UPDATE
+      //   v = x; x binop= expr
+      //   v = x; x = x binop expr
+      //   v = x; x = expr binop x
+      //   v = x; x++
+      //   v = x; ++x
+      //   v = x; x--
+      //   v = x; --x
+      std::optional<OpenACCAtomicConstruct::SingleStmtInfo> Update =
+          getUpdateStmtInfo(Stmt2);
+      // Since we already know the first operation is a read, the second is
+      // either an update, which we check, or a write, which we can assume
+      // next.
+      if (Update)
+        return OpenACCAtomicConstruct::StmtInfo::createReadUpdate(*Read,
+                                                                  *Update);
+
+      std::optional<OpenACCAtomicConstruct::SingleStmtInfo> Write =
+          getWriteStmtInfo(Stmt2);
+      return OpenACCAtomicConstruct::StmtInfo::createReadWrite(*Read, *Write);
+    }
+
+    // UPDATE : READ
+    //   x binop= expr; v = x
+    //   x = x binop expr; v = x
+    //   x = expr binop x; v = x
+    //   ++x; v = x
+    //   x++; v = x
+    //   --x; v = x
+    //   x--; v = x
+    // Otherwise, it is one of the above forms for update/read.
+    std::optional<OpenACCAtomicConstruct::SingleStmtInfo> Update =
+        getUpdateStmtInfo(Stmt1);
+    Read = getReadStmtInfo(Stmt2);
+
+    return OpenACCAtomicConstruct::StmtInfo::createUpdateRead(*Update, *Read);
+  } else {
+    // All of the forms that can be done in a single line fall into 2
+    // categories: update/read or read/update. The special cases are the
+    // postfix unary operators, where we have to make sure we do the 'read'
+    // first. However, we still parse these as the RHS first, so we have a
+    // 'reversing' step.
+    // READ : UPDATE
+    //   v = x++; v = x--;
+    // UPDATE : READ
+    //   v = ++x; v = --x;
+    //   v = x binop= expr
+    //   v = x = x binop expr
+    //   v = x = expr binop x
+    const Expr *E = cast<const Expr>(AssocStmt);
+
+    std::optional<OpenACCAtomicConstruct::SingleStmtInfo> Read =
+        getReadStmtInfo(E, /*ForAtomicComputeSingleStmt=*/true);
+    std::optional<OpenACCAtomicConstruct::SingleStmtInfo> Update =
+        getUpdateStmtInfo(Read->X);
+
+    // Fix this up, since the 'X' for the read is the result after the write,
+    // but it is the same value as the LHS-most variable of the update (its X).
+    Read->X = Update->X;
+
+    // Postfix is a read FIRST, then an update.
+    if (Update->IsPostfixIncDec)
+      return OpenACCAtomicConstruct::StmtInfo::createReadUpdate(*Read,
+                                                                *Update);
+
+    return OpenACCAtomicConstruct::StmtInfo::createUpdateRead(*Update, *Read);
+  }
+  return {};
+}
 
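A hedged sketch of the statement shapes these helpers classify, following the OpenACC spec's atomic grammar (the scalar variables are illustrative only):

    void acc_atomic_forms(int v, int x, int expr) {
    #pragma acc atomic read
      v = x;                 // getReadStmtInfo: v and x are scalar l-values
    #pragma acc atomic write
      x = expr;              // getWriteStmtInfo: only x must be an l-value
    #pragma acc atomic update
      x = x + expr;          // getUpdateStmtInfo: also x binop= expr, x++, --x
    #pragma acc atomic capture
      { v = x; x += expr; }  // getCaptureStmtInfo: decomposed as read/update
    #pragma acc atomic capture
      v = x++;               // single-statement postfix: the read comes first
    }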
 const OpenACCAtomicConstruct::StmtInfo
@@ -357,48 +548,28 @@ OpenACCAtomicConstruct::getAssociatedStmtInfo() const {
   // asserts to ensure we don't get off into the weeds.
   assert(getAssociatedStmt() && "invalid associated stmt?");
 
-  const Expr *AssocStmt = cast<const Expr>(getAssociatedStmt());
   switch (AtomicKind) {
-  case OpenACCAtomicKind::Capture:
-    assert(false && "Only 'read'/'write'/'update' have been implemented here");
-    return {};
-  case OpenACCAtomicKind::Read: {
-    // Read only supports the format 'v = x'; where both sides are a scalar
-    // expression. This can come in 2 forms; BinaryOperator or
-    // CXXOperatorCallExpr (rarely).
-    std::pair<const Expr *, const Expr *> BinaryArgs =
-        getBinaryOpArgs(AssocStmt);
-    // We want the L-value for each side, so we ignore implicit casts.
-    return {BinaryArgs.first->IgnoreImpCasts(),
-            BinaryArgs.second->IgnoreImpCasts(), /*expr=*/nullptr};
-  }
-  case OpenACCAtomicKind::Write: {
-    // Write supports only the format 'x = expr', where the expression is
-    // scalar type, and 'x' is a scalar l value. As above, this can come in 2
-    // forms; Binary Operator or CXXOperatorCallExpr.
-    std::pair<const Expr *, const Expr *> BinaryArgs =
-        getBinaryOpArgs(AssocStmt);
-    // We want the L-value for ONLY the X side, so we ignore implicit casts.
-    // For the right side (the expr), we emit it as an r-value so we need to
-    // maintain implicit casts.
-    return {/*v=*/nullptr, BinaryArgs.first->IgnoreImpCasts(),
-            BinaryArgs.second};
-  }
+  case OpenACCAtomicKind::Read:
+    return OpenACCAtomicConstruct::StmtInfo{
+        OpenACCAtomicConstruct::StmtInfo::StmtForm::Read,
+        *getReadStmtInfo(cast<const Expr>(getAssociatedStmt())),
+        OpenACCAtomicConstruct::SingleStmtInfo::Empty()};
+
+  case OpenACCAtomicKind::Write:
+    return OpenACCAtomicConstruct::StmtInfo{
+        OpenACCAtomicConstruct::StmtInfo::StmtForm::Write,
+        *getWriteStmtInfo(cast<const Expr>(getAssociatedStmt())),
+        OpenACCAtomicConstruct::SingleStmtInfo::Empty()};
+
   case OpenACCAtomicKind::None:
-  case OpenACCAtomicKind::Update: {
-    std::pair<bool, const Expr *> UnaryArgs = getUnaryOpArgs(AssocStmt);
-    if (UnaryArgs.first)
-      return {/*v=*/nullptr, UnaryArgs.second->IgnoreImpCasts(),
-              /*expr=*/nullptr};
-
-    std::pair<const Expr *, const Expr *> BinaryArgs =
-        getBinaryOpArgs(AssocStmt);
-    // For binary args, we just store the RHS as an expression (in the
-    // expression slot), since the codegen just wants the whole thing for a
-    // recipe.
-    return {/*v=*/nullptr, BinaryArgs.first->IgnoreImpCasts(),
-            BinaryArgs.second};
-  }
+  case OpenACCAtomicKind::Update:
+    return OpenACCAtomicConstruct::StmtInfo{
+        OpenACCAtomicConstruct::StmtInfo::StmtForm::Update,
+        *getUpdateStmtInfo(cast<const Expr>(getAssociatedStmt())),
+        OpenACCAtomicConstruct::SingleStmtInfo::Empty()};
+
+  case OpenACCAtomicKind::Capture:
+    return getCaptureStmtInfo(getAssociatedStmt());
   }
 
   llvm_unreachable("unknown OpenACC atomic kind");
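A hedged consumer-side sketch of the new dispatch, using only names visible in this patch (the surrounding setup is assumed):

    // Given an OpenACCAtomicConstruct *AC, every atomic kind, including
    // capture, now yields a decomposed StmtInfo instead of asserting:
    const OpenACCAtomicConstruct::StmtInfo Info = AC->getAssociatedStmtInfo();
    // For a capture, Info packages two SingleStmtInfo operations in execution
    // order (read/update, update/read, or read/write); for the other kinds,
    // the second slot is SingleStmtInfo::Empty().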
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 05b64cc..4a8c638 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -546,6 +546,8 @@ void OMPClauseProfiler::VisitOMPNocontextClause(const OMPNocontextClause *C) {
 void OMPClauseProfiler::VisitOMPDefaultClause(const OMPDefaultClause *C) {
 }
 
+void OMPClauseProfiler::VisitOMPThreadsetClause(const OMPThreadsetClause *C) {}
+
 void OMPClauseProfiler::VisitOMPProcBindClause(const OMPProcBindClause *C) {
 }
 
 void OMPClauseProfiler::VisitOMPUnifiedAddressClause(
@@ -966,6 +968,12 @@ void OMPClauseProfiler::VisitOMPXDynCGroupMemClause(
   if (Expr *Size = C->getSize())
     Profiler->VisitStmt(Size);
 }
+void OMPClauseProfiler::VisitOMPDynGroupprivateClause(
+    const OMPDynGroupprivateClause *C) {
+  VisitOMPClauseWithPreInit(C);
+  if (auto *Size = C->getSize())
+    Profiler->VisitStmt(Size);
+}
 void OMPClauseProfiler::VisitOMPDoacrossClause(const OMPDoacrossClause *C) {
   VisitOMPClauseList(C);
 }
diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp
index 41aebdb..7bc0404d 100644
--- a/clang/lib/AST/TextNodeDumper.cpp
+++ b/clang/lib/AST/TextNodeDumper.cpp
@@ -850,7 +850,10 @@ void TextNodeDumper::Visit(const APValue &Value, QualType Ty) {
     return;
   }
   case APValue::AddrLabelDiff:
-    OS << "AddrLabelDiff <todo>";
+    OS << "AddrLabelDiff ";
+    OS << "&&" << Value.getAddrLabelDiffLHS()->getLabel()->getName();
+    OS << " - ";
+    OS << "&&" << Value.getAddrLabelDiffRHS()->getLabel()->getName();
     return;
   }
   llvm_unreachable("Unknown APValue kind!");
 }
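A hedged reminder of the GNU extension that produces an APValue::AddrLabelDiff in the first place, which the dumper previously printed as a <todo> placeholder:

    // Label-address difference (GNU &&label extension); constant evaluation
    // yields APValue::AddrLabelDiff, now dumped as "AddrLabelDiff &&L2 - &&L1".
    void f(void) {
    L1:;
    L2:;
      static const int d = &&L2 - &&L1;
      (void)d;
    }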
diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp
index c18b2ea..d2881d5 100644
--- a/clang/lib/AST/TypePrinter.cpp
+++ b/clang/lib/AST/TypePrinter.cpp
@@ -131,8 +131,6 @@ public:
   void printBefore(QualType T, raw_ostream &OS);
   void printAfter(QualType T, raw_ostream &OS);
 
-  void AppendScope(DeclContext *DC, raw_ostream &OS,
-                   DeclarationName NameInScope);
   void printTagType(const TagType *T, raw_ostream &OS);
   void printFunctionAfter(const FunctionType::ExtInfo &Info, raw_ostream &OS);
 #define ABSTRACT_TYPE(CLASS, PARENT)
@@ -1226,7 +1224,7 @@ void TypePrinter::printTypeSpec(NamedDecl *D, raw_ostream &OS) {
   // In C, this will always be empty except when the type
   // being printed is anonymous within other Record.
   if (!Policy.SuppressScope)
-    AppendScope(D->getDeclContext(), OS, D->getDeclName());
+    D->printNestedNameSpecifier(OS, Policy);
 
   IdentifierInfo *II = D->getIdentifier();
   OS << II->getName();
@@ -1240,7 +1238,7 @@ void TypePrinter::printUnresolvedUsingBefore(const UnresolvedUsingType *T,
     OS << ' ';
   auto *D = T->getDecl();
   if (Policy.FullyQualifiedName || T->isCanonicalUnqualified()) {
-    AppendScope(D->getDeclContext(), OS, D->getDeclName());
+    D->printNestedNameSpecifier(OS, Policy);
   } else {
     T->getQualifier().print(OS, Policy);
   }
@@ -1257,7 +1255,7 @@ void TypePrinter::printUsingBefore(const UsingType *T, raw_ostream &OS) {
     OS << ' ';
   auto *D = T->getDecl();
   if (Policy.FullyQualifiedName) {
-    AppendScope(D->getDeclContext(), OS, D->getDeclName());
+    D->printNestedNameSpecifier(OS, Policy);
   } else {
     T->getQualifier().print(OS, Policy);
   }
@@ -1273,7 +1271,7 @@ void TypePrinter::printTypedefBefore(const TypedefType *T, raw_ostream &OS) {
     OS << ' ';
   auto *D = T->getDecl();
   if (Policy.FullyQualifiedName) {
-    AppendScope(D->getDeclContext(), OS, D->getDeclName());
+    D->printNestedNameSpecifier(OS, Policy);
   } else {
     T->getQualifier().print(OS, Policy);
   }
@@ -1511,59 +1509,6 @@ void TypePrinter::printPredefinedSugarBefore(const PredefinedSugarType *T,
 void TypePrinter::printPredefinedSugarAfter(const PredefinedSugarType *T,
                                             raw_ostream &OS) {}
 
-/// Appends the given scope to the end of a string.
-void TypePrinter::AppendScope(DeclContext *DC, raw_ostream &OS,
-                              DeclarationName NameInScope) {
-  if (DC->isTranslationUnit())
-    return;
-
-  // FIXME: Consider replacing this with NamedDecl::printNestedNameSpecifier,
-  // which can also print names for function and method scopes.
-  if (DC->isFunctionOrMethod())
-    return;
-
-  if (Policy.Callbacks && Policy.Callbacks->isScopeVisible(DC))
-    return;
-
-  if (const auto *NS = dyn_cast<NamespaceDecl>(DC)) {
-    if (Policy.SuppressUnwrittenScope && NS->isAnonymousNamespace())
-      return AppendScope(DC->getParent(), OS, NameInScope);
-
-    // Only suppress an inline namespace if the name has the same lookup
-    // results in the enclosing namespace.
-    if (Policy.SuppressInlineNamespace !=
-            PrintingPolicy::SuppressInlineNamespaceMode::None &&
-        NS->isInline() && NameInScope &&
-        NS->isRedundantInlineQualifierFor(NameInScope))
-      return AppendScope(DC->getParent(), OS, NameInScope);
-
-    AppendScope(DC->getParent(), OS, NS->getDeclName());
-    if (NS->getIdentifier())
-      OS << NS->getName() << "::";
-    else
-      OS << "(anonymous namespace)::";
-  } else if (const auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(DC)) {
-    AppendScope(DC->getParent(), OS, Spec->getDeclName());
-    IncludeStrongLifetimeRAII Strong(Policy);
-    OS << Spec->getIdentifier()->getName();
-    const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs();
-    printTemplateArgumentList(
-        OS, TemplateArgs.asArray(), Policy,
-        Spec->getSpecializedTemplate()->getTemplateParameters());
-    OS << "::";
-  } else if (const auto *Tag = dyn_cast<TagDecl>(DC)) {
-    AppendScope(DC->getParent(), OS, Tag->getDeclName());
-    if (TypedefNameDecl *Typedef = Tag->getTypedefNameForAnonDecl())
-      OS << Typedef->getIdentifier()->getName() << "::";
-    else if (Tag->getIdentifier())
-      OS << Tag->getIdentifier()->getName() << "::";
-    else
-      return;
-  } else {
-    AppendScope(DC->getParent(), OS, NameInScope);
-  }
-}
-
 void TypePrinter::printTagType(const TagType *T, raw_ostream &OS) {
   TagDecl *D = T->getDecl();
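The removed helper's own FIXME suggested this direction; a hedged sketch of the call the printer now leans on (the wrapper function is illustrative, not part of the patch):

    #include "clang/AST/Decl.h"
    #include "clang/AST/PrettyPrinter.h"
    #include "llvm/Support/raw_ostream.h"

    // For namespace ns { struct S { struct T {}; }; }, prints "ns::S::T".
    static void printQualifiedName(const clang::NamedDecl *D,
                                   llvm::raw_ostream &OS,
                                   const clang::PrintingPolicy &Policy) {
      D->printNestedNameSpecifier(OS, Policy); // emits "ns::S::"
      OS << D->getName();                      // emits "T"
    }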
@@ -1593,7 +1538,7 @@ void TypePrinter::printTagType(const TagType *T, raw_ostream &OS) {
     // Compute the full nested-name-specifier for this type.
     // In C, this will always be empty except when the type
     // being printed is anonymous within other Record.
-    AppendScope(D->getDeclContext(), OS, D->getDeclName());
+    D->printNestedNameSpecifier(OS, Policy);
   }
 
   if (const IdentifierInfo *II = D->getIdentifier())
@@ -1809,7 +1754,7 @@ void TypePrinter::printTemplateId(const TemplateSpecializationType *T,
   // FIXME: Null TD never exercised in test suite.
   if (FullyQualify && TD) {
     if (!Policy.SuppressScope)
-      AppendScope(TD->getDeclContext(), OS, TD->getDeclName());
+      TD->printNestedNameSpecifier(OS, Policy);
 
     OS << TD->getName();
   } else {
