106 files changed, 4151 insertions, 408 deletions
diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp index 8d023cd..4f060cd 100644 --- a/bolt/unittests/Profile/PerfSpeEvents.cpp +++ b/bolt/unittests/Profile/PerfSpeEvents.cpp @@ -161,4 +161,92 @@ TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstack) { parseAndCheckBrstackEvents(1234, ExpectedSamples); } +TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstackAndPbt) { + // Check perf input with SPE branch events in brstack format by + // combining it with the previous branch target address (PBT). + // Example collection command: + // ``` + // perf record -e 'arm_spe_0/branch_filter=1/u' -- BINARY + // ``` + // How Bolt extracts the branch events: + // ``` + // perf script -F pid,brstack --itrace=bl + // ``` + + opts::ArmSPE = true; + opts::ReadPerfEvents = + // "<PID> <SRC>/<DEST>/PN/-/-/10/COND/- <NULL>/<PBT>/-/-/-/0//-\n" + " 4567 0xa002/0xa003/PN/-/-/10/COND/- 0x0/0xa001/-/-/-/0//-\n" + " 4567 0xb002/0xb003/P/-/-/4/RET/- 0x0/0xb001/-/-/-/0//-\n" + " 4567 0xc456/0xc789/P/-/-/13/-/- 0x0/0xc123/-/-/-/0//-\n" + " 4567 0xd456/0xd789/M/-/-/7/RET/- 0x0/0xd123/-/-/-/0//-\n" + " 4567 0xe005/0xe009/P/-/-/14/RET/- 0x0/0xe001/-/-/-/0//-\n" + " 4567 0xd456/0xd789/M/-/-/7/RET/- 0x0/0xd123/-/-/-/0//-\n" + " 4567 0xf002/0xf003/MN/-/-/8/COND/- 0x0/0xf001/-/-/-/0//-\n" + " 4567 0xc456/0xc789/P/-/-/13/-/- 0x0/0xc123/-/-/-/0//-\n"; + + // ExpectedSamples contains the aggregated information about + // a branch {{From, To, TraceTo}, {TakenCount, MispredCount}}. + // Where + // - From: the source address of the sampled branch operation. + // - To: the target address of the sampled branch operation. + // - TraceTo could be either + // - 'Type = Trace::BR_ONLY', which means the trace contains only branch + // data, + // - or an address, when the trace contains information about the previous + // branch. + // + // When FEAT_SPE_PBT is present, Arm SPE emits two records per sample: + // - the current branch (Spe.From/Spe.To), and + // - the previous taken branch target (PBT) (PBT.From, PBT.To). + // + // Together they behave like a depth-1 branch stack where: + // - the PBT entry is always taken + // - the current branch entry may represent a taken branch or a fall-through + // - the destination (Spe.To) is the architecturally executed target + // + // Fall-throughs may need to be inferred between the PBT entry and the + // current branch's source (Spe.From), but never between the current + // branch's own endpoints (Spe.From/Spe.To). + // + // PBT records only the target address (PBT.To), meaning we have no + // information about the branch source (PBT.From=0x0), the branch type, or + // the prediction bit. + // + // Consider the trace pair: + // {{Spe.From, Spe.To, Type}, {TK, MP}}, + // {{PBT.From, PBT.To, TraceTo}, {TK, MP}} + // {{0xd456, 0xd789, Trace::BR_ONLY}, {2, 2}}, {{0x0, 0xd123, 0xd456}, {2, 0}} + // + // The first entry is the Spe record, which represents a trace from 0xd456 + // (Spe.From) to 0xd789 (Spe.To). Type = Trace::BR_ONLY, as Bolt processes the + // current branch event first. At this point we have no information about the + // previous trace (PBT). This entry has TakenCount = 2, as we have two + // samples for (0xd456, 0xd789) in our input. It also has MispredCount = 2, + // as the 'M' misprediction flag appears in both cases. + // + // The second entry is the PBT record. TakenCount = 2 because the + // (PBT.From = 0x0, PBT.To = 0xd123) branch target appears twice in the input, + // and MispredCount = 0 because prediction data is absent. 
There is no branch + // source information, so the PBT.From field is zero (0x0). TraceTo = 0xd456 + // connects the flow from the previous taken branch at 0xd123 (PBT.To) to the + // current branch's source at 0xd456 (Spe.From), which then continues to 0xd789 + // (Spe.To). + std::vector<std::pair<Trace, TakenBranchInfo>> ExpectedSamples = { + {{0xa002, 0xa003, Trace::BR_ONLY}, {1, 0}}, + {{0x0, 0xa001, 0xa002}, {1, 0}}, + {{0xb002, 0xb003, Trace::BR_ONLY}, {1, 0}}, + {{0x0, 0xb001, 0xb002}, {1, 0}}, + {{0xc456, 0xc789, Trace::BR_ONLY}, {2, 0}}, + {{0x0, 0xc123, 0xc456}, {2, 0}}, + {{0xd456, 0xd789, Trace::BR_ONLY}, {2, 2}}, + {{0x0, 0xd123, 0xd456}, {2, 0}}, + {{0xe005, 0xe009, Trace::BR_ONLY}, {1, 0}}, + {{0x0, 0xe001, 0xe005}, {1, 0}}, + {{0xf002, 0xf003, Trace::BR_ONLY}, {1, 1}}, + {{0x0, 0xf001, 0xf002}, {1, 0}}}; + + parseAndCheckBrstackEvents(4567, ExpectedSamples); +} + #endif diff --git a/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.cpp b/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.cpp index a89a896..e7d97b2 100644 --- a/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.cpp +++ b/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.cpp @@ -13,17 +13,10 @@ using namespace clang::ast_matchers; namespace clang::tidy::hicpp { -namespace { -AST_MATCHER(VarDecl, isAsm) { return Node.hasAttr<clang::AsmLabelAttr>(); } -const ast_matchers::internal::VariadicDynCastAllOfMatcher<Decl, - FileScopeAsmDecl> - fileScopeAsmDecl; // NOLINT(readability-identifier-*) preserve clang style -} // namespace - void NoAssemblerCheck::registerMatchers(MatchFinder *Finder) { Finder->addMatcher(asmStmt().bind("asm-stmt"), this); Finder->addMatcher(fileScopeAsmDecl().bind("asm-file-scope"), this); - Finder->addMatcher(varDecl(isAsm()).bind("asm-var"), this); + Finder->addMatcher(varDecl(hasAttr(attr::AsmLabel)).bind("asm-var"), this); } void NoAssemblerCheck::check(const MatchFinder::MatchResult &Result) { diff --git a/clang/docs/LibASTMatchersReference.html b/clang/docs/LibASTMatchersReference.html index 5b2a96d..ac1abb4 100644 --- a/clang/docs/LibASTMatchersReference.html +++ b/clang/docs/LibASTMatchersReference.html @@ -825,6 +825,20 @@ fieldDecl() </pre></td></tr> +<tr><td>Matcher<<a href="https://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>></td><td class="name" onclick="toggle('fileScopeAsmDecl0')"><a name="fileScopeAsmDecl0Anchor">fileScopeAsmDecl</a></td><td>Matcher<<a href="https://clang.llvm.org/doxygen/classclang_1_1FileScopeAsmDecl.html">FileScopeAsmDecl</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="fileScopeAsmDecl0"><pre>Matches top-level asm declarations. + +Given + __asm("nop"); + void f() { + __asm("mov al, 2"); + } +fileScopeAsmDecl() + matches '__asm("nop")', + but not '__asm("mov al, 2")'. +</pre></td></tr> + + <tr><td>Matcher<<a href="https://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>></td><td class="name" onclick="toggle('friendDecl0')"><a name="friendDecl0Anchor">friendDecl</a></td><td>Matcher<<a href="https://clang.llvm.org/doxygen/classclang_1_1FriendDecl.html">FriendDecl</a>>...</td></tr> <tr><td colspan="4" class="doc" id="friendDecl0"><pre>Matches friend declarations. 
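For reference, a minimal sketch (not part of this patch) of driving the new `fileScopeAsmDecl` matcher from a standalone `MatchFinder`, mirroring its use in NoAssemblerCheck above; the callback class and registration helper are illustrative names:

```cpp
// Sketch: report every file-scope asm declaration in a TU. Only
// fileScopeAsmDecl() comes from this patch; the rest is ordinary
// MatchFinder boilerplate.
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"

using namespace clang;
using namespace clang::ast_matchers;

namespace {
class FileScopeAsmReporter : public MatchFinder::MatchCallback {
  void run(const MatchFinder::MatchResult &Result) override {
    // Fires for '__asm("nop");' at file scope, but not for asm statements
    // inside function bodies (those are matched by asmStmt() instead).
    if (const auto *D =
            Result.Nodes.getNodeAs<FileScopeAsmDecl>("asm-file-scope"))
      D->dump();
  }
};
} // namespace

// Illustrative registration, mirroring NoAssemblerCheck::registerMatchers.
void registerFileScopeAsmMatcher(MatchFinder &Finder,
                                 FileScopeAsmReporter &CB) {
  Finder.addMatcher(fileScopeAsmDecl().bind("asm-file-scope"), &CB);
}
```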
diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h index 98e62de..bca2d84 100644 --- a/clang/include/clang/ASTMatchers/ASTMatchers.h +++ b/clang/include/clang/ASTMatchers/ASTMatchers.h @@ -2478,6 +2478,21 @@ extern const internal::VariadicDynCastAllOfMatcher<Stmt, NullStmt> nullStmt; /// matches '__asm("mov al, 2")' extern const internal::VariadicDynCastAllOfMatcher<Stmt, AsmStmt> asmStmt; +/// Matches top-level asm declarations. +/// +/// Given +/// \code +/// __asm("nop"); +/// void f() { +/// __asm("mov al, 2"); +/// } +/// \endcode +/// fileScopeAsmDecl() +/// matches '__asm("nop")', +/// but not '__asm("mov al, 2")'. +extern const internal::VariadicDynCastAllOfMatcher<Decl, FileScopeAsmDecl> + fileScopeAsmDecl; + /// Matches bool literals. /// /// Example matches true diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 749f531..1013bfc 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -5017,6 +5017,10 @@ def HLSLUnparsedSemantic : HLSLAnnotationAttr { let Documentation = [InternalOnly]; } +def HLSLUserSemantic : HLSLSemanticAttr</* Indexable= */ 1> { + let Documentation = [InternalOnly]; +} + def HLSLSV_Position : HLSLSemanticAttr</* Indexable= */ 1> { let Documentation = [HLSLSV_PositionDocs]; } diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index fa50953..f43707e 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -13184,6 +13184,7 @@ def err_hlsl_semantic_indexing_not_supported : Error<"semantic %0 does not allow indexing">; def err_hlsl_init_priority_unsupported : Error< "initializer priorities are not supported in HLSL">; +def err_hlsl_semantic_index_overlap : Error<"semantic index overlap %0">; def warn_hlsl_user_defined_type_missing_member: Warning<"binding type '%select{t|u|b|s|c}0' only applies to types containing %select{SRV resources|UAV resources|constant buffer resources|sampler state|numeric types}0">, InGroup<LegacyConstantRegisterBinding>; def err_hlsl_binding_type_mismatch: Error<"binding type '%select{t|u|b|s|c}0' only applies to %select{SRV resources|UAV resources|constant buffer resources|sampler state|numeric variables in the global scope}0">; diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 6f9a69e..1625851 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -4090,6 +4090,57 @@ def CIR_PrefetchOp : CIR_Op<"prefetch"> { } //===----------------------------------------------------------------------===// +// ObjSizeOp +//===----------------------------------------------------------------------===// + +def CIR_ObjSizeOp : CIR_Op<"objsize", [Pure]> { + let summary = "Implements the llvm.objectsize builtin"; + let description = [{ + The `cir.objsize` operation is designed to provide information to the + optimizer to determine a) whether an operation (like memcpy) will + overflow a buffer that corresponds to an object, or b) whether a runtime + check for overflow is unnecessary. An object in this context means an + allocation of a specific class, structure, array, or other object. + + When the `min` attribute is present, the operation returns the minimum + guaranteed accessible size. When absent (max mode), it returns the maximum + possible object size. 
Corresponds to `llvm.objectsize`'s `min` argument. + + The `dynamic` attribute determines if the value should be evaluated at + runtime. Corresponds to `llvm.objectsize`'s `dynamic` argument. + + The `nullunknown` attribute controls how null pointers are handled. When + present, null pointers are treated as having unknown size. When absent, + null pointers are treated as having 0 size (in min mode) or -1 size + (in max mode). Corresponds to `llvm.objectsize`'s `nullunknown` argument. + + Example: + + ```mlir + %size = cir.objsize min %ptr : !cir.ptr<i32> -> i64 + %dsize = cir.objsize max dynamic %ptr : !cir.ptr<i32> -> i64 + %nsize = cir.objsize min nullunknown %ptr : !cir.ptr<i32> -> i64 + ``` + }]; + + let arguments = (ins + CIR_PointerType:$ptr, + UnitAttr:$min, + UnitAttr:$nullunknown, + UnitAttr:$dynamic + ); + + let results = (outs CIR_AnyFundamentalIntType:$result); + + let assemblyFormat = [{ + (`min` $min^) : (`max`)? + (`nullunknown` $nullunknown^)? + (`dynamic` $dynamic^)? + $ptr `:` qualified(type($ptr)) `->` qualified(type($result)) attr-dict + }]; +} + +//===----------------------------------------------------------------------===// // PtrDiffOp //===----------------------------------------------------------------------===// diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index 6f099a7..af1ffff 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -215,6 +215,7 @@ struct MissingFeatures { static bool builtinCallMathErrno() { return false; } static bool builtinCheckKind() { return false; } static bool cgCapturedStmtInfo() { return false; } + static bool countedBySize() { return false; } static bool cgFPOptionsRAII() { return false; } static bool checkBitfieldClipping() { return false; } static bool cirgenABIInfo() { return false; } diff --git a/clang/include/clang/Sema/SemaHLSL.h b/clang/include/clang/Sema/SemaHLSL.h index 8c3b6ae..28b03ac 100644 --- a/clang/include/clang/Sema/SemaHLSL.h +++ b/clang/include/clang/Sema/SemaHLSL.h @@ -20,7 +20,9 @@ #include "clang/Basic/DiagnosticSema.h" #include "clang/Basic/SourceLocation.h" #include "clang/Sema/SemaBase.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSet.h" #include "llvm/TargetParser/Triple.h" #include <initializer_list> @@ -259,9 +261,11 @@ private: HLSLSemanticAttr *createSemantic(const SemanticInfo &Semantic, DeclaratorDecl *TargetDecl); bool determineActiveSemanticOnScalar(FunctionDecl *FD, DeclaratorDecl *D, - SemanticInfo &ActiveSemantic); + SemanticInfo &ActiveSemantic, + llvm::StringSet<> &ActiveInputSemantics); bool determineActiveSemantic(FunctionDecl *FD, DeclaratorDecl *D, - SemanticInfo &ActiveSemantic); + SemanticInfo &ActiveSemantic, + llvm::StringSet<> &ActiveInputSemantics); void processExplicitBindingsOnDecl(VarDecl *D); diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 4e63400..84f7e62 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -6007,6 +6007,8 @@ bool Compiler<Emitter>::visitSwitchStmt(const SwitchStmt *S) { CaseLabels[SC] = this->getLabel(); const Expr *Value = CS->getLHS(); + if (Value->isValueDependent()) + return false; PrimType ValueT = this->classifyPrim(Value->getType()); // Compare the case statement's value to the switch condition. 
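Both the bytecode-compiler guard above and the ExprConstant.cpp change below bail out when a case expression is still value-dependent at evaluation time. A hedged illustration (not taken from this patch's tests) of how a case label can remain value-dependent until instantiation:

```cpp
// Hypothetical reduction: inside the template, 'case N:' is value-dependent,
// so constant evaluation must not ask for its known constant value yet.
template <int N>
constexpr int pick(int x) {
  switch (x) {
  case N: // value-dependent until pick<N> is instantiated
    return 1;
  default:
    return 0;
  }
}
static_assert(pick<3>(3) == 1, "resolves once N is substituted");
```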
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 8fab6ef..193f87c 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -5452,10 +5452,13 @@ static EvalStmtResult EvaluateSwitch(StmtResult &Result, EvalInfo &Info, } const CaseStmt *CS = cast<CaseStmt>(SC); - APSInt LHS = CS->getLHS()->EvaluateKnownConstInt(Info.Ctx); - APSInt RHS = CS->getRHS() ? CS->getRHS()->EvaluateKnownConstInt(Info.Ctx) - : LHS; - if (LHS <= Value && Value <= RHS) { + const Expr *LHS = CS->getLHS(); + const Expr *RHS = CS->getRHS(); + if (LHS->isValueDependent() || (RHS && RHS->isValueDependent())) + return ESR_Failed; + APSInt LHSValue = LHS->EvaluateKnownConstInt(Info.Ctx); + APSInt RHSValue = RHS ? RHS->EvaluateKnownConstInt(Info.Ctx) : LHSValue; + if (LHSValue <= Value && Value <= RHSValue) { Found = SC; break; } diff --git a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp index 42f124b..0874b3d 100644 --- a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp +++ b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp @@ -954,6 +954,8 @@ const internal::VariadicDynCastAllOfMatcher<Stmt, CXXTryStmt> cxxTryStmt; const internal::VariadicDynCastAllOfMatcher<Stmt, CXXThrowExpr> cxxThrowExpr; const internal::VariadicDynCastAllOfMatcher<Stmt, NullStmt> nullStmt; const internal::VariadicDynCastAllOfMatcher<Stmt, AsmStmt> asmStmt; +const internal::VariadicDynCastAllOfMatcher<Decl, FileScopeAsmDecl> + fileScopeAsmDecl; const internal::VariadicDynCastAllOfMatcher<Stmt, CXXBoolLiteralExpr> cxxBoolLiteral; const internal::VariadicDynCastAllOfMatcher<Stmt, StringLiteral> stringLiteral; diff --git a/clang/lib/ASTMatchers/Dynamic/Registry.cpp b/clang/lib/ASTMatchers/Dynamic/Registry.cpp index 01c03f3..66848f7 100644 --- a/clang/lib/ASTMatchers/Dynamic/Registry.cpp +++ b/clang/lib/ASTMatchers/Dynamic/Registry.cpp @@ -246,6 +246,7 @@ RegistryMaps::RegistryMaps() { REGISTER_MATCHER(expr); REGISTER_MATCHER(exprWithCleanups); REGISTER_MATCHER(fieldDecl); + REGISTER_MATCHER(fileScopeAsmDecl); REGISTER_MATCHER(fixedPointLiteral); REGISTER_MATCHER(floatLiteral); REGISTER_MATCHER(forCallable); diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index 0803910..4e6a5ee 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -481,6 +481,19 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, return emitCall(e->getCallee()->getType(), CIRGenCallee::forDirect(fnOp), e, returnValue); } + case Builtin::BI__builtin_dynamic_object_size: + case Builtin::BI__builtin_object_size: { + unsigned type = + e->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue(); + auto resType = mlir::cast<cir::IntType>(convertType(e->getType())); + + // We pass this builtin onto the optimizer so that it can figure out the + // object size in more complex cases. 
+ bool isDynamic = builtinID == Builtin::BI__builtin_dynamic_object_size; + return RValue::get(emitBuiltinObjectSize(e->getArg(0), type, resType, + /*EmittedE=*/nullptr, isDynamic)); + } + case Builtin::BI__builtin_prefetch: { auto evaluateOperandAsInt = [&](const Expr *arg) { Expr::EvalResult res; @@ -663,3 +676,42 @@ mlir::Value CIRGenFunction::emitVAArg(VAArgExpr *ve) { mlir::Value vaList = emitVAListRef(ve->getSubExpr()).getPointer(); return cir::VAArgOp::create(builder, loc, type, vaList); } + +mlir::Value CIRGenFunction::emitBuiltinObjectSize(const Expr *e, unsigned type, + cir::IntType resType, + mlir::Value emittedE, + bool isDynamic) { + assert(!cir::MissingFeatures::opCallImplicitObjectSizeArgs()); + + // LLVM can't handle type=3 appropriately, and __builtin_object_size shouldn't + // evaluate e for side-effects. In either case, just like the original LLVM + // lowering, we shouldn't lower to `cir.objsize` but should emit a constant + // instead. + if (type == 3 || (!emittedE && e->HasSideEffects(getContext()))) + return builder.getConstInt(getLoc(e->getSourceRange()), resType, + (type & 2) ? 0 : -1); + + mlir::Value ptr = emittedE ? emittedE : emitScalarExpr(e); + assert(mlir::isa<cir::PointerType>(ptr.getType()) && + "Non-pointer passed to __builtin_object_size?"); + + assert(!cir::MissingFeatures::countedBySize()); + + // Extract the min/max mode from type. CIR only supports type 0 + // (max, whole object) and type 2 (min, whole object), not type 1 or 3 + // (closest subobject variants). + const bool min = ((type & 2) != 0); + // For GCC compatibility, __builtin_object_size treats NULL as unknown size. + auto op = + cir::ObjSizeOp::create(builder, getLoc(e->getSourceRange()), resType, ptr, + min, /*nullUnknown=*/true, isDynamic); + return op.getResult(); +} + +mlir::Value CIRGenFunction::evaluateOrEmitBuiltinObjectSize( + const Expr *e, unsigned type, cir::IntType resType, mlir::Value emittedE, + bool isDynamic) { + uint64_t objectSize; + if (!e->tryEvaluateObjectSize(objectSize, getContext(), type)) + return emitBuiltinObjectSize(e, type, resType, emittedE, isDynamic); + return builder.getConstInt(getLoc(e->getSourceRange()), resType, objectSize); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index 1c52a78..f879e58 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -1307,6 +1307,28 @@ public: RValue emitBuiltinExpr(const clang::GlobalDecl &gd, unsigned builtinID, const clang::CallExpr *e, ReturnValueSlot returnValue); + /// Returns a Value corresponding to the size of the given expression by + /// emitting a `cir.objsize` operation. + /// + /// \param e The expression whose object size to compute + /// \param type Determines the semantics of the object size computation. + /// The type parameter is a 2-bit value where: + /// bit 0 (type & 1): 0 = whole object, 1 = closest subobject + /// bit 1 (type & 2): 0 = maximum size, 2 = minimum size + /// \param resType The result type for the size value + /// \param emittedE Optional pre-emitted pointer value. If non-null, we'll + /// call `cir.objsize` on this value rather than emitting e. 
+ /// \param isDynamic If true, allows runtime evaluation via dynamic mode + mlir::Value emitBuiltinObjectSize(const clang::Expr *e, unsigned type, + cir::IntType resType, mlir::Value emittedE, + bool isDynamic); + + mlir::Value evaluateOrEmitBuiltinObjectSize(const clang::Expr *e, + unsigned type, + cir::IntType resType, + mlir::Value emittedE, + bool isDynamic); + RValue emitCall(const CIRGenFunctionInfo &funcInfo, const CIRGenCallee &callee, ReturnValueSlot returnValue, const CallArgList &args, cir::CIRCallOpInterface *callOp, diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index ba967a4..b4afed7 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -2832,6 +2832,29 @@ static void collectUnreachable(mlir::Operation *parent, } } +mlir::LogicalResult CIRToLLVMObjSizeOpLowering::matchAndRewrite( + cir::ObjSizeOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + mlir::Type llvmResTy = getTypeConverter()->convertType(op.getType()); + mlir::Location loc = op->getLoc(); + + mlir::IntegerType i1Ty = rewriter.getI1Type(); + + auto i1Val = [&rewriter, &loc, &i1Ty](bool val) { + return mlir::LLVM::ConstantOp::create(rewriter, loc, i1Ty, val); + }; + + replaceOpWithCallLLVMIntrinsicOp(rewriter, op, "llvm.objectsize", llvmResTy, + { + adaptor.getPtr(), + i1Val(op.getMin()), + i1Val(op.getNullunknown()), + i1Val(op.getDynamic()), + }); + + return mlir::LogicalResult::success(); +} + void ConvertCIRToLLVMPass::processCIRAttrs(mlir::ModuleOp module) { // Lower the module attributes to LLVM equivalents. if (mlir::Attribute tripleAttr = diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index 945f9e2..e392a12 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -549,6 +549,16 @@ static void addSPIRVBuiltinDecoration(llvm::GlobalVariable *GV, GV->addMetadata("spirv.Decorations", *Decoration); } +static void addLocationDecoration(llvm::GlobalVariable *GV, unsigned Location) { + LLVMContext &Ctx = GV->getContext(); + IRBuilder<> B(GV->getContext()); + MDNode *Operands = + MDNode::get(Ctx, {ConstantAsMetadata::get(B.getInt32(/* Location */ 30)), + ConstantAsMetadata::get(B.getInt32(Location))}); + MDNode *Decoration = MDNode::get(Ctx, {Operands}); + GV->addMetadata("spirv.Decorations", *Decoration); +} + static llvm::Value *createSPIRVBuiltinLoad(IRBuilder<> &B, llvm::Module &M, llvm::Type *Ty, const Twine &Name, unsigned BuiltInID) { @@ -562,6 +572,69 @@ static llvm::Value *createSPIRVBuiltinLoad(IRBuilder<> &B, llvm::Module &M, return B.CreateLoad(Ty, GV); } +static llvm::Value *createSPIRVLocationLoad(IRBuilder<> &B, llvm::Module &M, + llvm::Type *Ty, unsigned Location, + StringRef Name) { + auto *GV = new llvm::GlobalVariable( + M, Ty, /* isConstant= */ true, llvm::GlobalValue::ExternalLinkage, + /* Initializer= */ nullptr, /* Name= */ Name, /* insertBefore= */ nullptr, + llvm::GlobalVariable::GeneralDynamicTLSModel, + /* AddressSpace */ 7, /* isExternallyInitialized= */ true); + GV->setVisibility(llvm::GlobalValue::HiddenVisibility); + addLocationDecoration(GV, Location); + return B.CreateLoad(Ty, GV); +} + +llvm::Value * +CGHLSLRuntime::emitSPIRVUserSemanticLoad(llvm::IRBuilder<> &B, llvm::Type *Type, + HLSLSemanticAttr *Semantic, + std::optional<unsigned> Index) { + Twine BaseName = Twine(Semantic->getAttrName()->getName()); + Twine VariableName = 
BaseName.concat(Twine(Index.value_or(0))); + + unsigned Location = SPIRVLastAssignedInputSemanticLocation; + + // DXC completely ignores the semantic/index pair. Locations are assigned + // from the first semantic to the last. + llvm::ArrayType *AT = dyn_cast<llvm::ArrayType>(Type); + unsigned ElementCount = AT ? AT->getNumElements() : 1; + SPIRVLastAssignedInputSemanticLocation += ElementCount; + return createSPIRVLocationLoad(B, CGM.getModule(), Type, Location, + VariableName.str()); +} + +llvm::Value * +CGHLSLRuntime::emitDXILUserSemanticLoad(llvm::IRBuilder<> &B, llvm::Type *Type, + HLSLSemanticAttr *Semantic, + std::optional<unsigned> Index) { + Twine BaseName = Twine(Semantic->getAttrName()->getName()); + Twine VariableName = BaseName.concat(Twine(Index.value_or(0))); + + // DXIL packing rules etc. shall be handled here. + // FIXME: generate proper sigpoint, index, col, row values. + // FIXME: also DXIL loads vectors element by element. + SmallVector<Value *> Args{B.getInt32(4), B.getInt32(0), B.getInt32(0), + B.getInt8(0), + llvm::PoisonValue::get(B.getInt32Ty())}; + + llvm::Intrinsic::ID IntrinsicID = llvm::Intrinsic::dx_load_input; + llvm::Value *Value = B.CreateIntrinsic(/*ReturnType=*/Type, IntrinsicID, Args, + nullptr, VariableName); + return Value; +} + +llvm::Value *CGHLSLRuntime::emitUserSemanticLoad( + IRBuilder<> &B, llvm::Type *Type, const clang::DeclaratorDecl *Decl, + HLSLSemanticAttr *Semantic, std::optional<unsigned> Index) { + if (CGM.getTarget().getTriple().isSPIRV()) + return emitSPIRVUserSemanticLoad(B, Type, Semantic, Index); + + if (CGM.getTarget().getTriple().isDXIL()) + return emitDXILUserSemanticLoad(B, Type, Semantic, Index); + + llvm_unreachable("Unsupported target for user-semantic load."); +} + llvm::Value *CGHLSLRuntime::emitSystemSemanticLoad( IRBuilder<> &B, llvm::Type *Type, const clang::DeclaratorDecl *Decl, Attr *Semantic, std::optional<unsigned> Index) { @@ -626,6 +699,9 @@ CGHLSLRuntime::handleScalarSemanticLoad(IRBuilder<> &B, const FunctionDecl *FD, std::optional<unsigned> Index = std::nullopt; if (Semantic->isSemanticIndexExplicit()) Index = Semantic->getSemanticIndex(); + + if (isa<HLSLUserSemanticAttr>(Semantic)) + return emitUserSemanticLoad(B, Type, Decl, Semantic, Index); return emitSystemSemanticLoad(B, Type, Decl, Semantic, Index); } diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index d35df52..9d31714 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -200,9 +200,25 @@ private: llvm::GlobalVariable *BufGV); void initializeBufferFromBinding(const HLSLBufferDecl *BufDecl, llvm::GlobalVariable *GV); + void initializeBufferFromBinding(const HLSLBufferDecl *BufDecl, + llvm::GlobalVariable *GV, + HLSLResourceBindingAttr *RBA); + + llvm::Value *emitSPIRVUserSemanticLoad(llvm::IRBuilder<> &B, llvm::Type *Type, + HLSLSemanticAttr *Semantic, + std::optional<unsigned> Index); + llvm::Value *emitDXILUserSemanticLoad(llvm::IRBuilder<> &B, llvm::Type *Type, + HLSLSemanticAttr *Semantic, + std::optional<unsigned> Index); + llvm::Value *emitUserSemanticLoad(llvm::IRBuilder<> &B, llvm::Type *Type, + const clang::DeclaratorDecl *Decl, + HLSLSemanticAttr *Semantic, + std::optional<unsigned> Index); + llvm::Triple::ArchType getArch(); llvm::DenseMap<const clang::RecordType *, llvm::TargetExtType *> LayoutTypes; + unsigned SPIRVLastAssignedInputSemanticLocation = 0; }; } // namespace CodeGen diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index b9707f0..a06c57b 
100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -775,6 +775,10 @@ HLSLSemanticAttr *SemaHLSL::createSemantic(const SemanticInfo &Info, DeclaratorDecl *TargetDecl) { std::string SemanticName = Info.Semantic->getAttrName()->getName().upper(); + if (dyn_cast<HLSLUserSemanticAttr>(Info.Semantic)) + return createSemanticAttr<HLSLUserSemanticAttr>(*Info.Semantic, TargetDecl, + Info.Index); + if (SemanticName == "SV_DISPATCHTHREADID") { return createSemanticAttr<HLSLSV_DispatchThreadIDAttr>( *Info.Semantic, TargetDecl, Info.Index); @@ -797,9 +801,10 @@ HLSLSemanticAttr *SemaHLSL::createSemantic(const SemanticInfo &Info, return nullptr; } -bool SemaHLSL::determineActiveSemanticOnScalar(FunctionDecl *FD, - DeclaratorDecl *D, - SemanticInfo &ActiveSemantic) { +bool SemaHLSL::determineActiveSemanticOnScalar( + FunctionDecl *FD, DeclaratorDecl *D, SemanticInfo &ActiveSemantic, + llvm::StringSet<> &ActiveInputSemantics) { + if (ActiveSemantic.Semantic == nullptr) { ActiveSemantic.Semantic = D->getAttr<HLSLSemanticAttr>(); if (ActiveSemantic.Semantic && @@ -818,11 +823,31 @@ bool SemaHLSL::determineActiveSemanticOnScalar(FunctionDecl *FD, checkSemanticAnnotation(FD, D, A); FD->addAttr(A); + + unsigned Location = ActiveSemantic.Index.value_or(0); + + const ConstantArrayType *AT = dyn_cast<ConstantArrayType>(D->getType()); + unsigned ElementCount = AT ? AT->getZExtSize() : 1; + ActiveSemantic.Index = Location + ElementCount; + + Twine BaseName = Twine(ActiveSemantic.Semantic->getAttrName()->getName()); + for (unsigned I = 0; I < ElementCount; ++I) { + Twine VariableName = BaseName.concat(Twine(Location + I)); + + auto [_, Inserted] = ActiveInputSemantics.insert(VariableName.str()); + if (!Inserted) { + Diag(D->getLocation(), diag::err_hlsl_semantic_index_overlap) + << VariableName.str(); + return false; + } + } + return true; } -bool SemaHLSL::determineActiveSemantic(FunctionDecl *FD, DeclaratorDecl *D, - SemanticInfo &ActiveSemantic) { +bool SemaHLSL::determineActiveSemantic( + FunctionDecl *FD, DeclaratorDecl *D, SemanticInfo &ActiveSemantic, + llvm::StringSet<> &ActiveInputSemantics) { if (ActiveSemantic.Semantic == nullptr) { ActiveSemantic.Semantic = D->getAttr<HLSLSemanticAttr>(); if (ActiveSemantic.Semantic && @@ -833,12 +858,13 @@ bool SemaHLSL::determineActiveSemantic(FunctionDecl *FD, DeclaratorDecl *D, const Type *T = D->getType()->getUnqualifiedDesugaredType(); const RecordType *RT = dyn_cast<RecordType>(T); if (!RT) - return determineActiveSemanticOnScalar(FD, D, ActiveSemantic); + return determineActiveSemanticOnScalar(FD, D, ActiveSemantic, + ActiveInputSemantics); const RecordDecl *RD = RT->getDecl(); for (FieldDecl *Field : RD->fields()) { SemanticInfo Info = ActiveSemantic; - if (!determineActiveSemantic(FD, Field, Info)) { + if (!determineActiveSemantic(FD, Field, Info, ActiveInputSemantics)) { Diag(Field->getLocation(), diag::note_hlsl_semantic_used_here) << Field; return false; } @@ -911,12 +937,14 @@ void SemaHLSL::CheckEntryPoint(FunctionDecl *FD) { llvm_unreachable("Unhandled environment in triple"); } + llvm::StringSet<> ActiveInputSemantics; for (ParmVarDecl *Param : FD->parameters()) { SemanticInfo ActiveSemantic; ActiveSemantic.Semantic = nullptr; ActiveSemantic.Index = std::nullopt; - if (!determineActiveSemantic(FD, Param, ActiveSemantic)) { + if (!determineActiveSemantic(FD, Param, ActiveSemantic, + ActiveInputSemantics)) { Diag(Param->getLocation(), diag::note_previous_decl) << Param; FD->setInvalidDecl(); } @@ -947,6 +975,8 @@ void 
SemaHLSL::checkSemanticAnnotation(FunctionDecl *EntryPoint, return; DiagnoseAttrStageMismatch(SemanticAttr, ST, {llvm::Triple::Pixel}); break; + case attr::HLSLUserSemantic: + return; default: llvm_unreachable("Unknown SemanticAttr"); } @@ -1766,7 +1796,7 @@ void SemaHLSL::handleSemanticAttr(Decl *D, const ParsedAttr &AL) { if (AL.getAttrName()->getName().starts_with_insensitive("SV_")) diagnoseSystemSemanticAttr(D, AL, Index); else - Diag(AL.getLoc(), diag::err_hlsl_unknown_semantic) << AL; + D->addAttr(createSemanticAttr<HLSLUserSemanticAttr>(AL, nullptr, Index)); } void SemaHLSL::handlePackOffsetAttr(Decl *D, const ParsedAttr &AL) { diff --git a/clang/test/CIR/CodeGen/object-size-flex-array.c b/clang/test/CIR/CodeGen/object-size-flex-array.c new file mode 100644 index 0000000..74229fd --- /dev/null +++ b/clang/test/CIR/CodeGen/object-size-flex-array.c @@ -0,0 +1,317 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR --check-prefix=CIR-NO-STRICT +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -emit-llvm -disable-llvm-passes %s -o %t-cir.ll +// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM --check-prefix=LLVM-NO-STRICT +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -emit-llvm -disable-llvm-passes %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG --check-prefix=OGCG-NO-STRICT + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -fstrict-flex-arrays=0 -emit-cir %s -o %t-strict-0.cir +// RUN: FileCheck --input-file=%t-strict-0.cir %s --check-prefix=CIR --check-prefix=CIR-STRICT-0 +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -fstrict-flex-arrays=0 -emit-llvm -disable-llvm-passes %s -o %t-cir-strict-0.ll +// RUN: FileCheck --input-file=%t-cir-strict-0.ll %s --check-prefix=LLVM --check-prefix=LLVM-STRICT-0 +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fstrict-flex-arrays=0 -emit-llvm -disable-llvm-passes %s -o %t-strict-0.ll +// RUN: FileCheck --input-file=%t-strict-0.ll %s --check-prefix=OGCG --check-prefix=OGCG-STRICT-0 + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -fstrict-flex-arrays=1 -emit-cir %s -o %t-strict-1.cir +// RUN: FileCheck --input-file=%t-strict-1.cir %s --check-prefix=CIR --check-prefix=CIR-STRICT-1 +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -fstrict-flex-arrays=1 -emit-llvm -disable-llvm-passes %s -o %t-cir-strict-1.ll +// RUN: FileCheck --input-file=%t-cir-strict-1.ll %s --check-prefix=LLVM --check-prefix=LLVM-STRICT-1 +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fstrict-flex-arrays=1 -emit-llvm -disable-llvm-passes %s -o %t-strict-1.ll +// RUN: FileCheck --input-file=%t-strict-1.ll %s --check-prefix=OGCG --check-prefix=OGCG-STRICT-1 + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -fstrict-flex-arrays=2 -emit-cir %s -o %t-strict-2.cir +// RUN: FileCheck --input-file=%t-strict-2.cir %s --check-prefix=CIR --check-prefix=CIR-STRICT-2 +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -fstrict-flex-arrays=2 -emit-llvm -disable-llvm-passes %s -o %t-cir-strict-2.ll +// RUN: FileCheck --input-file=%t-cir-strict-2.ll %s --check-prefix=LLVM --check-prefix=LLVM-STRICT-2 +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fstrict-flex-arrays=2 -emit-llvm -disable-llvm-passes %s -o %t-strict-2.ll +// RUN: FileCheck --input-file=%t-strict-2.ll %s --check-prefix=OGCG 
--check-prefix=OGCG-STRICT-2 + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -fstrict-flex-arrays=3 -emit-cir %s -o %t-strict-3.cir +// RUN: FileCheck --input-file=%t-strict-3.cir %s --check-prefix=CIR --check-prefix=CIR-STRICT-3 +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -fstrict-flex-arrays=3 -emit-llvm -disable-llvm-passes %s -o %t-cir-strict-3.ll +// RUN: FileCheck --input-file=%t-cir-strict-3.ll %s --check-prefix=LLVM --check-prefix=LLVM-STRICT-3 +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fstrict-flex-arrays=3 -emit-llvm -disable-llvm-passes %s -o %t-strict-3.ll +// RUN: FileCheck --input-file=%t-strict-3.ll %s --check-prefix=OGCG --check-prefix=OGCG-STRICT-3 + +#define OBJECT_SIZE_BUILTIN __builtin_object_size + +typedef struct { + float f; + double c[]; +} foo_t; + +typedef struct { + float f; + double c[0]; +} foo0_t; + +typedef struct { + float f; + double c[1]; +} foo1_t; + +typedef struct { + float f; + double c[2]; +} foo2_t; + +// CIR-LABEL: @bar +// LLVM-LABEL: @bar( +// OGCG-LABEL: @bar( +unsigned bar(foo_t *f) { + // CIR-NO-STRICT: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // CIR-STRICT-0: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // CIR-STRICT-1: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // CIR-STRICT-2: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // CIR-STRICT-3: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false) + // LLVM-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false) + // LLVM-STRICT-1: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false) + // LLVM-STRICT-2: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false) + // LLVM-STRICT-3: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false) + // OGCG-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false) + // OGCG-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false) + // OGCG-STRICT-1: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false) + // OGCG-STRICT-2: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false) + // OGCG-STRICT-3: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false) + return OBJECT_SIZE_BUILTIN(f->c, 1); +} + +// CIR-LABEL: @bar0 +// LLVM-LABEL: @bar0( +// OGCG-LABEL: @bar0( +unsigned bar0(foo0_t *f) { + // CIR-NO-STRICT: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // CIR-STRICT-0: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // CIR-STRICT-1: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // CIR-STRICT-2: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // CIR-STRICT-3: cir.const #cir.int<0> + // LLVM-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false) + // LLVM-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false) + // LLVM-STRICT-1: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false) + // LLVM-STRICT-2: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false) + // LLVM-STRICT-3: store i32 0 + // OGCG-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false) + // OGCG-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false) + // OGCG-STRICT-1: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false) + // OGCG-STRICT-2: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, 
i1 false) + // OGCG-STRICT-3: ret i32 0 + return OBJECT_SIZE_BUILTIN(f->c, 1); +} + +// CIR-LABEL: @bar1 +// LLVM-LABEL: @bar1( +// OGCG-LABEL: @bar1( +unsigned bar1(foo1_t *f) { + // CIR-NO-STRICT: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // CIR-STRICT-0: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // CIR-STRICT-1: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // CIR-STRICT-2: cir.const #cir.int<8> + // CIR-STRICT-3: cir.const #cir.int<8> + // LLVM-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false) + // LLVM-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false) + // LLVM-STRICT-1: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false) + // LLVM-STRICT-2: store i32 8 + // LLVM-STRICT-3: store i32 8 + // OGCG-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false) + // OGCG-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false) + // OGCG-STRICT-1: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false) + // OGCG-STRICT-2: ret i32 8 + // OGCG-STRICT-3: ret i32 8 + return OBJECT_SIZE_BUILTIN(f->c, 1); +} + +// CIR-LABEL: @bar2 +// LLVM-LABEL: @bar2( +// OGCG-LABEL: @bar2( +unsigned bar2(foo2_t *f) { + // CIR-NO-STRICT: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // CIR-STRICT-0: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // CIR-STRICT-1: cir.const #cir.int<16> + // CIR-STRICT-2: cir.const #cir.int<16> + // CIR-STRICT-3: cir.const #cir.int<16> + // LLVM-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false) + // LLVM-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false) + // LLVM-STRICT-1: store i32 16 + // LLVM-STRICT-2: store i32 16 + // LLVM-STRICT-3: store i32 16 + // OGCG-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false) + // OGCG-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 false) + // OGCG-STRICT-1: ret i32 16 + // OGCG-STRICT-2: ret i32 16 + // OGCG-STRICT-3: ret i32 16 + return OBJECT_SIZE_BUILTIN(f->c, 1); +} + +#define DYNAMIC_OBJECT_SIZE_BUILTIN __builtin_dynamic_object_size + +// CIR-LABEL: @dyn_bar +// LLVM-LABEL: @dyn_bar( +// OGCG-LABEL: @dyn_bar( +unsigned dyn_bar(foo_t *f) { + // CIR-NO-STRICT: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i + // CIR-STRICT-0: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i + // CIR-STRICT-1: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i + // CIR-STRICT-2: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i + // CIR-STRICT-3: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true) + // LLVM-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true) + // LLVM-STRICT-1: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true) + // LLVM-STRICT-2: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true) + // LLVM-STRICT-3: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true) + // OGCG-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true) + // OGCG-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true) + // OGCG-STRICT-1: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true) + // OGCG-STRICT-2: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true) + // OGCG-STRICT-3: llvm.objectsize.i64.p0(ptr 
{{.*}}, i1 false, i1 true, i1 true) + return DYNAMIC_OBJECT_SIZE_BUILTIN(f->c, 1); +} + +// CIR-LABEL: @dyn_bar0 +// LLVM-LABEL: @dyn_bar0( +// OGCG-LABEL: @dyn_bar0( +unsigned dyn_bar0(foo0_t *f) { + // CIR-NO-STRICT: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i + // CIR-STRICT-0: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i + // CIR-STRICT-1: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i + // CIR-STRICT-2: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i + // CIR-STRICT-3: cir.const #cir.int<0> + // LLVM-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true) + // LLVM-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true) + // LLVM-STRICT-1: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true) + // LLVM-STRICT-2: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true) + // LLVM-STRICT-3: store i32 0 + // OGCG-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true) + // OGCG-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true) + // OGCG-STRICT-1: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true) + // OGCG-STRICT-2: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true) + // OGCG-STRICT-3: ret i32 0 + return DYNAMIC_OBJECT_SIZE_BUILTIN(f->c, 1); +} + +// CIR-LABEL: @dyn_bar1 +// LLVM-LABEL: @dyn_bar1( +// OGCG-LABEL: @dyn_bar1( +unsigned dyn_bar1(foo1_t *f) { + // CIR-NO-STRICT: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i + // CIR-STRICT-0: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i + // CIR-STRICT-1: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i + // CIR-STRICT-2: cir.const #cir.int<8> + // CIR-STRICT-3: cir.const #cir.int<8> + // LLVM-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true) + // LLVM-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true) + // LLVM-STRICT-1: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true) + // LLVM-STRICT-2: store i32 8 + // LLVM-STRICT-3: store i32 8 + // OGCG-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true) + // OGCG-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true) + // OGCG-STRICT-1: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true) + // OGCG-STRICT-2: ret i32 8 + // OGCG-STRICT-3: ret i32 8 + return DYNAMIC_OBJECT_SIZE_BUILTIN(f->c, 1); +} + +// CIR-LABEL: @dyn_bar2 +// LLVM-LABEL: @dyn_bar2( +// OGCG-LABEL: @dyn_bar2( +unsigned dyn_bar2(foo2_t *f) { + // CIR-NO-STRICT: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i + // CIR-STRICT-0: cir.objsize max nullunknown dynamic {{.*}} : !cir.ptr<!void> -> !u64i + // CIR-STRICT-1: cir.const #cir.int<16> + // CIR-STRICT-2: cir.const #cir.int<16> + // CIR-STRICT-3: cir.const #cir.int<16> + // LLVM-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true) + // LLVM-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true) + // LLVM-STRICT-1: store i32 16 + // LLVM-STRICT-2: store i32 16 + // LLVM-STRICT-3: store i32 16 + // OGCG-NO-STRICT: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true) + // OGCG-STRICT-0: llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 true) + // OGCG-STRICT-1: ret i32 16 + // OGCG-STRICT-2: ret i32 16 + // OGCG-STRICT-3: ret i32 16 + return DYNAMIC_OBJECT_SIZE_BUILTIN(f->c, 1); +} + +// Also checks for non-trailing flex-array 
like members + +typedef struct { + double c[0]; + float f; +} foofoo0_t; + +typedef struct { + double c[1]; + float f; +} foofoo1_t; + +typedef struct { + double c[2]; + float f; +} foofoo2_t; + +// CIR-LABEL: @babar0 +// LLVM-LABEL: @babar0( +// OGCG-LABEL: @babar0( +unsigned babar0(foofoo0_t *f) { + // CIR-NO-STRICT: cir.const #cir.int<0> + // CIR-STRICT-0: cir.const #cir.int<0> + // CIR-STRICT-1: cir.const #cir.int<0> + // CIR-STRICT-2: cir.const #cir.int<0> + // CIR-STRICT-3: cir.const #cir.int<0> + // LLVM-NO-STRICT: store i32 0 + // LLVM-STRICT-0: store i32 0 + // LLVM-STRICT-1: store i32 0 + // LLVM-STRICT-2: store i32 0 + // LLVM-STRICT-3: store i32 0 + // OGCG-NO-STRICT: ret i32 0 + // OGCG-STRICT-0: ret i32 0 + // OGCG-STRICT-1: ret i32 0 + // OGCG-STRICT-2: ret i32 0 + // OGCG-STRICT-3: ret i32 0 + return OBJECT_SIZE_BUILTIN(f->c, 1); +} + +// CIR-LABEL: @babar1 +// LLVM-LABEL: @babar1( +// OGCG-LABEL: @babar1( +unsigned babar1(foofoo1_t *f) { + // CIR-NO-STRICT: cir.const #cir.int<8> + // CIR-STRICT-0: cir.const #cir.int<8> + // CIR-STRICT-1: cir.const #cir.int<8> + // CIR-STRICT-2: cir.const #cir.int<8> + // CIR-STRICT-3: cir.const #cir.int<8> + // LLVM-NO-STRICT: store i32 8 + // LLVM-STRICT-0: store i32 8 + // LLVM-STRICT-1: store i32 8 + // LLVM-STRICT-2: store i32 8 + // LLVM-STRICT-3: store i32 8 + // OGCG-NO-STRICT: ret i32 8 + // OGCG-STRICT-0: ret i32 8 + // OGCG-STRICT-1: ret i32 8 + // OGCG-STRICT-2: ret i32 8 + // OGCG-STRICT-3: ret i32 8 + return OBJECT_SIZE_BUILTIN(f->c, 1); +} + +// CIR-LABEL: @babar2 +// LLVM-LABEL: @babar2( +// OGCG-LABEL: @babar2( +unsigned babar2(foofoo2_t *f) { + // CIR-NO-STRICT: cir.const #cir.int<16> + // CIR-STRICT-0: cir.const #cir.int<16> + // CIR-STRICT-1: cir.const #cir.int<16> + // CIR-STRICT-2: cir.const #cir.int<16> + // CIR-STRICT-3: cir.const #cir.int<16> + // LLVM-NO-STRICT: store i32 16 + // LLVM-STRICT-0: store i32 16 + // LLVM-STRICT-1: store i32 16 + // LLVM-STRICT-2: store i32 16 + // LLVM-STRICT-3: store i32 16 + // OGCG-NO-STRICT: ret i32 16 + // OGCG-STRICT-0: ret i32 16 + // OGCG-STRICT-1: ret i32 16 + // OGCG-STRICT-2: ret i32 16 + // OGCG-STRICT-3: ret i32 16 + return OBJECT_SIZE_BUILTIN(f->c, 1); +} diff --git a/clang/test/CIR/CodeGen/object-size.c b/clang/test/CIR/CodeGen/object-size.c new file mode 100644 index 0000000..1b10fb8b --- /dev/null +++ b/clang/test/CIR/CodeGen/object-size.c @@ -0,0 +1,877 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG + +char gbuf[63]; +char *gp; +int gi, gj; + +// CIR-LABEL: @test1 +// LLVM-LABEL: define {{.*}} void @test1 +// OGCG-LABEL: define {{.*}} void @test1 +void test1(void) { + // CIR: cir.const #cir.int<59> + // LLVM: store i32 59 + // OGCG: store i32 59 + gi = __builtin_object_size(&gbuf[4], 1); +} + +// CIR-LABEL: @test2 +// LLVM-LABEL: define {{.*}} void @test2 +// OGCG-LABEL: define {{.*}} void @test2 +void test2(void) { + // CIR: cir.const #cir.int<63> + // LLVM: store i32 63 + // OGCG: store i32 63 + gi = __builtin_object_size(gbuf, 1); +} + +// CIR-LABEL: @test3 +// LLVM-LABEL: define {{.*}} void @test3 +// OGCG-LABEL: define {{.*}} void @test3 +void test3(void) { + // CIR: 
cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size(&gbuf[100], 1); +} + +// CIR-LABEL: @test4 +// LLVM-LABEL: define {{.*}} void @test4 +// OGCG-LABEL: define {{.*}} void @test4 +void test4(void) { + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size((char*)(void*)&gbuf[-1], 1); +} + +// CIR-LABEL: @test5 +// LLVM-LABEL: define {{.*}} void @test5 +// OGCG-LABEL: define {{.*}} void @test5 +void test5(void) { + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size(gp, 0); +} + +// CIR-LABEL: @test6 +// LLVM-LABEL: define {{.*}} void @test6 +// OGCG-LABEL: define {{.*}} void @test6 +void test6(void) { + char buf[57]; + + // CIR: cir.const #cir.int<53> + // LLVM: store i32 53 + // OGCG: store i32 53 + gi = __builtin_object_size(&buf[4], 1); +} + +// CIR-LABEL: @test18 +// LLVM-LABEL: define {{.*}} i32 @test18 +// OGCG-LABEL: define {{.*}} i32 @test18 +unsigned test18(int cond) { + int a[4], b[4]; + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64 + // OGCG: call i64 @llvm.objectsize.i64 + return __builtin_object_size(cond ? a : b, 0); +} + +// CIR-LABEL: @test19 +// LLVM-LABEL: define {{.*}} void @test19 +// OGCG-LABEL: define {{.*}} void @test19 +void test19(void) { + struct { + int a, b; + } foo; + + // CIR: cir.const #cir.int<8> + // LLVM: store i32 8 + // OGCG: store i32 8 + gi = __builtin_object_size(&foo.a, 0); + + // CIR: cir.const #cir.int<4> + // LLVM: store i32 4 + // OGCG: store i32 4 + gi = __builtin_object_size(&foo.a, 1); + + // CIR: cir.const #cir.int<8> + // LLVM: store i32 8 + // OGCG: store i32 8 + gi = __builtin_object_size(&foo.a, 2); + + // CIR: cir.const #cir.int<4> + // LLVM: store i32 4 + // OGCG: store i32 4 + gi = __builtin_object_size(&foo.a, 3); + + // CIR: cir.const #cir.int<4> + // LLVM: store i32 4 + // OGCG: store i32 4 + gi = __builtin_object_size(&foo.b, 0); + + // CIR: cir.const #cir.int<4> + // LLVM: store i32 4 + // OGCG: store i32 4 + gi = __builtin_object_size(&foo.b, 1); + + // CIR: cir.const #cir.int<4> + // LLVM: store i32 4 + // OGCG: store i32 4 + gi = __builtin_object_size(&foo.b, 2); + + // CIR: cir.const #cir.int<4> + // LLVM: store i32 4 + // OGCG: store i32 4 + gi = __builtin_object_size(&foo.b, 3); +} + +// CIR-LABEL: @test20 +// LLVM-LABEL: define {{.*}} void @test20 +// OGCG-LABEL: define {{.*}} void @test20 +void test20(void) { + struct { int t[10]; } t[10]; + + // CIR: cir.const #cir.int<380> + // LLVM: store i32 380 + // OGCG: store i32 380 + gi = __builtin_object_size(&t[0].t[5], 0); + + // CIR: cir.const #cir.int<20> + // LLVM: store i32 20 + // OGCG: store i32 20 + gi = __builtin_object_size(&t[0].t[5], 1); + + // CIR: cir.const #cir.int<380> + // LLVM: store i32 380 + // OGCG: store i32 380 + gi = __builtin_object_size(&t[0].t[5], 2); + + // CIR: cir.const #cir.int<20> + // LLVM: store i32 20 + // OGCG: store i32 20 + gi = __builtin_object_size(&t[0].t[5], 3); +} + +// CIR-LABEL: @test21 +// LLVM-LABEL: define {{.*}} void @test21 +// OGCG-LABEL: define {{.*}} void @test21 +void test21(void) { + struct { int t; } t; + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size(&t + 1, 0); + + // CIR: cir.const #cir.int<0> + // LLVM: 
store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size(&t + 1, 1); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size(&t + 1, 2); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size(&t + 1, 3); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size(&t.t + 1, 0); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size(&t.t + 1, 1); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size(&t.t + 1, 2); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size(&t.t + 1, 3); +} + +// CIR-LABEL: @test22 +// LLVM-LABEL: define {{.*}} void @test22 +// OGCG-LABEL: define {{.*}} void @test22 +void test22(void) { + struct { int t[10]; } t[10]; + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size(&t[10], 0); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size(&t[10], 1); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size(&t[10], 2); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size(&t[10], 3); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size(&t[9].t[10], 0); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size(&t[9].t[10], 1); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size(&t[9].t[10], 2); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size(&t[9].t[10], 3); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size((char*)&t[0] + sizeof(t), 0); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size((char*)&t[0] + sizeof(t), 1); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size((char*)&t[0] + sizeof(t), 2); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size((char*)&t[0] + sizeof(t), 3); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size((char*)&t[9].t[0] + 10*sizeof(t[0].t), 0); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size((char*)&t[9].t[0] + 10*sizeof(t[0].t), 1); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size((char*)&t[9].t[0] + 10*sizeof(t[0].t), 2); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size((char*)&t[9].t[0] + 10*sizeof(t[0].t), 3); +} + +struct Test23Ty { int a; int t[10]; }; + +// CIR-LABEL: @test23 +// LLVM-LABEL: define {{.*}} void @test23 +// OGCG-LABEL: define {{.*}} void @test23 +void test23(struct Test23Ty *p) { + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + gi 
= __builtin_object_size(p, 0); + + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size(p, 1); + + // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 + gi = __builtin_object_size(p, 2); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size(p, 3); + + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size(&p->a, 0); + + // CIR: cir.const #cir.int<4> + // LLVM: store i32 4 + // OGCG: store i32 4 + gi = __builtin_object_size(&p->a, 1); + + // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 + gi = __builtin_object_size(&p->a, 2); + + // CIR: cir.const #cir.int<4> + // LLVM: store i32 4 + // OGCG: store i32 4 + gi = __builtin_object_size(&p->a, 3); + + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size(&p->t[5], 0); + + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size(&p->t[5], 1); + + // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 + gi = __builtin_object_size(&p->t[5], 2); + + // CIR: cir.const #cir.int<20> + // LLVM: store i32 20 + // OGCG: store i32 20 + gi = __builtin_object_size(&p->t[5], 3); +} + +// CIR-LABEL: @test24 +// LLVM-LABEL: define {{.*}} void @test24 +// OGCG-LABEL: define {{.*}} void @test24 +void test24(void) { + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size((void*)0, 0); + + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size((void*)0, 1); + + // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 true, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 true, i1 true, i1 + gi = __builtin_object_size((void*)0, 2); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size((void*)0, 3); +} + +// CIR-LABEL: @test25 +// LLVM-LABEL: define {{.*}} void 
@test25 +// OGCG-LABEL: define {{.*}} void @test25 +void test25(void) { + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size((void*)0x1000, 0); + + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size((void*)0x1000, 1); + + // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 true, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 true, i1 true, i1 + gi = __builtin_object_size((void*)0x1000, 2); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size((void*)0x1000, 3); + + // Skipping (void*)0 + 0x1000 tests - void pointer arithmetic NYI in CIR +} + +// CIR-LABEL: @test26 +// LLVM-LABEL: define {{.*}} void @test26 +// OGCG-LABEL: define {{.*}} void @test26 +void test26(void) { + struct { int v[10]; } t[10]; + + // CIR: cir.const #cir.int<316> + // LLVM: store i32 316 + // OGCG: store i32 316 + gi = __builtin_object_size(&t[1].v[11], 0); + + // CIR: cir.const #cir.int<312> + // LLVM: store i32 312 + // OGCG: store i32 312 + gi = __builtin_object_size(&t[1].v[12], 1); + + // CIR: cir.const #cir.int<308> + // LLVM: store i32 308 + // OGCG: store i32 308 + gi = __builtin_object_size(&t[1].v[13], 2); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size(&t[1].v[14], 3); +} + +struct Test27IncompleteTy; + +// CIR-LABEL: @test27 +// LLVM-LABEL: define {{.*}} void @test27 +// OGCG-LABEL: define {{.*}} void @test27 +void test27(struct Test27IncompleteTy *t) { + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size(t, 0); + + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size(t, 1); + + // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 + gi = __builtin_object_size(t, 2); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size(t, 3); + + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size(&test27, 0); + + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size(&test27, 1); + + // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 
@llvm.objectsize.i64.p0(ptr {{.*}}, i1 true, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr {{.*}}, i1 true, i1 true, i1 + gi = __builtin_object_size(&test27, 2); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size(&test27, 3); +} + +// CIR-LABEL: @test28 +// LLVM-LABEL: define {{.*}} void @test28 +// OGCG-LABEL: define {{.*}} void @test28 +void test28(void) { + struct { int v[10]; } t[10]; + + // CIR: cir.const #cir.int<360> + // LLVM: store i32 360 + // OGCG: store i32 360 + gi = __builtin_object_size((char*)((short*)(&t[1])), 0); + + // CIR: cir.const #cir.int<360> + // LLVM: store i32 360 + // OGCG: store i32 360 + gi = __builtin_object_size((char*)((short*)(&t[1])), 1); + + // CIR: cir.const #cir.int<360> + // LLVM: store i32 360 + // OGCG: store i32 360 + gi = __builtin_object_size((char*)((short*)(&t[1])), 2); + + // CIR: cir.const #cir.int<360> + // LLVM: store i32 360 + // OGCG: store i32 360 + gi = __builtin_object_size((char*)((short*)(&t[1])), 3); + + // CIR: cir.const #cir.int<356> + // LLVM: store i32 356 + // OGCG: store i32 356 + gi = __builtin_object_size((char*)((short*)(&t[1].v[1])), 0); + + // CIR: cir.const #cir.int<36> + // LLVM: store i32 36 + // OGCG: store i32 36 + gi = __builtin_object_size((char*)((short*)(&t[1].v[1])), 1); + + // CIR: cir.const #cir.int<356> + // LLVM: store i32 356 + // OGCG: store i32 356 + gi = __builtin_object_size((char*)((short*)(&t[1].v[1])), 2); + + // CIR: cir.const #cir.int<36> + // LLVM: store i32 36 + // OGCG: store i32 36 + gi = __builtin_object_size((char*)((short*)(&t[1].v[1])), 3); +} + +struct DynStructVar { + char fst[16]; + char snd[]; +}; + +struct DynStruct0 { + char fst[16]; + char snd[0]; +}; + +struct DynStruct1 { + char fst[16]; + char snd[1]; +}; + +struct StaticStruct { + char fst[16]; + char snd[2]; +}; + +// CIR-LABEL: @test29 +// LLVM-LABEL: define {{.*}} void @test29 +// OGCG-LABEL: define {{.*}} void @test29 +void test29(struct DynStructVar *dv, struct DynStruct0 *d0, + struct DynStruct1 *d1, struct StaticStruct *ss) { + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size(dv->snd, 0); + + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size(dv->snd, 1); + + // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 + gi = __builtin_object_size(dv->snd, 2); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size(dv->snd, 3); + + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size(d0->snd, 0); + + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + gi = 
__builtin_object_size(d0->snd, 1); + + // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 + gi = __builtin_object_size(d0->snd, 2); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size(d0->snd, 3); + + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size(d1->snd, 0); + + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size(d1->snd, 1); + + // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 + gi = __builtin_object_size(d1->snd, 2); + + // CIR: cir.const #cir.int<1> + // LLVM: store i32 1 + // OGCG: store i32 1 + gi = __builtin_object_size(d1->snd, 3); + + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size(ss->snd, 0); + + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size(ss->snd, 1); + + // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 + gi = __builtin_object_size(ss->snd, 2); + + // CIR: cir.const #cir.int<2> + // LLVM: store i32 2 + // OGCG: store i32 2 + gi = __builtin_object_size(ss->snd, 3); +} + +// CIR-LABEL: @test30 +// LLVM-LABEL: define {{.*}} void @test30 +// OGCG-LABEL: define {{.*}} void @test30 +void test30(void) { + struct { struct DynStruct1 fst, snd; } *nested; + + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size(nested->fst.snd, 0); + + // CIR: cir.const #cir.int<1> + // LLVM: store i32 1 + // OGCG: store i32 1 + gi = __builtin_object_size(nested->fst.snd, 1); + + // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 + gi = __builtin_object_size(nested->fst.snd, 2); + + // CIR: cir.const #cir.int<1> + // LLVM: store i32 1 + // OGCG: store i32 1 + gi = __builtin_object_size(nested->fst.snd, 3); + + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + 
gi = __builtin_object_size(nested->snd.snd, 0); + + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size(nested->snd.snd, 1); + + // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 + gi = __builtin_object_size(nested->snd.snd, 2); + + // CIR: cir.const #cir.int<1> + // LLVM: store i32 1 + // OGCG: store i32 1 + gi = __builtin_object_size(nested->snd.snd, 3); + + union { struct DynStruct1 d1; char c[1]; } *u; + + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size(u->c, 0); + + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size(u->c, 1); + + // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 + gi = __builtin_object_size(u->c, 2); + + // CIR: cir.const #cir.int<1> + // LLVM: store i32 1 + // OGCG: store i32 1 + gi = __builtin_object_size(u->c, 3); + + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size(u->d1.snd, 0); + + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size(u->d1.snd, 1); + + // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 + gi = __builtin_object_size(u->d1.snd, 2); + + // CIR: cir.const #cir.int<1> + // LLVM: store i32 1 + // OGCG: store i32 1 + gi = __builtin_object_size(u->d1.snd, 3); +} + +// CIR-LABEL: @test32 +// LLVM-LABEL: define {{.*}} i64 @test32 +// OGCG-LABEL: define {{.*}} i64 @test32 +static struct DynStructVar D32 = { + .fst = {}, + .snd = { 0, 1, 2, }, +}; +unsigned long test32(void) { + // CIR: cir.const #cir.int<19> + // LLVM: store i64 19 + // OGCG: ret i64 19 + return __builtin_object_size(&D32, 1); +} + +// CIR-LABEL: @test33 +// LLVM-LABEL: define {{.*}} i64 @test33 +// OGCG-LABEL: define {{.*}} i64 @test33 +static struct DynStructVar D33 = { + .fst = {}, + .snd = {}, +}; +unsigned long test33(void) { + // CIR: cir.const #cir.int<16> + // LLVM: store i64 16 + // OGCG: ret i64 16 + return __builtin_object_size(&D33, 1); +} + +// CIR-LABEL: @test34 +// LLVM-LABEL: define {{.*}} i64 @test34 +// OGCG-LABEL: define {{.*}} i64 @test34 +static struct DynStructVar D34 = { + .fst = {}, +}; +unsigned long test34(void) { + // CIR: cir.const 
#cir.int<16> + // LLVM: store i64 16 + // OGCG: ret i64 16 + return __builtin_object_size(&D34, 1); +} + +// CIR-LABEL: @test35 +// LLVM-LABEL: define {{.*}} i64 @test35 +// OGCG-LABEL: define {{.*}} i64 @test35 +unsigned long test35(void) { + // CIR: cir.const #cir.int<16> + // LLVM: store i64 16 + // OGCG: ret i64 16 + return __builtin_object_size(&(struct DynStructVar){}, 1); +} + +// CIR-LABEL: @test37 +// LLVM-LABEL: define {{.*}} i64 @test37 +// OGCG-LABEL: define {{.*}} i64 @test37 +struct Z { struct A { int x, y[]; } z; int a; int b[]; }; +static struct Z my_z = { .b = {1,2,3} }; +unsigned long test37(void) { + // CIR: cir.const #cir.int<4> + // LLVM: store i64 4 + // OGCG: ret i64 4 + return __builtin_object_size(&my_z.z, 1); +} + +// CIR-LABEL: @PR30346 +// LLVM-LABEL: define {{.*}} void @PR30346 +// OGCG-LABEL: define {{.*}} void @PR30346 +void PR30346(void) { + struct sa_family_t {}; + struct sockaddr { + struct sa_family_t sa_family; + char sa_data[14]; + }; + + struct sockaddr *sa; + + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size(sa->sa_data, 0); + + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 + gi = __builtin_object_size(sa->sa_data, 1); + + // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 + gi = __builtin_object_size(sa->sa_data, 2); + + // CIR: cir.const #cir.int<14> + // LLVM: store i32 14 + // OGCG: store i32 14 + gi = __builtin_object_size(sa->sa_data, 3); +} + +extern char incomplete_char_array[]; + +// CIR-LABEL: @incomplete_and_function_types +// LLVM-LABEL: define {{.*}} void @incomplete_and_function_types +// OGCG-LABEL: define {{.*}} void @incomplete_and_function_types +void incomplete_and_function_types(void) { + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0 + // OGCG: call i64 @llvm.objectsize.i64.p0 + gi = __builtin_object_size(incomplete_char_array, 0); + + // CIR: cir.objsize max nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0 + // OGCG: call i64 @llvm.objectsize.i64.p0 + gi = __builtin_object_size(incomplete_char_array, 1); + + // CIR: cir.objsize min nullunknown {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0 + // OGCG: call i64 @llvm.objectsize.i64.p0 + gi = __builtin_object_size(incomplete_char_array, 2); + + // CIR: cir.const #cir.int<0> + // LLVM: store i32 0 + // OGCG: store i32 0 + gi = __builtin_object_size(incomplete_char_array, 3); +} + +// CIR-LABEL: @deeply_nested +// LLVM-LABEL: define {{.*}} void @deeply_nested +// OGCG-LABEL: define {{.*}} void @deeply_nested +void deeply_nested(void) { + struct { + struct { + struct { + struct { + int e[2]; + char f; + } d[2]; + } c[2]; + } b[2]; + } *a; + + // CIR: cir.const #cir.int<4> + // LLVM: store i32 4 + // OGCG: store i32 4 + gi = __builtin_object_size(&a->b[1].c[1].d[1].e[1], 1); + + // CIR: cir.const #cir.int<4> + // LLVM: store i32 4 + // OGCG: store i32 4 + gi = 
__builtin_object_size(&a->b[1].c[1].d[1].e[1], 3); +} diff --git a/clang/test/CIR/CodeGen/object-size.cpp b/clang/test/CIR/CodeGen/object-size.cpp new file mode 100644 index 0000000..b60e245 --- /dev/null +++ b/clang/test/CIR/CodeGen/object-size.cpp @@ -0,0 +1,108 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG + +// C++-specific tests for __builtin_object_size + +int gi; + +// CIR-LABEL: @_Z5test1v +// LLVM-LABEL: define{{.*}} void @_Z5test1v() +// OGCG-LABEL: define{{.*}} void @_Z5test1v() +void test1() { + // Guaranteeing that our cast removal logic doesn't break more interesting + // cases. + struct A { int a; }; + struct B { int b; }; + struct C: public A, public B {}; + + C c; + + // CIR: cir.const #cir.int<8> + // LLVM: store i32 8 + // OGCG: store i32 8 + gi = __builtin_object_size(&c, 0); + // CIR: cir.const #cir.int<8> + // LLVM: store i32 8 + // OGCG: store i32 8 + gi = __builtin_object_size((A*)&c, 0); + // CIR: cir.const #cir.int<4> + // LLVM: store i32 4 + // OGCG: store i32 4 + gi = __builtin_object_size((B*)&c, 0); + + // CIR: cir.const #cir.int<8> + // LLVM: store i32 8 + // OGCG: store i32 8 + gi = __builtin_object_size((char*)&c, 0); + // CIR: cir.const #cir.int<8> + // LLVM: store i32 8 + // OGCG: store i32 8 + gi = __builtin_object_size((char*)(A*)&c, 0); + // CIR: cir.const #cir.int<4> + // LLVM: store i32 4 + // OGCG: store i32 4 + gi = __builtin_object_size((char*)(B*)&c, 0); +} + +// CIR-LABEL: @_Z5test2v() +// LLVM-LABEL: define{{.*}} void @_Z5test2v() +// OGCG-LABEL: define{{.*}} void @_Z5test2v() +void test2() { + struct A { char buf[16]; }; + struct B : A {}; + struct C { int i; B bs[1]; } *c; + + // CIR: cir.objsize max nullunknown %{{.+}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 false) + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 false) + gi = __builtin_object_size(&c->bs[0], 0); + // CIR: cir.objsize max nullunknown %{{.+}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 false) + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 false) + gi = __builtin_object_size(&c->bs[0], 1); + // CIR: cir.objsize min nullunknown %{{.+}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 false) + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 false) + gi = __builtin_object_size(&c->bs[0], 2); + // CIR: cir.const #cir.int<16> + // LLVM: store i32 16 + // OGCG: store i32 16 + gi = __builtin_object_size(&c->bs[0], 3); + + // NYI: DerivedToBase cast + // gi = __builtin_object_size((A*)&c->bs[0], 0); + + // CIR: cir.const #cir.int<16> + // LLVM: store i32 16 + // OGCG: store i32 16 + gi = __builtin_object_size((A*)&c->bs[0], 1); + + // NYI: DerivedToBase cast + // gi = __builtin_object_size((A*)&c->bs[0], 2); + + // CIR: cir.const #cir.int<16> + // LLVM: store i32 16 + // OGCG: store i32 16 + gi = __builtin_object_size((A*)&c->bs[0], 3); + + // CIR: cir.objsize max nullunknown %{{.+}} : !cir.ptr<!void> -> !u64i + // LLVM: call 
i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 false) + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 false) + gi = __builtin_object_size(&c->bs[0].buf[0], 0); + // CIR: cir.const #cir.int<16> + // LLVM: store i32 16 + // OGCG: store i32 16 + gi = __builtin_object_size(&c->bs[0].buf[0], 1); + // CIR: cir.objsize min nullunknown %{{.+}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 false) + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 false) + gi = __builtin_object_size(&c->bs[0].buf[0], 2); + // CIR: cir.const #cir.int<16> + // LLVM: store i32 16 + // OGCG: store i32 16 + gi = __builtin_object_size(&c->bs[0].buf[0], 3); +} diff --git a/clang/test/CIR/IR/objsize.cir b/clang/test/CIR/IR/objsize.cir new file mode 100644 index 0000000..bc24551 --- /dev/null +++ b/clang/test/CIR/IR/objsize.cir @@ -0,0 +1,89 @@ +// Test the cir.objsize operation can parse and print correctly (roundtrip) +// with all possible combinations of optional attributes + +// RUN: cir-opt %s --verify-roundtrip | FileCheck %s + +!u64i = !cir.int<u, 64> +!void = !cir.void + +module { + cir.func @test_max(%arg0: !cir.ptr<!void>) -> !u64i { + %0 = cir.objsize max %arg0 : !cir.ptr<!void> -> !u64i + cir.return %0 : !u64i + } + + cir.func @test_max_nullunknown(%arg0: !cir.ptr<!void>) -> !u64i { + %0 = cir.objsize max nullunknown %arg0 : !cir.ptr<!void> -> !u64i + cir.return %0 : !u64i + } + + cir.func @test_max_dynamic(%arg0: !cir.ptr<!void>) -> !u64i { + %0 = cir.objsize max dynamic %arg0 : !cir.ptr<!void> -> !u64i + cir.return %0 : !u64i + } + + cir.func @test_max_nullunknown_dynamic(%arg0: !cir.ptr<!void>) -> !u64i { + %0 = cir.objsize max nullunknown dynamic %arg0 : !cir.ptr<!void> -> !u64i + cir.return %0 : !u64i + } + + cir.func @test_min(%arg0: !cir.ptr<!void>) -> !u64i { + %0 = cir.objsize min %arg0 : !cir.ptr<!void> -> !u64i + cir.return %0 : !u64i + } + + cir.func @test_min_nullunknown(%arg0: !cir.ptr<!void>) -> !u64i { + %0 = cir.objsize min nullunknown %arg0 : !cir.ptr<!void> -> !u64i + cir.return %0 : !u64i + } + + cir.func @test_min_dynamic(%arg0: !cir.ptr<!void>) -> !u64i { + %0 = cir.objsize min dynamic %arg0 : !cir.ptr<!void> -> !u64i + cir.return %0 : !u64i + } + + cir.func @test_min_nullunknown_dynamic(%arg0: !cir.ptr<!void>) -> !u64i { + %0 = cir.objsize min nullunknown dynamic %arg0 : !cir.ptr<!void> -> !u64i + cir.return %0 : !u64i + } +} + +// CHECK: cir.func @test_max(%arg0: !cir.ptr<!void>) -> !u64i { +// CHECK: %0 = cir.objsize max %arg0 : !cir.ptr<!void> -> !u64i +// CHECK: cir.return %0 : !u64i +// CHECK: } + +// CHECK: cir.func @test_max_nullunknown(%arg0: !cir.ptr<!void>) -> !u64i { +// CHECK: %0 = cir.objsize max nullunknown %arg0 : !cir.ptr<!void> -> !u64i +// CHECK: cir.return %0 : !u64i +// CHECK: } + +// CHECK: cir.func @test_max_dynamic(%arg0: !cir.ptr<!void>) -> !u64i { +// CHECK: %0 = cir.objsize max dynamic %arg0 : !cir.ptr<!void> -> !u64i +// CHECK: cir.return %0 : !u64i +// CHECK: } + +// CHECK: cir.func @test_max_nullunknown_dynamic(%arg0: !cir.ptr<!void>) -> !u64i { +// CHECK: %0 = cir.objsize max nullunknown dynamic %arg0 : !cir.ptr<!void> -> !u64i +// CHECK: cir.return %0 : !u64i +// CHECK: } + +// CHECK: cir.func @test_min(%arg0: !cir.ptr<!void>) -> !u64i { +// CHECK: %0 = cir.objsize min %arg0 : !cir.ptr<!void> -> !u64i +// CHECK: cir.return %0 : !u64i +// CHECK: } + +// CHECK: cir.func @test_min_nullunknown(%arg0: !cir.ptr<!void>) -> !u64i { +// 
CHECK: %0 = cir.objsize min nullunknown %arg0 : !cir.ptr<!void> -> !u64i +// CHECK: cir.return %0 : !u64i +// CHECK: } + +// CHECK: cir.func @test_min_dynamic(%arg0: !cir.ptr<!void>) -> !u64i { +// CHECK: %0 = cir.objsize min dynamic %arg0 : !cir.ptr<!void> -> !u64i +// CHECK: cir.return %0 : !u64i +// CHECK: } + +// CHECK: cir.func @test_min_nullunknown_dynamic(%arg0: !cir.ptr<!void>) -> !u64i { +// CHECK: %0 = cir.objsize min nullunknown dynamic %arg0 : !cir.ptr<!void> -> !u64i +// CHECK: cir.return %0 : !u64i +// CHECK: } diff --git a/clang/test/CodeGenHLSL/semantics/DispatchThreadID.hlsl b/clang/test/CodeGenHLSL/semantics/DispatchThreadID.hlsl index 7aeb877..b0abaed 100644 --- a/clang/test/CodeGenHLSL/semantics/DispatchThreadID.hlsl +++ b/clang/test/CodeGenHLSL/semantics/DispatchThreadID.hlsl @@ -24,4 +24,3 @@ void foo(uint Idx : SV_DispatchThreadID) {} [shader("compute")] [numthreads(8,8,1)] void bar(uint2 Idx : SV_DispatchThreadID) {} - diff --git a/clang/test/CodeGenHLSL/semantics/semantic.arbitrary.hlsl b/clang/test/CodeGenHLSL/semantics/semantic.arbitrary.hlsl new file mode 100644 index 0000000..96d5b99 --- /dev/null +++ b/clang/test/CodeGenHLSL/semantics/semantic.arbitrary.hlsl @@ -0,0 +1,36 @@ +// RUN: %clang_cc1 -triple spirv-unknown-vulkan-vertex -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV -DTARGET=spv +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-vertex -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-DXIL -DTARGET=dx + +// CHECK-SPIRV-DAG: @AAA0 = external hidden thread_local addrspace(7) externally_initialized constant float, !spirv.Decorations ![[#METADATA_0:]] +// CHECK-SPIRV-DAG: @B0 = external hidden thread_local addrspace(7) externally_initialized constant i32, !spirv.Decorations ![[#METADATA_2:]] +// CHECK-SPIRV-DAG: @CC0 = external hidden thread_local addrspace(7) externally_initialized constant <2 x float>, !spirv.Decorations ![[#METADATA_4:]] + + +// FIXME: replace `float2 c` with a matrix when available. 
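+// Each user semantic below gets its own externally initialized input
+// global (@AAA0, @B0, @CC0 above); the metadata checks at the end of this
+// file verify that each carries decoration 30 ("Location"), with locations
+// 0, 1, and 2 assigned in declaration order.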
+void main(float a : AAA, int b : B, float2 c : CC) {
+  float tmp = a + b + c.x + c.y;
+}
+// CHECK-SPIRV: define internal spir_func void @_Z4mainfiDv2_f(float noundef nofpclass(nan inf) %a, i32 noundef %b, <2 x float> noundef nofpclass(nan inf) %c) #0 {
+
+// CHECK: define void @main()
+
+// CHECK-DXIL: %AAA0 = call float @llvm.dx.load.input.f32(i32 4, i32 0, i32 0, i8 0, i32 poison)
+// CHECK-DXIL: %B0 = call i32 @llvm.dx.load.input.i32(i32 4, i32 0, i32 0, i8 0, i32 poison)
+// CHECK-DXIL: %CC0 = call <2 x float> @llvm.dx.load.input.v2f32(i32 4, i32 0, i32 0, i8 0, i32 poison)
+// CHECK-DXIL: call void @_Z4mainfiDv2_f(float %AAA0, i32 %B0, <2 x float> %CC0)
+
+// CHECK-SPIRV: %[[#AAA0:]] = load float, ptr addrspace(7) @AAA0, align 4
+// CHECK-SPIRV: %[[#B0:]] = load i32, ptr addrspace(7) @B0, align 4
+// CHECK-SPIRV: %[[#CC0:]] = load <2 x float>, ptr addrspace(7) @CC0, align 8
+// CHECK-SPIRV: call spir_func void @_Z4mainfiDv2_f(float %[[#AAA0]], i32 %[[#B0]], <2 x float> %[[#CC0]]) [ "convergencectrl"(token %0) ]
+
+
+// CHECK-SPIRV-DAG: ![[#METADATA_0]] = !{![[#METADATA_1:]]}
+// CHECK-SPIRV-DAG: ![[#METADATA_2]] = !{![[#METADATA_3:]]}
+// CHECK-SPIRV-DAG: ![[#METADATA_4]] = !{![[#METADATA_5:]]}
+
+// CHECK-SPIRV-DAG: ![[#METADATA_1]] = !{i32 30, i32 0}
+// CHECK-SPIRV-DAG: ![[#METADATA_3]] = !{i32 30, i32 1}
+// CHECK-SPIRV-DAG: ![[#METADATA_5]] = !{i32 30, i32 2}
+//                                       |       `- Location index
+//                                       `-> Decoration "Location"
diff --git a/clang/test/CodeGenHLSL/semantics/semantic.array.hlsl b/clang/test/CodeGenHLSL/semantics/semantic.array.hlsl
new file mode 100644
index 0000000..b2cb3da
--- /dev/null
+++ b/clang/test/CodeGenHLSL/semantics/semantic.array.hlsl
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 -triple spirv-linux-vulkan-library -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV -DTARGET=spv
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-DXIL -DTARGET=dx
+
+struct S0 {
+  float4 position[2];
+  float4 color;
+};
+
+// CHECK: %struct.S0 = type { [2 x <4 x float>], <4 x float> }
+
+// CHECK-SPIRV: @A0 = external hidden thread_local addrspace(7) externally_initialized constant [2 x <4 x float>], !spirv.Decorations ![[#MD_0:]]
+// CHECK-SPIRV: @A2 = external hidden thread_local addrspace(7) externally_initialized constant <4 x float>, !spirv.Decorations ![[#MD_2:]]
+
+// CHECK: define void @main0()
+// CHECK-DXIL: %A0 = call [2 x <4 x float>] @llvm.dx.load.input.a2v4f32(i32 4, i32 0, i32 0, i8 0, i32 poison)
+// CHECK-DXIL: %[[#TMP0:]] = insertvalue %struct.S0 poison, [2 x <4 x float>] %A0, 0
+// CHECK-DXIL: %A2 = call <4 x float> @llvm.dx.load.input.v4f32(i32 4, i32 0, i32 0, i8 0, i32 poison)
+// CHECK-DXIL: %[[#TMP1:]] = insertvalue %struct.S0 %[[#TMP0]], <4 x float> %A2, 1
+
+// CHECK-SPIRV: %[[#A0:]] = load [2 x <4 x float>], ptr addrspace(7) @A0, align 16
+// CHECK-SPIRV: %[[#TMP0:]] = insertvalue %struct.S0 poison, [2 x <4 x float>] %[[#A0]], 0
+// CHECK-SPIRV: %[[#A2:]] = load <4 x float>, ptr addrspace(7) @A2, align 16
+// CHECK-SPIRV: %[[#TMP1:]] = insertvalue %struct.S0 %[[#TMP0]], <4 x float> %[[#A2]], 1
+
+// CHECK: %[[#ARG:]] = alloca %struct.S0, align 16
+// CHECK: store %struct.S0 %[[#TMP1]], ptr %[[#ARG]], align 16
+// CHECK-DXIL: call void @{{.*}}main0{{.*}}(ptr %[[#ARG]])
+// CHECK-SPIRV: call spir_func void @{{.*}}main0{{.*}}(ptr %[[#ARG]])
+[shader("pixel")]
+void main0(S0 p : A) {
+  float tmp = p.position[0] + p.position[1] + p.color;
+}
+
+// CHECK-SPIRV: ![[#MD_0]] = !{![[#MD_1:]]}
+// CHECK-SPIRV: ![[#MD_1]] = !{i32 30, i32 0}
+// CHECK-SPIRV: ![[#MD_2]] = !{![[#MD_3:]]}
+// CHECK-SPIRV: ![[#MD_3]] = !{i32 30, i32 2}
diff --git a/clang/test/CodeGenHLSL/semantics/semantic.struct.hlsl b/clang/test/CodeGenHLSL/semantics/semantic.struct.hlsl
new file mode 100644
index 0000000..733cf3a
--- /dev/null
+++ b/clang/test/CodeGenHLSL/semantics/semantic.struct.hlsl
@@ -0,0 +1,77 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-DXIL -DTARGET=dx
+// RUN: %clang_cc1 -triple spirv-linux-vulkan-library -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV -DTARGET=spv
+
+struct S0 {
+  uint Idx : SV_DispatchThreadID;
+};
+
+// CHECK: define void @main0()
+// CHECK-DXIL: %[[#ID:]] = call i32 @llvm.[[TARGET]].thread.id(i32 0)
+// CHECK-SPIRV: %[[#ID:]] = call i32 @llvm.[[TARGET]].thread.id.i32(i32 0)
+// CHECK: %[[#TMP:]] = insertvalue %struct.S0 poison, i32 %[[#ID]], 0
+// CHECK: %[[#ARG:]] = alloca %struct.S0, align 8
+// CHECK: store %struct.S0 %[[#TMP]], ptr %[[#ARG]], align 4
+// CHECK-DXIL: call void @{{.*}}main0{{.*}}(ptr %[[#ARG]])
+// CHECK-SPIRV: call spir_func void @{{.*}}main0{{.*}}(ptr %[[#ARG]])
+[shader("compute")]
+[numthreads(8,8,1)]
+void main0(S0 p) {}
+
+struct S1 {
+  uint2 a : SV_DispatchThreadID;
+  uint2 b : SV_GroupThreadID;
+};
+
+// CHECK: define void @main1()
+// CHECK-DXIL: %[[#ID:]] = call i32 @llvm.[[TARGET]].thread.id(i32 0)
+// CHECK-SPIRV: %[[#ID:]] = call i32 @llvm.[[TARGET]].thread.id.i32(i32 0)
+// CHECK: %[[#AX_:]] = insertelement <2 x i32> poison, i32 %[[#ID]], i64 0
+// CHECK-DXIL: %[[#ID:]] = call i32 @llvm.[[TARGET]].thread.id(i32 1)
+// CHECK-SPIRV: %[[#ID:]] = call i32 @llvm.[[TARGET]].thread.id.i32(i32 1)
+// CHECK: %[[#AXY:]] = insertelement <2 x i32> %[[#AX_]], i32 %[[#ID]], i64 1
+// CHECK: %[[#S1A_:]] = insertvalue %struct.S1 poison, <2 x i32> %[[#AXY]], 0
+// CHECK-DXIL: %[[#ID_X:]] = call i32 @llvm.[[TARGET]].thread.id.in.group(i32 0)
+// CHECK-SPIRV: %[[#ID_X:]] = call i32 @llvm.[[TARGET]].thread.id.in.group.i32(i32 0)
+// CHECK: %[[#ID_X_:]] = insertelement <2 x i32> poison, i32 %[[#ID_X]], i64 0
+// CHECK-DXIL: %[[#ID_Y:]] = call i32 @llvm.[[TARGET]].thread.id.in.group(i32 1)
+// CHECK-SPIRV: %[[#ID_Y:]] = call i32 @llvm.[[TARGET]].thread.id.in.group.i32(i32 1)
+// CHECK: %[[#ID_XY:]] = insertelement <2 x i32> %[[#ID_X_]], i32 %[[#ID_Y]], i64 1
+// CHECK: %[[#S1AB:]] = insertvalue %struct.S1 %[[#S1A_]], <2 x i32> %[[#ID_XY]], 1
+// CHECK: %[[#ARG:]] = alloca %struct.S1, align 8
+// CHECK: store %struct.S1 %[[#S1AB]], ptr %[[#ARG]], align 8
+// CHECK-DXIL: call void @{{.*}}main1{{.*}}(ptr %[[#ARG]])
+// CHECK-SPIRV: call spir_func void @{{.*}}main1{{.*}}(ptr %[[#ARG]])
+[shader("compute")]
+[numthreads(8,8,1)]
+void main1(S1 p) {}
+
+struct S2C {
+  uint2 b : SV_GroupThreadID;
+};
+
+struct S2 {
+  uint a : SV_DispatchThreadID;
+  S2C child;
+};
+
+// CHECK: define void @main2()
+// CHECK-DXIL: %[[#ID:]] = call i32 @llvm.[[TARGET]].thread.id(i32 0)
+// CHECK-SPIRV: %[[#ID:]] = call i32 @llvm.[[TARGET]].thread.id.i32(i32 0)
+// CHECK: %[[#S2A_:]] = insertvalue %struct.S2 poison, i32 %[[#ID]], 0
+
+// CHECK-DXIL: %[[#ID_X:]] = call i32 @llvm.[[TARGET]].thread.id.in.group(i32 0)
+// CHECK-SPIRV: %[[#ID_X:]] = call i32 @llvm.[[TARGET]].thread.id.in.group.i32(i32 0)
+// CHECK: %[[#ID_X_:]] = insertelement <2 x i32> poison, i32 %[[#ID_X]], i64 0
+// CHECK-DXIL: %[[#ID_Y:]] = call i32 @llvm.[[TARGET]].thread.id.in.group(i32 1)
+// CHECK-SPIRV: %[[#ID_Y:]] = call i32 @llvm.[[TARGET]].thread.id.in.group.i32(i32 1)
+// CHECK: %[[#ID_XY:]] = insertelement <2 x i32> %[[#ID_X_]], i32 %[[#ID_Y]], i64 1
+// CHECK: %[[#S2C:]] = insertvalue %struct.S2C poison, <2 x i32> %[[#ID_XY]], 0
+
+// CHECK: %[[#S2AB:]] = insertvalue %struct.S2 %[[#S2A_]], %struct.S2C %[[#S2V:]], 1
+// CHECK: %[[#ARG:]] = alloca %struct.S2, align 8
+// CHECK: store %struct.S2 %[[#S2AB]], ptr %[[#ARG]], align 1
+// CHECK-DXIL: call void @{{.*}}main2{{.*}}(ptr %[[#ARG]])
+// CHECK-SPIRV: call spir_func void @{{.*}}main2{{.*}}(ptr %[[#ARG]])
+[shader("compute")]
+[numthreads(8,8,1)]
+void main2(S2 p) {}
diff --git a/clang/test/ParserHLSL/semantic_parsing.hlsl b/clang/test/ParserHLSL/semantic_parsing.hlsl
index 726dead..bff7bd0 100644
--- a/clang/test/ParserHLSL/semantic_parsing.hlsl
+++ b/clang/test/ParserHLSL/semantic_parsing.hlsl
@@ -12,30 +12,33 @@ void Pony(int GI : SV_IWantAPony) { }
 // expected-note@+1 {{to match this '('}}
 void SuperPony(int GI : 0) { }
 
-// expected-error@+1 {{unknown HLSL semantic '_'}}
+// '_' is a valid C++ identifier.
 void MegaPony(int GI : _) { }
 
-// expected-error@+1 {{unknown HLSL semantic 'A0A'}}
+void GargantuanPony(int GI : _1) { }
+
 void CoolPony(int GI : A0A0) { }
 
-// expected-error@+1 {{unknown HLSL semantic 'A_'}}
 void NicePony(int GI : A_0) { }
 
-// expected-error@+1 {{unknown HLSL semantic 'A'}}
 void CutePony(int GI : A00) { }
 
-// expected-error@+3 {{unknown HLSL semantic 'A'}}
 // expected-error@+2 {{expected ')'}}
 // expected-note@+1 {{to match this '('}}
 void DoublePony(int GI : A00 B) { }
 
-// expected-error@+1 {{unknown HLSL semantic 'é'}}
-void BigPony(int GI : é) { }
+// Unicode can be used:
+// https://timsong-cpp.github.io/cppwp/n3337/charname.allowed
+void FrenchPony(int GI : garçon_de_café) { }
+void UnicodePony(int GI : ℮) { }
+
+// Since P1949, it seems emojis are not allowed, even if they are in the
+// range mentioned in N3337.
+// https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2021/p1949r7.html // expected-error@+2 {{unexpected character <U+1F60A>}} // expected-error@+1 {{expected HLSL Semantic identifier}} void UTFPony(int GI : 😊) { } -// expected-error@+2 {{character <U+1F60A> not allowed in an identifier}} -// expected-error@+1 {{unknown HLSL semantic 'PonyWithA😊'}} +// expected-error@+1 {{character <U+1F60A> not allowed in an identifier}} void SmilingPony(int GI : PonyWithA😊) { } diff --git a/clang/test/SemaCXX/dependent-switch-case.cpp b/clang/test/SemaCXX/dependent-switch-case.cpp new file mode 100644 index 0000000..bbeab3a --- /dev/null +++ b/clang/test/SemaCXX/dependent-switch-case.cpp @@ -0,0 +1,6 @@ +// RUN: %clang_cc1 -std=c++20 %s -verify +// RUN: %clang_cc1 -std=c++20 %s -verify -fexperimental-new-constant-interpreter + +constexpr bool e(int){switch(0)0=0:return t(;} // expected-error {{expression is not assignable}} \ + // expected-error {{expected 'case' keyword before expression}} \ + // expected-error {{expected expression}} diff --git a/clang/test/SemaHLSL/Semantics/semantics-invalid.hlsl b/clang/test/SemaHLSL/Semantics/semantics-invalid.hlsl new file mode 100644 index 0000000..fdba6f6 --- /dev/null +++ b/clang/test/SemaHLSL/Semantics/semantics-invalid.hlsl @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -fsyntax-only -hlsl-entry main -verify %s + +typedef float t_f : SEMANTIC; // expected-warning{{'SEMANTIC' attribute only applies to parameters, non-static data members, and functions}} + +struct semantic_on_struct : SEMANTIC { // expected-error{{expected class name}} + float a; +}; + +struct s_fields_multiple_semantics { + float a : semantic_a : semantic_c; // expected-error{{use of undeclared identifier 'semantic_c'}} + float b : semantic_b; +}; + +[numthreads(1, 1, 1)] +void main() { + float a : SEM_A; // expected-warning{{'SEM_A' attribute only applies to parameters, non-static data members, and functions}} +} diff --git a/clang/test/SemaHLSL/Semantics/semantics-valid.hlsl b/clang/test/SemaHLSL/Semantics/semantics-valid.hlsl new file mode 100644 index 0000000..1e6bae4 --- /dev/null +++ b/clang/test/SemaHLSL/Semantics/semantics-valid.hlsl @@ -0,0 +1,33 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -hlsl-entry CSMain -x hlsl -finclude-default-header -ast-dump -o - %s | FileCheck %s + +struct s_fields { + float a : semantic_a; + float b : semantic_b; +// CHECK: |-CXXRecordDecl 0x{{[0-9a-fA-F]+}} <{{.*}}> line:[[@LINE-3]]:8 struct s_fields definition +// CHECK: | |-FieldDecl 0x{{[0-9a-fA-F]+}} <{{.*}}> col:9 a 'float' +// CHECK: | | `-HLSLUserSemanticAttr 0x{{[0-9a-fA-F]+}} <col:13> +// CHECK: | `-FieldDecl 0x{{[0-9a-fA-F]+}} <{{.*}}> col:9 b 'float' +// CHECK: | `-HLSLUserSemanticAttr 0x{{[0-9a-fA-F]+}} <col:13> +}; + +float fn_foo1(float a : a, float b : b) : sem_ret { return 1.0f; } +// CHECK: |-FunctionDecl {{.*}} <{{.*}}> col:7 fn_foo1 'float (float, float)' +// CHECK-NEXT: | |-ParmVarDecl {{.*}} <{{.*}}> col:21 a 'float' +// CHECK-NEXT: | | `-HLSLUserSemanticAttr {{.*}} <{{.*}}> +// CHECK-NEXT: | |-ParmVarDecl {{.*}} <{{.*}}> col:34 b 'float' +// CHECK-NEXT: | | `-HLSLUserSemanticAttr {{.*}} <{{.*}}> +// CHECK-NEXT: | |-CompoundStmt {{.*}} <{{.*}}> +// CHECK-NEXT: | | `-ReturnStmt {{.*}} <{{.*}}> +// CHECK-NEXT: | | `-FloatingLiteral {{.*}} <{{.*}}> 'float' 1.000000e+00 +// CHECK-NEXT: | `-HLSLUserSemanticAttr {{.*}} <{{.*}}> +float fn_foo2(float a : a, float b : b) : sem_ret : also_ret { return 1.0f; } +// CHECK: `-FunctionDecl 
{{.*}} <{{.*}}> col:7 fn_foo2 'float (float, float)' +// CHECK-NEXT: |-ParmVarDecl {{.*}} <{{.*}}> col:21 a 'float' +// CHECK-NEXT: | `-HLSLUserSemanticAttr {{.*}} <{{.*}}> +// CHECK-NEXT: |-ParmVarDecl {{.*}} <{{.*}}> col:34 b 'float' +// CHECK-NEXT: | `-HLSLUserSemanticAttr {{.*}} <{{.*}}> +// CHECK-NEXT: |-CompoundStmt {{.*}} <{{.*}}> +// CHECK-NEXT: | `-ReturnStmt {{.*}} <{{.*}}> +// CHECK-NEXT: | `-FloatingLiteral {{.*}} <{{.*}}> 'float' 1.000000e+00 +// CHECK-NEXT: |-HLSLUserSemanticAttr {{.*}} <{{.*}}> +// CHECK-NEXT: `-HLSLUserSemanticAttr {{.*}} <{{.*}}> diff --git a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp index 9692d6e..3fcb558 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp +++ b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp @@ -1179,6 +1179,12 @@ TEST_P(ASTMatchersTest, PredefinedExpr) { has(stringLiteral())))); } +TEST_P(ASTMatchersTest, FileScopeAsmDecl) { + EXPECT_TRUE(matches("__asm(\"nop\");", fileScopeAsmDecl())); + EXPECT_TRUE( + notMatches("void f() { __asm(\"mov al, 2\"); }", fileScopeAsmDecl())); +} + TEST_P(ASTMatchersTest, AsmStatement) { EXPECT_TRUE(matches("void foo() { __asm(\"mov al, 2\"); }", asmStmt())); } @@ -2442,7 +2448,8 @@ TEST_P(ASTMatchersTest, LambdaCaptureTest_BindsToCaptureOfReferenceType) { "int main() {" " int a;" " f(a);" - "}", matcher)); + "}", + matcher)); EXPECT_FALSE(matches("template <class ...T> void f(T &...args) {" " [...args = args] () mutable {" " }();" @@ -2450,7 +2457,8 @@ TEST_P(ASTMatchersTest, LambdaCaptureTest_BindsToCaptureOfReferenceType) { "int main() {" " int a;" " f(a);" - "}", matcher)); + "}", + matcher)); } TEST_P(ASTMatchersTest, IsDerivedFromRecursion) { @@ -2628,7 +2636,7 @@ TEST(ASTMatchersTestObjC, ObjCStringLiteral) { " [Test someFunction:@\"Ola!\"]; " "}\n" "@end "; - EXPECT_TRUE(matchesObjC(Objc1String, objcStringLiteral())); + EXPECT_TRUE(matchesObjC(Objc1String, objcStringLiteral())); } TEST(ASTMatchersTestObjC, ObjCDecls) { diff --git a/clang/unittests/Support/TimeProfilerTest.cpp b/clang/unittests/Support/TimeProfilerTest.cpp index e544c89..3b18aa83 100644 --- a/clang/unittests/Support/TimeProfilerTest.cpp +++ b/clang/unittests/Support/TimeProfilerTest.cpp @@ -186,7 +186,8 @@ std::string buildTraceGraph(StringRef Json) { } // namespace -TEST(TimeProfilerTest, ConstantEvaluationCxx20) { +// FIXME: Flaky test. See https://github.com/llvm/llvm-project/pull/138613 +TEST(TimeProfilerTest, DISABLED_ConstantEvaluationCxx20) { std::string Code = R"( void print(double value); diff --git a/compiler-rt/test/hwasan/TestCases/Linux/fixed-shadow.c b/compiler-rt/test/hwasan/TestCases/Linux/fixed-shadow.c index 08a04fc..fc83b21 100644 --- a/compiler-rt/test/hwasan/TestCases/Linux/fixed-shadow.c +++ b/compiler-rt/test/hwasan/TestCases/Linux/fixed-shadow.c @@ -3,12 +3,12 @@ // Default compiler instrumentation works with any shadow base (dynamic or fixed). // RUN: %clang_hwasan %s -o %t // RUN: %run %t -// RUN: env HWASAN_OPTIONS=fixed_shadow_base=263878495698944 %run %t +// RUN: env HWASAN_OPTIONS=fixed_shadow_base=263878495698944 %run %t 2>%t.out || (cat %t.out | FileCheck %s) // RUN: env HWASAN_OPTIONS=fixed_shadow_base=4398046511104 %run %t // // If -hwasan-mapping-offset is set, then the fixed_shadow_base needs to match. 
// RUN: %clang_hwasan %s -mllvm -hwasan-mapping-offset=263878495698944 -o %t -// RUN: env HWASAN_OPTIONS=fixed_shadow_base=263878495698944 %run %t +// RUN: env HWASAN_OPTIONS=fixed_shadow_base=263878495698944 %run %t 2>%t.out || (cat %t.out | FileCheck %s) // RUN: env HWASAN_OPTIONS=fixed_shadow_base=4398046511104 not %run %t // RUN: %clang_hwasan %s -mllvm -hwasan-mapping-offset=4398046511104 -o %t @@ -26,6 +26,8 @@ // // UNSUPPORTED: android +// CHECK: FATAL: HWAddressSanitizer: Shadow range {{.*}} is not available + #include <assert.h> #include <sanitizer/allocator_interface.h> #include <sanitizer/hwasan_interface.h> diff --git a/libc/src/__support/CPP/type_traits/is_destructible.h b/libc/src/__support/CPP/type_traits/is_destructible.h index 7ada223..dc5e62b 100644 --- a/libc/src/__support/CPP/type_traits/is_destructible.h +++ b/libc/src/__support/CPP/type_traits/is_destructible.h @@ -15,6 +15,7 @@ #include "src/__support/CPP/type_traits/remove_all_extents.h" #include "src/__support/CPP/type_traits/true_type.h" #include "src/__support/CPP/type_traits/type_identity.h" +#include "src/__support/CPP/utility/declval.h" #include "src/__support/macros/attributes.h" #include "src/__support/macros/config.h" diff --git a/libc/startup/baremetal/arm/start.cpp b/libc/startup/baremetal/arm/start.cpp index c089a14..4740067 100644 --- a/libc/startup/baremetal/arm/start.cpp +++ b/libc/startup/baremetal/arm/start.cpp @@ -131,6 +131,32 @@ namespace LIBC_NAMESPACE_DECL { __arm_wsr("CPSR_c", 0x13); // SVC #endif +#ifdef __ARM_FP +// Enable FPU +#if __ARM_ARCH_PROFILE == 'M' + // Based on + // https://developer.arm.com/documentation/dui0646/c/Cortex-M7-Peripherals/Floating-Point-Unit/Enabling-the-FPU + // Set CPACR cp10 and cp11 + auto cpacr = (volatile uint32_t *const)0xE000ED88; + *cpacr |= (0xF << 20); + __dsb(0xF); + __isb(0xF); +#elif __ARM_ARCH_PROFILE == 'A' || __ARM_ARCH_PROFILE == 'R' + // Based on + // https://developer.arm.com/documentation/dui0472/m/Compiler-Coding-Practices/Enabling-NEON-and-FPU-for-bare-metal + // Set CPACR cp10 and cp11 + uint32_t cpacr = __arm_rsr("p15:0:c1:c0:2"); + cpacr |= (0xF << 20); + __arm_wsr("p15:0:c1:c0:2", cpacr); + __isb(0xF); + // Set FPEXC.EN + uint32_t fpexc; + __asm__ __volatile__("vmrs %0, FPEXC" : "=r"(fpexc) : :); + fpexc |= (1 << 30); + __asm__ __volatile__("vmsr FPEXC, %0" : : "r"(fpexc) :); +#endif +#endif + // Perform the equivalent of scatterloading LIBC_NAMESPACE::memcpy(__data_start, __data_source, reinterpret_cast<uintptr_t>(__data_size)); diff --git a/llvm/docs/SPIRVUsage.rst b/llvm/docs/SPIRVUsage.rst index 7499613..9ecd390 100644 --- a/llvm/docs/SPIRVUsage.rst +++ b/llvm/docs/SPIRVUsage.rst @@ -241,6 +241,8 @@ Below is a list of supported SPIR-V extensions, sorted alphabetically by their e - Adds predicated load and store instructions that conditionally read from or write to memory based on a boolean predicate. * - ``SPV_KHR_maximal_reconvergence`` - Adds execution mode and capability to enable maximal reconvergence. + * - ``SPV_ALTERA_blocking_pipes`` + - Adds new pipe read and write functions that have blocking semantics instead of the non-blocking semantics of the existing pipe read/write functions. 
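To ground what "blocking semantics" means for this extension, here is a minimal OpenCL C sketch (an editorial illustration, not part of this patch): with the standard non-blocking builtins a kernel must retry until the operation succeeds, and that retry loop is exactly what the extension's blocking read/write absorbs into a single call. The blocking builtin and instruction names themselves are defined by the SPV_ALTERA_blocking_pipes specification and are deliberately not guessed at here.

```c
// Standard OpenCL C 2.0 non-blocking pipe write: write_pipe returns 0 on
// success and a negative value when the pipe is full and nothing was written.
kernel void producer(global const int *src, write_only pipe int out_pipe) {
  int v = src[get_global_id(0)];
  // Busy-wait until space is available; a blocking write performs this
  // wait inside the pipe operation itself.
  while (write_pipe(out_pipe, &v) != 0)
    ;
}
```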
 SPIR-V representation in LLVM IR
================================
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 221d8f1..f585257 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1331,8 +1331,8 @@ public:
     bool SplitDst =
         TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
         TargetLowering::TypeSplitVector;
-    if ((SplitSrc || SplitDst) && SrcVTy->getElementCount().isVector() &&
-        DstVTy->getElementCount().isVector()) {
+    if ((SplitSrc || SplitDst) && SrcVTy->getElementCount().isKnownEven() &&
+        DstVTy->getElementCount().isKnownEven()) {
       Type *SplitDstTy = VectorType::getHalfElementsVectorType(DstVTy);
       Type *SplitSrcTy = VectorType::getHalfElementsVectorType(SrcVTy);
       const T *TTI = thisT();
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index 268025e7..9d6038d 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -297,6 +297,10 @@ private:
   /// \pre \p U is a call instruction.
   bool translateCall(const User &U, MachineIRBuilder &MIRBuilder);
 
+  bool translateIntrinsic(
+      const CallBase &CB, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder,
+      const TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo = nullptr);
+
   /// When an invoke or a cleanupret unwinds to the next EH pad, there are
   /// many places it could ultimately go. In the IR, we have a single unwind
   /// destination, but in the machine CFG, we enumerate all the possible blocks.
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 9924b90..d7db935 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -176,4 +176,10 @@ def int_dx_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, l
 def int_dx_group_memory_barrier_with_group_sync
     : DefaultAttrsIntrinsic<[], [], [IntrConvergent]>;
+
+def int_dx_load_input
+    : DefaultAttrsIntrinsic<[llvm_any_ty],
+                            [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i8_ty,
+                             llvm_i32_ty],
+                            [IntrConvergent]>;
 }
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 4fd2204..be1b51f 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2821,20 +2821,34 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
   if (translateKnownIntrinsic(CI, ID, MIRBuilder))
     return true;
 
+  TargetLowering::IntrinsicInfo Info;
+  bool IsTgtMemIntrinsic = TLI->getTgtMemIntrinsic(Info, CI, *MF, ID);
+
+  return translateIntrinsic(CI, ID, MIRBuilder,
+                            IsTgtMemIntrinsic ? &Info : nullptr);
+}
+
+/// Translate a call to an intrinsic.
+/// If TLI->getTgtMemIntrinsic() returned true for this call, then
+/// TgtMemIntrinsicInfo points to the IntrinsicInfo it populated; otherwise,
+/// this pointer is null.
+bool IRTranslator::translateIntrinsic(
+    const CallBase &CB, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder,
+    const TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo) {
   ArrayRef<Register> ResultRegs;
-  if (!CI.getType()->isVoidTy())
-    ResultRegs = getOrCreateVRegs(CI);
+  if (!CB.getType()->isVoidTy())
+    ResultRegs = getOrCreateVRegs(CB);
 
   // Ignore the callsite attributes. Backend code is most likely not expecting
   // an intrinsic to sometimes have side effects and sometimes not.
MachineInstrBuilder MIB = MIRBuilder.buildIntrinsic(ID, ResultRegs); - if (isa<FPMathOperator>(CI)) - MIB->copyIRFlags(CI); + if (isa<FPMathOperator>(CB)) + MIB->copyIRFlags(CB); - for (const auto &Arg : enumerate(CI.args())) { + for (const auto &Arg : enumerate(CB.args())) { // If this is required to be an immediate, don't materialize it in a // register. - if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) { + if (CB.paramHasAttr(Arg.index(), Attribute::ImmArg)) { if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg.value())) { // imm arguments are more convenient than cimm (and realistically // probably sufficient), so use them. @@ -2863,29 +2877,33 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { } // Add a MachineMemOperand if it is a target mem intrinsic. - TargetLowering::IntrinsicInfo Info; - // TODO: Add a GlobalISel version of getTgtMemIntrinsic. - if (TLI->getTgtMemIntrinsic(Info, CI, *MF, ID)) { - Align Alignment = Info.align.value_or( - DL->getABITypeAlign(Info.memVT.getTypeForEVT(F->getContext()))); - LLT MemTy = Info.memVT.isSimple() - ? getLLTForMVT(Info.memVT.getSimpleVT()) - : LLT::scalar(Info.memVT.getStoreSizeInBits()); + if (TgtMemIntrinsicInfo) { + const Function *F = CB.getCalledFunction(); + + Align Alignment = TgtMemIntrinsicInfo->align.value_or(DL->getABITypeAlign( + TgtMemIntrinsicInfo->memVT.getTypeForEVT(F->getContext()))); + LLT MemTy = + TgtMemIntrinsicInfo->memVT.isSimple() + ? getLLTForMVT(TgtMemIntrinsicInfo->memVT.getSimpleVT()) + : LLT::scalar(TgtMemIntrinsicInfo->memVT.getStoreSizeInBits()); // TODO: We currently just fallback to address space 0 if getTgtMemIntrinsic // didn't yield anything useful. MachinePointerInfo MPI; - if (Info.ptrVal) - MPI = MachinePointerInfo(Info.ptrVal, Info.offset); - else if (Info.fallbackAddressSpace) - MPI = MachinePointerInfo(*Info.fallbackAddressSpace); + if (TgtMemIntrinsicInfo->ptrVal) { + MPI = MachinePointerInfo(TgtMemIntrinsicInfo->ptrVal, + TgtMemIntrinsicInfo->offset); + } else if (TgtMemIntrinsicInfo->fallbackAddressSpace) { + MPI = MachinePointerInfo(*TgtMemIntrinsicInfo->fallbackAddressSpace); + } MIB.addMemOperand(MF->getMachineMemOperand( - MPI, Info.flags, MemTy, Alignment, CI.getAAMetadata(), - /*Ranges=*/nullptr, Info.ssid, Info.order, Info.failureOrder)); + MPI, TgtMemIntrinsicInfo->flags, MemTy, Alignment, CB.getAAMetadata(), + /*Ranges=*/nullptr, TgtMemIntrinsicInfo->ssid, + TgtMemIntrinsicInfo->order, TgtMemIntrinsicInfo->failureOrder)); } - if (CI.isConvergent()) { - if (auto Bundle = CI.getOperandBundle(LLVMContext::OB_convergencectrl)) { + if (CB.isConvergent()) { + if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_convergencectrl)) { auto *Token = Bundle->Inputs[0].get(); Register TokenReg = getOrCreateVReg(*Token); MIB.addUse(TokenReg, RegState::Implicit); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index fa0c899..9961c98 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3526,8 +3526,7 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) { // Update successor info. 
addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne()); - for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) { - BasicBlock *Dest = I.getIndirectDest(i); + for (BasicBlock *Dest : I.getIndirectDests()) { MachineBasicBlock *Target = FuncInfo.getMBB(Dest); Target->setIsInlineAsmBrIndirectTarget(); // If we introduce a type of asm goto statement that is permitted to use an @@ -5313,18 +5312,26 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { DAG.setRoot(OutChain); } -/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC -/// node. -void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, - unsigned Intrinsic) { - // Ignore the callsite's attributes. A specific call site may be marked with - // readnone, but the lowering code will expect the chain based on the - // definition. +/// Check if this intrinsic call depends on the chain (1st return value) +/// and if it only *loads* memory. +/// Ignore the callsite's attributes. A specific call site may be marked with +/// readnone, but the lowering code will expect the chain based on the +/// definition. +std::pair<bool, bool> +SelectionDAGBuilder::getTargetIntrinsicCallProperties(const CallBase &I) { const Function *F = I.getCalledFunction(); bool HasChain = !F->doesNotAccessMemory(); bool OnlyLoad = HasChain && F->onlyReadsMemory() && F->willReturn() && F->doesNotThrow(); + return {HasChain, OnlyLoad}; +} + +SmallVector<SDValue, 8> SelectionDAGBuilder::getTargetIntrinsicOperands( + const CallBase &I, bool HasChain, bool OnlyLoad, + TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + // Build the operand list. SmallVector<SDValue, 8> Ops; if (HasChain) { // If this intrinsic has side-effects, chainify it. @@ -5336,17 +5343,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } } - // Info is set by getTgtMemIntrinsic - TargetLowering::IntrinsicInfo Info; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, - DAG.getMachineFunction(), - Intrinsic); - // Add the intrinsic ID as an integer operand if it's not a target intrinsic. - if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || - Info.opc == ISD::INTRINSIC_W_CHAIN) - Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(), + if (!TgtMemIntrinsicInfo || TgtMemIntrinsicInfo->opc == ISD::INTRINSIC_VOID || + TgtMemIntrinsicInfo->opc == ISD::INTRINSIC_W_CHAIN) + Ops.push_back(DAG.getTargetConstant(I.getIntrinsicID(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); // Add all operands of the call to the operand list. 
@@ -5369,13 +5369,85 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } } + if (std::optional<OperandBundleUse> Bundle = + I.getOperandBundle(LLVMContext::OB_convergencectrl)) { + Value *Token = Bundle->Inputs[0].get(); + SDValue ConvControlToken = getValue(Token); + assert(Ops.back().getValueType() != MVT::Glue && + "Did not expect another glue node here."); + ConvControlToken = + DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue, ConvControlToken); + Ops.push_back(ConvControlToken); + } + + return Ops; +} + +SDVTList SelectionDAGBuilder::getTargetIntrinsicVTList(const CallBase &I, + bool HasChain) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs); if (HasChain) ValueVTs.push_back(MVT::Other); - SDVTList VTs = DAG.getVTList(ValueVTs); + return DAG.getVTList(ValueVTs); +} + +/// Get an INTRINSIC node for a target intrinsic which does not touch memory. +SDValue SelectionDAGBuilder::getTargetNonMemIntrinsicNode( + const Type &IntrinsicVT, bool HasChain, ArrayRef<SDValue> Ops, + const SDVTList &VTs) { + if (!HasChain) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); + if (!IntrinsicVT.isVoidTy()) + return DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops); + return DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops); +} + +/// Set root, convert return type if necessary and check alignment. +SDValue SelectionDAGBuilder::handleTargetIntrinsicRet(const CallBase &I, + bool HasChain, + bool OnlyLoad, + SDValue Result) { + if (HasChain) { + SDValue Chain = Result.getValue(Result.getNode()->getNumValues() - 1); + if (OnlyLoad) + PendingLoads.push_back(Chain); + else + DAG.setRoot(Chain); + } + + if (I.getType()->isVoidTy()) + return Result; + + if (MaybeAlign Alignment = I.getRetAlign(); InsertAssertAlign && Alignment) { + // Insert `assertalign` node if there's an alignment. + Result = DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne()); + } else if (!isa<VectorType>(I.getType())) { + Result = lowerRangeToAssertZExt(DAG, I, Result); + } + + return Result; +} + +/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC +/// node. +void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, + unsigned Intrinsic) { + auto [HasChain, OnlyLoad] = getTargetIntrinsicCallProperties(I); + + // Info is set by getTgtMemIntrinsic + TargetLowering::IntrinsicInfo Info; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + bool IsTgtMemIntrinsic = + TLI.getTgtMemIntrinsic(Info, I, DAG.getMachineFunction(), Intrinsic); + + SmallVector<SDValue, 8> Ops = getTargetIntrinsicOperands( + I, HasChain, OnlyLoad, IsTgtMemIntrinsic ? &Info : nullptr); + SDVTList VTs = getTargetIntrinsicVTList(I, HasChain); // Propagate fast-math-flags from IR to node(s). SDNodeFlags Flags; @@ -5386,19 +5458,9 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Create the node. SDValue Result; - if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) { - auto *Token = Bundle->Inputs[0].get(); - SDValue ConvControlToken = getValue(Token); - assert(Ops.back().getValueType() != MVT::Glue && - "Did not expected another glue node here."); - ConvControlToken = - DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue, ConvControlToken); - Ops.push_back(ConvControlToken); - } - // In some cases, custom collection of operands from CallInst I may be needed. 
TLI.CollectTargetIntrinsicOperands(I, Ops, DAG); - if (IsTgtIntrinsic) { + if (IsTgtMemIntrinsic) { // This is target intrinsic that touches memory // // TODO: We currently just fallback to address space 0 if getTgtMemIntrinsic @@ -5418,34 +5480,11 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, Info.ssid, Info.order, Info.failureOrder); Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, MemVT, MMO); - } else if (!HasChain) { - Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); - } else if (!I.getType()->isVoidTy()) { - Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops); } else { - Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops); + Result = getTargetNonMemIntrinsicNode(*I.getType(), HasChain, Ops, VTs); } - if (HasChain) { - SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1); - if (OnlyLoad) - PendingLoads.push_back(Chain); - else - DAG.setRoot(Chain); - } - - if (!I.getType()->isVoidTy()) { - if (!isa<VectorType>(I.getType())) - Result = lowerRangeToAssertZExt(DAG, I, Result); - - MaybeAlign Alignment = I.getRetAlign(); - - // Insert `assertalign` node if there's an alignment. - if (InsertAssertAlign && Alignment) { - Result = - DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne()); - } - } + Result = handleTargetIntrinsicRet(I, HasChain, OnlyLoad, Result); setValue(&I, Result); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 47e19f7..ed63bee 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -727,6 +727,17 @@ private: MCSymbol *&BeginLabel); SDValue lowerEndEH(SDValue Chain, const InvokeInst *II, const BasicBlock *EHPadBB, MCSymbol *BeginLabel); + + std::pair<bool, bool> getTargetIntrinsicCallProperties(const CallBase &I); + SmallVector<SDValue, 8> getTargetIntrinsicOperands( + const CallBase &I, bool HasChain, bool OnlyLoad, + TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo = nullptr); + SDVTList getTargetIntrinsicVTList(const CallBase &I, bool HasChain); + SDValue getTargetNonMemIntrinsicNode(const Type &IntrinsicVT, bool HasChain, + ArrayRef<SDValue> Ops, + const SDVTList &VTs); + SDValue handleTargetIntrinsicRet(const CallBase &I, bool HasChain, + bool OnlyLoad, SDValue Result); }; /// This struct represents the registers (physical or virtual) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index d08f9b9..40e6400 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -50,6 +50,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SDPatternMatch.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetCallingConv.h" @@ -104,7 +105,6 @@ #include <vector> using namespace llvm; -using namespace llvm::PatternMatch; #define DEBUG_TYPE "aarch64-lower" @@ -1174,6 +1174,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::VECTOR_DEINTERLEAVE); + setTargetDAGCombine(ISD::CTPOP); // In case of strict alignment, avoid an excessive number of byte wide stores. 
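  // The ISD::CTPOP combine registered above is implemented by
  // performCTPOPCombine later in this file: it rewrites
  // ctpop(zext(bitcast(<N x i1> M))) to neg(vecreduce_add(sext M)).
  // Worked example: M = <1,0,1,1> sign-extends to <-1,0,-1,-1>, the add
  // reduction gives -3, and negating recovers ctpop = 3. The sign extension
  // matches the all-ones masks AArch64 compares produce, so a cmgt result
  // can feed saddlv directly.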
MaxStoresPerMemsetOptSize = 8; @@ -11330,9 +11331,10 @@ SDValue AArch64TargetLowering::LowerMinMax(SDValue Op, break; } + // Note: This lowering only overrides NEON for v1i64 and v2i64, where we + // prefer using SVE if available. if (VT.isScalableVector() || - useSVEForFixedLengthVectorVT( - VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) { + useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true)) { switch (Opcode) { default: llvm_unreachable("Wrong instruction"); @@ -17554,6 +17556,7 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion( // udot instruction. if (SrcWidth * 4 <= DstWidth) { if (all_of(I->users(), [&](auto *U) { + using namespace llvm::PatternMatch; auto *SingleUser = cast<Instruction>(&*U); if (match(SingleUser, m_c_Mul(m_Specific(I), m_SExt(m_Value())))) return true; @@ -17825,6 +17828,7 @@ bool AArch64TargetLowering::lowerInterleavedLoad( // into shift / and masks. For the moment we do this just for uitofp (not // zext) to avoid issues with widening instructions. if (Shuffles.size() == 4 && all_of(Shuffles, [](ShuffleVectorInst *SI) { + using namespace llvm::PatternMatch; return SI->hasOneUse() && match(SI->user_back(), m_UIToFP(m_Value())) && SI->getType()->getScalarSizeInBits() * 4 == SI->user_back()->getType()->getScalarSizeInBits(); @@ -27841,6 +27845,35 @@ static SDValue performRNDRCombine(SDNode *N, SelectionDAG &DAG) { {A, DAG.getZExtOrTrunc(B, DL, MVT::i1), A.getValue(2)}, DL); } +static SDValue performCTPOPCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + SelectionDAG &DAG) { + using namespace llvm::SDPatternMatch; + if (!DCI.isBeforeLegalize()) + return SDValue(); + + // ctpop(zext(bitcast(vector_mask))) -> neg(signed_reduce_add(vector_mask)) + SDValue Mask; + if (!sd_match(N->getOperand(0), m_ZExt(m_BitCast(m_Value(Mask))))) + return SDValue(); + + EVT VT = N->getValueType(0); + EVT MaskVT = Mask.getValueType(); + + if (VT.isVector() || !MaskVT.isFixedLengthVector() || + MaskVT.getVectorElementType() != MVT::i1) + return SDValue(); + + EVT ReduceInVT = + EVT::getVectorVT(*DAG.getContext(), VT, MaskVT.getVectorElementCount()); + + SDLoc DL(N); + // Sign extend to best fit ZeroOrNegativeOneBooleanContent. + SDValue ExtMask = DAG.getNode(ISD::SIGN_EXTEND, DL, ReduceInVT, Mask); + SDValue NegPopCount = DAG.getNode(ISD::VECREDUCE_ADD, DL, VT, ExtMask); + return DAG.getNegative(NegPopCount, DL, VT); +} + SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -28186,6 +28219,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performScalarToVectorCombine(N, DCI, DAG); case ISD::SHL: return performSHLCombine(N, DCI, DAG); + case ISD::CTPOP: + return performCTPOPCombine(N, DCI, DAG); } return SDValue(); } diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 636e31c..bf9de0a 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -1583,7 +1583,10 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { if (!TII->isAddImmediate(*DeadMI, Reg)) continue; LIS->RemoveMachineInstrFromMaps(*DeadMI); + Register AddReg = DeadMI->getOperand(1).getReg(); DeadMI->eraseFromParent(); + if (AddReg.isVirtual()) + LIS->shrinkToUses(&LIS->getInterval(AddReg)); } } } @@ -1869,11 +1872,15 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const { // Loop over the dead AVL values, and delete them now. 
This has // to be outside the above loop to avoid invalidating iterators. for (auto *MI : ToDelete) { + assert(MI->getOpcode() == RISCV::ADDI); + Register AddReg = MI->getOperand(1).getReg(); if (LIS) { LIS->removeInterval(MI->getOperand(0).getReg()); LIS->RemoveMachineInstrFromMaps(*MI); } MI->eraseFromParent(); + if (LIS && AddReg.isVirtual()) + LIS->shrinkToUses(&LIS->getInterval(AddReg)); } } diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp index 56a38bb..b2cbdb2 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp @@ -2390,6 +2390,15 @@ static bool generateBindlessImageINTELInst(const SPIRV::IncomingCall *Call, return buildBindlessImageINTELInst(Call, Opcode, MIRBuilder, GR); } +static bool generateBlockingPipesInst(const SPIRV::IncomingCall *Call, + MachineIRBuilder &MIRBuilder, + SPIRVGlobalRegistry *GR) { + const SPIRV::DemangledBuiltin *Builtin = Call->Builtin; + unsigned Opcode = + SPIRV::lookupNativeBuiltin(Builtin->Name, Builtin->Set)->Opcode; + return buildOpFromWrapper(MIRBuilder, Opcode, Call, Register(0)); +} + static bool generateTernaryBitwiseFunctionINTELInst(const SPIRV::IncomingCall *Call, MachineIRBuilder &MIRBuilder, @@ -3050,6 +3059,8 @@ std::optional<bool> lowerBuiltin(const StringRef DemangledCall, return generatePipeInst(Call.get(), MIRBuilder, GR); case SPIRV::PredicatedLoadStore: return generatePredicatedLoadStoreInst(Call.get(), MIRBuilder, GR); + case SPIRV::BlockingPipes: + return generateBlockingPipesInst(Call.get(), MIRBuilder, GR); } return false; } diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td index c259cce..492a98e 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td @@ -71,6 +71,7 @@ def TernaryBitwiseINTEL : BuiltinGroup; def Block2DLoadStore : BuiltinGroup; def Pipe : BuiltinGroup; def PredicatedLoadStore : BuiltinGroup; +def BlockingPipes : BuiltinGroup; //===----------------------------------------------------------------------===// // Class defining a demangled builtin record. 
The information in the record @@ -1174,6 +1175,10 @@ defm : DemangledNativeBuiltin<"clock_read_sub_group", OpenCL_std, KernelClock, 0 defm : DemangledNativeBuiltin<"clock_read_hilo_device", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>; defm : DemangledNativeBuiltin<"clock_read_hilo_work_group", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>; defm : DemangledNativeBuiltin<"clock_read_hilo_sub_group", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>; + +//SPV_ALTERA_blocking_pipes +defm : DemangledNativeBuiltin<"__spirv_WritePipeBlockingINTEL", OpenCL_std, BlockingPipes, 0, 0, OpWritePipeBlockingALTERA>; +defm : DemangledNativeBuiltin<"__spirv_ReadPipeBlockingINTEL", OpenCL_std, BlockingPipes, 0, 0, OpReadPipeBlockingALTERA>; defm : DemangledNativeBuiltin<"__spirv_ReadClockKHR", OpenCL_std, KernelClock, 1, 1, OpReadClockKHR>; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp index 43b2869..f681b0d 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp @@ -159,7 +159,9 @@ static const std::map<std::string, SPIRV::Extension::Extension, std::less<>> {"SPV_KHR_maximal_reconvergence", SPIRV::Extension::Extension::SPV_KHR_maximal_reconvergence}, {"SPV_INTEL_kernel_attributes", - SPIRV::Extension::Extension::SPV_INTEL_kernel_attributes}}; + SPIRV::Extension::Extension::SPV_INTEL_kernel_attributes}, + {"SPV_ALTERA_blocking_pipes", + SPIRV::Extension::Extension::SPV_ALTERA_blocking_pipes}}; bool SPIRVExtensionsParser::parse(cl::Option &O, StringRef ArgName, StringRef ArgValue, diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td index a61351e..03bd61b 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td @@ -993,3 +993,9 @@ def OpPredicatedLoadINTEL: Op<6528, (outs ID:$res), (ins TYPE:$resType, ID:$ptr, "$res = OpPredicatedLoadINTEL $resType $ptr $predicate $default_value">; def OpPredicatedStoreINTEL: Op<6529, (outs), (ins ID:$ptr, ID:$object, ID:$predicate, variable_ops), "OpPredicatedStoreINTEL $ptr $object $predicate">; + +//SPV_ALTERA_blocking_pipes +def OpReadPipeBlockingALTERA :Op<5946, (outs), (ins ID:$pipe, ID:$pointer, ID:$packetSize, ID:$packetAlignment), + "OpReadPipeBlockingALTERA $pipe $pointer $packetSize $packetAlignment">; +def OpWritePipeBlockingALTERA :Op<5947, (outs), (ins ID:$pipe, ID:$pointer, ID:$packetSize, ID:$packetAlignment), + "OpWritePipeBlockingALTERA $pipe $pointer $packetSize $packetAlignment">; diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index e5ac76c4..af76016 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -1885,6 +1885,13 @@ void addInstrRequirements(const MachineInstr &MI, Reqs.addCapability( SPIRV::Capability::CooperativeMatrixCheckedInstructionsINTEL); break; + case SPIRV::OpReadPipeBlockingALTERA: + case SPIRV::OpWritePipeBlockingALTERA: + if (ST.canUseExtension(SPIRV::Extension::SPV_ALTERA_blocking_pipes)) { + Reqs.addExtension(SPIRV::Extension::SPV_ALTERA_blocking_pipes); + Reqs.addCapability(SPIRV::Capability::BlockingPipesALTERA); + } + break; case SPIRV::OpCooperativeMatrixGetElementCoordINTEL: if (!ST.canUseExtension(SPIRV::Extension::SPV_INTEL_joint_matrix)) report_fatal_error("OpCooperativeMatrixGetElementCoordINTEL requires the " diff --git
a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp index 4e4e6fb..be88f33 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp @@ -56,6 +56,13 @@ public: } }; +static cl::list<std::string> SPVAllowUnknownIntrinsics( + "spv-allow-unknown-intrinsics", cl::CommaSeparated, + cl::desc("Emit unknown intrinsics as calls to external functions. A " + "comma-separated input list of intrinsic prefixes must be " + "provided, and only intrinsics carrying a listed prefix get " + "emitted as described."), + cl::value_desc("intrinsic_prefix_0,intrinsic_prefix_1"), cl::ValueOptional); } // namespace char SPIRVPrepareFunctions::ID = 0; @@ -445,6 +452,15 @@ bool SPIRVPrepareFunctions::substituteIntrinsicCalls(Function *F) { EraseFromParent); Changed = true; break; + default: + if (TM.getTargetTriple().getVendor() == Triple::AMD || + any_of(SPVAllowUnknownIntrinsics, [II](auto &&Prefix) { + if (Prefix.empty()) + return false; + return II->getCalledFunction()->getName().starts_with(Prefix); + })) + Changed |= lowerIntrinsicToFunction(II); + break; } } } diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td index 1b4b29b..65a8885 100644 --- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td +++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td @@ -309,7 +309,7 @@ defm SPV_KHR_shader_clock : ExtensionOperand<54, [EnvVulkan, EnvOpenCL]>; defm SPV_INTEL_unstructured_loop_controls : ExtensionOperand<55, [EnvOpenCL]>; defm SPV_EXT_demote_to_helper_invocation : ExtensionOperand<56, [EnvVulkan]>; defm SPV_INTEL_fpga_reg : ExtensionOperand<57, [EnvOpenCL]>; -defm SPV_INTEL_blocking_pipes : ExtensionOperand<58, [EnvOpenCL]>; +defm SPV_ALTERA_blocking_pipes : ExtensionOperand<58, [EnvOpenCL]>; defm SPV_GOOGLE_user_type : ExtensionOperand<59, [EnvVulkan]>; defm SPV_KHR_physical_storage_buffer : ExtensionOperand<60, [EnvVulkan]>; defm SPV_INTEL_kernel_attributes : ExtensionOperand<61, [EnvOpenCL]>; @@ -611,6 +611,7 @@ defm TensorFloat32RoundingINTEL : CapabilityOperand<6425, 0, 0, [SPV_INTEL_tenso defm BFloat16TypeKHR : CapabilityOperand<5116, 0, 0, [SPV_KHR_bfloat16], []>; defm BFloat16DotProductKHR : CapabilityOperand<5117, 0, 0, [SPV_KHR_bfloat16], [BFloat16TypeKHR]>; defm BFloat16CooperativeMatrixKHR : CapabilityOperand<5118, 0, 0, [SPV_KHR_bfloat16], [BFloat16TypeKHR, CooperativeMatrixKHR]>; +defm BlockingPipesALTERA : CapabilityOperand<5945, 0, 0, [SPV_ALTERA_blocking_pipes], []>; //===----------------------------------------------------------------------===// // Multiclass used to define SourceLanguage enum values and at the same time diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 4d44227b3..168e041 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -53442,7 +53442,8 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL, } SDValue NewStore = - DAG.getStore(St->getChain(), DL, Res, NewPtr, St->getPointerInfo(), + DAG.getStore(St->getChain(), DL, Res, NewPtr, + MachinePointerInfo(St->getPointerInfo().getAddrSpace()), Align(), St->getMemOperand()->getFlags()); // If there are other uses of StoredVal, replace with a new load of the @@ -54639,7 +54640,8 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, SDValue NewPtr = DAG.getMemBasePlusOffset( Ld->getBasePtr(), PtrByteOfs, DL, SDNodeFlags::NoUnsignedWrap); SDValue NewLoad = - 
DAG.getLoad(VT, DL, Ld->getChain(), NewPtr, Ld->getPointerInfo(), + DAG.getLoad(VT, DL, Ld->getChain(), NewPtr, + MachinePointerInfo(Ld->getPointerInfo().getAddrSpace()), Align(), Ld->getMemOperand()->getFlags()); DAG.makeEquivalentMemoryOrdering(Ld, NewLoad); return NewLoad; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 8670822..3062e1c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1163,10 +1163,10 @@ public: bool opcodeMayReadOrWriteFromMemory() const; /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override; + bool usesFirstLaneOnly(const VPValue *Op) const override; /// Returns true if the recipe only uses the first part of operand \p Op. - bool onlyFirstPartUsed(const VPValue *Op) const override; + bool usesFirstPartOnly(const VPValue *Op) const override; /// Returns true if this VPInstruction produces a scalar value from a vector, /// e.g. by performing a reduction or extracting a lane. @@ -1393,13 +1393,13 @@ public: return true; } - bool onlyFirstPartUsed(const VPValue *Op) const override { + bool usesFirstPartOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return true; } - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return true; @@ -1628,7 +1628,7 @@ public: VPSlotTracker &SlotTracker) const override; #endif - bool onlyFirstLaneUsed(const VPValue *Op) const override; + bool usesFirstLaneOnly(const VPValue *Op) const override; }; /// A recipe for widening Call instructions using library calls. @@ -1767,7 +1767,7 @@ struct LLVM_ABI_FOR_TEST VPWidenSelectRecipe : public VPRecipeWithIRFlags, } /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return Op == getCond() && isInvariantCond(); @@ -1833,7 +1833,7 @@ public: #endif /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); if (Op == getOperand(0)) @@ -1870,7 +1870,7 @@ public: void execute(VPTransformState &State) override; - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return true; @@ -1884,7 +1884,7 @@ public: } /// Returns true if the recipe only uses the first part of operand \p Op. 
- bool onlyFirstPartUsed(const VPValue *Op) const override { + bool usesFirstPartOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); assert(getNumOperands() <= 2 && "must have at most two operands"); @@ -1922,14 +1922,14 @@ public: Type *getSourceElementType() const { return SourceElementTy; } - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return true; } /// Returns true if the recipe only uses the first part of operand \p Op. - bool onlyFirstPartUsed(const VPValue *Op) const override { + bool usesFirstPartOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); assert(getNumOperands() <= 2 && "must have at most two operands"); @@ -2110,7 +2110,7 @@ public: } /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); // The recipe creates its own wide start value, so it only requests the @@ -2325,7 +2325,7 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe { #endif /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return Op == getStartValue(); @@ -2399,7 +2399,7 @@ public: bool isInLoop() const { return IsInLoop; } /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return isOrdered() || isInLoop(); @@ -2468,13 +2468,13 @@ public: #endif /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); // Recursing through Blend recipes only, must terminate at header phi's the // latest. return all_of(users(), - [this](VPUser *U) { return U->onlyFirstLaneUsed(this); }); + [this](VPUser *U) { return U->usesFirstLaneOnly(this); }); } }; @@ -2562,7 +2562,7 @@ public: VPCostContext &Ctx) const override; /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override = 0; + bool usesFirstLaneOnly(const VPValue *Op) const override = 0; /// Returns the number of stored operands of this interleave group. Returns 0 /// for load interleave groups. @@ -2608,7 +2608,7 @@ public: VPSlotTracker &SlotTracker) const override; #endif - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op); @@ -2656,7 +2656,7 @@ public: #endif /// The recipe only uses the first lane of the address, and EVL operand. 
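  /// (Stored values are excluded by the check below: all of their lanes are
  /// demanded, so only the shared address and the scalar EVL qualify.)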
- bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) || @@ -2862,7 +2862,7 @@ public: VPValue *getEVL() const { return getOperand(2); } /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return Op == getEVL(); @@ -2924,7 +2924,7 @@ public: bool isPredicated() const { return IsPredicated; } /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return isSingleScalar(); @@ -3212,9 +3212,8 @@ protected: Alignment(getLoadStoreAlignment(&I)), Consecutive(Consecutive), Reverse(Reverse) { assert((Consecutive || !Reverse) && "Reverse implies consecutive"); - assert(isa<VPVectorEndPointerRecipe>(getAddr()) || - !Reverse && - "Reversed acccess without VPVectorEndPointerRecipe address?"); + assert((isa<VPVectorEndPointerRecipe>(getAddr()) || !Reverse) && + "Reversed acccess without VPVectorEndPointerRecipe address?"); } public: @@ -3300,7 +3299,7 @@ struct LLVM_ABI_FOR_TEST VPWidenLoadRecipe final : public VPWidenMemoryRecipe, #endif /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); // Widened, consecutive loads operations only demand the first lane of @@ -3341,7 +3340,7 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue { #endif /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); // Widened loads only demand the first lane of EVL and consecutive loads @@ -3382,7 +3381,7 @@ struct LLVM_ABI_FOR_TEST VPWidenStoreRecipe final : public VPWidenMemoryRecipe { #endif /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); // Widened, consecutive stores only demand the first lane of their address, @@ -3425,7 +3424,7 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe { #endif /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); if (Op == getEVL()) { @@ -3509,14 +3508,14 @@ public: } /// Returns true if the recipe only uses the first lane of operand \p Op. 
- bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return true; } /// Returns true if the recipe only uses the first part of operand \p Op. - bool onlyFirstPartUsed(const VPValue *Op) const override { + bool usesFirstPartOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return true; @@ -3591,7 +3590,7 @@ public: } /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return true; @@ -3701,7 +3700,7 @@ public: VPValue *getStepValue() const { return getOperand(2); } /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return true; @@ -3766,7 +3765,7 @@ public: VPValue *getStepValue() const { return getOperand(1); } /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { + bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return true; diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index f792d0a..80cd112 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1276,7 +1276,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { } } -bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { +bool VPInstruction::usesFirstLaneOnly(const VPValue *Op) const { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); if (Instruction::isBinaryOp(getOpcode()) || Instruction::isCast(getOpcode())) return vputils::onlyFirstLaneUsed(this); @@ -1325,7 +1325,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { llvm_unreachable("switch should return"); } -bool VPInstruction::onlyFirstPartUsed(const VPValue *Op) const { +bool VPInstruction::usesFirstPartOnly(const VPValue *Op) const { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); if (Instruction::isBinaryOp(getOpcode())) return vputils::onlyFirstPartUsed(this); @@ -1692,7 +1692,7 @@ void VPWidenCallRecipe::execute(VPTransformState &State) { if (!VFTy->getParamType(I.index())->isVectorTy()) Arg = State.get(I.value(), VPLane(0)); else - Arg = State.get(I.value(), onlyFirstLaneUsed(I.value())); + Arg = State.get(I.value(), usesFirstLaneOnly(I.value())); Args.push_back(Arg); } @@ -1761,7 +1761,7 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) { State.TTI)) Arg = State.get(I.value(), VPLane(0)); else - Arg = State.get(I.value(), onlyFirstLaneUsed(I.value())); + Arg = State.get(I.value(), usesFirstLaneOnly(I.value())); if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index(), State.TTI)) TysForDecl.push_back(Arg->getType()); @@ -1843,7 +1843,7 @@ StringRef VPWidenIntrinsicRecipe::getIntrinsicName() const { return Intrinsic::getBaseName(VectorIntrinsicID); } -bool VPWidenIntrinsicRecipe::onlyFirstLaneUsed(const 
VPValue *Op) const { +bool VPWidenIntrinsicRecipe::usesFirstLaneOnly(const VPValue *Op) const { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return all_of(enumerate(operands()), [this, &Op](const auto &X) { auto [Idx, V] = X; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 82bf79e..48bd697 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -204,7 +204,7 @@ static bool sinkScalarOperands(VPlan &Plan) { return cast<VPRecipeBase>(U)->getParent() != SinkTo; }); if (any_of(UsersOutsideSinkTo, [SinkCandidate](VPUser *U) { - return !U->onlyFirstLaneUsed(SinkCandidate); + return !U->usesFirstLaneOnly(SinkCandidate); })) continue; bool NeedsDuplicating = !UsersOutsideSinkTo.empty(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index d6a0028..d4b8b72b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -582,7 +582,7 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) { /// Users that only demand the first lane can use the definition for lane /// 0. DefR->replaceUsesWithIf(LaneDefs[0], [DefR](VPUser &U, unsigned) { - return U.onlyFirstLaneUsed(DefR); + return U.usesFirstLaneOnly(DefR); }); // Update each build vector user that currently has DefR as its only diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index c6380d3..e22c5df 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -18,12 +18,12 @@ using namespace llvm::VPlanPatternMatch; bool vputils::onlyFirstLaneUsed(const VPValue *Def) { return all_of(Def->users(), - [Def](const VPUser *U) { return U->onlyFirstLaneUsed(Def); }); + [Def](const VPUser *U) { return U->usesFirstLaneOnly(Def); }); } bool vputils::onlyFirstPartUsed(const VPValue *Def) { return all_of(Def->users(), - [Def](const VPUser *U) { return U->onlyFirstPartUsed(Def); }); + [Def](const VPUser *U) { return U->usesFirstPartOnly(Def); }); } bool vputils::onlyScalarValuesUsed(const VPValue *Def) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 83e3fca..5da7463 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -274,12 +274,12 @@ public: virtual bool usesScalars(const VPValue *Op) const { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); - return onlyFirstLaneUsed(Op); + return usesFirstLaneOnly(Op); } /// Returns true if the VPUser only uses the first lane of operand \p Op. /// Conservatively returns false. - virtual bool onlyFirstLaneUsed(const VPValue *Op) const { + virtual bool usesFirstLaneOnly(const VPValue *Op) const { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return false; @@ -287,7 +287,7 @@ public: /// Returns true if the VPUser only uses the first part of operand \p Op. /// Conservatively returns false. 
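  /// (vputils::onlyFirstPartUsed in VPlanUtils.cpp lifts this per-user query
  /// to a whole VPValue by checking it on each of the value's users.)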
- virtual bool onlyFirstPartUsed(const VPValue *Op) const { + virtual bool usesFirstPartOnly(const VPValue *Op) const { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); return false; diff --git a/llvm/test/CodeGen/AArch64/popcount_vmask.ll b/llvm/test/CodeGen/AArch64/popcount_vmask.ll new file mode 100644 index 0000000..e784ead --- /dev/null +++ b/llvm/test/CodeGen/AArch64/popcount_vmask.ll @@ -0,0 +1,315 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +define i32 @vmask_popcount_i32_v8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: vmask_popcount_i32_v8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: cmgt v0.8b, v1.8b, v0.8b +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: saddlv s0, v0.8h +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: neg w0, w8 +; CHECK-NEXT: ret + %mask = icmp slt <8 x i8> %a, %b + %t1 = bitcast <8 x i1> %mask to i8 + %t2 = call i8 @llvm.ctpop(i8 %t1) + %t3 = zext i8 %t2 to i32 + ret i32 %t3 +} + +define i32 @vmask_popcount_i32_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vmask_popcount_i32_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: cmgt v0.16b, v1.16b, v0.16b +; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0 +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: saddl2 v2.4s, v0.8h, v1.8h +; CHECK-NEXT: saddl v0.4s, v0.4h, v1.4h +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: neg w0, w8 +; CHECK-NEXT: ret + %mask = icmp slt <16 x i8> %a, %b + %t1 = bitcast <16 x i1> %mask to i16 + %t2 = call i16 @llvm.ctpop(i16 %t1) + %t3 = zext i16 %t2 to i32 + ret i32 %t3 +} + +define i32 @vmask_popcount_i32_v4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: vmask_popcount_i32_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: cmgt v0.4h, v1.4h, v0.4h +; CHECK-NEXT: saddlv s0, v0.4h +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: neg w0, w8 +; CHECK-NEXT: ret + %mask = icmp slt <4 x i16> %a, %b + %t1 = bitcast <4 x i1> %mask to i4 + %t2 = call i4 @llvm.ctpop(i4 %t1) + %t3 = zext i4 %t2 to i32 + ret i32 %t3 +} + +define i32 @vmask_popcount_i32_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vmask_popcount_i32_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: cmgt v0.8h, v1.8h, v0.8h +; CHECK-NEXT: saddlv s0, v0.8h +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: neg w0, w8 +; CHECK-NEXT: ret + %mask = icmp slt <8 x i16> %a, %b + %t1 = bitcast <8 x i1> %mask to i8 + %t2 = call i8 @llvm.ctpop(i8 %t1) + %t3 = zext i8 %t2 to i32 + ret i32 %t3 +} + +define i32 @vmask_popcount_i32_v2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: vmask_popcount_i32_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: cmgt v0.2s, v1.2s, v0.2s +; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: neg w0, w8 +; CHECK-NEXT: ret + %mask = icmp slt <2 x i32> %a, %b + %t1 = bitcast <2 x i1> %mask to i2 + %t2 = call i2 @llvm.ctpop(i2 %t1) + %t3 = zext i2 %t2 to i32 + ret i32 %t3 +} + +define i32 @vmask_popcount_i32_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vmask_popcount_i32_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: neg w0, w8 +; CHECK-NEXT: ret + %mask = icmp slt <4 x i32> %a, %b + %t1 = bitcast <4 x i1> %mask to i4 + %t2 = call i4 @llvm.ctpop(i4 %t1) + %t3 = zext i4 %t2 to i32 + ret i32 %t3 +} + +define i32 @vmask_popcount_i32_v1i64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: vmask_popcount_i32_v1i64: 
+; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: fmov x8, d1 +; CHECK-NEXT: fmov x9, d0 +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: ret + %mask = icmp slt <1 x i64> %a, %b + %t1 = bitcast <1 x i1> %mask to i1 + %t2 = call i1 @llvm.ctpop(i1 %t1) + %t3 = zext i1 %t2 to i32 + ret i32 %t3 +} + +define i32 @vmask_popcount_i32_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: vmask_popcount_i32_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: cmgt v0.2d, v1.2d, v0.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: neg w0, w8 +; CHECK-NEXT: ret + %mask = icmp slt <2 x i64> %a, %b + %t1 = bitcast <2 x i1> %mask to i2 + %t2 = call i2 @llvm.ctpop(i2 %t1) + %t3 = zext i2 %t2 to i32 + ret i32 %t3 +} + +define i64 @vmask_popcount_i64_v8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: vmask_popcount_i64_v8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: cmgt v0.8b, v1.8b, v0.8b +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: saddlv s0, v0.8h +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: neg w0, w8 +; CHECK-NEXT: ret + %mask = icmp slt <8 x i8> %a, %b + %t1 = bitcast <8 x i1> %mask to i8 + %t2 = call i8 @llvm.ctpop(i8 %t1) + %t3 = zext i8 %t2 to i64 + ret i64 %t3 +} + +define i64 @vmask_popcount_i64_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vmask_popcount_i64_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: cmgt v0.16b, v1.16b, v0.16b +; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0 +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: saddl2 v2.4s, v0.8h, v1.8h +; CHECK-NEXT: saddl v0.4s, v0.4h, v1.4h +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: neg w0, w8 +; CHECK-NEXT: ret + %mask = icmp slt <16 x i8> %a, %b + %t1 = bitcast <16 x i1> %mask to i16 + %t2 = call i16 @llvm.ctpop(i16 %t1) + %t3 = zext i16 %t2 to i64 + ret i64 %t3 +} + +define i64 @vmask_popcount_i64_v4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: vmask_popcount_i64_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: cmgt v0.4h, v1.4h, v0.4h +; CHECK-NEXT: saddlv s0, v0.4h +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: neg w0, w8 +; CHECK-NEXT: ret + %mask = icmp slt <4 x i16> %a, %b + %t1 = bitcast <4 x i1> %mask to i4 + %t2 = call i4 @llvm.ctpop(i4 %t1) + %t3 = zext i4 %t2 to i64 + ret i64 %t3 +} + +define i64 @vmask_popcount_i64_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vmask_popcount_i64_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: cmgt v0.8h, v1.8h, v0.8h +; CHECK-NEXT: saddlv s0, v0.8h +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: neg w0, w8 +; CHECK-NEXT: ret + %mask = icmp slt <8 x i16> %a, %b + %t1 = bitcast <8 x i1> %mask to i8 + %t2 = call i8 @llvm.ctpop(i8 %t1) + %t3 = zext i8 %t2 to i64 + ret i64 %t3 +} + +define i64 @vmask_popcount_i64_v2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: vmask_popcount_i64_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: cmgt v0.2s, v1.2s, v0.2s +; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: neg w0, w8 +; CHECK-NEXT: ret + %mask = icmp slt <2 x i32> %a, %b + %t1 = bitcast <2 x i1> %mask to i2 + %t2 = call i2 @llvm.ctpop(i2 %t1) + %t3 = zext i2 %t2 to i64 + ret i64 %t3 +} + +define i64 @vmask_popcount_i64_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vmask_popcount_i64_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: neg w0, w8 +; CHECK-NEXT: ret + %mask = icmp slt 
<4 x i32> %a, %b + %t1 = bitcast <4 x i1> %mask to i4 + %t2 = call i4 @llvm.ctpop(i4 %t1) + %t3 = zext i4 %t2 to i64 + ret i64 %t3 +} + +define i64 @vmask_popcount_i64_v1i64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: vmask_popcount_i64_v1i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: fmov x8, d1 +; CHECK-NEXT: fmov x9, d0 +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: ret + %mask = icmp slt <1 x i64> %a, %b + %t1 = bitcast <1 x i1> %mask to i1 + %t2 = call i1 @llvm.ctpop(i1 %t1) + %t3 = zext i1 %t2 to i64 + ret i64 %t3 +} + +define i64 @vmask_popcount_i64_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: vmask_popcount_i64_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: cmgt v0.2d, v1.2d, v0.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: neg w0, w8 +; CHECK-NEXT: ret + %mask = icmp slt <2 x i64> %a, %b + %t1 = bitcast <2 x i1> %mask to i2 + %t2 = call i2 @llvm.ctpop(i2 %t1) + %t3 = zext i2 %t2 to i64 + ret i64 %t3 +} + +define i32 @non_vmask_popcount_1(half %a) { +; CHECK-LABEL: non_vmask_popcount_1: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: cnt v0.8b, v0.8b +; CHECK-NEXT: addv b0, v0.8b +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret + %t1 = bitcast half %a to i16 + %t2 = call i16 @llvm.ctpop(i16 %t1) + %t3 = zext i16 %t2 to i32 + ret i32 %t3 +} + +define i32 @non_vmask_popcount_2(<8 x i16> %a) { +; CHECK-LABEL: non_vmask_popcount_2: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: xtn v0.8b, v0.8h +; CHECK-NEXT: umov w8, v0.b[0] +; CHECK-NEXT: umov w9, v0.b[1] +; CHECK-NEXT: umov w10, v0.b[2] +; CHECK-NEXT: and w8, w8, #0x3 +; CHECK-NEXT: bfi w8, w9, #2, #2 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: bfi w8, w10, #4, #2 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: bfi w8, w9, #6, #2 +; CHECK-NEXT: umov w9, v0.b[5] +; CHECK-NEXT: bfi w8, w10, #8, #2 +; CHECK-NEXT: umov w10, v0.b[6] +; CHECK-NEXT: bfi w8, w9, #10, #2 +; CHECK-NEXT: umov w9, v0.b[7] +; CHECK-NEXT: bfi w8, w10, #12, #2 +; CHECK-NEXT: orr w8, w8, w9, lsl #14 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: cnt v0.8b, v0.8b +; CHECK-NEXT: addv b0, v0.8b +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %mask = trunc <8 x i16> %a to <8 x i2> + %t1 = bitcast <8 x i2> %mask to i16 + %t2 = call i16 @llvm.ctpop(i16 %t1) + %t3 = zext i16 %t2 to i32 + ret i32 %t3 +} diff --git a/llvm/test/CodeGen/AArch64/vector-minmax.ll b/llvm/test/CodeGen/AArch64/vector-minmax.ll new file mode 100644 index 0000000..6696f94 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/vector-minmax.ll @@ -0,0 +1,119 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s +; RUN: llc < %s -mtriple=aarch64 -mattr=+neon,+sve | FileCheck %s --check-prefix=CHECK-SVE + +define <2 x i64> @smax_v2i64(<2 x i64> %a, <2 x i64> %b){ +; CHECK-LABEL: smax_v2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmgt v2.2d, v0.2d, v1.2d +; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-NEXT: ret +; +; CHECK-SVE-LABEL: smax_v2i64: +; CHECK-SVE: // %bb.0: // %entry +; CHECK-SVE-NEXT: ptrue p0.d, vl2 +; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def 
$z0 +; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-SVE-NEXT: smax z0.d, p0/m, z0.d, z1.d +; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-SVE-NEXT: ret +entry: + %0 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %0 +} + +define <2 x i64> @smin_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: smin_v2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmgt v2.2d, v1.2d, v0.2d +; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-NEXT: ret +; +; CHECK-SVE-LABEL: smin_v2i64: +; CHECK-SVE: // %bb.0: // %entry +; CHECK-SVE-NEXT: ptrue p0.d, vl2 +; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-SVE-NEXT: smin z0.d, p0/m, z0.d, z1.d +; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-SVE-NEXT: ret +entry: + %0 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %0 +} + +define <2 x i64> @umax_v2i64(<2 x i64> %a, <2 x i64> %b){ +; CHECK-LABEL: umax_v2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmhi v2.2d, v0.2d, v1.2d +; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-NEXT: ret +; +; CHECK-SVE-LABEL: umax_v2i64: +; CHECK-SVE: // %bb.0: // %entry +; CHECK-SVE-NEXT: ptrue p0.d, vl2 +; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-SVE-NEXT: umax z0.d, p0/m, z0.d, z1.d +; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-SVE-NEXT: ret +entry: + %0 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %0 +} + +define <2 x i64> @umin_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: umin_v2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmhi v2.2d, v1.2d, v0.2d +; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-NEXT: ret +; +; CHECK-SVE-LABEL: umin_v2i64: +; CHECK-SVE: // %bb.0: // %entry +; CHECK-SVE-NEXT: ptrue p0.d, vl2 +; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-SVE-NEXT: umin z0.d, p0/m, z0.d, z1.d +; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-SVE-NEXT: ret +entry: + %0 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %0 +} + +define <1 x i64> @smax_v1i64(<1 x i64> %a, <1 x i64> %b){ +; CHECK-LABEL: smax_v1i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmgt d2, d0, d1 +; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b +; CHECK-NEXT: ret +; +; CHECK-SVE-LABEL: smax_v1i64: +; CHECK-SVE: // %bb.0: // %entry +; CHECK-SVE-NEXT: ptrue p0.d, vl1 +; CHECK-SVE-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-SVE-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-SVE-NEXT: smax z0.d, p0/m, z0.d, z1.d +; CHECK-SVE-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-SVE-NEXT: ret +entry: + %0 = call <1 x i64> @llvm.smax.v2i64(<1 x i64> %a, <1 x i64> %b) + ret <1 x i64> %0 +} + +; This is legal for Neon, so this should use the Neon smax. 
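+; (smax on v4i32 is already legal for NEON, so it never reaches the custom
+; LowerMinMax path; the OverrideNEON=true change earlier in this patch only
+; redirects the 64-bit-element cases above to SVE.)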
+define <4 x i32> @smax_v4i32(<4 x i32> %a, <4 x i32> %b){ +; CHECK-LABEL: smax_v4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret +; +; CHECK-SVE-LABEL: smax_v4i32: +; CHECK-SVE: // %bb.0: // %entry +; CHECK-SVE-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-SVE-NEXT: ret +entry: + %0 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %0 +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll index 20034b6..b6e29cf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll @@ -863,3 +863,19 @@ entry: i64 2) ret <vscale x 1 x double> %2 } + +; The two vsetvlis will be coalesced so the add will be made dead and +; removed. Make sure we shrink the live interval of %x. +define void @non_li_addi(i64 %x, ptr %p) { +; CHECK-LABEL: non_li_addi: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: ret +entry: + %add = add i64 %x, 1 + %0 = tail call i64 @llvm.riscv.vsetvli(i64 %add, i64 3, i64 0) + %1 = call <vscale x 8 x i8> @llvm.riscv.vle(<vscale x 8 x i8> poison, ptr %p, i64 %0) + %2 = tail call i64 @llvm.riscv.vsetvli(i64 1, i64 3, i64 0) + %3 = tail call { <vscale x 8 x i8>, i64 } @llvm.riscv.vleff(<vscale x 8 x i8> poison, ptr %p, i64 %2) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir index fdd30c9..f9929c9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir @@ -104,6 +104,10 @@ ret void } + define void @non_li_addi() { + ret void + } + declare <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i64>, i64) #1 declare <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>, ptr nocapture, i64) #4 @@ -664,3 +668,23 @@ body: | bb.2: $x10 = COPY %vl PseudoRET implicit killed $x10 +... +--- +# The two vsetvlis will be coalesced so the ADDI will be made dead and removed. +# Make sure we shrink the live interval of %0. 
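+# (Erasing the ADDI removes the last use of %0, so RISCVInsertVSETVLI now
+# recomputes its live interval with shrinkToUses; otherwise the interval
+# would still cover the erased instruction's slot.)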
+name: non_li_addi +tracksRegLiveness: true +body: | + bb.0: + liveins: $x10 + ; CHECK-LABEL: name: non_li_addi + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: dead [[PseudoVSETIVLI:%[0-9]+]]:gprnox0 = PseudoVSETIVLI 1, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: PseudoRET + %0:gpr = COPY $x10 + %1:gprnox0 = ADDI %0, 1 + %2:gprnox0 = PseudoVSETVLI %1, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype + %3:gprnox0 = PseudoVSETIVLI 1, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype + PseudoRET diff --git a/llvm/test/CodeGen/SPIRV/allow_unknown_intrinsics.ll b/llvm/test/CodeGen/SPIRV/allow_unknown_intrinsics.ll new file mode 100644 index 0000000..677291a --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/allow_unknown_intrinsics.ll @@ -0,0 +1,36 @@ +; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o %t.spvt 2>&1 | FileCheck -check-prefix=CHECK-ERROR %s +; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spv-allow-unknown-intrinsics %s -o %t.spvt 2>&1 | FileCheck -check-prefix=CHECK-ERROR %s +; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spv-allow-unknown-intrinsics=notllvm %s -o %t.spvt 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s +; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spv-allow-unknown-intrinsics=llvm.some.custom %s -o %t.spvt 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spv-allow-unknown-intrinsics=llvm. %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spv-allow-unknown-intrinsics=llvm.,random.prefix %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-amd-amdhsa %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spv-allow-unknown-intrinsics=llvm. %s -o - -filetype=obj | spirv-val %} +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-amd-amdhsa %s -o - -filetype=obj | spirv-val %} + +; The test checks command-line option which allows to represent unknown +; intrinsics as external function calls in SPIR-V. 
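+; Prefix matching uses starts_with on the intrinsic name: "llvm." covers both
+; calls below, while "notllvm" or "llvm.some.custom" leaves
+; llvm.readcyclecounter unhandled and llc still fails to legalize it. Empty
+; prefixes are ignored, and the spirv64-amd-amdhsa triple enables this
+; lowering even without the flag.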
+ +; CHECK-ERROR: LLVM ERROR: unable to legalize instruction: %3:iid(s64) = G_READCYCLECOUNTER (in function: foo) + +; CHECK: Name %[[READCYCLECOUNTER:[0-9]+]] "spirv.llvm_readcyclecounter" +; CHECK: Name %[[SOME_CUSTOM_INTRINSIC:[0-9]+]] "spirv.llvm_some_custom_intrinsic" +; CHECK-DAG: Decorate %[[READCYCLECOUNTER]] LinkageAttributes {{.*}} Import +; CHECK: Decorate %[[SOME_CUSTOM_INTRINSIC]] LinkageAttributes {{.*}} Import +; CHECK-DAG: %[[I64:[0-9]+]] = OpTypeInt 64 +; CHECK: %[[FnTy:[0-9]+]] = OpTypeFunction %[[I64]] +; CHECK: %[[READCYCLECOUNTER]] = OpFunction %[[I64]] {{.*}} %[[FnTy]] +; CHECK-DAG: %[[SOME_CUSTOM_INTRINSIC]] = OpFunction %[[I64]] {{.*}} %[[FnTy]] +; CHECK-DAG: OpFunctionCall %[[I64]] %[[READCYCLECOUNTER]] +; CHECK: OpFunctionCall %[[I64]] %[[SOME_CUSTOM_INTRINSIC]] + +define spir_func void @foo() { +entry: +; TODO: if and when the SPIR-V learns how to lower readcyclecounter, we will have to pick another unhandled intrinsic + %0 = call i64 @llvm.readcyclecounter() + %1 = call i64 @llvm.some.custom.intrinsic() + ret void +} + +declare i64 @llvm.readcyclecounter() +declare i64 @llvm.some.custom.intrinsic() diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_ALTERA_blocking_pipes/PipeBlocking.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_ALTERA_blocking_pipes/PipeBlocking.ll new file mode 100644 index 0000000..f6b6115 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_ALTERA_blocking_pipes/PipeBlocking.ll @@ -0,0 +1,98 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_ALTERA_blocking_pipes %s -o - | FileCheck %s --check-prefixes=CHECK-SPIRV +; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_ALTERA_blocking_pipes %s -o - -filetype=obj | spirv-val %} + +%opencl.pipe_ro_t = type opaque +%opencl.pipe_wo_t = type opaque + +; CHECK-SPIRV: OpCapability BlockingPipesALTERA +; CHECK-SPIRV: OpExtension "SPV_ALTERA_blocking_pipes" +; CHECK-SPIRV: %[[PipeRTy:[0-9]+]] = OpTypePipe ReadOnly +; CHECK-SPIRV: %[[PipeWTy:[0-9]+]] = OpTypePipe WriteOnly +; CHECK-SPIRV: %[[PipeR1:[0-9]+]] = OpLoad %[[PipeRTy]] %[[#]] Aligned 8 +; CHECK-SPIRV: OpReadPipeBlockingALTERA %[[PipeR1]] %[[#]] %[[#]] %[[#]] +; CHECK-SPIRV: %[[PipeR2:[0-9]+]] = OpLoad %[[PipeRTy]] %[[#]] Aligned 8 +; CHECK-SPIRV: OpReadPipeBlockingALTERA %[[PipeR2]] %[[#]] %[[#]] %[[#]] +; CHECK-SPIRV: %[[PipeW1:[0-9]+]] = OpLoad %[[PipeWTy]] %[[#]] Aligned 8 +; CHECK-SPIRV: OpWritePipeBlockingALTERA %[[PipeW1]] %[[#]] %[[#]] %[[#]] +; CHECK-SPIRV: %[[PipeW2:[0-9]+]] = OpLoad %[[PipeWTy]] %[[#]] Aligned 8 +; CHECK-SPIRV: OpWritePipeBlockingALTERA %[[PipeW2]] %[[#]] %[[#]] %[[#]] + +define spir_func void @foo(target("spirv.Pipe", 0) %p, ptr addrspace(1) %ptr) { +entry: + %p.addr = alloca target("spirv.Pipe", 0), align 8 + %ptr.addr = alloca ptr addrspace(1), align 8 + store target("spirv.Pipe", 0) %p, target("spirv.Pipe", 0)* %p.addr, align 8 + store ptr addrspace(1) %ptr, ptr %ptr.addr, align 8 + %0 = load target("spirv.Pipe", 0), target("spirv.Pipe", 0)* %p.addr, align 8 + %1 = load ptr addrspace(1), ptr %ptr.addr, align 8 + %2 = addrspacecast ptr addrspace(1) %1 to ptr addrspace(4) + call spir_func void @_Z29__spirv_ReadPipeBlockingINTELIiEv8ocl_pipePiii(target("spirv.Pipe", 0) %0, ptr addrspace(4) %2, i32 4, i32 4) + ret void +} + +declare dso_local spir_func void @_Z29__spirv_ReadPipeBlockingINTELIiEv8ocl_pipePiii(target("spirv.Pipe", 0), ptr addrspace(4), i32, i32) + +define spir_func void @bar(target("spirv.Pipe", 0) %p, ptr addrspace(1) %ptr) { 
+entry: + %p.addr = alloca target("spirv.Pipe", 0), align 8 + %ptr.addr = alloca ptr addrspace(1), align 8 + store target("spirv.Pipe", 0) %p, target("spirv.Pipe", 0)* %p.addr, align 8 + store ptr addrspace(1) %ptr, ptr %ptr.addr, align 8 + %0 = load target("spirv.Pipe", 0), target("spirv.Pipe", 0)* %p.addr, align 8 + %1 = load ptr addrspace(1), ptr %ptr.addr, align 8 + %2 = addrspacecast ptr addrspace(1) %1 to ptr addrspace(4) + call spir_func void @_Z29__spirv_ReadPipeBlockingINTELIiEv8ocl_pipePvii(target("spirv.Pipe", 0) %0, ptr addrspace(4) %2, i32 4, i32 4) + ret void +} + +declare dso_local spir_func void @_Z29__spirv_ReadPipeBlockingINTELIiEv8ocl_pipePvii(target("spirv.Pipe", 0), ptr addrspace(4), i32, i32) + +define spir_func void @boo(target("spirv.Pipe", 1) %p, ptr addrspace(1) %ptr) { +entry: + %p.addr = alloca target("spirv.Pipe", 1), align 8 + %ptr.addr = alloca ptr addrspace(1), align 8 + store target("spirv.Pipe", 1) %p, target("spirv.Pipe", 1)* %p.addr, align 8 + store ptr addrspace(1) %ptr, ptr %ptr.addr, align 8 + %0 = load target("spirv.Pipe", 1), target("spirv.Pipe", 1)* %p.addr, align 8 + %1 = load ptr addrspace(1), ptr %ptr.addr, align 8 + %2 = addrspacecast ptr addrspace(1) %1 to ptr addrspace(4) + call spir_func void @_Z30__spirv_WritePipeBlockingINTELIKiEv8ocl_pipePiii(target("spirv.Pipe", 1) %0, ptr addrspace(4) %2, i32 4, i32 4) + ret void +} + +declare dso_local spir_func void @_Z30__spirv_WritePipeBlockingINTELIKiEv8ocl_pipePiii(target("spirv.Pipe", 1), ptr addrspace(4), i32, i32) + +define spir_func void @baz(target("spirv.Pipe", 1) %p, ptr addrspace(1) %ptr) { +entry: + %p.addr = alloca target("spirv.Pipe", 1), align 8 + %ptr.addr = alloca ptr addrspace(1), align 8 + store target("spirv.Pipe", 1) %p, target("spirv.Pipe", 1)* %p.addr, align 8 + store ptr addrspace(1) %ptr, ptr %ptr.addr, align 8 + %0 = load target("spirv.Pipe", 1), target("spirv.Pipe", 1)* %p.addr, align 8 + %1 = load ptr addrspace(1), ptr %ptr.addr, align 8 + %2 = addrspacecast ptr addrspace(1) %1 to ptr addrspace(4) + call spir_func void @_Z30__spirv_WritePipeBlockingINTELIKiEv8ocl_pipePvii(target("spirv.Pipe", 1) %0, ptr addrspace(4) %2, i32 4, i32 4) + ret void +} + +declare dso_local spir_func void @_Z30__spirv_WritePipeBlockingINTELIKiEv8ocl_pipePvii(target("spirv.Pipe", 1), ptr addrspace(4), i32, i32) + +; CHECK-LLVM: declare spir_func void @__read_pipe_2_bl(ptr addrspace(1), ptr addrspace(4), i32, i32) +; CHECK-LLVM: declare spir_func void @__write_pipe_2_bl(ptr addrspace(1), ptr addrspace(4), i32, i32) + +define linkonce_odr dso_local spir_func void @WritePipeBLockingi9Pointer(ptr addrspace(4) align 2 dereferenceable(2) %_Data) { +entry: + %_Data.addr = alloca ptr addrspace(4), align 8 + %_WPipe = alloca target("spirv.Pipe", 1), align 8 + %_Data.addr.ascast = addrspacecast ptr %_Data.addr to ptr addrspace(4) + %_WPipe.ascast = addrspacecast target("spirv.Pipe", 1)* %_WPipe to target("spirv.Pipe", 1) addrspace(4)* + store ptr addrspace(4) %_Data, ptr addrspace(4) %_Data.addr.ascast, align 8 + %0 = bitcast target("spirv.Pipe", 1)* %_WPipe to ptr + %1 = load target("spirv.Pipe", 1), target("spirv.Pipe", 1) addrspace(4)* %_WPipe.ascast, align 8 + %2 = load ptr addrspace(4), ptr addrspace(4) %_Data.addr.ascast, align 8 + call spir_func void @_Z30__spirv_WritePipeBlockingINTELIDU9_Ev8ocl_pipePKT_ii(target("spirv.Pipe", 1) %1, ptr addrspace(4) %2, i32 2, i32 2) + ret void +} + +declare dso_local spir_func void @_Z30__spirv_WritePipeBlockingINTELIDU9_Ev8ocl_pipePKT_ii(target("spirv.Pipe", 1), 
ptr addrspace(4), i32, i32) +
\ No newline at end of file diff --git a/llvm/test/CodeGen/X86/pr166744.ll b/llvm/test/CodeGen/X86/pr166744.ll new file mode 100644 index 0000000..21b25d8 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr166744.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s --check-prefixes=POSTRA +; RUN: llc < %s -mtriple=x86_64-- -mcpu=haswell | FileCheck %s --check-prefixes=NOPOSTRA +; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=NOPOSTRA + +; Ensure reloads are after narrowed i512 -> i32 store +define i1 @PR166744(ptr %v, i64 %idx, i1 zeroext %b) { +; POSTRA-LABEL: PR166744: +; POSTRA: # %bb.0: +; POSTRA-NEXT: movl $1029, %eax # imm = 0x405 +; POSTRA-NEXT: shlxl %esi, %edx, %edx +; POSTRA-NEXT: bextrl %eax, %esi, %eax +; POSTRA-NEXT: movl (%rdi,%rax,4), %ecx +; POSTRA-NEXT: btrl %esi, %ecx +; POSTRA-NEXT: orl %ecx, %edx +; POSTRA-NEXT: movl %edx, (%rdi,%rax,4) +; POSTRA-NEXT: movq 16(%rdi), %rax +; POSTRA-NEXT: movq (%rdi), %rcx +; POSTRA-NEXT: movq 24(%rdi), %rdx +; POSTRA-NEXT: movq 8(%rdi), %rsi +; POSTRA-NEXT: orq 56(%rdi), %rdx +; POSTRA-NEXT: orq 40(%rdi), %rsi +; POSTRA-NEXT: orq 48(%rdi), %rax +; POSTRA-NEXT: orq 32(%rdi), %rcx +; POSTRA-NEXT: orq %rdx, %rsi +; POSTRA-NEXT: orq %rax, %rcx +; POSTRA-NEXT: orq %rsi, %rcx +; POSTRA-NEXT: setne %al +; POSTRA-NEXT: retq +; +; NOPOSTRA-LABEL: PR166744: +; NOPOSTRA: # %bb.0: +; NOPOSTRA-NEXT: movl %esi, %eax +; NOPOSTRA-NEXT: shrl $3, %eax +; NOPOSTRA-NEXT: andl $60, %eax +; NOPOSTRA-NEXT: movl (%rdi,%rax), %ecx +; NOPOSTRA-NEXT: btrl %esi, %ecx +; NOPOSTRA-NEXT: shlxl %esi, %edx, %edx +; NOPOSTRA-NEXT: orl %ecx, %edx +; NOPOSTRA-NEXT: movl %edx, (%rdi,%rax) +; NOPOSTRA-NEXT: movq 16(%rdi), %rax +; NOPOSTRA-NEXT: movq (%rdi), %rcx +; NOPOSTRA-NEXT: movq 8(%rdi), %rdx +; NOPOSTRA-NEXT: movq 24(%rdi), %rsi +; NOPOSTRA-NEXT: orq 56(%rdi), %rsi +; NOPOSTRA-NEXT: orq 40(%rdi), %rdx +; NOPOSTRA-NEXT: orq 48(%rdi), %rax +; NOPOSTRA-NEXT: orq 32(%rdi), %rcx +; NOPOSTRA-NEXT: orq %rsi, %rdx +; NOPOSTRA-NEXT: orq %rax, %rcx +; NOPOSTRA-NEXT: orq %rdx, %rcx +; NOPOSTRA-NEXT: setne %al +; NOPOSTRA-NEXT: retq + %rem = and i64 %idx, 511 + %sh_prom = zext nneg i64 %rem to i512 + %shl = shl nuw i512 1, %sh_prom + %not = xor i512 %shl, -1 + %load = load i512, ptr %v, align 8 + %and = and i512 %load, %not + %conv2 = zext i1 %b to i512 + %shl4 = shl nuw i512 %conv2, %sh_prom + %or = or i512 %and, %shl4 + store i512 %or, ptr %v, align 8 + %cmp = icmp ne i512 %or, 0 + ret i1 %cmp +} diff --git a/llvm/test/Transforms/VectorCombine/AArch64/sve-interleave-splat.ll b/llvm/test/Transforms/VectorCombine/AArch64/sve-interleave-splat.ll new file mode 100644 index 0000000..921bcf0 --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/AArch64/sve-interleave-splat.ll @@ -0,0 +1,11 @@ +; RUN: opt -passes=vector-combine %s -S -o - | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +define <vscale x 4 x i16> @interleave2_same_const_splat_nxv4i16() { +;CHECK-LABEL: @interleave2_same_const_splat_nxv4i16( +;CHECK: call <vscale x 4 x i16> @llvm.vector.interleave2 +;CHECK: ret <vscale x 4 x i16> %retval + %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3)) + ret <vscale x 4 x i16> %retval +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll 
b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll new file mode 100644 index 0000000..2926371 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll @@ -0,0 +1,17 @@ +; RUN: llc -mtriple=x86_64 < %s | FileCheck %s --check-prefix=ASM +; RUN: llc -mtriple=x86_64 -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=MIR + +define i64 @test1(i64 %i) nounwind readnone { + %loc = alloca i64 + %j = load i64, ptr %loc + %r = add i64 %i, %j + ret i64 %r +} + +define i64 @test2(i32 %i) nounwind readnone { + %loc = alloca i32 + %j = load i32, ptr %loc + %r = add i32 %i, %j + %ext = zext i32 %r to i64 + ret i64 %ext +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll.expected new file mode 100644 index 0000000..88cb03e --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll.expected @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64 < %s | FileCheck %s --check-prefix=ASM +; RUN: llc -mtriple=x86_64 -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=MIR + +define i64 @test1(i64 %i) nounwind readnone { +; ASM-LABEL: test1: +; ASM: # %bb.0: +; ASM-NEXT: movq %rdi, %rax +; ASM-NEXT: addq -{{[0-9]+}}(%rsp), %rax +; ASM-NEXT: retq +; MIR-LABEL: name: test1 +; MIR: bb.0 (%ir-block.0): +; MIR-NEXT: liveins: $rdi +; MIR-NEXT: {{ $}} +; MIR-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rdi +; MIR-NEXT: [[ADD64rm:%[0-9]+]]:gr64 = ADD64rm [[COPY]], %stack.0.loc, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (dereferenceable load (s64) from %ir.loc) +; MIR-NEXT: $rax = COPY [[ADD64rm]] +; MIR-NEXT: RET 0, $rax + %loc = alloca i64 + %j = load i64, ptr %loc + %r = add i64 %i, %j + ret i64 %r +} + +define i64 @test2(i32 %i) nounwind readnone { +; ASM-LABEL: test2: +; ASM: # %bb.0: +; ASM-NEXT: movl %edi, %eax +; ASM-NEXT: addl -{{[0-9]+}}(%rsp), %eax +; ASM-NEXT: retq +; MIR-LABEL: name: test2 +; MIR: bb.0 (%ir-block.0): +; MIR-NEXT: liveins: $edi +; MIR-NEXT: {{ $}} +; MIR-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi +; MIR-NEXT: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[COPY]], %stack.0.loc, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (dereferenceable load (s32) from %ir.loc) +; MIR-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, killed [[ADD32rm]], %subreg.sub_32bit +; MIR-NEXT: $rax = COPY [[SUBREG_TO_REG]] +; MIR-NEXT: RET 0, $rax + %loc = alloca i32 + %j = load i32, ptr %loc + %r = add i32 %i, %j + %ext = zext i32 %r to i64 + ret i64 %ext +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll new file mode 100644 index 0000000..7167bcf --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll @@ -0,0 +1,13 @@ +; RUN: llc -mtriple=x86_64 < %s | FileCheck %s --check-prefix=CHECK +; RUN: llc -mtriple=x86_64 -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=CHECK + +define i32 @add(i32 %a, i32 %b) { + %sum = add i32 %a, %b + ret i32 %sum +} + +define i32 @sub(i32 %a, i32 %b) { + %diff = sub i32 %a, %b + ret i32 %diff +} + diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll.expected 
b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll.expected new file mode 100644 index 0000000..1ba920d --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll.expected @@ -0,0 +1,16 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64 < %s | FileCheck %s --check-prefix=CHECK +; RUN: llc -mtriple=x86_64 -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=CHECK + +define i32 @add(i32 %a, i32 %b) { + %sum = add i32 %a, %b + ret i32 %sum +} + +define i32 @sub(i32 %a, i32 %b) { + %diff = sub i32 %a, %b + ret i32 %diff +} + +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-mixed.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-mixed.test new file mode 100644 index 0000000..6fc57b5 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-mixed.test @@ -0,0 +1,9 @@ +# REQUIRES: x86-registered-target +## Test checking that update_llc_test_checks.py can generate both ASM and MIR checks in the same file + +# RUN: cp -f %S/Inputs/x86_asm_mir_mixed.ll %t.ll && %update_llc_test_checks %t.ll +# RUN: diff -u %S/Inputs/x86_asm_mir_mixed.ll.expected %t.ll + +## Verify that running the script again on an already updated file doesn't add duplicate checks +# RUN: %update_llc_test_checks %t.ll +# RUN: diff -u %S/Inputs/x86_asm_mir_mixed.ll.expected %t.ll diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-same-prefix.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-same-prefix.test new file mode 100644 index 0000000..0f8aaa54 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-same-prefix.test @@ -0,0 +1,8 @@ +# REQUIRES: x86-registered-target +## Test that using the same prefix for both ASM and MIR outputs generates a warning +## and doesn't produce any checks. 
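+## For reference: in the x86_asm_mir_same_prefix.ll input above, both RUN lines +## invoke FileCheck with --check-prefix=CHECK (one on plain llc output, one on +## -stop-after=finalize-isel output), so an individual CHECK line cannot be +## attributed to a single output kind.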
+ +# RUN: cp -f %S/Inputs/x86_asm_mir_same_prefix.ll %t.ll && %update_llc_test_checks %t.ll 2>&1 | FileCheck %s --check-prefix=WARNING +# RUN: diff -u %S/Inputs/x86_asm_mir_same_prefix.ll.expected %t.ll + +# WARNING: WARNING: The following prefixes are used for both ASM and MIR output, which will cause FileCheck failures: CHECK diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index 2dad16a..baa0377 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -605,6 +605,7 @@ TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$') TRIPLE_ARG_RE = re.compile(r"-m?triple[= ]([^ ]+)") MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)") DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)") +STOP_PASS_RE = re.compile(r"-stop-(before|after)=(\w+)") IS_DEBUG_RECORD_RE = re.compile(r"^(\s+)#dbg_") IS_SWITCH_CASE_RE = re.compile(r"^\s+i\d+ \d+, label %\S+") diff --git a/llvm/utils/UpdateTestChecks/mir.py b/llvm/utils/UpdateTestChecks/mir.py index 24bb8b3..01ee0e1 100644 --- a/llvm/utils/UpdateTestChecks/mir.py +++ b/llvm/utils/UpdateTestChecks/mir.py @@ -163,13 +163,15 @@ def add_mir_checks_for_function( print_fixed_stack, first_check_is_next, at_the_function_name, + check_indent=None, ): printed_prefixes = set() for run in run_list: for prefix in run[0]: if prefix in printed_prefixes: break - if not func_dict[prefix][func_name]: + # func_info can be empty if there was a prefix conflict. + if not func_dict[prefix].get(func_name): continue if printed_prefixes: # Add some space between different check prefixes. @@ -185,6 +187,7 @@ def add_mir_checks_for_function( func_dict[prefix][func_name], print_fixed_stack, first_check_is_next, + check_indent, ) break else: @@ -204,6 +207,7 @@ def add_mir_check_lines( func_info, print_fixed_stack, first_check_is_next, + check_indent=None, ): func_body = str(func_info).splitlines() if single_bb: @@ -220,7 +224,10 @@ def add_mir_check_lines( first_line = func_body[0] indent = len(first_line) - len(first_line.lstrip(" ")) # A check comment, indented the appropriate amount - check = "{:>{}}; {}".format("", indent, prefix) + if check_indent is not None: + check = "{}; {}".format(check_indent, prefix) + else: + check = "{:>{}}; {}".format("", indent, prefix) output_lines.append("{}-LABEL: name: {}".format(check, func_name)) diff --git a/llvm/utils/update_llc_test_checks.py b/llvm/utils/update_llc_test_checks.py index 8c57e75..98864be 100755 --- a/llvm/utils/update_llc_test_checks.py +++ b/llvm/utils/update_llc_test_checks.py @@ -15,7 +15,7 @@ import argparse import os # Used to advertise this file's name ("autogenerated_note"). import sys -from UpdateTestChecks import common +from UpdateTestChecks import common, mir # llc is the only llc-like in the LLVM tree but downstream forks can add # additional ones here if they have them. @@ -33,6 +33,7 @@ def update_test(ti: common.TestInfo): break run_list = [] + mir_run_list = [] for l in ti.run_lines: if "|" not in l: common.warn("Skipping unparsable RUN line: " + l) @@ -57,9 +58,14 @@ def update_test(ti: common.TestInfo): if m: march_in_cmd = m.groups()[0] + target_list = run_list m = common.DEBUG_ONLY_ARG_RE.search(llc_cmd) if m and m.groups()[0] == "isel": from UpdateTestChecks import isel as output_type + elif not m and common.STOP_PASS_RE.search(llc_cmd): + # MIR output mode. If -debug-only is present assume + # the debug output is the main point of interest. 
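+ # For example, a RUN line such as + # llc -mtriple=x86_64 -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=MIR + # (as in the x86_asm_mir_mixed.ll input above) matches STOP_PASS_RE, so its + # prefixes are collected in mir_run_list and emitted as MIR-LABEL/MIR-NEXT checks.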
+ target_list = mir_run_list else: from UpdateTestChecks import asm as output_type @@ -84,7 +90,7 @@ def update_test(ti: common.TestInfo): # FIXME: We should use multiple check prefixes to common check lines. For # now, we just ignore all but the last. - run_list.append( + target_list.append( ( check_prefixes, llc_tool, @@ -119,14 +125,20 @@ def update_test(ti: common.TestInfo): ginfo=ginfo, ) - for ( - prefixes, - llc_tool, - llc_args, - preprocess_cmd, - triple_in_cmd, - march_in_cmd, - ) in run_list: + # Dictionary to store MIR function bodies separately + mir_func_dict = {} + for run_tuple, is_mir in [(run, False) for run in run_list] + [ + (run, True) for run in mir_run_list + ]: + ( + prefixes, + llc_tool, + llc_args, + preprocess_cmd, + triple_in_cmd, + march_in_cmd, + ) = run_tuple + common.debug("Extracted LLC cmd:", llc_tool, llc_args) common.debug("Extracted FileCheck prefixes:", str(prefixes)) @@ -141,22 +153,54 @@ def update_test(ti: common.TestInfo): if not triple: triple = common.get_triple_from_march(march_in_cmd) - scrubber, function_re = output_type.get_run_handler(triple) - if 0 == builder.process_run_line( - function_re, scrubber, raw_tool_output, prefixes - ): - common.warn( - "Couldn't match any function. Possibly the wrong target triple has been provided" + if is_mir: + # MIR output mode + common.debug("Detected MIR output mode for prefixes:", str(prefixes)) + for prefix in prefixes: + if prefix not in mir_func_dict: + mir_func_dict[prefix] = {} + + mir.build_function_info_dictionary( + ti.path, + raw_tool_output, + triple, + prefixes, + mir_func_dict, + ti.args.verbose, ) - builder.processed_prefixes(prefixes) + else: + # ASM output mode + scrubber, function_re = output_type.get_run_handler(triple) + if 0 == builder.process_run_line( + function_re, scrubber, raw_tool_output, prefixes + ): + common.warn( + "Couldn't match any function. Possibly the wrong target triple has been provided" + ) + builder.processed_prefixes(prefixes) func_dict = builder.finish_and_get_func_dict() + + # Check for conflicts: same prefix used for both ASM and MIR + conflicting_prefixes = set(func_dict.keys()) & set(mir_func_dict.keys()) + if conflicting_prefixes: + common.warn( + "The following prefixes are used for both ASM and MIR output, which will cause FileCheck failures: {}".format( + ", ".join(sorted(conflicting_prefixes)) + ), + test_file=ti.path, + ) + for prefix in conflicting_prefixes: + mir_func_dict[prefix] = {} + func_dict[prefix] = {} + global_vars_seen_dict = {} is_in_function = False is_in_function_start = False func_name = None prefix_set = set([prefix for p in run_list for prefix in p[0]]) + prefix_set.update([prefix for p in mir_run_list for prefix in p[0]]) common.debug("Rewriting FileCheck prefixes:", str(prefix_set)) output_lines = [] @@ -221,6 +265,22 @@ def update_test(ti: common.TestInfo): is_filtered=builder.is_filtered(), ) ) + + # Also add MIR checks if we have them for this function + if mir_run_list and func_name: + mir.add_mir_checks_for_function( + ti.path, + output_lines, + mir_run_list, + mir_func_dict, + func_name, + single_bb=False, # Don't skip basic block labels. + print_fixed_stack=False, # Don't print fixed stack (ASM tests don't need it). + first_check_is_next=False, # First check is LABEL, not NEXT. + at_the_function_name=False, # Use "name:" not "@name". + check_indent="", # No indentation for IR files (not MIR files). 
+ ) + is_in_function_start = False if is_in_function: diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td index 8728e66..70d424b 100644 --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td @@ -21,13 +21,6 @@ include "mlir/Interfaces/InferTypeOpInterface.td" include "mlir/IR/OpBase.td" include "mlir/IR/RegionKindInterface.td" -// This is roughly similar to OpFoldResult assuming the handle produces a single -// value in the payload IR. -def TransformAnyParamTypeOrAnyHandle : Type< - Or<[TransformHandleTypeInterface.predicate, - TransformParamTypeInterface.predicate]>, - "transform any param type or any handle type">; - //===----------------------------------------------------------------------===// // Apply...PatternsOp //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h index 48978eb..de07f50 100644 --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -33,22 +33,14 @@ namespace linalg { //===----------------------------------------------------------------------===// // Utilities for inferring various semantics properties of Linalg ops. //===----------------------------------------------------------------------===// -/// Shell function to compute the Destination Permutation of PackOp -/// This function uses the helper function `computePackUnPackPerm` to get -/// the permutation vector. Only major difference between UnPack and Pack is -/// that packOp uses destination rank whereas unpack Uses source rank. -SmallVector<int64_t> getPackInverseDestPerm(linalg::PackOp packOp); - -/// Shell function to compute the Source Permutation of unPackOp. -/// This function, like the getPackInverseDestPerm uses the helper function -/// computePackUnPackPerm` to get the permutation vector. -/// Only major difference between UnPack and Pack is that packOp uses -/// destination rank whereas unpack Uses source rank. -SmallVector<int64_t> getUnPackInverseSrcPerm(linalg::UnPackOp unpackOp); - -/// Shell function to compute the Source rank permutation for unpackOp -/// Unpack requires some packing metadata data information, so created -/// another function where this value is passed by reference. + +/// Compute inverse permutation for the destination tensor (i.e. in the packed +/// domain). +SmallVector<int64_t> getPackInverseDestPerm(linalg::PackOp packOp, + PackingMetadata &metadata); + +/// Compute inverse permutation for the source tensor (i.e. in the packed +/// domain). 
SmallVector<int64_t> getUnPackInverseSrcPerm(linalg::UnPackOp, PackingMetadata &metadata); diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td index 0e42d08..b628f1a 100644 --- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td @@ -395,7 +395,7 @@ def SPV_INTEL_fpga_buffer_location : I32EnumAttrCase<"SPV_INTEL_fp def SPV_INTEL_arbitrary_precision_fixed_point : I32EnumAttrCase<"SPV_INTEL_arbitrary_precision_fixed_point", 4019>; def SPV_INTEL_usm_storage_classes : I32EnumAttrCase<"SPV_INTEL_usm_storage_classes", 4020>; def SPV_INTEL_io_pipes : I32EnumAttrCase<"SPV_INTEL_io_pipes", 4021>; -def SPV_INTEL_blocking_pipes : I32EnumAttrCase<"SPV_INTEL_blocking_pipes", 4022>; +def SPV_ALTERA_blocking_pipes : I32EnumAttrCase<"SPV_ALTERA_blocking_pipes", 4022>; def SPV_INTEL_fpga_reg : I32EnumAttrCase<"SPV_INTEL_fpga_reg", 4023>; def SPV_INTEL_long_constant_composite : I32EnumAttrCase<"SPV_INTEL_long_constant_composite", 4024>; def SPV_INTEL_optnone : I32EnumAttrCase<"SPV_INTEL_optnone", 4025>; @@ -465,7 +465,7 @@ def SPIRV_ExtensionAttr : SPV_INTEL_kernel_attributes, SPV_INTEL_fpga_memory_accesses, SPV_INTEL_fpga_cluster_attributes, SPV_INTEL_loop_fuse, SPV_INTEL_fpga_buffer_location, SPV_INTEL_arbitrary_precision_fixed_point, - SPV_INTEL_usm_storage_classes, SPV_INTEL_io_pipes, SPV_INTEL_blocking_pipes, + SPV_INTEL_usm_storage_classes, SPV_INTEL_io_pipes, SPV_ALTERA_blocking_pipes, SPV_INTEL_fpga_reg, SPV_INTEL_long_constant_composite, SPV_INTEL_optnone, SPV_INTEL_debug_module, SPV_INTEL_fp_fast_math_mode, SPV_INTEL_memory_access_aliasing, SPV_INTEL_split_barrier, @@ -807,9 +807,9 @@ def SPIRV_C_IOPipesINTEL : I32EnumAttrCase<"IOPip Extension<[SPV_INTEL_io_pipes]> ]; } -def SPIRV_C_BlockingPipesINTEL : I32EnumAttrCase<"BlockingPipesINTEL", 5945> { +def SPIRV_C_BlockingPipesALTERA : I32EnumAttrCase<"BlockingPipesALTERA", 5945> { list<Availability> availability = [ - Extension<[SPV_INTEL_blocking_pipes]> + Extension<[SPV_ALTERA_blocking_pipes]> ]; } def SPIRV_C_FPGARegINTEL : I32EnumAttrCase<"FPGARegINTEL", 5948> { @@ -1519,7 +1519,7 @@ def SPIRV_CapabilityAttr : SPIRV_C_FPGAMemoryAccessesINTEL, SPIRV_C_FPGAClusterAttributesINTEL, SPIRV_C_LoopFuseINTEL, SPIRV_C_MemoryAccessAliasingINTEL, SPIRV_C_FPGABufferLocationINTEL, SPIRV_C_ArbitraryPrecisionFixedPointINTEL, - SPIRV_C_USMStorageClassesINTEL, SPIRV_C_IOPipesINTEL, SPIRV_C_BlockingPipesINTEL, + SPIRV_C_USMStorageClassesINTEL, SPIRV_C_IOPipesINTEL, SPIRV_C_BlockingPipesALTERA, SPIRV_C_FPGARegINTEL, SPIRV_C_DotProductInputAll, SPIRV_C_DotProductInput4x8BitPacked, SPIRV_C_DotProduct, SPIRV_C_RayCullMaskKHR, SPIRV_C_CooperativeMatrixKHR, SPIRV_C_ReplicatedCompositesEXT, diff --git a/mlir/include/mlir/Dialect/Transform/IR/TransformTypes.td b/mlir/include/mlir/Dialect/Transform/IR/TransformTypes.td index 2d9a26e..3e3fff4 100644 --- a/mlir/include/mlir/Dialect/Transform/IR/TransformTypes.td +++ b/mlir/include/mlir/Dialect/Transform/IR/TransformTypes.td @@ -103,4 +103,9 @@ def TransformAnyHandle : Type< TransformValueHandleTypeInterface.predicate]>, "transform operation or value handle">; +def TransformAnyParamTypeOrAnyHandle : Type< + Or<[TransformHandleTypeInterface.predicate, + TransformParamTypeInterface.predicate]>, + "transform any param type or any handle type">; + #endif // MLIR_DIALECT_TRANSFORM_IR_TRANSFORMTYPES diff --git a/mlir/include/mlir/Dialect/XeGPU/CMakeLists.txt b/mlir/include/mlir/Dialect/XeGPU/CMakeLists.txt index 
9f57627..cb1e9d0 100644 --- a/mlir/include/mlir/Dialect/XeGPU/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/XeGPU/CMakeLists.txt @@ -1,2 +1,3 @@ add_subdirectory(IR) add_subdirectory(Transforms) +add_subdirectory(TransformOps) diff --git a/mlir/include/mlir/Dialect/XeGPU/TransformOps/CMakeLists.txt b/mlir/include/mlir/Dialect/XeGPU/TransformOps/CMakeLists.txt new file mode 100644 index 0000000..59246064 --- /dev/null +++ b/mlir/include/mlir/Dialect/XeGPU/TransformOps/CMakeLists.txt @@ -0,0 +1,6 @@ +set(LLVM_TARGET_DEFINITIONS XeGPUTransformOps.td) +mlir_tablegen(XeGPUTransformOps.h.inc -gen-op-decls) +mlir_tablegen(XeGPUTransformOps.cpp.inc -gen-op-defs) +add_public_tablegen_target(MLIRXeGPUTransformOpsIncGen) + +add_mlir_doc(XeGPUTransformOps XeGPUTransformOps Dialects/ -gen-op-doc) diff --git a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.h b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.h new file mode 100644 index 0000000..3e16d1e --- /dev/null +++ b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.h @@ -0,0 +1,28 @@ +//===- XeGPUTransformOps.h - XeGPU transformation ops -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_XEGPU_TRANSFORMOPS_XEGPUTRANSFORMOPS_H +#define MLIR_DIALECT_XEGPU_TRANSFORMOPS_XEGPUTRANSFORMOPS_H + +#include "mlir/Dialect/Transform/IR/TransformDialect.h" +#include "mlir/Dialect/Transform/IR/TransformTypes.h" +#include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.h" +#include "mlir/Dialect/Utils/StaticValueUtils.h" + +#define GET_OP_CLASSES +#include "mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.h.inc" + +namespace mlir { +class DialectRegistry; + +namespace xegpu { +void registerTransformDialectExtension(DialectRegistry &registry); +} // namespace xegpu +} // namespace mlir + +#endif // MLIR_DIALECT_XEGPU_TRANSFORMOPS_XEGPUTRANSFORMOPS_H diff --git a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td new file mode 100644 index 0000000..b985d54 --- /dev/null +++ b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td @@ -0,0 +1,81 @@ +//===- XeGPUTransformOps.td - XeGPU transformation ops -----*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef XEGPU_TRANSFORM_OPS +#define XEGPU_TRANSFORM_OPS + +include "mlir/Dialect/Transform/IR/TransformAttrs.td" +include "mlir/Dialect/Transform/IR/TransformDialect.td" +include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.td" +include "mlir/Dialect/Transform/IR/TransformTypes.td" +include "mlir/Interfaces/SideEffectInterfaces.td" +include "mlir/IR/OpBase.td" + +def SetDescLayoutOp : Op<Transform_Dialect, "xegpu.set_desc_layout", [ + AttrSizedOperandSegments, + DeclareOpInterfaceMethods<MemoryEffectsOpInterface>, + TransformOpInterface +]> { + + let summary = "Set the xegpu.layout attribute on a xegpu.create_nd_desc op result."; + let description = [{ + Given an `xegpu.create_nd_desc` operation, this transform adds an `xegpu.layout` + attribute to the result tensor descriptor. The layout is defined by the + `sg_layout` and `sg_data` attributes, and the optional `inst_data` attribute. + Returns a handle to the transformed op. + }]; + + let arguments = (ins + TransformHandleTypeInterface : $target, + Variadic<TransformAnyParamTypeOrAnyHandle> : $sg_layout, + Variadic<TransformAnyParamTypeOrAnyHandle> : $sg_data, + Variadic<TransformAnyParamTypeOrAnyHandle> : $inst_data, + DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_sg_layout, + DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_sg_data, + DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_inst_data + ); + + let results = (outs TransformHandleTypeInterface : $transformed); + let builders = [ + OpBuilder<(ins "Value":$target, + "ArrayRef<OpFoldResult>":$mixedSgLayout, + "ArrayRef<OpFoldResult>":$mixedSgData, + "ArrayRef<OpFoldResult>":$mixedInstData + )>, + ]; + + let assemblyFormat = [{ + $target + `sg_layout` `=` custom<DynamicIndexList>($sg_layout, $static_sg_layout) + `sg_data` `=` custom<DynamicIndexList>($sg_data, $static_sg_data) + (`inst_data` `=` custom<DynamicIndexList>($inst_data, $static_inst_data)^)? + attr-dict `:` functional-type(operands, results) + }]; + + let extraClassDeclaration = [{ + ::mlir::DiagnosedSilenceableFailure apply( + ::mlir::transform::TransformRewriter &rewriter, + ::mlir::transform::TransformResults &transformResults, + ::mlir::transform::TransformState &state); + + ::llvm::SmallVector<::mlir::OpFoldResult> getMixedSgLayout() { + Builder b(getContext()); + return getMixedValues(getStaticSgLayout(), getSgLayout(), b); + } + ::llvm::SmallVector<::mlir::OpFoldResult> getMixedSgData() { + Builder b(getContext()); + return getMixedValues(getStaticSgData(), getSgData(), b); + } + ::llvm::SmallVector<::mlir::OpFoldResult> getMixedInstData() { + Builder b(getContext()); + return getMixedValues(getStaticInstData(), getInstData(), b); + } + }]; +} + +#endif // XEGPU_TRANSFORM_OPS diff --git a/mlir/include/mlir/Support/Timing.h b/mlir/include/mlir/Support/Timing.h index 3d61a0a..50ae847 100644 --- a/mlir/include/mlir/Support/Timing.h +++ b/mlir/include/mlir/Support/Timing.h @@ -473,6 +473,11 @@ void registerDefaultTimingManagerCLOptions(); /// 'registerDefaultTimingManagerOptions' to a `DefaultTimingManager`. void applyDefaultTimingManagerCLOptions(DefaultTimingManager &tm); +/// Create an output strategy for the specified format, to be passed to +/// DefaultTimingManager::setOutput().
+std::unique_ptr<OutputStrategy> +createOutputStrategy(DefaultTimingManager::OutputFormat fmt, raw_ostream &os); + } // namespace mlir #endif // MLIR_SUPPORT_TIMING_H diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp index bd25e94..027268c 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -232,10 +232,9 @@ FailureOr<LowerPackResult> linalg::lowerPack(RewriterBase &rewriter, // 2. Compute the permutation vector to shuffle packed shape into the shape // before any outer or inner permutations have been applied. - PackingMetadata packingMetadata = computePackingMetadata( - packedTensorType.getRank(), packOp.getInnerDimsPos()); + PackingMetadata packingMetadata; SmallVector<int64_t> packedToStripMinedShapePerm = - getPackInverseDestPerm(packOp); + getPackInverseDestPerm(packOp, packingMetadata); // 3. Compute the stripMinedShape: this is the packed shape before any outer // or inner permutations have been applied. diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index cb6199f..19d2d85 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -1564,13 +1564,6 @@ vectorizeAsLinalgGeneric(RewriterBase &rewriter, VectorizationState &state, return success(); } -/// Given a linalg::PackOp, return the `dest` shape before any packing -/// permutations. -static SmallVector<int64_t> getTiledPackShape(linalg::PackOp packOp, - ArrayRef<int64_t> destShape) { - return applyPermutation(destShape, linalg::getPackInverseDestPerm(packOp)); -} - /// Determines whether a mask for xfer_write is trivially "all true" /// /// Given all the inputs required to generate a mask (mask sizes and shapes), @@ -1761,99 +1754,6 @@ createWriteOrMaskedWrite(OpBuilder &builder, Location loc, Value vecToStore, return mlir::vector::maskOperation(builder, write, maskForWrite); } -/// Vectorize linalg::PackOp with (1) static inner_tiles (2) constant -/// padding value and (3) input vector sizes into: -/// -/// masked_transfer_read->shape_cast->transpose->transfer_write_in_bounds -/// -/// As in the following example: -/// %pack = tensor.pack %src inner_dims_pos = [2, 1] inner_tiles = [16, 2] -/// into %dst : tensor<32x8x16xf32> -> tensor<32x4x1x16x2xf32> -/// -/// This pack would be vectorized to: -/// -/// %load = vector.mask %mask { -/// vector.transfer_read %arg0[%c0, %c0, %c0], %cst -/// {in_bounds = [true, true, true]} : -/// tensor<32x7x16xf32>, vector<32x8x16xf32> -/// } : vector<32x8x16xi1> -> vector<32x8x16xf32> -/// %shape_cast = vector.shape_cast %load : vector<32x8x16xf32> -/// to vector<32x4x2x1x16xf32> -/// %transpose = vector.transpose %shape_cast, [0, 1, 3, 4, 2] -/// : vector<32x4x2x1x16xf32> to vector<32x4x1x16x2xf32> -/// %write = vector.transfer_write %transpose, -/// %empty[%c0_0, %c0_0, %c0_0, %c0_0, %c0_0] -/// {in_bounds = [true, true, true, true, true]} -/// : vector<32x4x1x16x2xf32>, tensor<32x4x1x16x2xf32> -/// -/// If the (3) input vector sizes are not provided, the vector sizes are -/// determined by the result tensor shape and the `in_bounds` -/// attribute is used instead of masking to mark out-of-bounds accesses. -/// -/// NOTE: The input vector sizes specify the dimensions corresponding to the -/// outer dimensions of the output tensor. The remaining dimensions are -/// computed based on, e.g., the static inner tiles. 
-/// Supporting dynamic inner tiles will require the user to specify the -/// missing vector sizes. This is left as a TODO. -static LogicalResult -vectorizeAsTensorPackOp(RewriterBase &rewriter, linalg::PackOp packOp, - ArrayRef<int64_t> inputVectorSizes, - SmallVectorImpl<Value> &newResults) { - // TODO: Introduce a parent class that will handle the insertion point update. - OpBuilder::InsertionGuard g(rewriter); - rewriter.setInsertionPoint(packOp); - - Location loc = packOp.getLoc(); - std::optional<Value> padValue = packOp.getPaddingValue() - ? std::optional(packOp.getPaddingValue()) - : std::nullopt; - - // If the input vector sizes are not provided, then the vector sizes are - // determined by the result tensor shape. In case the vector sizes aren't - // provided, we update the inBounds attribute instead of masking. - bool useInBoundsInsteadOfMasking = false; - if (inputVectorSizes.empty()) { - ArrayRef<int64_t> resultTensorShape = packOp.getDestType().getShape(); - inputVectorSizes = resultTensorShape.take_front(packOp.getSourceRank()); - useInBoundsInsteadOfMasking = true; - } - - // Create masked TransferReadOp. - SmallVector<int64_t> inputShape(inputVectorSizes); - auto innerTiles = packOp.getStaticInnerTiles(); - auto innerDimsPos = packOp.getInnerDimsPos(); - auto outerDimsPerm = packOp.getOuterDimsPerm(); - if (!outerDimsPerm.empty()) - applyPermutationToVector(inputShape, - invertPermutationVector(outerDimsPerm)); - for (auto [idx, size] : enumerate(innerTiles)) - inputShape[innerDimsPos[idx]] *= size; - auto maskedRead = vector::createReadOrMaskedRead( - rewriter, loc, packOp.getSource(), inputShape, padValue, - useInBoundsInsteadOfMasking, - /*inputScalableVecSizes=*/{}); - - // Create ShapeCastOp. - SmallVector<int64_t> destShape(inputVectorSizes); - destShape.append(innerTiles.begin(), innerTiles.end()); - auto tiledPackType = VectorType::get(getTiledPackShape(packOp, destShape), - packOp.getDestType().getElementType()); - auto shapeCastOp = - vector::ShapeCastOp::create(rewriter, loc, tiledPackType, maskedRead); - - // Create TransposeOp. - auto destPermutation = - invertPermutationVector(getPackInverseDestPerm(packOp)); - auto transposeOp = vector::TransposeOp::create( - rewriter, loc, shapeCastOp.getResult(), destPermutation); - - // Create TransferWriteOp. - Operation *write = createWriteOrMaskedWrite( - rewriter, loc, transposeOp.getResult(), packOp.getDest()); - newResults.push_back(write->getResult(0)); - return success(); -} - /// Given the re-associations, "collapses" the input Vector type /// /// This is similar to CollapseShapeOp::inferCollapsedType with two notable @@ -1901,12 +1801,121 @@ static VectorType getCollapsedVecType(VectorType type, return VectorType::get(newShape, type.getElementType(), newScalableFlags); } +/// Vectorize `linalg.pack` as: +/// * xfer_read -> shape_cast -> transpose -> xfer_write +/// +/// The input-vector-sizes specify the _write_ vector sizes (i.e. the vector +/// sizes for the xfer_write operation). This is sufficient to infer the other +/// vector sizes required here. +/// +/// If the vector sizes are not provided: +/// * the vector sizes are determined from the destination tensor static shape. +/// * the inBounds attribute is used instead of masking. 
+/// +/// EXAMPLE (no vector sizes): +/// ``` +/// %pack = tensor.pack %src +/// inner_dims_pos = [2, 1] +/// inner_tiles = [16, 2] +/// into %dst : tensor<32x8x16xf32> -> tensor<32x4x1x16x2xf32> +/// ``` +/// is vectorized as: +/// ``` +/// %read = vector.transfer_read %src +/// : tensor<32x8x16xf32>, vector<32x8x16xf32> +/// %sc = vector.shape_cast %read +/// : vector<32x8x16xf32> to vector<32x4x2x1x16xf32> +/// %tr = vector.transpose %sc, [0, 1, 3, 4, 2] +/// : vector<32x4x2x1x16xf32> to vector<32x4x1x16x2xf32> +/// %write = vector.transfer_write %tr into %dst +/// : vector<32x4x1x16x2xf32>, tensor<32x4x1x16x2xf32> +/// ``` +static LogicalResult +vectorizeAsTensorPackOp(RewriterBase &rewriter, linalg::PackOp packOp, + ArrayRef<int64_t> inputVectorSizes, + SmallVectorImpl<Value> &newResults) { + if (!inputVectorSizes.empty()) { + assert(inputVectorSizes.size() == packOp.getDestRank() && + "Invalid number of input vector sizes!"); + } + + // TODO: Introduce a parent class that will handle the insertion point update. + OpBuilder::InsertionGuard g(rewriter); + rewriter.setInsertionPoint(packOp); + + Location loc = packOp.getLoc(); + std::optional<Value> padValue = packOp.getPaddingValue() + ? std::optional(packOp.getPaddingValue()) + : std::nullopt; + + SmallVector<int64_t> destShape = + SmallVector<int64_t>(packOp.getDestType().getShape()); + + // This is just a convenience alias to clearly communicate that the input + // vector sizes determine the _write_ sizes. + ArrayRef<int64_t> &writeVectorSizes = inputVectorSizes; + + // In the absence of input-vector-sizes, use the _static_ destination tensor + // shape. In addition, use the inBounds attribute instead of masking. + bool useInBoundsInsteadOfMasking = false; + if (writeVectorSizes.empty()) { + if (ShapedType::isDynamicShape(destShape)) + return rewriter.notifyMatchFailure(packOp, + "unable to infer vector sizes"); + + writeVectorSizes = destShape; + useInBoundsInsteadOfMasking = true; + } + + // Compute the pre-transpose write vector type, i.e. the write vector type + // _before_ the transposition (i.e. before dimension permutation). This is + // done by inverting the permutation/transposition that's part of the Pack + // operation. This type is required to: + // 1) compute the read vector type for the masked read below, and + // 2) generate the shape-cast Op below that expands the read vector type. + PackingMetadata packMetadata; + SmallVector<int64_t> preTransposeWriteVecSizes(writeVectorSizes); + auto destInvPermutation = getPackInverseDestPerm(packOp, packMetadata); + applyPermutationToVector(preTransposeWriteVecSizes, destInvPermutation); + auto preTransposeWriteVecType = VectorType::get( + preTransposeWriteVecSizes, packOp.getType().getElementType()); + + // Compute the vector type for the _read_ operation. This is simply the + // pre-transpose write vector type with the dimensions collapsed + // as per the Pack operation. + VectorType readVecType = getCollapsedVecType( + preTransposeWriteVecType, + getSymbolLessAffineMaps(convertReassociationIndicesToExprs( + rewriter.getContext(), packMetadata.reassociations))); + + // Create masked TransferReadOp. + auto maskedRead = vector::createReadOrMaskedRead( + rewriter, loc, packOp.getSource(), readVecType.getShape(), padValue, + useInBoundsInsteadOfMasking, + /*inputScalableVecSizes=*/{}); + + // Create ShapeCastOp. + auto shapeCastOp = vector::ShapeCastOp::create( + rewriter, loc, preTransposeWriteVecType, maskedRead); + + // Create TransposeOp.
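+ // Note: destPermutation below is the inverse of destInvPermutation, i.e. the + // Pack op's own permutation, so the transpose re-orders the expanded + // shape-cast result into the packed destination layout (see the EXAMPLE in + // the doc comment above).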
+ auto destPermutation = invertPermutationVector(destInvPermutation); + auto transposeOp = vector::TransposeOp::create( + rewriter, loc, shapeCastOp.getResult(), destPermutation); + + // Create TransferWriteOp. + Operation *write = createWriteOrMaskedWrite( + rewriter, loc, transposeOp.getResult(), packOp.getDest()); + newResults.push_back(write->getResult(0)); + return success(); +} + /// Vectorize `linalg.unpack` as: /// * xfer_read -> vector.transpose -> vector.shape_cast -> xfer_write /// -/// The input-vector-sizes specify the read vector sizes (i.e. the vector sizes -/// for the xfer_read operation). This is sufficient to infer the other vector -/// sizes required here. +/// The input-vector-sizes specify the _read_ vector sizes (i.e. the vector +/// sizes for the xfer_read operation). This is sufficient to infer the other +/// vector sizes required here. /// /// If the vector sizes are not provided: /// * the vector sizes are determined from the input tensor static shape. @@ -1960,7 +1969,8 @@ vectorizeAsTensorUnpackOp(RewriterBase &rewriter, linalg::UnPackOp unpackOp, // In the absence of input-vector-sizes, use the _static_ input tensor shape. if (inputVectorSizes.empty()) { if (ShapedType::isDynamicShape(sourceShape)) - return failure(); + return rewriter.notifyMatchFailure(unpackOp, + "unable to infer vector sizes"); readVectorSizes.assign(sourceShape.begin(), sourceShape.end()); useInBoundsInsteadOfMasking = true; @@ -2443,6 +2453,7 @@ vectorizePackOpPrecondition(linalg::PackOp packOp, ArrayRef<int64_t> inputVectorSizes) { auto padValue = packOp.getPaddingValue(); Attribute cstAttr; + // TODO: Relax this condition if (padValue && !matchPattern(padValue, m_Constant(&cstAttr))) { LDBG() << "pad value is not constant: " << packOp; return failure(); diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp index 24d3722..6eeb206 100644 --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -171,29 +171,24 @@ computePackUnPackPerm(int64_t rank, ArrayRef<int64_t> &innerDimsPos, namespace mlir { namespace linalg { -SmallVector<int64_t> getPackInverseDestPerm(PackOp packOp) { +SmallVector<int64_t> getPackInverseDestPerm(PackOp packOp, + PackingMetadata &metadata) { - PackingMetadata pMetadata; int64_t packedRank = packOp.getDestType().getRank(); ArrayRef<int64_t> innerDimPos = packOp.getInnerDimsPos(); ArrayRef<int64_t> outerPerm = packOp.getOuterDimsPerm(); SmallVector<int64_t> packInvDestPerm = - computePackUnPackPerm(packedRank, innerDimPos, outerPerm, pMetadata); + computePackUnPackPerm(packedRank, innerDimPos, outerPerm, metadata); return packInvDestPerm; } -SmallVector<int64_t> getUnPackInverseSrcPerm(UnPackOp unpackOp) { - PackingMetadata metadata; - return getUnPackInverseSrcPerm(unpackOp, metadata); -} - SmallVector<int64_t> getUnPackInverseSrcPerm(UnPackOp unpackOp, PackingMetadata &metadata) { - int64_t unpackRank = unpackOp.getSourceType().getRank(); + int64_t packedRank = unpackOp.getSourceType().getRank(); ArrayRef<int64_t> innerDimPos = unpackOp.getInnerDimsPos(); ArrayRef<int64_t> outerPerm = unpackOp.getOuterDimsPerm(); SmallVector<int64_t> unpackInvSrcPerm = - computePackUnPackPerm(unpackRank, innerDimPos, outerPerm, metadata); + computePackUnPackPerm(packedRank, innerDimPos, outerPerm, metadata); return unpackInvSrcPerm; } diff --git a/mlir/lib/Dialect/XeGPU/CMakeLists.txt b/mlir/lib/Dialect/XeGPU/CMakeLists.txt index 31167e6..46b8251 100644 --- a/mlir/lib/Dialect/XeGPU/CMakeLists.txt +++
b/mlir/lib/Dialect/XeGPU/CMakeLists.txt @@ -1,3 +1,4 @@ add_subdirectory(IR) add_subdirectory(Transforms) add_subdirectory(Utils) +add_subdirectory(TransformOps) diff --git a/mlir/lib/Dialect/XeGPU/TransformOps/CMakeLists.txt b/mlir/lib/Dialect/XeGPU/TransformOps/CMakeLists.txt new file mode 100644 index 0000000..48fe841 --- /dev/null +++ b/mlir/lib/Dialect/XeGPU/TransformOps/CMakeLists.txt @@ -0,0 +1,17 @@ +add_mlir_dialect_library(MLIRXeGPUTransformOps + XeGPUTransformOps.cpp + + ADDITIONAL_HEADER_DIRS + ${PROJECT_SOURCE_DIR}/mlir/Dialect/XeGPU/TransformOps/ + + DEPENDS + MLIRXeGPUTransformOpsIncGen + + LINK_LIBS PUBLIC + MLIRXeGPUDialect + MLIRXeGPUTransforms + MLIRIR + MLIRTransformDialect + MLIRFuncDialect + MLIRSCFDialect +) diff --git a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp new file mode 100644 index 0000000..8943ba0 --- /dev/null +++ b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp @@ -0,0 +1,225 @@ +//===- XeGPUTransformOps.cpp - Implementation of XeGPU transformation ops -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.h" +#include "mlir/Dialect/SCF/IR/SCF.h" +#include "mlir/Dialect/XeGPU/IR/XeGPU.h" +#include "mlir/Dialect/XeGPU/Utils/XeGPUUtils.h" + +#include <optional> + +using namespace mlir; +using namespace mlir::transform; + +/// Assuming that `ofr` is an index attr or a param of index type +/// or a transform dialect handle mapped to exactly one op +/// with one index result, get that value and cast it to int type. +static DiagnosedSilenceableFailure convertMixedValuesToInt( + transform::TransformState &state, TransformOpInterface transformOp, + SmallVectorImpl<int32_t> &result, ArrayRef<OpFoldResult> ofrs) { + for (OpFoldResult ofr : ofrs) { + // Attribute case. + if (auto attr = dyn_cast<Attribute>(ofr)) { + if (auto intAttr = dyn_cast<IntegerAttr>(attr)) { + result.push_back(intAttr.getInt()); + continue; + } + return transformOp.emitDefiniteFailure() << "expected IntegerAttr"; + } + + // Transform param case. + Value transformValue = cast<Value>(ofr); + if (isa<TransformParamTypeInterface>(transformValue.getType())) { + ArrayRef<Attribute> params = state.getParams(transformValue); + if (params.size() != 1) + return transformOp.emitDefiniteFailure() + << "requires exactly one parameter associated"; + result.push_back( + cast<IntegerAttr>(params.front()).getValue().getSExtValue()); + continue; + } + + // Payload value case. 
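+ // Illustrative example (not from this patch): a handle mapped to a single + // constant-like op with one index result, e.g. `arith.constant 8 : index`; + // matchPattern below then folds that result to an IntegerAttr.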
+ auto payloadOps = state.getPayloadOps(transformValue); + if (!llvm::hasSingleElement(payloadOps)) { + DiagnosedSilenceableFailure diag = + transformOp.emitSilenceableError() + << "handle must be mapped to exactly one payload op"; + diag.attachNote(transformValue.getLoc()) + << "mapped to " << llvm::range_size(payloadOps) << " payload ops"; + return diag; + } + + Operation *op = *payloadOps.begin(); + if (op->getNumResults() != 1 || !op->getResult(0).getType().isIndex()) { + DiagnosedSilenceableFailure diag = + transformOp.emitSilenceableError() + << "payload op must have exactly one index result"; + diag.attachNote(op->getLoc()) + << "has " << op->getNumResults() << " results"; + return diag; + } + + IntegerAttr intAttr; + if (!matchPattern(op->getResult(0), m_Constant(&intAttr))) + return transformOp.emitSilenceableError() + << "requires param or handle to be the result of a constant-like " + "op"; + + result.push_back(intAttr.getInt()); + } + return DiagnosedSilenceableFailure::success(); +} + +/// Create a layout attribute from the given parameters. +static xegpu::LayoutAttr +createLayoutAttr(MLIRContext *ctx, ArrayRef<int32_t> sgLayout, + ArrayRef<int32_t> sgData, + std::optional<ArrayRef<int32_t>> instData) { + return xegpu::LayoutAttr::get( + ctx, DenseI32ArrayAttr::get(ctx, sgLayout), + DenseI32ArrayAttr::get(ctx, sgData), + instData ? DenseI32ArrayAttr::get(ctx, instData.value()) : nullptr, + /*lane_layout=*/nullptr, + /*lane_data=*/nullptr, + /*order=*/nullptr); +} + +/// Replace xegpu.create_nd_desc op with a new one with the given layout. +static xegpu::CreateNdDescOp +setDescLayout(transform::TransformRewriter &rewriter, + xegpu::CreateNdDescOp descOp, xegpu::LayoutAttr layout) { + assert(descOp.getMixedOffsets().size() == 0 && + "create desc op with offsets is not supported"); + auto oldTensorDesc = descOp.getType(); + auto descType = xegpu::TensorDescType::get( + oldTensorDesc.getShape(), oldTensorDesc.getElementType(), + /*array_length=*/oldTensorDesc.getArrayLength(), + /*boundary_check=*/oldTensorDesc.getBoundaryCheck(), + /*memory_space=*/oldTensorDesc.getMemorySpace(), + /*layout=*/layout); + + rewriter.setInsertionPointAfter(descOp); + auto newDescOp = rewriter.replaceOpWithNewOp<xegpu::CreateNdDescOp>( + descOp, descType, descOp.getSource(), descOp.getMixedSizes(), + descOp.getMixedStrides()); + return newDescOp; +} + +void transform::SetDescLayoutOp::build(OpBuilder &builder, + OperationState &result, Value target, + ArrayRef<OpFoldResult> mixedSgLayout, + ArrayRef<OpFoldResult> mixedSgData, + ArrayRef<OpFoldResult> mixedInstData) { + SmallVector<int64_t> staticSgLayout, staticSgData, staticInstData; + SmallVector<Value> dynamicSgLayout, dynamicSgData, dynamicInstData; + dispatchIndexOpFoldResults(mixedSgLayout, dynamicSgLayout, staticSgLayout); + dispatchIndexOpFoldResults(mixedSgData, dynamicSgData, staticSgData); + dispatchIndexOpFoldResults(mixedInstData, dynamicInstData, staticInstData); + build(builder, result, target.getType(), + /*target=*/target, + /*sg_layout=*/dynamicSgLayout, + /*sg_data=*/dynamicSgData, + /*inst_data=*/dynamicInstData, + /*static_sg_layout=*/staticSgLayout, + /*static_sg_data=*/staticSgData, + /*static_inst_data=*/staticInstData); +} + +DiagnosedSilenceableFailure +transform::SetDescLayoutOp::apply(transform::TransformRewriter &rewriter, + transform::TransformResults &results, + transform::TransformState &state) { + auto targetOps = state.getPayloadOps(getTarget()); + if (!llvm::hasSingleElement(targetOps)) { + return
emitDefiniteFailure() << "requires exactly one targetOp handle (got " + << llvm::range_size(targetOps) << ")"; + } + Operation *target = *targetOps.begin(); + + SmallVector<int32_t> sgLayout; + DiagnosedSilenceableFailure status = + convertMixedValuesToInt(state, (*this), sgLayout, getMixedSgLayout()); + if (!status.succeeded()) + return status; + + SmallVector<int32_t> sgData; + status = convertMixedValuesToInt(state, (*this), sgData, getMixedSgData()); + if (!status.succeeded()) + return status; + + SmallVector<int32_t> instData; + status = + convertMixedValuesToInt(state, (*this), instData, getMixedInstData()); + if (!status.succeeded()) + return status; + auto maybeInstData = instData.empty() + ? std::nullopt + : std::optional<ArrayRef<int32_t>>(instData); + + // For now only create_nd_desc op is supported. + auto descOp = dyn_cast<xegpu::CreateNdDescOp>(target); + if (!descOp) { + auto diag = emitSilenceableFailure(getLoc()) + << "Expected a xegpu.create_nd_desc op, but got: " + << target->getName(); + diag.attachNote(target->getLoc()) << "target op"; + return diag; + } + + // Set layout attr in desc op's return type. Replaces old desc op. + auto layoutAttr = + createLayoutAttr(rewriter.getContext(), sgLayout, sgData, maybeInstData); + auto newDescOp = setDescLayout(rewriter, descOp, layoutAttr); + + // Map result handles. + results.set(cast<OpResult>(getTransformed()), {newDescOp.getOperation()}); + + return DiagnosedSilenceableFailure::success(); +} + +void transform::SetDescLayoutOp::getEffects( + ::llvm::SmallVectorImpl<MemoryEffects::EffectInstance> &effects) { + consumesHandle(getTargetMutable(), effects); + onlyReadsHandle(getSgLayoutMutable(), effects); + onlyReadsHandle(getSgDataMutable(), effects); + onlyReadsHandle(getInstDataMutable(), effects); + producesHandle(getOperation()->getOpResults(), effects); + modifiesPayload(effects); +} + +namespace { +class XeGPUTransformDialectExtension + : public transform::TransformDialectExtension< + XeGPUTransformDialectExtension> { +public: + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(XeGPUTransformDialectExtension) + + using Base::Base; + + void init(); +}; + +void XeGPUTransformDialectExtension::init() { + declareGeneratedDialect<scf::SCFDialect>(); + declareGeneratedDialect<arith::ArithDialect>(); + declareGeneratedDialect<xegpu::XeGPUDialect>(); + + registerTransformOps< +#define GET_OP_LIST +#include "mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp.inc" + >(); +} +} // namespace + +#define GET_OP_CLASSES +#include "mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp.inc" + +void mlir::xegpu::registerTransformDialectExtension(DialectRegistry &registry) { + registry.addExtensions<XeGPUTransformDialectExtension>(); +} diff --git a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp index d2bafb7..a5bfde1 100644 --- a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp +++ b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp @@ -16,6 +16,7 @@ #include "mlir/Interfaces/ViewLikeInterface.h" #include "llvm/ADT/APSInt.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/DebugLog.h" #define DEBUG_TYPE "value-bounds-op-interface" @@ -195,7 +196,7 @@ void ValueBoundsConstraintSet::addBound(BoundType type, int64_t pos, // Even without this bound, there may be enough information in the // constraint system to compute the requested bound. In case this bound is // actually needed, `computeBound` will return `failure`.
- LLVM_DEBUG(llvm::dbgs() << "Failed to add bound: " << expr << "\n"); + LDBG() << "Failed to add bound: " << expr << "\n"; } } @@ -271,11 +272,9 @@ int64_t ValueBoundsConstraintSet::insert(Value value, assert(!valueDimToPosition.contains(valueDim) && "already mapped"); int64_t pos = isSymbol ? cstr.appendVar(VarKind::Symbol) : cstr.appendVar(VarKind::SetDim); - LLVM_DEBUG(llvm::dbgs() << "Inserting constraint set column " << pos - << " for: " << value - << " (dim: " << dim.value_or(kIndexValue) - << ", owner: " << getOwnerOfValue(value)->getName() - << ")\n"); + LDBG() << "Inserting constraint set column " << pos << " for: " << value + << " (dim: " << dim.value_or(kIndexValue) + << ", owner: " << getOwnerOfValue(value)->getName() << ")"; positionToValueDim.insert(positionToValueDim.begin() + pos, valueDim); // Update reverse mapping. for (int64_t i = pos, e = positionToValueDim.size(); i < e; ++i) @@ -283,8 +282,8 @@ int64_t ValueBoundsConstraintSet::insert(Value value, valueDimToPosition[*positionToValueDim[i]] = i; if (addToWorklist) { - LLVM_DEBUG(llvm::dbgs() << "Push to worklist: " << value - << " (dim: " << dim.value_or(kIndexValue) << ")\n"); + LDBG() << "Push to worklist: " << value + << " (dim: " << dim.value_or(kIndexValue) << ")"; worklist.push(pos); } @@ -294,8 +293,7 @@ int64_t ValueBoundsConstraintSet::insert(Value value, int64_t ValueBoundsConstraintSet::insert(bool isSymbol) { int64_t pos = isSymbol ? cstr.appendVar(VarKind::Symbol) : cstr.appendVar(VarKind::SetDim); - LLVM_DEBUG(llvm::dbgs() << "Inserting anonymous constraint set column " << pos - << "\n"); + LDBG() << "Inserting anonymous constraint set column " << pos; positionToValueDim.insert(positionToValueDim.begin() + pos, std::nullopt); // Update reverse mapping. for (int64_t i = pos, e = positionToValueDim.size(); i < e; ++i) @@ -339,10 +337,9 @@ int64_t ValueBoundsConstraintSet::getPos(Value value, cast<BlockArgument>(value).getOwner()->isEntryBlock()) && "unstructured control flow is not supported"); #endif // NDEBUG - LLVM_DEBUG(llvm::dbgs() << "Getting pos for: " << value - << " (dim: " << dim.value_or(kIndexValue) - << ", owner: " << getOwnerOfValue(value)->getName() - << ")\n"); + LDBG() << "Getting pos for: " << value + << " (dim: " << dim.value_or(kIndexValue) + << ", owner: " << getOwnerOfValue(value)->getName() << ")"; auto it = valueDimToPosition.find(std::make_pair(value, dim.value_or(kIndexValue))); assert(it != valueDimToPosition.end() && "expected mapped entry"); @@ -364,7 +361,7 @@ bool ValueBoundsConstraintSet::isMapped(Value value, } void ValueBoundsConstraintSet::processWorklist() { - LLVM_DEBUG(llvm::dbgs() << "Processing value bounds worklist...\n"); + LDBG() << "Processing value bounds worklist..."; while (!worklist.empty()) { int64_t pos = worklist.front(); worklist.pop(); @@ -386,8 +383,8 @@ void ValueBoundsConstraintSet::processWorklist() { // Do not process any further if the stop condition is met. auto maybeDim = dim == kIndexValue ? std::nullopt : std::make_optional(dim); if (stopCondition(value, maybeDim, *this)) { - LLVM_DEBUG(llvm::dbgs() << "Stop condition met for: " << value - << " (dim: " << maybeDim << ")\n"); + LDBG() << "Stop condition met for: " << value << " (dim: " << maybeDim + << ")"; continue; } @@ -395,9 +392,8 @@ void ValueBoundsConstraintSet::processWorklist() { // the worklist. 
auto valueBoundsOp = dyn_cast<ValueBoundsOpInterface>(getOwnerOfValue(value)); - LLVM_DEBUG(llvm::dbgs() - << "Query value bounds for: " << value - << " (owner: " << getOwnerOfValue(value)->getName() << ")\n"); + LDBG() << "Query value bounds for: " << value + << " (owner: " << getOwnerOfValue(value)->getName() << ")"; if (valueBoundsOp) { if (dim == kIndexValue) { valueBoundsOp.populateBoundsForIndexValue(value, *this); } else { valueBoundsOp.populateBoundsForShapedValueDim(value, dim, *this); } continue; } - LLVM_DEBUG(llvm::dbgs() << "--> ValueBoundsOpInterface not implemented\n"); + LDBG() << "--> ValueBoundsOpInterface not implemented"; // If the op does not implement `ValueBoundsOpInterface`, check if it // implements the `DestinationStyleOpInterface`. OpResults of such ops are @@ -705,9 +701,7 @@ bool ValueBoundsConstraintSet::comparePos(int64_t lhsPos, // We cannot prove anything if the constraint set is already empty. if (cstr.isEmpty()) { - LLVM_DEBUG( - llvm::dbgs() - << "cannot compare value/dims: constraint system is already empty"); + LDBG() << "cannot compare value/dims: constraint system is already empty"; return false; } diff --git a/mlir/lib/RegisterAllExtensions.cpp b/mlir/lib/RegisterAllExtensions.cpp index 3839172..c857c38 100644 --- a/mlir/lib/RegisterAllExtensions.cpp +++ b/mlir/lib/RegisterAllExtensions.cpp @@ -56,6 +56,7 @@ #include "mlir/Dialect/Transform/SMTExtension/SMTExtension.h" #include "mlir/Dialect/Transform/TuneExtension/TuneExtension.h" #include "mlir/Dialect/Vector/TransformOps/VectorTransformOps.h" +#include "mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.h" #include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" @@ -112,6 +113,7 @@ void mlir::registerAllExtensions(DialectRegistry &registry) { transform::registerSMTExtension(registry); transform::registerTuneExtension(registry); vector::registerTransformDialectExtension(registry); + xegpu::registerTransformDialectExtension(registry); arm_neon::registerTransformDialectExtension(registry); arm_sve::registerTransformDialectExtension(registry); diff --git a/mlir/lib/Support/Timing.cpp b/mlir/lib/Support/Timing.cpp index 2e92d9c..b0ac379 100644 --- a/mlir/lib/Support/Timing.cpp +++ b/mlir/lib/Support/Timing.cpp @@ -619,11 +619,17 @@ void mlir::applyDefaultTimingManagerCLOptions(DefaultTimingManager &tm) { return; tm.setEnabled(options->timing); tm.setDisplayMode(options->displayMode); + tm.setOutput(createOutputStrategy(options->outputFormat, llvm::errs())); +} - std::unique_ptr<OutputStrategy> printer; - if (options->outputFormat == OutputFormat::Text) - printer = std::make_unique<OutputTextStrategy>(llvm::errs()); - else if (options->outputFormat == OutputFormat::Json) - printer = std::make_unique<OutputJsonStrategy>(llvm::errs()); - tm.setOutput(std::move(printer)); +std::unique_ptr<OutputStrategy> +mlir::createOutputStrategy(DefaultTimingManager::OutputFormat fmt, + raw_ostream &os) { + switch (fmt) { + case OutputFormat::Text: + return std::make_unique<OutputTextStrategy>(os); + case OutputFormat::Json: + return std::make_unique<OutputJsonStrategy>(os); + } + llvm_unreachable("Invalid output format"); } diff --git a/mlir/lib/Transforms/RemoveDeadValues.cpp b/mlir/lib/Transforms/RemoveDeadValues.cpp index 979b396..41f3f9d 100644 --- a/mlir/lib/Transforms/RemoveDeadValues.cpp +++ b/mlir/lib/Transforms/RemoveDeadValues.cpp @@ -742,25 +742,7 @@ static void
processBranchOp(BranchOpInterface branchOp, RunLivenessAnalysis &la, static void cleanUpDeadVals(RDVFinalCleanupList &list) { LDBG() << "Starting cleanup of dead values..."; - // 1. Blocks - LDBG() << "Cleaning up " << list.blocks.size() << " block argument lists"; - for (auto &b : list.blocks) { - // blocks that are accessed via multiple codepaths processed once - if (b.b->getNumArguments() != b.nonLiveArgs.size()) - continue; - LDBG() << "Erasing " << b.nonLiveArgs.count() - << " non-live arguments from block: " << b.b; - // it iterates backwards because erase invalidates all successor indexes - for (int i = b.nonLiveArgs.size() - 1; i >= 0; --i) { - if (!b.nonLiveArgs[i]) - continue; - LDBG() << " Erasing block argument " << i << ": " << b.b->getArgument(i); - b.b->getArgument(i).dropAllUses(); - b.b->eraseArgument(i); - } - } - - // 2. Operations + // 1. Operations LDBG() << "Cleaning up " << list.operations.size() << " operations"; for (auto &op : list.operations) { LDBG() << "Erasing operation: " @@ -769,14 +751,14 @@ static void cleanUpDeadVals(RDVFinalCleanupList &list) { op->erase(); } - // 3. Values + // 2. Values LDBG() << "Cleaning up " << list.values.size() << " values"; for (auto &v : list.values) { LDBG() << "Dropping all uses of value: " << v; v.dropAllUses(); } - // 4. Functions + // 3. Functions LDBG() << "Cleaning up " << list.functions.size() << " functions"; // Record which function arguments were erased so we can shrink call-site // argument segments for CallOpInterface operations (e.g. ops using @@ -798,7 +780,7 @@ static void cleanUpDeadVals(RDVFinalCleanupList &list) { (void)f.funcOp.eraseResults(f.nonLiveRets); } - // 5. Operands + // 4. Operands LDBG() << "Cleaning up " << list.operands.size() << " operand lists"; for (OperationToCleanup &o : list.operands) { // Handle call-specific cleanup only when we have a cached callee reference. @@ -840,7 +822,7 @@ static void cleanUpDeadVals(RDVFinalCleanupList &list) { } } - // 6. Results + // 5. Results LDBG() << "Cleaning up " << list.results.size() << " result lists"; for (auto &r : list.results) { LDBG() << "Erasing " << r.nonLive.count() @@ -849,6 +831,24 @@ static void cleanUpDeadVals(RDVFinalCleanupList &list) { dropUsesAndEraseResults(r.op, r.nonLive); } + // 6. Blocks + LDBG() << "Cleaning up " << list.blocks.size() << " block argument lists"; + for (auto &b : list.blocks) { + // Blocks reachable via multiple code paths are processed only once. + if (b.b->getNumArguments() != b.nonLiveArgs.size()) + continue; + LDBG() << "Erasing " << b.nonLiveArgs.count() + << " non-live arguments from block: " << b.b; + // Iterate backwards: erasing an argument invalidates later argument indices. + for (int i = b.nonLiveArgs.size() - 1; i >= 0; --i) { + if (!b.nonLiveArgs[i]) + continue; + LDBG() << " Erasing block argument " << i << ": " << b.b->getArgument(i); + b.b->getArgument(i).dropAllUses(); + b.b->eraseArgument(i); + } + } + // 7.
Successor Operands LDBG() << "Cleaning up " << list.successorOperands.size() << " successor operand lists"; diff --git a/mlir/python/CMakeLists.txt b/mlir/python/CMakeLists.txt index 20ed3ab..51c7576 100644 --- a/mlir/python/CMakeLists.txt +++ b/mlir/python/CMakeLists.txt @@ -322,6 +322,15 @@ declare_mlir_dialect_extension_python_bindings( "../../include/mlir/Dialect/Vector/Transforms/VectorTransformsBase.td" ) +declare_mlir_dialect_extension_python_bindings( + ADD_TO_PARENT MLIRPythonSources.Dialects + ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir" + TD_FILE dialects/XeGPUTransformOps.td + SOURCES + dialects/transform/xegpu.py + DIALECT_NAME transform + EXTENSION_NAME xegpu_transform) + declare_mlir_dialect_python_bindings( ADD_TO_PARENT MLIRPythonSources.Dialects ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir" diff --git a/mlir/python/mlir/dialects/XeGPUTransformOps.td b/mlir/python/mlir/dialects/XeGPUTransformOps.td new file mode 100644 index 0000000..5a5e7b9 --- /dev/null +++ b/mlir/python/mlir/dialects/XeGPUTransformOps.td @@ -0,0 +1,19 @@ +//===---- XeGPUTransformOps.td -----------------------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Entry point of the Python bindings generator for the XeGPU transform ops. +// +//===----------------------------------------------------------------------===// + + +#ifndef PYTHON_BINDINGS_XEGPU_TRANSFORM_OPS +#define PYTHON_BINDINGS_XEGPU_TRANSFORM_OPS + +include "mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td" + +#endif // PYTHON_BINDINGS_XEGPU_TRANSFORM_OPS diff --git a/mlir/python/mlir/dialects/transform/xegpu.py b/mlir/python/mlir/dialects/transform/xegpu.py new file mode 100644 index 0000000..2918bf5 --- /dev/null +++ b/mlir/python/mlir/dialects/transform/xegpu.py @@ -0,0 +1,66 @@ +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +from .._xegpu_transform_ops_gen import * +from .._xegpu_transform_ops_gen import _Dialect + +try: + from ...ir import * + from .._ods_common import _cext as _ods_cext + from .._ods_common import ( + MixedValues, + get_op_result_or_value as _get_op_result_or_value, + _dispatch_dynamic_index_list, + ) + +except ImportError as e: + raise RuntimeError("Error loading imports from extension module") from e + +from typing import Union, Optional + + +@_ods_cext.register_operation(_Dialect, replace=True) +class SetDescLayoutOp(SetDescLayoutOp): + """Specialization for SetDescLayoutOp class.""" + + def __init__( + self, + target: Union[Operation, Value], + sg_layout: MixedValues, + sg_data: MixedValues, + *, + inst_data: Optional[MixedValues] = None, + loc=None, + ip=None, + ): + target_handle = _get_op_result_or_value(target) + inst_data = [] if inst_data is None else inst_data + ( + dynamic_sg_layout, + static_sg_layout, + _, + ) = _dispatch_dynamic_index_list(sg_layout) + ( + dynamic_sg_data, + static_sg_data, + _, + ) = _dispatch_dynamic_index_list(sg_data) + ( + dynamic_inst_data, + static_inst_data, + _, + ) = _dispatch_dynamic_index_list(inst_data) + + super().__init__( + target_handle.type, + target_handle, + dynamic_sg_layout, + dynamic_sg_data, + dynamic_inst_data, + static_sg_layout=static_sg_layout, + static_sg_data=static_sg_data, + static_inst_data=static_inst_data, + loc=loc, + ip=ip, + ) diff --git a/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir index aa2c1da..9a14ab7 100644 --- a/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir +++ b/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir @@ -285,6 +285,8 @@ module attributes {transform.with_named_sequence} { ///---------------------------------------------------------------------------------------- /// Tests for linalg.pack +/// +/// TODO: Add similar tests for linalg.unpack ///---------------------------------------------------------------------------------------- // Note, see a similar test in: diff --git a/mlir/test/Dialect/Linalg/vectorization/linalg-ops.mlir b/mlir/test/Dialect/Linalg/vectorization/linalg-ops.mlir index 1304a90..170bae6 100644 --- a/mlir/test/Dialect/Linalg/vectorization/linalg-ops.mlir +++ b/mlir/test/Dialect/Linalg/vectorization/linalg-ops.mlir @@ -1335,7 +1335,7 @@ func.func @pack_no_padding(%src: tensor<32x8x16xf32>, %dest: tensor<4x1x32x16x2x module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%src: !transform.any_op {transform.readonly}) { %0 = transform.structured.match ops{["linalg.pack"]} in %src : (!transform.any_op) -> !transform.any_op - transform.structured.vectorize %0 vector_sizes [4, 1, 32] : !transform.any_op + transform.structured.vectorize %0 vector_sizes [4, 1, 32, 16, 2] : !transform.any_op transform.yield } } @@ -1378,7 +1378,7 @@ func.func @pack_with_padding(%src: tensor<32x7x15xf32>, %dest: tensor<32x4x1x16x module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op - transform.structured.vectorize %0 vector_sizes [32, 4, 1] : !transform.any_op + transform.structured.vectorize %0 vector_sizes [32, 4, 1, 16, 2] : !transform.any_op transform.yield } } @@ -1424,8 +1424,13 @@ 
module attributes {transform.with_named_sequence} { // CHECK-LABEL: func @pack_with_dynamic_dims // CHECK-SAME: %[[SRC:.*]]: tensor<?x?xf32>, // CHECK-SAME: %[[DEST:.*]]: tensor<?x?x16x2xf32> -func.func @pack_with_dynamic_dims(%src: tensor<?x?xf32>, %dest: tensor<?x?x16x2xf32>) -> tensor<?x?x16x2xf32> { - %pack = linalg.pack %src inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %dest : tensor<?x?xf32> -> tensor<?x?x16x2xf32> +func.func @pack_with_dynamic_dims( + %src: tensor<?x?xf32>, + %dest: tensor<?x?x16x2xf32>) -> tensor<?x?x16x2xf32> { + %pack = linalg.pack %src + inner_dims_pos = [1, 0] + inner_tiles = [16, 2] + into %dest : tensor<?x?xf32> -> tensor<?x?x16x2xf32> return %pack : tensor<?x?x16x2xf32> } @@ -1433,30 +1438,108 @@ func.func @pack_with_dynamic_dims(%src: tensor<?x?xf32>, %dest: tensor<?x?x16x2x // CHECK-DAG: %[[C0_1:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C0_0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1_0:.*]] = arith.constant 1 : index + +/// Compute mask for xfer_read // CHECK-DAG: %[[D0_0:.*]] = tensor.dim {{.*}} %[[C0_0]] : tensor<?x?xf32> // CHECK-DAG: %[[D1_0:.*]] = tensor.dim {{.*}} %[[C1_0]] : tensor<?x?xf32> // CHECK: %[[MASK:.*]] = vector.create_mask %[[D0_0]], %[[D1_0]] : vector<8x16xi1> + +/// --= read =--- // CHECK: %[[READ:.*]] = vector.mask %[[MASK]] { // CHECK-SAME: vector.transfer_read %{{.*}}[%[[C0_1]], %[[C0_1]]], %[[CST]] // CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<8x16xf32> // CHECK-SAME: } : vector<8x16xi1> -> vector<8x16xf32> + +/// --= shape_cast =--- // CHECK: %[[SC:.*]] = vector.shape_cast %[[READ]] : vector<8x16xf32> to vector<4x2x1x16xf32> + +/// --= transpose =--- // CHECK: %[[TR:.*]] = vector.transpose %[[SC]], [0, 2, 3, 1] : vector<4x2x1x16xf32> to vector<4x1x16x2xf32> + +/// Compute mask for xfer_write // CHECK-DAG: %[[C0_2:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index // CHECK-DAG: %[[D2:.*]] = tensor.dim %[[DEST]], {{.*}} : tensor<?x?x16x2xf32> // CHECK-DAG: %[[D3:.*]] = tensor.dim %[[DEST]], {{.*}} : tensor<?x?x16x2xf32> // CHECK: %[[MASK_0:.*]] = vector.create_mask %[[D2]], %[[D3]], %[[C16]], %[[C2]] : vector<4x1x16x2xi1> + +/// --= write =--- // CHECK: %[[WRITE:.*]] = vector.mask %[[MASK_0]] { // CHECK-SAME: vector.transfer_write %[[TR]], %[[DEST]][%[[C0_2]], %[[C0_2]], %[[C0_2]], %[[C0_2]]] // CHECK-SAME: {in_bounds = [true, true, true, true]} : vector<4x1x16x2xf32>, tensor<?x?x16x2xf32> + // CHECK: return %[[WRITE]] : tensor<?x?x16x2xf32> module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op - transform.structured.vectorize %0 vector_sizes [4, 1] : !transform.any_op + transform.structured.vectorize %0 vector_sizes [4, 1, 16, 2] : !transform.any_op + transform.yield + } +} + +// ----- + +/// Similar to the test above, but one of the inner tile sizes is dynamic. As a +/// result, more output dims are dynamic (and, e.g., the output mask calculation +/// is a bit different: the dynamic inner-tile size comes from tensor.dim rather +/// than an arith.constant).
+ +// CHECK-LABEL: func @pack_with_dynamic_dims_and_dynamic_inner_tile +// CHECK-SAME: %[[SRC:.*]]: tensor<?x?xf32>, +// CHECK-SAME: %[[DEST:.*]]: tensor<?x?x?x2xf32> +func.func @pack_with_dynamic_dims_and_dynamic_inner_tile( + %src: tensor<?x?xf32>, + %dest: tensor<?x?x?x2xf32>) -> tensor<?x?x?x2xf32> { + %c16 = arith.constant 16 : index + %pack = linalg.pack %src + inner_dims_pos = [1, 0] + inner_tiles = [%c16, 2] + into %dest : tensor<?x?xf32> -> tensor<?x?x?x2xf32> + return %pack : tensor<?x?x?x2xf32> +} + +// CHECK-DAG: %[[CST:.*]] = ub.poison : f32 +// CHECK-DAG: %[[C0_1:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C0_0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C1_0:.*]] = arith.constant 1 : index + +/// Compute mask for xfer_read +// CHECK-DAG: %[[D0_0:.*]] = tensor.dim {{.*}} %[[C0_0]] : tensor<?x?xf32> +// CHECK-DAG: %[[D1_0:.*]] = tensor.dim {{.*}} %[[C1_0]] : tensor<?x?xf32> +// CHECK: %[[MASK:.*]] = vector.create_mask %[[D0_0]], %[[D1_0]] : vector<8x16xi1> + +/// --= read =--- +// CHECK: %[[READ:.*]] = vector.mask %[[MASK]] { +// CHECK-SAME: vector.transfer_read %{{.*}}[%[[C0_1]], %[[C0_1]]], %[[CST]] +// CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<8x16xf32> +// CHECK-SAME: } : vector<8x16xi1> -> vector<8x16xf32> + +/// --= shape_cast =--- +// CHECK: %[[SC:.*]] = vector.shape_cast %[[READ]] : vector<8x16xf32> to vector<4x2x1x16xf32> + +/// --= transpose =--- +// CHECK: %[[TR:.*]] = vector.transpose %[[SC]], [0, 2, 3, 1] : vector<4x2x1x16xf32> to vector<4x1x16x2xf32> + +/// Compute mask for xfer_write +// CHECK-DAG: %[[C0_2:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[C2_2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[D2:.*]] = tensor.dim %[[DEST]], {{.*}} : tensor<?x?x?x2xf32> +// CHECK-DAG: %[[D3:.*]] = tensor.dim %[[DEST]], {{.*}} : tensor<?x?x?x2xf32> +// CHECK-DAG: %[[D4:.*]] = tensor.dim %[[DEST]], {{.*}} : tensor<?x?x?x2xf32> +// CHECK: %[[MASK_0:.*]] = vector.create_mask %[[D2]], %[[D3]], %[[D4]], %[[C2_2]] : vector<4x1x16x2xi1> + +/// --= write =--- +// CHECK: %[[WRITE:.*]] = vector.mask %[[MASK_0]] { +// CHECK-SAME: vector.transfer_write %[[TR]], %[[DEST]][%[[C0_2]], %[[C0_2]], %[[C0_2]], %[[C0_2]]] +// CHECK-SAME: {in_bounds = [true, true, true, true]} : vector<4x1x16x2xf32>, tensor<?x?x?x2xf32> + +// CHECK: return %[[WRITE]] : tensor<?x?x?x2xf32> + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + transform.structured.vectorize %0 vector_sizes [4, 1, 16, 2] : !transform.any_op transform.yield } } diff --git a/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir b/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir new file mode 100644 index 0000000..3035845 --- /dev/null +++ b/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir @@ -0,0 +1,15 @@ +// RUN: mlir-opt %s -transform-interpreter -split-input-file -verify-diagnostics + +func.func @set_desc_layout(%arg0: memref<4096x4096xf16>) { + %c32 = arith.constant 32 : index // expected-note {{target op}} + return +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["arith.constant"]} in %arg1 : (!transform.any_op) -> !transform.any_op + // expected-error@below {{Expected a xegpu.create_nd_desc 
op, but got: arith.constant}} + %1 = transform.xegpu.set_desc_layout %0 sg_layout = [8, 4] sg_data = [32, 32] : (!transform.any_op) -> !transform.any_op + transform.yield + } +} diff --git a/mlir/test/Dialect/XeGPU/transform-ops.mlir b/mlir/test/Dialect/XeGPU/transform-ops.mlir new file mode 100644 index 0000000..23e1cd9 --- /dev/null +++ b/mlir/test/Dialect/XeGPU/transform-ops.mlir @@ -0,0 +1,58 @@ +// RUN: mlir-opt %s -transform-interpreter -split-input-file -verify-diagnostics | FileCheck %s + +// CHECK-LABEL: @set_desc_layout +func.func @set_desc_layout(%arg0: memref<4096x4096xf16>) { + // CHECK: %[[V0:.+]] = xegpu.create_nd_tdesc %arg0 + // CHECK-SAME: #xegpu.block_tdesc_attr<boundary_check = false> + // CHECK-SAME: #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], inst_data = [8, 16]>> + %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16, #xegpu.block_tdesc_attr<boundary_check = false>> + return +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["xegpu.create_nd_tdesc"]} in %arg1 : (!transform.any_op) -> !transform.any_op + // CHECK: transform.xegpu.set_desc_layout %{{.*}} + %1 = transform.xegpu.set_desc_layout %0 sg_layout = [8, 4] sg_data = [32, 32] inst_data = [8, 16] : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +// CHECK-LABEL: @set_desc_layout_minimal +func.func @set_desc_layout_minimal(%arg0: memref<4096x4096xf16>) { + // CHECK: %[[V0:.+]] = xegpu.create_nd_tdesc %arg0 + // CHECK-SAME: #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32]>> + %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16> + return +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["xegpu.create_nd_tdesc"]} in %arg1 : (!transform.any_op) -> !transform.any_op + // CHECK: transform.xegpu.set_desc_layout %{{.*}} + %1 = transform.xegpu.set_desc_layout %0 sg_layout = [8, 4] sg_data = [32, 32] : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +// CHECK-LABEL: @set_desc_layout_param +func.func @set_desc_layout_param(%arg0: memref<4096x4096xf16>) { + // CHECK: %[[V0:.+]] = xegpu.create_nd_tdesc %arg0 + // CHECK-SAME: #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], inst_data = [8, 16]>> + %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16> + return +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["xegpu.create_nd_tdesc"]} in %arg1 : (!transform.any_op) -> !transform.any_op + // CHECK: transform.xegpu.set_desc_layout %{{.*}} + %layout0 = transform.param.constant 8 : i64 -> !transform.param<i64> + %1 = transform.xegpu.set_desc_layout %0 sg_layout = [%layout0, 4] sg_data = [32, 32] inst_data = [8, 16] : (!transform.any_op, !transform.param<i64>) -> !transform.any_op + transform.yield + } +} diff --git a/mlir/test/Transforms/remove-dead-values.mlir b/mlir/test/Transforms/remove-dead-values.mlir index 8b5ccdc..e730450 100644 --- a/mlir/test/Transforms/remove-dead-values.mlir +++ b/mlir/test/Transforms/remove-dead-values.mlir @@ -674,18 +674,3 @@ func.func @dead_value_loop_ivs_no_result(%lb: index, %ub: index, %step: 
index, % } return } - -// ----- - -// CHECK-LABEL: func @op_block_have_dead_arg -func.func @op_block_have_dead_arg(%arg0: index, %arg1: index, %arg2: index, %arg3: i1) { - scf.for %iv = %arg0 to %arg1 step %arg2 { - scf.execute_region { - cf.cond_br %arg3, ^bb1(%arg0 : index), ^bb1(%arg1 : index) - ^bb1(%0: index): - scf.yield - } - } -// CHECK-NEXT: return - return -} diff --git a/mlir/test/python/dialects/transform_xegpu_ext.py b/mlir/test/python/dialects/transform_xegpu_ext.py new file mode 100644 index 0000000..1c8a2bc --- /dev/null +++ b/mlir/test/python/dialects/transform_xegpu_ext.py @@ -0,0 +1,51 @@ +# RUN: %PYTHON %s | FileCheck %s + +from mlir.ir import * +from mlir.dialects import transform +from mlir.dialects.transform import xegpu +from mlir.dialects.transform import structured + + +def run(f): + with Context(), Location.unknown(): + module = Module.create() + with InsertionPoint(module.body): + print("\nTEST:", f.__name__) + f() + print(module) + return f + + +@run +def setDescLayoutMinimal(): + sequence = transform.SequenceOp( + transform.FailurePropagationMode.Propagate, + [], + transform.OperationType.get("xegpu.create_nd_tdesc"), + ) + with InsertionPoint(sequence.body): + xegpu.SetDescLayoutOp(sequence.bodyTarget, sg_layout=[6, 4], sg_data=[32, 16]) + transform.YieldOp() + # CHECK-LABEL: TEST: setDescLayoutMinimal + # CHECK: %0 = transform.xegpu.set_desc_layout % + # CHECK: sg_layout = [6, 4] + # CHECK: sg_data = [32, 16] + + +@run +def setDescLayoutInstData(): + sequence = transform.SequenceOp( + transform.FailurePropagationMode.Propagate, + [], + transform.OperationType.get("xegpu.create_nd_tdesc"), + ) + with InsertionPoint(sequence.body): + xegpu.SetDescLayoutOp( + sequence.bodyTarget, sg_layout=[6, 4], sg_data=[32, 16], inst_data=[8, 16] + ) + transform.YieldOp() + # CHECK-LABEL: TEST: setDescLayoutInstData + # CHECK: %0 = transform.xegpu.set_desc_layout % + # CHECK: sg_layout = [6, 4] + # CHECK: sg_data = [32, 16] + # CHECK: inst_data = [8, 16] diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index b65fe64..ecd11b9 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -5281,6 +5281,7 @@ libc_function( hdrs = ["src/stdlib/strfromf.h"], deps = [ ":__support_common", + ":printf_error_mapper", ":str_from_util", ], ) @@ -5291,6 +5292,7 @@ libc_function( hdrs = ["src/stdlib/strfromd.h"], deps = [ ":__support_common", + ":printf_error_mapper", ":str_from_util", ], ) @@ -5301,6 +5303,7 @@ libc_function( hdrs = ["src/stdlib/strfroml.h"], deps = [ ":__support_common", + ":printf_error_mapper", ":str_from_util", ], ) @@ -6514,12 +6517,34 @@ libc_support_library( ) libc_support_library( + name = "printf_error_mapper", + hdrs = [ + "src/stdio/printf_core/error_mapper.h", + ] + select({ + "@platforms//os:linux": [ + "src/stdio/printf_core/linux/error_mapper.h", + ], + "//conditions:default": [ + "src/stdio/printf_core/generic/error_mapper.h", + ], + }), + deps = [ + ":__support_cpp_type_traits", + ":__support_error_or", + ":__support_macros_properties_architectures", + ":hdr_errno_macros", + ":printf_core_structs", + ], +) + +libc_support_library( name = "printf_main", hdrs = ["src/stdio/printf_core/printf_main.h"], deps = [ ":__support_arg_list", ":printf_converter", ":printf_core_structs", + ":printf_error_mapper", ":printf_parser", ":printf_writer", ], diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/stdio/BUILD.bazel 
b/utils/bazel/llvm-project-overlay/libc/test/src/stdio/BUILD.bazel index cbc6d13..e33199c 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/src/stdio/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/src/stdio/BUILD.bazel @@ -87,6 +87,8 @@ libc_test( name = "fprintf_test", srcs = ["fprintf_test.cpp"], deps = [ + "//libc:__support_cpp_limits", + "//libc:__support_macros_properties_architectures", "//libc:fprintf", ], )
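A usage note on the Timing.cpp hunk above: the patch lifts the Text/Json dispatch out of applyDefaultTimingManagerCLOptions into a public mlir::createOutputStrategy factory. Below is a minimal sketch of reusing that factory to capture a timing report in memory. The factory name and signature are taken from the hunk; the wrapper function, its name, and the string-stream plumbing are illustrative assumptions, including the assumption that the declaration is exposed via mlir/Support/Timing.h.

```
// Sketch only: assumes mlir::createOutputStrategy is declared in
// mlir/Support/Timing.h to match the definition added in Timing.cpp.
#include "mlir/Support/Timing.h"
#include "llvm/Support/raw_ostream.h"

#include <string>

// Hypothetical helper: collect the timing report as a JSON string instead of
// printing it to llvm::errs().
std::string collectTimingAsJson() {
  std::string json;
  // The stream must outlive the manager: the output strategy stores only a
  // reference to it.
  llvm::raw_string_ostream os(json);

  mlir::DefaultTimingManager tm;
  tm.setEnabled(true);
  tm.setOutput(mlir::createOutputStrategy(
      mlir::DefaultTimingManager::OutputFormat::Json, os));

  mlir::TimingScope root = tm.getRootScope();
  mlir::TimingScope phase = root.nest("my-phase");
  // ... timed work would go here ...
  phase.stop();
  root.stop();

  tm.print(); // Print (and clear) the collected timers via the strategy.
  return json;
}
```

One design note: dispatching with a covered switch followed by llvm_unreachable, rather than the previous if/else chain, means a future OutputFormat enumerator surfaces as a -Wswitch warning at the factory instead of silently producing a null printer.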
