245 files changed, 7957 insertions, 3244 deletions
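The headline change in this range is initial ClangIR (CIR) support for C++20 coroutines: `cir.func` gains `builtin` and `coroutine` unit attributes, coroutine bodies now emit a call to a `__builtin_coro_id` declaration, and new tests cover the output. As a rough sketch of the kind of source that now lowers through this path, loosely modeled on the new clang/test/CIR/CodeGen/coro-task.cpp test (the `Task` type below is an illustrative stand-in, not the test's `folly::coro::Task`):

```cpp
#include <coroutine>
#include <utility>

// Illustrative stand-in task type, just enough promise machinery to compile.
struct Task {
  struct promise_type {
    Task get_return_object() {
      return Task(std::coroutine_handle<promise_type>::from_promise(*this));
    }
    std::suspend_always initial_suspend() noexcept { return {}; }
    std::suspend_always final_suspend() noexcept { return {}; }
    void return_void() {}
    void unhandled_exception() {}
  };

  explicit Task(std::coroutine_handle<promise_type> h) : handle(h) {}
  Task(Task &&other) noexcept : handle(std::exchange(other.handle, {})) {}
  Task(const Task &) = delete;
  ~Task() {
    if (handle)
      handle.destroy(); // never resumed in this sketch, so free the frame
  }

  std::coroutine_handle<promise_type> handle;
};

Task silly_task() {
  co_await std::suspend_always{};
}

int main() { silly_task(); }
```

Compiled with `-fclangir -emit-cir` (the flags used by the test's RUN lines), `silly_task` should come out as a `cir.func coroutine` whose body begins with a call to the `builtin`-marked `@__builtin_coro_id` declaration. Separately, `--offload-jobs=` now accepts the string `jobserver` in addition to a positive integer; see the Options.td and Clang.cpp hunks below.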
diff --git a/.github/workflows/libcxx-build-and-test.yaml b/.github/workflows/libcxx-build-and-test.yaml index 1c07a0a..77f79a8 100644 --- a/.github/workflows/libcxx-build-and-test.yaml +++ b/.github/workflows/libcxx-build-and-test.yaml @@ -281,6 +281,10 @@ jobs: - name: Set up the MSVC dev environment if: ${{ matrix.mingw != true }} uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756 # v1.13.0 + - name: Add the installed Clang at the start of the path + if: ${{ matrix.mingw != true }} + run: | + echo "c:\Program Files\LLVM\bin" | Out-File -FilePath $Env:GITHUB_PATH -Encoding utf8 -Append - name: Build and test run: | bash libcxx/utils/ci/run-buildbot ${{ matrix.config }} diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 0a78492..7f2e55d 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -2341,6 +2341,12 @@ def CIR_FuncOp : CIR_Op<"func", [ The function linkage information is specified by `linkage`, as defined by `GlobalLinkageKind` attribute. + A compiler builtin function must be marked as `builtin` for further + processing when lowering from CIR. + + The `coroutine` keyword is used to mark a coroutine function, which requires + at least one `cir.await` instruction to be used in its body. + The `lambda` translates to a C++ `operator()` that implements a lambda, this allow callsites to make certain assumptions about the real function nature when writing analysis. @@ -2362,11 +2368,22 @@ def CIR_FuncOp : CIR_Op<"func", [ // Linkage information cir.func linkonce_odr @some_method(...) ``` + // Builtin function + cir.func builtin @__builtin_coro_end(!cir.ptr<i8>, !cir.bool) -> !cir.bool + // Coroutine + cir.func coroutine @_Z10silly_taskv() -> !CoroTask { + ... + cir.await(...) + ... + } + ``` }]; let arguments = (ins SymbolNameAttr:$sym_name, CIR_VisibilityAttr:$global_visibility, TypeAttrOf<CIR_FuncType>:$function_type, + UnitAttr:$builtin, + UnitAttr:$coroutine, UnitAttr:$lambda, UnitAttr:$no_proto, UnitAttr:$dso_local, diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index 3dfcafc..0e7cec4 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -136,6 +136,13 @@ struct MissingFeatures { static bool recordZeroInitPadding() { return false; } static bool zeroSizeRecordMembers() { return false; } + // Coroutines + static bool coroAllocBuiltinCall() { return false; } + static bool coroBeginBuiltinCall() { return false; } + static bool coroEndBuiltinCall() { return false; } + static bool coroSizeBuiltinCall() { return false; } + static bool coroutineFrame() { return false; } + // Various handling of deferred processing in CIRGenModule. static bool cgmRelease() { return false; } static bool deferredVtables() { return false; } diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 2ef6098..5a48f0b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1258,8 +1258,9 @@ def offload_compression_level_EQ : Joined<["--"], "offload-compression-level=">, HelpText<"Compression level for offload device binaries (HIP only)">; def offload_jobs_EQ : Joined<["--"], "offload-jobs=">, - HelpText<"Specify the number of threads to use for device offloading tasks" - " during compilation.">; + HelpText<"Specify the number of threads to use for device offloading tasks " + "during compilation. 
Can be a positive integer or the string " + "'jobserver' to use the make-style jobserver from the environment.">; defm offload_via_llvm : BoolFOption<"offload-via-llvm", LangOpts<"OffloadViaLLVM">, DefaultFalse, diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index cf17de1..4cfa91e 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -428,6 +428,32 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, return emitUnaryFPBuiltin<cir::ATanOp>(*this, *e); case Builtin::BI__builtin_elementwise_cos: return emitUnaryFPBuiltin<cir::CosOp>(*this, *e); + case Builtin::BI__builtin_coro_id: + case Builtin::BI__builtin_coro_promise: + case Builtin::BI__builtin_coro_resume: + case Builtin::BI__builtin_coro_noop: + case Builtin::BI__builtin_coro_destroy: + case Builtin::BI__builtin_coro_done: + case Builtin::BI__builtin_coro_alloc: + case Builtin::BI__builtin_coro_begin: + case Builtin::BI__builtin_coro_end: + case Builtin::BI__builtin_coro_suspend: + case Builtin::BI__builtin_coro_align: + cgm.errorNYI(e->getSourceRange(), "BI__builtin_coro_id like NYI"); + return getUndefRValue(e->getType()); + + case Builtin::BI__builtin_coro_frame: { + cgm.errorNYI(e->getSourceRange(), "BI__builtin_coro_frame NYI"); + assert(!cir::MissingFeatures::coroutineFrame()); + return getUndefRValue(e->getType()); + } + case Builtin::BI__builtin_coro_free: + case Builtin::BI__builtin_coro_size: { + cgm.errorNYI(e->getSourceRange(), + "BI__builtin_coro_free, BI__builtin_coro_size NYI"); + assert(!cir::MissingFeatures::coroSizeBuiltinCall()); + return getUndefRValue(e->getType()); + } } // If this is an alias for a lib function (e.g. __builtin_sin), emit diff --git a/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp b/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp new file mode 100644 index 0000000..c25cce4 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp @@ -0,0 +1,82 @@ +//===----- CGCoroutine.cpp - Emit CIR Code for C++ coroutines -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code dealing with C++ code generation of coroutines. +// +//===----------------------------------------------------------------------===// + +#include "CIRGenFunction.h" +#include "mlir/Support/LLVM.h" +#include "clang/AST/StmtCXX.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" + +using namespace clang; +using namespace clang::CIRGen; + +struct clang::CIRGen::CGCoroData { + // Stores the __builtin_coro_id emitted in the function so that we can supply + // it as the first argument to other builtins. + cir::CallOp coroId = nullptr; +}; + +// Defining these here allows to keep CGCoroData private to this file. 
+CIRGenFunction::CGCoroInfo::CGCoroInfo() {} +CIRGenFunction::CGCoroInfo::~CGCoroInfo() {} + +static void createCoroData(CIRGenFunction &cgf, + CIRGenFunction::CGCoroInfo &curCoro, + cir::CallOp coroId) { + assert(!curCoro.data && "EmitCoroutineBodyStatement called twice?"); + + curCoro.data = std::make_unique<CGCoroData>(); + curCoro.data->coroId = coroId; +} + +cir::CallOp CIRGenFunction::emitCoroIDBuiltinCall(mlir::Location loc, + mlir::Value nullPtr) { + cir::IntType int32Ty = builder.getUInt32Ty(); + + const TargetInfo &ti = cgm.getASTContext().getTargetInfo(); + unsigned newAlign = ti.getNewAlign() / ti.getCharWidth(); + + mlir::Operation *builtin = cgm.getGlobalValue(cgm.builtinCoroId); + + cir::FuncOp fnOp; + if (!builtin) { + fnOp = cgm.createCIRBuiltinFunction( + loc, cgm.builtinCoroId, + cir::FuncType::get({int32Ty, VoidPtrTy, VoidPtrTy, VoidPtrTy}, int32Ty), + /*FD=*/nullptr); + assert(fnOp && "should always succeed"); + } else { + fnOp = cast<cir::FuncOp>(builtin); + } + + return builder.createCallOp(loc, fnOp, + mlir::ValueRange{builder.getUInt32(newAlign, loc), + nullPtr, nullPtr, nullPtr}); +} + +mlir::LogicalResult +CIRGenFunction::emitCoroutineBody(const CoroutineBodyStmt &s) { + mlir::Location openCurlyLoc = getLoc(s.getBeginLoc()); + cir::ConstantOp nullPtrCst = builder.getNullPtr(VoidPtrTy, openCurlyLoc); + + auto fn = mlir::cast<cir::FuncOp>(curFn); + fn.setCoroutine(true); + cir::CallOp coroId = emitCoroIDBuiltinCall(openCurlyLoc, nullPtrCst); + createCoroData(*this, curCoro, coroId); + + assert(!cir::MissingFeatures::coroAllocBuiltinCall()); + + assert(!cir::MissingFeatures::coroBeginBuiltinCall()); + + assert(!cir::MissingFeatures::generateDebugInfo()); + return mlir::success(); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp index fa68ad9..b4c8924 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -1108,8 +1108,9 @@ CIRGenFunction::emitArraySubscriptExpr(const clang::ArraySubscriptExpr *e) { return lv; } -LValue CIRGenFunction::emitStringLiteralLValue(const StringLiteral *e) { - cir::GlobalOp globalOp = cgm.getGlobalForStringLiteral(e); +LValue CIRGenFunction::emitStringLiteralLValue(const StringLiteral *e, + llvm::StringRef name) { + cir::GlobalOp globalOp = cgm.getGlobalForStringLiteral(e, name); assert(globalOp.getAlignment() && "expected alignment for string literal"); unsigned align = *(globalOp.getAlignment()); mlir::Value addr = @@ -2372,6 +2373,21 @@ mlir::Value CIRGenFunction::emitScalarConstant( return builder.getConstant(getLoc(e->getSourceRange()), constant.getValue()); } +LValue CIRGenFunction::emitPredefinedLValue(const PredefinedExpr *e) { + const StringLiteral *sl = e->getFunctionName(); + assert(sl != nullptr && "No StringLiteral name in PredefinedExpr"); + auto fn = cast<cir::FuncOp>(curFn); + StringRef fnName = fn.getName(); + fnName.consume_front("\01"); + std::array<StringRef, 2> nameItems = { + PredefinedExpr::getIdentKindName(e->getIdentKind()), fnName}; + std::string gvName = llvm::join(nameItems, "."); + if (isa_and_nonnull<BlockDecl>(curCodeDecl)) + cgm.errorNYI(e->getSourceRange(), "predefined lvalue in block"); + + return emitStringLiteralLValue(sl, gvName); +} + /// An LValue is a candidate for having its loads and stores be made atomic if /// we are operating under /volatile:ms *and* the LValue itself is volatile and /// performing such an operation can be performed without a libcall. 
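An aside on the `emitPredefinedLValue` hunk above: the global that backs `__func__`, `__FUNCTION__`, and `__PRETTY_FUNCTION__` is named by joining the identifier-kind string with the enclosing function's name, after stripping the `\01` literal-symbol marker. A minimal standalone sketch of just that naming step (the helper below is hypothetical, written only to show the two `llvm/ADT` calls involved):

```cpp
#include <array>
#include <string>

#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"

// Hypothetical helper mirroring the naming logic in emitPredefinedLValue.
static std::string predefinedGlobalName(llvm::StringRef identKindName,
                                        llvm::StringRef fnName) {
  // Symbol names that must be emitted verbatim carry a '\01' prefix;
  // drop it, as the CIRGen code above does.
  fnName.consume_front("\01");
  std::array<llvm::StringRef, 2> nameItems = {identKindName, fnName};
  return llvm::join(nameItems, "."); // e.g. "__func__.plainFunction"
}
```

For the functions in the new predefined-expr.c test at the bottom of this diff, this scheme produces names such as `__func__.plainFunction` and `__PRETTY_FUNCTION__.plainFunction`, which are exactly the globals the test's CIR and LLVM check lines expect.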
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp index b26b4f2..52fb0d7 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp @@ -342,6 +342,9 @@ void CIRGenFunction::LexicalScope::cleanup() { cir::ReturnOp CIRGenFunction::LexicalScope::emitReturn(mlir::Location loc) { CIRGenBuilderTy &builder = cgf.getBuilder(); + // If we are on a coroutine, add the coro_end builtin call. + assert(!cir::MissingFeatures::coroEndBuiltinCall()); + auto fn = dyn_cast<cir::FuncOp>(cgf.curFn); assert(fn && "emitReturn from non-function"); if (!fn.getFunctionType().hasVoidReturn()) { @@ -815,6 +818,8 @@ LValue CIRGenFunction::emitLValue(const Expr *e) { return emitMemberExpr(cast<MemberExpr>(e)); case Expr::CompoundLiteralExprClass: return emitCompoundLiteralLValue(cast<CompoundLiteralExpr>(e)); + case Expr::PredefinedExprClass: + return emitPredefinedLValue(cast<PredefinedExpr>(e)); case Expr::BinaryOperatorClass: return emitBinaryOperatorLValue(cast<BinaryOperator>(e)); case Expr::CompoundAssignOperatorClass: { diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index cb7cf98..dfd9d2c 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -47,6 +47,8 @@ class LoopOp; namespace clang::CIRGen { +struct CGCoroData; + class CIRGenFunction : public CIRGenTypeCache { public: CIRGenModule &cgm; @@ -66,6 +68,18 @@ public: /// The compiler-generated variable that holds the return value. std::optional<mlir::Value> fnRetAlloca; + // Holds coroutine data if the current function is a coroutine. We use a + // wrapper to manage its lifetime, so that we don't have to define CGCoroData + // in this header. + struct CGCoroInfo { + std::unique_ptr<CGCoroData> data; + CGCoroInfo(); + ~CGCoroInfo(); + }; + CGCoroInfo curCoro; + + bool isCoroutine() const { return curCoro.data != nullptr; } + /// The temporary alloca to hold the return value. This is /// invalid iff the function has no return value. 
Address returnValue = Address::invalid(); @@ -1174,6 +1188,10 @@ public: void emitConstructorBody(FunctionArgList &args); + mlir::LogicalResult emitCoroutineBody(const CoroutineBodyStmt &s); + cir::CallOp emitCoroEndBuiltinCall(mlir::Location loc, mlir::Value nullPtr); + cir::CallOp emitCoroIDBuiltinCall(mlir::Location loc, mlir::Value nullPtr); + void emitDestroy(Address addr, QualType type, Destroyer *destroyer); void emitDestructorBody(FunctionArgList &args); @@ -1279,6 +1297,8 @@ public: void emitInitializerForField(clang::FieldDecl *field, LValue lhs, clang::Expr *init); + LValue emitPredefinedLValue(const PredefinedExpr *e); + mlir::Value emitPromotedComplexExpr(const Expr *e, QualType promotionType); mlir::Value emitPromotedScalarExpr(const Expr *e, QualType promotionType); @@ -1473,7 +1493,8 @@ public: mlir::Value emitStoreThroughBitfieldLValue(RValue src, LValue dstresult); - LValue emitStringLiteralLValue(const StringLiteral *e); + LValue emitStringLiteralLValue(const StringLiteral *e, + llvm::StringRef name = ".str"); mlir::LogicalResult emitSwitchBody(const clang::Stmt *s); mlir::LogicalResult emitSwitchCase(const clang::SwitchCase &s, diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 2bd2729..8485564 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -1343,32 +1343,36 @@ cir::GlobalOp CIRGenModule::getGlobalForStringLiteral(const StringLiteral *s, mlir::Attribute c = getConstantArrayFromStringLiteral(s); - if (getLangOpts().WritableStrings) { - errorNYI(s->getSourceRange(), - "getGlobalForStringLiteral: Writable strings"); - } - - // Mangle the string literal if that's how the ABI merges duplicate strings. - // Don't do it if they are writable, since we don't want writes in one TU to - // affect strings in another. - if (getCXXABI().getMangleContext().shouldMangleStringLiteral(s) && - !getLangOpts().WritableStrings) { - errorNYI(s->getSourceRange(), - "getGlobalForStringLiteral: mangle string literals"); - } - - // Unlike LLVM IR, CIR doesn't automatically unique names for globals, so - // we need to do that explicitly. - std::string uniqueName = getUniqueGlobalName(name.str()); - mlir::Location loc = getLoc(s->getSourceRange()); - auto typedC = llvm::cast<mlir::TypedAttr>(c); - cir::GlobalOp gv = - generateStringLiteral(loc, typedC, cir::GlobalLinkageKind::PrivateLinkage, - *this, uniqueName, alignment); - setDSOLocal(static_cast<mlir::Operation *>(gv)); + cir::GlobalOp gv; + if (!getLangOpts().WritableStrings && constantStringMap.count(c)) { + gv = constantStringMap[c]; + // The bigger alignment always wins. + if (!gv.getAlignment() || + uint64_t(alignment.getQuantity()) > *gv.getAlignment()) + gv.setAlignmentAttr(getSize(alignment)); + } else { + // Mangle the string literal if that's how the ABI merges duplicate strings. + // Don't do it if they are writable, since we don't want writes in one TU to + // affect strings in another. + if (getCXXABI().getMangleContext().shouldMangleStringLiteral(s) && + !getLangOpts().WritableStrings) { + errorNYI(s->getSourceRange(), + "getGlobalForStringLiteral: mangle string literals"); + } - assert(!cir::MissingFeatures::sanitizers()); + // Unlike LLVM IR, CIR doesn't automatically unique names for globals, so + // we need to do that explicitly. 
+ std::string uniqueName = getUniqueGlobalName(name.str()); + mlir::Location loc = getLoc(s->getSourceRange()); + auto typedC = llvm::cast<mlir::TypedAttr>(c); + gv = generateStringLiteral(loc, typedC, + cir::GlobalLinkageKind::PrivateLinkage, *this, + uniqueName, alignment); + setDSOLocal(static_cast<mlir::Operation *>(gv)); + constantStringMap[c] = gv; + assert(!cir::MissingFeatures::sanitizers()); + } return gv; } @@ -2065,6 +2069,15 @@ CIRGenModule::createCIRFunction(mlir::Location loc, StringRef name, return func; } +cir::FuncOp +CIRGenModule::createCIRBuiltinFunction(mlir::Location loc, StringRef name, + cir::FuncType ty, + const clang::FunctionDecl *fd) { + cir::FuncOp fnOp = createCIRFunction(loc, name, ty, fd); + fnOp.setBuiltin(true); + return fnOp; +} + mlir::SymbolTable::Visibility CIRGenModule::getMLIRVisibility(cir::GlobalOp op) { // MLIR doesn't accept public symbols declarations (only diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h index 2c4c6dd..c6a6681 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.h +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -274,6 +274,8 @@ public: llvm_unreachable("unknown visibility!"); } + llvm::DenseMap<mlir::Attribute, cir::GlobalOp> constantStringMap; + /// Return a constant array for the given string. mlir::Attribute getConstantArrayFromStringLiteral(const StringLiteral *e); @@ -473,6 +475,13 @@ public: cir::FuncType funcType, const clang::FunctionDecl *funcDecl); + /// Create a CIR function with builtin attribute set. + cir::FuncOp createCIRBuiltinFunction(mlir::Location loc, llvm::StringRef name, + cir::FuncType ty, + const clang::FunctionDecl *fd); + + static constexpr const char *builtinCoroId = "__builtin_coro_id"; + /// Given a builtin id for a function like "__builtin_fabsf", return a /// Function* for "fabsf". 
cir::FuncOp getBuiltinLibFunction(const FunctionDecl *fd, unsigned builtinID); diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp index 644c383..0b8f8bf 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp @@ -197,6 +197,7 @@ mlir::LogicalResult CIRGenFunction::emitStmt(const Stmt *s, case Stmt::SEHLeaveStmtClass: case Stmt::SYCLKernelCallStmtClass: case Stmt::CoroutineBodyStmtClass: + return emitCoroutineBody(cast<CoroutineBodyStmt>(*s)); case Stmt::CoreturnStmtClass: case Stmt::CXXTryStmtClass: case Stmt::IndirectGotoStmtClass: diff --git a/clang/lib/CIR/CodeGen/CMakeLists.txt b/clang/lib/CIR/CodeGen/CMakeLists.txt index 3ebf460..36db4bd 100644 --- a/clang/lib/CIR/CodeGen/CMakeLists.txt +++ b/clang/lib/CIR/CodeGen/CMakeLists.txt @@ -14,6 +14,7 @@ add_clang_library(clangCIR CIRGenCall.cpp CIRGenClass.cpp CIRGenCleanup.cpp + CIRGenCoroutine.cpp CIRGenCXX.cpp CIRGenCXXABI.cpp CIRGenBuiltin.cpp diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index 6b5cc80..fba094f 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -1632,12 +1632,19 @@ ParseResult cir::FuncOp::parse(OpAsmParser &parser, OperationState &state) { llvm::SMLoc loc = parser.getCurrentLocation(); mlir::Builder &builder = parser.getBuilder(); + mlir::StringAttr builtinNameAttr = getBuiltinAttrName(state.name); + mlir::StringAttr coroutineNameAttr = getCoroutineAttrName(state.name); mlir::StringAttr lambdaNameAttr = getLambdaAttrName(state.name); mlir::StringAttr noProtoNameAttr = getNoProtoAttrName(state.name); mlir::StringAttr visNameAttr = getSymVisibilityAttrName(state.name); mlir::StringAttr visibilityNameAttr = getGlobalVisibilityAttrName(state.name); mlir::StringAttr dsoLocalNameAttr = getDsoLocalAttrName(state.name); + if (::mlir::succeeded(parser.parseOptionalKeyword(builtinNameAttr.strref()))) + state.addAttribute(builtinNameAttr, parser.getBuilder().getUnitAttr()); + if (::mlir::succeeded( + parser.parseOptionalKeyword(coroutineNameAttr.strref()))) + state.addAttribute(coroutineNameAttr, parser.getBuilder().getUnitAttr()); if (::mlir::succeeded(parser.parseOptionalKeyword(lambdaNameAttr.strref()))) state.addAttribute(lambdaNameAttr, parser.getBuilder().getUnitAttr()); if (parser.parseOptionalKeyword(noProtoNameAttr).succeeded()) @@ -1747,6 +1754,12 @@ mlir::Region *cir::FuncOp::getCallableRegion() { } void cir::FuncOp::print(OpAsmPrinter &p) { + if (getBuiltin()) + p << " builtin"; + + if (getCoroutine()) + p << " coroutine"; + if (getLambda()) p << " lambda"; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 412a176..684cc09 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -9224,14 +9224,20 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, addOffloadCompressArgs(Args, CmdArgs); if (Arg *A = Args.getLastArg(options::OPT_offload_jobs_EQ)) { - int NumThreads; - if (StringRef(A->getValue()).getAsInteger(10, NumThreads) || - NumThreads <= 0) - C.getDriver().Diag(diag::err_drv_invalid_int_value) - << A->getAsString(Args) << A->getValue(); - else - CmdArgs.push_back( - Args.MakeArgString("--wrapper-jobs=" + Twine(NumThreads))); + StringRef Val = A->getValue(); + + if (Val.equals_insensitive("jobserver")) + CmdArgs.push_back(Args.MakeArgString("--wrapper-jobs=jobserver")); + else { + int NumThreads; + if (Val.getAsInteger(10, NumThreads) || 
NumThreads <= 0) { + C.getDriver().Diag(diag::err_drv_invalid_int_value) + << A->getAsString(Args) << Val; + } else { + CmdArgs.push_back( + Args.MakeArgString("--wrapper-jobs=" + Twine(NumThreads))); + } + } } const char *Exec = diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 2bf6244..686e541 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -3199,7 +3199,7 @@ private: Keywords.kw_NS_OPTIONS, TT_ObjCBlockLBrace, TT_ObjCBlockLParen, TT_ObjCDecl, TT_ObjCForIn, TT_ObjCMethodExpr, TT_ObjCMethodSpecifier, - TT_ObjCProperty)) { + TT_ObjCProperty, TT_ObjCSelector)) { LLVM_DEBUG(llvm::dbgs() << "Detected ObjC at location " << FormatTok->Tok.getLocation().printToString( diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index e4ddd61..f015d27 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -127,9 +127,17 @@ namespace format { TYPE(ObjCBlockLParen) \ TYPE(ObjCDecl) \ TYPE(ObjCForIn) \ + /* The square brackets surrounding a method call, the colon separating the \ + * method or parameter name and the argument inside the square brackets, and \ + * the colon separating the method or parameter name and the type inside the \ + * method declaration. */ \ TYPE(ObjCMethodExpr) \ + /* The '+' or '-' at the start of the line. */ \ TYPE(ObjCMethodSpecifier) \ TYPE(ObjCProperty) \ + /* The parentheses following '@selector' and the colon following the method \ + * or parameter name inside the parentheses. */ \ + TYPE(ObjCSelector) \ TYPE(ObjCStringLiteral) \ TYPE(OverloadedOperator) \ TYPE(OverloadedOperatorLParen) \ @@ -146,6 +154,9 @@ namespace format { TYPE(RequiresExpression) \ TYPE(RequiresExpressionLBrace) \ TYPE(RequiresExpressionLParen) \ + /* The hash key in languages that have hash literals, not including the \ + * field name in the C++ struct literal. Also the method or parameter name \ + * in the Objective-C method declaration or call. */ \ TYPE(SelectorName) \ TYPE(StartOfName) \ TYPE(StatementAttributeLikeMacro) \ diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 59f81b3..5b784ed 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -321,13 +321,13 @@ private: return parseUntouchableParens(); } - bool StartsObjCMethodExpr = false; + bool StartsObjCSelector = false; if (!Style.isVerilog()) { if (FormatToken *MaybeSel = OpeningParen.Previous) { // @selector( starts a selector. if (MaybeSel->is(tok::objc_selector) && MaybeSel->Previous && MaybeSel->Previous->is(tok::at)) { - StartsObjCMethodExpr = true; + StartsObjCSelector = true; } } } @@ -451,10 +451,8 @@ private: } } - if (StartsObjCMethodExpr) { - Contexts.back().ColonIsObjCMethodExpr = true; - OpeningParen.setType(TT_ObjCMethodExpr); - } + if (StartsObjCSelector) + OpeningParen.setType(TT_ObjCSelector); // MightBeFunctionType and ProbablyFunctionType are used for // function pointer and reference types as well as Objective-C @@ -513,8 +511,8 @@ private: } } - if (StartsObjCMethodExpr) { - CurrentToken->setType(TT_ObjCMethodExpr); + if (StartsObjCSelector) { + CurrentToken->setType(TT_ObjCSelector); if (Contexts.back().FirstObjCSelectorName) { Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = Contexts.back().LongestObjCSelectorName; @@ -1449,7 +1447,7 @@ private: Next->Next->is(tok::colon)))) { // This handles a special macro in ObjC code where selectors including // the colon are passed as macro arguments. 
- Tok->setType(TT_ObjCMethodExpr); + Tok->setType(TT_ObjCSelector); } break; case tok::pipe: @@ -4608,7 +4606,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, return false; } if (Left.is(tok::colon)) - return Left.isNot(TT_ObjCMethodExpr); + return Left.isNoneOf(TT_ObjCSelector, TT_ObjCMethodExpr); if (Left.is(tok::coloncolon)) return false; if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) { @@ -5464,7 +5462,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, // `private:` and `public:`. if (!Right.getNextNonComment()) return false; - if (Right.is(TT_ObjCMethodExpr)) + if (Right.isOneOf(TT_ObjCSelector, TT_ObjCMethodExpr)) return false; if (Left.is(tok::question)) return false; @@ -6288,6 +6286,7 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, return Style.BreakInheritanceList == FormatStyle::BILS_AfterColon; if (Right.is(TT_InheritanceColon)) return Style.BreakInheritanceList != FormatStyle::BILS_AfterColon; + // When the method parameter has no name, allow breaking before the colon. if (Right.is(TT_ObjCMethodExpr) && Right.isNot(tok::r_square) && Left.isNot(TT_SelectorName)) { return true; diff --git a/clang/lib/StaticAnalyzer/Core/CMakeLists.txt b/clang/lib/StaticAnalyzer/Core/CMakeLists.txt index d0a9b20..b8095a5 100644 --- a/clang/lib/StaticAnalyzer/Core/CMakeLists.txt +++ b/clang/lib/StaticAnalyzer/Core/CMakeLists.txt @@ -61,6 +61,7 @@ add_clang_library(clangStaticAnalyzerCore clangBasic clangCrossTU clangFrontend + clangIndex clangLex clangRewrite clangToolingCore diff --git a/clang/lib/Tooling/DependencyScanning/CMakeLists.txt b/clang/lib/Tooling/DependencyScanning/CMakeLists.txt index 53a2728..76bdc50 100644 --- a/clang/lib/Tooling/DependencyScanning/CMakeLists.txt +++ b/clang/lib/Tooling/DependencyScanning/CMakeLists.txt @@ -24,6 +24,5 @@ add_clang_library(clangDependencyScanning clangFrontend clangLex clangSerialization - clangTooling ${LLVM_PTHREAD_LIB} ) diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp index 010380d..e1f4d0d 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp @@ -9,8 +9,10 @@ #include "DependencyScannerImpl.h" #include "clang/Basic/DiagnosticFrontend.h" #include "clang/Basic/DiagnosticSerialization.h" +#include "clang/Driver/Driver.h" #include "clang/Frontend/FrontendActions.h" #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h" +#include "llvm/TargetParser/Host.h" using namespace clang; using namespace tooling; @@ -332,11 +334,9 @@ public: return DepFS->getDirectiveTokens(File.getName()); } }; -} // namespace /// Sanitize diagnostic options for dependency scan. -void clang::tooling::dependencies::sanitizeDiagOpts( - DiagnosticOptions &DiagOpts) { +void sanitizeDiagOpts(DiagnosticOptions &DiagOpts) { // Don't print 'X warnings and Y errors generated'. DiagOpts.ShowCarets = false; // Don't write out diagnostic file. 
@@ -355,44 +355,146 @@ void clang::tooling::dependencies::sanitizeDiagOpts( .Default(true); }); } +} // namespace -bool DependencyScanningAction::runInvocation( - std::shared_ptr<CompilerInvocation> Invocation, - IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, - std::shared_ptr<PCHContainerOperations> PCHContainerOps, - DiagnosticConsumer *DiagConsumer) { - // Making sure that we canonicalize the defines before we create the deep - // copy to avoid unnecessary variants in the scanner and in the resulting - // explicit command lines. - if (any(Service.getOptimizeArgs() & ScanningOptimizations::Macros)) - canonicalizeDefines(Invocation->getPreprocessorOpts()); +namespace clang::tooling::dependencies { +std::unique_ptr<DiagnosticOptions> +createDiagOptions(ArrayRef<std::string> CommandLine) { + std::vector<const char *> CLI; + for (const std::string &Arg : CommandLine) + CLI.push_back(Arg.c_str()); + auto DiagOpts = CreateAndPopulateDiagOpts(CLI); + sanitizeDiagOpts(*DiagOpts); + return DiagOpts; +} - // Make a deep copy of the original Clang invocation. - CompilerInvocation OriginalInvocation(*Invocation); +DignosticsEngineWithDiagOpts::DignosticsEngineWithDiagOpts( + ArrayRef<std::string> CommandLine, + IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, DiagnosticConsumer &DC) { + std::vector<const char *> CCommandLine(CommandLine.size(), nullptr); + llvm::transform(CommandLine, CCommandLine.begin(), + [](const std::string &Str) { return Str.c_str(); }); + DiagOpts = CreateAndPopulateDiagOpts(CCommandLine); + sanitizeDiagOpts(*DiagOpts); + DiagEngine = CompilerInstance::createDiagnostics(*FS, *DiagOpts, &DC, + /*ShouldOwnClient=*/false); +} - if (Scanned) { - // Scanning runs once for the first -cc1 invocation in a chain of driver - // jobs. For any dependent jobs, reuse the scanning result and just - // update the LastCC1Arguments to correspond to the new invocation. - // FIXME: to support multi-arch builds, each arch requires a separate scan - setLastCC1Arguments(std::move(OriginalInvocation)); - return true; +std::pair<std::unique_ptr<driver::Driver>, std::unique_ptr<driver::Compilation>> +buildCompilation(ArrayRef<std::string> ArgStrs, DiagnosticsEngine &Diags, + IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) { + SmallVector<const char *, 256> Argv; + Argv.reserve(ArgStrs.size()); + for (const std::string &Arg : ArgStrs) + Argv.push_back(Arg.c_str()); + + std::unique_ptr<driver::Driver> Driver = std::make_unique<driver::Driver>( + Argv[0], llvm::sys::getDefaultTargetTriple(), Diags, + "clang LLVM compiler", FS); + Driver->setTitle("clang_based_tool"); + + llvm::BumpPtrAllocator Alloc; + bool CLMode = driver::IsClangCL( + driver::getDriverMode(Argv[0], ArrayRef(Argv).slice(1))); + + if (llvm::Error E = + driver::expandResponseFiles(Argv, CLMode, Alloc, FS.get())) { + Diags.Report(diag::err_drv_expand_response_file) + << llvm::toString(std::move(E)); + return std::make_pair(nullptr, nullptr); } - Scanned = true; + std::unique_ptr<driver::Compilation> Compilation( + Driver->BuildCompilation(Argv)); + if (!Compilation) + return std::make_pair(nullptr, nullptr); - // Create a compiler instance to handle the actual work. 
- auto ModCache = makeInProcessModuleCache(Service.getModuleCacheEntries()); - ScanInstanceStorage.emplace(std::move(Invocation), std::move(PCHContainerOps), - ModCache.get()); - CompilerInstance &ScanInstance = *ScanInstanceStorage; + if (Compilation->containsError()) + return std::make_pair(nullptr, nullptr); + + return std::make_pair(std::move(Driver), std::move(Compilation)); +} + +std::unique_ptr<CompilerInvocation> +createCompilerInvocation(ArrayRef<std::string> CommandLine, + DiagnosticsEngine &Diags) { + llvm::opt::ArgStringList Argv; + for (const std::string &Str : ArrayRef(CommandLine).drop_front()) + Argv.push_back(Str.c_str()); + + auto Invocation = std::make_unique<CompilerInvocation>(); + if (!CompilerInvocation::CreateFromArgs(*Invocation, Argv, Diags)) { + // FIXME: Should we just go on like cc1_main does? + return nullptr; + } + return Invocation; +} + +std::pair<IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::vector<std::string>> +initVFSForTUBuferScanning(IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS, + ArrayRef<std::string> CommandLine, + StringRef WorkingDirectory, + llvm::MemoryBufferRef TUBuffer) { + // Reset what might have been modified in the previous worker invocation. + BaseFS->setCurrentWorkingDirectory(WorkingDirectory); + + IntrusiveRefCntPtr<llvm::vfs::FileSystem> ModifiedFS; + auto OverlayFS = + llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS); + auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>(); + InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory); + auto InputPath = TUBuffer.getBufferIdentifier(); + InMemoryFS->addFile( + InputPath, 0, llvm::MemoryBuffer::getMemBufferCopy(TUBuffer.getBuffer())); + IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = InMemoryFS; + + OverlayFS->pushOverlay(InMemoryOverlay); + ModifiedFS = OverlayFS; + std::vector<std::string> ModifiedCommandLine(CommandLine); + ModifiedCommandLine.emplace_back(InputPath); + + return std::make_pair(ModifiedFS, ModifiedCommandLine); +} + +std::pair<IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::vector<std::string>> +initVFSForByNameScanning(IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS, + ArrayRef<std::string> CommandLine, + StringRef WorkingDirectory, StringRef ModuleName) { + // Reset what might have been modified in the previous worker invocation. + BaseFS->setCurrentWorkingDirectory(WorkingDirectory); + + // If we're scanning based on a module name alone, we don't expect the client + // to provide us with an input file. However, the driver really wants to have + // one. Let's just make it up to make the driver happy. + auto OverlayFS = + llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS); + auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>(); + InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory); + SmallString<128> FakeInputPath; + // TODO: We should retry the creation if the path already exists. 
+ llvm::sys::fs::createUniquePath(ModuleName + "-%%%%%%%%.input", FakeInputPath, + /*MakeAbsolute=*/false); + InMemoryFS->addFile(FakeInputPath, 0, llvm::MemoryBuffer::getMemBuffer("")); + IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = InMemoryFS; + OverlayFS->pushOverlay(InMemoryOverlay); + + std::vector<std::string> ModifiedCommandLine(CommandLine); + ModifiedCommandLine.emplace_back(FakeInputPath); + + return std::make_pair(OverlayFS, ModifiedCommandLine); +} + +bool initializeScanCompilerInstance( + CompilerInstance &ScanInstance, + IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, + DiagnosticConsumer *DiagConsumer, DependencyScanningService &Service, + IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS) { ScanInstance.setBuildingModule(false); ScanInstance.createVirtualFileSystem(FS, DiagConsumer); // Create the compiler's actual diagnostics engine. sanitizeDiagOpts(ScanInstance.getDiagnosticOpts()); - assert(!DiagConsumerFinished && "attempt to reuse finished consumer"); ScanInstance.createDiagnostics(DiagConsumer, /*ShouldOwnClient=*/false); if (!ScanInstance.hasDiagnostics()) return false; @@ -435,6 +537,26 @@ bool DependencyScanningAction::runInvocation( ScanInstance.createSourceManager(); + // Consider different header search and diagnostic options to create + // different modules. This avoids the unsound aliasing of module PCMs. + // + // TODO: Implement diagnostic bucketing to reduce the impact of strict + // context hashing. + ScanInstance.getHeaderSearchOpts().ModulesStrictContextHash = true; + ScanInstance.getHeaderSearchOpts().ModulesSerializeOnlyPreprocessor = true; + ScanInstance.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = true; + ScanInstance.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = true; + ScanInstance.getHeaderSearchOpts().ModulesSkipPragmaDiagnosticMappings = true; + ScanInstance.getHeaderSearchOpts().ModulesForceValidateUserHeaders = false; + + // Avoid some checks and module map parsing when loading PCM files. + ScanInstance.getPreprocessorOpts().ModulesCheckRelocated = false; + + return true; +} + +llvm::SmallVector<StringRef> +getInitialStableDirs(const CompilerInstance &ScanInstance) { // Create a collection of stable directories derived from the ScanInstance // for determining whether module dependencies would fully resolve from // those directories. @@ -442,7 +564,12 @@ bool DependencyScanningAction::runInvocation( const StringRef Sysroot = ScanInstance.getHeaderSearchOpts().Sysroot; if (!Sysroot.empty() && (llvm::sys::path::root_directory(Sysroot) != Sysroot)) StableDirs = {Sysroot, ScanInstance.getHeaderSearchOpts().ResourceDir}; + return StableDirs; +} +std::optional<PrebuiltModulesAttrsMap> +computePrebuiltModulesASTMap(CompilerInstance &ScanInstance, + llvm::SmallVector<StringRef> &StableDirs) { // Store a mapping of prebuilt module files and their properties like header // search options. This will prevent the implicit build to create duplicate // modules and will force reuse of the existing prebuilt module files @@ -454,12 +581,14 @@ bool DependencyScanningAction::runInvocation( ScanInstance.getPreprocessorOpts().ImplicitPCHInclude, ScanInstance, ScanInstance.getHeaderSearchOpts().PrebuiltModuleFiles, PrebuiltModulesASTMap, ScanInstance.getDiagnostics(), StableDirs)) - return false; + return {}; - // Create the dependency collector that will collect the produced - // dependencies. 
- // - // This also moves the existing dependency output options from the + return PrebuiltModulesASTMap; +} + +std::unique_ptr<DependencyOutputOptions> +takeDependencyOutputOptionsFrom(CompilerInstance &ScanInstance) { + // This function moves the existing dependency output options from the // invocation to the collector. The options in the invocation are reset, // which ensures that the compiler won't create new dependency collectors, // and thus won't write out the extra '.d' files to disk. @@ -472,35 +601,85 @@ bool DependencyScanningAction::runInvocation( ScanInstance.getFrontendOpts().Inputs)}; Opts->IncludeSystemHeaders = true; + return Opts; +} + +std::shared_ptr<ModuleDepCollector> initializeScanInstanceDependencyCollector( + CompilerInstance &ScanInstance, + std::unique_ptr<DependencyOutputOptions> DepOutputOpts, + StringRef WorkingDirectory, DependencyConsumer &Consumer, + DependencyScanningService &Service, CompilerInvocation &Inv, + DependencyActionController &Controller, + PrebuiltModulesAttrsMap PrebuiltModulesASTMap, + llvm::SmallVector<StringRef> &StableDirs) { + std::shared_ptr<ModuleDepCollector> MDC; switch (Service.getFormat()) { case ScanningOutputFormat::Make: ScanInstance.addDependencyCollector( std::make_shared<DependencyConsumerForwarder>( - std::move(Opts), WorkingDirectory, Consumer)); + std::move(DepOutputOpts), WorkingDirectory, Consumer)); break; case ScanningOutputFormat::P1689: case ScanningOutputFormat::Full: MDC = std::make_shared<ModuleDepCollector>( - Service, std::move(Opts), ScanInstance, Consumer, Controller, - OriginalInvocation, std::move(PrebuiltModulesASTMap), StableDirs); + Service, std::move(DepOutputOpts), ScanInstance, Consumer, Controller, + Inv, std::move(PrebuiltModulesASTMap), StableDirs); ScanInstance.addDependencyCollector(MDC); break; } - // Consider different header search and diagnostic options to create - // different modules. This avoids the unsound aliasing of module PCMs. - // - // TODO: Implement diagnostic bucketing to reduce the impact of strict - // context hashing. - ScanInstance.getHeaderSearchOpts().ModulesStrictContextHash = true; - ScanInstance.getHeaderSearchOpts().ModulesSerializeOnlyPreprocessor = true; - ScanInstance.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = true; - ScanInstance.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = true; - ScanInstance.getHeaderSearchOpts().ModulesSkipPragmaDiagnosticMappings = true; - ScanInstance.getHeaderSearchOpts().ModulesForceValidateUserHeaders = false; + return MDC; +} +} // namespace clang::tooling::dependencies - // Avoid some checks and module map parsing when loading PCM files. - ScanInstance.getPreprocessorOpts().ModulesCheckRelocated = false; +bool DependencyScanningAction::runInvocation( + std::unique_ptr<CompilerInvocation> Invocation, + IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, + std::shared_ptr<PCHContainerOperations> PCHContainerOps, + DiagnosticConsumer *DiagConsumer) { + // Making sure that we canonicalize the defines before we create the deep + // copy to avoid unnecessary variants in the scanner and in the resulting + // explicit command lines. + if (any(Service.getOptimizeArgs() & ScanningOptimizations::Macros)) + canonicalizeDefines(Invocation->getPreprocessorOpts()); + + // Make a deep copy of the original Clang invocation. + CompilerInvocation OriginalInvocation(*Invocation); + + if (Scanned) { + // Scanning runs once for the first -cc1 invocation in a chain of driver + // jobs. 
For any dependent jobs, reuse the scanning result and just + // update the LastCC1Arguments to correspond to the new invocation. + // FIXME: to support multi-arch builds, each arch requires a separate scan + setLastCC1Arguments(std::move(OriginalInvocation)); + return true; + } + + Scanned = true; + + // Create a compiler instance to handle the actual work. + auto ModCache = makeInProcessModuleCache(Service.getModuleCacheEntries()); + ScanInstanceStorage.emplace(std::move(Invocation), std::move(PCHContainerOps), + ModCache.get()); + CompilerInstance &ScanInstance = *ScanInstanceStorage; + + assert(!DiagConsumerFinished && "attempt to reuse finished consumer"); + if (!initializeScanCompilerInstance(ScanInstance, FS, DiagConsumer, Service, + DepFS)) + return false; + + llvm::SmallVector<StringRef> StableDirs = getInitialStableDirs(ScanInstance); + auto MaybePrebuiltModulesASTMap = + computePrebuiltModulesASTMap(ScanInstance, StableDirs); + if (!MaybePrebuiltModulesASTMap) + return false; + + auto DepOutputOpts = takeDependencyOutputOptionsFrom(ScanInstance); + + MDC = initializeScanInstanceDependencyCollector( + ScanInstance, std::move(DepOutputOpts), WorkingDirectory, Consumer, + Service, OriginalInvocation, Controller, *MaybePrebuiltModulesASTMap, + StableDirs); std::unique_ptr<FrontendAction> Action; diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.h b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.h index 32fbcff..71c6731 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.h +++ b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.h @@ -9,8 +9,10 @@ #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNER_H #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNER_H +#include "clang/Driver/Compilation.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/CompilerInvocation.h" +#include "clang/Frontend/TextDiagnosticPrinter.h" #include "clang/Serialization/ObjectFilePCHContainerReader.h" #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h" #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h" @@ -30,12 +32,12 @@ public: DependencyScanningAction( DependencyScanningService &Service, StringRef WorkingDirectory, DependencyConsumer &Consumer, DependencyActionController &Controller, - llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS, + IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS, std::optional<StringRef> ModuleName = std::nullopt) : Service(Service), WorkingDirectory(WorkingDirectory), Consumer(Consumer), Controller(Controller), DepFS(std::move(DepFS)), ModuleName(ModuleName) {} - bool runInvocation(std::shared_ptr<CompilerInvocation> Invocation, + bool runInvocation(std::unique_ptr<CompilerInvocation> Invocation, IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, std::shared_ptr<PCHContainerOperations> PCHContainerOps, DiagnosticConsumer *DiagConsumer); @@ -63,7 +65,7 @@ private: StringRef WorkingDirectory; DependencyConsumer &Consumer; DependencyActionController &Controller; - llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS; + IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS; std::optional<StringRef> ModuleName; std::optional<CompilerInstance> ScanInstanceStorage; std::shared_ptr<ModuleDepCollector> MDC; @@ -72,9 +74,81 @@ private: bool DiagConsumerFinished = false; }; -// Helper functions -void sanitizeDiagOpts(DiagnosticOptions &DiagOpts); +// Helper functions and data types. 
+std::unique_ptr<DiagnosticOptions> +createDiagOptions(ArrayRef<std::string> CommandLine); +struct DignosticsEngineWithDiagOpts { + // We need to bound the lifetime of the DiagOpts used to create the + // DiganosticsEngine with the DiagnosticsEngine itself. + std::unique_ptr<DiagnosticOptions> DiagOpts; + IntrusiveRefCntPtr<DiagnosticsEngine> DiagEngine; + + DignosticsEngineWithDiagOpts(ArrayRef<std::string> CommandLine, + IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, + DiagnosticConsumer &DC); +}; + +struct TextDiagnosticsPrinterWithOutput { + // We need to bound the lifetime of the data that supports the DiagPrinter + // with it together so they have the same lifetime. + std::string DiagnosticOutput; + llvm::raw_string_ostream DiagnosticsOS; + std::unique_ptr<DiagnosticOptions> DiagOpts; + TextDiagnosticPrinter DiagPrinter; + + TextDiagnosticsPrinterWithOutput(ArrayRef<std::string> CommandLine) + : DiagnosticsOS(DiagnosticOutput), + DiagOpts(createDiagOptions(CommandLine)), + DiagPrinter(DiagnosticsOS, *DiagOpts) {} +}; + +std::pair<std::unique_ptr<driver::Driver>, std::unique_ptr<driver::Compilation>> +buildCompilation(ArrayRef<std::string> ArgStrs, DiagnosticsEngine &Diags, + IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS); + +std::unique_ptr<CompilerInvocation> +createCompilerInvocation(ArrayRef<std::string> CommandLine, + DiagnosticsEngine &Diags); + +std::pair<IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::vector<std::string>> +initVFSForTUBuferScanning(IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS, + ArrayRef<std::string> CommandLine, + StringRef WorkingDirectory, + llvm::MemoryBufferRef TUBuffer); + +std::pair<IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::vector<std::string>> +initVFSForByNameScanning(IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS, + ArrayRef<std::string> CommandLine, + StringRef WorkingDirectory, StringRef ModuleName); + +bool initializeScanCompilerInstance( + CompilerInstance &ScanInstance, + IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, + DiagnosticConsumer *DiagConsumer, DependencyScanningService &Service, + IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS); + +SmallVector<StringRef> +getInitialStableDirs(const CompilerInstance &ScanInstance); + +std::optional<PrebuiltModulesAttrsMap> +computePrebuiltModulesASTMap(CompilerInstance &ScanInstance, + SmallVector<StringRef> &StableDirs); + +std::unique_ptr<DependencyOutputOptions> +takeDependencyOutputOptionsFrom(CompilerInstance &ScanInstance); + +/// Create the dependency collector that will collect the produced +/// dependencies. May return the created ModuleDepCollector depending +/// on the scanning format. 
+std::shared_ptr<ModuleDepCollector> initializeScanInstanceDependencyCollector( + CompilerInstance &ScanInstance, + std::unique_ptr<DependencyOutputOptions> DepOutputOpts, + StringRef WorkingDirectory, DependencyConsumer &Consumer, + DependencyScanningService &Service, CompilerInvocation &Inv, + DependencyActionController &Controller, + PrebuiltModulesAttrsMap PrebuiltModulesASTMap, + llvm::SmallVector<StringRef> &StableDirs); } // namespace dependencies } // namespace tooling } // namespace clang diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp index 796e587..9515421 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp @@ -8,29 +8,9 @@ #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h" #include "DependencyScannerImpl.h" -#include "clang/Basic/DiagnosticDriver.h" #include "clang/Basic/DiagnosticFrontend.h" -#include "clang/Basic/DiagnosticSerialization.h" -#include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Job.h" #include "clang/Driver/Tool.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Frontend/CompilerInvocation.h" -#include "clang/Frontend/FrontendActions.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Frontend/Utils.h" -#include "clang/Lex/PreprocessorOptions.h" -#include "clang/Serialization/ObjectFilePCHContainerReader.h" -#include "clang/Tooling/DependencyScanning/DependencyScanningService.h" -#include "clang/Tooling/DependencyScanning/InProcessModuleCache.h" -#include "clang/Tooling/DependencyScanning/ModuleDepCollector.h" -#include "llvm/ADT/IntrusiveRefCntPtr.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/TargetParser/Host.h" -#include <optional> using namespace clang; using namespace tooling; @@ -63,32 +43,19 @@ DependencyScanningWorker::DependencyScanningWorker( } } -static std::unique_ptr<DiagnosticOptions> -createDiagOptions(const std::vector<std::string> &CommandLine) { - std::vector<const char *> CLI; - for (const std::string &Arg : CommandLine) - CLI.push_back(Arg.c_str()); - auto DiagOpts = CreateAndPopulateDiagOpts(CLI); - sanitizeDiagOpts(*DiagOpts); - return DiagOpts; -} - llvm::Error DependencyScanningWorker::computeDependencies( StringRef WorkingDirectory, const std::vector<std::string> &CommandLine, DependencyConsumer &Consumer, DependencyActionController &Controller, std::optional<llvm::MemoryBufferRef> TUBuffer) { // Capture the emitted diagnostics and report them to the client // in the case of a failure. 
- std::string DiagnosticOutput; - llvm::raw_string_ostream DiagnosticsOS(DiagnosticOutput); - auto DiagOpts = createDiagOptions(CommandLine); - TextDiagnosticPrinter DiagPrinter(DiagnosticsOS, *DiagOpts); + TextDiagnosticsPrinterWithOutput DiagPrinterWithOS(CommandLine); if (computeDependencies(WorkingDirectory, CommandLine, Consumer, Controller, - DiagPrinter, TUBuffer)) + DiagPrinterWithOS.DiagPrinter, TUBuffer)) return llvm::Error::success(); - return llvm::make_error<llvm::StringError>(DiagnosticsOS.str(), - llvm::inconvertibleErrorCode()); + return llvm::make_error<llvm::StringError>( + DiagPrinterWithOS.DiagnosticsOS.str(), llvm::inconvertibleErrorCode()); } llvm::Error DependencyScanningWorker::computeDependencies( @@ -97,51 +64,24 @@ llvm::Error DependencyScanningWorker::computeDependencies( StringRef ModuleName) { // Capture the emitted diagnostics and report them to the client // in the case of a failure. - std::string DiagnosticOutput; - llvm::raw_string_ostream DiagnosticsOS(DiagnosticOutput); - auto DiagOpts = createDiagOptions(CommandLine); - TextDiagnosticPrinter DiagPrinter(DiagnosticsOS, *DiagOpts); + TextDiagnosticsPrinterWithOutput DiagPrinterWithOS(CommandLine); if (computeDependencies(WorkingDirectory, CommandLine, Consumer, Controller, - DiagPrinter, ModuleName)) + DiagPrinterWithOS.DiagPrinter, ModuleName)) return llvm::Error::success(); - return llvm::make_error<llvm::StringError>(DiagnosticsOS.str(), - llvm::inconvertibleErrorCode()); + return llvm::make_error<llvm::StringError>( + DiagPrinterWithOS.DiagnosticsOS.str(), llvm::inconvertibleErrorCode()); } static bool forEachDriverJob( ArrayRef<std::string> ArgStrs, DiagnosticsEngine &Diags, IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, llvm::function_ref<bool(const driver::Command &Cmd)> Callback) { - SmallVector<const char *, 256> Argv; - Argv.reserve(ArgStrs.size()); - for (const std::string &Arg : ArgStrs) - Argv.push_back(Arg.c_str()); - - std::unique_ptr<driver::Driver> Driver = std::make_unique<driver::Driver>( - Argv[0], llvm::sys::getDefaultTargetTriple(), Diags, - "clang LLVM compiler", FS); - Driver->setTitle("clang_based_tool"); - - llvm::BumpPtrAllocator Alloc; - bool CLMode = driver::IsClangCL( - driver::getDriverMode(Argv[0], ArrayRef(Argv).slice(1))); - - if (llvm::Error E = - driver::expandResponseFiles(Argv, CLMode, Alloc, FS.get())) { - Diags.Report(diag::err_drv_expand_response_file) - << llvm::toString(std::move(E)); - return false; - } - - const std::unique_ptr<driver::Compilation> Compilation( - Driver->BuildCompilation(llvm::ArrayRef(Argv))); + // Compilation holds a non-owning a reference to the Driver, hence we need to + // keep the Driver alive when we use Compilation. 
+ auto [Driver, Compilation] = buildCompilation(ArgStrs, Diags, FS); if (!Compilation) return false; - - if (Compilation->containsError()) - return false; - for (const driver::Command &Job : Compilation->getJobs()) { if (!Callback(Job)) return false; @@ -150,30 +90,21 @@ static bool forEachDriverJob( } static bool createAndRunToolInvocation( - std::vector<std::string> CommandLine, DependencyScanningAction &Action, + const std::vector<std::string> &CommandLine, + DependencyScanningAction &Action, IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, std::shared_ptr<clang::PCHContainerOperations> &PCHContainerOps, DiagnosticsEngine &Diags, DependencyConsumer &Consumer) { - - // Save executable path before providing CommandLine to ToolInvocation - std::string Executable = CommandLine[0]; - - llvm::opt::ArgStringList Argv; - for (const std::string &Str : ArrayRef(CommandLine).drop_front()) - Argv.push_back(Str.c_str()); - - auto Invocation = std::make_shared<CompilerInvocation>(); - if (!CompilerInvocation::CreateFromArgs(*Invocation, Argv, Diags)) { - // FIXME: Should we just go on like cc1_main does? + auto Invocation = createCompilerInvocation(CommandLine, Diags); + if (!Invocation) return false; - } if (!Action.runInvocation(std::move(Invocation), std::move(FS), PCHContainerOps, Diags.getClient())) return false; std::vector<std::string> Args = Action.takeLastCC1Arguments(); - Consumer.handleBuildCommand({std::move(Executable), std::move(Args)}); + Consumer.handleBuildCommand({CommandLine[0], std::move(Args)}); return true; } @@ -182,24 +113,19 @@ bool DependencyScanningWorker::scanDependencies( DependencyConsumer &Consumer, DependencyActionController &Controller, DiagnosticConsumer &DC, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, std::optional<StringRef> ModuleName) { - std::vector<const char *> CCommandLine(CommandLine.size(), nullptr); - llvm::transform(CommandLine, CCommandLine.begin(), - [](const std::string &Str) { return Str.c_str(); }); - auto DiagOpts = CreateAndPopulateDiagOpts(CCommandLine); - sanitizeDiagOpts(*DiagOpts); - auto Diags = CompilerInstance::createDiagnostics(*FS, *DiagOpts, &DC, - /*ShouldOwnClient=*/false); - + DignosticsEngineWithDiagOpts DiagEngineWithCmdAndOpts(CommandLine, FS, DC); DependencyScanningAction Action(Service, WorkingDirectory, Consumer, Controller, DepFS, ModuleName); bool Success = false; if (CommandLine[1] == "-cc1") { - Success = createAndRunToolInvocation(CommandLine, Action, FS, - PCHContainerOps, *Diags, Consumer); + Success = createAndRunToolInvocation( + CommandLine, Action, FS, PCHContainerOps, + *DiagEngineWithCmdAndOpts.DiagEngine, Consumer); } else { Success = forEachDriverJob( - CommandLine, *Diags, FS, [&](const driver::Command &Cmd) { + CommandLine, *DiagEngineWithCmdAndOpts.DiagEngine, FS, + [&](const driver::Command &Cmd) { if (StringRef(Cmd.getCreator().getName()) != "clang") { // Non-clang command. Just pass through to the dependency // consumer. @@ -218,13 +144,15 @@ bool DependencyScanningWorker::scanDependencies( // system to ensure that any file system requests that // are made by the driver do not go through the // dependency scanning filesystem. 
- return createAndRunToolInvocation(std::move(Argv), Action, FS, - PCHContainerOps, *Diags, Consumer); + return createAndRunToolInvocation( + std::move(Argv), Action, FS, PCHContainerOps, + *DiagEngineWithCmdAndOpts.DiagEngine, Consumer); }); } if (Success && !Action.hasScanned()) - Diags->Report(diag::err_fe_expected_compiler_job) + DiagEngineWithCmdAndOpts.DiagEngine->Report( + diag::err_fe_expected_compiler_job) << llvm::join(CommandLine, " "); // Ensure finish() is called even if we never reached ExecuteAction(). @@ -238,66 +166,25 @@ bool DependencyScanningWorker::computeDependencies( StringRef WorkingDirectory, const std::vector<std::string> &CommandLine, DependencyConsumer &Consumer, DependencyActionController &Controller, DiagnosticConsumer &DC, std::optional<llvm::MemoryBufferRef> TUBuffer) { - // Reset what might have been modified in the previous worker invocation. - BaseFS->setCurrentWorkingDirectory(WorkingDirectory); - - std::optional<std::vector<std::string>> ModifiedCommandLine; - llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> ModifiedFS; - - // If we're scanning based on a module name alone, we don't expect the client - // to provide us with an input file. However, the driver really wants to have - // one. Let's just make it up to make the driver happy. if (TUBuffer) { - auto OverlayFS = - llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS); - auto InMemoryFS = - llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>(); - InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory); - auto InputPath = TUBuffer->getBufferIdentifier(); - InMemoryFS->addFile( - InputPath, 0, - llvm::MemoryBuffer::getMemBufferCopy(TUBuffer->getBuffer())); - llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = - InMemoryFS; - - OverlayFS->pushOverlay(InMemoryOverlay); - ModifiedFS = OverlayFS; - ModifiedCommandLine = CommandLine; - ModifiedCommandLine->emplace_back(InputPath); + auto [FinalFS, FinalCommandLine] = initVFSForTUBuferScanning( + BaseFS, CommandLine, WorkingDirectory, *TUBuffer); + return scanDependencies(WorkingDirectory, FinalCommandLine, Consumer, + Controller, DC, FinalFS, + /*ModuleName=*/std::nullopt); + } else { + BaseFS->setCurrentWorkingDirectory(WorkingDirectory); + return scanDependencies(WorkingDirectory, CommandLine, Consumer, Controller, + DC, BaseFS, /*ModuleName=*/std::nullopt); } - - const std::vector<std::string> &FinalCommandLine = - ModifiedCommandLine ? *ModifiedCommandLine : CommandLine; - auto &FinalFS = ModifiedFS ? ModifiedFS : BaseFS; - - return scanDependencies(WorkingDirectory, FinalCommandLine, Consumer, - Controller, DC, FinalFS, /*ModuleName=*/std::nullopt); } bool DependencyScanningWorker::computeDependencies( StringRef WorkingDirectory, const std::vector<std::string> &CommandLine, DependencyConsumer &Consumer, DependencyActionController &Controller, DiagnosticConsumer &DC, StringRef ModuleName) { - // Reset what might have been modified in the previous worker invocation. - BaseFS->setCurrentWorkingDirectory(WorkingDirectory); - - // If we're scanning based on a module name alone, we don't expect the client - // to provide us with an input file. However, the driver really wants to have - // one. Let's just make it up to make the driver happy. 
- auto OverlayFS = - llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS); - auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>(); - InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory); - SmallString<128> FakeInputPath; - // TODO: We should retry the creation if the path already exists. - llvm::sys::fs::createUniquePath(ModuleName + "-%%%%%%%%.input", FakeInputPath, - /*MakeAbsolute=*/false); - InMemoryFS->addFile(FakeInputPath, 0, llvm::MemoryBuffer::getMemBuffer("")); - llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = InMemoryFS; - - OverlayFS->pushOverlay(InMemoryOverlay); - auto ModifiedCommandLine = CommandLine; - ModifiedCommandLine.emplace_back(FakeInputPath); + auto [OverlayFS, ModifiedCommandLine] = initVFSForByNameScanning( + BaseFS, CommandLine, WorkingDirectory, ModuleName); return scanDependencies(WorkingDirectory, ModifiedCommandLine, Consumer, Controller, DC, OverlayFS, ModuleName); diff --git a/clang/test/CIR/CodeGen/coro-task.cpp b/clang/test/CIR/CodeGen/coro-task.cpp new file mode 100644 index 0000000..1fc7d77 --- /dev/null +++ b/clang/test/CIR/CodeGen/coro-task.cpp @@ -0,0 +1,123 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR + +namespace std { + +template<typename T> struct remove_reference { typedef T type; }; +template<typename T> struct remove_reference<T &> { typedef T type; }; +template<typename T> struct remove_reference<T &&> { typedef T type; }; + +template<typename T> +typename remove_reference<T>::type &&move(T &&t) noexcept; + +template <class Ret, typename... T> +struct coroutine_traits { using promise_type = typename Ret::promise_type; }; + +template <class Promise = void> +struct coroutine_handle { + static coroutine_handle from_address(void *) noexcept; +}; +template <> +struct coroutine_handle<void> { + template <class PromiseType> + coroutine_handle(coroutine_handle<PromiseType>) noexcept; + static coroutine_handle from_address(void *); +}; + +struct suspend_always { + bool await_ready() noexcept { return false; } + void await_suspend(coroutine_handle<>) noexcept {} + void await_resume() noexcept {} +}; + +struct suspend_never { + bool await_ready() noexcept { return true; } + void await_suspend(coroutine_handle<>) noexcept {} + void await_resume() noexcept {} +}; + +} // namespace std + +namespace folly { +namespace coro { + +using std::suspend_always; +using std::suspend_never; +using std::coroutine_handle; + +using SemiFuture = int; + +template<class T> +struct Task { + struct promise_type { + Task<T> get_return_object() noexcept; + suspend_always initial_suspend() noexcept; + suspend_always final_suspend() noexcept; + void return_value(T); + void unhandled_exception(); + auto yield_value(Task<T>) noexcept { return final_suspend(); } + }; + bool await_ready() noexcept { return false; } + void await_suspend(coroutine_handle<>) noexcept {} + T await_resume(); +}; + +template<> +struct Task<void> { + struct promise_type { + Task<void> get_return_object() noexcept; + suspend_always initial_suspend() noexcept; + suspend_always final_suspend() noexcept; + void return_void() noexcept; + void unhandled_exception() noexcept; + auto yield_value(Task<void>) noexcept { return final_suspend(); } + }; + bool await_ready() noexcept { return false; } + void await_suspend(coroutine_handle<>) noexcept {} + void await_resume() noexcept {} + SemiFuture semi(); +}; + +// FIXME: add CIRGen support here. 
+// struct blocking_wait_fn { +// template <typename T> +// T operator()(Task<T>&& awaitable) const { +// return T(); +// } +// }; + +// inline constexpr blocking_wait_fn blocking_wait{}; +// static constexpr blocking_wait_fn const& blockingWait = blocking_wait; + +struct co_invoke_fn { + template <typename F, typename... A> + Task<void> operator()(F&& f, A&&... a) const { + return Task<void>(); + } +}; + +co_invoke_fn co_invoke; + +}} // namespace folly::coro + +// CIR-DAG: ![[VoidTask:.*]] = !cir.record<struct "folly::coro::Task<void>" padded {!u8i}> + +// CIR: module {{.*}} { +// CIR-NEXT: cir.global external @_ZN5folly4coro9co_invokeE = #cir.zero : !rec_folly3A3Acoro3A3Aco_invoke_fn + +// CIR: cir.func builtin private @__builtin_coro_id(!u32i, !cir.ptr<!void>, !cir.ptr<!void>, !cir.ptr<!void>) -> !u32i + +using VoidTask = folly::coro::Task<void>; + +VoidTask silly_task() { + co_await std::suspend_always(); +} + +// CIR: cir.func coroutine dso_local @_Z10silly_taskv() -> ![[VoidTask]] +// CIR: %[[#VoidTaskAddr:]] = cir.alloca ![[VoidTask]], {{.*}}, ["__retval"] + +// Get coroutine id with __builtin_coro_id. + +// CIR: %[[NullPtr:.*]] = cir.const #cir.ptr<null> : !cir.ptr<!void> +// CIR: %[[Align:.*]] = cir.const #cir.int<16> : !u32i +// CIR: %[[CoroId:.*]] = cir.call @__builtin_coro_id(%[[Align]], %[[NullPtr]], %[[NullPtr]], %[[NullPtr]]) diff --git a/clang/test/CIR/CodeGen/predefined-expr.c b/clang/test/CIR/CodeGen/predefined-expr.c new file mode 100644 index 0000000..674c9bd0 --- /dev/null +++ b/clang/test/CIR/CodeGen/predefined-expr.c @@ -0,0 +1,71 @@ +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -o %t.cir +// RUN: FileCheck %s --input-file=%t.cir --check-prefix=CIR +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -o %t-cir.ll +// RUN: FileCheck %s --input-file=%t-cir.ll --check-prefix=LLVM +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -o %t.ll +// RUN: FileCheck %s --input-file=%t.ll --check-prefix=OGCG + +// CIR: cir.global "private" constant cir_private dso_local @__func__.plainFunction = #cir.const_array<"plainFunction\00" : !cir.array<!s8i x 14>> +// CIR: cir.global "private" constant cir_private dso_local @__PRETTY_FUNCTION__.plainFunction = #cir.const_array<"void plainFunction(void)\00" : !cir.array<!s8i x 25>> +// CIR: cir.global "private" constant cir_private dso_local @__func__.externFunction = #cir.const_array<"externFunction\00" : !cir.array<!s8i x 15>> +// CIR: cir.global "private" constant cir_private dso_local @__PRETTY_FUNCTION__.externFunction = #cir.const_array<"void externFunction(void)\00" : !cir.array<!s8i x 26>> +// CIR: cir.global "private" constant cir_private dso_local @__func__.privateExternFunction = #cir.const_array<"privateExternFunction\00" : !cir.array<!s8i x 22>> +// CIR: cir.global "private" constant cir_private dso_local @__PRETTY_FUNCTION__.privateExternFunction = #cir.const_array<"void privateExternFunction(void)\00" : !cir.array<!s8i x 33>> +// CIR: cir.global "private" constant cir_private dso_local @__func__.staticFunction = #cir.const_array<"staticFunction\00" : !cir.array<!s8i x 15>> +// CIR: cir.global "private" constant cir_private dso_local @__PRETTY_FUNCTION__.staticFunction = #cir.const_array<"void staticFunction(void)\00" : !cir.array<!s8i x 26>> + +// TODO(cir): These should be unnamed_addr +// LLVM: @__func__.plainFunction = private constant [14 x i8] c"plainFunction\00" +// LLVM: @__PRETTY_FUNCTION__.plainFunction = private constant [25 x i8] c"void 
plainFunction(void)\00" +// LLVM: @__func__.externFunction = private constant [15 x i8] c"externFunction\00" +// LLVM: @__PRETTY_FUNCTION__.externFunction = private constant [26 x i8] c"void externFunction(void)\00" +// LLVM: @__func__.privateExternFunction = private constant [22 x i8] c"privateExternFunction\00" +// LLVM: @__PRETTY_FUNCTION__.privateExternFunction = private constant [33 x i8] c"void privateExternFunction(void)\00" +// LLVM: @__func__.staticFunction = private constant [15 x i8] c"staticFunction\00" +// LLVM: @__PRETTY_FUNCTION__.staticFunction = private constant [26 x i8] c"void staticFunction(void)\00" + +// OGCG: @__func__.plainFunction = private unnamed_addr constant [14 x i8] c"plainFunction\00" +// OGCG: @__PRETTY_FUNCTION__.plainFunction = private unnamed_addr constant [25 x i8] c"void plainFunction(void)\00" +// OGCG: @__func__.externFunction = private unnamed_addr constant [15 x i8] c"externFunction\00" +// OGCG: @__PRETTY_FUNCTION__.externFunction = private unnamed_addr constant [26 x i8] c"void externFunction(void)\00" +// OGCG: @__func__.privateExternFunction = private unnamed_addr constant [22 x i8] c"privateExternFunction\00" +// OGCG: @__PRETTY_FUNCTION__.privateExternFunction = private unnamed_addr constant [33 x i8] c"void privateExternFunction(void)\00" +// OGCG: @__func__.staticFunction = private unnamed_addr constant [15 x i8] c"staticFunction\00" +// OGCG: @__PRETTY_FUNCTION__.staticFunction = private unnamed_addr constant [26 x i8] c"void staticFunction(void)\00" + +int printf(const char *, ...); + +void plainFunction(void) { + printf("__func__ %s\n", __func__); + printf("__FUNCTION__ %s\n", __FUNCTION__); + printf("__PRETTY_FUNCTION__ %s\n\n", __PRETTY_FUNCTION__); +} + +extern void externFunction(void) { + printf("__func__ %s\n", __func__); + printf("__FUNCTION__ %s\n", __FUNCTION__); + printf("__PRETTY_FUNCTION__ %s\n\n", __PRETTY_FUNCTION__); +} + +__private_extern__ void privateExternFunction(void) { + printf("__func__ %s\n", __func__); + printf("__FUNCTION__ %s\n", __FUNCTION__); + printf("__PRETTY_FUNCTION__ %s\n\n", __PRETTY_FUNCTION__); +} + +// TODO(cir): Add support for __captured_stmt + +static void staticFunction(void) { + printf("__func__ %s\n", __func__); + printf("__FUNCTION__ %s\n", __FUNCTION__); + printf("__PRETTY_FUNCTION__ %s\n\n", __PRETTY_FUNCTION__); +} + +int main(void) { + plainFunction(); + externFunction(); + privateExternFunction(); + staticFunction(); + + return 0; +} diff --git a/clang/test/CIR/IR/func.cir b/clang/test/CIR/IR/func.cir index 9532859..d7e8184 100644 --- a/clang/test/CIR/IR/func.cir +++ b/clang/test/CIR/IR/func.cir @@ -99,4 +99,15 @@ cir.func @ullfunc() -> !u64i { // CHECK: %[[VAL:.*]] = cir.const #cir.int<42> : !u64i // CHECK: cir.return %[[VAL:.*]] : !u64i // CHECK: } + +cir.func coroutine @coro() { + cir.return +} +// CHECK: cir.func{{.*}} coroutine @coro() + +cir.func builtin @builtin() { + cir.return +} +// CHECK: cir.func{{.*}} builtin @builtin() + } diff --git a/clang/test/CodeGenHLSL/resources/AppendStructuredBuffer-elementtype.hlsl b/clang/test/CodeGenHLSL/resources/AppendStructuredBuffer-elementtype.hlsl deleted file mode 100644 index 094006f..0000000 --- a/clang/test/CodeGenHLSL/resources/AppendStructuredBuffer-elementtype.hlsl +++ /dev/null @@ -1,54 +0,0 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=DXIL -// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-compute -finclude-default-header 
-fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=SPV - -struct MyStruct { - float4 a; - int2 b; -}; - -// DXIL: %"class.hlsl::AppendStructuredBuffer" = type { target("dx.RawBuffer", i16, 1, 0) -// DXIL: %"class.hlsl::AppendStructuredBuffer.0" = type { target("dx.RawBuffer", i16, 1, 0) -// DXIL: %"class.hlsl::AppendStructuredBuffer.1" = type { target("dx.RawBuffer", i32, 1, 0) -// DXIL: %"class.hlsl::AppendStructuredBuffer.2" = type { target("dx.RawBuffer", i32, 1, 0) -// DXIL: %"class.hlsl::AppendStructuredBuffer.3" = type { target("dx.RawBuffer", i64, 1, 0) -// DXIL: %"class.hlsl::AppendStructuredBuffer.4" = type { target("dx.RawBuffer", i64, 1, 0) -// DXIL: %"class.hlsl::AppendStructuredBuffer.5" = type { target("dx.RawBuffer", half, 1, 0) -// DXIL: %"class.hlsl::AppendStructuredBuffer.6" = type { target("dx.RawBuffer", float, 1, 0) -// DXIL: %"class.hlsl::AppendStructuredBuffer.7" = type { target("dx.RawBuffer", double, 1, 0) -// DXIL: %"class.hlsl::AppendStructuredBuffer.8" = type { target("dx.RawBuffer", <4 x i16>, 1, 0) -// DXIL: %"class.hlsl::AppendStructuredBuffer.9" = type { target("dx.RawBuffer", <3 x i32>, 1, 0) -// DXIL: %"class.hlsl::AppendStructuredBuffer.10" = type { target("dx.RawBuffer", <2 x half>, 1, 0) -// DXIL: %"class.hlsl::AppendStructuredBuffer.11" = type { target("dx.RawBuffer", <3 x float>, 1, 0) -// DXIL: %"class.hlsl::AppendStructuredBuffer.12" = type { target("dx.RawBuffer", %struct.MyStruct, 1, 0) -// DXIL: %struct.MyStruct = type <{ <4 x float>, <2 x i32> }> -// DXIL: %"class.hlsl::AppendStructuredBuffer.13" = type { target("dx.RawBuffer", i32, 1, 0) -// SPV: %"class.hlsl::AppendStructuredBuffer.13" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1) -// DXIL: %"class.hlsl::AppendStructuredBuffer.14" = type { target("dx.RawBuffer", <4 x i32>, 1, 0) -// SPV: %"class.hlsl::AppendStructuredBuffer.14" = type { target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 1) - -AppendStructuredBuffer<int16_t> BufI16; -AppendStructuredBuffer<uint16_t> BufU16; -AppendStructuredBuffer<int> BufI32; -AppendStructuredBuffer<uint> BufU32; -AppendStructuredBuffer<int64_t> BufI64; -AppendStructuredBuffer<uint64_t> BufU64; -AppendStructuredBuffer<half> BufF16; -AppendStructuredBuffer<float> BufF32; -AppendStructuredBuffer<double> BufF64; -AppendStructuredBuffer< vector<int16_t, 4> > BufI16x4; -AppendStructuredBuffer< vector<uint, 3> > BufU32x3; -AppendStructuredBuffer<half2> BufF16x2; -AppendStructuredBuffer<float3> BufF32x3; -// TODO: AppendStructuredBuffer<snorm half> BufSNormF16; -// TODO: AppendStructuredBuffer<unorm half> BufUNormF16; -// TODO: AppendStructuredBuffer<snorm float> BufSNormF32; -// TODO: AppendStructuredBuffer<unorm float> BufUNormF32; -// TODO: AppendStructuredBuffer<snorm double> BufSNormF64; -// TODO: AppendStructuredBuffer<unorm double> BufUNormF64; -AppendStructuredBuffer<MyStruct> BufMyStruct; -AppendStructuredBuffer<bool> BufBool; -AppendStructuredBuffer<bool4> BufBoolVec; - -[numthreads(1,1,1)] -void main(int GI : SV_GroupIndex) { -} diff --git a/clang/test/CodeGenHLSL/resources/ConsumeStructuredBuffer-elementtype.hlsl b/clang/test/CodeGenHLSL/resources/ConsumeStructuredBuffer-elementtype.hlsl deleted file mode 100644 index 632fd91..0000000 --- a/clang/test/CodeGenHLSL/resources/ConsumeStructuredBuffer-elementtype.hlsl +++ /dev/null @@ -1,54 +0,0 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=DXIL -// RUN: %clang_cc1 -triple 
spirv-unknown-vulkan1.3-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=SPV - -struct MyStruct { - float4 a; - int2 b; -}; - -// DXIL: %"class.hlsl::ConsumeStructuredBuffer" = type { target("dx.RawBuffer", i16, 1, 0) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.0" = type { target("dx.RawBuffer", i16, 1, 0) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.1" = type { target("dx.RawBuffer", i32, 1, 0) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.2" = type { target("dx.RawBuffer", i32, 1, 0) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.3" = type { target("dx.RawBuffer", i64, 1, 0) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.4" = type { target("dx.RawBuffer", i64, 1, 0) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.5" = type { target("dx.RawBuffer", half, 1, 0) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.6" = type { target("dx.RawBuffer", float, 1, 0) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.7" = type { target("dx.RawBuffer", double, 1, 0) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.8" = type { target("dx.RawBuffer", <4 x i16>, 1, 0) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.9" = type { target("dx.RawBuffer", <3 x i32>, 1, 0) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.10" = type { target("dx.RawBuffer", <2 x half>, 1, 0) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.11" = type { target("dx.RawBuffer", <3 x float>, 1, 0) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.12" = type { target("dx.RawBuffer", %struct.MyStruct, 1, 0) -// DXIL: %struct.MyStruct = type <{ <4 x float>, <2 x i32> }> -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.13" = type { target("dx.RawBuffer", i32, 1, 0) -// SPV: %"class.hlsl::ConsumeStructuredBuffer.13" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1) -// DXIL: %"class.hlsl::ConsumeStructuredBuffer.14" = type { target("dx.RawBuffer", <4 x i32>, 1, 0) -// SPV: %"class.hlsl::ConsumeStructuredBuffer.14" = type { target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 1) - -ConsumeStructuredBuffer<int16_t> BufI16; -ConsumeStructuredBuffer<uint16_t> BufU16; -ConsumeStructuredBuffer<int> BufI32; -ConsumeStructuredBuffer<uint> BufU32; -ConsumeStructuredBuffer<int64_t> BufI64; -ConsumeStructuredBuffer<uint64_t> BufU64; -ConsumeStructuredBuffer<half> BufF16; -ConsumeStructuredBuffer<float> BufF32; -ConsumeStructuredBuffer<double> BufF64; -ConsumeStructuredBuffer< vector<int16_t, 4> > BufI16x4; -ConsumeStructuredBuffer< vector<uint, 3> > BufU32x3; -ConsumeStructuredBuffer<half2> BufF16x2; -ConsumeStructuredBuffer<float3> BufF32x3; -// TODO: ConsumeStructuredBuffer<snorm half> BufSNormF16; -// TODO: ConsumeStructuredBuffer<unorm half> BufUNormF16; -// TODO: ConsumeStructuredBuffer<snorm float> BufSNormF32; -// TODO: ConsumeStructuredBuffer<unorm float> BufUNormF32; -// TODO: ConsumeStructuredBuffer<snorm double> BufSNormF64; -// TODO: ConsumeStructuredBuffer<unorm double> BufUNormF64; -ConsumeStructuredBuffer<MyStruct> BufMyStruct; -ConsumeStructuredBuffer<bool> BufBool; -ConsumeStructuredBuffer<bool4> BufBoolVec; - -[numthreads(1,1,1)] -void main(int GI : SV_GroupIndex) { -} diff --git a/clang/test/CodeGenHLSL/resources/RWStructuredBuffer-elementtype.hlsl b/clang/test/CodeGenHLSL/resources/RWStructuredBuffer-elementtype.hlsl deleted file mode 100644 index 9f0a5b7..0000000 --- a/clang/test/CodeGenHLSL/resources/RWStructuredBuffer-elementtype.hlsl +++ /dev/null @@ -1,74 +0,0 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type 
-emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK -// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=SPV - -// CHECK: %"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", i16, 1, 0), target("dx.RawBuffer", i16, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer" = type { target("spirv.VulkanBuffer", [0 x i16], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } -// CHECK: %"class.hlsl::RWStructuredBuffer.0" = type { target("dx.RawBuffer", i16, 1, 0), target("dx.RawBuffer", i16, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.0" = type { target("spirv.VulkanBuffer", [0 x i16], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } -// CHECK: %"class.hlsl::RWStructuredBuffer.1" = type { target("dx.RawBuffer", i32, 1, 0), target("dx.RawBuffer", i32, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.1" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } -// CHECK: %"class.hlsl::RWStructuredBuffer.2" = type { target("dx.RawBuffer", i32, 1, 0), target("dx.RawBuffer", i32, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.2" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } -// CHECK: %"class.hlsl::RWStructuredBuffer.3" = type { target("dx.RawBuffer", i64, 1, 0), target("dx.RawBuffer", i64, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.3" = type { target("spirv.VulkanBuffer", [0 x i64], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } -// CHECK: %"class.hlsl::RWStructuredBuffer.4" = type { target("dx.RawBuffer", i64, 1, 0), target("dx.RawBuffer", i64, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.4" = type { target("spirv.VulkanBuffer", [0 x i64], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } -// CHECK: %"class.hlsl::RWStructuredBuffer.5" = type { target("dx.RawBuffer", half, 1, 0), target("dx.RawBuffer", half, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.5" = type { target("spirv.VulkanBuffer", [0 x half], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } -// CHECK: %"class.hlsl::RWStructuredBuffer.6" = type { target("dx.RawBuffer", float, 1, 0), target("dx.RawBuffer", float, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.6" = type { target("spirv.VulkanBuffer", [0 x float], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } -// CHECK: %"class.hlsl::RWStructuredBuffer.7" = type { target("dx.RawBuffer", double, 1, 0), target("dx.RawBuffer", double, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.7" = type { target("spirv.VulkanBuffer", [0 x double], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } -// CHECK: %"class.hlsl::RWStructuredBuffer.8" = type { target("dx.RawBuffer", <4 x i16>, 1, 0), target("dx.RawBuffer", <4 x i16>, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.8" = type { target("spirv.VulkanBuffer", [0 x <4 x i16>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } -// CHECK: %"class.hlsl::RWStructuredBuffer.9" = type { target("dx.RawBuffer", <3 x i32>, 1, 0), target("dx.RawBuffer", <3 x i32>, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.9" = type { target("spirv.VulkanBuffer", [0 x <3 x i32>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } -// CHECK: %"class.hlsl::RWStructuredBuffer.10" = type { target("dx.RawBuffer", <2 x half>, 1, 0), target("dx.RawBuffer", <2 x half>, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.10" = type { target("spirv.VulkanBuffer", [0 x <2 x half>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } 
-// CHECK: %"class.hlsl::RWStructuredBuffer.11" = type { target("dx.RawBuffer", <3 x float>, 1, 0), target("dx.RawBuffer", <3 x float>, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.11" = type { target("spirv.VulkanBuffer", [0 x <3 x float>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } -// CHECK: %"class.hlsl::RWStructuredBuffer.12" = type { target("dx.RawBuffer", i32, 1, 0), target("dx.RawBuffer", i32, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.12" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } -// CHECK: %"class.hlsl::RWStructuredBuffer.13" = type { target("dx.RawBuffer", <4 x i32>, 1, 0), target("dx.RawBuffer", <4 x i32>, 1, 0) } -// SPV: %"class.hlsl::RWStructuredBuffer.13" = type { target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } - -RWStructuredBuffer<int16_t> BufI16; -RWStructuredBuffer<uint16_t> BufU16; -RWStructuredBuffer<int> BufI32; -RWStructuredBuffer<uint> BufU32; -RWStructuredBuffer<int64_t> BufI64; -RWStructuredBuffer<uint64_t> BufU64; -RWStructuredBuffer<half> BufF16; -RWStructuredBuffer<float> BufF32; -RWStructuredBuffer<double> BufF64; -RWStructuredBuffer< vector<int16_t, 4> > BufI16x4; -RWStructuredBuffer< vector<uint, 3> > BufU32x3; -RWStructuredBuffer<half2> BufF16x2; -RWStructuredBuffer<float3> BufF32x3; -RWStructuredBuffer<bool> BufBool; -RWStructuredBuffer<bool4> BufBoolVec; -// TODO: RWStructuredBuffer<snorm half> BufSNormF16; -// TODO: RWStructuredBuffer<unorm half> BufUNormF16; -// TODO: RWStructuredBuffer<snorm float> BufSNormF32; -// TODO: RWStructuredBuffer<unorm float> BufUNormF32; -// TODO: RWStructuredBuffer<snorm double> BufSNormF64; -// TODO: RWStructuredBuffer<unorm double> BufUNormF64; - -[numthreads(1,1,1)] -void main(int GI : SV_GroupIndex) { - BufI16[GI] = 0; - BufU16[GI] = 0; - BufI32[GI] = 0; - BufU32[GI] = 0; - BufI64[GI] = 0; - BufU64[GI] = 0; - BufF16[GI] = 0; - BufF32[GI] = 0; - BufF64[GI] = 0; - BufI16x4[GI] = 0; - BufU32x3[GI] = 0; - BufF16x2[GI] = 0; - BufF32x3[GI] = 0; - BufBool[GI] = false; - BufBool[GI] = false; -} diff --git a/clang/test/CodeGenHLSL/resources/StructuredBuffer-elementtype.hlsl b/clang/test/CodeGenHLSL/resources/StructuredBuffer-elementtype.hlsl deleted file mode 100644 index 00216df..0000000 --- a/clang/test/CodeGenHLSL/resources/StructuredBuffer-elementtype.hlsl +++ /dev/null @@ -1,61 +0,0 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK -// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=SPV - -// CHECK: %"class.hlsl::StructuredBuffer" = type { target("dx.RawBuffer", i16, 0, 0) } -// CHECK: %"class.hlsl::StructuredBuffer.0" = type { target("dx.RawBuffer", i16, 0, 0) } -// CHECK: %"class.hlsl::StructuredBuffer.1" = type { target("dx.RawBuffer", i32, 0, 0) } -// CHECK: %"class.hlsl::StructuredBuffer.2" = type { target("dx.RawBuffer", i32, 0, 0) } -// CHECK: %"class.hlsl::StructuredBuffer.3" = type { target("dx.RawBuffer", i64, 0, 0) } -// CHECK: %"class.hlsl::StructuredBuffer.4" = type { target("dx.RawBuffer", i64, 0, 0) } -// CHECK: %"class.hlsl::StructuredBuffer.5" = type { target("dx.RawBuffer", half, 0, 0) } -// CHECK: %"class.hlsl::StructuredBuffer.6" = type { target("dx.RawBuffer", float, 0, 0) } -// CHECK: %"class.hlsl::StructuredBuffer.7" = type { target("dx.RawBuffer", double, 0, 0) } 
-// CHECK: %"class.hlsl::StructuredBuffer.8" = type { target("dx.RawBuffer", <4 x i16>, 0, 0) } -// CHECK: %"class.hlsl::StructuredBuffer.9" = type { target("dx.RawBuffer", <3 x i32>, 0, 0) } -// CHECK: %"class.hlsl::StructuredBuffer.10" = type { target("dx.RawBuffer", <2 x half>, 0, 0) } -// CHECK: %"class.hlsl::StructuredBuffer.11" = type { target("dx.RawBuffer", <3 x float>, 0, 0) } -// CHECK: %"class.hlsl::StructuredBuffer.12" = type { target("dx.RawBuffer", i32, 0, 0) } -// SPV: %"class.hlsl::StructuredBuffer.12" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 0) -// CHECK: %"class.hlsl::StructuredBuffer.13" = type { target("dx.RawBuffer", <4 x i32>, 0, 0) } -// SPV: %"class.hlsl::StructuredBuffer.13" = type { target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 0) - -StructuredBuffer<int16_t> BufI16; -StructuredBuffer<uint16_t> BufU16; -StructuredBuffer<int> BufI32; -StructuredBuffer<uint> BufU32; -StructuredBuffer<int64_t> BufI64; -StructuredBuffer<uint64_t> BufU64; -StructuredBuffer<half> BufF16; -StructuredBuffer<float> BufF32; -StructuredBuffer<double> BufF64; -StructuredBuffer< vector<int16_t, 4> > BufI16x4; -StructuredBuffer< vector<uint, 3> > BufU32x3; -StructuredBuffer<half2> BufF16x2; -StructuredBuffer<float3> BufF32x3; -StructuredBuffer<bool> BufBool; -StructuredBuffer<bool4> BufBoolVec; -// TODO: StructuredBuffer<snorm half> BufSNormF16; -// TODO: StructuredBuffer<unorm half> BufUNormF16; -// TODO: StructuredBuffer<snorm float> BufSNormF32; -// TODO: StructuredBuffer<unorm float> BufUNormF32; -// TODO: StructuredBuffer<snorm double> BufSNormF64; -// TODO: StructuredBuffer<unorm double> BufUNormF64; - -[numthreads(1,1,1)] -void main(int GI : SV_GroupIndex) { - int16_t v1 = BufI16[GI]; - uint16_t v2 = BufU16[GI]; - int v3 = BufI32[GI]; - uint v4 = BufU32[GI]; - int64_t v5 = BufI64[GI]; - uint64_t v6 = BufU64[GI]; - half v7 = BufF16[GI]; - float v8 = BufF32[GI]; - double v9 = BufF64[GI]; - vector<int16_t,4> v10 = BufI16x4[GI]; - vector<int, 3> v11 = BufU32x3[GI]; - half2 v12 = BufF16x2[GI]; - float3 v13 = BufF32x3[GI]; - bool v14 = BufBool[GI]; - bool4 v15 = BufBoolVec[GI]; -} diff --git a/clang/test/CodeGenHLSL/resources/StructuredBuffers-elementtype.hlsl b/clang/test/CodeGenHLSL/resources/StructuredBuffers-elementtype.hlsl new file mode 100644 index 0000000..2b286bd --- /dev/null +++ b/clang/test/CodeGenHLSL/resources/StructuredBuffers-elementtype.hlsl @@ -0,0 +1,113 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type \ +// RUN: -emit-llvm -o - -DRESOURCE=StructuredBuffer %s | FileCheck %s -DRESOURCE=StructuredBuffer -check-prefixes=DXIL-RO + +// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-compute -finclude-default-header -fnative-half-type \ +// RUN: -emit-llvm -o - -DRESOURCE=StructuredBuffer %s | FileCheck %s -DRESOURCE=StructuredBuffer -check-prefixes=SPV-RO + +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type \ +// RUN: -emit-llvm -o - -DRESOURCE=RWStructuredBuffer %s | FileCheck %s -DRESOURCE=RWStructuredBuffer -check-prefixes=DXIL-RW + +// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-compute -finclude-default-header -fnative-half-type \ +// RUN: -emit-llvm -o - -DRESOURCE=RWStructuredBuffer %s | FileCheck %s -DRESOURCE=RWStructuredBuffer -check-prefixes=SPV-RW + +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type \ +// RUN: -emit-llvm -o - -DRESOURCE=AppendStructuredBuffer %s | FileCheck %s 
-DRESOURCE=AppendStructuredBuffer -check-prefixes=DXIL-RW + +// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-compute -finclude-default-header -fnative-half-type \ +// RUN: -emit-llvm -o - -DRESOURCE=AppendStructuredBuffer %s | FileCheck %s -DRESOURCE=AppendStructuredBuffer -check-prefixes=SPV-RW + +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type \ +// RUN: -emit-llvm -o - -DRESOURCE=ConsumeStructuredBuffer %s | FileCheck %s -DRESOURCE=ConsumeStructuredBuffer -check-prefixes=DXIL-RW + +// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-compute -finclude-default-header -fnative-half-type \ +// RUN: -emit-llvm -o - -DRESOURCE=ConsumeStructuredBuffer %s | FileCheck %s -DRESOURCE=ConsumeStructuredBuffer -check-prefixes=SPV-RW + +// DXIL-RO: %"class.hlsl::[[RESOURCE]]" = type { target("dx.RawBuffer", i16, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].0" = type { target("dx.RawBuffer", i16, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].1" = type { target("dx.RawBuffer", i32, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].2" = type { target("dx.RawBuffer", i32, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].3" = type { target("dx.RawBuffer", i64, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].4" = type { target("dx.RawBuffer", i64, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].5" = type { target("dx.RawBuffer", half, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].6" = type { target("dx.RawBuffer", float, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].7" = type { target("dx.RawBuffer", double, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].8" = type { target("dx.RawBuffer", <4 x i16>, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].9" = type { target("dx.RawBuffer", <3 x i32>, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].10" = type { target("dx.RawBuffer", <2 x half>, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].11" = type { target("dx.RawBuffer", <3 x float>, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].12" = type { target("dx.RawBuffer", i32, 0, 0) } +// DXIL-RO: %"class.hlsl::[[RESOURCE]].13" = type { target("dx.RawBuffer", <4 x i32>, 0, 0) } + +// DXIL-RW: %"class.hlsl::[[RESOURCE]]" = type { target("dx.RawBuffer", i16, 1, 0), target("dx.RawBuffer", i16, 1, 0) } +// DXIL-RW: %"class.hlsl::[[RESOURCE]].0" = type { target("dx.RawBuffer", i16, 1, 0), target("dx.RawBuffer", i16, 1, 0) } +// DXIL-RW: %"class.hlsl::[[RESOURCE]].1" = type { target("dx.RawBuffer", i32, 1, 0), target("dx.RawBuffer", i32, 1, 0) } +// DXIL-RW: %"class.hlsl::[[RESOURCE]].2" = type { target("dx.RawBuffer", i32, 1, 0), target("dx.RawBuffer", i32, 1, 0) } +// DXIL-RW: %"class.hlsl::[[RESOURCE]].3" = type { target("dx.RawBuffer", i64, 1, 0), target("dx.RawBuffer", i64, 1, 0) } +// DXIL-RW: %"class.hlsl::[[RESOURCE]].4" = type { target("dx.RawBuffer", i64, 1, 0), target("dx.RawBuffer", i64, 1, 0) } +// DXIL-RW: %"class.hlsl::[[RESOURCE]].5" = type { target("dx.RawBuffer", half, 1, 0), target("dx.RawBuffer", half, 1, 0) } +// DXIL-RW: %"class.hlsl::[[RESOURCE]].6" = type { target("dx.RawBuffer", float, 1, 0), target("dx.RawBuffer", float, 1, 0) } +// DXIL-RW: %"class.hlsl::[[RESOURCE]].7" = type { target("dx.RawBuffer", double, 1, 0), target("dx.RawBuffer", double, 1, 0) } +// DXIL-RW: %"class.hlsl::[[RESOURCE]].8" = type { target("dx.RawBuffer", <4 x i16>, 1, 0), target("dx.RawBuffer", <4 x i16>, 1, 0) } +// DXIL-RW: %"class.hlsl::[[RESOURCE]].9" = type { target("dx.RawBuffer", <3 x i32>, 1, 0), target("dx.RawBuffer", <3 x i32>, 1, 0) } +// DXIL-RW: 
%"class.hlsl::[[RESOURCE]].10" = type { target("dx.RawBuffer", <2 x half>, 1, 0), target("dx.RawBuffer", <2 x half>, 1, 0) } +// DXIL-RW: %"class.hlsl::[[RESOURCE]].11" = type { target("dx.RawBuffer", <3 x float>, 1, 0), target("dx.RawBuffer", <3 x float>, 1, 0) } +// DXIL-RW: %"class.hlsl::[[RESOURCE]].12" = type { target("dx.RawBuffer", i32, 1, 0), target("dx.RawBuffer", i32, 1, 0) } +// DXIL-RW: %"class.hlsl::[[RESOURCE]].13" = type { target("dx.RawBuffer", <4 x i32>, 1, 0), target("dx.RawBuffer", <4 x i32>, 1, 0) } + +// SPV-RO: %"class.hlsl::[[RESOURCE]]" = type { target("spirv.VulkanBuffer", [0 x i16], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].0" = type { target("spirv.VulkanBuffer", [0 x i16], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].1" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].2" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].3" = type { target("spirv.VulkanBuffer", [0 x i64], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].4" = type { target("spirv.VulkanBuffer", [0 x i64], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].5" = type { target("spirv.VulkanBuffer", [0 x half], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].6" = type { target("spirv.VulkanBuffer", [0 x float], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].7" = type { target("spirv.VulkanBuffer", [0 x double], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].8" = type { target("spirv.VulkanBuffer", [0 x <4 x i16>], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].9" = type { target("spirv.VulkanBuffer", [0 x <3 x i32>], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].10" = type { target("spirv.VulkanBuffer", [0 x <2 x half>], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].11" = type { target("spirv.VulkanBuffer", [0 x <3 x float>], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].12" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 0) } +// SPV-RO: %"class.hlsl::[[RESOURCE]].13" = type { target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 0) } + +// SPV-RW: %"class.hlsl::[[RESOURCE]]" = type { target("spirv.VulkanBuffer", [0 x i16], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].0" = type { target("spirv.VulkanBuffer", [0 x i16], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].1" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].2" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].3" = type { target("spirv.VulkanBuffer", [0 x i64], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].4" = type { target("spirv.VulkanBuffer", [0 x i64], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].5" = type { target("spirv.VulkanBuffer", [0 x half], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].6" = type { target("spirv.VulkanBuffer", [0 x float], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].7" = type { target("spirv.VulkanBuffer", [0 x double], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].8" = type { target("spirv.VulkanBuffer", [0 x <4 x i16>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].9" = type { 
target("spirv.VulkanBuffer", [0 x <3 x i32>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].10" = type { target("spirv.VulkanBuffer", [0 x <2 x half>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].11" = type { target("spirv.VulkanBuffer", [0 x <3 x float>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].12" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } +// SPV-RW: %"class.hlsl::[[RESOURCE]].13" = type { target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) } + +RESOURCE<int16_t> BufI16; +RESOURCE<uint16_t> BufU16; +RESOURCE<int> BufI32; +RESOURCE<uint> BufU32; +RESOURCE<int64_t> BufI64; +RESOURCE<uint64_t> BufU64; +RESOURCE<half> BufF16; +RESOURCE<float> BufF32; +RESOURCE<double> BufF64; +RESOURCE< vector<int16_t, 4> > BufI16x4; +RESOURCE< vector<uint, 3> > BufU32x3; +RESOURCE<half2> BufF16x2; +RESOURCE<float3> BufF32x3; +RESOURCE<bool> BufBool; +RESOURCE<bool4> BufBoolVec; +// TODO: RESOURCE<snorm half> BufSNormF16; +// TODO: RESOURCE<unorm half> BufUNormF16; +// TODO: RESOURCE<snorm float> BufSNormF32; +// TODO: RESOURCE<unorm float> BufUNormF32; +// TODO: RESOURCE<snorm double> BufSNormF64; +// TODO: RESOURCE<unorm double> BufUNormF64; + +[numthreads(1,1,1)] +void main() { +} diff --git a/clang/test/Driver/dxc_frs.hlsl b/clang/test/Driver/dxc_frs.hlsl index 767cab6..ffc3886 100644 --- a/clang/test/Driver/dxc_frs.hlsl +++ b/clang/test/Driver/dxc_frs.hlsl @@ -1,10 +1,9 @@ -// RUN: %clang_dxc -T cs_6_0 /Fo %t.dxo /Frs %t.rs.dxo -### %s 2>&1 | FileCheck %s +// RUN: %clang_dxc -Vd -T cs_6_0 /Fo %t.dxo /Frs %t.rs.dxo -### %s 2>&1 | FileCheck %s // Test to demonstrate extracting the root signature to the specified // output file with /Frs. 
// CHECK: "{{.*}}llvm-objcopy{{(.exe)?}}" "{{.*}}.obj" "{{.*}}.dxo" "--extract-section=RTS0={{.*}}.rs.dxo" - [shader("compute"), RootSignature("")] [numthreads(1,1,1)] void EmptyEntry() {} diff --git a/clang/test/Driver/dxc_rootsignature_target.hlsl b/clang/test/Driver/dxc_rootsignature_target.hlsl index 08cd1ab..bb48063 100644 --- a/clang/test/Driver/dxc_rootsignature_target.hlsl +++ b/clang/test/Driver/dxc_rootsignature_target.hlsl @@ -1,4 +1,4 @@ -// RUN: %clang_dxc -E EntryRS -T rootsig_1_1 /Fo %t.dxo -### %s 2>&1 | FileCheck %s --check-prefix=CMDS +// RUN: %clang_dxc -Vd -E EntryRS -T rootsig_1_1 /Fo %t.dxo -### %s 2>&1 | FileCheck %s --check-prefix=CMDS // CMDS: "{{.*}}clang{{.*}}" "-cc1" // CMDS-SAME: "-triple" "dxilv1.1-unknown-shadermodel1.1-rootsignature" diff --git a/clang/test/Driver/hip-options.hip b/clang/test/Driver/hip-options.hip index 6206020..09f1ffa 100644 --- a/clang/test/Driver/hip-options.hip +++ b/clang/test/Driver/hip-options.hip @@ -254,3 +254,9 @@ // RUN: --offload-arch=gfx1100 --offload-new-driver --offload-jobs=0x4 %s 2>&1 | \ // RUN: FileCheck -check-prefix=INVJOBS %s // INVJOBS: clang: error: invalid integral value '0x4' in '--offload-jobs=0x4' + +// RUN: %clang -### -Werror --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \ +// RUN: --offload-arch=gfx1100 --offload-new-driver --offload-jobs=jobserver %s 2>&1 | \ +// RUN: FileCheck -check-prefix=JOBSV %s +// JOBSV: clang-linker-wrapper{{.*}} "--wrapper-jobs=jobserver" + diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c index c060dae..1c0fb96 100644 --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -114,6 +114,8 @@ __attribute__((visibility("protected"), used)) int x; // RUN: -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu --wrapper-jobs=4 \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA-PAR +// RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu --wrapper-jobs=jobserver \ +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA-PAR // CUDA-PAR: fatbinary{{.*}}-64 --create {{.*}}.fatbin diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 1419b8c..4d5b956 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -1295,12 +1295,18 @@ int main(int Argc, char **Argv) { parallel::strategy = hardware_concurrency(1); if (auto *Arg = Args.getLastArg(OPT_wrapper_jobs)) { - unsigned Threads = 0; - if (!llvm::to_integer(Arg->getValue(), Threads) || Threads == 0) - reportError(createStringError("%s: expected a positive integer, got '%s'", - Arg->getSpelling().data(), - Arg->getValue())); - parallel::strategy = hardware_concurrency(Threads); + StringRef Val = Arg->getValue(); + if (Val.equals_insensitive("jobserver")) + parallel::strategy = jobserver_concurrency(); + else { + unsigned Threads = 0; + if (!llvm::to_integer(Val, Threads) || Threads == 0) + reportError(createStringError( + "%s: expected a positive integer or 'jobserver', got '%s'", + Arg->getSpelling().data(), Val.data())); + else + parallel::strategy = hardware_concurrency(Threads); + } } if (Args.hasArg(OPT_wrapper_time_trace_eq)) { diff --git a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td index fa73e02..87f911c 
100644 --- a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td +++ b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td @@ -53,7 +53,8 @@ def wrapper_time_trace_granularity : Joined<["--"], "wrapper-time-trace-granular def wrapper_jobs : Joined<["--"], "wrapper-jobs=">, Flags<[WrapperOnlyOption]>, MetaVarName<"<number>">, - HelpText<"Sets the number of parallel jobs to use for device linking">; + HelpText<"Sets the number of parallel jobs for device linking. Can be a " + "positive integer or 'jobserver'.">; def override_image : Joined<["--"], "override-image=">, Flags<[WrapperOnlyOption]>, MetaVarName<"<kind=file>">, diff --git a/clang/unittests/Format/FormatTestObjC.cpp b/clang/unittests/Format/FormatTestObjC.cpp index f7f73db..700d7cf8 100644 --- a/clang/unittests/Format/FormatTestObjC.cpp +++ b/clang/unittests/Format/FormatTestObjC.cpp @@ -763,6 +763,15 @@ TEST_F(FormatTestObjC, FormatObjCMethodExpr) { " backing:NSBackingStoreBuffered\n" " defer:NO]);\n" "}"); + Style.ColumnLimit = 63; + verifyFormat( + "- (void)test {\n" + " if ([object\n" + " respondsToSelector:@selector(\n" + " selectorName:param1:param2:)])\n" + " return;\n" + "}"); + Style.ColumnLimit = PreviousColumnLimit; verifyFormat("[contentsContainer replaceSubview:[subviews objectAtIndex:0]\n" " with:contentsNativeView];"); diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index 4a8f27f..c21b118 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -1929,6 +1929,37 @@ TEST_F(TokenAnnotatorTest, UnderstandsObjCMethodExpr) { ASSERT_EQ(Tokens.size(), 20u) << Tokens; EXPECT_TOKEN(Tokens[9], tok::l_square, TT_ObjCMethodExpr); EXPECT_TOKEN(Tokens[15], tok::greater, TT_BinaryOperator); + + Tokens = annotate("a = @selector(name:);"); + ASSERT_EQ(Tokens.size(), 10u) << Tokens; + EXPECT_TOKEN(Tokens[4], tok::l_paren, TT_ObjCSelector); + EXPECT_TOKEN(Tokens[6], tok::colon, TT_ObjCSelector); + EXPECT_TOKEN(Tokens[7], tok::r_paren, TT_ObjCSelector); + + Tokens = + annotate("[object respondsToSelector:@selector(name:param1:param2:)\n" + " respondsToSelector:@selector(name:param1:param2:)];"); + ASSERT_EQ(Tokens.size(), 29u) << Tokens; + EXPECT_TOKEN(Tokens[0], tok::l_square, TT_ObjCMethodExpr); + EXPECT_TOKEN(Tokens[3], tok::colon, TT_ObjCMethodExpr); + EXPECT_TOKEN(Tokens[6], tok::l_paren, TT_ObjCSelector); + EXPECT_TOKEN(Tokens[8], tok::colon, TT_ObjCSelector); + EXPECT_TOKEN(Tokens[10], tok::colon, TT_ObjCSelector); + EXPECT_TOKEN(Tokens[12], tok::colon, TT_ObjCSelector); + EXPECT_TOKEN(Tokens[13], tok::r_paren, TT_ObjCSelector); + EXPECT_TOKEN(Tokens[15], tok::colon, TT_ObjCMethodExpr); + EXPECT_TOKEN(Tokens[18], tok::l_paren, TT_ObjCSelector); + EXPECT_TOKEN(Tokens[20], tok::colon, TT_ObjCSelector); + EXPECT_TOKEN(Tokens[22], tok::colon, TT_ObjCSelector); + EXPECT_TOKEN(Tokens[24], tok::colon, TT_ObjCSelector); + EXPECT_TOKEN(Tokens[25], tok::r_paren, TT_ObjCSelector); + EXPECT_TOKEN(Tokens[26], tok::r_square, TT_ObjCMethodExpr); + + Tokens = annotate("[a b:c];"); + ASSERT_EQ(Tokens.size(), 8u) << Tokens; + EXPECT_TOKEN(Tokens[0], tok::l_square, TT_ObjCMethodExpr); + EXPECT_TOKEN(Tokens[3], tok::colon, TT_ObjCMethodExpr); + EXPECT_TOKEN(Tokens[5], tok::r_square, TT_ObjCMethodExpr); } TEST_F(TokenAnnotatorTest, UnderstandsObjCMethodDecl) { diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index 9095b05..6c226aa 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ 
b/compiler-rt/lib/builtins/CMakeLists.txt @@ -819,7 +819,7 @@ set(s390x_SOURCES set(wasm_SOURCES wasm/__c_longjmp.S - wasm/__cpp_exceptions.S + wasm/__cpp_exception.S ${GENERIC_TF_SOURCES} ${GENERIC_SOURCES} ) diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index 6548ec9..e8f70bd 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -178,9 +178,6 @@ endif () if ("${LLVM_RUNTIMES_TARGET}" MATCHES "^amdgcn|^nvptx") set(sources ${gpu_sources}) elseif(FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "CUDA") - # findloc.cpp has some issues with higher compute capability. Remove it - # from CUDA build until we can lower its memory footprint. - list(REMOVE_ITEM supported_sources findloc.cpp) set(sources ${supported_sources}) else () set(sources ${supported_sources} ${host_sources} ${f128_sources}) diff --git a/flang-rt/lib/runtime/extrema.cpp b/flang-rt/lib/runtime/extrema.cpp index 9846529..c4575cc 100644 --- a/flang-rt/lib/runtime/extrema.cpp +++ b/flang-rt/lib/runtime/extrema.cpp @@ -397,9 +397,12 @@ template <TypeCategory CAT, bool IS_MAX, template <typename, bool, bool> class COMPARE> struct DoPartialMaxOrMinLocHelper { template <int KIND> struct Functor { - RT_API_ATTRS void operator()(const char *intrinsic, Descriptor &result, - const Descriptor &x, int kind, int dim, const Descriptor *mask, - bool back, Terminator &terminator) const { + // NVCC inlines more aggressively which causes too many specializations of + // this function to be inlined causing compiler timeouts. Set as + // noinline to allow compilation to complete. + RT_API_ATTRS RT_DEVICE_NOINLINE void operator()(const char *intrinsic, + Descriptor &result, const Descriptor &x, int kind, int dim, + const Descriptor *mask, bool back, Terminator &terminator) const { DoPartialMaxOrMinLoc<CAT, KIND, IS_MAX, COMPARE>( intrinsic, result, x, kind, dim, mask, back, terminator); } diff --git a/flang-rt/lib/runtime/findloc.cpp b/flang-rt/lib/runtime/findloc.cpp index 5485f4b..b5031ec 100644 --- a/flang-rt/lib/runtime/findloc.cpp +++ b/flang-rt/lib/runtime/findloc.cpp @@ -153,10 +153,13 @@ template <TypeCategory CAT, class HELPER> struct NumericFindlocHelper { template <int KIND> struct Functor { - RT_API_ATTRS void operator()(TypeCategory targetCat, int targetKind, - Descriptor &result, const Descriptor &x, const Descriptor &target, - int kind, int dim, const Descriptor *mask, bool back, - Terminator &terminator) const { + // NVCC inlines more aggressively which causes too many specializations of + // this function to be inlined causing compiler timeouts. Set as + // noinline to allow compilation to complete. 
+ RT_API_ATTRS RT_DEVICE_NOINLINE void operator()(TypeCategory targetCat, + int targetKind, Descriptor &result, const Descriptor &x, + const Descriptor &target, int kind, int dim, const Descriptor *mask, + bool back, Terminator &terminator) const { switch (targetCat) { case TypeCategory::Integer: case TypeCategory::Unsigned: diff --git a/flang/include/flang/Evaluate/characteristics.h b/flang/include/flang/Evaluate/characteristics.h index b6a9ebe..4cf82e7 100644 --- a/flang/include/flang/Evaluate/characteristics.h +++ b/flang/include/flang/Evaluate/characteristics.h @@ -251,7 +251,8 @@ struct DummyDataObject { std::optional<std::string> *warning = nullptr) const; static std::optional<DummyDataObject> Characterize( const semantics::Symbol &, FoldingContext &); - bool CanBePassedViaImplicitInterface(std::string *whyNot = nullptr) const; + bool CanBePassedViaImplicitInterface( + std::string *whyNot = nullptr, bool checkCUDA = true) const; bool IsPassedByDescriptor(bool isBindC) const; llvm::raw_ostream &Dump(llvm::raw_ostream &) const; @@ -307,7 +308,8 @@ struct DummyArgument { void SetOptional(bool = true); common::Intent GetIntent() const; void SetIntent(common::Intent); - bool CanBePassedViaImplicitInterface(std::string *whyNot = nullptr) const; + bool CanBePassedViaImplicitInterface( + std::string *whyNot = nullptr, bool checkCUDA = true) const; bool IsTypelessIntrinsicDummy() const; bool IsCompatibleWith(const DummyArgument &, std::string *whyNot = nullptr, std::optional<std::string> *warning = nullptr) const; @@ -402,7 +404,8 @@ struct Procedure { return !attrs.test(Attr::ImplicitInterface); } std::optional<int> FindPassIndex(std::optional<parser::CharBlock>) const; - bool CanBeCalledViaImplicitInterface(std::string *whyNot = nullptr) const; + bool CanBeCalledViaImplicitInterface( + std::string *whyNot = nullptr, bool checkCUDA = true) const; bool CanOverride(const Procedure &, std::optional<int> passIndex) const; bool IsCompatibleWith(const Procedure &, bool ignoreImplicitVsExplicit, std::string *whyNot = nullptr, const SpecificIntrinsic * = nullptr, diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 325ca9b..1443e93 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -1639,6 +1639,7 @@ struct CommonStmt { BOILERPLATE(CommonStmt); CommonStmt(std::optional<Name> &&, std::list<CommonBlockObject> &&, std::list<Block> &&); + CharBlock source; std::list<Block> blocks; }; diff --git a/flang/include/flang/Semantics/scope.h b/flang/include/flang/Semantics/scope.h index b404683..3195892 100644 --- a/flang/include/flang/Semantics/scope.h +++ b/flang/include/flang/Semantics/scope.h @@ -188,7 +188,7 @@ public: void add_crayPointer(const SourceName &, Symbol &); mapType &commonBlocks() { return commonBlocks_; } const mapType &commonBlocks() const { return commonBlocks_; } - Symbol &MakeCommonBlock(const SourceName &); + Symbol &MakeCommonBlock(SourceName, SourceName location); Symbol *FindCommonBlock(const SourceName &) const; /// Make a Symbol but don't add it to the scope. 
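Note on the two flang-rt hunks above (extrema.cpp, findloc.cpp): they hinge on the RT_DEVICE_NOINLINE attribute to keep NVCC from inlining every per-KIND specialization of operator(). A minimal sketch of how such an attribute macro can be conditioned on device compilation follows; the __CUDACC__ guard and the __noinline__ spelling are illustrative assumptions, not flang-rt's actual definition in its attribute headers:

// Sketch only: request no inlining when compiling with a CUDA compiler,
// where aggressive device-side inlining causes the compile-time blowup;
// host builds expand to nothing and keep normal inlining heuristics.
#if defined(__CUDACC__)
#define RT_DEVICE_NOINLINE __noinline__
#else
#define RT_DEVICE_NOINLINE
#endif

With this shape, the annotated operator() bodies stay out of line in the device build, bounding the number of inlined specializations so compilation can finish, while host code is unaffected.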
diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h index a0d5ae7..975423b 100644 --- a/flang/include/flang/Semantics/symbol.h +++ b/flang/include/flang/Semantics/symbol.h @@ -570,17 +570,21 @@ private: class CommonBlockDetails : public WithBindName { public: + explicit CommonBlockDetails(SourceName location) + : sourceLocation_{location} {} + SourceName sourceLocation() const { return sourceLocation_; } MutableSymbolVector &objects() { return objects_; } const MutableSymbolVector &objects() const { return objects_; } void add_object(Symbol &object) { objects_.emplace_back(object); } void replace_object(Symbol &object, unsigned index) { - CHECK(index < (unsigned)objects_.size()); + CHECK(index < objects_.size()); objects_[index] = object; } std::size_t alignment() const { return alignment_; } void set_alignment(std::size_t alignment) { alignment_ = alignment; } private: + SourceName sourceLocation_; MutableSymbolVector objects_; std::size_t alignment_{0}; // required alignment in bytes }; diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h index db73a85..b977fb8 100644 --- a/flang/include/flang/Semantics/tools.h +++ b/flang/include/flang/Semantics/tools.h @@ -770,5 +770,7 @@ std::string GetCommonBlockObjectName(const Symbol &, bool underscoring); // Check for ambiguous USE associations bool HadUseError(SemanticsContext &, SourceName at, const Symbol *); +bool AreSameModuleSymbol(const Symbol &, const Symbol &); + } // namespace Fortran::semantics #endif // FORTRAN_SEMANTICS_TOOLS_H_ diff --git a/flang/include/flang/Semantics/type.h b/flang/include/flang/Semantics/type.h index 5d96f1e..3bd638b 100644 --- a/flang/include/flang/Semantics/type.h +++ b/flang/include/flang/Semantics/type.h @@ -285,6 +285,9 @@ public: bool IsForwardReferenced() const; bool HasDefaultInitialization( bool ignoreAllocatable = false, bool ignorePointer = true) const; + std::optional<std::string> // component path suitable for error messages + ComponentWithDefaultInitialization( + bool ignoreAllocatable = false, bool ignorePointer = true) const; bool HasDestruction() const; // The "raw" type parameter list is a simple transcription from the diff --git a/flang/lib/Evaluate/characteristics.cpp b/flang/lib/Evaluate/characteristics.cpp index 37c62c9..542f122 100644 --- a/flang/lib/Evaluate/characteristics.cpp +++ b/flang/lib/Evaluate/characteristics.cpp @@ -458,7 +458,7 @@ std::optional<DummyDataObject> DummyDataObject::Characterize( } bool DummyDataObject::CanBePassedViaImplicitInterface( - std::string *whyNot) const { + std::string *whyNot, bool checkCUDA) const { if ((attrs & Attrs{Attr::Allocatable, Attr::Asynchronous, Attr::Optional, Attr::Pointer, Attr::Target, Attr::Value, Attr::Volatile}) @@ -482,7 +482,7 @@ bool DummyDataObject::CanBePassedViaImplicitInterface( *whyNot = "a dummy argument is polymorphic"; } return false; // 15.4.2.2(3)(f) - } else if (cudaDataAttr) { + } else if (checkCUDA && cudaDataAttr) { if (whyNot) { *whyNot = "a dummy argument has a CUDA data attribute"; } @@ -1012,9 +1012,10 @@ common::Intent DummyArgument::GetIntent() const { u); } -bool DummyArgument::CanBePassedViaImplicitInterface(std::string *whyNot) const { +bool DummyArgument::CanBePassedViaImplicitInterface( + std::string *whyNot, bool checkCUDA) const { if (const auto *object{std::get_if<DummyDataObject>(&u)}) { - return object->CanBePassedViaImplicitInterface(whyNot); + return object->CanBePassedViaImplicitInterface(whyNot, checkCUDA); } else if 
(const auto *proc{std::get_if<DummyProcedure>(&u)}) { return proc->CanBePassedViaImplicitInterface(whyNot); } else { @@ -1501,7 +1502,8 @@ std::optional<Procedure> Procedure::FromActuals(const ProcedureDesignator &proc, return callee; } -bool Procedure::CanBeCalledViaImplicitInterface(std::string *whyNot) const { +bool Procedure::CanBeCalledViaImplicitInterface( + std::string *whyNot, bool checkCUDA) const { if (attrs.test(Attr::Elemental)) { if (whyNot) { *whyNot = "the procedure is elemental"; @@ -1524,7 +1526,7 @@ bool Procedure::CanBeCalledViaImplicitInterface(std::string *whyNot) const { return false; } else { for (const DummyArgument &arg : dummyArguments) { - if (!arg.CanBePassedViaImplicitInterface(whyNot)) { + if (!arg.CanBePassedViaImplicitInterface(whyNot, checkCUDA)) { return false; } } diff --git a/flang/lib/Evaluate/tools.cpp b/flang/lib/Evaluate/tools.cpp index 3cfad03..b927fa3 100644 --- a/flang/lib/Evaluate/tools.cpp +++ b/flang/lib/Evaluate/tools.cpp @@ -1209,6 +1209,15 @@ parser::Message *AttachDeclaration( message.Attach(use->location(), "'%s' is USE-associated with '%s' in module '%s'"_en_US, symbol.name(), unhosted->name(), GetUsedModule(*use).name()); + } else if (const auto *common{ + unhosted->detailsIf<semantics::CommonBlockDetails>()}) { + parser::CharBlock at{unhosted->name()}; + if (at.empty()) { // blank COMMON, with or without // + at = common->sourceLocation(); + } + if (!at.empty()) { + message.Attach(at, "Declaration of /%s/"_en_US, unhosted->name()); + } } else { message.Attach( unhosted->name(), "Declaration of '%s'"_en_US, unhosted->name()); diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index f9b9b850..4a9e494 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -2222,6 +2222,9 @@ buildACCLoopOp(Fortran::lower::AbstractConverter &converter, addOperands(operands, operandSegments, tileOperands); addOperands(operands, operandSegments, cacheOperands); addOperands(operands, operandSegments, privateOperands); + // fill empty firstprivate operands since they are not permitted + // from OpenACC language perspective. + addOperands(operands, operandSegments, {}); addOperands(operands, operandSegments, reductionOperands); auto loopOp = createRegionOp<mlir::acc::LoopOp, mlir::acc::YieldOp>( diff --git a/flang/lib/Parser/Fortran-parsers.cpp b/flang/lib/Parser/Fortran-parsers.cpp index fbe629a..d33a18f 100644 --- a/flang/lib/Parser/Fortran-parsers.cpp +++ b/flang/lib/Parser/Fortran-parsers.cpp @@ -1100,14 +1100,14 @@ TYPE_PARSER(construct<EquivalenceObject>(indirect(designator))) // R873 common-stmt -> // COMMON [/ [common-block-name] /] common-block-object-list // [[,] / [common-block-name] / common-block-object-list]... 
-TYPE_PARSER( +TYPE_PARSER(sourced( construct<CommonStmt>("COMMON" >> defaulted("/" >> maybe(name) / "/"), nonemptyList("expected COMMON block objects"_err_en_US, Parser<CommonBlockObject>{}), many(maybe(","_tok) >> construct<CommonStmt::Block>("/" >> maybe(name) / "/", nonemptyList("expected COMMON block objects"_err_en_US, - Parser<CommonBlockObject>{}))))) + Parser<CommonBlockObject>{})))))) // R874 common-block-object -> variable-name [( array-spec )] TYPE_PARSER(construct<CommonBlockObject>(name, maybe(arraySpec))) diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp index 4939d8d..81c53aa 100644 --- a/flang/lib/Semantics/check-call.cpp +++ b/flang/lib/Semantics/check-call.cpp @@ -56,28 +56,44 @@ static void CheckImplicitInterfaceArg(evaluate::ActualArgument &arg, "%VAL argument must be a scalar numeric or logical expression"_err_en_US); } if (const auto *expr{arg.UnwrapExpr()}) { - if (const Symbol * base{GetFirstSymbol(*expr)}; - base && IsFunctionResult(*base)) { - context.NoteDefinedSymbol(*base); + if (const Symbol *base{GetFirstSymbol(*expr)}) { + const Symbol &symbol{GetAssociationRoot(*base)}; + if (IsFunctionResult(symbol)) { + context.NoteDefinedSymbol(symbol); + } } if (IsBOZLiteral(*expr)) { - messages.Say("BOZ argument requires an explicit interface"_err_en_US); + messages.Say("BOZ argument %s requires an explicit interface"_err_en_US, + expr->AsFortran()); } else if (evaluate::IsNullPointerOrAllocatable(expr)) { messages.Say( - "Null pointer argument requires an explicit interface"_err_en_US); + "Null pointer argument '%s' requires an explicit interface"_err_en_US, + expr->AsFortran()); } else if (auto named{evaluate::ExtractNamedEntity(*expr)}) { - const Symbol &symbol{named->GetLastSymbol()}; - if (IsAssumedRank(symbol)) { + const Symbol &resolved{ResolveAssociations(named->GetLastSymbol())}; + if (IsAssumedRank(resolved)) { messages.Say( - "Assumed rank argument requires an explicit interface"_err_en_US); + "Assumed rank argument '%s' requires an explicit interface"_err_en_US, + expr->AsFortran()); } + const Symbol &symbol{GetAssociationRoot(resolved)}; if (symbol.attrs().test(Attr::ASYNCHRONOUS)) { messages.Say( - "ASYNCHRONOUS argument requires an explicit interface"_err_en_US); + "ASYNCHRONOUS argument '%s' requires an explicit interface"_err_en_US, + expr->AsFortran()); } if (symbol.attrs().test(Attr::VOLATILE)) { messages.Say( - "VOLATILE argument requires an explicit interface"_err_en_US); + "VOLATILE argument '%s' requires an explicit interface"_err_en_US, + expr->AsFortran()); + } + if (const auto *object{symbol.detailsIf<ObjectEntityDetails>()}) { + if (object->cudaDataAttr()) { + messages.Warn(/*inModuleFile=*/false, context.languageFeatures(), + common::UsageWarning::CUDAUsage, + "Actual argument '%s' with CUDA data attributes should be passed via an explicit interface"_warn_en_US, + expr->AsFortran()); + } } } else if (auto argChars{characteristics::DummyArgument::FromActual( "actual argument", *expr, context.foldingContext(), @@ -2387,44 +2403,51 @@ bool CheckArguments(const characteristics::Procedure &proc, evaluate::FoldingContext foldingContext{context.foldingContext()}; parser::ContextualMessages &messages{foldingContext.messages()}; bool allowArgumentConversions{true}; + parser::Messages implicitBuffer; if (!explicitInterface || treatingExternalAsImplicit) { - parser::Messages buffer; { - auto restorer{messages.SetMessages(buffer)}; + auto restorer{messages.SetMessages(implicitBuffer)}; for (auto &actual : actuals) { 
if (actual) { CheckImplicitInterfaceArg(*actual, messages, context); } } } - if (!buffer.empty()) { + if (implicitBuffer.AnyFatalError()) { if (auto *msgs{messages.messages()}) { - msgs->Annex(std::move(buffer)); + msgs->Annex(std::move(implicitBuffer)); } return false; // don't pile on } allowArgumentConversions = false; } if (explicitInterface) { - auto buffer{CheckExplicitInterface(proc, actuals, context, &scope, + auto explicitBuffer{CheckExplicitInterface(proc, actuals, context, &scope, intrinsic, allowArgumentConversions, /*extentErrors=*/true, ignoreImplicitVsExplicit)}; - if (!buffer.empty()) { + if (!explicitBuffer.empty()) { if (treatingExternalAsImplicit) { - if (auto *msg{foldingContext.Warn( + // Combine all messages into one warning + if (auto *warning{messages.Warn(/*inModuleFile=*/false, + context.languageFeatures(), common::UsageWarning::KnownBadImplicitInterface, "If the procedure's interface were explicit, this reference would be in error"_warn_en_US)}) { - buffer.AttachTo(*msg, parser::Severity::Because); - } else { - buffer.clear(); + explicitBuffer.AttachTo(*warning, parser::Severity::Because); } + } else if (auto *msgs{messages.messages()}) { + msgs->Annex(std::move(explicitBuffer)); } - if (auto *msgs{messages.messages()}) { - msgs->Annex(std::move(buffer)); - } + // These messages override any in implicitBuffer. return false; } } - return true; + if (!implicitBuffer.empty()) { + if (auto *msgs{messages.messages()}) { + msgs->Annex(std::move(implicitBuffer)); + } + return false; + } else { + return true; // no messages + } } } // namespace Fortran::semantics diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index 7b88100..7593424 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -512,39 +512,111 @@ void CheckHelper::Check(const Symbol &symbol) { } void CheckHelper::CheckCommonBlock(const Symbol &symbol) { - auto restorer{messages_.SetLocation(symbol.name())}; CheckGlobalName(symbol); - if (symbol.attrs().test(Attr::BIND_C)) { + const auto &common{symbol.get<CommonBlockDetails>()}; + SourceName location{symbol.name()}; + if (location.empty()) { + location = common.sourceLocation(); + } + bool isBindCCommon{symbol.attrs().test(Attr::BIND_C)}; + if (isBindCCommon) { CheckBindC(symbol); - for (auto ref : symbol.get<CommonBlockDetails>().objects()) { - if (ref->has<ObjectEntityDetails>()) { - if (auto msgs{WhyNotInteroperableObject(*ref, - /*allowInteroperableType=*/false, /*forCommonBlock=*/true)}; - !msgs.empty()) { - parser::Message &reason{msgs.messages().front()}; - parser::Message *msg{nullptr}; - if (reason.IsFatal()) { - msg = messages_.Say(symbol.name(), - "'%s' may not be a member of BIND(C) COMMON block /%s/"_err_en_US, - ref->name(), symbol.name()); - } else { - msg = messages_.Say(symbol.name(), - "'%s' should not be a member of BIND(C) COMMON block /%s/"_warn_en_US, - ref->name(), symbol.name()); - } - if (msg) { - msg->Attach( - std::move(reason.set_severity(parser::Severity::Because))); - } + } + for (auto ref : symbol.get<CommonBlockDetails>().objects()) { + auto restorer{ + messages_.SetLocation(location.empty() ? 
ref->name() : location)}; + if (isBindCCommon && ref->has<ObjectEntityDetails>()) { + if (auto msgs{WhyNotInteroperableObject(*ref, + /*allowInteroperableType=*/false, /*forCommonBlock=*/true)}; + !msgs.empty()) { + parser::Message &reason{msgs.messages().front()}; + parser::Message *msg{nullptr}; + if (reason.IsFatal()) { + msg = messages_.Say( + "'%s' may not be a member of BIND(C) COMMON block /%s/"_err_en_US, + ref->name(), symbol.name()); + } else { + msg = messages_.Say( + "'%s' should not be a member of BIND(C) COMMON block /%s/"_warn_en_US, + ref->name(), symbol.name()); } + if (msg) { + msg = &msg->Attach( + std::move(reason.set_severity(parser::Severity::Because))); + } + evaluate::AttachDeclaration(msg, *ref); } } - } - for (auto ref : symbol.get<CommonBlockDetails>().objects()) { if (ref->test(Symbol::Flag::CrayPointee)) { - messages_.Say(ref->name(), - "Cray pointee '%s' may not be a member of a COMMON block"_err_en_US, - ref->name()); + evaluate::AttachDeclaration( + messages_.Say( + "Cray pointee '%s' may not be a member of COMMON block /%s/"_err_en_US, + ref->name(), symbol.name()), + *ref); + } + if (IsAllocatable(*ref)) { + evaluate::AttachDeclaration( + messages_.Say( + "ALLOCATABLE object '%s' may not appear in COMMON block /%s/"_err_en_US, + ref->name(), symbol.name()), + *ref); + } + if (ref->attrs().test(Attr::BIND_C)) { + evaluate::AttachDeclaration( + messages_.Say( + "BIND(C) object '%s' may not appear in COMMON block /%s/"_err_en_US, + ref->name(), symbol.name()), + *ref); + } + if (IsNamedConstant(*ref)) { + evaluate::AttachDeclaration( + messages_.Say( + "Named constant '%s' may not appear in COMMON block /%s/"_err_en_US, + ref->name(), symbol.name()), + *ref); + } + if (IsDummy(*ref)) { + evaluate::AttachDeclaration( + messages_.Say( + "Dummy argument '%s' may not appear in COMMON block /%s/"_err_en_US, + ref->name(), symbol.name()), + *ref); + } + if (ref->IsFuncResult()) { + evaluate::AttachDeclaration( + messages_.Say( + "Function result '%s' may not appear in COMMON block /%s/"_err_en_US, + ref->name(), symbol.name()), + *ref); + } + if (const auto *type{ref->GetType()}) { + if (type->category() == DeclTypeSpec::ClassStar) { + evaluate::AttachDeclaration( + messages_.Say( + "Unlimited polymorphic pointer '%s' may not appear in COMMON block /%s/"_err_en_US, + ref->name(), symbol.name()), + *ref); + } else if (const auto *derived{type->AsDerived()}) { + if (!IsSequenceOrBindCType(derived)) { + evaluate::AttachDeclaration( + evaluate::AttachDeclaration( + messages_.Say( + "Object '%s' whose derived type '%s' is neither SEQUENCE nor BIND(C) may not appear in COMMON block /%s/"_err_en_US, + ref->name(), derived->name(), symbol.name()), + derived->typeSymbol()), + *ref); + } else if (auto componentPath{ + derived->ComponentWithDefaultInitialization()}) { + evaluate::AttachDeclaration( + evaluate::AttachDeclaration( + messages_.Say( + "COMMON block /%s/ may not have the member '%s' whose derived type '%s' has a component '%s' that is ALLOCATABLE or has default initialization"_err_en_US, + symbol.name(), ref->name(), derived->name(), + *componentPath), + derived->typeSymbol()), + *ref); + } + } } } } @@ -2976,14 +3048,6 @@ static std::optional<std::string> DefinesGlobalName(const Symbol &symbol) { return std::nullopt; } -static bool IsSameSymbolFromHermeticModule( - const Symbol &symbol, const Symbol &other) { - return symbol.name() == other.name() && symbol.owner().IsModule() && - other.owner().IsModule() && symbol.owner() != other.owner() && - 
symbol.owner().GetName() && - symbol.owner().GetName() == other.owner().GetName(); -} - // 19.2 p2 void CheckHelper::CheckGlobalName(const Symbol &symbol) { if (auto global{DefinesGlobalName(symbol)}) { @@ -3001,7 +3065,7 @@ void CheckHelper::CheckGlobalName(const Symbol &symbol) { (!IsExternalProcedureDefinition(symbol) || !IsExternalProcedureDefinition(other))) { // both are procedures/BLOCK DATA, not both definitions - } else if (IsSameSymbolFromHermeticModule(symbol, other)) { + } else if (AreSameModuleSymbol(symbol, other)) { // Both symbols are the same thing. } else if (symbol.has<ModuleDetails>()) { Warn(common::LanguageFeature::BenignNameClash, symbol.name(), diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index 8365001..fc26888 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -3628,7 +3628,7 @@ std::optional<characteristics::Procedure> ExpressionAnalyzer::CheckCall( if (chars) { std::string whyNot; if (treatExternalAsImplicit && - !chars->CanBeCalledViaImplicitInterface(&whyNot)) { + !chars->CanBeCalledViaImplicitInterface(&whyNot, /*checkCUDA=*/false)) { if (auto *msg{Say(callSite, "References to the procedure '%s' require an explicit interface"_err_en_US, DEREF(procSymbol).name())}; diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 02fcf02..18fc638 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -625,7 +625,7 @@ public: for (const parser::OmpObject &obj : x.v) { auto *name{std::get_if<parser::Name>(&obj.u)}; if (name && !name->symbol) { - Resolve(*name, currScope().MakeCommonBlock(name->source)); + Resolve(*name, currScope().MakeCommonBlock(name->source, name->source)); } } } diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 5041a6a..b7c7603d 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -1106,8 +1106,9 @@ protected: // or nullptr on error. 
Symbol *DeclareStatementEntity(const parser::DoVariable &, const std::optional<parser::IntegerTypeSpec> &); - Symbol &MakeCommonBlockSymbol(const parser::Name &); - Symbol &MakeCommonBlockSymbol(const std::optional<parser::Name> &); + Symbol &MakeCommonBlockSymbol(const parser::Name &, SourceName); + Symbol &MakeCommonBlockSymbol( + const std::optional<parser::Name> &, SourceName); bool CheckUseError(const parser::Name &); void CheckAccessibility(const SourceName &, bool, Symbol &); void CheckCommonBlocks(); @@ -1244,8 +1245,6 @@ private: bool OkToAddComponent(const parser::Name &, const Symbol *extends = nullptr); ParamValue GetParamValue( const parser::TypeParamValue &, common::TypeParamAttr attr); - void CheckCommonBlockDerivedType( - const SourceName &, const Symbol &, UnorderedSymbolSet &); Attrs HandleSaveName(const SourceName &, Attrs); void AddSaveName(std::set<SourceName> &, const SourceName &); bool HandleUnrestrictedSpecificIntrinsicFunction(const parser::Name &); @@ -3963,8 +3962,26 @@ void ModuleVisitor::DoAddUse(SourceName location, SourceName localName, } } + auto AreSameModuleProcOrBothInterfaces{[](const Symbol &p1, + const Symbol &p2) { + if (IsProcedure(p1) && !IsPointer(p1) && IsProcedure(p2) && + !IsPointer(p2)) { + auto classification{ClassifyProcedure(p1)}; + if (classification == ClassifyProcedure(p2)) { + if (classification == ProcedureDefinitionClass::External) { + const auto *subp1{p1.detailsIf<SubprogramDetails>()}; + const auto *subp2{p2.detailsIf<SubprogramDetails>()}; + return subp1 && subp1->isInterface() && subp2 && subp2->isInterface(); + } else if (classification == ProcedureDefinitionClass::Module) { + return AreSameModuleSymbol(p1, p2); + } + } + } + return false; + }}; + auto AreSameProcedure{[&](const Symbol &p1, const Symbol &p2) { - if (&p1 == &p2) { + if (&p1.GetUltimate() == &p2.GetUltimate()) { return true; } else if (p1.name() != p2.name()) { return false; @@ -3972,31 +3989,16 @@ void ModuleVisitor::DoAddUse(SourceName location, SourceName localName, p2.attrs().test(Attr::INTRINSIC)) { return p1.attrs().test(Attr::INTRINSIC) && p2.attrs().test(Attr::INTRINSIC); - } else if (!IsProcedure(p1) || !IsProcedure(p2)) { - return false; - } else if (IsPointer(p1) || IsPointer(p2)) { - return false; - } else if (const auto *subp{p1.detailsIf<SubprogramDetails>()}; - subp && !subp->isInterface()) { - return false; // defined in module, not an external - } else if (const auto *subp{p2.detailsIf<SubprogramDetails>()}; - subp && !subp->isInterface()) { - return false; // defined in module, not an external + } else if (AreSameModuleProcOrBothInterfaces(p1, p2)) { + // Both are external interfaces, perhaps to the same procedure, + // or both are module procedures from modules with the same name. + auto p1Chars{evaluate::characteristics::Procedure::Characterize( + p1, GetFoldingContext())}; + auto p2Chars{evaluate::characteristics::Procedure::Characterize( + p2, GetFoldingContext())}; + return p1Chars && p2Chars && *p1Chars == *p2Chars; } else { - // Both are external interfaces, perhaps to the same procedure - auto class1{ClassifyProcedure(p1)}; - auto class2{ClassifyProcedure(p2)}; - if (class1 == ProcedureDefinitionClass::External && - class2 == ProcedureDefinitionClass::External) { - auto chars1{evaluate::characteristics::Procedure::Characterize( - p1, GetFoldingContext())}; - auto chars2{evaluate::characteristics::Procedure::Characterize( - p2, GetFoldingContext())}; - // same procedure interface defined identically in two modules? 
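// For illustration of the case handled here: with hermetic module files, a
// module's symbols are cloned into every dependent module file, so
// `use modfile80a` and `use modfile80b` (see the new modfile80.F90 test
// below) supply two distinct copies of one module procedure. Such copies are
// recognized via AreSameModuleSymbol() and then confirmed identical by
// comparing their characteristics.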
- return chars1 && chars2 && *chars1 == *chars2; - } else { - return false; - } + return false; } }}; @@ -4097,13 +4099,32 @@ void ModuleVisitor::DoAddUse(SourceName location, SourceName localName, localSymbol = &newSymbol; } if (useGeneric) { - // Combine two use-associated generics + // Combine two use-associated generics. localSymbol->attrs() = useSymbol.attrs() & ~Attrs{Attr::PUBLIC, Attr::PRIVATE}; localSymbol->flags() = useSymbol.flags(); AddGenericUse(*localGeneric, localName, useUltimate); - localGeneric->clear_derivedType(); - localGeneric->CopyFrom(*useGeneric); + // Don't duplicate specific procedures. + std::size_t originalLocalSpecifics{localGeneric->specificProcs().size()}; + std::size_t useSpecifics{useGeneric->specificProcs().size()}; + CHECK(originalLocalSpecifics == localGeneric->bindingNames().size()); + CHECK(useSpecifics == useGeneric->bindingNames().size()); + std::size_t j{0}; + for (const Symbol &useSpecific : useGeneric->specificProcs()) { + SourceName useBindingName{useGeneric->bindingNames()[j++]}; + bool isDuplicate{false}; + std::size_t k{0}; + for (const Symbol &localSpecific : localGeneric->specificProcs()) { + if (localGeneric->bindingNames()[k++] == useBindingName && + AreSameProcedure(localSpecific, useSpecific)) { + isDuplicate = true; + break; + } + } + if (!isDuplicate) { + localGeneric->AddSpecificProc(useSpecific, useBindingName); + } + } } localGeneric->clear_derivedType(); if (combinedDerivedType) { @@ -5564,7 +5585,7 @@ bool DeclarationVisitor::Pre(const parser::BindEntity &x) { if (kind == parser::BindEntity::Kind::Object) { symbol = &HandleAttributeStmt(Attr::BIND_C, name); } else { - symbol = &MakeCommonBlockSymbol(name); + symbol = &MakeCommonBlockSymbol(name, name.source); SetExplicitAttr(*symbol, Attr::BIND_C); } // 8.6.4(1) @@ -7147,7 +7168,7 @@ bool DeclarationVisitor::Pre(const parser::SaveStmt &x) { auto kind{std::get<parser::SavedEntity::Kind>(y.t)}; const auto &name{std::get<parser::Name>(y.t)}; if (kind == parser::SavedEntity::Kind::Common) { - MakeCommonBlockSymbol(name); + MakeCommonBlockSymbol(name, name.source); AddSaveName(specPartState_.saveInfo.commons, name.source); } else { HandleAttributeStmt(Attr::SAVE, name); @@ -7227,59 +7248,22 @@ void DeclarationVisitor::CheckCommonBlocks() { if (symbol.get<CommonBlockDetails>().objects().empty() && symbol.attrs().test(Attr::BIND_C)) { Say(symbol.name(), - "'%s' appears as a COMMON block in a BIND statement but not in" - " a COMMON statement"_err_en_US); - } - } - // check objects in common blocks - for (const auto &name : specPartState_.commonBlockObjects) { - const auto *symbol{currScope().FindSymbol(name)}; - if (!symbol) { - continue; - } - const auto &attrs{symbol->attrs()}; - if (attrs.test(Attr::ALLOCATABLE)) { - Say(name, - "ALLOCATABLE object '%s' may not appear in a COMMON block"_err_en_US); - } else if (attrs.test(Attr::BIND_C)) { - Say(name, - "Variable '%s' with BIND attribute may not appear in a COMMON block"_err_en_US); - } else if (IsNamedConstant(*symbol)) { - Say(name, - "A named constant '%s' may not appear in a COMMON block"_err_en_US); - } else if (IsDummy(*symbol)) { - Say(name, - "Dummy argument '%s' may not appear in a COMMON block"_err_en_US); - } else if (symbol->IsFuncResult()) { - Say(name, - "Function result '%s' may not appear in a COMMON block"_err_en_US); - } else if (const DeclTypeSpec * type{symbol->GetType()}) { - if (type->category() == DeclTypeSpec::ClassStar) { - Say(name, - "Unlimited polymorphic pointer '%s' may not appear in a COMMON 
block"_err_en_US); - } else if (const auto *derived{type->AsDerived()}) { - if (!IsSequenceOrBindCType(derived)) { - Say(name, - "Derived type '%s' in COMMON block must have the BIND or" - " SEQUENCE attribute"_err_en_US); - } - UnorderedSymbolSet typeSet; - CheckCommonBlockDerivedType(name, derived->typeSymbol(), typeSet); - } + "'%s' appears as a COMMON block in a BIND statement but not in a COMMON statement"_err_en_US); } } specPartState_.commonBlockObjects = {}; } -Symbol &DeclarationVisitor::MakeCommonBlockSymbol(const parser::Name &name) { - return Resolve(name, currScope().MakeCommonBlock(name.source)); +Symbol &DeclarationVisitor::MakeCommonBlockSymbol( + const parser::Name &name, SourceName location) { + return Resolve(name, currScope().MakeCommonBlock(name.source, location)); } Symbol &DeclarationVisitor::MakeCommonBlockSymbol( - const std::optional<parser::Name> &name) { + const std::optional<parser::Name> &name, SourceName location) { if (name) { - return MakeCommonBlockSymbol(*name); + return MakeCommonBlockSymbol(*name, location); } else { - return MakeCommonBlockSymbol(parser::Name{}); + return MakeCommonBlockSymbol(parser::Name{}, location); } } @@ -7287,43 +7271,6 @@ bool DeclarationVisitor::NameIsKnownOrIntrinsic(const parser::Name &name) { return FindSymbol(name) || HandleUnrestrictedSpecificIntrinsicFunction(name); } -// Check if this derived type can be in a COMMON block. -void DeclarationVisitor::CheckCommonBlockDerivedType(const SourceName &name, - const Symbol &typeSymbol, UnorderedSymbolSet &typeSet) { - if (auto iter{typeSet.find(SymbolRef{typeSymbol})}; iter != typeSet.end()) { - return; - } - typeSet.emplace(typeSymbol); - if (const auto *scope{typeSymbol.scope()}) { - for (const auto &pair : *scope) { - const Symbol &component{*pair.second}; - if (component.attrs().test(Attr::ALLOCATABLE)) { - Say2(name, - "Derived type variable '%s' may not appear in a COMMON block" - " due to ALLOCATABLE component"_err_en_US, - component.name(), "Component with ALLOCATABLE attribute"_en_US); - return; - } - const auto *details{component.detailsIf<ObjectEntityDetails>()}; - if (component.test(Symbol::Flag::InDataStmt) || - (details && details->init())) { - Say2(name, - "Derived type variable '%s' may not appear in a COMMON block due to component with default initialization"_err_en_US, - component.name(), "Component with default initialization"_en_US); - return; - } - if (details) { - if (const auto *type{details->type()}) { - if (const auto *derived{type->AsDerived()}) { - const Symbol &derivedTypeSymbol{derived->typeSymbol()}; - CheckCommonBlockDerivedType(name, derivedTypeSymbol, typeSet); - } - } - } - } - } -} - bool DeclarationVisitor::HandleUnrestrictedSpecificIntrinsicFunction( const parser::Name &name) { if (auto interface{context().intrinsics().IsSpecificIntrinsicFunction( @@ -9655,7 +9602,7 @@ void ResolveNamesVisitor::CreateCommonBlockSymbols( const parser::CommonStmt &commonStmt) { for (const parser::CommonStmt::Block &block : commonStmt.blocks) { const auto &[name, objects] = block.t; - Symbol &commonBlock{MakeCommonBlockSymbol(name)}; + Symbol &commonBlock{MakeCommonBlockSymbol(name, commonStmt.source)}; for (const auto &object : objects) { Symbol &obj{DeclareObjectEntity(std::get<parser::Name>(object.t))}; if (auto *details{obj.detailsIf<ObjectEntityDetails>()}) { diff --git a/flang/lib/Semantics/scope.cpp b/flang/lib/Semantics/scope.cpp index 9c5682b..4af371f 100644 --- a/flang/lib/Semantics/scope.cpp +++ b/flang/lib/Semantics/scope.cpp @@ -143,12 +143,13 @@ 
void Scope::add_crayPointer(const SourceName &name, Symbol &pointer) { crayPointers_.emplace(name, pointer); } -Symbol &Scope::MakeCommonBlock(const SourceName &name) { +Symbol &Scope::MakeCommonBlock(SourceName name, SourceName location) { const auto it{commonBlocks_.find(name)}; if (it != commonBlocks_.end()) { return *it->second; } else { - Symbol &symbol{MakeSymbol(name, Attrs{}, CommonBlockDetails{})}; + Symbol &symbol{MakeSymbol( + name, Attrs{}, CommonBlockDetails{name.empty() ? location : name})}; commonBlocks_.emplace(name, symbol); return symbol; } diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp index 6db11aa..bdb5377 100644 --- a/flang/lib/Semantics/semantics.cpp +++ b/flang/lib/Semantics/semantics.cpp @@ -313,15 +313,13 @@ private: /// Return the symbol of an initialized member if a COMMON block /// is initialized. Otherwise, return nullptr. static Symbol *CommonBlockIsInitialized(const Symbol &common) { - const auto &commonDetails = - common.get<Fortran::semantics::CommonBlockDetails>(); - + const auto &commonDetails{ + common.get<Fortran::semantics::CommonBlockDetails>()}; for (const auto &member : commonDetails.objects()) { if (IsInitialized(*member)) { return &*member; } } - // Common block may be initialized via initialized variables that are in an // equivalence with the common block members. for (const Fortran::semantics::EquivalenceSet &set : diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp index 28829d3..8eddd03 100644 --- a/flang/lib/Semantics/tools.cpp +++ b/flang/lib/Semantics/tools.cpp @@ -1870,4 +1870,9 @@ bool HadUseError( } } +bool AreSameModuleSymbol(const Symbol &symbol, const Symbol &other) { + return symbol.name() == other.name() && symbol.owner().IsModule() && + other.owner().IsModule() && symbol.owner().GetName() && + symbol.owner().GetName() == other.owner().GetName(); +} } // namespace Fortran::semantics diff --git a/flang/lib/Semantics/type.cpp b/flang/lib/Semantics/type.cpp index 964a37e..69e6ffa 100644 --- a/flang/lib/Semantics/type.cpp +++ b/flang/lib/Semantics/type.cpp @@ -206,14 +206,25 @@ bool DerivedTypeSpec::IsForwardReferenced() const { return typeSymbol_.get<DerivedTypeDetails>().isForwardReferenced(); } -bool DerivedTypeSpec::HasDefaultInitialization( +std::optional<std::string> DerivedTypeSpec::ComponentWithDefaultInitialization( bool ignoreAllocatable, bool ignorePointer) const { DirectComponentIterator components{*this}; - return bool{std::find_if( - components.begin(), components.end(), [&](const Symbol &component) { - return IsInitialized(component, /*ignoreDataStatements=*/true, - ignoreAllocatable, ignorePointer); - })}; + if (auto it{std::find_if(components.begin(), components.end(), + [ignoreAllocatable, ignorePointer](const Symbol &component) { + return (!ignoreAllocatable && IsAllocatable(component)) || + (!ignorePointer && IsPointer(component)) || + HasDeclarationInitializer(component); + })}) { + return it.BuildResultDesignatorName(); + } else { + return std::nullopt; + } +} + +bool DerivedTypeSpec::HasDefaultInitialization( + bool ignoreAllocatable, bool ignorePointer) const { + return ComponentWithDefaultInitialization(ignoreAllocatable, ignorePointer) + .has_value(); } bool DerivedTypeSpec::HasDestruction() const { diff --git a/flang/test/Semantics/boz-literal-constants.f90 b/flang/test/Semantics/boz-literal-constants.f90 index 4d957d1..67e9ce7 100644 --- a/flang/test/Semantics/boz-literal-constants.f90 +++ b/flang/test/Semantics/boz-literal-constants.f90 @@ -120,7
+120,7 @@ subroutine bozchecks !ERROR: Actual argument 'z'55'' associated with dummy argument 'c=' is not a variable or typed expression call explicit(z'deadbeef', o'666', b'01010101') - !ERROR: BOZ argument requires an explicit interface + !ERROR: BOZ argument z'12345' requires an explicit interface call implictSub(Z'12345') !ERROR: Output item must not be a BOZ literal constant diff --git a/flang/test/Semantics/call13.f90 b/flang/test/Semantics/call13.f90 index 3f7fb2e..90e1918 100644 --- a/flang/test/Semantics/call13.f90 +++ b/flang/test/Semantics/call13.f90 @@ -20,7 +20,7 @@ subroutine s(assumedRank, coarray, class, classStar, typeStar) real :: array(implicit01()) ! 15.4.2.2(2) !ERROR: Keyword 'keyword=' may not appear in a reference to a procedure with an implicit interface call implicit10(1, 2, keyword=3) ! 15.4.2.2(1) - !ERROR: Assumed rank argument requires an explicit interface + !ERROR: Assumed rank argument 'assumedrank' requires an explicit interface call implicit11(assumedRank) ! 15.4.2.2(3)(c) call implicit12(coarray) ! ok call implicit12a(coarray[1]) ! ok diff --git a/flang/test/Semantics/cuf24.cuf b/flang/test/Semantics/cuf24.cuf new file mode 100644 index 0000000..67c9d5d --- /dev/null +++ b/flang/test/Semantics/cuf24.cuf @@ -0,0 +1,40 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 -fopenacc + +subroutine implicitDeviceInSameFile(v) + real, device :: v(10) +end + +subroutine implicitNonDeviceInSameFile(v) + real :: v(10) +end + +program p + real, device :: dev(10) + real :: host(10) + interface + subroutine explicitDevice(v) + real, device :: v(10) + end + subroutine explicitNonDevice(v) + real :: v(10) + end + end interface + !WARNING: Actual argument 'dev' with CUDA data attributes should be passed via an explicit interface [-Wcuda-usage] + call implicit1(dev) + call implicit2(host) + !WARNING: Actual argument 'dev' with CUDA data attributes should be passed via an explicit interface [-Wcuda-usage] + call implicitDeviceInSameFile(dev) + !WARNING: If the procedure's interface were explicit, this reference would be in error [-Wknown-bad-implicit-interface] + !BECAUSE: dummy argument 'v=' has ATTRIBUTES(DEVICE) but its associated actual argument has no CUDA data attribute + call implicitDeviceInSameFile(host) + !WARNING: If the procedure's interface were explicit, this reference would be in error [-Wknown-bad-implicit-interface] + !BECAUSE: dummy argument 'v=' has no CUDA data attribute but its associated actual argument has ATTRIBUTES(DEVICE) + call implicitNonDeviceInSameFile(dev) + call implicitNonDeviceInSameFile(host) + call explicitDevice(dev) + !ERROR: dummy argument 'v=' has ATTRIBUTES(DEVICE) but its associated actual argument has no CUDA data attribute + call explicitDevice(host) + !ERROR: dummy argument 'v=' has no CUDA data attribute but its associated actual argument has ATTRIBUTES(DEVICE) + call explicitNonDevice(dev) + call explicitNonDevice(host) +end diff --git a/flang/test/Semantics/declarations01.f90 b/flang/test/Semantics/declarations01.f90 index 77cb6b4..3d8754e 100644 --- a/flang/test/Semantics/declarations01.f90 +++ b/flang/test/Semantics/declarations01.f90 @@ -7,7 +7,7 @@ function f1() result(x) integer, parameter :: x2 = 1 integer :: x3 - !ERROR: A named constant 'x2' may not appear in a COMMON block + !ERROR: Named constant 'x2' may not appear in COMMON block /blk/ common /blk/ x2, x3 end diff --git a/flang/test/Semantics/declarations08.f90 b/flang/test/Semantics/declarations08.f90 index 2c4027d..de7d5d7 100644 --- 
a/flang/test/Semantics/declarations08.f90 +++ b/flang/test/Semantics/declarations08.f90 @@ -2,7 +2,7 @@ pointer(p,x) !ERROR: Cray pointee 'y' may not be a member of an EQUIVALENCE group pointer(p,y) -!ERROR: Cray pointee 'x' may not be a member of a COMMON block +!ERROR: Cray pointee 'x' may not be a member of COMMON block // common x equivalence(y,z) !ERROR: Cray pointee 'v' may not be initialized diff --git a/flang/test/Semantics/modfile80.F90 b/flang/test/Semantics/modfile80.F90 new file mode 100644 index 0000000..425847e --- /dev/null +++ b/flang/test/Semantics/modfile80.F90 @@ -0,0 +1,25 @@ +!RUN: %flang_fc1 -DPART1 %s +!RUN: %flang_fc1 -DPART2 -fhermetic-module-files %s +!RUN: %flang_fc1 -DPART3 %s 2>&1 | FileCheck --allow-empty %s +!CHECK-NOT: error: + +#if defined PART1 +module modfile80a + interface generic + module procedure specific + end interface + contains + subroutine specific + end +end +#elif defined PART2 +module modfile80b + use modfile80a +end +#else +program test + use modfile80a + use modfile80b + call generic +end +#endif diff --git a/flang/test/Semantics/null01.f90 b/flang/test/Semantics/null01.f90 index 64c9881..ccf6179 100644 --- a/flang/test/Semantics/null01.f90 +++ b/flang/test/Semantics/null01.f90 @@ -116,9 +116,9 @@ subroutine test call optionalAllocatable(null(mold=ip0)) call optionalAllocatable(null(mold=ia0)) ! fine call optionalAllocatable(null()) ! fine - !ERROR: Null pointer argument requires an explicit interface + !ERROR: Null pointer argument 'NULL()' requires an explicit interface call implicit(null()) - !ERROR: Null pointer argument requires an explicit interface + !ERROR: Null pointer argument 'null(mold=ip0)' requires an explicit interface call implicit(null(mold=ip0)) !ERROR: A NULL() pointer is not allowed for 'x=' intrinsic argument print *, sin(null(rp0)) diff --git a/flang/test/Semantics/resolve42.f90 b/flang/test/Semantics/resolve42.f90 index 5a433d0..13caff0 100644 --- a/flang/test/Semantics/resolve42.f90 +++ b/flang/test/Semantics/resolve42.f90 @@ -28,17 +28,17 @@ subroutine s5 end function f6(x) result(r) - !ERROR: ALLOCATABLE object 'y' may not appear in a COMMON block - !ERROR: Dummy argument 'x' may not appear in a COMMON block + !ERROR: ALLOCATABLE object 'y' may not appear in COMMON block // + !ERROR: Dummy argument 'x' may not appear in COMMON block // + !ERROR: Function result 'r' may not appear in COMMON block // common y,x,z allocatable y - !ERROR: Function result 'r' may not appear in a COMMON block common r end module m7 - !ERROR: Variable 'w' with BIND attribute may not appear in a COMMON block - !ERROR: Variable 'z' with BIND attribute may not appear in a COMMON block + !ERROR: BIND(C) object 'w' may not appear in COMMON block // + !ERROR: BIND(C) object 'z' may not appear in COMMON block // common w,z integer, bind(c) :: z integer, bind(c,name="w") :: w @@ -48,8 +48,8 @@ module m8 type t end type class(*), pointer :: x - !ERROR: Unlimited polymorphic pointer 'x' may not appear in a COMMON block - !ERROR: Unlimited polymorphic pointer 'y' may not appear in a COMMON block + !ERROR: Unlimited polymorphic pointer 'x' may not appear in COMMON block // + !ERROR: Unlimited polymorphic pointer 'y' may not appear in COMMON block // common x, y class(*), pointer :: y end @@ -67,7 +67,7 @@ module m10 type t end type type(t) :: x - !ERROR: Derived type 'x' in COMMON block must have the BIND or SEQUENCE attribute + !ERROR: Object 'x' whose derived type 't' is neither SEQUENCE nor BIND(C) may not appear in COMMON block // common x end @@ -82,7
+82,7 @@ module m11 integer:: c end type type(t2) :: x2 - !ERROR: Derived type variable 'x2' may not appear in a COMMON block due to ALLOCATABLE component + !ERROR: COMMON block /c2/ may not have the member 'x2' whose derived type 't2' has a component '%b%a' that is ALLOCATABLE or has default initialization common /c2/ x2 end @@ -97,7 +97,7 @@ module m12 integer:: c end type type(t2) :: x2 - !ERROR: Derived type variable 'x2' may not appear in a COMMON block due to component with default initialization + !ERROR: COMMON block /c3/ may not have the member 'x2' whose derived type 't2' has a component '%b%a' that is ALLOCATABLE or has default initialization common /c3/ x2 end @@ -112,3 +112,21 @@ subroutine s14 !ERROR: 'c' appears as a COMMON block in a BIND statement but not in a COMMON statement bind(c) :: /c/ end + +module m15 + interface + subroutine sub + end subroutine + end interface + type t1 + sequence + procedure(sub), pointer, nopass :: pp => sub + end type + type t2 + sequence + type(t1) :: a + end type + type(t2) :: x2 + !ERROR: COMMON block /c4/ may not have the member 'x2' whose derived type 't2' has a component '%a%pp' that is ALLOCATABLE or has default initialization + common /c4/ x2 +end diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h index 51b1363..a2ca577 100644 --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -223,6 +223,7 @@ struct Configuration { bool warnThinArchiveMissingMembers; bool disableVerify; bool separateCstringLiteralSections; + bool tailMergeStrings; bool callGraphProfileSort = false; llvm::StringRef printSymbolOrder; diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index 7ce987e..94f441b 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1986,6 +1986,8 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS, config->separateCstringLiteralSections = args.hasFlag(OPT_separate_cstring_literal_sections, OPT_no_separate_cstring_literal_sections, false); + config->tailMergeStrings = + args.hasFlag(OPT_tail_merge_strings, OPT_no_tail_merge_strings, false); auto IncompatWithCGSort = [&](StringRef firstArgStr) { // Throw an error only if --call-graph-profile-sort is explicitly specified diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td index 4eeb8fb..be1a1cc 100644 --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -1091,6 +1091,10 @@ defm separate_cstring_literal_sections "Emit all cstring literals into the __cstring section. As a special " "case, the __objc_methname section will still be emitted. (default)">, Group<grp_rare>; +defm tail_merge_strings + : BB<"tail-merge-strings", "Enable string tail merging", + "Disable string tail merging to improve link-time performance">, + Group<grp_rare>; def grp_deprecated : OptionGroup<"deprecated">, HelpText<"DEPRECATED">; diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 903ba78..187cccb 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -1746,6 +1746,8 @@ void CStringSection::finalizeContents() { void DeduplicatedCStringSection::finalizeContents() { // Find the largest alignment required for each string. 
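// (Copies of one literal can arrive from input sections with different
// alignments; the map below keeps the maximum so the single surviving copy
// satisfies every referencing section.)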
DenseMap<CachedHashStringRef, Align> strToAlignment; + // Used for tail merging only + std::vector<CachedHashStringRef> deduplicatedStrs; for (const CStringInputSection *isec : inputs) { for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) { if (!piece.live) @@ -1754,17 +1756,66 @@ void DeduplicatedCStringSection::finalizeContents() { assert(isec->align != 0); auto align = getStringPieceAlignment(isec, piece); auto [it, wasInserted] = strToAlignment.try_emplace(s, align); + if (config->tailMergeStrings && wasInserted) + deduplicatedStrs.push_back(s); if (!wasInserted && it->second < align) it->second = align; } } + // Like a lexicographical sort, except we read strings in reverse and take + // the longest string first. + // TODO: We could improve performance by implementing our own sort that avoids + // comparing characters we know to be the same. See + // StringTableBuilder::multikeySort() for details. + llvm::sort(deduplicatedStrs, [](const auto &left, const auto &right) { + for (const auto &[leftChar, rightChar] : + llvm::zip(llvm::reverse(left.val()), llvm::reverse(right.val()))) { + if (leftChar == rightChar) + continue; + return leftChar < rightChar; + } + return left.size() > right.size(); + }); + std::optional<CachedHashStringRef> mergeCandidate; + DenseMap<CachedHashStringRef, std::pair<CachedHashStringRef, uint64_t>> + tailMergeMap; + for (auto &s : deduplicatedStrs) { + if (!mergeCandidate || !mergeCandidate->val().ends_with(s.val())) { + mergeCandidate = s; + continue; + } + uint64_t tailMergeOffset = mergeCandidate->size() - s.size(); + // TODO: If the tail offset is incompatible with this string's alignment, we + // might be able to find another superstring with a compatible tail offset. + // The difficulty is how to do this efficiently. + const auto &align = strToAlignment.at(s); + if (!isAligned(align, tailMergeOffset)) + continue; + auto &mergeCandidateAlign = strToAlignment[*mergeCandidate]; + if (align > mergeCandidateAlign) + mergeCandidateAlign = align; + tailMergeMap.try_emplace(s, *mergeCandidate, tailMergeOffset); + } + // Sort the strings for performance and compression size win, and then // assign an offset for each string and save it to the corresponding // StringPieces for easy access. for (auto &[isec, i] : priorityBuilder.buildCStringPriorities(inputs)) { auto &piece = isec->pieces[i]; auto s = isec->getCachedHashStringRef(i); + // Any string can be tail merged with itself with an offset of zero + uint64_t tailMergeOffset = 0; + auto mergeIt = + config->tailMergeStrings ? tailMergeMap.find(s) : tailMergeMap.end(); + if (mergeIt != tailMergeMap.end()) { + auto &[superString, offset] = mergeIt->second; + // s can be tail merged with superString. Do not lay out s. Instead, lay + // out superString if we haven't already + assert(superString.val().ends_with(s.val())); + s = superString; + tailMergeOffset = offset; + } auto [it, wasInserted] = stringOffsetMap.try_emplace(s, /*placeholder*/ 0); if (wasInserted) { // Avoid computing the offset until we are sure we will need to @@ -1772,9 +1823,12 @@ void DeduplicatedCStringSection::finalizeContents() { it->second = offset; size = offset + s.size() + 1; // account for null terminator } - // If the string was already in stringOffsetMap, it is a duplicate and we // only need to assign the offset.
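// Worked example of the tail-merge mapping built above, using strings from
// the new cstring-tailmerge-objc.s test: read in reverse, "error:" and
// "withBar:error:" sort adjacently with the longer string first, so the scan
// visits "withBar:error:" and then "error:". Since "withBar:error:" ends
// with "error:", tailMergeMap records "error:" -> ("withBar:error:", 8), and
// only the superstring is laid out; "error:" resolves to its outSecOff + 8.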
- piece.outSecOff = it->second; + piece.outSecOff = it->second + tailMergeOffset; + if (mergeIt != tailMergeMap.end()) { + auto &tailMergedString = mergeIt->first; + stringOffsetMap[tailMergedString] = piece.outSecOff; + assert(isAligned(strToAlignment.at(tailMergedString), piece.outSecOff)); + } } for (CStringInputSection *isec : inputs) isec->isFinal = true; diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 566dde6..29db1cd 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -46,6 +46,8 @@ MachO Improvements * ``--separate-cstring-literal-sections`` emits cstring literal sections into sections defined by their section name. (`#158720 <https://github.com/llvm/llvm-project/pull/158720>`_) +* ``--tail-merge-strings`` enables tail merging of cstring literals. + (`#161262 <https://github.com/llvm/llvm-project/pull/161262>`_) WebAssembly Improvements ------------------------ diff --git a/lld/test/MachO/cstring-tailmerge-objc.s b/lld/test/MachO/cstring-tailmerge-objc.s new file mode 100644 index 0000000..46b2bbf --- /dev/null +++ b/lld/test/MachO/cstring-tailmerge-objc.s @@ -0,0 +1,144 @@ +; REQUIRES: aarch64 +; RUN: rm -rf %t && split-file %s %t + +; Test that ObjC method names are tail merged and +; ObjCSelRefsHelper::makeSelRef() still works correctly + +; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o +; RUN: %lld -dylib -arch arm64 --tail-merge-strings %t/a.o -o %t/a +; RUN: llvm-objdump --macho --section="__TEXT,__objc_methname" %t/a | FileCheck %s --implicit-check-not=error + +; RUN: %lld -dylib -arch arm64 --no-tail-merge-strings %t/a.o -o %t/nomerge +; RUN: llvm-objdump --macho --section="__TEXT,__objc_methname" %t/nomerge | FileCheck %s --check-prefixes=CHECK,NOMERGE --implicit-check-not=error + +; CHECK: withBar:error: +; NOMERGE: error: + +;--- a.mm +__attribute__((objc_root_class)) +@interface Foo +- (void)withBar:(int)bar error:(int)error; +- (void)error:(int)error; +@end + +@implementation Foo +- (void)withBar:(int)bar error:(int)error {} +- (void)error:(int)error {} +@end + +void *_objc_empty_cache; +void *_objc_empty_vtable; +;--- gen +clang -Oz -target arm64-apple-darwin a.mm -S -o - +;--- a.s + .build_version macos, 11, 0 + .section __TEXT,__text,regular,pure_instructions + .p2align 2 ; -- Begin function -[Foo withBar:error:] +"-[Foo withBar:error:]": ; @"\01-[Foo withBar:error:]" + .cfi_startproc +; %bb.0: + ret + .cfi_endproc + ; -- End function + .p2align 2 ; -- Begin function -[Foo error:] +"-[Foo error:]": ; @"\01-[Foo error:]" + .cfi_startproc +; %bb.0: + ret + .cfi_endproc + ; -- End function + .globl __objc_empty_vtable ; @_objc_empty_vtable +.zerofill __DATA,__common,__objc_empty_vtable,8,3 + .section __DATA,__objc_data + .globl _OBJC_CLASS_$_Foo ; @"OBJC_CLASS_$_Foo" + .p2align 3, 0x0 +_OBJC_CLASS_$_Foo: + .quad _OBJC_METACLASS_$_Foo + .quad 0 + .quad __objc_empty_cache + .quad __objc_empty_vtable + .quad __OBJC_CLASS_RO_$_Foo + + .globl _OBJC_METACLASS_$_Foo ; @"OBJC_METACLASS_$_Foo" + .p2align 3, 0x0 +_OBJC_METACLASS_$_Foo: + .quad _OBJC_METACLASS_$_Foo + .quad _OBJC_CLASS_$_Foo + .quad __objc_empty_cache + .quad __objc_empty_vtable + .quad __OBJC_METACLASS_RO_$_Foo + + .section __TEXT,__objc_classname,cstring_literals +l_OBJC_CLASS_NAME_: ; @OBJC_CLASS_NAME_ + .asciz "Foo" + + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_METACLASS_RO_$_Foo" +__OBJC_METACLASS_RO_$_Foo: + .long 3 ; 0x3 + .long 40 ; 0x28 + .long 40 ; 0x28 + .space 4 + .quad 0 + .quad l_OBJC_CLASS_NAME_ + .quad 0 + .quad 
0 + .quad 0 + .quad 0 + .quad 0 + + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_: ; @OBJC_METH_VAR_NAME_ + .asciz "withBar:error:" + + .section __TEXT,__objc_methtype,cstring_literals +l_OBJC_METH_VAR_TYPE_: ; @OBJC_METH_VAR_TYPE_ + .asciz "v24@0:8i16i20" + + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_.1: ; @OBJC_METH_VAR_NAME_.1 + .asciz "error:" + + .section __TEXT,__objc_methtype,cstring_literals +l_OBJC_METH_VAR_TYPE_.2: ; @OBJC_METH_VAR_TYPE_.2 + .asciz "v20@0:8i16" + + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_$_INSTANCE_METHODS_Foo" +__OBJC_$_INSTANCE_METHODS_Foo: + .long 24 ; 0x18 + .long 2 ; 0x2 + .quad l_OBJC_METH_VAR_NAME_ + .quad l_OBJC_METH_VAR_TYPE_ + .quad "-[Foo withBar:error:]" + .quad l_OBJC_METH_VAR_NAME_.1 + .quad l_OBJC_METH_VAR_TYPE_.2 + .quad "-[Foo error:]" + + .p2align 3, 0x0 ; @"_OBJC_CLASS_RO_$_Foo" +__OBJC_CLASS_RO_$_Foo: + .long 2 ; 0x2 + .long 0 ; 0x0 + .long 0 ; 0x0 + .space 4 + .quad 0 + .quad l_OBJC_CLASS_NAME_ + .quad __OBJC_$_INSTANCE_METHODS_Foo + .quad 0 + .quad 0 + .quad 0 + .quad 0 + + .globl __objc_empty_cache ; @_objc_empty_cache +.zerofill __DATA,__common,__objc_empty_cache,8,3 + .section __DATA,__objc_classlist,regular,no_dead_strip + .p2align 3, 0x0 ; @"OBJC_LABEL_CLASS_$" +l_OBJC_LABEL_CLASS_$: + .quad _OBJC_CLASS_$_Foo + + .section __DATA,__objc_imageinfo,regular,no_dead_strip +L_OBJC_IMAGE_INFO: + .long 0 + .long 64 + +.subsections_via_symbols diff --git a/lld/test/MachO/cstring-tailmerge.s b/lld/test/MachO/cstring-tailmerge.s new file mode 100644 index 0000000..740f971 --- /dev/null +++ b/lld/test/MachO/cstring-tailmerge.s @@ -0,0 +1,85 @@ +; REQUIRES: aarch64 +; RUN: rm -rf %t && split-file %s %t + +; RUN: sed "s/<ALIGN>/0/g" %t/align.s.template > %t/align-1.s +; RUN: sed "s/<ALIGN>/1/g" %t/align.s.template > %t/align-2.s +; RUN: sed "s/<ALIGN>/2/g" %t/align.s.template > %t/align-4.s + +; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/first.s -o %t/first.o +; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/align-1.s -o %t/align-1.o +; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/align-2.s -o %t/align-2.o +; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/align-4.s -o %t/align-4.o + +; RUN: %lld -dylib -arch arm64 --tail-merge-strings %t/first.o %t/align-1.o -o %t/align-1 +; RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-1 | FileCheck %s --check-prefixes=CHECK,ALIGN1 + +; RUN: %lld -dylib -arch arm64 --tail-merge-strings %t/first.o %t/align-2.o -o %t/align-2 +; RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-2 | FileCheck %s --check-prefixes=CHECK,ALIGN2 + +; RUN: %lld -dylib -arch arm64 --tail-merge-strings %t/first.o %t/align-4.o -o %t/align-4 +; RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-4 | FileCheck %s --check-prefixes=CHECK,ALIGN4 + +; CHECK: Contents of (__TEXT,__cstring) section +; CHECK: [[#%.16x,START:]] get awkward offset{{$}} + +; ALIGN1: [[#%.16x,START+19]] myotherlongstr{{$}} +; ALIGN1: [[#%.16x,START+19+15]] otherstr{{$}} + +; ALIGN2: [[#%.16x,START+20]] myotherlongstr{{$}} +; ALIGN2: [[#%.16x,START+20+16]] longstr{{$}} +; ALIGN2: [[#%.16x,START+20+16+8]] otherstr{{$}} +; ALIGN2: [[#%.16x,START+20+16+8+10]] str{{$}} + +; ALIGN4: [[#%.16x,START+20]] myotherlongstr{{$}} +; ALIGN4: [[#%.16x,START+20+16]] otherlongstr{{$}} +; ALIGN4: [[#%.16x,START+20+16+16]] longstr{{$}} +; ALIGN4: [[#%.16x,START+20+16+16+8]] otherstr{{$}} +; ALIGN4: 
[[#%.16x,START+20+16+16+8+12]] str{{$}} + +; CHECK: SYMBOL TABLE: + +; ALIGN1: [[#%.16x,START+19]] l O __TEXT,__cstring _myotherlongstr +; ALIGN1: [[#%.16x,START+21]] l O __TEXT,__cstring _otherlongstr +; ALIGN1: [[#%.16x,START+26]] l O __TEXT,__cstring _longstr +; ALIGN1: [[#%.16x,START+34]] l O __TEXT,__cstring _otherstr +; ALIGN1: [[#%.16x,START+39]] l O __TEXT,__cstring _str + +; ALIGN2: [[#%.16x,START+20]] l O __TEXT,__cstring _myotherlongstr +; ALIGN2: [[#%.16x,START+20+2]] l O __TEXT,__cstring _otherlongstr +; ALIGN2: [[#%.16x,START+20+16]] l O __TEXT,__cstring _longstr +; ALIGN2: [[#%.16x,START+20+16+8]] l O __TEXT,__cstring _otherstr +; ALIGN2: [[#%.16x,START+20+16+8+10]] l O __TEXT,__cstring _str + +; ALIGN4: [[#%.16x,START+20]] l O __TEXT,__cstring _myotherlongstr +; ALIGN4: [[#%.16x,START+20+16]] l O __TEXT,__cstring _otherlongstr +; ALIGN4: [[#%.16x,START+20+16+16]] l O __TEXT,__cstring _longstr +; ALIGN4: [[#%.16x,START+20+16+16+8]] l O __TEXT,__cstring _otherstr +; ALIGN4: [[#%.16x,START+20+16+16+8+12]] l O __TEXT,__cstring _str + +;--- first.s +.cstring +.p2align 2 +.asciz "get awkward offset" ; length = 19 + +;--- align.s.template +.cstring + +.p2align <ALIGN> + _myotherlongstr: +.asciz "myotherlongstr" ; length = 15 + +.p2align <ALIGN> + _otherlongstr: +.asciz "otherlongstr" ; length = 13, tail offset = 2 + +.p2align <ALIGN> + _longstr: +.asciz "longstr" ; length = 8, tail offset = 7 + +.p2align <ALIGN> + _otherstr: +.asciz "otherstr" ; length = 9 + +.p2align <ALIGN> + _str: +.asciz "str" ; length = 4, tail offset = 5 diff --git a/lld/test/MachO/order-file-cstring-tailmerge.s b/lld/test/MachO/order-file-cstring-tailmerge.s new file mode 100644 index 0000000..20a4d16 --- /dev/null +++ b/lld/test/MachO/order-file-cstring-tailmerge.s @@ -0,0 +1,56 @@ +; REQUIRES: aarch64 +; RUN: rm -rf %t && split-file %s %t + +; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o +; RUN: %lld -dylib -arch arm64 --no-tail-merge-strings -order_file %t/orderfile.txt %t/a.o -o - | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s +; RUN: %lld -dylib -arch arm64 --tail-merge-strings -order_file %t/orderfile.txt %t/a.o -o - | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=MERGED + +; CHECK: _str2 +; CHECK: _str1 +; CHECK: _superstr2 +; CHECK: _superstr3 +; CHECK: _superstr1 +; CHECK: _str3 + +; str1 has a higher priority than superstr1, so str1 must be ordered before +; str3, even though superstr1 is before superstr3 in the orderfile. + +; MERGED: _superstr2 +; MERGED: _str2 +; MERGED: _superstr1 +; MERGED: _str1 +; MERGED: _superstr3 +; MERGED: _str3 + +;--- a.s +.cstring + _superstr1: +.asciz "superstr1" + _str1: +.asciz "str1" + _superstr2: +.asciz "superstr2" + _str2: +.asciz "str2" + _superstr3: +.asciz "superstr3" + _str3: +.asciz "str3" + +; TODO: We could use update_test_body.py to generate the hashes for the +; orderfile. Unfortunately, it seems that LLVM has a different hash +; implementation than the xxh64sum tool. See +; DeduplicatedCStringSection::getStringOffset() for hash details. 
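; For context: each orderfile entry below names a cstring as "CSTR;" followed
; by the decimal value of xxh64(string) & 0x7FFFFFFF, which is what the shell
; loop in the next comment computes; e.g. "CSTR;1236462241 # str2" gives
; "str2" the highest priority.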
+; +; while IFS="" read -r line; do +; echo -n $line | xxh64sum | awk '{printf "CSTR;%010d", and(strtonum("0x"$1), 0x7FFFFFFF)}' +; echo " # $line" +; done < orderfile.txt.template + +;--- orderfile.txt +CSTR;1236462241 # str2 +CSTR;1526669509 # str1 +CSTR;1563550684 # superstr2 +CSTR;1044337806 # superstr3 +CSTR;262417687 # superstr1 +CSTR;717161398 # str3 diff --git a/lldb/include/lldb/Host/JSONTransport.h b/lldb/include/lldb/Host/JSONTransport.h index c73021d..1453316 100644 --- a/lldb/include/lldb/Host/JSONTransport.h +++ b/lldb/include/lldb/Host/JSONTransport.h @@ -18,6 +18,7 @@ #include "lldb/Utility/IOObject.h" #include "lldb/Utility/Status.h" #include "lldb/lldb-forward.h" +#include "llvm/ADT/FunctionExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" @@ -25,13 +26,23 @@ #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/JSON.h" #include "llvm/Support/raw_ostream.h" +#include <functional> +#include <mutex> +#include <optional> #include <string> #include <system_error> +#include <type_traits> +#include <utility> #include <variant> #include <vector> +#if __cplusplus >= 202002L +#include <concepts> +#endif -namespace lldb_private { +namespace lldb_private::transport { +/// An error to indicate that the transport reached EOF but there were still +/// unhandled contents in the read buffer. class TransportUnhandledContentsError : public llvm::ErrorInfo<TransportUnhandledContentsError> { public: @@ -50,17 +61,75 @@ private: std::string m_unhandled_contents; }; +/// An error to indicate that the parameters of a Req, Resp or Evt could not be +/// deserialized. +class InvalidParams : public llvm::ErrorInfo<InvalidParams> { +public: + static char ID; + + explicit InvalidParams(std::string method, std::string context) + : m_method(std::move(method)), m_context(std::move(context)) {} + + void log(llvm::raw_ostream &OS) const override; + std::error_code convertToErrorCode() const override; + +private: + /// The JSONRPC remote method call. + std::string m_method; + + /// Additional context from the parsing failure, e.g. "missing value at + /// (root)[1].str". + std::string m_context; +}; + +/// An error to indicate that no handler was registered for a given method. +class MethodNotFound : public llvm::ErrorInfo<MethodNotFound> { +public: + static char ID; + + static constexpr int kErrorCode = -32601; + + explicit MethodNotFound(std::string method) : m_method(std::move(method)) {} + + void log(llvm::raw_ostream &OS) const override; + std::error_code convertToErrorCode() const override; + +private: + std::string m_method; +}; + +#if __cplusplus >= 202002L +/// A ProtocolDescriptor details the types used in a JSONTransport for handling +/// transport communication. +template <typename T> +concept ProtocolDescriptor = requires { + typename T::Id; + typename T::Req; + typename T::Resp; + typename T::Evt; +}; +#endif + /// A transport is responsible for maintaining the connection to a client /// application, and reading/writing structured messages to it. /// -/// Transports have limited thread safety requirements: +/// JSONTransport has limited thread safety requirements: /// - Messages will not be sent concurrently. /// - Messages MAY be sent while Run() is reading, or its callback is active.
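// For illustration, a minimal descriptor that would satisfy
// ProtocolDescriptor; these types are hypothetical stand-ins, not part of
// this header:
//   struct ExampleProto {
//     using Id = int64_t;
//     struct Req { Id id; std::string method;
//                  std::optional<llvm::json::Value> params; };
//     struct Resp { Id id; std::optional<llvm::json::Value> result;
//                   std::optional<std::string> error; };
//     struct Evt { std::string method;
//                  std::optional<llvm::json::Value> params; };
//   };
// A concrete transport could then be instantiated as, e.g.,
// JSONRPCTransport<ExampleProto>.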
-template <typename Req, typename Resp, typename Evt> class Transport { +/// +#if __cplusplus >= 202002L +template <ProtocolDescriptor Proto> +#else +template <typename Proto> +#endif +class JSONTransport { public: + using Req = typename Proto::Req; + using Resp = typename Proto::Resp; + using Evt = typename Proto::Evt; using Message = std::variant<Req, Resp, Evt>; - virtual ~Transport() = default; + virtual ~JSONTransport() = default; /// Sends an event, a message that does not require a response. virtual llvm::Error Send(const Evt &) = 0; @@ -69,7 +138,8 @@ public: /// Sends a response to a specific request. virtual llvm::Error Send(const Resp &) = 0; - /// Implemented to handle incoming messages. (See Run() below). + /// Implemented to handle incoming messages. (See `RegisterMessageHandler()` + /// below). class MessageHandler { public: virtual ~MessageHandler() = default; @@ -90,8 +160,6 @@ public: virtual void OnClosed() = 0; }; - using MessageHandlerSP = std::shared_ptr<MessageHandler>; - /// RegisterMessageHandler registers the Transport with the given MainLoop and /// handles any incoming messages using the given MessageHandler. /// @@ -108,18 +176,23 @@ protected: }; /// An IOTransport sends and receives messages using an IOObject. -template <typename Req, typename Resp, typename Evt> -class IOTransport : public Transport<Req, Resp, Evt> { +template <typename Proto> class IOTransport : public JSONTransport<Proto> { public: - using Transport<Req, Resp, Evt>::Transport; - using MessageHandler = typename Transport<Req, Resp, Evt>::MessageHandler; + using Message = typename JSONTransport<Proto>::Message; + using MessageHandler = typename JSONTransport<Proto>::MessageHandler; IOTransport(lldb::IOObjectSP in, lldb::IOObjectSP out) : m_in(in), m_out(out) {} - llvm::Error Send(const Evt &evt) override { return Write(evt); } - llvm::Error Send(const Req &req) override { return Write(req); } - llvm::Error Send(const Resp &resp) override { return Write(resp); } + llvm::Error Send(const typename Proto::Evt &evt) override { + return Write(evt); + } + llvm::Error Send(const typename Proto::Req &req) override { + return Write(req); + } + llvm::Error Send(const typename Proto::Resp &resp) override { + return Write(resp); + } llvm::Expected<MainLoop::ReadHandleUP> RegisterMessageHandler(MainLoop &loop, MessageHandler &handler) override { @@ -139,7 +212,7 @@ public: /// detail. static constexpr size_t kReadBufferSize = 1024; - // FIXME: Write should be protected. +protected: llvm::Error Write(const llvm::json::Value &message) { this->Logv("<-- {0}", message); std::string output = Encode(message); @@ -147,7 +220,6 @@ public: return m_out->Write(output.data(), bytes_written).takeError(); } -protected: virtual llvm::Expected<std::vector<std::string>> Parse() = 0; virtual std::string Encode(const llvm::json::Value &message) = 0; @@ -174,9 +246,8 @@ private: } for (const std::string &raw_message : *raw_messages) { - llvm::Expected<typename Transport<Req, Resp, Evt>::Message> message = - llvm::json::parse<typename Transport<Req, Resp, Evt>::Message>( - raw_message); + llvm::Expected<Message> message = + llvm::json::parse<Message>(raw_message); if (!message) { handler.OnError(message.takeError()); return; @@ -201,10 +272,14 @@ private: }; /// A transport class for JSON with an HTTP header.
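// Framing sketch for this transport (Content-Length header framing, as used
// by DAP and LSP); the payload shown is illustrative:
//   Content-Length: 17\r\n
//   \r\n
//   {"jsonrpc":"2.0"}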
-template <typename Req, typename Resp, typename Evt> -class HTTPDelimitedJSONTransport : public IOTransport<Req, Resp, Evt> { +#if __cplusplus >= 202002L +template <ProtocolDescriptor Proto> +#else +template <typename Proto> +#endif +class HTTPDelimitedJSONTransport : public IOTransport<Proto> { public: - using IOTransport<Req, Resp, Evt>::IOTransport; + using IOTransport<Proto>::IOTransport; protected: /// Encodes messages based on @@ -230,8 +305,8 @@ protected: for (const llvm::StringRef &header : llvm::split(headers, kHeaderSeparator)) { auto [key, value] = header.split(kHeaderFieldSeparator); - // 'Content-Length' is the only meaningful key at the moment. Others are - // ignored. + // 'Content-Length' is the only meaningful key at the moment. Others + // are ignored. if (!key.equals_insensitive(kHeaderContentLength)) continue; @@ -268,10 +343,14 @@ protected: }; /// A transport class for JSON RPC. -template <typename Req, typename Resp, typename Evt> -class JSONRPCTransport : public IOTransport<Req, Resp, Evt> { +#if __cplusplus >= 202002L +template <ProtocolDescriptor Proto> +#else +template <typename Proto> +#endif +class JSONRPCTransport : public IOTransport<Proto> { public: - using IOTransport<Req, Resp, Evt>::IOTransport; + using IOTransport<Proto>::IOTransport; protected: std::string Encode(const llvm::json::Value &message) override { @@ -297,6 +376,497 @@ protected: static constexpr llvm::StringLiteral kMessageSeparator = "\n"; }; -} // namespace lldb_private +/// A handler for the response to an outgoing request. +template <typename T> +using Reply = + std::conditional_t<std::is_void_v<T>, + llvm::unique_function<void(llvm::Error)>, + llvm::unique_function<void(llvm::Expected<T>)>>; + +namespace detail { +template <typename R, typename P> struct request_t final { + using type = llvm::unique_function<void(const P &, Reply<R>)>; +}; +template <typename R> struct request_t<R, void> final { + using type = llvm::unique_function<void(Reply<R>)>; +}; +template <typename P> struct event_t final { + using type = llvm::unique_function<void(const P &)>; +}; +template <> struct event_t<void> final { + using type = llvm::unique_function<void()>; +}; +} // namespace detail + +template <typename R, typename P> +using OutgoingRequest = typename detail::request_t<R, P>::type; + +/// A function to send an outgoing event. +template <typename P> using OutgoingEvent = typename detail::event_t<P>::type; + +#if __cplusplus >= 202002L +/// This represents a protocol description that includes additional helpers +/// for constructing requests, responses and events to work with `Binder`. +template <typename T> +concept BindingBuilder = + ProtocolDescriptor<T> && + requires(T::Id id, T::Req req, T::Resp resp, T::Evt evt, + llvm::StringRef method, std::optional<llvm::json::Value> params, + std::optional<llvm::json::Value> result, llvm::Error err) { + /// For initializing the unique sequence identifier; + { T::InitialId() } -> std::same_as<typename T::Id>; + /// Incrementing the sequence identifier. + { id++ } -> std::same_as<typename T::Id>; + + /// Constructing protocol types + /// @{ + /// Construct a new request. + { T::Make(id, method, params) } -> std::same_as<typename T::Req>; + /// Construct a new error response. + { T::Make(req, std::move(err)) } -> std::same_as<typename T::Resp>; + /// Construct a new success response. + { T::Make(req, result) } -> std::same_as<typename T::Resp>; + /// Construct a new event. 
+    { T::Make(method, params) } -> std::same_as<typename T::Evt>;
+    /// @}
+
+    /// Keys for associated types.
+    /// @{
+    /// Looking up in-flight responses.
+    { T::KeyFor(resp) } -> std::same_as<typename T::Id>;
+    /// Extract method from request.
+    { T::KeyFor(req) } -> std::same_as<std::string>;
+    /// Extract method from event.
+    { T::KeyFor(evt) } -> std::same_as<std::string>;
+    /// @}
+
+    /// Extracting information from associated types.
+    /// @{
+    /// Extract parameters from a request.
+    { T::Extract(req) } -> std::same_as<std::optional<llvm::json::Value>>;
+    /// Extract result from a response.
+    { T::Extract(resp) } -> std::same_as<llvm::Expected<llvm::json::Value>>;
+    /// Extract parameters from an event.
+    { T::Extract(evt) } -> std::same_as<std::optional<llvm::json::Value>>;
+    /// @}
+  };
+#endif
+
+/// Binder collects a table of functions that handle calls.
+///
+/// The wrapper takes care of parsing/serializing the JSON parameters and
+/// results.
+///
+/// This allows a JSONTransport to handle incoming and outgoing requests and
+/// events.
+///
+/// A bind of an incoming request to a lambda.
+/// \code{cpp}
+/// Binder binder{transport};
+/// binder.Bind<int, vector<int>>("adder", [](const vector<int> &params) {
+///   int sum = 0;
+///   for (int v : params)
+///     sum += v;
+///   return sum;
+/// });
+/// \endcode
+///
+/// A bind of an outgoing request.
+/// \code{cpp}
+/// OutgoingRequest<int, vector<int>> call_add =
+///     binder.Bind<int, vector<int>>("add");
+/// call_add({1,2,3}, [](Expected<int> result) {
+///   cout << *result << "\n";
+/// });
+/// \endcode
+#if __cplusplus >= 202002L
+template <BindingBuilder Proto>
+#else
+template <typename Proto>
+#endif
+class Binder : public JSONTransport<Proto>::MessageHandler {
+  using Req = typename Proto::Req;
+  using Resp = typename Proto::Resp;
+  using Evt = typename Proto::Evt;
+  using Id = typename Proto::Id;
+  using Transport = JSONTransport<Proto>;
+  using MessageHandler = typename Transport::MessageHandler;
+
+public:
+  explicit Binder(Transport &transport) : m_transport(transport), m_seq(0) {}
+
+  Binder(const Binder &) = delete;
+  Binder &operator=(const Binder &) = delete;
+
+  /// Bind a handler on transport disconnect.
+  template <typename Fn, typename... Args>
+  void OnDisconnect(Fn &&fn, Args &&...args);
+
+  /// Bind a handler on error when communicating with the transport.
+  template <typename Fn, typename... Args>
+  void OnError(Fn &&fn, Args &&...args);
+
+  /// Bind a handler for an incoming request.
+  /// e.g. `Bind<PeekResult, PeekParams>("peek", &ThisModule::peek, this);`
+  /// Handler should be e.g. `Expected<PeekResult> peek(const PeekParams&);`
+  /// PeekParams must be JSON-parsable and PeekResult must be serializable.
+  template <typename Result, typename Params, typename Fn, typename... Args>
+  void Bind(llvm::StringLiteral method, Fn &&fn, Args &&...args);
+
+  /// Bind a handler for an incoming event.
+  /// e.g. `Bind<PeekParams>("peek", &ThisModule::peek, this);`
+  /// Handler should be e.g. `void peek(const PeekParams&);`
+  /// PeekParams must be JSON-parsable.
+  template <typename Params, typename Fn, typename... Args>
+  void Bind(llvm::StringLiteral method, Fn &&fn, Args &&...args);
+
+  /// Bind a function object to be used for outgoing requests.
+  /// e.g. `OutgoingRequest<Result, Params> Edit = Bind<Result, Params>("edit");`
+  /// Params must be JSON-serializable, Result must be parsable.
+  template <typename Result, typename Params>
+  OutgoingRequest<Result, Params> Bind(llvm::StringLiteral method);
+
+  /// Bind a function object to be used for outgoing events.
+  /// e.g. `OutgoingEvent<LogParams> Log = Bind<LogParams>("log");`
+  /// LogParams must be JSON-serializable.
+  template <typename Params>
+  OutgoingEvent<Params> Bind(llvm::StringLiteral method);
+
+  void Received(const Evt &evt) override {
+    std::scoped_lock<std::recursive_mutex> guard(m_mutex);
+    auto it = m_event_handlers.find(Proto::KeyFor(evt));
+    if (it == m_event_handlers.end()) {
+      OnError(llvm::createStringError(
+          llvm::formatv("no handler for event {0}", toJSON(evt))));
+      return;
+    }
+    it->second(evt);
+  }
+
+  void Received(const Req &req) override {
+    ReplyOnce reply(req, &m_transport, this);
+
+    std::scoped_lock<std::recursive_mutex> guard(m_mutex);
+    auto it = m_request_handlers.find(Proto::KeyFor(req));
+    if (it == m_request_handlers.end()) {
+      reply(Proto::Make(req, llvm::createStringError("method not found")));
+      return;
+    }
+
+    it->second(req, std::move(reply));
+  }
+
+  void Received(const Resp &resp) override {
+    std::scoped_lock<std::recursive_mutex> guard(m_mutex);
+
+    Id id = Proto::KeyFor(resp);
+    auto it = m_pending_responses.find(id);
+    if (it == m_pending_responses.end()) {
+      OnError(llvm::createStringError(
+          llvm::formatv("no pending request for {0}", toJSON(resp))));
+      return;
+    }
+
+    it->second(resp);
+    m_pending_responses.erase(it);
+  }
+
+  void OnError(llvm::Error err) override {
+    std::scoped_lock<std::recursive_mutex> guard(m_mutex);
+    if (m_error_handler)
+      m_error_handler(std::move(err));
+  }
+
+  void OnClosed() override {
+    std::scoped_lock<std::recursive_mutex> guard(m_mutex);
+    if (m_disconnect_handler)
+      m_disconnect_handler();
+  }
+
+private:
+  template <typename T>
+  static llvm::Expected<T> Parse(const llvm::json::Value &raw,
+                                 llvm::StringRef method);
+
+  template <typename T> using Callback = llvm::unique_function<T>;
+
+  std::recursive_mutex m_mutex;
+  Transport &m_transport;
+  Id m_seq;
+  std::map<Id, Callback<void(const Resp &)>> m_pending_responses;
+  llvm::StringMap<Callback<void(const Req &, Callback<void(const Resp &)>)>>
+      m_request_handlers;
+  llvm::StringMap<Callback<void(const Evt &)>> m_event_handlers;
+  Callback<void()> m_disconnect_handler;
+  Callback<void(llvm::Error)> m_error_handler;
+
+  /// Function object to reply to a call.
+  /// Each instance must be called exactly once, otherwise:
+  /// - the bug is logged, and (in debug mode) an assert will fire
+  /// - if there was no reply, an error reply is sent
+  /// - if there were multiple replies, only the first is sent
+  class ReplyOnce {
+    std::atomic<bool> replied = {false};
+    const Req req;
+    Transport *transport;    // Null when moved-from.
+    MessageHandler *handler; // Null when moved-from.
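+
+    // `replied` is atomic so a duplicate reply coming from another thread is
+    // still detected by the exchange() in operator() and dropped.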
+ + public: + ReplyOnce(const Req req, Transport *transport, MessageHandler *handler) + : req(req), transport(transport), handler(handler) { + assert(handler); + } + ReplyOnce(ReplyOnce &&other) + : replied(other.replied.load()), req(other.req), + transport(other.transport), handler(other.handler) { + other.transport = nullptr; + other.handler = nullptr; + } + ReplyOnce &operator=(ReplyOnce &&) = delete; + ReplyOnce(const ReplyOnce &) = delete; + ReplyOnce &operator=(const ReplyOnce &) = delete; + + ~ReplyOnce() { + if (transport && handler && !replied) { + assert(false && "must reply to all calls!"); + (*this)(Proto::Make(req, llvm::createStringError("failed to reply"))); + } + } + + void operator()(const Resp &resp) { + assert(transport && handler && "moved-from!"); + if (replied.exchange(true)) { + assert(false && "must reply to each call only once!"); + return; + } + + if (llvm::Error error = transport->Send(resp)) + handler->OnError(std::move(error)); + } + }; +}; + +#if __cplusplus >= 202002L +template <BindingBuilder Proto> +#else +template <typename Proto> +#endif +template <typename Fn, typename... Args> +void Binder<Proto>::OnDisconnect(Fn &&fn, Args &&...args) { + m_disconnect_handler = [fn, args...]() mutable { + std::invoke(std::forward<Fn>(fn), std::forward<Args>(args)...); + }; +} + +#if __cplusplus >= 202002L +template <BindingBuilder Proto> +#else +template <typename Proto> +#endif +template <typename Fn, typename... Args> +void Binder<Proto>::OnError(Fn &&fn, Args &&...args) { + m_error_handler = [fn, args...](llvm::Error error) mutable { + std::invoke(std::forward<Fn>(fn), std::forward<Args>(args)..., + std::move(error)); + }; +} + +#if __cplusplus >= 202002L +template <BindingBuilder Proto> +#else +template <typename Proto> +#endif +template <typename Result, typename Params, typename Fn, typename... 
Args> +void Binder<Proto>::Bind(llvm::StringLiteral method, Fn &&fn, Args &&...args) { + assert(m_request_handlers.find(method) == m_request_handlers.end() && + "request already bound"); + if constexpr (std::is_void_v<Result> && std::is_void_v<Params>) { + m_request_handlers[method] = + [fn, args...](const Req &req, + llvm::unique_function<void(const Resp &)> reply) mutable { + llvm::Error result = + std::invoke(std::forward<Fn>(fn), std::forward<Args>(args)...); + reply(Proto::Make(req, std::move(result))); + }; + } else if constexpr (std::is_void_v<Params>) { + m_request_handlers[method] = + [fn, args...](const Req &req, + llvm::unique_function<void(const Resp &)> reply) mutable { + llvm::Expected<Result> result = + std::invoke(std::forward<Fn>(fn), std::forward<Args>(args)...); + if (!result) + return reply(Proto::Make(req, result.takeError())); + reply(Proto::Make(req, toJSON(*result))); + }; + } else if constexpr (std::is_void_v<Result>) { + m_request_handlers[method] = + [method, fn, + args...](const Req &req, + llvm::unique_function<void(const Resp &)> reply) mutable { + llvm::Expected<Params> params = + Parse<Params>(Proto::Extract(req), method); + if (!params) + return reply(Proto::Make(req, params.takeError())); + + llvm::Error result = std::invoke( + std::forward<Fn>(fn), std::forward<Args>(args)..., *params); + reply(Proto::Make(req, std::move(result))); + }; + } else { + m_request_handlers[method] = + [method, fn, + args...](const Req &req, + llvm::unique_function<void(const Resp &)> reply) mutable { + llvm::Expected<Params> params = + Parse<Params>(Proto::Extract(req), method); + if (!params) + return reply(Proto::Make(req, params.takeError())); + + llvm::Expected<Result> result = std::invoke( + std::forward<Fn>(fn), std::forward<Args>(args)..., *params); + if (!result) + return reply(Proto::Make(req, result.takeError())); + + reply(Proto::Make(req, toJSON(*result))); + }; + } +} + +#if __cplusplus >= 202002L +template <BindingBuilder Proto> +#else +template <typename Proto> +#endif +template <typename Params, typename Fn, typename... 
Args> +void Binder<Proto>::Bind(llvm::StringLiteral method, Fn &&fn, Args &&...args) { + assert(m_event_handlers.find(method) == m_event_handlers.end() && + "event already bound"); + if constexpr (std::is_void_v<Params>) { + m_event_handlers[method] = [fn, args...](const Evt &) mutable { + std::invoke(std::forward<Fn>(fn), std::forward<Args>(args)...); + }; + } else { + m_event_handlers[method] = [this, method, fn, + args...](const Evt &evt) mutable { + llvm::Expected<Params> params = + Parse<Params>(Proto::Extract(evt), method); + if (!params) + return OnError(params.takeError()); + std::invoke(std::forward<Fn>(fn), std::forward<Args>(args)..., *params); + }; + } +} + +#if __cplusplus >= 202002L +template <BindingBuilder Proto> +#else +template <typename Proto> +#endif +template <typename Result, typename Params> +OutgoingRequest<Result, Params> +Binder<Proto>::Bind(llvm::StringLiteral method) { + if constexpr (std::is_void_v<Result> && std::is_void_v<Params>) { + return [this, method](Reply<Result> fn) { + std::scoped_lock<std::recursive_mutex> guard(m_mutex); + Id id = ++m_seq; + Req req = Proto::Make(id, method, std::nullopt); + m_pending_responses[id] = [fn = std::move(fn)](const Resp &resp) mutable { + llvm::Expected<llvm::json::Value> result = Proto::Extract(resp); + if (!result) + return fn(result.takeError()); + fn(llvm::Error::success()); + }; + if (llvm::Error error = m_transport.Send(req)) + OnError(std::move(error)); + }; + } else if constexpr (std::is_void_v<Params>) { + return [this, method](Reply<Result> fn) { + std::scoped_lock<std::recursive_mutex> guard(m_mutex); + Id id = ++m_seq; + Req req = Proto::Make(id, method, std::nullopt); + m_pending_responses[id] = [fn = std::move(fn), + method](const Resp &resp) mutable { + llvm::Expected<llvm::json::Value> result = Proto::Extract(resp); + if (!result) + return fn(result.takeError()); + fn(Parse<Result>(*result, method)); + }; + if (llvm::Error error = m_transport.Send(req)) + OnError(std::move(error)); + }; + } else if constexpr (std::is_void_v<Result>) { + return [this, method](const Params ¶ms, Reply<Result> fn) { + std::scoped_lock<std::recursive_mutex> guard(m_mutex); + Id id = ++m_seq; + Req req = Proto::Make(id, method, llvm::json::Value(params)); + m_pending_responses[id] = [fn = std::move(fn)](const Resp &resp) mutable { + llvm::Expected<llvm::json::Value> result = Proto::Extract(resp); + if (!result) + return fn(result.takeError()); + fn(llvm::Error::success()); + }; + if (llvm::Error error = m_transport.Send(req)) + OnError(std::move(error)); + }; + } else { + return [this, method](const Params ¶ms, Reply<Result> fn) { + std::scoped_lock<std::recursive_mutex> guard(m_mutex); + Id id = ++m_seq; + Req req = Proto::Make(id, method, llvm::json::Value(params)); + m_pending_responses[id] = [fn = std::move(fn), + method](const Resp &resp) mutable { + llvm::Expected<llvm::json::Value> result = Proto::Extract(resp); + if (llvm::Error err = result.takeError()) + return fn(std::move(err)); + fn(Parse<Result>(*result, method)); + }; + if (llvm::Error error = m_transport.Send(req)) + OnError(std::move(error)); + }; + } +} + +#if __cplusplus >= 202002L +template <BindingBuilder Proto> +#else +template <typename Proto> +#endif +template <typename Params> +OutgoingEvent<Params> Binder<Proto>::Bind(llvm::StringLiteral method) { + if constexpr (std::is_void_v<Params>) { + return [this, method]() { + if (llvm::Error error = + m_transport.Send(Proto::Make(method, std::nullopt))) + OnError(std::move(error)); + }; + } else { + return 
[this, method](const Params ¶ms) { + if (llvm::Error error = + m_transport.Send(Proto::Make(method, toJSON(params)))) + OnError(std::move(error)); + }; + } +} + +#if __cplusplus >= 202002L +template <BindingBuilder Proto> +#else +template <typename Proto> +#endif +template <typename T> +llvm::Expected<T> Binder<Proto>::Parse(const llvm::json::Value &raw, + llvm::StringRef method) { + T result; + llvm::json::Path::Root root; + if (!fromJSON(raw, result, root)) { + // Dump the relevant parts of the broken message. + std::string context; + llvm::raw_string_ostream OS(context); + root.printErrorContext(raw, OS); + return llvm::make_error<InvalidParams>(method.str(), context); + } + return std::move(result); +} + +} // namespace lldb_private::transport #endif diff --git a/lldb/include/lldb/Protocol/MCP/MCPError.h b/lldb/include/lldb/Protocol/MCP/MCPError.h index 55dd40f..609a173 100644 --- a/lldb/include/lldb/Protocol/MCP/MCPError.h +++ b/lldb/include/lldb/Protocol/MCP/MCPError.h @@ -9,7 +9,6 @@ #ifndef LLDB_PROTOCOL_MCP_MCPERROR_H #define LLDB_PROTOCOL_MCP_MCPERROR_H -#include "lldb/Protocol/MCP/Protocol.h" #include "llvm/Support/Error.h" #include <string> @@ -26,14 +25,12 @@ public: const std::string &getMessage() const { return m_message; } - lldb_protocol::mcp::Error toProtocolError() const; - static constexpr int64_t kResourceNotFound = -32002; static constexpr int64_t kInternalError = -32603; private: std::string m_message; - int64_t m_error_code; + int m_error_code; }; class UnsupportedURI : public llvm::ErrorInfo<UnsupportedURI> { diff --git a/lldb/include/lldb/Protocol/MCP/Protocol.h b/lldb/include/lldb/Protocol/MCP/Protocol.h index 6e1ffcb..a0ba865 100644 --- a/lldb/include/lldb/Protocol/MCP/Protocol.h +++ b/lldb/include/lldb/Protocol/MCP/Protocol.h @@ -14,6 +14,7 @@ #ifndef LLDB_PROTOCOL_MCP_PROTOCOL_H #define LLDB_PROTOCOL_MCP_PROTOCOL_H +#include "llvm/ADT/StringRef.h" #include "llvm/Support/JSON.h" #include <optional> #include <string> @@ -322,6 +323,10 @@ struct CallToolResult { llvm::json::Value toJSON(const CallToolResult &); bool fromJSON(const llvm::json::Value &, CallToolResult &, llvm::json::Path); +lldb_protocol::mcp::Request +MakeRequest(int64_t id, llvm::StringRef method, + std::optional<llvm::json::Value> params); + } // namespace lldb_protocol::mcp #endif diff --git a/lldb/include/lldb/Protocol/MCP/Server.h b/lldb/include/lldb/Protocol/MCP/Server.h index 970980d..f185d51 100644 --- a/lldb/include/lldb/Protocol/MCP/Server.h +++ b/lldb/include/lldb/Protocol/MCP/Server.h @@ -9,7 +9,6 @@ #ifndef LLDB_PROTOCOL_MCP_SERVER_H #define LLDB_PROTOCOL_MCP_SERVER_H -#include "lldb/Host/JSONTransport.h" #include "lldb/Host/MainLoop.h" #include "lldb/Protocol/MCP/Protocol.h" #include "lldb/Protocol/MCP/Resource.h" @@ -19,75 +18,66 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/JSON.h" #include "llvm/Support/Signals.h" -#include <functional> #include <memory> #include <string> #include <vector> namespace lldb_protocol::mcp { -class Server : public MCPTransport::MessageHandler { - using ClosedCallback = llvm::unique_function<void()>; +class Server { + + using MCPTransportUP = std::unique_ptr<lldb_protocol::mcp::MCPTransport>; + + using ReadHandleUP = lldb_private::MainLoop::ReadHandleUP; public: - Server(std::string name, std::string version, MCPTransport &client, - LogCallback log_callback = {}, ClosedCallback closed_callback = {}); + Server(std::string name, 
std::string version, LogCallback log_callback = {}); ~Server() = default; - using NotificationHandler = std::function<void(const Notification &)>; - void AddTool(std::unique_ptr<Tool> tool); void AddResourceProvider(std::unique_ptr<ResourceProvider> resource_provider); - void AddNotificationHandler(llvm::StringRef method, - NotificationHandler handler); - -protected: - ServerCapabilities GetCapabilities(); - - using RequestHandler = - std::function<llvm::Expected<Response>(const Request &)>; - void AddRequestHandlers(); + llvm::Error Accept(lldb_private::MainLoop &, MCPTransportUP); - void AddRequestHandler(llvm::StringRef method, RequestHandler handler); - - llvm::Expected<std::optional<Message>> HandleData(llvm::StringRef data); - - llvm::Expected<Response> Handle(const Request &request); - void Handle(const Notification ¬ification); +protected: + MCPBinderUP Bind(MCPTransport &); - llvm::Expected<Response> InitializeHandler(const Request &); + ServerCapabilities GetCapabilities(); - llvm::Expected<Response> ToolsListHandler(const Request &); - llvm::Expected<Response> ToolsCallHandler(const Request &); + llvm::Expected<InitializeResult> InitializeHandler(const InitializeParams &); - llvm::Expected<Response> ResourcesListHandler(const Request &); - llvm::Expected<Response> ResourcesReadHandler(const Request &); + llvm::Expected<ListToolsResult> ToolsListHandler(); + llvm::Expected<CallToolResult> ToolsCallHandler(const CallToolParams &); - void Received(const Request &) override; - void Received(const Response &) override; - void Received(const Notification &) override; - void OnError(llvm::Error) override; - void OnClosed() override; + llvm::Expected<ListResourcesResult> ResourcesListHandler(); + llvm::Expected<ReadResourceResult> + ResourcesReadHandler(const ReadResourceParams &); -protected: - void Log(llvm::StringRef); + template <typename... Ts> inline auto Logv(const char *Fmt, Ts &&...Vals) { + Log(llvm::formatv(Fmt, std::forward<Ts>(Vals)...).str()); + } + void Log(llvm::StringRef message) { + if (m_log_callback) + m_log_callback(message); + } private: const std::string m_name; const std::string m_version; - MCPTransport &m_client; LogCallback m_log_callback; - ClosedCallback m_closed_callback; + struct Client { + ReadHandleUP handle; + MCPTransportUP transport; + MCPBinderUP binder; + }; + std::map<MCPTransport *, Client> m_instances; llvm::StringMap<std::unique_ptr<Tool>> m_tools; std::vector<std::unique_ptr<ResourceProvider>> m_resource_providers; - - llvm::StringMap<RequestHandler> m_request_handlers; - llvm::StringMap<NotificationHandler> m_notification_handlers; }; class ServerInfoHandle; @@ -121,7 +111,7 @@ public: ServerInfoHandle &operator=(const ServerInfoHandle &) = delete; /// @} - /// Remove the file. + /// Remove the file on disk, if one is tracked. 
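+  /// \ref ServerInfo::Write creates the file and returns the handle that
+  /// tracks it.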
void Remove(); private: diff --git a/lldb/include/lldb/Protocol/MCP/Transport.h b/lldb/include/lldb/Protocol/MCP/Transport.h index 47c2ccf..b7a1eb7 100644 --- a/lldb/include/lldb/Protocol/MCP/Transport.h +++ b/lldb/include/lldb/Protocol/MCP/Transport.h @@ -10,22 +10,78 @@ #define LLDB_PROTOCOL_MCP_TRANSPORT_H #include "lldb/Host/JSONTransport.h" +#include "lldb/Protocol/MCP/MCPError.h" #include "lldb/Protocol/MCP/Protocol.h" #include "lldb/lldb-forward.h" #include "llvm/ADT/FunctionExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" +#include <sys/types.h> namespace lldb_protocol::mcp { +struct ProtocolDescriptor { + using Id = int64_t; + using Req = Request; + using Resp = Response; + using Evt = Notification; + + static inline Id InitialId() { return 0; } + static inline Request Make(Id id, llvm::StringRef method, + std::optional<llvm::json::Value> params) { + return Request{id, method.str(), params}; + } + static inline Notification Make(llvm::StringRef method, + std::optional<llvm::json::Value> params) { + return Notification{method.str(), params}; + } + static inline Response Make(Req req, llvm::Error error) { + lldb_protocol::mcp::Error protocol_error; + llvm::handleAllErrors( + std::move(error), [&](const llvm::ErrorInfoBase &err) { + std::error_code cerr = err.convertToErrorCode(); + protocol_error.code = + cerr == llvm::inconvertibleErrorCode() + ? lldb_protocol::mcp::eErrorCodeInternalError + : cerr.value(); + protocol_error.message = err.message(); + }); + + return Response{req.id, std::move(protocol_error)}; + } + static inline Response Make(Req req, + std::optional<llvm::json::Value> result) { + return Response{req.id, std::move(result)}; + } + static inline Id KeyFor(Response r) { return std::get<Id>(r.id); } + static inline std::string KeyFor(Request r) { return r.method; } + static inline std::string KeyFor(Notification n) { return n.method; } + static inline std::optional<llvm::json::Value> Extract(Request r) { + return r.params; + } + static inline llvm::Expected<llvm::json::Value> Extract(Response r) { + if (const lldb_protocol::mcp::Error *error = + std::get_if<lldb_protocol::mcp::Error>(&r.result)) + return llvm::make_error<lldb_protocol::mcp::MCPError>(error->message, + error->code); + return std::get<llvm::json::Value>(r.result); + } + static inline std::optional<llvm::json::Value> Extract(Notification n) { + return n.params; + } +}; + /// Generic transport that uses the MCP protocol. -using MCPTransport = lldb_private::Transport<Request, Response, Notification>; +using MCPTransport = lldb_private::transport::JSONTransport<ProtocolDescriptor>; +using MCPBinder = lldb_private::transport::Binder<ProtocolDescriptor>; +using MCPBinderUP = std::unique_ptr<MCPBinder>; /// Generic logging callback, to allow the MCP server / client / transport layer /// to be independent of the lldb log implementation. 
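+/// e.g. `[](llvm::StringRef message) { llvm::errs() << message << '\n'; }`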
using LogCallback = llvm::unique_function<void(llvm::StringRef message)>; class Transport final - : public lldb_private::JSONRPCTransport<Request, Response, Notification> { + : public lldb_private::transport::JSONRPCTransport<ProtocolDescriptor> { public: Transport(lldb::IOObjectSP in, lldb::IOObjectSP out, LogCallback log_callback = {}); diff --git a/lldb/include/lldb/Target/Language.h b/lldb/include/lldb/Target/Language.h index 6f20a02..9958b6e 100644 --- a/lldb/include/lldb/Target/Language.h +++ b/lldb/include/lldb/Target/Language.h @@ -404,8 +404,15 @@ public: GetLanguageTypeFromString(const char *string) = delete; static lldb::LanguageType GetLanguageTypeFromString(llvm::StringRef string); + /// Returns the internal LLDB name for the specified language. When presenting + /// the language name to users, use \ref GetDisplayNameForLanguageType + /// instead. static const char *GetNameForLanguageType(lldb::LanguageType language); + /// Returns a user-friendly name for the specified language. + static llvm::StringRef + GetDisplayNameForLanguageType(lldb::LanguageType language); + static void PrintAllLanguages(Stream &s, const char *prefix, const char *suffix); diff --git a/lldb/packages/Python/lldbsuite/test/dotest.py b/lldb/packages/Python/lldbsuite/test/dotest.py index 2966ac0..e30d549 100644 --- a/lldb/packages/Python/lldbsuite/test/dotest.py +++ b/lldb/packages/Python/lldbsuite/test/dotest.py @@ -280,9 +280,6 @@ def parseOptionsAndInitTestdirs(): configuration.llvm_tools_dir = args.llvm_tools_dir configuration.filecheck = shutil.which("FileCheck", path=args.llvm_tools_dir) configuration.yaml2obj = shutil.which("yaml2obj", path=args.llvm_tools_dir) - configuration.yaml2macho_core = shutil.which( - "yaml2macho-core", path=args.llvm_tools_dir - ) if not configuration.get_filecheck_path(): logging.warning("No valid FileCheck executable; some tests may fail...") @@ -563,6 +560,8 @@ def setupSysPath(): if is_exe(lldbDAPExec): os.environ["LLDBDAP_EXEC"] = lldbDAPExec + configuration.yaml2macho_core = shutil.which("yaml2macho-core", path=lldbDir) + lldbPythonDir = None # The directory that contains 'lldb/__init__.py' # If our lldb supports the -P option, use it to find the python path: diff --git a/lldb/source/Host/common/JSONTransport.cpp b/lldb/source/Host/common/JSONTransport.cpp index c4b42ea..22de7fa 100644 --- a/lldb/source/Host/common/JSONTransport.cpp +++ b/lldb/source/Host/common/JSONTransport.cpp @@ -14,8 +14,7 @@ #include <string> using namespace llvm; -using namespace lldb; -using namespace lldb_private; +using namespace lldb_private::transport; char TransportUnhandledContentsError::ID; @@ -23,10 +22,31 @@ TransportUnhandledContentsError::TransportUnhandledContentsError( std::string unhandled_contents) : m_unhandled_contents(unhandled_contents) {} -void TransportUnhandledContentsError::log(llvm::raw_ostream &OS) const { +void TransportUnhandledContentsError::log(raw_ostream &OS) const { OS << "transport EOF with unhandled contents: '" << m_unhandled_contents << "'"; } std::error_code TransportUnhandledContentsError::convertToErrorCode() const { return std::make_error_code(std::errc::bad_message); } + +char InvalidParams::ID; + +void InvalidParams::log(raw_ostream &OS) const { + OS << "invalid parameters for method '" << m_method << "': '" << m_context + << "'"; +} +std::error_code InvalidParams::convertToErrorCode() const { + return std::make_error_code(std::errc::invalid_argument); +} + +char MethodNotFound::ID; + +void MethodNotFound::log(raw_ostream &OS) const { + OS << "method 
not found: '" << m_method << "'"; +} + +std::error_code MethodNotFound::convertToErrorCode() const { + // JSON-RPC Method not found + return std::error_code(MethodNotFound::kErrorCode, std::generic_category()); +} diff --git a/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp b/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp index d7293fc..33bdd5e 100644 --- a/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp +++ b/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp @@ -52,11 +52,6 @@ llvm::StringRef ProtocolServerMCP::GetPluginDescriptionStatic() { } void ProtocolServerMCP::Extend(lldb_protocol::mcp::Server &server) const { - server.AddNotificationHandler("notifications/initialized", - [](const lldb_protocol::mcp::Notification &) { - LLDB_LOG(GetLog(LLDBLog::Host), - "MCP initialization complete"); - }); server.AddTool( std::make_unique<CommandTool>("command", "Run an lldb command.")); server.AddTool(std::make_unique<DebuggerListTool>( @@ -74,26 +69,9 @@ void ProtocolServerMCP::AcceptCallback(std::unique_ptr<Socket> socket) { io_sp, io_sp, [client_name](llvm::StringRef message) { LLDB_LOG(GetLog(LLDBLog::Host), "{0}: {1}", client_name, message); }); - MCPTransport *transport_ptr = transport_up.get(); - auto instance_up = std::make_unique<lldb_protocol::mcp::Server>( - std::string(kName), std::string(kVersion), *transport_up, - /*log_callback=*/ - [client_name](llvm::StringRef message) { - LLDB_LOG(GetLog(LLDBLog::Host), "{0} Server: {1}", client_name, - message); - }, - /*closed_callback=*/ - [this, transport_ptr]() { m_instances.erase(transport_ptr); }); - Extend(*instance_up); - llvm::Expected<MainLoop::ReadHandleUP> handle = - transport_up->RegisterMessageHandler(m_loop, *instance_up); - if (!handle) { - LLDB_LOG_ERROR(log, handle.takeError(), "Failed to run MCP server: {0}"); - return; - } - m_instances[transport_ptr] = - std::make_tuple<ServerUP, ReadHandleUP, TransportUP>( - std::move(instance_up), std::move(*handle), std::move(transport_up)); + + if (auto error = m_server->Accept(m_loop, std::move(transport_up))) + LLDB_LOG_ERROR(log, std::move(error), "{0}:"); } llvm::Error ProtocolServerMCP::Start(ProtocolServer::Connection connection) { @@ -124,14 +102,21 @@ llvm::Error ProtocolServerMCP::Start(ProtocolServer::Connection connection) { llvm::join(m_listener->GetListeningConnectionURI(), ", "); ServerInfo info{listening_uris[0]}; - llvm::Expected<ServerInfoHandle> handle = ServerInfo::Write(info); - if (!handle) - return handle.takeError(); + llvm::Expected<ServerInfoHandle> server_info_handle = ServerInfo::Write(info); + if (!server_info_handle) + return server_info_handle.takeError(); + + m_client_count = 0; + m_server = std::make_unique<lldb_protocol::mcp::Server>( + std::string(kName), std::string(kVersion), [](StringRef message) { + LLDB_LOG(GetLog(LLDBLog::Host), "MCP Server: {0}", message); + }); + Extend(*m_server); m_running = true; - m_server_info_handle = std::move(*handle); - m_listen_handlers = std::move(*handles); - m_loop_thread = std::thread([=] { + m_server_info_handle = std::move(*server_info_handle); + m_accept_handles = std::move(*handles); + m_loop_thread = std::thread([this] { llvm::set_thread_name("protocol-server.mcp"); m_loop.Run(); }); @@ -155,9 +140,10 @@ llvm::Error ProtocolServerMCP::Stop() { if (m_loop_thread.joinable()) m_loop_thread.join(); + m_accept_handles.clear(); + + m_server.reset(nullptr); m_server_info_handle.Remove(); - m_listen_handlers.clear(); - m_instances.clear(); return llvm::Error::success(); } diff --git 
a/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.h b/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.h index b325a36..e0f2a6c 100644 --- a/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.h +++ b/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.h @@ -23,16 +23,17 @@ namespace lldb_private::mcp { class ProtocolServerMCP : public ProtocolServer { - using ReadHandleUP = MainLoopBase::ReadHandleUP; - using TransportUP = std::unique_ptr<lldb_protocol::mcp::MCPTransport>; + using ServerUP = std::unique_ptr<lldb_protocol::mcp::Server>; + using ReadHandleUP = MainLoop::ReadHandleUP; + public: ProtocolServerMCP(); - virtual ~ProtocolServerMCP() override; + ~ProtocolServerMCP() override; - virtual llvm::Error Start(ProtocolServer::Connection connection) override; - virtual llvm::Error Stop() override; + llvm::Error Start(ProtocolServer::Connection connection) override; + llvm::Error Stop() override; static void Initialize(); static void Terminate(); @@ -56,19 +57,18 @@ private: bool m_running = false; - lldb_protocol::mcp::ServerInfoHandle m_server_info_handle; lldb_private::MainLoop m_loop; std::thread m_loop_thread; std::mutex m_mutex; size_t m_client_count = 0; std::unique_ptr<Socket> m_listener; + std::vector<ReadHandleUP> m_accept_handles; - std::vector<ReadHandleUP> m_listen_handlers; - std::map<lldb_protocol::mcp::MCPTransport *, - std::tuple<ServerUP, ReadHandleUP, TransportUP>> - m_instances; + ServerUP m_server; + lldb_protocol::mcp::ServerInfoHandle m_server_info_handle; }; + } // namespace lldb_private::mcp #endif diff --git a/lldb/source/Protocol/MCP/MCPError.cpp b/lldb/source/Protocol/MCP/MCPError.cpp index e140d11..cfac055 100644 --- a/lldb/source/Protocol/MCP/MCPError.cpp +++ b/lldb/source/Protocol/MCP/MCPError.cpp @@ -22,14 +22,7 @@ MCPError::MCPError(std::string message, int64_t error_code) void MCPError::log(llvm::raw_ostream &OS) const { OS << m_message; } std::error_code MCPError::convertToErrorCode() const { - return llvm::inconvertibleErrorCode(); -} - -lldb_protocol::mcp::Error MCPError::toProtocolError() const { - lldb_protocol::mcp::Error error; - error.code = m_error_code; - error.message = m_message; - return error; + return std::error_code(m_error_code, std::generic_category()); } UnsupportedURI::UnsupportedURI(std::string uri) : m_uri(uri) {} diff --git a/lldb/source/Protocol/MCP/Server.cpp b/lldb/source/Protocol/MCP/Server.cpp index 19030a3..71323ad 100644 --- a/lldb/source/Protocol/MCP/Server.cpp +++ b/lldb/source/Protocol/MCP/Server.cpp @@ -12,6 +12,7 @@ #include "lldb/Host/HostInfo.h" #include "lldb/Protocol/MCP/MCPError.h" #include "lldb/Protocol/MCP/Protocol.h" +#include "lldb/Protocol/MCP/Transport.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/JSON.h" @@ -108,48 +109,9 @@ Expected<std::vector<ServerInfo>> ServerInfo::Load() { return infos; } -Server::Server(std::string name, std::string version, MCPTransport &client, - LogCallback log_callback, ClosedCallback closed_callback) - : m_name(std::move(name)), m_version(std::move(version)), m_client(client), - m_log_callback(std::move(log_callback)), - m_closed_callback(std::move(closed_callback)) { - AddRequestHandlers(); -} - -void Server::AddRequestHandlers() { - AddRequestHandler("initialize", std::bind(&Server::InitializeHandler, this, - std::placeholders::_1)); - AddRequestHandler("tools/list", std::bind(&Server::ToolsListHandler, this, - std::placeholders::_1)); - AddRequestHandler("tools/call", std::bind(&Server::ToolsCallHandler, this, - 
std::placeholders::_1)); - AddRequestHandler("resources/list", std::bind(&Server::ResourcesListHandler, - this, std::placeholders::_1)); - AddRequestHandler("resources/read", std::bind(&Server::ResourcesReadHandler, - this, std::placeholders::_1)); -} - -llvm::Expected<Response> Server::Handle(const Request &request) { - auto it = m_request_handlers.find(request.method); - if (it != m_request_handlers.end()) { - llvm::Expected<Response> response = it->second(request); - if (!response) - return response; - response->id = request.id; - return *response; - } - - return llvm::make_error<MCPError>( - llvm::formatv("no handler for request: {0}", request.method).str()); -} - -void Server::Handle(const Notification ¬ification) { - auto it = m_notification_handlers.find(notification.method); - if (it != m_notification_handlers.end()) { - it->second(notification); - return; - } -} +Server::Server(std::string name, std::string version, LogCallback log_callback) + : m_name(std::move(name)), m_version(std::move(version)), + m_log_callback(std::move(log_callback)) {} void Server::AddTool(std::unique_ptr<Tool> tool) { if (!tool) @@ -164,48 +126,64 @@ void Server::AddResourceProvider( m_resource_providers.push_back(std::move(resource_provider)); } -void Server::AddRequestHandler(llvm::StringRef method, RequestHandler handler) { - m_request_handlers[method] = std::move(handler); -} - -void Server::AddNotificationHandler(llvm::StringRef method, - NotificationHandler handler) { - m_notification_handlers[method] = std::move(handler); -} - -llvm::Expected<Response> Server::InitializeHandler(const Request &request) { - Response response; +MCPBinderUP Server::Bind(MCPTransport &transport) { + MCPBinderUP binder_up = std::make_unique<MCPBinder>(transport); + binder_up->Bind<InitializeResult, InitializeParams>( + "initialize", &Server::InitializeHandler, this); + binder_up->Bind<ListToolsResult, void>("tools/list", + &Server::ToolsListHandler, this); + binder_up->Bind<CallToolResult, CallToolParams>( + "tools/call", &Server::ToolsCallHandler, this); + binder_up->Bind<ListResourcesResult, void>( + "resources/list", &Server::ResourcesListHandler, this); + binder_up->Bind<ReadResourceResult, ReadResourceParams>( + "resources/read", &Server::ResourcesReadHandler, this); + binder_up->Bind<void>("notifications/initialized", + [this]() { Log("MCP initialization complete"); }); + return binder_up; +} + +llvm::Error Server::Accept(MainLoop &loop, MCPTransportUP transport) { + MCPBinderUP binder = Bind(*transport); + MCPTransport *transport_ptr = transport.get(); + binder->OnDisconnect([this, transport_ptr]() { + assert(m_instances.find(transport_ptr) != m_instances.end() && + "Client not found in m_instances"); + m_instances.erase(transport_ptr); + }); + binder->OnError([this](llvm::Error err) { + Logv("Transport error: {0}", llvm::toString(std::move(err))); + }); + + auto handle = transport->RegisterMessageHandler(loop, *binder); + if (!handle) + return handle.takeError(); + + m_instances[transport_ptr] = + Client{std::move(*handle), std::move(transport), std::move(binder)}; + return llvm::Error::success(); +} + +Expected<InitializeResult> +Server::InitializeHandler(const InitializeParams &request) { InitializeResult result; result.protocolVersion = mcp::kProtocolVersion; result.capabilities = GetCapabilities(); result.serverInfo.name = m_name; result.serverInfo.version = m_version; - response.result = std::move(result); - return response; + return result; } -llvm::Expected<Response> Server::ToolsListHandler(const Request 
&request) { - Response response; - +llvm::Expected<ListToolsResult> Server::ToolsListHandler() { ListToolsResult result; for (const auto &tool : m_tools) result.tools.emplace_back(tool.second->GetDefinition()); - response.result = std::move(result); - - return response; + return result; } -llvm::Expected<Response> Server::ToolsCallHandler(const Request &request) { - Response response; - - if (!request.params) - return llvm::createStringError("no tool parameters"); - CallToolParams params; - json::Path::Root root("params"); - if (!fromJSON(request.params, params, root)) - return root.getError(); - +llvm::Expected<CallToolResult> +Server::ToolsCallHandler(const CallToolParams ¶ms) { llvm::StringRef tool_name = params.name; if (tool_name.empty()) return llvm::createStringError("no tool name"); @@ -222,113 +200,50 @@ llvm::Expected<Response> Server::ToolsCallHandler(const Request &request) { if (!text_result) return text_result.takeError(); - response.result = toJSON(*text_result); - - return response; + return text_result; } -llvm::Expected<Response> Server::ResourcesListHandler(const Request &request) { - Response response; - +llvm::Expected<ListResourcesResult> Server::ResourcesListHandler() { ListResourcesResult result; for (std::unique_ptr<ResourceProvider> &resource_provider_up : m_resource_providers) for (const Resource &resource : resource_provider_up->GetResources()) result.resources.push_back(resource); - response.result = std::move(result); - - return response; + return result; } -llvm::Expected<Response> Server::ResourcesReadHandler(const Request &request) { - Response response; - - if (!request.params) - return llvm::createStringError("no resource parameters"); - - ReadResourceParams params; - json::Path::Root root("params"); - if (!fromJSON(request.params, params, root)) - return root.getError(); - - llvm::StringRef uri_str = params.uri; +Expected<ReadResourceResult> +Server::ResourcesReadHandler(const ReadResourceParams ¶ms) { + StringRef uri_str = params.uri; if (uri_str.empty()) - return llvm::createStringError("no resource uri"); + return createStringError("no resource uri"); for (std::unique_ptr<ResourceProvider> &resource_provider_up : m_resource_providers) { - llvm::Expected<ReadResourceResult> result = + Expected<ReadResourceResult> result = resource_provider_up->ReadResource(uri_str); if (result.errorIsA<UnsupportedURI>()) { - llvm::consumeError(result.takeError()); + consumeError(result.takeError()); continue; } if (!result) return result.takeError(); - Response response; - response.result = std::move(*result); - return response; + return *result; } return make_error<MCPError>( - llvm::formatv("no resource handler for uri: {0}", uri_str).str(), + formatv("no resource handler for uri: {0}", uri_str).str(), MCPError::kResourceNotFound); } ServerCapabilities Server::GetCapabilities() { lldb_protocol::mcp::ServerCapabilities capabilities; capabilities.supportsToolsList = true; + capabilities.supportsResourcesList = true; // FIXME: Support sending notifications when a debugger/target are // added/removed. 
- capabilities.supportsResourcesList = false; + capabilities.supportsResourcesSubscribe = false; return capabilities; } - -void Server::Log(llvm::StringRef message) { - if (m_log_callback) - m_log_callback(message); -} - -void Server::Received(const Request &request) { - auto SendResponse = [this](const Response &response) { - if (llvm::Error error = m_client.Send(response)) - Log(llvm::toString(std::move(error))); - }; - - llvm::Expected<Response> response = Handle(request); - if (response) - return SendResponse(*response); - - lldb_protocol::mcp::Error protocol_error; - llvm::handleAllErrors( - response.takeError(), - [&](const MCPError &err) { protocol_error = err.toProtocolError(); }, - [&](const llvm::ErrorInfoBase &err) { - protocol_error.code = MCPError::kInternalError; - protocol_error.message = err.message(); - }); - Response error_response; - error_response.id = request.id; - error_response.result = std::move(protocol_error); - SendResponse(error_response); -} - -void Server::Received(const Response &response) { - Log("unexpected MCP message: response"); -} - -void Server::Received(const Notification ¬ification) { - Handle(notification); -} - -void Server::OnError(llvm::Error error) { - Log(llvm::toString(std::move(error))); -} - -void Server::OnClosed() { - Log("EOF"); - if (m_closed_callback) - m_closed_callback(); -} diff --git a/lldb/source/Target/Language.cpp b/lldb/source/Target/Language.cpp index d4a9268..395718e 100644 --- a/lldb/source/Target/Language.cpp +++ b/lldb/source/Target/Language.cpp @@ -271,6 +271,10 @@ const char *Language::GetNameForLanguageType(LanguageType language) { return language_names[eLanguageTypeUnknown].name; } +llvm::StringRef Language::GetDisplayNameForLanguageType(LanguageType language) { + return SourceLanguage(language).GetDescription(); +} + void Language::PrintSupportedLanguagesForExpressions(Stream &s, llvm::StringRef prefix, llvm::StringRef suffix) { @@ -543,9 +547,26 @@ Language::Language() = default; // Destructor Language::~Language() = default; +static std::optional<llvm::dwarf::SourceLanguage> +ToDwarfSourceLanguage(lldb::LanguageType language_type) { + if (language_type < lldb::eLanguageTypeLastStandardLanguage) + return static_cast<llvm::dwarf::SourceLanguage>(language_type); + + switch (language_type) { + case eLanguageTypeMipsAssembler: + return llvm::dwarf::DW_LANG_Mips_Assembler; + default: + return std::nullopt; + } +} + SourceLanguage::SourceLanguage(lldb::LanguageType language_type) { - auto lname = - llvm::dwarf::toDW_LNAME((llvm::dwarf::SourceLanguage)language_type); + std::optional<llvm::dwarf::SourceLanguage> dwarf_lang = + ToDwarfSourceLanguage(language_type); + if (!dwarf_lang) + return; + + auto lname = llvm::dwarf::toDW_LNAME(*dwarf_lang); if (!lname) return; name = lname->first; @@ -560,11 +581,8 @@ lldb::LanguageType SourceLanguage::AsLanguageType() const { } llvm::StringRef SourceLanguage::GetDescription() const { - LanguageType type = AsLanguageType(); - if (type) - return Language::GetNameForLanguageType(type); return llvm::dwarf::LanguageDescription( - (llvm::dwarf::SourceLanguageName)name); + static_cast<llvm::dwarf::SourceLanguageName>(name)); } bool SourceLanguage::IsC() const { return name == llvm::dwarf::DW_LNAME_C; } diff --git a/lldb/test/API/tools/lldb-server/TestLldbGdbServer.py b/lldb/test/API/tools/lldb-server/TestLldbGdbServer.py index c01f6d8..f1c0519 100644 --- a/lldb/test/API/tools/lldb-server/TestLldbGdbServer.py +++ b/lldb/test/API/tools/lldb-server/TestLldbGdbServer.py @@ -22,7 +22,9 @@ from 
lldbsuite.test.lldbtest import *
 from lldbsuite.test.lldbdwarf import *
 from lldbsuite.test import lldbutil, lldbplatformutil
-
+# On Linux systems with Yama ptrace_scope = 1 there is a race condition when the
+# debuggee enables tracing. See https://github.com/llvm/llvm-project/issues/161510.
+@skipIfLinux
 class LldbGdbServerTestCase(
     gdbremote_testcase.GdbRemoteTestCaseBase, DwarfOpcodeParser
 ):
diff --git a/lldb/tools/lldb-dap/DAP.h b/lldb/tools/lldb-dap/DAP.h
index 71681fd..a90ddf5 100644
--- a/lldb/tools/lldb-dap/DAP.h
+++ b/lldb/tools/lldb-dap/DAP.h
@@ -78,11 +78,9 @@ enum DAPBroadcasterBits {

 enum class ReplMode { Variable = 0, Command, Auto };

-using DAPTransport =
-    lldb_private::Transport<protocol::Request, protocol::Response,
-                            protocol::Event>;
+using DAPTransport = lldb_private::transport::JSONTransport<ProtocolDescriptor>;

-struct DAP final : private DAPTransport::MessageHandler {
+struct DAP final : public DAPTransport::MessageHandler {
   /// Path to the lldb-dap binary itself.
   static llvm::StringRef debug_adapter_path;
diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolBase.h b/lldb/tools/lldb-dap/Protocol/ProtocolBase.h
index 0a9ef53..92e41b1 100644
--- a/lldb/tools/lldb-dap/Protocol/ProtocolBase.h
+++ b/lldb/tools/lldb-dap/Protocol/ProtocolBase.h
@@ -30,6 +30,8 @@ namespace lldb_dap::protocol {

 // MARK: Base Protocol

+using Id = int64_t;
+
 /// A client or debug adapter initiated request.
 struct Request {
   /// Sequence number of the message (also known as message ID). The `seq` for
@@ -39,7 +41,7 @@
   /// associate requests with their corresponding responses. For protocol
   /// messages of type `request` the sequence number can be used to cancel the
   /// request.
-  int64_t seq;
+  Id seq;

   /// The command to execute.
   std::string command;
@@ -76,7 +78,7 @@ enum ResponseMessage : unsigned {
 /// Response for a request.
 struct Response {
   /// Sequence number of the corresponding request.
-  int64_t request_seq;
+  Id request_seq;

   /// The command requested.
   std::string command;
diff --git a/lldb/tools/lldb-dap/Transport.h b/lldb/tools/lldb-dap/Transport.h
index 4a9dd76..58c48c1 100644
--- a/lldb/tools/lldb-dap/Transport.h
+++ b/lldb/tools/lldb-dap/Transport.h
@@ -22,11 +22,18 @@

 namespace lldb_dap {

+struct ProtocolDescriptor {
+  using Id = protocol::Id;
+  using Req = protocol::Request;
+  using Resp = protocol::Response;
+  using Evt = protocol::Event;
+};
+
 /// A transport class that performs the Debug Adapter Protocol communication
 /// with the client.
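+///
+/// A minimal construction sketch (the stream objects here are placeholders):
+/// \code{cpp}
+/// lldb::IOObjectSP input = /* e.g. a File wrapping stdin */;
+/// lldb::IOObjectSP output = /* e.g. a File wrapping stdout */;
+/// Transport transport("client", log, input, output);
+/// \endcode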
class Transport final - : public lldb_private::HTTPDelimitedJSONTransport< - protocol::Request, protocol::Response, protocol::Event> { + : public lldb_private::transport::HTTPDelimitedJSONTransport< + ProtocolDescriptor> { public: Transport(llvm::StringRef client_name, lldb_dap::Log *log, lldb::IOObjectSP input, lldb::IOObjectSP output); diff --git a/lldb/unittests/DAP/DAPTest.cpp b/lldb/unittests/DAP/DAPTest.cpp index 2090fe6..4fd6cd5 100644 --- a/lldb/unittests/DAP/DAPTest.cpp +++ b/lldb/unittests/DAP/DAPTest.cpp @@ -9,13 +9,10 @@ #include "DAP.h" #include "Protocol/ProtocolBase.h" #include "TestBase.h" -#include "llvm/Testing/Support/Error.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include <optional> -using namespace llvm; -using namespace lldb; using namespace lldb_dap; using namespace lldb_dap_tests; using namespace lldb_dap::protocol; @@ -24,18 +21,7 @@ using namespace testing; class DAPTest : public TransportBase {}; TEST_F(DAPTest, SendProtocolMessages) { - DAP dap{ - /*log=*/nullptr, - /*default_repl_mode=*/ReplMode::Auto, - /*pre_init_commands=*/{}, - /*no_lldbinit=*/false, - /*client_name=*/"test_client", - /*transport=*/*transport, - /*loop=*/loop, - }; - dap.Send(Event{/*event=*/"my-event", /*body=*/std::nullopt}); - loop.AddPendingCallback( - [](lldb_private::MainLoopBase &loop) { loop.RequestTermination(); }); - EXPECT_CALL(client, Received(IsEvent("my-event", std::nullopt))); - ASSERT_THAT_ERROR(dap.Loop(), llvm::Succeeded()); + dap->Send(Event{/*event=*/"my-event", /*body=*/std::nullopt}); + EXPECT_CALL(client, Received(IsEvent("my-event"))); + Run(); } diff --git a/lldb/unittests/DAP/Handler/DisconnectTest.cpp b/lldb/unittests/DAP/Handler/DisconnectTest.cpp index c6ff1f9..88d6e9a 100644 --- a/lldb/unittests/DAP/Handler/DisconnectTest.cpp +++ b/lldb/unittests/DAP/Handler/DisconnectTest.cpp @@ -31,7 +31,7 @@ TEST_F(DisconnectRequestHandlerTest, DisconnectTriggersTerminated) { DisconnectRequestHandler handler(*dap); ASSERT_THAT_ERROR(handler.Run(std::nullopt), Succeeded()); EXPECT_CALL(client, Received(IsEvent("terminated", _))); - RunOnce(); + Run(); } TEST_F(DisconnectRequestHandlerTest, DisconnectTriggersTerminateCommands) { @@ -53,5 +53,5 @@ TEST_F(DisconnectRequestHandlerTest, DisconnectTriggersTerminateCommands) { EXPECT_CALL(client, Received(Output("(lldb) script print(2)\n"))); EXPECT_CALL(client, Received(Output("Running terminateCommands:\n"))); EXPECT_CALL(client, Received(IsEvent("terminated", _))); - RunOnce(); + Run(); } diff --git a/lldb/unittests/DAP/TestBase.cpp b/lldb/unittests/DAP/TestBase.cpp index ba7baf2..3721e09 100644 --- a/lldb/unittests/DAP/TestBase.cpp +++ b/lldb/unittests/DAP/TestBase.cpp @@ -32,23 +32,9 @@ using lldb_private::FileSystem; using lldb_private::MainLoop; using lldb_private::Pipe; -Expected<MainLoop::ReadHandleUP> -TestTransport::RegisterMessageHandler(MainLoop &loop, MessageHandler &handler) { - Expected<lldb::FileUP> dummy_file = FileSystem::Instance().Open( - FileSpec(FileSystem::DEV_NULL), File::eOpenOptionReadWrite); - if (!dummy_file) - return dummy_file.takeError(); - m_dummy_file = std::move(*dummy_file); - lldb_private::Status status; - auto handle = loop.RegisterReadObject( - m_dummy_file, [](lldb_private::MainLoopBase &) {}, status); - if (status.Fail()) - return status.takeError(); - return handle; -} +void TransportBase::SetUp() { + std::tie(to_client, to_server) = TestDAPTransport::createPair(); -void DAPTestBase::SetUp() { - TransportBase::SetUp(); std::error_code EC; log = std::make_unique<Log>("-", EC); dap 
= std::make_unique<DAP>( @@ -57,16 +43,30 @@ void DAPTestBase::SetUp() { /*pre_init_commands=*/std::vector<std::string>(), /*no_lldbinit=*/false, /*client_name=*/"test_client", - /*transport=*/*transport, /*loop=*/loop); + /*transport=*/*to_client, /*loop=*/loop); + + auto server_handle = to_server->RegisterMessageHandler(loop, *dap.get()); + EXPECT_THAT_EXPECTED(server_handle, Succeeded()); + handles[0] = std::move(*server_handle); + + auto client_handle = to_client->RegisterMessageHandler(loop, client); + EXPECT_THAT_EXPECTED(client_handle, Succeeded()); + handles[1] = std::move(*client_handle); } +void TransportBase::Run() { + loop.AddPendingCallback( + [](lldb_private::MainLoopBase &loop) { loop.RequestTermination(); }); + EXPECT_THAT_ERROR(loop.Run().takeError(), llvm::Succeeded()); +} + +void DAPTestBase::SetUp() { TransportBase::SetUp(); } + void DAPTestBase::TearDown() { - if (core) { + if (core) ASSERT_THAT_ERROR(core->discard(), Succeeded()); - } - if (binary) { + if (binary) ASSERT_THAT_ERROR(binary->discard(), Succeeded()); - } } void DAPTestBase::SetUpTestSuite() { diff --git a/lldb/unittests/DAP/TestBase.h b/lldb/unittests/DAP/TestBase.h index c19eead..c32f3a7 100644 --- a/lldb/unittests/DAP/TestBase.h +++ b/lldb/unittests/DAP/TestBase.h @@ -7,73 +7,48 @@ //===----------------------------------------------------------------------===// #include "DAP.h" +#include "DAPLog.h" #include "Protocol/ProtocolBase.h" #include "TestingSupport/Host/JSONTransportTestUtilities.h" #include "TestingSupport/SubsystemRAII.h" +#include "Transport.h" #include "lldb/Host/FileSystem.h" #include "lldb/Host/HostInfo.h" #include "lldb/Host/MainLoop.h" #include "lldb/Host/MainLoopBase.h" -#include "lldb/lldb-forward.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/JSON.h" -#include "llvm/Testing/Support/Error.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include <memory> +#include <optional> + +/// Helpers for gtest printing. 
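+/// gtest discovers these `PrintTo` overloads via argument-dependent lookup, so
+/// failing assertions print the JSON form of a message instead of raw bytes.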
+namespace lldb_dap::protocol { + +inline void PrintTo(const Request &req, std::ostream *os) { + *os << llvm::formatv("{0}", toJSON(req)).str(); +} + +inline void PrintTo(const Response &resp, std::ostream *os) { + *os << llvm::formatv("{0}", toJSON(resp)).str(); +} + +inline void PrintTo(const Event &evt, std::ostream *os) { + *os << llvm::formatv("{0}", toJSON(evt)).str(); +} + +inline void PrintTo(const Message &message, std::ostream *os) { + return std::visit([os](auto &&message) { return PrintTo(message, os); }, + message); +} + +} // namespace lldb_dap::protocol namespace lldb_dap_tests { -class TestTransport final - : public lldb_private::Transport<lldb_dap::protocol::Request, - lldb_dap::protocol::Response, - lldb_dap::protocol::Event> { -public: - using Message = lldb_private::Transport<lldb_dap::protocol::Request, - lldb_dap::protocol::Response, - lldb_dap::protocol::Event>::Message; - - TestTransport(lldb_private::MainLoop &loop, MessageHandler &handler) - : m_loop(loop), m_handler(handler) {} - - llvm::Error Send(const lldb_dap::protocol::Event &e) override { - m_loop.AddPendingCallback([this, e](lldb_private::MainLoopBase &) { - this->m_handler.Received(e); - }); - return llvm::Error::success(); - } - - llvm::Error Send(const lldb_dap::protocol::Request &r) override { - m_loop.AddPendingCallback([this, r](lldb_private::MainLoopBase &) { - this->m_handler.Received(r); - }); - return llvm::Error::success(); - } - - llvm::Error Send(const lldb_dap::protocol::Response &r) override { - m_loop.AddPendingCallback([this, r](lldb_private::MainLoopBase &) { - this->m_handler.Received(r); - }); - return llvm::Error::success(); - } - - llvm::Expected<lldb_private::MainLoop::ReadHandleUP> - RegisterMessageHandler(lldb_private::MainLoop &loop, - MessageHandler &handler) override; - - void Log(llvm::StringRef message) override { - log_messages.emplace_back(message); - } - - std::vector<std::string> log_messages; - -private: - lldb_private::MainLoop &m_loop; - MessageHandler &m_handler; - lldb::FileSP m_dummy_file; -}; +using TestDAPTransport = TestTransport<lldb_dap::ProtocolDescriptor>; /// A base class for tests that need transport configured for communicating DAP /// messages. @@ -82,22 +57,36 @@ protected: lldb_private::SubsystemRAII<lldb_private::FileSystem, lldb_private::HostInfo> subsystems; lldb_private::MainLoop loop; - std::unique_ptr<TestTransport> transport; - MockMessageHandler<lldb_dap::protocol::Request, lldb_dap::protocol::Response, - lldb_dap::protocol::Event> - client; - - void SetUp() override { - transport = std::make_unique<TestTransport>(loop, client); - } + lldb_private::MainLoop::ReadHandleUP handles[2]; + + std::unique_ptr<lldb_dap::Log> log; + + std::unique_ptr<TestDAPTransport> to_client; + MockMessageHandler<lldb_dap::ProtocolDescriptor> client; + + std::unique_ptr<TestDAPTransport> to_server; + std::unique_ptr<lldb_dap::DAP> dap; + + void SetUp() override; + + void Run(); }; /// A matcher for a DAP event. 
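+/// e.g. `EXPECT_CALL(client, Received(IsEvent("terminated", _)));`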
-template <typename M1, typename M2> +template <typename EventMatcher, typename BodyMatcher> inline testing::Matcher<const lldb_dap::protocol::Event &> -IsEvent(const M1 &m1, const M2 &m2) { - return testing::AllOf(testing::Field(&lldb_dap::protocol::Event::event, m1), - testing::Field(&lldb_dap::protocol::Event::body, m2)); +IsEvent(const EventMatcher &event_matcher, const BodyMatcher &body_matcher) { + return testing::AllOf( + testing::Field(&lldb_dap::protocol::Event::event, event_matcher), + testing::Field(&lldb_dap::protocol::Event::body, body_matcher)); +} + +template <typename EventMatcher> +inline testing::Matcher<const lldb_dap::protocol::Event &> +IsEvent(const EventMatcher &event_matcher) { + return testing::AllOf( + testing::Field(&lldb_dap::protocol::Event::event, event_matcher), + testing::Field(&lldb_dap::protocol::Event::body, std::nullopt)); } /// Matches an "output" event. @@ -110,8 +99,6 @@ inline auto Output(llvm::StringRef o, llvm::StringRef cat = "console") { /// A base class for tests that interact with a `lldb_dap::DAP` instance. class DAPTestBase : public TransportBase { protected: - std::unique_ptr<lldb_dap::Log> log; - std::unique_ptr<lldb_dap::DAP> dap; std::optional<llvm::sys::fs::TempFile> core; std::optional<llvm::sys::fs::TempFile> binary; @@ -126,12 +113,6 @@ protected: bool GetDebuggerSupportsTarget(llvm::StringRef platform); void CreateDebugger(); void LoadCore(); - - void RunOnce() { - loop.AddPendingCallback( - [](lldb_private::MainLoopBase &loop) { loop.RequestTermination(); }); - ASSERT_THAT_ERROR(dap->Loop(), llvm::Succeeded()); - } }; } // namespace lldb_dap_tests diff --git a/lldb/unittests/Host/JSONTransportTest.cpp b/lldb/unittests/Host/JSONTransportTest.cpp index 3a36bf2..7db6508 100644 --- a/lldb/unittests/Host/JSONTransportTest.cpp +++ b/lldb/unittests/Host/JSONTransportTest.cpp @@ -9,6 +9,7 @@ #include "lldb/Host/JSONTransport.h" #include "TestingSupport/Host/JSONTransportTestUtilities.h" #include "TestingSupport/Host/PipeTestUtilities.h" +#include "TestingSupport/SubsystemRAII.h" #include "lldb/Host/File.h" #include "lldb/Host/MainLoop.h" #include "lldb/Host/MainLoopBase.h" @@ -25,27 +26,45 @@ #include <chrono> #include <cstddef> #include <memory> +#include <optional> #include <string> +#include <system_error> using namespace llvm; using namespace lldb_private; +using namespace lldb_private::transport; using testing::_; using testing::HasSubstr; using testing::InSequence; +using testing::Ref; + +namespace llvm::json { +static bool fromJSON(const Value &V, Value &T, Path P) { + T = V; + return true; +} +} // namespace llvm::json namespace { namespace test_protocol { struct Req { + int id = 0; std::string name; + std::optional<json::Value> params; }; -json::Value toJSON(const Req &T) { return json::Object{{"req", T.name}}; } +json::Value toJSON(const Req &T) { + return json::Object{{"name", T.name}, {"id", T.id}, {"params", T.params}}; +} bool fromJSON(const json::Value &V, Req &T, json::Path P) { json::ObjectMapper O(V, P); - return O && O.map("req", T.name); + return O && O.map("name", T.name) && O.map("id", T.id) && + O.map("params", T.params); +} +bool operator==(const Req &a, const Req &b) { + return a.name == b.name && a.id == b.id && a.params == b.params; } -bool operator==(const Req &a, const Req &b) { return a.name == b.name; } inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Req &V) { OS << toJSON(V); return OS; @@ -58,14 +77,22 @@ void PrintTo(const Req &message, std::ostream *os) { } struct Resp { - std::string 
name; + int id = 0; + int errorCode = 0; + std::optional<json::Value> result; }; -json::Value toJSON(const Resp &T) { return json::Object{{"resp", T.name}}; } +json::Value toJSON(const Resp &T) { + return json::Object{ + {"id", T.id}, {"errorCode", T.errorCode}, {"result", T.result}}; +} bool fromJSON(const json::Value &V, Resp &T, json::Path P) { json::ObjectMapper O(V, P); - return O && O.map("resp", T.name); + return O && O.map("id", T.id) && O.mapOptional("errorCode", T.errorCode) && + O.map("result", T.result); +} +bool operator==(const Resp &a, const Resp &b) { + return a.id == b.id && a.errorCode == b.errorCode && a.result == b.result; } -bool operator==(const Resp &a, const Resp &b) { return a.name == b.name; } inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Resp &V) { OS << toJSON(V); return OS; @@ -79,11 +106,14 @@ void PrintTo(const Resp &message, std::ostream *os) { struct Evt { std::string name; + std::optional<json::Value> params; }; -json::Value toJSON(const Evt &T) { return json::Object{{"evt", T.name}}; } +json::Value toJSON(const Evt &T) { + return json::Object{{"name", T.name}, {"params", T.params}}; +} bool fromJSON(const json::Value &V, Evt &T, json::Path P) { json::ObjectMapper O(V, P); - return O && O.map("evt", T.name); + return O && O.map("name", T.name) && O.map("params", T.params); } bool operator==(const Evt &a, const Evt &b) { return a.name == b.name; } inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Evt &V) { @@ -107,41 +137,114 @@ bool fromJSON(const json::Value &V, Message &msg, json::Path P) { P.report("expected object"); return false; } - if (O->get("req")) { - Req R; - if (!fromJSON(V, R, P)) + + if (O->find("id") == O->end()) { + Evt E; + if (!fromJSON(V, E, P)) return false; - msg = std::move(R); + msg = std::move(E); return true; } - if (O->get("resp")) { - Resp R; + + if (O->get("name")) { + Req R; if (!fromJSON(V, R, P)) return false; msg = std::move(R); return true; } - if (O->get("evt")) { - Evt E; - if (!fromJSON(V, E, P)) - return false; - msg = std::move(E); - return true; - } - P.report("unknown message type"); - return false; + Resp R; + if (!fromJSON(V, R, P)) + return false; + + msg = std::move(R); + return true; } -} // namespace test_protocol +struct MyFnParams { + int a = 0; + int b = 0; +}; +json::Value toJSON(const MyFnParams &T) { + return json::Object{{"a", T.a}, {"b", T.b}}; +} +bool fromJSON(const json::Value &V, MyFnParams &T, json::Path P) { + json::ObjectMapper O(V, P); + return O && O.map("a", T.a) && O.map("b", T.b); +} + +struct MyFnResult { + int c = 0; +}; +json::Value toJSON(const MyFnResult &T) { return json::Object{{"c", T.c}}; } +bool fromJSON(const json::Value &V, MyFnResult &T, json::Path P) { + json::ObjectMapper O(V, P); + return O && O.map("c", T.c); +} -template <typename T, typename Req, typename Resp, typename Evt> -class JSONTransportTest : public PipePairTest { +struct ProtoDesc { + using Id = int; + using Req = Req; + using Resp = Resp; + using Evt = Evt; + static inline Id InitialId() { return 0; } + static inline Req Make(Id id, llvm::StringRef method, + std::optional<llvm::json::Value> params) { + return Req{id, method.str(), params}; + } + static inline Evt Make(llvm::StringRef method, + std::optional<llvm::json::Value> params) { + return Evt{method.str(), params}; + } + static inline Resp Make(Req req, llvm::Error error) { + Resp resp; + resp.id = req.id; + llvm::handleAllErrors( + std::move(error), [&](const llvm::ErrorInfoBase &err) { + std::error_code cerr = 
err.convertToErrorCode(); + resp.errorCode = + cerr == llvm::inconvertibleErrorCode() ? 1 : cerr.value(); + resp.result = err.message(); + }); + return resp; + } + static inline Resp Make(Req req, std::optional<llvm::json::Value> result) { + return Resp{req.id, 0, std::move(result)}; + } + static inline Id KeyFor(Resp r) { return r.id; } + static inline std::string KeyFor(Req r) { return r.name; } + static inline std::string KeyFor(Evt e) { return e.name; } + static inline std::optional<llvm::json::Value> Extract(Req r) { + return r.params; + } + static inline llvm::Expected<llvm::json::Value> Extract(Resp r) { + if (r.errorCode != 0) + return llvm::createStringError( + std::error_code(r.errorCode, std::generic_category()), + r.result && r.result->getAsString() ? *r.result->getAsString() + : "no-message"); + return r.result; + } + static inline std::optional<llvm::json::Value> Extract(Evt e) { + return e.params; + } +}; + +using Transport = TestTransport<ProtoDesc>; +using Binder = lldb_private::transport::Binder<ProtoDesc>; +using MessageHandler = MockMessageHandler<ProtoDesc>; + +} // namespace test_protocol + +template <typename T> class JSONTransportTest : public PipePairTest { protected: - MockMessageHandler<Req, Resp, Evt> message_handler; + SubsystemRAII<FileSystem> subsystems; + + test_protocol::MessageHandler message_handler; std::unique_ptr<T> transport; MainLoop loop; @@ -191,8 +294,7 @@ protected: }; class TestHTTPDelimitedJSONTransport final - : public HTTPDelimitedJSONTransport<test_protocol::Req, test_protocol::Resp, - test_protocol::Evt> { + : public HTTPDelimitedJSONTransport<test_protocol::ProtoDesc> { public: using HTTPDelimitedJSONTransport::HTTPDelimitedJSONTransport; @@ -204,9 +306,7 @@ public: }; class HTTPDelimitedJSONTransportTest - : public JSONTransportTest<TestHTTPDelimitedJSONTransport, - test_protocol::Req, test_protocol::Resp, - test_protocol::Evt> { + : public JSONTransportTest<TestHTTPDelimitedJSONTransport> { public: using JSONTransportTest::JSONTransportTest; @@ -222,8 +322,7 @@ public: }; class TestJSONRPCTransport final - : public JSONRPCTransport<test_protocol::Req, test_protocol::Resp, - test_protocol::Evt> { + : public JSONRPCTransport<test_protocol::ProtoDesc> { public: using JSONRPCTransport::JSONRPCTransport; @@ -234,9 +333,7 @@ public: std::vector<std::string> log_messages; }; -class JSONRPCTransportTest - : public JSONTransportTest<TestJSONRPCTransport, test_protocol::Req, - test_protocol::Resp, test_protocol::Evt> { +class JSONRPCTransportTest : public JSONTransportTest<TestJSONRPCTransport> { public: using JSONTransportTest::JSONTransportTest; @@ -248,6 +345,33 @@ public: } }; +class TransportBinderTest : public testing::Test { +protected: + SubsystemRAII<FileSystem> subsystems; + + std::unique_ptr<test_protocol::Transport> to_remote; + std::unique_ptr<test_protocol::Transport> from_remote; + std::unique_ptr<test_protocol::Binder> binder; + test_protocol::MessageHandler remote; + MainLoop loop; + + void SetUp() override { + std::tie(to_remote, from_remote) = test_protocol::Transport::createPair(); + binder = std::make_unique<test_protocol::Binder>(*to_remote); + + auto binder_handle = to_remote->RegisterMessageHandler(loop, remote); + EXPECT_THAT_EXPECTED(binder_handle, Succeeded()); + + auto remote_handle = from_remote->RegisterMessageHandler(loop, *binder); + EXPECT_THAT_EXPECTED(remote_handle, Succeeded()); + } + + void Run() { + loop.AddPendingCallback([](auto &loop) { loop.RequestTermination(); }); + 
EXPECT_THAT_ERROR(loop.Run().takeError(), Succeeded()); + } +}; + } // namespace // Failing on Windows, see https://github.com/llvm/llvm-project/issues/153446. @@ -269,35 +393,45 @@ TEST_F(HTTPDelimitedJSONTransportTest, MalformedRequests) { } TEST_F(HTTPDelimitedJSONTransportTest, Read) { - Write(Req{"foo"}); - EXPECT_CALL(message_handler, Received(Req{"foo"})); + Write(Req{6, "foo", std::nullopt}); + EXPECT_CALL(message_handler, Received(Req{6, "foo", std::nullopt})); ASSERT_THAT_ERROR(Run(), Succeeded()); } TEST_F(HTTPDelimitedJSONTransportTest, ReadMultipleMessagesInSingleWrite) { InSequence seq; - Write(Message{Req{"one"}}, Message{Evt{"two"}}, Message{Resp{"three"}}); - EXPECT_CALL(message_handler, Received(Req{"one"})); - EXPECT_CALL(message_handler, Received(Evt{"two"})); - EXPECT_CALL(message_handler, Received(Resp{"three"})); + Write( + Message{ + Req{6, "one", std::nullopt}, + }, + Message{ + Evt{"two", std::nullopt}, + }, + Message{ + Resp{2, 0, std::nullopt}, + }); + EXPECT_CALL(message_handler, Received(Req{6, "one", std::nullopt})); + EXPECT_CALL(message_handler, Received(Evt{"two", std::nullopt})); + EXPECT_CALL(message_handler, Received(Resp{2, 0, std::nullopt})); ASSERT_THAT_ERROR(Run(), Succeeded()); } TEST_F(HTTPDelimitedJSONTransportTest, ReadAcrossMultipleChunks) { std::string long_str = std::string( - HTTPDelimitedJSONTransport<Req, Resp, Evt>::kReadBufferSize * 2, 'x'); - Write(Req{long_str}); - EXPECT_CALL(message_handler, Received(Req{long_str})); + HTTPDelimitedJSONTransport<test_protocol::ProtoDesc>::kReadBufferSize * 2, + 'x'); + Write(Req{5, long_str, std::nullopt}); + EXPECT_CALL(message_handler, Received(Req{5, long_str, std::nullopt})); ASSERT_THAT_ERROR(Run(), Succeeded()); } TEST_F(HTTPDelimitedJSONTransportTest, ReadPartialMessage) { - std::string message = Encode(Req{"foo"}); + std::string message = Encode(Req{5, "foo", std::nullopt}); auto split_at = message.size() / 2; std::string part1 = message.substr(0, split_at); std::string part2 = message.substr(split_at); - EXPECT_CALL(message_handler, Received(Req{"foo"})); + EXPECT_CALL(message_handler, Received(Req{5, "foo", std::nullopt})); ASSERT_THAT_EXPECTED(input.Write(part1.data(), part1.size()), Succeeded()); loop.AddPendingCallback( @@ -309,12 +443,12 @@ TEST_F(HTTPDelimitedJSONTransportTest, ReadPartialMessage) { } TEST_F(HTTPDelimitedJSONTransportTest, ReadWithZeroByteWrites) { - std::string message = Encode(Req{"foo"}); + std::string message = Encode(Req{6, "foo", std::nullopt}); auto split_at = message.size() / 2; std::string part1 = message.substr(0, split_at); std::string part2 = message.substr(split_at); - EXPECT_CALL(message_handler, Received(Req{"foo"})); + EXPECT_CALL(message_handler, Received(Req{6, "foo", std::nullopt})); ASSERT_THAT_EXPECTED(input.Write(part1.data(), part1.size()), Succeeded()); @@ -366,20 +500,21 @@ TEST_F(HTTPDelimitedJSONTransportTest, InvalidTransport) { } TEST_F(HTTPDelimitedJSONTransportTest, Write) { - ASSERT_THAT_ERROR(transport->Send(Req{"foo"}), Succeeded()); - ASSERT_THAT_ERROR(transport->Send(Resp{"bar"}), Succeeded()); - ASSERT_THAT_ERROR(transport->Send(Evt{"baz"}), Succeeded()); + ASSERT_THAT_ERROR(transport->Send(Req{7, "foo", std::nullopt}), Succeeded()); + ASSERT_THAT_ERROR(transport->Send(Resp{5, 0, "bar"}), Succeeded()); + ASSERT_THAT_ERROR(transport->Send(Evt{"baz", std::nullopt}), Succeeded()); output.CloseWriteFileDescriptor(); char buf[1024]; Expected<size_t> bytes_read = output.Read(buf, sizeof(buf), std::chrono::milliseconds(1)); 
ASSERT_THAT_EXPECTED(bytes_read, Succeeded()); - ASSERT_EQ(StringRef(buf, *bytes_read), StringRef("Content-Length: 13\r\n\r\n" - R"({"req":"foo"})" - "Content-Length: 14\r\n\r\n" - R"({"resp":"bar"})" - "Content-Length: 13\r\n\r\n" - R"({"evt":"baz"})")); + ASSERT_EQ(StringRef(buf, *bytes_read), + StringRef("Content-Length: 35\r\n\r\n" + R"({"id":7,"name":"foo","params":null})" + "Content-Length: 37\r\n\r\n" + R"({"errorCode":0,"id":5,"result":"bar"})" + "Content-Length: 28\r\n\r\n" + R"({"name":"baz","params":null})")); } TEST_F(JSONRPCTransportTest, MalformedRequests) { @@ -395,37 +530,38 @@ TEST_F(JSONRPCTransportTest, MalformedRequests) { } TEST_F(JSONRPCTransportTest, Read) { - Write(Message{Req{"foo"}}); - EXPECT_CALL(message_handler, Received(Req{"foo"})); + Write(Message{Req{1, "foo", std::nullopt}}); + EXPECT_CALL(message_handler, Received(Req{1, "foo", std::nullopt})); ASSERT_THAT_ERROR(Run(), Succeeded()); } TEST_F(JSONRPCTransportTest, ReadMultipleMessagesInSingleWrite) { InSequence seq; - Write(Message{Req{"one"}}, Message{Evt{"two"}}, Message{Resp{"three"}}); - EXPECT_CALL(message_handler, Received(Req{"one"})); - EXPECT_CALL(message_handler, Received(Evt{"two"})); - EXPECT_CALL(message_handler, Received(Resp{"three"})); + Write(Message{Req{1, "one", std::nullopt}}, Message{Evt{"two", std::nullopt}}, + Message{Resp{3, 0, "three"}}); + EXPECT_CALL(message_handler, Received(Req{1, "one", std::nullopt})); + EXPECT_CALL(message_handler, Received(Evt{"two", std::nullopt})); + EXPECT_CALL(message_handler, Received(Resp{3, 0, "three"})); ASSERT_THAT_ERROR(Run(), Succeeded()); } TEST_F(JSONRPCTransportTest, ReadAcrossMultipleChunks) { // Use a string longer than the chunk size to ensure we split the message // across the chunk boundary. - std::string long_str = - std::string(IOTransport<Req, Resp, Evt>::kReadBufferSize * 2, 'x'); - Write(Req{long_str}); - EXPECT_CALL(message_handler, Received(Req{long_str})); + std::string long_str = std::string( + IOTransport<test_protocol::ProtoDesc>::kReadBufferSize * 2, 'x'); + Write(Req{42, long_str, std::nullopt}); + EXPECT_CALL(message_handler, Received(Req{42, long_str, std::nullopt})); ASSERT_THAT_ERROR(Run(), Succeeded()); } TEST_F(JSONRPCTransportTest, ReadPartialMessage) { - std::string message = R"({"req": "foo"})" + std::string message = R"({"id":42,"name":"foo","params":null})" "\n"; std::string part1 = message.substr(0, 7); std::string part2 = message.substr(7); - EXPECT_CALL(message_handler, Received(Req{"foo"})); + EXPECT_CALL(message_handler, Received(Req{42, "foo", std::nullopt})); ASSERT_THAT_EXPECTED(input.Write(part1.data(), part1.size()), Succeeded()); loop.AddPendingCallback( @@ -455,20 +591,21 @@ TEST_F(JSONRPCTransportTest, ReaderWithUnhandledData) { } TEST_F(JSONRPCTransportTest, Write) { - ASSERT_THAT_ERROR(transport->Send(Req{"foo"}), Succeeded()); - ASSERT_THAT_ERROR(transport->Send(Resp{"bar"}), Succeeded()); - ASSERT_THAT_ERROR(transport->Send(Evt{"baz"}), Succeeded()); + ASSERT_THAT_ERROR(transport->Send(Req{11, "foo", std::nullopt}), Succeeded()); + ASSERT_THAT_ERROR(transport->Send(Resp{14, 0, "bar"}), Succeeded()); + ASSERT_THAT_ERROR(transport->Send(Evt{"baz", std::nullopt}), Succeeded()); output.CloseWriteFileDescriptor(); char buf[1024]; Expected<size_t> bytes_read = output.Read(buf, sizeof(buf), std::chrono::milliseconds(1)); ASSERT_THAT_EXPECTED(bytes_read, Succeeded()); - ASSERT_EQ(StringRef(buf, *bytes_read), StringRef(R"({"req":"foo"})" - "\n" - R"({"resp":"bar"})" - "\n" - R"({"evt":"baz"})" - "\n")); + 
ASSERT_EQ(StringRef(buf, *bytes_read), + StringRef(R"({"id":11,"name":"foo","params":null})" + "\n" + R"({"errorCode":0,"id":14,"result":"bar"})" + "\n" + R"({"name":"baz","params":null})" + "\n")); } TEST_F(JSONRPCTransportTest, InvalidTransport) { @@ -477,4 +614,183 @@ TEST_F(JSONRPCTransportTest, InvalidTransport) { FailedWithMessage("IO object is not valid.")); } +// Out-bound binding request handler. +TEST_F(TransportBinderTest, OutBoundRequests) { + OutgoingRequest<MyFnResult, MyFnParams> addFn = + binder->Bind<MyFnResult, MyFnParams>("add"); + bool replied = false; + addFn(MyFnParams{1, 2}, [&](Expected<MyFnResult> result) { + EXPECT_THAT_EXPECTED(result, Succeeded()); + EXPECT_EQ(result->c, 3); + replied = true; + }); + EXPECT_CALL(remote, Received(Req{1, "add", MyFnParams{1, 2}})); + EXPECT_THAT_ERROR(from_remote->Send(Resp{1, 0, toJSON(MyFnResult{3})}), + Succeeded()); + Run(); + EXPECT_TRUE(replied); +} + +TEST_F(TransportBinderTest, OutBoundRequestsVoidParams) { + OutgoingRequest<MyFnResult, void> voidParamFn = + binder->Bind<MyFnResult, void>("voidParam"); + bool replied = false; + voidParamFn([&](Expected<MyFnResult> result) { + EXPECT_THAT_EXPECTED(result, Succeeded()); + EXPECT_EQ(result->c, 3); + replied = true; + }); + EXPECT_CALL(remote, Received(Req{1, "voidParam", std::nullopt})); + EXPECT_THAT_ERROR(from_remote->Send(Resp{1, 0, toJSON(MyFnResult{3})}), + Succeeded()); + Run(); + EXPECT_TRUE(replied); +} + +TEST_F(TransportBinderTest, OutBoundRequestsVoidResult) { + OutgoingRequest<void, MyFnParams> voidResultFn = + binder->Bind<void, MyFnParams>("voidResult"); + bool replied = false; + voidResultFn(MyFnParams{4, 5}, [&](llvm::Error error) { + EXPECT_THAT_ERROR(std::move(error), Succeeded()); + replied = true; + }); + EXPECT_CALL(remote, Received(Req{1, "voidResult", MyFnParams{4, 5}})); + EXPECT_THAT_ERROR(from_remote->Send(Resp{1, 0, std::nullopt}), Succeeded()); + Run(); + EXPECT_TRUE(replied); +} + +TEST_F(TransportBinderTest, OutBoundRequestsVoidParamsAndVoidResult) { + OutgoingRequest<void, void> voidParamAndResultFn = + binder->Bind<void, void>("voidParamAndResult"); + bool replied = false; + voidParamAndResultFn([&](llvm::Error error) { + EXPECT_THAT_ERROR(std::move(error), Succeeded()); + replied = true; + }); + EXPECT_CALL(remote, Received(Req{1, "voidParamAndResult", std::nullopt})); + EXPECT_THAT_ERROR(from_remote->Send(Resp{1, 0, std::nullopt}), Succeeded()); + Run(); + EXPECT_TRUE(replied); +} + +// In-bound binding request handler. 
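+// An in-bound request is dispatched to the handler registered under its name;
+// the handler receives the decoded params (plus any argument captured at Bind
+// time) and its return value is sent back as the response. The tests below
+// exercise each combination of void/non-void params and results.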
+TEST_F(TransportBinderTest, InBoundRequests) { + bool called = false; + binder->Bind<MyFnResult, MyFnParams>( + "add", + [&](const int captured_param, + const MyFnParams ¶ms) -> Expected<MyFnResult> { + called = true; + return MyFnResult{params.a + params.b + captured_param}; + }, + 2); + EXPECT_THAT_ERROR(from_remote->Send(Req{1, "add", MyFnParams{3, 4}}), + Succeeded()); + + EXPECT_CALL(remote, Received(Resp{1, 0, MyFnResult{9}})); + Run(); + EXPECT_TRUE(called); +} + +TEST_F(TransportBinderTest, InBoundRequestsVoidParams) { + bool called = false; + binder->Bind<MyFnResult, void>( + "voidParam", + [&](const int captured_param) -> Expected<MyFnResult> { + called = true; + return MyFnResult{captured_param}; + }, + 2); + EXPECT_THAT_ERROR(from_remote->Send(Req{2, "voidParam", std::nullopt}), + Succeeded()); + EXPECT_CALL(remote, Received(Resp{2, 0, MyFnResult{2}})); + Run(); + EXPECT_TRUE(called); +} + +TEST_F(TransportBinderTest, InBoundRequestsVoidResult) { + bool called = false; + binder->Bind<void, MyFnParams>( + "voidResult", + [&](const int captured_param, const MyFnParams ¶ms) -> llvm::Error { + called = true; + EXPECT_EQ(captured_param, 2); + EXPECT_EQ(params.a, 3); + EXPECT_EQ(params.b, 4); + return llvm::Error::success(); + }, + 2); + EXPECT_THAT_ERROR(from_remote->Send(Req{3, "voidResult", MyFnParams{3, 4}}), + Succeeded()); + EXPECT_CALL(remote, Received(Resp{3, 0, std::nullopt})); + Run(); + EXPECT_TRUE(called); +} +TEST_F(TransportBinderTest, InBoundRequestsVoidParamsAndResult) { + bool called = false; + binder->Bind<void, void>( + "voidParamAndResult", + [&](const int captured_param) -> llvm::Error { + called = true; + EXPECT_EQ(captured_param, 2); + return llvm::Error::success(); + }, + 2); + EXPECT_THAT_ERROR( + from_remote->Send(Req{4, "voidParamAndResult", std::nullopt}), + Succeeded()); + EXPECT_CALL(remote, Received(Resp{4, 0, std::nullopt})); + Run(); + EXPECT_TRUE(called); +} + +// Out-bound binding event handler. +TEST_F(TransportBinderTest, OutBoundEvents) { + OutgoingEvent<MyFnParams> emitEvent = binder->Bind<MyFnParams>("evt"); + emitEvent(MyFnParams{1, 2}); + EXPECT_CALL(remote, Received(Evt{"evt", MyFnParams{1, 2}})); + Run(); +} + +TEST_F(TransportBinderTest, OutBoundEventsVoidParams) { + OutgoingEvent<void> emitEvent = binder->Bind<void>("evt"); + emitEvent(); + EXPECT_CALL(remote, Received(Evt{"evt", std::nullopt})); + Run(); +} + +// In-bound binding event handler. 
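+// Unlike requests, events carry no id and produce no response; the bound
+// handler is simply invoked with the decoded params.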
+TEST_F(TransportBinderTest, InBoundEvents) { + bool called = false; + binder->Bind<MyFnParams>( + "evt", + [&](const int captured_arg, const MyFnParams ¶ms) { + EXPECT_EQ(captured_arg, 42); + EXPECT_EQ(params.a, 3); + EXPECT_EQ(params.b, 4); + called = true; + }, + 42); + EXPECT_THAT_ERROR(from_remote->Send(Evt{"evt", MyFnParams{3, 4}}), + Succeeded()); + Run(); + EXPECT_TRUE(called); +} + +TEST_F(TransportBinderTest, InBoundEventsVoidParams) { + bool called = false; + binder->Bind<void>( + "evt", + [&](const int captured_arg) { + EXPECT_EQ(captured_arg, 42); + called = true; + }, + 42); + EXPECT_THAT_ERROR(from_remote->Send(Evt{"evt", std::nullopt}), Succeeded()); + Run(); + EXPECT_TRUE(called); +} + #endif diff --git a/lldb/unittests/Host/posix/HostTest.cpp b/lldb/unittests/Host/posix/HostTest.cpp index dc75b28..7135f26 100644 --- a/lldb/unittests/Host/posix/HostTest.cpp +++ b/lldb/unittests/Host/posix/HostTest.cpp @@ -15,10 +15,6 @@ #include <cerrno> #include <sys/resource.h> -#ifdef __linux__ -#include <linux/version.h> -#endif // __linux__ - using namespace lldb_private; namespace { @@ -120,12 +116,13 @@ TEST_F(HostTest, GetProcessInfoSetsPriority) { ASSERT_TRUE(Info.IsZombie().has_value()); ASSERT_FALSE(Info.IsZombie().value()); - // CoreDumping was added in kernel version 4.15. -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0) - ASSERT_TRUE(Info.IsCoreDumping().has_value()); - ASSERT_FALSE(Info.IsCoreDumping().value()); -#else - ASSERT_FALSE(Info.IsCoreDumping().has_value()); -#endif + const llvm::VersionTuple host_version = HostInfo::GetOSVersion(); + ASSERT_FALSE(host_version.empty()); + if (host_version >= llvm::VersionTuple(4, 15, 0)) { + ASSERT_TRUE(Info.IsCoreDumping().has_value()); + ASSERT_FALSE(Info.IsCoreDumping().value()); + } else { + ASSERT_FALSE(Info.IsCoreDumping().has_value()); + } } #endif diff --git a/lldb/unittests/Protocol/ProtocolMCPServerTest.cpp b/lldb/unittests/Protocol/ProtocolMCPServerTest.cpp index f3ca4cf..9628cbd 100644 --- a/lldb/unittests/Protocol/ProtocolMCPServerTest.cpp +++ b/lldb/unittests/Protocol/ProtocolMCPServerTest.cpp @@ -6,9 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "ProtocolMCPTestUtilities.h" +#include "ProtocolMCPTestUtilities.h" // IWYU pragma: keep #include "TestingSupport/Host/JSONTransportTestUtilities.h" -#include "TestingSupport/Host/PipeTestUtilities.h" #include "TestingSupport/SubsystemRAII.h" #include "lldb/Host/FileSystem.h" #include "lldb/Host/HostInfo.h" @@ -28,20 +27,22 @@ #include "llvm/Testing/Support/Error.h" #include "gmock/gmock.h" #include "gtest/gtest.h" -#include <chrono> -#include <condition_variable> +#include <future> +#include <memory> +#include <optional> +#include <system_error> using namespace llvm; using namespace lldb; using namespace lldb_private; +using namespace lldb_private::transport; using namespace lldb_protocol::mcp; namespace { -class TestServer : public Server { -public: - using Server::Server; -}; +template <typename T> Response make_response(T &&result, Id id = 1) { + return Response{id, std::forward<T>(result)}; +} /// Test tool that returns it argument as text. 
class TestTool : public Tool { @@ -101,7 +102,9 @@ public: using Tool::Tool; llvm::Expected<CallToolResult> Call(const ToolArguments &args) override { - return llvm::createStringError("error"); + return llvm::createStringError( + std::error_code(eErrorCodeInternalError, std::generic_category()), + "error"); } }; @@ -118,195 +121,207 @@ public: } }; -class ProtocolServerMCPTest : public PipePairTest { +class TestServer : public Server { +public: + using Server::Bind; + using Server::Server; +}; + +using Transport = TestTransport<lldb_protocol::mcp::ProtocolDescriptor>; + +class ProtocolServerMCPTest : public testing::Test { public: SubsystemRAII<FileSystem, HostInfo, Socket> subsystems; MainLoop loop; + lldb_private::MainLoop::ReadHandleUP handles[2]; - std::unique_ptr<lldb_protocol::mcp::Transport> from_client; - std::unique_ptr<lldb_protocol::mcp::Transport> to_client; - MainLoopBase::ReadHandleUP handles[2]; - + std::unique_ptr<Transport> to_server; + MCPBinderUP binder; std::unique_ptr<TestServer> server_up; - MockMessageHandler<Request, Response, Notification> message_handler; - llvm::Error Write(llvm::StringRef message) { - llvm::Expected<json::Value> value = json::parse(message); - if (!value) - return value.takeError(); - return from_client->Write(*value); - } + std::unique_ptr<Transport> to_client; + MockMessageHandler<lldb_protocol::mcp::ProtocolDescriptor> client; - llvm::Error Write(json::Value value) { return from_client->Write(value); } + std::vector<std::string> logged_messages; - /// Run the transport MainLoop and return any messages received. - llvm::Error Run() { - loop.AddCallback([](MainLoopBase &loop) { loop.RequestTermination(); }, - std::chrono::milliseconds(10)); - return loop.Run().takeError(); + /// Runs the MainLoop a single time, executing any pending callbacks. 
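+  /// TestTransport::Send queues each message as a pending callback on this
+  /// loop, so a single Run() delivers everything sent since the last Run().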
+ void Run() { + loop.AddPendingCallback( + [](MainLoopBase &loop) { loop.RequestTermination(); }); + EXPECT_THAT_ERROR(loop.Run().takeError(), Succeeded()); } void SetUp() override { - PipePairTest::SetUp(); - - from_client = std::make_unique<lldb_protocol::mcp::Transport>( - std::make_shared<NativeFile>(input.GetReadFileDescriptor(), - File::eOpenOptionReadOnly, - NativeFile::Unowned), - std::make_shared<NativeFile>(output.GetWriteFileDescriptor(), - File::eOpenOptionWriteOnly, - NativeFile::Unowned), - [](StringRef message) { - // Uncomment for debugging - // llvm::errs() << "from_client: " << message << '\n'; - }); - to_client = std::make_unique<lldb_protocol::mcp::Transport>( - std::make_shared<NativeFile>(output.GetReadFileDescriptor(), - File::eOpenOptionReadOnly, - NativeFile::Unowned), - std::make_shared<NativeFile>(input.GetWriteFileDescriptor(), - File::eOpenOptionWriteOnly, - NativeFile::Unowned), - [](StringRef message) { - // Uncomment for debugging - // llvm::errs() << "to_client: " << message << '\n'; - }); - - server_up = std::make_unique<TestServer>("lldb-mcp", "0.1.0", *to_client, - [](StringRef message) { - // Uncomment for debugging - // llvm::errs() << "server: " << - // message << '\n'; - }); - - auto maybe_from_client_handle = - from_client->RegisterMessageHandler(loop, message_handler); - EXPECT_THAT_EXPECTED(maybe_from_client_handle, Succeeded()); - handles[0] = std::move(*maybe_from_client_handle); - - auto maybe_to_client_handle = - to_client->RegisterMessageHandler(loop, *server_up); - EXPECT_THAT_EXPECTED(maybe_to_client_handle, Succeeded()); - handles[1] = std::move(*maybe_to_client_handle); + std::tie(to_client, to_server) = Transport::createPair(); + + server_up = std::make_unique<TestServer>( + "lldb-mcp", "0.1.0", + [this](StringRef msg) { logged_messages.push_back(msg.str()); }); + binder = server_up->Bind(*to_client); + auto server_handle = to_server->RegisterMessageHandler(loop, *binder); + EXPECT_THAT_EXPECTED(server_handle, Succeeded()); + binder->OnError([](llvm::Error error) { + llvm::errs() << formatv("Server transport error: {0}", error); + }); + handles[0] = std::move(*server_handle); + + auto client_handle = to_client->RegisterMessageHandler(loop, client); + EXPECT_THAT_EXPECTED(client_handle, Succeeded()); + handles[1] = std::move(*client_handle); + } + + template <typename Result, typename Params> + Expected<json::Value> Call(StringRef method, const Params ¶ms) { + std::promise<Response> promised_result; + Request req = + lldb_protocol::mcp::Request{/*id=*/1, method.str(), toJSON(params)}; + EXPECT_THAT_ERROR(to_server->Send(req), Succeeded()); + EXPECT_CALL(client, Received(testing::An<const Response &>())) + .WillOnce( + [&](const Response &resp) { promised_result.set_value(resp); }); + Run(); + Response resp = promised_result.get_future().get(); + return toJSON(resp); + } + + template <typename Result> + Expected<json::Value> + Capture(llvm::unique_function<void(Reply<Result>)> &fn) { + std::promise<llvm::Expected<Result>> promised_result; + fn([&promised_result](llvm::Expected<Result> result) { + promised_result.set_value(std::move(result)); + }); + Run(); + llvm::Expected<Result> result = promised_result.get_future().get(); + if (!result) + return result.takeError(); + return toJSON(*result); + } + + template <typename Result, typename Params> + Expected<json::Value> + Capture(llvm::unique_function<void(const Params &, Reply<Result>)> &fn, + const Params ¶ms) { + std::promise<llvm::Expected<Result>> promised_result; + fn(params, 
[&promised_result](llvm::Expected<Result> result) { + promised_result.set_value(std::move(result)); + }); + Run(); + llvm::Expected<Result> result = promised_result.get_future().get(); + if (!result) + return result.takeError(); + return toJSON(*result); } }; template <typename T> -Request make_request(StringLiteral method, T &¶ms, Id id = 1) { - return Request{id, method.str(), toJSON(std::forward<T>(params))}; -} - -template <typename T> Response make_response(T &&result, Id id = 1) { - return Response{id, std::forward<T>(result)}; +inline testing::internal::EqMatcher<llvm::json::Value> HasJSON(T x) { + return testing::internal::EqMatcher<llvm::json::Value>(toJSON(x)); } } // namespace TEST_F(ProtocolServerMCPTest, Initialization) { - Request request = make_request( - "initialize", InitializeParams{/*protocolVersion=*/"2024-11-05", - /*capabilities=*/{}, - /*clientInfo=*/{"lldb-unit", "0.1.0"}}); - Response response = make_response( - InitializeResult{/*protocolVersion=*/"2024-11-05", - /*capabilities=*/{/*supportsToolsList=*/true}, - /*serverInfo=*/{"lldb-mcp", "0.1.0"}}); - - ASSERT_THAT_ERROR(Write(request), Succeeded()); - EXPECT_CALL(message_handler, Received(response)); - EXPECT_THAT_ERROR(Run(), Succeeded()); + EXPECT_THAT_EXPECTED( + (Call<InitializeResult, InitializeParams>( + "initialize", + InitializeParams{/*protocolVersion=*/"2024-11-05", + /*capabilities=*/{}, + /*clientInfo=*/{"lldb-unit", "0.1.0"}})), + HasValue(make_response( + InitializeResult{/*protocolVersion=*/"2024-11-05", + /*capabilities=*/ + { + /*supportsToolsList=*/true, + /*supportsResourcesList=*/true, + }, + /*serverInfo=*/{"lldb-mcp", "0.1.0"}}))); } TEST_F(ProtocolServerMCPTest, ToolsList) { server_up->AddTool(std::make_unique<TestTool>("test", "test tool")); - Request request = make_request("tools/list", Void{}, /*id=*/"one"); - ToolDefinition test_tool; test_tool.name = "test"; test_tool.description = "test tool"; test_tool.inputSchema = json::Object{{"type", "object"}}; - Response response = make_response(ListToolsResult{{test_tool}}, /*id=*/"one"); - - ASSERT_THAT_ERROR(Write(request), llvm::Succeeded()); - EXPECT_CALL(message_handler, Received(response)); - EXPECT_THAT_ERROR(Run(), Succeeded()); + EXPECT_THAT_EXPECTED(Call<ListToolsResult>("tools/list", Void{}), + HasValue(make_response(ListToolsResult{{test_tool}}))); } TEST_F(ProtocolServerMCPTest, ResourcesList) { server_up->AddResourceProvider(std::make_unique<TestResourceProvider>()); - Request request = make_request("resources/list", Void{}); - Response response = make_response(ListResourcesResult{ - {{/*uri=*/"lldb://foo/bar", /*name=*/"name", - /*description=*/"description", /*mimeType=*/"application/json"}}}); - - ASSERT_THAT_ERROR(Write(request), llvm::Succeeded()); - EXPECT_CALL(message_handler, Received(response)); - EXPECT_THAT_ERROR(Run(), Succeeded()); + EXPECT_THAT_EXPECTED(Call<ListResourcesResult>("resources/list", Void{}), + HasValue(make_response(ListResourcesResult{{ + { + /*uri=*/"lldb://foo/bar", + /*name=*/"name", + /*description=*/"description", + /*mimeType=*/"application/json", + }, + }}))); } TEST_F(ProtocolServerMCPTest, ToolsCall) { server_up->AddTool(std::make_unique<TestTool>("test", "test tool")); - Request request = make_request( - "tools/call", CallToolParams{/*name=*/"test", /*arguments=*/json::Object{ - {"arguments", "foo"}, - {"debugger_id", 0}, - }}); - Response response = make_response(CallToolResult{{{/*text=*/"foo"}}}); - - ASSERT_THAT_ERROR(Write(request), llvm::Succeeded()); - EXPECT_CALL(message_handler, 
Received(response)); - EXPECT_THAT_ERROR(Run(), Succeeded()); + EXPECT_THAT_EXPECTED( + (Call<CallToolResult, CallToolParams>("tools/call", + CallToolParams{ + /*name=*/"test", + /*arguments=*/ + json::Object{ + {"arguments", "foo"}, + {"debugger_id", 0}, + }, + })), + HasValue(make_response(CallToolResult{{{/*text=*/"foo"}}}))); } TEST_F(ProtocolServerMCPTest, ToolsCallError) { server_up->AddTool(std::make_unique<ErrorTool>("error", "error tool")); - Request request = make_request( - "tools/call", CallToolParams{/*name=*/"error", /*arguments=*/json::Object{ - {"arguments", "foo"}, - {"debugger_id", 0}, - }}); - Response response = - make_response(lldb_protocol::mcp::Error{eErrorCodeInternalError, - /*message=*/"error"}); - - ASSERT_THAT_ERROR(Write(request), llvm::Succeeded()); - EXPECT_CALL(message_handler, Received(response)); - EXPECT_THAT_ERROR(Run(), Succeeded()); + EXPECT_THAT_EXPECTED((Call<CallToolResult, CallToolParams>( + "tools/call", CallToolParams{ + /*name=*/"error", + /*arguments=*/ + json::Object{ + {"arguments", "foo"}, + {"debugger_id", 0}, + }, + })), + HasValue(make_response(lldb_protocol::mcp::Error{ + eErrorCodeInternalError, "error"}))); } TEST_F(ProtocolServerMCPTest, ToolsCallFail) { server_up->AddTool(std::make_unique<FailTool>("fail", "fail tool")); - Request request = make_request( - "tools/call", CallToolParams{/*name=*/"fail", /*arguments=*/json::Object{ - {"arguments", "foo"}, - {"debugger_id", 0}, - }}); - Response response = - make_response(CallToolResult{{{/*text=*/"failed"}}, /*isError=*/true}); - - ASSERT_THAT_ERROR(Write(request), llvm::Succeeded()); - EXPECT_CALL(message_handler, Received(response)); - EXPECT_THAT_ERROR(Run(), Succeeded()); + EXPECT_THAT_EXPECTED((Call<CallToolResult, CallToolParams>( + "tools/call", CallToolParams{ + /*name=*/"fail", + /*arguments=*/ + json::Object{ + {"arguments", "foo"}, + {"debugger_id", 0}, + }, + })), + HasValue(make_response(CallToolResult{ + {{/*text=*/"failed"}}, + /*isError=*/true, + }))); } TEST_F(ProtocolServerMCPTest, NotificationInitialized) { - bool handler_called = false; - std::condition_variable cv; - - server_up->AddNotificationHandler( - "notifications/initialized", - [&](const Notification ¬ification) { handler_called = true; }); - llvm::StringLiteral request = - R"json({"method":"notifications/initialized","jsonrpc":"2.0"})json"; - - ASSERT_THAT_ERROR(Write(request), llvm::Succeeded()); - EXPECT_THAT_ERROR(Run(), Succeeded()); - EXPECT_TRUE(handler_called); + EXPECT_THAT_ERROR(to_server->Send(lldb_protocol::mcp::Notification{ + "notifications/initialized", + std::nullopt, + }), + Succeeded()); + Run(); + EXPECT_THAT(logged_messages, + testing::Contains("MCP initialization complete")); } diff --git a/lldb/unittests/Target/CMakeLists.txt b/lldb/unittests/Target/CMakeLists.txt index 3169339..0c79675 100644 --- a/lldb/unittests/Target/CMakeLists.txt +++ b/lldb/unittests/Target/CMakeLists.txt @@ -2,6 +2,7 @@ add_lldb_unittest(TargetTests ABITest.cpp DynamicRegisterInfoTest.cpp ExecutionContextTest.cpp + Language.cpp LocateModuleCallbackTest.cpp MemoryRegionInfoTest.cpp MemoryTest.cpp diff --git a/lldb/unittests/Target/Language.cpp b/lldb/unittests/Target/Language.cpp new file mode 100644 index 0000000..a00fda78 --- /dev/null +++ b/lldb/unittests/Target/Language.cpp @@ -0,0 +1,69 @@ +//===-- LanguageTest.cpp --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/Target/Language.h" +#include "lldb/lldb-enumerations.h" +#include "gtest/gtest.h" + +using namespace lldb_private; +using namespace lldb; + +namespace { +class LanguageTest : public ::testing::Test {}; +} // namespace + +TEST_F(LanguageTest, SourceLanguage_GetDescription) { + for (uint32_t i = 1; i < lldb::eNumLanguageTypes; ++i) { + // 0x29 is unassigned + if (i == 0x29) + continue; + + auto lang_type = static_cast<lldb::LanguageType>(i); + if (lang_type == lldb::eLanguageTypeLastStandardLanguage) + continue; + + SourceLanguage lang(lang_type); + + // eLanguageTypeHIP is not implemented as a DW_LNAME because of a conflict. + if (lang_type == lldb::eLanguageTypeHIP) + EXPECT_FALSE(lang); + else + EXPECT_TRUE(lang); + } + + EXPECT_EQ(SourceLanguage(eLanguageTypeC_plus_plus).GetDescription(), + "ISO C++"); + EXPECT_EQ(SourceLanguage(eLanguageTypeC_plus_plus_17).GetDescription(), + "ISO C++"); + EXPECT_EQ(SourceLanguage(eLanguageTypeC_plus_plus_20).GetDescription(), + "ISO C++"); + + EXPECT_EQ(SourceLanguage(eLanguageTypeObjC).GetDescription(), "Objective C"); + EXPECT_EQ(SourceLanguage(eLanguageTypeMipsAssembler).GetDescription(), + "Assembly"); + + auto next_vendor_language = + static_cast<lldb::LanguageType>(eLanguageTypeMipsAssembler + 1); + if (next_vendor_language < eNumLanguageTypes) + EXPECT_NE(SourceLanguage(next_vendor_language).GetDescription(), "Unknown"); + + EXPECT_EQ(SourceLanguage(eLanguageTypeUnknown).GetDescription(), "Unknown"); +} + +TEST_F(LanguageTest, SourceLanguage_AsLanguageType) { + EXPECT_EQ(SourceLanguage(eLanguageTypeC_plus_plus).AsLanguageType(), + eLanguageTypeC_plus_plus); + EXPECT_EQ(SourceLanguage(eLanguageTypeC_plus_plus_03).AsLanguageType(), + eLanguageTypeC_plus_plus_03); + + // Vendor-specific language code. 
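+  // The DWARF vendor code for MIPS assembler is expected to normalize to the
+  // generic assembly language.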
+ EXPECT_EQ(SourceLanguage(eLanguageTypeMipsAssembler).AsLanguageType(), + eLanguageTypeAssembly); + EXPECT_EQ(SourceLanguage(eLanguageTypeUnknown).AsLanguageType(), + eLanguageTypeUnknown); +} diff --git a/lldb/unittests/TestingSupport/Host/JSONTransportTestUtilities.h b/lldb/unittests/TestingSupport/Host/JSONTransportTestUtilities.h index 5a9eb8e..bacf8ca 100644 --- a/lldb/unittests/TestingSupport/Host/JSONTransportTestUtilities.h +++ b/lldb/unittests/TestingSupport/Host/JSONTransportTestUtilities.h @@ -6,19 +6,105 @@ // //===----------------------------------------------------------------------===// -#ifndef LLDB_UNITTESTS_TESTINGSUPPORT_HOST_NATIVEPROCESSTESTUTILS_H -#define LLDB_UNITTESTS_TESTINGSUPPORT_HOST_NATIVEPROCESSTESTUTILS_H +#ifndef LLDB_UNITTESTS_TESTINGSUPPORT_HOST_JSONTRANSPORTTESTUTILITIES_H +#define LLDB_UNITTESTS_TESTINGSUPPORT_HOST_JSONTRANSPORTTESTUTILITIES_H +#include "lldb/Host/FileSystem.h" #include "lldb/Host/JSONTransport.h" +#include "lldb/Host/MainLoop.h" +#include "lldb/Utility/FileSpec.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Testing/Support/Error.h" #include "gmock/gmock.h" +#include "gtest/gtest.h" +#include <cstddef> +#include <memory> +#include <utility> -template <typename Req, typename Resp, typename Evt> +template <typename Proto> +class TestTransport final + : public lldb_private::transport::JSONTransport<Proto> { +public: + using MessageHandler = + typename lldb_private::transport::JSONTransport<Proto>::MessageHandler; + + static std::pair<std::unique_ptr<TestTransport<Proto>>, + std::unique_ptr<TestTransport<Proto>>> + createPair() { + std::unique_ptr<TestTransport<Proto>> transports[2] = { + std::make_unique<TestTransport<Proto>>(), + std::make_unique<TestTransport<Proto>>()}; + return std::make_pair(std::move(transports[0]), std::move(transports[1])); + } + + explicit TestTransport() { + llvm::Expected<lldb::FileUP> dummy_file = + lldb_private::FileSystem::Instance().Open( + lldb_private::FileSpec(lldb_private::FileSystem::DEV_NULL), + lldb_private::File::eOpenOptionReadWrite); + EXPECT_THAT_EXPECTED(dummy_file, llvm::Succeeded()); + m_dummy_file = std::move(*dummy_file); + } + + llvm::Error Send(const typename Proto::Evt &evt) override { + EXPECT_TRUE(m_loop && m_handler) + << "Send called before RegisterMessageHandler"; + m_loop->AddPendingCallback([this, evt](lldb_private::MainLoopBase &) { + m_handler->Received(evt); + }); + return llvm::Error::success(); + } + + llvm::Error Send(const typename Proto::Req &req) override { + EXPECT_TRUE(m_loop && m_handler) + << "Send called before RegisterMessageHandler"; + m_loop->AddPendingCallback([this, req](lldb_private::MainLoopBase &) { + m_handler->Received(req); + }); + return llvm::Error::success(); + } + + llvm::Error Send(const typename Proto::Resp &resp) override { + EXPECT_TRUE(m_loop && m_handler) + << "Send called before RegisterMessageHandler"; + m_loop->AddPendingCallback([this, resp](lldb_private::MainLoopBase &) { + m_handler->Received(resp); + }); + return llvm::Error::success(); + } + + llvm::Expected<lldb_private::MainLoop::ReadHandleUP> + RegisterMessageHandler(lldb_private::MainLoop &loop, + MessageHandler &handler) override { + if (!m_loop) + m_loop = &loop; + if (!m_handler) + m_handler = &handler; + lldb_private::Status status; + auto handle = loop.RegisterReadObject( + m_dummy_file, [](lldb_private::MainLoopBase &) {}, status); + if (status.Fail()) + return status.takeError(); + return handle; + } + +protected: + void Log(llvm::StringRef message) override {}; + 
+private:
+  lldb_private::MainLoop *m_loop = nullptr;
+  MessageHandler *m_handler = nullptr;
+  // Dummy file for registering with the MainLoop.
+  lldb::FileSP m_dummy_file = nullptr;
+};
+
+template <typename Proto>
 class MockMessageHandler final
-    : public lldb_private::Transport<Req, Resp, Evt>::MessageHandler {
+    : public lldb_private::transport::JSONTransport<Proto>::MessageHandler {
 public:
-  MOCK_METHOD(void, Received, (const Evt &), (override));
-  MOCK_METHOD(void, Received, (const Req &), (override));
-  MOCK_METHOD(void, Received, (const Resp &), (override));
+  MOCK_METHOD(void, Received, (const typename Proto::Req &), (override));
+  MOCK_METHOD(void, Received, (const typename Proto::Resp &), (override));
+  MOCK_METHOD(void, Received, (const typename Proto::Evt &), (override));
   MOCK_METHOD(void, OnError, (llvm::Error), (override));
   MOCK_METHOD(void, OnClosed, (), (override));
 };
diff --git a/llvm/docs/DirectX/DXContainer.rst b/llvm/docs/DirectX/DXContainer.rst
index 17452d9..4473f4e 100644
--- a/llvm/docs/DirectX/DXContainer.rst
+++ b/llvm/docs/DirectX/DXContainer.rst
@@ -530,7 +530,7 @@ but adds a 32-bit access flag.
 .. code-block:: c

   struct DescriptorRange_V1_0 {
-    uint32_t RangeType;
+    dxil::ResourceClass RangeType;
     uint32_t NumDescriptors;
     uint32_t BaseShaderRegister;
     uint32_t RegisterSpace;
@@ -538,12 +538,12 @@ but adds a 32-bit access flag.
   };

   struct DescriptorRange_V1_1 {
-    dxbc::DescriptorRangeType RangeType;
+    dxil::ResourceClass RangeType;
     uint32_t NumDescriptors;
     uint32_t BaseShaderRegister;
     uint32_t RegisterSpace;
-    uint32_t OffsetInDescriptorsFromTableStart;
     uint32_t Flags;
+    uint32_t OffsetInDescriptorsFromTableStart;
   };

 Static Samplers
@@ -556,22 +556,26 @@ This section also has a variable size, since it can contain multiple static
 samplers definitions. However, the definition is a fixed sized struct,
 containing 13 32-byte fields of various enum, float, and integer values.

+In version 1.2, the static sampler is 17 bytes: it matches the version 1.0
+static sampler but adds a 32-bit access flag. The version 1.1 static sampler
+is identical to version 1.0.
+
 .. code-block:: c

   struct StaticSamplerDesc {
-    FilterMode Filter;
-    TextureAddressMode AddressU;
-    TextureAddressMode AddressV;
-    TextureAddressMode AddressW;
+    dxbc::FilterMode Filter;
+    dxbc::TextureAddressMode AddressU;
+    dxbc::TextureAddressMode AddressV;
+    dxbc::TextureAddressMode AddressW;
     float MipLODBias;
     uint32_t MaxAnisotropy;
-    ComparisonFunc ComparisonFunc;
-    StaticBorderColor BorderColor;
+    dxbc::ComparisonFunc ComparisonFunc;
+    dxbc::StaticBorderColor BorderColor;
     float MinLOD;
     float MaxLOD;
     uint32_t ShaderRegister;
     uint32_t RegisterSpace;
-    ShaderVisibility ShaderVisibility;
+    dxbc::ShaderVisibility ShaderVisibility;
   };

 SFI0 Part
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index d9d6f0b..62c0806 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1959,6 +1959,10 @@ public:
   LLVM_ABI SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
                                                 SDValue NewMemOp);

+  /// Get all the nodes in their topological order without modifying any state.
+  LLVM_ABI void getTopologicallyOrderedNodes(
+      SmallVectorImpl<const SDNode *> &SortedNodes) const;
+
   /// Topological-sort the AllNodes list and a
   /// assign a unique node id for each node in the DAG based on their
   /// topological order. Returns the number of nodes.
@@ -2009,7 +2013,9 @@ public:
   /// function mirrors \c llvm::salvageDebugInfo.
  LLVM_ABI void salvageDebugInfo(SDNode &N);

-  LLVM_ABI void dump() const;
+  /// Dump the textual format of this DAG. Print nodes in sorted order if
+  /// \p Sorted is true.
+  LLVM_ABI void dump(bool Sorted = false) const;

   /// In most cases this function returns the ABI alignment for a given type,
   /// except for illegal vector types where the alignment exceeds that of the
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 7bbad17..88691b9 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4654,23 +4654,6 @@ public:
     return false;
   }

-  /// Allows the target to handle physreg-carried dependency
-  /// in target-specific way. Used from the ScheduleDAGSDNodes to decide whether
-  /// to add the edge to the dependency graph.
-  /// Def - input: Selection DAG node defininfg physical register
-  /// User - input: Selection DAG node using physical register
-  /// Op - input: Number of User operand
-  /// PhysReg - inout: set to the physical register if the edge is
-  /// necessary, unchanged otherwise
-  /// Cost - inout: physical register copy cost.
-  /// Returns 'true' is the edge is necessary, 'false' otherwise
-  virtual bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
-                                         const TargetRegisterInfo *TRI,
-                                         const TargetInstrInfo *TII,
-                                         MCRegister &PhysReg, int &Cost) const {
-    return false;
-  }
-
   /// Target-specific combining of register parts into its original value
   virtual SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL,
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 0a11617..5331cb5 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -4001,15 +4001,17 @@ public:

   /// Keeps track of value of iteration variable for input/scan loop to be
   /// used for Scan directive lowering
-  llvm::Value *IV;
+  llvm::Value *IV = nullptr;

   /// Stores the span of canonical loop being lowered to be used for temporary
   /// buffer allocation or Finalization.
-  llvm::Value *Span;
+  llvm::Value *Span = nullptr;

   ScanInfo() {
     ScanBuffPtrs = new llvm::SmallDenseMap<llvm::Value *, llvm::Value *>();
   }

+  ScanInfo(ScanInfo &) = delete;
+  ScanInfo &operator=(const ScanInfo &) = delete;
   ~ScanInfo() { delete (ScanBuffPtrs); }
 };

diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index fbc92d7..b0269ee 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -162,7 +162,7 @@ let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
   class AdvSIMD_2Arg_Scalar_Narrow_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyint_ty],
                             [LLVMExtendedType<0>, llvm_i32_ty],
-                            [IntrNoMem]>;
+                            [IntrNoMem, ImmArg<ArgIndex<1>>]>;
   class AdvSIMD_2VectorArg_Scalar_Wide_BySize_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                             [LLVMTruncatedType<0>],
@@ -187,13 +187,13 @@ let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
class AdvSIMD_3VectorArg_Scalar_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem]>; + [IntrNoMem, ImmArg<ArgIndex<2>>]>; class AdvSIMD_CvtFxToFP_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty], - [IntrNoMem]>; + [IntrNoMem, ImmArg<ArgIndex<1>>]>; class AdvSIMD_CvtFPToFx_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty], - [IntrNoMem]>; + [IntrNoMem, ImmArg<ArgIndex<1>>]>; class AdvSIMD_1Arg_Intrinsic : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrNoMem]>; @@ -221,7 +221,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". // Arithmetic ops -let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in { +let TargetPrefix = "aarch64" in { // Vector Add Across Lanes def int_aarch64_neon_saddv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; def int_aarch64_neon_uaddv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; diff --git a/llvm/include/llvm/Support/Jobserver.h b/llvm/include/llvm/Support/Jobserver.h new file mode 100644 index 0000000..6bee3b5 --- /dev/null +++ b/llvm/include/llvm/Support/Jobserver.h @@ -0,0 +1,162 @@ +//===- llvm/Support/Jobserver.h - Jobserver Client --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a client for the GNU Make jobserver protocol. This allows +// LLVM tools to coordinate parallel execution with a parent `make` process. +// +// The jobserver protocol is a mechanism for GNU Make to share its pool of +// available "job slots" with the subprocesses it invokes. This is particularly +// useful for tools that can perform parallel operations themselves (e.g., a +// multi-threaded linker or compiler). By participating in this protocol, a +// tool can ensure the total number of concurrent jobs does not exceed the +// limit specified by the user (e.g., `make -j8`). +// +// How it works: +// +// 1. Establishment: +// A child process discovers the jobserver by inspecting the `MAKEFLAGS` +// environment variable. If a jobserver is active, this variable will +// contain a `--jobserver-auth=<value>` argument. The format of `<value>` +// determines how to communicate with the server. +// +// 2. The Implicit Slot: +// Every command invoked by `make` is granted one "implicit" job slot. This +// means a tool can always perform at least one unit of work without needing +// to communicate with the jobserver. This implicit slot should NEVER be +// released back to the jobserver. +// +// 3. Acquiring and Releasing Slots: +// On POSIX systems, the jobserver is implemented as a pipe. The +// `--jobserver-auth` value specifies either a path to a named pipe +// (`fifo:PATH`) or a pair of file descriptors (`R,W`). The pipe is +// pre-loaded with single-character tokens, one for each available job slot. +// +// - To acquire an additional slot, a client reads a single-character token +// from the pipe. +// - To release a slot, the client must write the *exact same* character +// token back to the pipe. +// +// It is critical that a client releases all acquired slots before it exits, +// even in cases of error, to avoid deadlocking the build. 
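+//
+// A minimal usage sketch of the client API declared below (assuming a
+// jobserver was detected; error handling and the single-threaded fallback
+// are elided):
+//
+//   if (JobserverClient *JS = JobserverClient::getInstance()) {
+//     JobSlot Slot = JS->tryAcquire();  // first success is the implicit slot
+//     if (Slot.isValid()) {
+//       // ... perform one unit of parallel work ...
+//       JS->release(std::move(Slot));   // JobSlot is move-only
+//     }
+//   }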
+// +// Example: +// A multi-threaded linker invoked by `make -j8` wants to use multiple +// threads. It first checks for the jobserver. It knows it has one implicit +// slot, so it can use one thread. It then tries to acquire 7 more slots by +// reading 7 tokens from the jobserver pipe. If it only receives 3 tokens, +// it knows it can use a total of 1 (implicit) + 3 (acquired) = 4 threads. +// Before exiting, it must write the 3 tokens it read back to the pipe. +// +// For more context, see: +// - GNU Make manual on job slots: +// https://www.gnu.org/software/make/manual/html_node/Job-Slots.html +// - LLVM RFC discussion on jobserver support: +// https://discourse.llvm.org/t/rfc-adding-gnu-make-jobserver- +// support-to-llvm-for-coordinated-parallelism/87034 +// - Ninja’s jobserver support PR: +// https://github.com/ninja-build/ninja/pull/2506 +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_JOBSERVER_H +#define LLVM_SUPPORT_JOBSERVER_H + +#include "llvm/ADT/StringRef.h" +#include <memory> +#include <string> + +namespace llvm { + +/// A JobSlot represents a single job slot that can be acquired from or released +/// to a jobserver pool. This class is move-only. +class JobSlot { +public: + /// Default constructor creates an invalid instance. + JobSlot() = default; + + // Move operations are allowed. + JobSlot(JobSlot &&Other) noexcept : Value(Other.Value) { + Other.Value = kInvalidValue; + } + JobSlot &operator=(JobSlot &&Other) noexcept { + if (this != &Other) { + this->Value = Other.Value; + Other.Value = kInvalidValue; + } + return *this; + } + + // Copy operations are disallowed. + JobSlot(const JobSlot &) = delete; + JobSlot &operator=(const JobSlot &) = delete; + + /// Returns true if this instance is valid (either implicit or explicit). + bool isValid() const { return Value >= 0; } + + /// Returns true if this instance represents the implicit job slot. + bool isImplicit() const { return Value == kImplicitValue; } + + static JobSlot createExplicit(uint8_t V) { + return JobSlot(static_cast<int16_t>(V)); + } + + static JobSlot createImplicit() { return JobSlot(kImplicitValue); } + + uint8_t getExplicitValue() const; + bool isExplicit() const { return isValid() && !isImplicit(); } + +private: + friend class JobserverClient; + friend class JobserverClientImpl; + + JobSlot(int16_t V) : Value(V) {} + + /// The jobserver pipe carries explicit tokens (bytes 0–255). We reserve two + /// sentinels in Value for special cases: + /// kInvalidValue (-1): no slot held + /// kImplicitValue (INT16_MAX): implicit slot granted at startup (no pipe + /// I/O) + /// + /// We use int16_t so Value can store 0–255 explicit tokens and + /// sentinels without overflow, enforces fixed 16-bit width, and avoids + /// unsigned/signed mix-ups. + static constexpr int16_t kInvalidValue = -1; + static constexpr int16_t kImplicitValue = INT16_MAX; + int16_t Value = kInvalidValue; +}; + +/// The public interface for a jobserver client. +/// This client is a lazy-initialized singleton that is created on first use. +class JobserverClient { +public: + virtual ~JobserverClient(); + + /// Tries to acquire a job slot from the pool. On failure (e.g., if the pool + /// is empty), this returns an invalid JobSlot instance. The first successful + /// call will always return the implicit slot. + virtual JobSlot tryAcquire() = 0; + + /// Releases a job slot back to the pool. 
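+  /// Per the protocol notes above, releasing an explicit slot writes its
+  /// token back to the pipe; the implicit slot is never written back.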
+ virtual void release(JobSlot Slot) = 0; + + /// Returns the number of job slots available, as determined on first use. + /// This value is cached. Returns 0 if no jobserver is active. + virtual unsigned getNumJobs() const = 0; + + /// Returns the singleton instance of the JobserverClient. + /// The instance is created on the first call to this function. + /// Returns a nullptr if no jobserver is configured or an error occurs. + static JobserverClient *getInstance(); + + /// Resets the singleton instance. For testing purposes only. + static void resetForTesting(); +}; + +} // end namespace llvm + +#endif // LLVM_SUPPORT_JOBSERVER_H diff --git a/llvm/include/llvm/Support/ThreadPool.h b/llvm/include/llvm/Support/ThreadPool.h index c26681c..c20efc7 100644 --- a/llvm/include/llvm/Support/ThreadPool.h +++ b/llvm/include/llvm/Support/ThreadPool.h @@ -16,6 +16,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/Config/llvm-config.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Jobserver.h" #include "llvm/Support/RWMutex.h" #include "llvm/Support/Threading.h" #include "llvm/Support/thread.h" @@ -180,6 +181,7 @@ private: void grow(int requested); void processTasks(ThreadPoolTaskGroup *WaitingForGroup); + void processTasksWithJobserver(); /// Threads in flight std::vector<llvm::thread> Threads; @@ -208,6 +210,8 @@ private: /// Maximum number of threads to potentially grow this pool to. const unsigned MaxThreadCount; + + JobserverClient *TheJobserver = nullptr; }; #endif // LLVM_ENABLE_THREADS diff --git a/llvm/include/llvm/Support/Threading.h b/llvm/include/llvm/Support/Threading.h index d3fe0a5..8884680 100644 --- a/llvm/include/llvm/Support/Threading.h +++ b/llvm/include/llvm/Support/Threading.h @@ -142,6 +142,11 @@ constexpr bool llvm_is_multithreaded() { return LLVM_ENABLE_THREADS; } /// the thread shall remain on the actual CPU socket. LLVM_ABI std::optional<unsigned> compute_cpu_socket(unsigned ThreadPoolNum) const; + + /// If true, the thread pool will attempt to coordinate with a GNU Make + /// jobserver, acquiring a job slot before processing a task. If no + /// jobserver is found in the environment, this is ignored. + bool UseJobserver = false; }; /// Build a strategy from a number of threads as a string provided in \p Num. @@ -210,6 +215,19 @@ constexpr bool llvm_is_multithreaded() { return LLVM_ENABLE_THREADS; } return S; } + /// Returns a thread strategy that attempts to coordinate with a GNU Make + /// jobserver. The number of active threads will be limited by the number of + /// available job slots. If no jobserver is detected in the environment, this + /// strategy falls back to the default hardware_concurrency() behavior. + inline ThreadPoolStrategy jobserver_concurrency() { + ThreadPoolStrategy S; + S.UseJobserver = true; + // We can still request all threads be created, as they will simply + // block waiting for a job slot if the jobserver is the limiting factor. + S.ThreadsRequested = 0; // 0 means 'use all available' + return S; + } + /// Return the current thread id, as used in various OS system calls. 
/// Note that not all platforms guarantee that the value returned will be /// unique across the entire system, so portable code should not assume diff --git a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h index 1e07fbe..faaff4a 100644 --- a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h +++ b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h @@ -18,8 +18,7 @@ #include "llvm/Support/DataTypes.h" -namespace llvm { -namespace X86Disassembler { +namespace llvm::X86Disassembler { #define INSTRUCTIONS_SYM x86DisassemblerInstrSpecifiers #define CONTEXTS_SYM x86DisassemblerContexts @@ -541,7 +540,6 @@ static const unsigned X86_MAX_OPERANDS = 6; /// respectively. enum DisassemblerMode { MODE_16BIT, MODE_32BIT, MODE_64BIT }; -} // namespace X86Disassembler -} // namespace llvm +} // namespace llvm::X86Disassembler #endif diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index faf7788..e3f995d 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -126,7 +126,7 @@ def G_FRAME_INDEX : GenericInstruction { } def G_GLOBAL_VALUE : GenericInstruction { - let OutOperandList = (outs type0:$dst); + let OutOperandList = (outs ptype0:$dst); let InOperandList = (ins unknown:$src); let hasSideEffects = false; } diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 558c5a0..309f1be 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6046,7 +6046,7 @@ static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, return N02; } - if (MaxC == 0 && MinCPlus1.isPowerOf2()) { + if (MaxC == 0 && MinC != 0 && MinCPlus1.isPowerOf2()) { BW = MinCPlus1.exactLogBase2(); Unsigned = true; return N02; diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 7902229..4f4fb9c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -111,15 +111,11 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, const TargetRegisterInfo *TRI, const TargetInstrInfo *TII, - const TargetLowering &TLI, MCRegister &PhysReg, int &Cost) { if (Op != 2 || User->getOpcode() != ISD::CopyToReg) return; Register Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); - if (TLI.checkForPhysRegDependency(Def, User, Op, TRI, TII, PhysReg, Cost)) - return; - if (Reg.isVirtual()) return; @@ -490,8 +486,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { MCRegister PhysReg; int Cost = 1; // Determine if this is a physical register dependency. - const TargetLowering &TLI = DAG->getTargetLoweringInfo(); - CheckForPhysRegDependency(OpN, N, i, TRI, TII, TLI, PhysReg, Cost); + CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost); assert((!PhysReg || !isChain) && "Chain dependence via physreg data?"); // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. 
For now, scheduler // emits a copy from the physical register to a virtual register unless diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 95f53fe..6ea2e27 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -12698,6 +12698,45 @@ unsigned SelectionDAG::AssignTopologicalOrder() { return DAGSize; } +void SelectionDAG::getTopologicallyOrderedNodes( + SmallVectorImpl<const SDNode *> &SortedNodes) const { + SortedNodes.clear(); + // Node -> remaining number of outstanding operands. + DenseMap<const SDNode *, unsigned> RemainingOperands; + + // Put nodes without any operands into SortedNodes first. + for (const SDNode &N : allnodes()) { + checkForCycles(&N, this); + unsigned NumOperands = N.getNumOperands(); + if (NumOperands == 0) + SortedNodes.push_back(&N); + else + // Record their total number of outstanding operands. + RemainingOperands[&N] = NumOperands; + } + + // A node is pushed into SortedNodes when all of its operands (predecessors in + // the graph) are also in SortedNodes. + for (unsigned i = 0U; i < SortedNodes.size(); ++i) { + const SDNode *N = SortedNodes[i]; + for (const SDNode *U : N->users()) { + unsigned &NumRemOperands = RemainingOperands[U]; + assert(NumRemOperands && "Invalid number of remaining operands"); + --NumRemOperands; + if (!NumRemOperands) + SortedNodes.push_back(U); + } + } + + assert(SortedNodes.size() == AllNodes.size() && "Node count mismatch"); + assert(SortedNodes.front()->getOpcode() == ISD::EntryToken && + "First node in topological sort is not the entry token"); + assert(SortedNodes.front()->getNumOperands() == 0 && + "First node in topological sort has operands"); + assert(SortedNodes.back()->use_empty() && + "Last node in topological sort has users"); +} + /// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the /// value is produced by SD.
void SelectionDAG::AddDbgValue(SDDbgValue *DB, bool isParameter) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 4b2a00c..fcfbfe6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -1061,13 +1061,24 @@ static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { N->dump(G); } -LLVM_DUMP_METHOD void SelectionDAG::dump() const { +LLVM_DUMP_METHOD void SelectionDAG::dump(bool Sorted) const { dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:\n"; - for (const SDNode &N : allnodes()) { + auto dumpEachNode = [this](const SDNode &N) { if (!N.hasOneUse() && &N != getRoot().getNode() && (!shouldPrintInline(N, this) || N.use_empty())) DumpNodes(&N, 2, this); + }; + + if (Sorted) { + SmallVector<const SDNode *> SortedNodes; + SortedNodes.reserve(AllNodes.size()); + getTopologicallyOrderedNodes(SortedNodes); + for (const SDNode *N : SortedNodes) + dumpEachNode(*N); + } else { + for (const SDNode &N : allnodes()) + dumpEachNode(N); } if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index e61558c..c35f29d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -144,6 +144,11 @@ UseMBPI("use-mbpi", cl::init(true), cl::Hidden); #ifndef NDEBUG +static cl::opt<bool> + DumpSortedDAG("dump-sorted-dags", cl::Hidden, + cl::desc("Print DAGs with sorted nodes in debug dump"), + cl::init(false)); + static cl::opt<std::string> FilterDAGBasicBlockName("filter-view-dags", cl::Hidden, cl::desc("Only display the basic block whose name " @@ -932,7 +937,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nInitial selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS if (TTI->hasBranchDivergence()) @@ -952,7 +957,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nOptimized lowered selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS if (TTI->hasBranchDivergence()) @@ -974,7 +979,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nType-legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS if (TTI->hasBranchDivergence()) @@ -998,7 +1003,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nOptimized type-legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS if (TTI->hasBranchDivergence()) @@ -1016,7 +1021,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nVector-legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS if (TTI->hasBranchDivergence()) @@ -1032,7 +1037,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << 
"\nVector/type-legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS if (TTI->hasBranchDivergence()) @@ -1052,7 +1057,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nOptimized vector-legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS if (TTI->hasBranchDivergence()) @@ -1072,7 +1077,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nLegalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS if (TTI->hasBranchDivergence()) @@ -1092,7 +1097,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nOptimized legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS if (TTI->hasBranchDivergence()) @@ -1116,7 +1121,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nSelected selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); if (ViewSchedDAGs && MatchFilterBB) CurDAG->viewGraph("scheduler input for " + BlockName); diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index 7da972f..42b21b5 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -207,6 +207,7 @@ add_llvm_component_library(LLVMSupport InstructionCost.cpp IntEqClasses.cpp IntervalMap.cpp + Jobserver.cpp JSON.cpp KnownBits.cpp KnownFPClass.cpp diff --git a/llvm/lib/Support/Jobserver.cpp b/llvm/lib/Support/Jobserver.cpp new file mode 100644 index 0000000..9f726eb --- /dev/null +++ b/llvm/lib/Support/Jobserver.cpp @@ -0,0 +1,259 @@ +//===- llvm/Support/Jobserver.cpp - Jobserver Client Implementation -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Jobserver.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/raw_ostream.h" + +#include <atomic> +#include <memory> +#include <mutex> +#include <new> + +#define DEBUG_TYPE "jobserver" + +using namespace llvm; + +namespace { +struct FdPair { + int Read = -1; + int Write = -1; + bool isValid() const { return Read >= 0 && Write >= 0; } +}; + +struct JobserverConfig { + enum Mode { + None, + PosixFifo, + PosixPipe, + Win32Semaphore, + }; + Mode TheMode = None; + std::string Path; + FdPair PipeFDs; +}; + +/// A helper function that checks if `Input` starts with `Prefix`. +/// If it does, it removes the prefix from `Input`, assigns the remainder to +/// `Value`, and returns true. Otherwise, it returns false. 
+bool getPrefixedValue(StringRef Input, StringRef Prefix, StringRef &Value) { + if (Input.consume_front(Prefix)) { + Value = Input; + return true; + } + return false; +} + +/// A helper function to parse a string in the format "R,W" where R and W are +/// non-negative integers representing file descriptors. Returns the parsed +/// descriptor pair on success, or std::nullopt if the input is malformed. +static std::optional<FdPair> getFileDescriptorPair(StringRef Input) { + FdPair FDs; + if (Input.consumeInteger(10, FDs.Read)) + return std::nullopt; + if (!Input.consume_front(",")) + return std::nullopt; + if (Input.consumeInteger(10, FDs.Write)) + return std::nullopt; + if (!Input.empty() || !FDs.isValid()) + return std::nullopt; + return FDs; +} + +/// Parses the `MAKEFLAGS` environment variable string to find jobserver +/// arguments. It splits the string into space-separated arguments and searches +/// for `--jobserver-auth` or `--jobserver-fds`. Based on the value of these +/// arguments, it determines the jobserver mode (Pipe, FIFO, or Semaphore) and +/// connection details (file descriptors or path). +Expected<JobserverConfig> parseNativeMakeFlags(StringRef MakeFlags) { + JobserverConfig Config; + if (MakeFlags.empty()) + return Config; + + // Split the MAKEFLAGS string into arguments. + SmallVector<StringRef, 8> Args; + SplitString(MakeFlags, Args); + + // If '-n' (dry-run) is present as a legacy flag (not starting with '-'), + // disable the jobserver. + if (!Args.empty() && !Args[0].starts_with("-") && Args[0].contains('n')) + return Config; + + // Iterate through arguments to find jobserver flags. + // Note that make may pass multiple --jobserver-auth flags; the last one wins. + for (StringRef Arg : Args) { + StringRef Value; + if (getPrefixedValue(Arg, "--jobserver-auth=", Value)) { + // Try to parse as a file descriptor pair first. + if (auto FDPair = getFileDescriptorPair(Value)) { + Config.TheMode = JobserverConfig::PosixPipe; + Config.PipeFDs = *FDPair; + } else { + StringRef FifoPath; + // If not FDs, try to parse as a named pipe (fifo). + if (getPrefixedValue(Value, "fifo:", FifoPath)) { + Config.TheMode = JobserverConfig::PosixFifo; + Config.Path = FifoPath.str(); + } else { + // Otherwise, assume it's a Windows semaphore. + Config.TheMode = JobserverConfig::Win32Semaphore; + Config.Path = Value.str(); + } + } + } else if (getPrefixedValue(Arg, "--jobserver-fds=", Value)) { + // This is an alternative, older syntax for the pipe-based server. + if (auto FDPair = getFileDescriptorPair(Value)) { + Config.TheMode = JobserverConfig::PosixPipe; + Config.PipeFDs = *FDPair; + } else { + return createStringError(inconvertibleErrorCode(), + "Invalid file descriptor pair in MAKEFLAGS"); + } + } + } + +// Perform platform-specific validation.
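+// (For example, MAKEFLAGS="--jobserver-auth=3,4" parses to PosixPipe and is +// accepted on POSIX hosts, but the same value is rejected below on Windows; +// the values shown are illustrative.)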
+#ifdef _WIN32 + if (Config.TheMode == JobserverConfig::PosixFifo || + Config.TheMode == JobserverConfig::PosixPipe) + return createStringError( + inconvertibleErrorCode(), + "FIFO/Pipe-based jobserver is not supported on Windows"); +#else + if (Config.TheMode == JobserverConfig::Win32Semaphore) + return createStringError( + inconvertibleErrorCode(), + "Semaphore-based jobserver is not supported on this platform"); +#endif + return Config; +} + +std::once_flag GJobserverOnceFlag; +JobserverClient *GJobserver = nullptr; + +} // namespace + +namespace llvm { +class JobserverClientImpl : public JobserverClient { + bool IsInitialized = false; + std::atomic<bool> HasImplicitSlot{true}; + unsigned NumJobs = 0; + +public: + JobserverClientImpl(const JobserverConfig &Config); + ~JobserverClientImpl() override; + + JobSlot tryAcquire() override; + void release(JobSlot Slot) override; + unsigned getNumJobs() const override { return NumJobs; } + + bool isValid() const { return IsInitialized; } + +private: +#if defined(LLVM_ON_UNIX) + int ReadFD = -1; + int WriteFD = -1; + std::string FifoPath; +#elif defined(_WIN32) + void *Semaphore = nullptr; +#endif +}; +} // namespace llvm + +// Include the platform-specific parts of the class. +#if defined(LLVM_ON_UNIX) +#include "Unix/Jobserver.inc" +#elif defined(_WIN32) +#include "Windows/Jobserver.inc" +#else +// Dummy implementation for unsupported platforms. +JobserverClientImpl::JobserverClientImpl(const JobserverConfig &Config) {} +JobserverClientImpl::~JobserverClientImpl() = default; +JobSlot JobserverClientImpl::tryAcquire() { return JobSlot(); } +void JobserverClientImpl::release(JobSlot Slot) {} +#endif + +namespace llvm { +JobserverClient::~JobserverClient() = default; + +uint8_t JobSlot::getExplicitValue() const { + assert(isExplicit() && "Cannot get value of implicit or invalid slot"); + return static_cast<uint8_t>(Value); +} + +/// This is the main entry point for acquiring a jobserver client. It uses a +/// std::call_once to ensure the singleton `GJobserver` instance is created +/// safely in a multi-threaded environment. On first call, it reads the +/// `MAKEFLAGS` environment variable, parses it, and attempts to construct and +/// initialize a `JobserverClientImpl`. If successful, the global instance is +/// stored in `GJobserver`. Subsequent calls will return the existing instance. 
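+/// A hypothetical call site: +/// if (JobserverClient *JC = JobserverClient::getInstance()) +/// errs() << "jobserver provides " << JC->getNumJobs() << " slots\n";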
+JobserverClient *JobserverClient::getInstance() { + std::call_once(GJobserverOnceFlag, []() { + LLVM_DEBUG( + dbgs() + << "JobserverClient::getInstance() called for the first time.\n"); + const char *MakeFlagsEnv = getenv("MAKEFLAGS"); + if (!MakeFlagsEnv) { + errs() << "Warning: failed to create jobserver client because the " + "MAKEFLAGS environment variable is not set\n"; + return; + } + + LLVM_DEBUG(dbgs() << "Found MAKEFLAGS = \"" << MakeFlagsEnv << "\"\n"); + + auto ConfigOrErr = parseNativeMakeFlags(MakeFlagsEnv); + if (Error Err = ConfigOrErr.takeError()) { + errs() << "Warning: failed to create jobserver client due to an invalid " + "MAKEFLAGS environment variable: " + << toString(std::move(Err)) << "\n"; + return; + } + + JobserverConfig Config = *ConfigOrErr; + if (Config.TheMode == JobserverConfig::None) { + errs() << "Warning: failed to create jobserver client because no " + "jobserver mode was found in the MAKEFLAGS environment variable\n"; + return; + } + + if (Config.TheMode == JobserverConfig::PosixPipe) { +#if defined(LLVM_ON_UNIX) + if (!areFdsValid(Config.PipeFDs.Read, Config.PipeFDs.Write)) { + errs() << "Warning: failed to create jobserver client due to invalid " + "pipe FDs in the MAKEFLAGS environment variable\n"; + return; + } +#endif + } + + auto Client = std::make_unique<JobserverClientImpl>(Config); + if (Client->isValid()) { + LLVM_DEBUG(dbgs() << "Jobserver client created successfully!\n"); + GJobserver = Client.release(); + } else + errs() << "Warning: jobserver client initialization failed.\n"; + }); + return GJobserver; +} + +/// For testing purposes only. This function resets the singleton instance by +/// destroying the existing client and re-initializing the `std::once_flag`. +/// This allows tests to simulate the first-time initialization of the +/// jobserver client multiple times. +void JobserverClient::resetForTesting() { + delete GJobserver; + GJobserver = nullptr; + // Re-construct the std::once_flag in place to reset the singleton state. + new (&GJobserverOnceFlag) std::once_flag(); +} +} // namespace llvm diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp index 3ac6fc7..8e0c724 100644 --- a/llvm/lib/Support/Parallel.cpp +++ b/llvm/lib/Support/Parallel.cpp @@ -7,12 +7,17 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/Parallel.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/Config/llvm-config.h" +#include "llvm/Support/ExponentialBackoff.h" +#include "llvm/Support/Jobserver.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Threading.h" #include <atomic> #include <future> +#include <memory> +#include <mutex> #include <thread> #include <vector> @@ -49,6 +54,9 @@ public: class ThreadPoolExecutor : public Executor { public: explicit ThreadPoolExecutor(ThreadPoolStrategy S) { + if (S.UseJobserver) + TheJobserver = JobserverClient::getInstance(); + ThreadCount = S.compute_thread_count(); // Spawn all but one of the threads in another thread as spawning threads // can take a while. @@ -69,6 +77,10 @@ public: }); } + // The default constructor is deleted so that a thread pool executor can + // only be created with an explicit parallel strategy. + ThreadPoolExecutor() = delete; + void stop() { { std::lock_guard<std::mutex> Lock(Mutex); @@ -111,15 +123,60 @@ private: void work(ThreadPoolStrategy S, unsigned ThreadID) { threadIndex = ThreadID; S.apply_thread_strategy(ThreadID); + // Note on jobserver deadlock avoidance: + // GNU Make grants each invoked process one implicit job slot.
Our + // JobserverClient models this by returning an implicit JobSlot on the + // first successful tryAcquire() in a process. This guarantees forward + // progress without requiring a dedicated "always-on" thread here. + while (true) { - std::unique_lock<std::mutex> Lock(Mutex); - Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); }); - if (Stop) - break; - auto Task = std::move(WorkStack.back()); - WorkStack.pop_back(); - Lock.unlock(); - Task(); + if (TheJobserver) { + // Jobserver-mode scheduling: + // - Acquire one job slot (with exponential backoff to avoid busy-wait). + // - While holding the slot, drain and run tasks from the local queue. + // - Release the slot when the queue is empty or when shutting down. + // Rationale: Holding a slot amortizes acquire/release overhead over + // multiple tasks and avoids requeue/yield churn, while still enforcing + // the jobserver’s global concurrency limit. With K available slots, + // up to K workers run tasks in parallel; within each worker tasks run + // sequentially until the local queue is empty. + ExponentialBackoff Backoff(std::chrono::hours(24)); + JobSlot Slot; + do { + if (Stop) + return; + Slot = TheJobserver->tryAcquire(); + if (Slot.isValid()) + break; + } while (Backoff.waitForNextAttempt()); + + auto SlotReleaser = llvm::make_scope_exit( + [&] { TheJobserver->release(std::move(Slot)); }); + + while (true) { + std::function<void()> Task; + { + std::unique_lock<std::mutex> Lock(Mutex); + Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); }); + if (Stop && WorkStack.empty()) + return; + if (WorkStack.empty()) + break; + Task = std::move(WorkStack.back()); + WorkStack.pop_back(); + } + Task(); + } + } else { + std::unique_lock<std::mutex> Lock(Mutex); + Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); }); + if (Stop) + break; + auto Task = std::move(WorkStack.back()); + WorkStack.pop_back(); + Lock.unlock(); + Task(); + } } } @@ -130,9 +189,20 @@ private: std::promise<void> ThreadsCreated; std::vector<std::thread> Threads; unsigned ThreadCount; + + JobserverClient *TheJobserver = nullptr; }; -Executor *Executor::getDefaultExecutor() { +// A global raw pointer to the executor. Lifetime is managed by the +// objects created within createExecutor(). +static Executor *TheExec = nullptr; +static std::once_flag Flag; + +// This function will be called exactly once to create the executor. +// It contains the necessary platform-specific logic. Since functions +// called by std::call_once cannot return a value, we have to set the +// executor as a global variable. +void createExecutor() { #ifdef _WIN32 // The ManagedStatic enables the ThreadPoolExecutor to be stopped via // llvm_shutdown() which allows a "clean" fast exit, e.g. via _exit(). This @@ -156,16 +226,22 @@ Executor *Executor::getDefaultExecutor() { ThreadPoolExecutor::Deleter> ManagedExec; static std::unique_ptr<ThreadPoolExecutor> Exec(&(*ManagedExec)); - return Exec.get(); + TheExec = Exec.get(); #else // ManagedStatic is not desired on other platforms. When `Exec` is destroyed // by llvm_shutdown(), worker threads will clean up and invoke TLS // destructors. This can lead to race conditions if other threads attempt to // access TLS objects that have already been destroyed. static ThreadPoolExecutor Exec(strategy); - return &Exec; + TheExec = &Exec; #endif } + +Executor *Executor::getDefaultExecutor() { + // Use std::call_once to lazily and safely initialize the executor.
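+ // Racing first callers are safe: exactly one thread runs createExecutor() + // and the rest block until TheExec has been published.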
+ std::call_once(Flag, createExecutor); + return TheExec; +} } // namespace } // namespace detail diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp index c304f0f..6960268 100644 --- a/llvm/lib/Support/ThreadPool.cpp +++ b/llvm/lib/Support/ThreadPool.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// // +// // This file implements a crude C++11 based thread pool. // //===----------------------------------------------------------------------===// @@ -14,6 +15,8 @@ #include "llvm/Config/llvm-config.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/Support/ExponentialBackoff.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Threading.h" #include "llvm/Support/raw_ostream.h" @@ -33,7 +36,10 @@ ThreadPoolInterface::~ThreadPoolInterface() = default; #if LLVM_ENABLE_THREADS StdThreadPool::StdThreadPool(ThreadPoolStrategy S) - : Strategy(S), MaxThreadCount(S.compute_thread_count()) {} + : Strategy(S), MaxThreadCount(S.compute_thread_count()) { + if (Strategy.UseJobserver) + TheJobserver = JobserverClient::getInstance(); +} void StdThreadPool::grow(int requested) { llvm::sys::ScopedWriter LockGuard(ThreadsLock); @@ -45,7 +51,15 @@ void StdThreadPool::grow(int requested) { Threads.emplace_back([this, ThreadID] { set_thread_name(formatv("llvm-worker-{0}", ThreadID)); Strategy.apply_thread_strategy(ThreadID); - processTasks(nullptr); + // Note on jobserver deadlock avoidance: + // GNU Make grants each invoked process one implicit job slot. + // JobserverClient::tryAcquire() returns that implicit slot on the first + // successful call in a process, ensuring forward progress without a + // dedicated "always-on" thread. + if (TheJobserver) + processTasksWithJobserver(); + else + processTasks(nullptr); }); } } @@ -133,6 +147,96 @@ void StdThreadPool::processTasks(ThreadPoolTaskGroup *WaitingForGroup) { } } +/// Main loop for worker threads when using a jobserver. +/// This function uses a two-level scheme; it first acquires a job slot from +/// the external jobserver, then retrieves a task from the internal queue. +/// This allows the thread pool to cooperate with build systems like `make -j`. +void StdThreadPool::processTasksWithJobserver() { + while (true) { + // Acquire a job slot from the external jobserver. + // This polls for a slot and yields the thread to avoid a high-CPU wait. + JobSlot Slot; + // The timeout for the backoff can be very long, as the shutdown + // is checked on each iteration. The sleep duration is capped by MaxWait + // in ExponentialBackoff, so shutdown latency is not a problem. + ExponentialBackoff Backoff(std::chrono::hours(24)); + bool AcquiredToken = false; + do { + // Return if the thread pool is shutting down. + { + std::unique_lock<std::mutex> LockGuard(QueueLock); + if (!EnableFlag) + return; + } + + Slot = TheJobserver->tryAcquire(); + if (Slot.isValid()) { + AcquiredToken = true; + break; + } + } while (Backoff.waitForNextAttempt()); + + if (!AcquiredToken) { + // This is practically unreachable with a 24h timeout and indicates a + // deeper problem if hit. + report_fatal_error("Timed out waiting for jobserver token."); + } + + // `make_scope_exit` guarantees the job slot is released, even if the + // task throws or we exit early. This prevents deadlocking the build. + auto SlotReleaser = + make_scope_exit([&] { TheJobserver->release(std::move(Slot)); }); + + // While we hold a job slot, process tasks from the internal queue.
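+ // With K slots granted across all workers, at most K tasks execute in + // parallel; each worker then drains tasks sequentially until the queue is + // empty.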
+ while (true) { + std::function<void()> Task; + ThreadPoolTaskGroup *GroupOfTask = nullptr; + + { + std::unique_lock<std::mutex> LockGuard(QueueLock); + + // Wait until a task is available or the pool is shutting down. + QueueCondition.wait(LockGuard, + [&] { return !EnableFlag || !Tasks.empty(); }); + + // If shutting down and the queue is empty, the thread can terminate. + if (!EnableFlag && Tasks.empty()) + return; + + // If the queue is empty, we're done processing tasks for now. + // Break the inner loop to release the job slot. + if (Tasks.empty()) + break; + + // A task is available. Mark it as active before releasing the lock + // to prevent race conditions with `wait()`. + ++ActiveThreads; + Task = std::move(Tasks.front().first); + GroupOfTask = Tasks.front().second; + if (GroupOfTask != nullptr) + ++ActiveGroups[GroupOfTask]; + Tasks.pop_front(); + } // The queue lock is released. + + // Run the task. The job slot remains acquired during execution. + Task(); + + // The task has finished. Update the active count and notify any waiters. + { + std::lock_guard<std::mutex> LockGuard(QueueLock); + --ActiveThreads; + if (GroupOfTask != nullptr) { + auto A = ActiveGroups.find(GroupOfTask); + if (--(A->second) == 0) + ActiveGroups.erase(A); + } + // If all tasks are complete, notify any waiting threads. + if (workCompletedUnlocked(nullptr)) + CompletionCondition.notify_all(); + } + } + } +} bool StdThreadPool::workCompletedUnlocked(ThreadPoolTaskGroup *Group) const { if (Group == nullptr) return !ActiveThreads && Tasks.empty(); diff --git a/llvm/lib/Support/Threading.cpp b/llvm/lib/Support/Threading.cpp index 693de0e..9da357a 100644 --- a/llvm/lib/Support/Threading.cpp +++ b/llvm/lib/Support/Threading.cpp @@ -14,6 +14,7 @@ #include "llvm/Support/Threading.h" #include "llvm/Config/config.h" #include "llvm/Config/llvm-config.h" +#include "llvm/Support/Jobserver.h" #include <cassert> #include <optional> @@ -51,6 +52,10 @@ int llvm::get_physical_cores() { return -1; } static int computeHostNumHardwareThreads(); unsigned llvm::ThreadPoolStrategy::compute_thread_count() const { + if (UseJobserver) + if (auto JS = JobserverClient::getInstance()) + return JS->getNumJobs(); + int MaxThreadCount = UseHyperThreads ? computeHostNumHardwareThreads() : get_physical_cores(); if (MaxThreadCount <= 0) diff --git a/llvm/lib/Support/Unix/Jobserver.inc b/llvm/lib/Support/Unix/Jobserver.inc new file mode 100644 index 0000000..53bf7f2 --- /dev/null +++ b/llvm/lib/Support/Unix/Jobserver.inc @@ -0,0 +1,195 @@ +//===- llvm/Support/Unix/Jobserver.inc - Unix Jobserver Impl ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the UNIX-specific parts of the JobserverClient class. +// +//===----------------------------------------------------------------------===// + +#include <atomic> +#include <cassert> +#include <cerrno> +#include <fcntl.h> +#include <string.h> +#include <sys/stat.h> +#include <unistd.h> + +namespace { +/// Returns true if the given file descriptor is a FIFO (named pipe). +bool isFifo(int FD) { + struct stat StatBuf; + if (::fstat(FD, &StatBuf) != 0) + return false; + return S_ISFIFO(StatBuf.st_mode); +} + +/// Returns true if the given file descriptors are valid. 
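+/// (fcntl(F_GETFD) fails with EBADF on a descriptor that is not open in this +/// process, so stale fd numbers inherited via MAKEFLAGS are rejected here.)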
+bool areFdsValid(int ReadFD, int WriteFD) { + if (ReadFD == -1 || WriteFD == -1) + return false; + // Check if the file descriptors are actually valid by checking their flags. + return ::fcntl(ReadFD, F_GETFD) != -1 && ::fcntl(WriteFD, F_GETFD) != -1; +} +} // namespace + +/// The constructor sets up the client based on the provided configuration. +/// For pipe-based jobservers, it duplicates the inherited file descriptors, +/// sets them to close-on-exec, and makes the read descriptor non-blocking. +/// For FIFO-based jobservers, it opens the named pipe. After setup, it drains +/// all available tokens from the jobserver to determine the total number of +/// available jobs (`NumJobs`), then immediately releases them back. +JobserverClientImpl::JobserverClientImpl(const JobserverConfig &Config) { + switch (Config.TheMode) { + case JobserverConfig::PosixPipe: { + // Duplicate the read and write file descriptors. + int NewReadFD = ::dup(Config.PipeFDs.Read); + if (NewReadFD < 0) + return; + int NewWriteFD = ::dup(Config.PipeFDs.Write); + if (NewWriteFD < 0) { + ::close(NewReadFD); + return; + } + // Set the new descriptors to be closed automatically on exec(). + if (::fcntl(NewReadFD, F_SETFD, FD_CLOEXEC) == -1 || + ::fcntl(NewWriteFD, F_SETFD, FD_CLOEXEC) == -1) { + ::close(NewReadFD); + ::close(NewWriteFD); + return; + } + // Set the read descriptor to non-blocking. + int flags = ::fcntl(NewReadFD, F_GETFL, 0); + if (flags == -1 || ::fcntl(NewReadFD, F_SETFL, flags | O_NONBLOCK) == -1) { + ::close(NewReadFD); + ::close(NewWriteFD); + return; + } + ReadFD = NewReadFD; + WriteFD = NewWriteFD; + break; + } + case JobserverConfig::PosixFifo: + // Open the FIFO for reading. It must be non-blocking and close-on-exec. + ReadFD = ::open(Config.Path.c_str(), O_RDONLY | O_NONBLOCK | O_CLOEXEC); + if (ReadFD < 0 || !isFifo(ReadFD)) { + if (ReadFD >= 0) + ::close(ReadFD); + ReadFD = -1; + return; + } + FifoPath = Config.Path; + // The write FD is opened on-demand in release(). + WriteFD = -1; + break; + default: + return; + } + + IsInitialized = true; + // Determine the total number of jobs by acquiring all available slots and + // then immediately releasing them. + SmallVector<JobSlot, 8> Slots; + while (true) { + auto S = tryAcquire(); + if (!S.isValid()) + break; + Slots.push_back(std::move(S)); + } + NumJobs = Slots.size(); + assert(NumJobs >= 1 && "Invalid number of jobs"); + for (auto &S : Slots) + release(std::move(S)); +} + +/// The destructor closes any open file descriptors. +JobserverClientImpl::~JobserverClientImpl() { + if (ReadFD >= 0) + ::close(ReadFD); + if (WriteFD >= 0) + ::close(WriteFD); +} + +/// Tries to acquire a job slot. The first call to this function will always +/// successfully acquire the single "implicit" slot that is granted to every +/// process started by `make`. Subsequent calls attempt to read a one-byte +/// token from the jobserver's read pipe. A successful read grants one +/// explicit job slot. The read is non-blocking; if no token is available, +/// it fails and returns an invalid JobSlot. +JobSlot JobserverClientImpl::tryAcquire() { + if (!IsInitialized) + return JobSlot(); + + // The first acquisition is always for the implicit slot. + if (HasImplicitSlot.exchange(false, std::memory_order_acquire)) { + LLVM_DEBUG(dbgs() << "Acquired implicit job slot.\n"); + return JobSlot::createImplicit(); + } + + char Token; + ssize_t Ret; + LLVM_DEBUG(dbgs() << "Attempting to read token from FD " << ReadFD << ".\n"); + // Loop to retry on EINTR (interrupted system call). 
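+ // Because ReadFD is non-blocking, read() returns -1 with errno == EAGAIN + // when no token is available, and 0 once every write end has been closed; + // both cases fall through to the failure path below.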
+ do { + Ret = ::read(ReadFD, &Token, 1); + } while (Ret < 0 && errno == EINTR); + + if (Ret == 1) { + LLVM_DEBUG(dbgs() << "Acquired explicit token '" << Token << "'.\n"); + return JobSlot::createExplicit(static_cast<uint8_t>(Token)); + } + + LLVM_DEBUG(dbgs() << "Failed to acquire job slot, read returned " << Ret + << ".\n"); + return JobSlot(); +} + +/// Releases a job slot back to the pool. If the slot is implicit, it simply +/// resets a flag. If the slot is explicit, it writes the character token +/// associated with the slot back into the jobserver's write pipe. For FIFO +/// jobservers, this may require opening the FIFO for writing if it hasn't +/// been already. +void JobserverClientImpl::release(JobSlot Slot) { + if (!Slot.isValid()) + return; + + // Releasing the implicit slot just makes it available for the next acquire. + if (Slot.isImplicit()) { + LLVM_DEBUG(dbgs() << "Released implicit job slot.\n"); + [[maybe_unused]] bool was_already_released = + HasImplicitSlot.exchange(true, std::memory_order_release); + assert(!was_already_released && "Implicit slot released twice"); + return; + } + + uint8_t Token = Slot.getExplicitValue(); + LLVM_DEBUG(dbgs() << "Releasing explicit token '" << (char)Token << "' to FD " + << WriteFD << ".\n"); + + // For FIFO-based jobservers, the write FD might not be open yet. + // Open it on the first release. + if (WriteFD < 0) { + LLVM_DEBUG(dbgs() << "WriteFD is invalid, opening FIFO: " << FifoPath + << "\n"); + WriteFD = ::open(FifoPath.c_str(), O_WRONLY | O_CLOEXEC); + if (WriteFD < 0) { + LLVM_DEBUG(dbgs() << "Failed to open FIFO for writing.\n"); + return; + } + LLVM_DEBUG(dbgs() << "Opened FIFO as new WriteFD: " << WriteFD << "\n"); + } + + ssize_t Written; + // Loop to retry on EINTR (interrupted system call). + do { + Written = ::write(WriteFD, &Token, 1); + } while (Written < 0 && errno == EINTR); + + if (Written <= 0) { + LLVM_DEBUG(dbgs() << "Failed to write token to pipe, write returned " + << Written << "\n"); + } +} diff --git a/llvm/lib/Support/Windows/Jobserver.inc b/llvm/lib/Support/Windows/Jobserver.inc new file mode 100644 index 0000000..79028ee --- /dev/null +++ b/llvm/lib/Support/Windows/Jobserver.inc @@ -0,0 +1,79 @@ +//==- llvm/Support/Windows/Jobserver.inc - Windows Jobserver Impl -*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the Windows-specific parts of the JobserverClient class. +// On Windows, the jobserver is implemented using a named semaphore. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Windows/WindowsSupport.h" +#include <atomic> +#include <cassert> + +namespace llvm { +/// The constructor for the Windows jobserver client. It attempts to open a +/// handle to an existing named semaphore, the name of which is provided by +/// GNU make in the --jobserver-auth argument. If the semaphore is opened +/// successfully, the client is marked as initialized. +JobserverClientImpl::JobserverClientImpl(const JobserverConfig &Config) { + Semaphore = (void *)::OpenSemaphoreA(SEMAPHORE_MODIFY_STATE | SYNCHRONIZE, + FALSE, Config.Path.c_str()); + if (Semaphore != nullptr) + IsInitialized = true; +} + +/// The destructor closes the handle to the semaphore, releasing the resource. 
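+/// (CloseHandle only drops this process's reference; the named semaphore +/// itself persists while GNU make and sibling processes keep handles open.)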
+JobserverClientImpl::~JobserverClientImpl() { + if (Semaphore != nullptr) + ::CloseHandle((HANDLE)Semaphore); +} + +/// Tries to acquire a job slot. The first call always returns the implicit +/// slot. Subsequent calls use a non-blocking wait on the semaphore +/// (`WaitForSingleObject` with a timeout of 0). If the wait succeeds, the +/// semaphore's count is decremented, and an explicit job slot is acquired. +/// If the wait times out, it means no slots are available, and an invalid +/// slot is returned. +JobSlot JobserverClientImpl::tryAcquire() { + if (!IsInitialized) + return JobSlot(); + + // First, grant the implicit slot. + if (HasImplicitSlot.exchange(false, std::memory_order_acquire)) { + return JobSlot::createImplicit(); + } + + // Try to acquire a slot from the semaphore without blocking. + if (::WaitForSingleObject((HANDLE)Semaphore, 0) == WAIT_OBJECT_0) { + // The explicit token value is arbitrary on Windows, as the semaphore + // count is the real resource. + return JobSlot::createExplicit(1); + } + + return JobSlot(); // Invalid slot +} + +/// Releases a job slot back to the pool. If the slot is implicit, it simply +/// resets a flag. For an explicit slot, it increments the semaphore's count +/// by one using `ReleaseSemaphore`, making the slot available to other +/// processes. +void JobserverClientImpl::release(JobSlot Slot) { + if (!IsInitialized || !Slot.isValid()) + return; + + if (Slot.isImplicit()) { + [[maybe_unused]] bool was_already_released = + HasImplicitSlot.exchange(true, std::memory_order_release); + assert(!was_already_released && "Implicit slot released twice"); + return; + } + + // Release the slot by incrementing the semaphore count. + (void)::ReleaseSemaphore((HANDLE)Semaphore, 1, NULL); +} +} // namespace llvm diff --git a/llvm/lib/TableGen/Error.cpp b/llvm/lib/TableGen/Error.cpp index de0c4c9..3ba2c6c 100644 --- a/llvm/lib/TableGen/Error.cpp +++ b/llvm/lib/TableGen/Error.cpp @@ -19,10 +19,10 @@ #include "llvm/TableGen/Record.h" #include <cstdlib> -namespace llvm { +using namespace llvm; -SourceMgr SrcMgr; -unsigned ErrorsPrinted = 0; +SourceMgr llvm::SrcMgr; +unsigned llvm::ErrorsPrinted = 0; static void PrintMessage(ArrayRef<SMLoc> Locs, SourceMgr::DiagKind Kind, const Twine &Msg) { @@ -49,118 +49,118 @@ static void PrintMessage(ArrayRef<SMLoc> Locs, SourceMgr::DiagKind Kind, // Functions to print notes. -void PrintNote(const Twine &Msg) { - WithColor::note() << Msg << "\n"; -} +void llvm::PrintNote(const Twine &Msg) { WithColor::note() << Msg << "\n"; } -void PrintNote(function_ref<void(raw_ostream &OS)> PrintMsg) { +void llvm::PrintNote(function_ref<void(raw_ostream &OS)> PrintMsg) { PrintMsg(WithColor::note()); } -void PrintNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) { +void llvm::PrintNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) { PrintMessage(NoteLoc, SourceMgr::DK_Note, Msg); } // Functions to print fatal notes. -void PrintFatalNote(const Twine &Msg) { +void llvm::PrintFatalNote(const Twine &Msg) { PrintNote(Msg); fatal_exit(); } -void PrintFatalNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) { +void llvm::PrintFatalNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) { PrintNote(NoteLoc, Msg); fatal_exit(); } // This method takes a Record and uses the source location // stored in it. -void PrintFatalNote(const Record *Rec, const Twine &Msg) { +void llvm::PrintFatalNote(const Record *Rec, const Twine &Msg) { PrintNote(Rec->getLoc(), Msg); fatal_exit(); } // This method takes a RecordVal and uses the source location // stored in it. 
-void PrintFatalNote(const RecordVal *RecVal, const Twine &Msg) { +void llvm::PrintFatalNote(const RecordVal *RecVal, const Twine &Msg) { PrintNote(RecVal->getLoc(), Msg); fatal_exit(); } // Functions to print warnings. -void PrintWarning(const Twine &Msg) { WithColor::warning() << Msg << "\n"; } +void llvm::PrintWarning(const Twine &Msg) { + WithColor::warning() << Msg << "\n"; +} -void PrintWarning(ArrayRef<SMLoc> WarningLoc, const Twine &Msg) { +void llvm::PrintWarning(ArrayRef<SMLoc> WarningLoc, const Twine &Msg) { PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg); } -void PrintWarning(const char *Loc, const Twine &Msg) { +void llvm::PrintWarning(const char *Loc, const Twine &Msg) { SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Warning, Msg); } // Functions to print errors. -void PrintError(const Twine &Msg) { WithColor::error() << Msg << "\n"; } +void llvm::PrintError(const Twine &Msg) { WithColor::error() << Msg << "\n"; } -void PrintError(function_ref<void(raw_ostream &OS)> PrintMsg) { +void llvm::PrintError(function_ref<void(raw_ostream &OS)> PrintMsg) { PrintMsg(WithColor::error()); } -void PrintError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) { +void llvm::PrintError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) { PrintMessage(ErrorLoc, SourceMgr::DK_Error, Msg); } -void PrintError(const char *Loc, const Twine &Msg) { +void llvm::PrintError(const char *Loc, const Twine &Msg) { SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Error, Msg); } // This method takes a Record and uses the source location // stored in it. -void PrintError(const Record *Rec, const Twine &Msg) { +void llvm::PrintError(const Record *Rec, const Twine &Msg) { PrintMessage(Rec->getLoc(), SourceMgr::DK_Error, Msg); } // This method takes a RecordVal and uses the source location // stored in it. -void PrintError(const RecordVal *RecVal, const Twine &Msg) { +void llvm::PrintError(const RecordVal *RecVal, const Twine &Msg) { PrintMessage(RecVal->getLoc(), SourceMgr::DK_Error, Msg); } // Functions to print fatal errors. -void PrintFatalError(const Twine &Msg) { +void llvm::PrintFatalError(const Twine &Msg) { PrintError(Msg); fatal_exit(); } -void PrintFatalError(function_ref<void(raw_ostream &OS)> PrintMsg) { +void llvm::PrintFatalError(function_ref<void(raw_ostream &OS)> PrintMsg) { PrintError(PrintMsg); fatal_exit(); } -void PrintFatalError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) { +void llvm::PrintFatalError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) { PrintError(ErrorLoc, Msg); fatal_exit(); } // This method takes a Record and uses the source location // stored in it. -void PrintFatalError(const Record *Rec, const Twine &Msg) { +void llvm::PrintFatalError(const Record *Rec, const Twine &Msg) { PrintError(Rec->getLoc(), Msg); fatal_exit(); } // This method takes a RecordVal and uses the source location // stored in it. -void PrintFatalError(const RecordVal *RecVal, const Twine &Msg) { +void llvm::PrintFatalError(const RecordVal *RecVal, const Twine &Msg) { PrintError(RecVal->getLoc(), Msg); fatal_exit(); } // Check an assertion: Obtain the condition value and be sure it is true. // If not, print a nonfatal error along with the message. 
-bool CheckAssert(SMLoc Loc, const Init *Condition, const Init *Message) { +bool llvm::CheckAssert(SMLoc Loc, const Init *Condition, const Init *Message) { auto *CondValue = dyn_cast_or_null<IntInit>(Condition->convertInitializerTo( IntRecTy::get(Condition->getRecordKeeper()))); if (!CondValue) { @@ -178,11 +178,9 @@ bool CheckAssert(SMLoc Loc, const Init *Condition, const Init *Message) { } // Dump a message to stderr. -void dumpMessage(SMLoc Loc, const Init *Message) { +void llvm::dumpMessage(SMLoc Loc, const Init *Message) { if (auto *MessageInit = dyn_cast<StringInit>(Message)) PrintNote(Loc, MessageInit->getValue()); else PrintError(Loc, "dump value is not of type string"); } - -} // end namespace llvm diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp index f545706..42043f7 100644 --- a/llvm/lib/TableGen/Main.cpp +++ b/llvm/lib/TableGen/Main.cpp @@ -64,14 +64,12 @@ WriteIfChanged("write-if-changed", cl::desc("Only write output if it changed")); static cl::opt<bool> TimePhases("time-phases", cl::desc("Time phases of parser and backend")); -namespace llvm { -cl::opt<bool> EmitLongStrLiterals( +cl::opt<bool> llvm::EmitLongStrLiterals( "long-string-literals", cl::desc("when emitting large string tables, prefer string literals over " "comma-separated char literals. This can be a readability and " "compile-time performance win, but upsets some compilers"), cl::Hidden, cl::init(true)); -} // end namespace llvm static cl::opt<bool> NoWarnOnUnusedTemplateArgs( "no-warn-on-unused-template-args", diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp index 051a896..2ea3a24 100644 --- a/llvm/lib/TableGen/Record.cpp +++ b/llvm/lib/TableGen/Record.cpp @@ -46,8 +46,7 @@ using namespace llvm; // Context //===----------------------------------------------------------------------===// -namespace llvm { -namespace detail { +namespace llvm::detail { /// This class represents the internal implementation of the RecordKeeper. /// It contains all of the contextual static state of the Record classes. It is /// kept out-of-line to simplify dependencies, and also make it easier for @@ -100,8 +99,7 @@ struct RecordKeeperImpl { void dumpAllocationStats(raw_ostream &OS) const; }; -} // namespace detail -} // namespace llvm +} // namespace llvm::detail void detail::RecordKeeperImpl::dumpAllocationStats(raw_ostream &OS) const { // Dump memory allocation related stats. diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp index f928ded..3d31d8e 100644 --- a/llvm/lib/TableGen/TGParser.cpp +++ b/llvm/lib/TableGen/TGParser.cpp @@ -31,8 +31,6 @@ using namespace llvm; // Support Code for the Semantic Actions. 
//===----------------------------------------------------------------------===// -namespace llvm { - RecordsEntry::RecordsEntry(std::unique_ptr<Record> Rec) : Rec(std::move(Rec)) {} RecordsEntry::RecordsEntry(std::unique_ptr<ForeachLoop> Loop) : Loop(std::move(Loop)) {} @@ -41,6 +39,7 @@ RecordsEntry::RecordsEntry(std::unique_ptr<Record::AssertionInfo> Assertion) RecordsEntry::RecordsEntry(std::unique_ptr<Record::DumpInfo> Dump) : Dump(std::move(Dump)) {} +namespace llvm { struct SubClassReference { SMRange RefRange; const Record *Rec = nullptr; @@ -61,6 +60,7 @@ struct SubMultiClassReference { bool isInvalid() const { return MC == nullptr; } void dump() const; }; +} // end namespace llvm #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void SubMultiClassReference::dump() const { @@ -74,8 +74,6 @@ LLVM_DUMP_METHOD void SubMultiClassReference::dump() const { } #endif -} // end namespace llvm - static bool checkBitsConcrete(Record &R, const RecordVal &RV) { const auto *BV = cast<BitsInit>(RV.getValue()); for (unsigned i = 0, e = BV->getNumBits(); i != e; ++i) { diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 70d5ad7d..dc8e7c8 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -16461,7 +16461,7 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0), - DAG.getConstant(Cnt, DL, MVT::i32)); + DAG.getTargetConstant(Cnt, DL, MVT::i32)); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL, MVT::i32), @@ -16491,7 +16491,8 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, unsigned Opc = (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR; return DAG.getNode(Opc, DL, VT, Op.getOperand(0), - DAG.getConstant(Cnt, DL, MVT::i32), Op->getFlags()); + DAG.getTargetConstant(Cnt, DL, MVT::i32), + Op->getFlags()); } // Right shift register. Note, there is not a shift right register @@ -19973,7 +19974,7 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG, SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ResTy, DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), - Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32)); + Op->getOperand(0), DAG.getTargetConstant(C, DL, MVT::i32)); // We can handle smaller integers by generating an extra trunc. 
if (IntBits < FloatBits) FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv); @@ -20696,7 +20697,7 @@ static SDValue performConcatVectorsCombine(SDNode *N, N100 = DAG.getNode(AArch64ISD::NVCAST, DL, VT, N100); SDValue Uzp = DAG.getNode(AArch64ISD::UZP2, DL, VT, N000, N100); SDValue NewShiftConstant = - DAG.getConstant(N001ConstVal - NScalarSize, DL, MVT::i32); + DAG.getTargetConstant(N001ConstVal - NScalarSize, DL, MVT::i32); return DAG.getNode(AArch64ISD::VLSHR, DL, VT, Uzp, NewShiftConstant); } @@ -22373,14 +22374,14 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) { if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) { Op = DAG.getNode(Opcode, DL, VT, Op, - DAG.getSignedConstant(-ShiftAmount, DL, MVT::i32)); + DAG.getSignedConstant(-ShiftAmount, DL, MVT::i32, true)); if (N->getValueType(0) == MVT::i64) Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op, DAG.getConstant(0, DL, MVT::i64)); return Op; } else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) { Op = DAG.getNode(Opcode, DL, VT, Op, - DAG.getConstant(ShiftAmount, DL, MVT::i32)); + DAG.getTargetConstant(ShiftAmount, DL, MVT::i32)); if (N->getValueType(0) == MVT::i64) Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op, DAG.getConstant(0, DL, MVT::i64)); @@ -23198,7 +23199,7 @@ static SDValue performZExtUZPCombine(SDNode *N, SelectionDAG &DAG) { Op.getOperand(ExtOffset == 0 ? 0 : 1)); if (Shift != 0) BC = DAG.getNode(AArch64ISD::VLSHR, DL, VT, BC, - DAG.getConstant(Shift, DL, MVT::i32)); + DAG.getTargetConstant(Shift, DL, MVT::i32)); return DAG.getNode(ISD::AND, DL, VT, BC, DAG.getConstant(Mask, DL, VT)); } diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 6ef0a95..09ce713 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -812,49 +812,49 @@ def fixedpoint_recip_f16_i64 : fixedpoint_recip_i64<f16>; def fixedpoint_recip_f32_i64 : fixedpoint_recip_i64<f32>; def fixedpoint_recip_f64_i64 : fixedpoint_recip_i64<f64>; -def vecshiftR8 : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftR8 : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9); }]> { let EncoderMethod = "getVecShiftR8OpValue"; let DecoderMethod = "DecodeVecShiftR8Imm"; let ParserMatchClass = Imm1_8Operand; } -def vecshiftR16 : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftR16 : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17); }]> { let EncoderMethod = "getVecShiftR16OpValue"; let DecoderMethod = "DecodeVecShiftR16Imm"; let ParserMatchClass = Imm1_16Operand; } -def vecshiftR16Narrow : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftR16Narrow : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9); }]> { let EncoderMethod = "getVecShiftR16OpValue"; let DecoderMethod = "DecodeVecShiftR16ImmNarrow"; let ParserMatchClass = Imm1_8Operand; } -def vecshiftR32 : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftR32 : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33); }]> { let EncoderMethod = "getVecShiftR32OpValue"; let DecoderMethod = "DecodeVecShiftR32Imm"; let ParserMatchClass = Imm1_32Operand; } -def vecshiftR32Narrow : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftR32Narrow : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17); }]> { let EncoderMethod = "getVecShiftR32OpValue"; let DecoderMethod = 
"DecodeVecShiftR32ImmNarrow"; let ParserMatchClass = Imm1_16Operand; } -def vecshiftR64 : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftR64 : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 65); }]> { let EncoderMethod = "getVecShiftR64OpValue"; let DecoderMethod = "DecodeVecShiftR64Imm"; let ParserMatchClass = Imm1_64Operand; } -def vecshiftR64Narrow : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftR64Narrow : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33); }]> { let EncoderMethod = "getVecShiftR64OpValue"; @@ -862,37 +862,6 @@ def vecshiftR64Narrow : Operand<i32>, ImmLeaf<i32, [{ let ParserMatchClass = Imm1_32Operand; } -// Same as vecshiftR#N, but use TargetConstant (TimmLeaf) instead of Constant -// (ImmLeaf) -def tvecshiftR8 : Operand<i32>, TImmLeaf<i32, [{ - return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9); -}]> { - let EncoderMethod = "getVecShiftR8OpValue"; - let DecoderMethod = "DecodeVecShiftR8Imm"; - let ParserMatchClass = Imm1_8Operand; -} -def tvecshiftR16 : Operand<i32>, TImmLeaf<i32, [{ - return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17); -}]> { - let EncoderMethod = "getVecShiftR16OpValue"; - let DecoderMethod = "DecodeVecShiftR16Imm"; - let ParserMatchClass = Imm1_16Operand; -} -def tvecshiftR32 : Operand<i32>, TImmLeaf<i32, [{ - return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33); -}]> { - let EncoderMethod = "getVecShiftR32OpValue"; - let DecoderMethod = "DecodeVecShiftR32Imm"; - let ParserMatchClass = Imm1_32Operand; -} -def tvecshiftR64 : Operand<i32>, TImmLeaf<i32, [{ - return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 65); -}]> { - let EncoderMethod = "getVecShiftR64OpValue"; - let DecoderMethod = "DecodeVecShiftR64Imm"; - let ParserMatchClass = Imm1_64Operand; -} - def Imm0_0Operand : AsmImmRange<0, 0>; def Imm0_1Operand : AsmImmRange<0, 1>; def Imm1_1Operand : AsmImmRange<1, 1>; @@ -904,28 +873,28 @@ def Imm0_15Operand : AsmImmRange<0, 15>; def Imm0_31Operand : AsmImmRange<0, 31>; def Imm0_63Operand : AsmImmRange<0, 63>; -def vecshiftL8 : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftL8 : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) < 8); }]> { let EncoderMethod = "getVecShiftL8OpValue"; let DecoderMethod = "DecodeVecShiftL8Imm"; let ParserMatchClass = Imm0_7Operand; } -def vecshiftL16 : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftL16 : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) < 16); }]> { let EncoderMethod = "getVecShiftL16OpValue"; let DecoderMethod = "DecodeVecShiftL16Imm"; let ParserMatchClass = Imm0_15Operand; } -def vecshiftL32 : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftL32 : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) < 32); }]> { let EncoderMethod = "getVecShiftL32OpValue"; let DecoderMethod = "DecodeVecShiftL32Imm"; let ParserMatchClass = Imm0_31Operand; } -def vecshiftL64 : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftL64 : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) < 64); }]> { let EncoderMethod = "getVecShiftL64OpValue"; @@ -933,36 +902,6 @@ def vecshiftL64 : Operand<i32>, ImmLeaf<i32, [{ let ParserMatchClass = Imm0_63Operand; } -// Same as vecshiftL#N, but use TargetConstant (TimmLeaf) instead of Constant -// (ImmLeaf) -def tvecshiftL8 : Operand<i32>, TImmLeaf<i32, [{ - return (((uint32_t)Imm) < 8); -}]> { - let EncoderMethod = "getVecShiftL8OpValue"; - let DecoderMethod = "DecodeVecShiftL8Imm"; - let ParserMatchClass = Imm0_7Operand; -} -def tvecshiftL16 : Operand<i32>, TImmLeaf<i32, [{ - return (((uint32_t)Imm) < 16); -}]> { - 
let EncoderMethod = "getVecShiftL16OpValue"; - let DecoderMethod = "DecodeVecShiftL16Imm"; - let ParserMatchClass = Imm0_15Operand; -} -def tvecshiftL32 : Operand<i32>, TImmLeaf<i32, [{ - return (((uint32_t)Imm) < 32); -}]> { - let EncoderMethod = "getVecShiftL32OpValue"; - let DecoderMethod = "DecodeVecShiftL32Imm"; - let ParserMatchClass = Imm0_31Operand; -} -def tvecshiftL64 : Operand<i32>, TImmLeaf<i32, [{ - return (((uint32_t)Imm) < 64); -}]> { - let EncoderMethod = "getVecShiftL64OpValue"; - let DecoderMethod = "DecodeVecShiftL64Imm"; - let ParserMatchClass = Imm0_63Operand; -} // Crazy immediate formats used by 32-bit and 64-bit logical immediate // instructions for splatting repeating bit patterns across the immediate. @@ -10232,7 +10171,7 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm, def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, V64, V64, vecshiftR16, asm, ".4h", ".4h", - [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (i32 imm:$imm)))]> { + [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (i32 vecshiftR16:$imm)))]> { bits<4> imm; let Inst{19-16} = imm; } @@ -10240,15 +10179,16 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm, def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, V128, V128, vecshiftR16, asm, ".8h", ".8h", - [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (i32 imm:$imm)))]> { + [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (i32 vecshiftR16:$imm)))]> { bits<4> imm; let Inst{19-16} = imm; } } // Predicates = [HasNEON, HasFullFP16] + def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, V64, V64, vecshiftR32, asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (i32 imm:$imm)))]> { + [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (i32 vecshiftR32:$imm)))]> { bits<5> imm; let Inst{20-16} = imm; } @@ -10256,7 +10196,7 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm, def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, V128, V128, vecshiftR32, asm, ".4s", ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (i32 imm:$imm)))]> { + [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (i32 vecshiftR32:$imm)))]> { bits<5> imm; let Inst{20-16} = imm; } @@ -10264,7 +10204,7 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm, def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, V128, V128, vecshiftR64, asm, ".2d", ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (i32 imm:$imm)))]> { + [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (i32 vecshiftR64:$imm)))]> { bits<6> imm; let Inst{21-16} = imm; } @@ -10276,7 +10216,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm, def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, V64, V64, vecshiftR16, asm, ".4h", ".4h", - [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (i32 imm:$imm)))]> { + [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (i32 vecshiftR16:$imm)))]> { bits<4> imm; let Inst{19-16} = imm; } @@ -10284,7 +10224,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm, def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, V128, V128, vecshiftR16, asm, ".8h", ".8h", - [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (i32 imm:$imm)))]> { + [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (i32 vecshiftR16:$imm)))]> { bits<4> imm; let Inst{19-16} = imm; } @@ -10293,7 +10233,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm, def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, 
V64, V64, vecshiftR32, asm, ".2s", ".2s", - [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (i32 imm:$imm)))]> { + [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (i32 vecshiftR32:$imm)))]> { bits<5> imm; let Inst{20-16} = imm; } @@ -10301,7 +10241,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm, def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, V128, V128, vecshiftR32, asm, ".4s", ".4s", - [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (i32 imm:$imm)))]> { + [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (i32 vecshiftR32:$imm)))]> { bits<5> imm; let Inst{20-16} = imm; } @@ -10309,7 +10249,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm, def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, V128, V128, vecshiftR64, asm, ".2d", ".2d", - [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (i32 imm:$imm)))]> { + [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (i32 vecshiftR64:$imm)))]> { bits<6> imm; let Inst{21-16} = imm; } diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 96cc3f3..3e55b76 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -2957,9 +2957,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering(); // Need special instructions for atomics that affect ordering. - if (Order != AtomicOrdering::NotAtomic && - Order != AtomicOrdering::Unordered && - Order != AtomicOrdering::Monotonic) { + if (isStrongerThanMonotonic(Order)) { assert(!isa<GZExtLoad>(LdSt)); assert(MemSizeInBytes <= 8 && "128-bit atomics should already be custom-legalized"); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp index 6025f1c..63313da 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp @@ -556,8 +556,7 @@ void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI, unsigned NewOpc = Opc == TargetOpcode::G_ASHR ? 
AArch64::G_VASHR : AArch64::G_VLSHR; MachineIRBuilder MIB(MI); - auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm); - MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef}); + MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1)}).addImm(Imm); MI.eraseFromParent(); } diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 539470d..be44b8f 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -4967,7 +4967,7 @@ multiclass sme2_movaz_array_to_vec_vg4_multi<string mnemonic> { //===----------------------------------------------------------------------===// // SME2 multi-vec saturating shift right narrow class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u> - : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, tvecshiftR16:$imm4), + : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, vecshiftR16:$imm4), mnemonic, "\t$Zd, $Zn, $imm4", "", []>, Sched<[]> { bits<4> imm4; @@ -4985,7 +4985,7 @@ class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u> multiclass sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u, SDPatternOperator intrinsic> { def _H : sme2_sat_shift_vector_vg2<mnemonic, op, u>; - def : SME2_Sat_Shift_VG2_Pat<NAME # _H, intrinsic, nxv8i16, nxv4i32, tvecshiftR16>; + def : SME2_Sat_Shift_VG2_Pat<NAME # _H, intrinsic, nxv8i16, nxv4i32, vecshiftR16>; } class sme2_sat_shift_vector_vg4<bits<2> sz, bits<3> op, ZPRRegOp zpr_ty, @@ -5008,20 +5008,20 @@ class sme2_sat_shift_vector_vg4<bits<2> sz, bits<3> op, ZPRRegOp zpr_ty, } multiclass sme2_sat_shift_vector_vg4<string mnemonic, bits<3> op, SDPatternOperator intrinsic> { - def _B : sme2_sat_shift_vector_vg4<{0,1}, op, ZPR8, ZZZZ_s_mul_r, tvecshiftR32, + def _B : sme2_sat_shift_vector_vg4<{0,1}, op, ZPR8, ZZZZ_s_mul_r, vecshiftR32, mnemonic>{ bits<5> imm; let Inst{20-16} = imm; } - def _H : sme2_sat_shift_vector_vg4<{1,?}, op, ZPR16, ZZZZ_d_mul_r, tvecshiftR64, + def _H : sme2_sat_shift_vector_vg4<{1,?}, op, ZPR16, ZZZZ_d_mul_r, vecshiftR64, mnemonic> { bits<6> imm; let Inst{22} = imm{5}; let Inst{20-16} = imm{4-0}; } - def : SME2_Sat_Shift_VG4_Pat<NAME # _B, intrinsic, nxv16i8, nxv4i32, tvecshiftR32>; - def : SME2_Sat_Shift_VG4_Pat<NAME # _H, intrinsic, nxv8i16, nxv2i64, tvecshiftR64>; + def : SME2_Sat_Shift_VG4_Pat<NAME # _B, intrinsic, nxv16i8, nxv4i32, vecshiftR32>; + def : SME2_Sat_Shift_VG4_Pat<NAME # _H, intrinsic, nxv8i16, nxv2i64, vecshiftR64>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 9a23c35..3cdd505 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -4436,9 +4436,9 @@ multiclass sve2_bitwise_shift_left_long<bits<2> opc, string asm, ZPR64, ZPR32, vecshiftL32> { let Inst{20-19} = imm{4-3}; } - def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _H)>; - def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _S)>; - def : SVE_2_Op_Imm_Pat<nxv2i64, op, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _D)>; + def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv16i8, i32, vecshiftL8, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv8i16, i32, vecshiftL16, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Imm_Pat<nxv2i64, op, nxv4i32, i32, vecshiftL32, !cast<Instruction>(NAME # _D)>; } 
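The AArch64 hunks above all serve one change: the duplicated tvecshiftR*/tvecshiftL* operand definitions are folded into the canonical vecshift* ones by declaring the latter with TImmLeaf, which matches ISD::TargetConstant rather than ISD::Constant. Producers are updated to match: the SelectionDAG combines call getTargetConstant (and the extra true passed to getSignedConstant in tryCombineShiftImm likewise marks the node as a target constant), while the GlobalISel lowering attaches the amount directly with addImm instead of building a G_CONSTANT vreg. A minimal sketch of the DAG side, assuming a combine that already knows the node and amount (emitVecShiftRight is an illustrative helper, not an in-tree API):

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// A TargetConstant is opaque to legalization and combines, so it can only be
// consumed as an instruction immediate (what a TImmLeaf pattern such as
// vecshiftR32 expects) and is never materialized into a register.
static SDValue emitVecShiftRight(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                 unsigned Opcode, SDValue Src, unsigned Amt) {
  SDValue Imm = DAG.getTargetConstant(Amt, DL, MVT::i32);
  return DAG.getNode(Opcode, DL, VT, Src, Imm);
}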
//===----------------------------------------------------------------------===// @@ -4481,10 +4481,10 @@ multiclass sve2_int_bin_shift_imm_left<bit opc, string asm, let Inst{20-19} = imm{4-3}; } - def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _B)>; - def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _H)>; - def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _S)>; - def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftL64, !cast<Instruction>(NAME # _D)>; + def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftL8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftL16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftL32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftL64, !cast<Instruction>(NAME # _D)>; } multiclass sve2_int_bin_shift_imm_right<bit opc, string asm, @@ -4501,10 +4501,10 @@ multiclass sve2_int_bin_shift_imm_right<bit opc, string asm, let Inst{20-19} = imm{4-3}; } - def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>; - def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>; - def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>; - def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>; + def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>; } class sve2_int_bin_accum_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm, @@ -4546,10 +4546,10 @@ multiclass sve2_int_bin_accum_shift_imm_right<bits<2> opc, string asm, let Inst{20-19} = imm{4-3}; } - def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>; - def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>; - def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>; - def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>; + def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>; def : SVE_Shift_Add_All_Active_Pat<nxv16i8, shift_op, nxv16i1, nxv16i8, nxv16i8, i32, !cast<Instruction>(NAME # _B)>; def : SVE_Shift_Add_All_Active_Pat<nxv8i16, shift_op, nxv8i1, nxv8i16, nxv8i16, i32, !cast<Instruction>(NAME # _H)>; @@ -4676,18 +4676,18 @@ class sve2_int_bin_shift_imm_narrow_bottom<bits<3> tsz8_64, bits<3> opc, multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm, 
SDPatternOperator op> { def _B : sve2_int_bin_shift_imm_narrow_bottom<{0,0,1}, opc, asm, ZPR8, ZPR16, - tvecshiftR8>; + vecshiftR8>; def _H : sve2_int_bin_shift_imm_narrow_bottom<{0,1,?}, opc, asm, ZPR16, ZPR32, - tvecshiftR16> { + vecshiftR16> { let Inst{19} = imm{3}; } def _S : sve2_int_bin_shift_imm_narrow_bottom<{1,?,?}, opc, asm, ZPR32, ZPR64, - tvecshiftR32> { + vecshiftR32> { let Inst{20-19} = imm{4-3}; } - def : SVE_2_Op_Imm_Pat<nxv16i8, op, nxv8i16, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>; - def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv4i32, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>; - def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv2i64, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Imm_Pat<nxv16i8, op, nxv8i16, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>; + def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv4i32, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv2i64, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>; } class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc, @@ -4717,18 +4717,18 @@ class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc, multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm, SDPatternOperator op> { def _B : sve2_int_bin_shift_imm_narrow_top<{0,0,1}, opc, asm, ZPR8, ZPR16, - tvecshiftR8>; + vecshiftR8>; def _H : sve2_int_bin_shift_imm_narrow_top<{0,1,?}, opc, asm, ZPR16, ZPR32, - tvecshiftR16> { + vecshiftR16> { let Inst{19} = imm{3}; } def _S : sve2_int_bin_shift_imm_narrow_top<{1,?,?}, opc, asm, ZPR32, ZPR64, - tvecshiftR32> { + vecshiftR32> { let Inst{20-19} = imm{4-3}; } - def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv8i16, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>; - def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv4i32, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>; - def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv2i64, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv8i16, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv4i32, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv2i64, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>; } class sve2_int_addsub_narrow_high_bottom<bits<2> sz, bits<2> opc, string asm, @@ -5461,10 +5461,10 @@ multiclass sve2_int_rotate_right_imm<string asm, SDPatternOperator op> { let Inst{20-19} = imm{4-3}; } - def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>; - def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>; - def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>; - def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>; + def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>; } //===----------------------------------------------------------------------===// @@ -6443,10 +6443,10 @@ multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm, string Ps, let Inst{9-8} = imm{4-3}; 
} - def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _B)>; - def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _H)>; - def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _S)>; - def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftL64, !cast<Instruction>(NAME # _D)>; + def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, vecshiftL8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, vecshiftL16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, vecshiftL32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, vecshiftL64, !cast<Instruction>(NAME # _D)>; } // As above but shift amount takes the form of a "vector immediate". @@ -6460,15 +6460,15 @@ multiclass sve_int_bin_pred_shift_imm_left_dup<bits<4> opc, string asm, } multiclass sve_int_bin_pred_shift_imm_left_zeroing_bhsd<SDPatternOperator op> { - def _B_ZERO : PredTwoOpImmPseudo<NAME # _B, ZPR8, tvecshiftL8, FalseLanesZero>; - def _H_ZERO : PredTwoOpImmPseudo<NAME # _H, ZPR16, tvecshiftL16, FalseLanesZero>; - def _S_ZERO : PredTwoOpImmPseudo<NAME # _S, ZPR32, tvecshiftL32, FalseLanesZero>; - def _D_ZERO : PredTwoOpImmPseudo<NAME # _D, ZPR64, tvecshiftL64, FalseLanesZero>; + def _B_ZERO : PredTwoOpImmPseudo<NAME # _B, ZPR8, vecshiftL8, FalseLanesZero>; + def _H_ZERO : PredTwoOpImmPseudo<NAME # _H, ZPR16, vecshiftL16, FalseLanesZero>; + def _S_ZERO : PredTwoOpImmPseudo<NAME # _S, ZPR32, vecshiftL32, FalseLanesZero>; + def _D_ZERO : PredTwoOpImmPseudo<NAME # _D, ZPR64, vecshiftL64, FalseLanesZero>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, tvecshiftL8, !cast<Pseudo>(NAME # _B_ZERO)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, tvecshiftL16, !cast<Pseudo>(NAME # _H_ZERO)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, tvecshiftL32, !cast<Pseudo>(NAME # _S_ZERO)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, tvecshiftL64, !cast<Pseudo>(NAME # _D_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, vecshiftL8, !cast<Pseudo>(NAME # _B_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, vecshiftL16, !cast<Pseudo>(NAME # _H_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, vecshiftL32, !cast<Pseudo>(NAME # _S_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, vecshiftL64, !cast<Pseudo>(NAME # _D_ZERO)>; } multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm, string Ps, @@ -6489,10 +6489,10 @@ multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm, string Ps, let Inst{9-8} = imm{4-3}; } - def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>; - def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>; - def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>; - def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>; + def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>; + def : 
SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>; } // As above but shift amount takes the form of a "vector immediate". @@ -6511,10 +6511,10 @@ multiclass sve_int_bin_pred_shift_imm_right_zeroing_bhsd<SDPatternOperator op = def _S_ZERO : PredTwoOpImmPseudo<NAME # _S, ZPR32, vecshiftR32, FalseLanesZero>; def _D_ZERO : PredTwoOpImmPseudo<NAME # _D, ZPR64, vecshiftR64, FalseLanesZero>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, tvecshiftR8, !cast<Pseudo>(NAME # _B_ZERO)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, tvecshiftR16, !cast<Pseudo>(NAME # _H_ZERO)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, tvecshiftR32, !cast<Pseudo>(NAME # _S_ZERO)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, tvecshiftR64, !cast<Pseudo>(NAME # _D_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, vecshiftR8, !cast<Pseudo>(NAME # _B_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, vecshiftR16, !cast<Pseudo>(NAME # _H_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, vecshiftR32, !cast<Pseudo>(NAME # _S_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, vecshiftR64, !cast<Pseudo>(NAME # _D_ZERO)>; } class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc, @@ -10031,7 +10031,7 @@ multiclass sve2p1_multi_vec_extract_narrow<string mnemonic, bits<2> opc, SDPatte // SVE2 multi-vec shift narrow class sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, bits<2> tsz> - : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, tvecshiftR16:$imm4), + : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, vecshiftR16:$imm4), mnemonic, "\t$Zd, $Zn, $imm4", "", []>, Sched<[]> { bits<5> Zd; @@ -10055,7 +10055,7 @@ class sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, bits<2> tsz> multiclass sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, SDPatternOperator intrinsic> { def NAME : sve2p1_multi_vec_shift_narrow<mnemonic, opc, 0b01>; - def : SVE2p1_Sat_Shift_VG2_Pat<NAME, intrinsic, nxv8i16, nxv4i32, tvecshiftR16>; + def : SVE2p1_Sat_Shift_VG2_Pat<NAME, intrinsic, nxv8i16, nxv4i32, vecshiftR16>; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 2ba3156..9dd64e0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -131,10 +131,8 @@ static bool isDSAddress(const Constant *C) { return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS; } -/// Returns true if the function requires the implicit argument be passed -/// regardless of the function contents. -static bool funcRequiresHostcallPtr(const Function &F) { - // Sanitizers require the hostcall buffer passed in the implicit arguments. +/// Returns true if sanitizer attributes are present on a function. +static bool hasSanitizerAttributes(const Function &F) { return F.hasFnAttribute(Attribute::SanitizeAddress) || F.hasFnAttribute(Attribute::SanitizeThread) || F.hasFnAttribute(Attribute::SanitizeMemory) || @@ -469,15 +467,21 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { // If the function requires the implicit arg pointer due to sanitizers, // assume it's needed even if explicitly marked as not requiring it. 
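The AMDGPUAttributor hunk below generalizes the old hostcall-only carve-out: any sanitizer instrumentation attribute now also pins FLAT_SCRATCH_INIT, since the ASan malloc path introduced later in the pipeline performs flat scratch accesses. A minimal sketch of probing such attributes on plain IR, assuming a free-standing helper (hasAnySanitizer and the exact attribute list are illustrative; the in-tree predicate is the hasSanitizerAttributes renamed above):

#include "llvm/IR/Function.h"
using namespace llvm;

// True when the function carries sanitizer instrumentation attributes,
// e.g. as set by -fsanitize=address in the frontend.
static bool hasAnySanitizer(const Function &F) {
  return F.hasFnAttribute(Attribute::SanitizeAddress) ||
         F.hasFnAttribute(Attribute::SanitizeThread) ||
         F.hasFnAttribute(Attribute::SanitizeMemory);
}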
- const bool NeedsHostcall = funcRequiresHostcallPtr(*F); - if (NeedsHostcall) { + // Flat scratch initialization is needed because `asan_malloc_impl` + // calls introduced later in the pipeline will have flat scratch accesses. + // FIXME: FLAT_SCRATCH_INIT will not be required here if the device-libs + // implementation of `asan_malloc_impl` is updated. + const bool HasSanitizerAttrs = hasSanitizerAttributes(*F); + if (HasSanitizerAttrs) { removeAssumedBits(IMPLICIT_ARG_PTR); removeAssumedBits(HOSTCALL_PTR); + removeAssumedBits(FLAT_SCRATCH_INIT); } for (auto Attr : ImplicitAttrs) { - if (NeedsHostcall && - (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR)) + if (HasSanitizerAttrs && + (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR || + Attr.first == FLAT_SCRATCH_INIT)) continue; if (F->hasFnAttribute(Attr.second)) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 82789bc..90c828b 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -932,7 +932,9 @@ static MachineOperand *lookUpCopyChain(const SIInstrInfo &TII, for (MachineInstr *SubDef = MRI.getVRegDef(SrcReg); SubDef && TII.isFoldableCopy(*SubDef); SubDef = MRI.getVRegDef(Sub->getReg())) { - MachineOperand &SrcOp = SubDef->getOperand(1); + unsigned SrcIdx = TII.getFoldableCopySrcIdx(*SubDef); + MachineOperand &SrcOp = SubDef->getOperand(SrcIdx); + + if (SrcOp.isImm()) return &SrcOp; if (!SrcOp.isReg() || SrcOp.getReg().isPhysical()) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 79876ff..e233457 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -18860,31 +18860,6 @@ SITargetLowering::getTargetMMOFlags(const Instruction &I) const { return Flags; } -bool SITargetLowering::checkForPhysRegDependency( - SDNode *Def, SDNode *User, unsigned Op, const TargetRegisterInfo *TRI, - const TargetInstrInfo *TII, MCRegister &PhysReg, int &Cost) const { - if (User->getOpcode() != ISD::CopyToReg) - return false; - if (!Def->isMachineOpcode()) - return false; - MachineSDNode *MDef = dyn_cast<MachineSDNode>(Def); - if (!MDef) - return false; - - unsigned ResNo = User->getOperand(Op).getResNo(); - if (User->getOperand(Op)->getValueType(ResNo) != MVT::i1) - return false; - const MCInstrDesc &II = TII->get(MDef->getMachineOpcode()); - if (II.isCompare() && II.hasImplicitDefOfPhysReg(AMDGPU::SCC)) { - PhysReg = AMDGPU::SCC; - const TargetRegisterClass *RC = - TRI->getMinimalPhysRegClass(PhysReg, Def->getSimpleValueType(ResNo)); - Cost = RC->expensiveOrImpossibleToCopy() ? 
-1 : RC->getCopyCost(); - return true; - } - return false; -} - void SITargetLowering::emitExpandAtomicAddrSpacePredicate( Instruction *AI) const { // Given: atomicrmw fadd ptr %addr, float %val ordering diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index a474dab..74e58f4 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -561,11 +561,6 @@ public: bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const; bool denormalsEnabledForType(LLT Ty, const MachineFunction &MF) const; - bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, - const TargetRegisterInfo *TRI, - const TargetInstrInfo *TII, - MCRegister &PhysReg, int &Cost) const override; - bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN = false, unsigned Depth = 0) const override; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index cda8069..46757cf 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3433,6 +3433,32 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) { } } +unsigned SIInstrInfo::getFoldableCopySrcIdx(const MachineInstr &MI) { + switch (MI.getOpcode()) { + case AMDGPU::V_MOV_B16_t16_e32: + case AMDGPU::V_MOV_B16_t16_e64: + return 2; + case AMDGPU::V_MOV_B32_e32: + case AMDGPU::V_MOV_B32_e64: + case AMDGPU::V_MOV_B64_PSEUDO: + case AMDGPU::V_MOV_B64_e32: + case AMDGPU::V_MOV_B64_e64: + case AMDGPU::S_MOV_B32: + case AMDGPU::S_MOV_B64: + case AMDGPU::S_MOV_B64_IMM_PSEUDO: + case AMDGPU::COPY: + case AMDGPU::WWM_COPY: + case AMDGPU::V_ACCVGPR_WRITE_B32_e64: + case AMDGPU::V_ACCVGPR_READ_B32_e64: + case AMDGPU::V_ACCVGPR_MOV_B32: + case AMDGPU::AV_MOV_B32_IMM_PSEUDO: + case AMDGPU::AV_MOV_B64_IMM_PSEUDO: + return 1; + default: + llvm_unreachable("MI is not a foldable copy"); + } +} + static constexpr AMDGPU::OpName ModifierOpNames[] = { AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp, diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index a21089f..cc59acf 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -417,6 +417,7 @@ public: const MachineInstr &MIb) const override; static bool isFoldableCopy(const MachineInstr &MI); + static unsigned getFoldableCopySrcIdx(const MachineInstr &MI); void removeModOperands(MachineInstr &MI) const; diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 0040504..a94e131 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -359,6 +359,8 @@ HexagonTargetLowering::initializeHVXLowering() { setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand); setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand); setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand); + setCondCodeAction(ISD::SETUO, MVT::v64f16, Expand); + setCondCodeAction(ISD::SETO, MVT::v64f16, Expand); setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand); setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand); @@ -372,6 +374,8 @@ HexagonTargetLowering::initializeHVXLowering() { setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand); setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand); setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand); + setCondCodeAction(ISD::SETUO, MVT::v32f32, 
Expand); + setCondCodeAction(ISD::SETO, MVT::v32f32, Expand); // Boolean vectors. diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 7d4535a..b37b740 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -1560,7 +1560,7 @@ static MCRegister getRVVBaseRegister(const RISCVRegisterInfo &TRI, MCRegister BaseReg = TRI.getSubReg(Reg, RISCV::sub_vrm1_0); // If it's not a grouped vector register, it doesn't have subregister, so // the base register is just itself. - if (BaseReg == RISCV::NoRegister) + if (!BaseReg.isValid()) BaseReg = Reg; return BaseReg; } diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td index cf6f83a..7f5d0af 100644 --- a/llvm/lib/Target/RISCV/RISCVGISel.td +++ b/llvm/lib/Target/RISCV/RISCVGISel.td @@ -126,13 +126,6 @@ let Predicates = [HasAtomicLdSt, IsRV64] in { // RV64 i32 patterns not used by SelectionDAG //===----------------------------------------------------------------------===// -def uimm5i32 : ImmLeaf<i32, [{return isUInt<5>(Imm);}]>; - -def zext_is_sext : PatFrag<(ops node:$src), (zext node:$src), [{ - KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0), 0); - return Known.isNonNegative(); -}]>; - let Predicates = [IsRV64] in { def : LdPat<extloadi8, LBU, i32>; // Prefer unsigned due to no c.lb in Zcb. def : LdPat<extloadi16, LH, i32>; @@ -140,15 +133,10 @@ def : LdPat<extloadi16, LH, i32>; def : StPat<truncstorei8, SB, GPR, i32>; def : StPat<truncstorei16, SH, GPR, i32>; -def : Pat<(anyext (i32 GPR:$src)), (COPY GPR:$src)>; def : Pat<(sext (i32 GPR:$src)), (ADDIW GPR:$src, 0)>; -def : Pat<(i32 (trunc GPR:$src)), (COPY GPR:$src)>; def : Pat<(sext_inreg (i64 (add GPR:$rs1, simm12_lo:$imm)), i32), (ADDIW GPR:$rs1, simm12_lo:$imm)>; - -// Use sext if the sign bit of the input is 0. -def : Pat<(zext_is_sext (i32 GPR:$src)), (ADDIW GPR:$src, 0)>; } let Predicates = [IsRV64, NoStdExtZba] in diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 6a6ead2..cf8d120 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -128,7 +128,7 @@ static bool hasUndefinedPassthru(const MachineInstr &MI) { // All undefined passthrus should be $noreg: see // RISCVDAGToDAGISel::doPeepholeNoRegPassThru const MachineOperand &UseMO = MI.getOperand(UseOpIdx); - return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef(); + return !UseMO.getReg().isValid() || UseMO.isUndef(); } /// Return true if \p MI is a copy that will be lowered to one or more vmvNr.vs. @@ -1454,7 +1454,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { Register Reg = VLOp.getReg(); // Erase the AVL operand from the instruction. 
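// The RISC-V hunks in this region all make the same substitution: explicit
// RISCV::NoRegister comparisons and assignments become the target-independent
// Register API. A minimal sketch of the equivalence, assuming only the
// Register header (isUnset is an illustrative helper, not in-tree):
#include "llvm/CodeGen/Register.h"
static bool isUnset(llvm::Register R) {
  // Register() default-constructs register number 0, the value every
  // target's NoRegister enumerator has, so !R.isValid() is equivalent to
  // R == RISCV::NoRegister without naming the target.
  return !R.isValid();
}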
- VLOp.setReg(RISCV::NoRegister); + VLOp.setReg(Register()); VLOp.setIsKill(false); if (LIS) { LiveInterval &LI = LIS->getInterval(Reg); @@ -1663,7 +1663,7 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const { if (!MO.isReg() || !MO.getReg().isVirtual()) return; Register OldVLReg = MO.getReg(); - MO.setReg(RISCV::NoRegister); + MO.setReg(Register()); if (LIS) LIS->shrinkToUses(&LIS->getInterval(OldVLReg)); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 1e6b04f8..7db4832 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1364,7 +1364,7 @@ void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, RS->scavengeRegisterBackwards(RISCV::GPRRegClass, MI.getIterator(), /*RestoreAfter=*/false, /*SpAdj=*/0, /*AllowSpill=*/false); - if (TmpGPR != RISCV::NoRegister) + if (TmpGPR.isValid()) RS->setRegUsed(TmpGPR); else { // The case when there is no scavenged register needs special handling. @@ -3021,7 +3021,7 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, ErrInfo = "Invalid operand type for VL operand"; return false; } - if (Op.isReg() && Op.getReg() != RISCV::NoRegister) { + if (Op.isReg() && Op.getReg().isValid()) { const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); auto *RC = MRI.getRegClass(Op.getReg()); if (!RISCV::GPRRegClass.hasSubClassEq(RC)) { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td index 1674c95..1dd7332 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td @@ -26,7 +26,7 @@ class LAQ_r<bit aq, bit rl, bits<3> funct3, string opcodestr> let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in class SRL_r<bit aq, bit rl, bits<3> funct3, string opcodestr> : RVInstRAtomic<0b00111, aq, rl, funct3, OPC_AMO, - (outs ), (ins GPRMemZeroOffset:$rs1, GPR:$rs2), + (outs), (ins GPR:$rs2, GPRMemZeroOffset:$rs1), opcodestr, "$rs2, $rs1"> { let rd = 0; } @@ -71,7 +71,7 @@ class PatLAQ<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT> // while atomic_store has data, addr class PatSRL<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT> : Pat<(OpNode (vt GPR:$rs2), (XLenVT GPRMemZeroOffset:$rs1)), - (Inst GPRMemZeroOffset:$rs1, GPR:$rs2)>; + (Inst GPR:$rs2, GPRMemZeroOffset:$rs1)>; let Predicates = [HasStdExtZalasr] in { diff --git a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp index f8d33ae..54569b1 100644 --- a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp +++ b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp @@ -259,7 +259,7 @@ static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) { if (isCompressibleLoad(MI) || isCompressibleStore(MI)) { const MachineOperand &MOImm = MI.getOperand(2); if (!MOImm.isImm()) - return RegImmPair(RISCV::NoRegister, 0); + return RegImmPair(Register(), 0); int64_t Offset = MOImm.getImm(); int64_t NewBaseAdjust = getBaseAdjustForCompression(Offset, Opcode); @@ -292,7 +292,7 @@ static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) { } } } - return RegImmPair(RISCV::NoRegister, 0); + return RegImmPair(Register(), 0); } // Check all uses after FirstMI of the given register, keeping a vector of diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp index ffba284..fdf9a4f 100644 --- 
a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp +++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp @@ -382,7 +382,7 @@ bool RISCVVectorPeephole::convertAllOnesVMergeToVMv(MachineInstr &MI) const { // vmv.v.v doesn't have a mask operand, so we may be able to inflate the // register class for the destination and passthru operands e.g. VRNoV0 -> VR MRI->recomputeRegClass(MI.getOperand(0).getReg()); - if (MI.getOperand(1).getReg() != RISCV::NoRegister) + if (MI.getOperand(1).getReg().isValid()) MRI->recomputeRegClass(MI.getOperand(1).getReg()); return true; } @@ -448,7 +448,7 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) { Register FalseReg = MI.getOperand(2).getReg(); if (TruePassthruReg != FalseReg) { // If True's passthru is undef see if we can change it to False - if (TruePassthruReg != RISCV::NoRegister || + if (TruePassthruReg.isValid() || !MRI->hasOneUse(MI.getOperand(3).getReg()) || !ensureDominates(MI.getOperand(2), *True)) return false; @@ -467,7 +467,7 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) { // vmv.v.v doesn't have a mask operand, so we may be able to inflate the // register class for the destination and passthru operands e.g. VRNoV0 -> VR MRI->recomputeRegClass(MI.getOperand(0).getReg()); - if (MI.getOperand(1).getReg() != RISCV::NoRegister) + if (MI.getOperand(1).getReg().isValid()) MRI->recomputeRegClass(MI.getOperand(1).getReg()); return true; } @@ -517,7 +517,7 @@ bool RISCVVectorPeephole::convertToUnmasked(MachineInstr &MI) const { if (RISCVII::isFirstDefTiedToFirstUse(MaskedMCID)) { unsigned PassthruOpIdx = MI.getNumExplicitDefs(); if (HasPassthru) { - if (MI.getOperand(PassthruOpIdx).getReg() != RISCV::NoRegister) + if (MI.getOperand(PassthruOpIdx).getReg()) MRI->recomputeRegClass(MI.getOperand(PassthruOpIdx).getReg()); } else MI.removeOperand(PassthruOpIdx); @@ -576,7 +576,7 @@ static bool dominates(MachineBasicBlock::const_iterator A, bool RISCVVectorPeephole::ensureDominates(const MachineOperand &MO, MachineInstr &Src) const { assert(MO.getParent()->getParent() == Src.getParent()); - if (!MO.isReg() || MO.getReg() == RISCV::NoRegister) + if (!MO.isReg() || !MO.getReg().isValid()) return true; MachineInstr *Def = MRI->getVRegDef(MO.getReg()); @@ -593,7 +593,7 @@ bool RISCVVectorPeephole::ensureDominates(const MachineOperand &MO, bool RISCVVectorPeephole::foldUndefPassthruVMV_V_V(MachineInstr &MI) { if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VMV_V_V) return false; - if (MI.getOperand(1).getReg() != RISCV::NoRegister) + if (MI.getOperand(1).getReg().isValid()) return false; // If the input was a pseudo with a policy operand, we can give it a tail @@ -654,7 +654,7 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) { // Src needs to have the same passthru as VMV_V_V MachineOperand &SrcPassthru = Src->getOperand(Src->getNumExplicitDefs()); - if (SrcPassthru.getReg() != RISCV::NoRegister && + if (SrcPassthru.getReg().isValid() && SrcPassthru.getReg() != Passthru.getReg()) return false; @@ -672,7 +672,7 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) { if (SrcPassthru.getReg() != Passthru.getReg()) { SrcPassthru.setReg(Passthru.getReg()); // If Src is masked then its passthru needs to be in VRNoV0. 
- if (Passthru.getReg() != RISCV::NoRegister) + if (Passthru.getReg().isValid()) MRI->constrainRegClass( Passthru.getReg(), TII->getRegClass(Src->getDesc(), SrcPassthru.getOperandNo(), TRI)); diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp index 7505507..ebd957c 100644 --- a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp @@ -188,8 +188,31 @@ class SPIRVLegalizePointerCast : public FunctionPass { FixedVectorType *SrcType = cast<FixedVectorType>(Src->getType()); FixedVectorType *DstType = cast<FixedVectorType>(GR->findDeducedElementType(Dst)); - assert(DstType->getNumElements() >= SrcType->getNumElements()); + auto dstNumElements = DstType->getNumElements(); + auto srcNumElements = SrcType->getNumElements(); + + // if the element type differs, it is a bitcast. + if (DstType->getElementType() != SrcType->getElementType()) { + // Support bitcast between vectors of different sizes only if + // the total bitwidth is the same. + auto dstBitWidth = + DstType->getElementType()->getScalarSizeInBits() * dstNumElements; + auto srcBitWidth = + SrcType->getElementType()->getScalarSizeInBits() * srcNumElements; + assert(dstBitWidth == srcBitWidth && + "Unsupported bitcast between vectors of different sizes."); + + Src = + B.CreateIntrinsic(Intrinsic::spv_bitcast, {DstType, SrcType}, {Src}); + buildAssignType(B, DstType, Src); + SrcType = DstType; + + StoreInst *SI = B.CreateStore(Src, Dst); + SI->setAlignment(Alignment); + return SI; + } + assert(DstType->getNumElements() >= SrcType->getNumElements()); LoadInst *LI = B.CreateLoad(DstType, Dst); LI->setAlignment(Alignment); Value *OldValues = LI; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 02b20b3..931a10b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -13783,10 +13783,12 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask, // so prevents folding a load into this instruction or making a copy. 
const int UnpackLoMask[] = {0, 0, 1, 1}; const int UnpackHiMask[] = {2, 2, 3, 3}; - if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2)) - Mask = UnpackLoMask; - else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2)) - Mask = UnpackHiMask; + if (!isSingleElementRepeatedMask(Mask)) { + if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2)) + Mask = UnpackLoMask; + else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2)) + Mask = UnpackHiMask; + } return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1, getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index ddb95a4..faeab95 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -29,6 +29,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/MemoryProfileInfo.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" @@ -40,6 +41,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/InterleavedRange.h" +#include "llvm/Support/SHA1.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/CallPromotionUtils.h" @@ -60,6 +62,9 @@ STATISTIC(FunctionClonesThinBackend, "Number of function clones created during ThinLTO backend"); STATISTIC(FunctionsClonedThinBackend, "Number of functions that had clones created during ThinLTO backend"); +STATISTIC( + FunctionCloneDuplicatesThinBackend, + "Number of function clone duplicates detected during ThinLTO backend"); STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly " "cloned) during whole program analysis"); STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) " @@ -5186,19 +5191,127 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { return Changed; } +// Compute a SHA1 hash of the callsite and alloc version information of clone I +// in the summary, to use in detection of duplicate clones. +uint64_t ComputeHash(const FunctionSummary *FS, unsigned I) { + SHA1 Hasher; + // Update hash with any callsites that call non-default (non-zero) callee + // versions. + for (auto &SN : FS->callsites()) { + // In theory all callsites and allocs in this function should have the same + // number of clone entries, but handle any discrepancies gracefully below + // for NDEBUG builds. + assert( + SN.Clones.size() > I && + "Callsite summary has fewer entries than other summaries in function"); + if (SN.Clones.size() <= I || !SN.Clones[I]) + continue; + uint8_t Data[sizeof(SN.Clones[I])]; + support::endian::write32le(Data, SN.Clones[I]); + Hasher.update(Data); + } + // Update hash with any allocs that have non-default (non-None) hints. + for (auto &AN : FS->allocs()) { + // In theory all callsites and allocs in this function should have the same + // number of clone entries, but handle any discrepancies gracefully below + // for NDEBUG builds. 
+ assert(AN.Versions.size() > I && + "Alloc summary has fewer entries than other summaries in function"); + if (AN.Versions.size() <= I || + (AllocationType)AN.Versions[I] == AllocationType::None) + continue; + Hasher.update(ArrayRef<uint8_t>(&AN.Versions[I], 1)); + } + return support::endian::read64le(Hasher.result().data()); +} + static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones( Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>> - &FuncToAliasMap) { + &FuncToAliasMap, + FunctionSummary *FS) { + auto TakeDeclNameAndReplace = [](GlobalValue *DeclGV, GlobalValue *NewGV) { + // We might have created this when adjusting a callsite in another + // function. It should be a declaration. + assert(DeclGV->isDeclaration()); + NewGV->takeName(DeclGV); + DeclGV->replaceAllUsesWith(NewGV); + DeclGV->eraseFromParent(); + }; + + // Handle aliases to this function, and create analogous alias clones to the + // provided clone of this function. + auto CloneFuncAliases = [&](Function *NewF, unsigned I) { + if (!FuncToAliasMap.count(&F)) + return; + for (auto *A : FuncToAliasMap[&F]) { + std::string AliasName = getMemProfFuncName(A->getName(), I); + auto *PrevA = M.getNamedAlias(AliasName); + auto *NewA = GlobalAlias::create(A->getValueType(), + A->getType()->getPointerAddressSpace(), + A->getLinkage(), AliasName, NewF); + NewA->copyAttributesFrom(A); + if (PrevA) + TakeDeclNameAndReplace(PrevA, NewA); + } + }; + // The first "clone" is the original copy, we should only call this if we // needed to create new clones. assert(NumClones > 1); SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps; VMaps.reserve(NumClones - 1); FunctionsClonedThinBackend++; + + // Map of hash of callsite/alloc versions to the instantiated function clone + // (possibly the original) implementing those calls. Used to avoid + // instantiating duplicate function clones. + // FIXME: Ideally the thin link would not generate such duplicate clones to + // start with, but right now it happens due to phase ordering in the function + // assignment and the possible new clones that it produces. We simply make each + // duplicate an alias to the matching instantiated clone recorded in the map + // (except for available_externally which are made declarations as they would + // be aliases in the prevailing module, and available_externally aliases are + // not well supported right now). + DenseMap<uint64_t, Function *> HashToFunc; + + // Save the hash of the original function version. + HashToFunc[ComputeHash(FS, 0)] = &F; + for (unsigned I = 1; I < NumClones; I++) { VMaps.emplace_back(std::make_unique<ValueToValueMapTy>()); + std::string Name = getMemProfFuncName(F.getName(), I); + auto Hash = ComputeHash(FS, I); + // If this clone would duplicate a previously seen clone, don't generate the + // duplicate clone body, just make an alias to satisfy any (potentially + // cross-module) references. + if (HashToFunc.contains(Hash)) { + FunctionCloneDuplicatesThinBackend++; + auto *Func = HashToFunc[Hash]; + if (Func->hasAvailableExternallyLinkage()) { + // Skip these as EliminateAvailableExternallyPass does not handle + // available_externally aliases correctly and we end up with an + // available_externally alias to a declaration. Just create a + // declaration for now as we know we will have a definition in another + // module. 
+ auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType()); + ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F) + << "created clone decl " << ore::NV("Decl", Decl.getCallee())); + continue; + } + auto *PrevF = M.getFunction(Name); + auto *Alias = GlobalAlias::create(Name, Func); + if (PrevF) + TakeDeclNameAndReplace(PrevF, Alias); + ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F) + << "created clone alias " << ore::NV("Alias", Alias)); + + // Now handle aliases to this function, and clone those as well. + CloneFuncAliases(Func, I); + continue; + } auto *NewF = CloneFunction(&F, *VMaps.back()); + HashToFunc[Hash] = NewF; FunctionClonesThinBackend++; // Strip memprof and callsite metadata from clone as they are no longer // needed. @@ -5208,40 +5321,17 @@ static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones( Inst.setMetadata(LLVMContext::MD_callsite, nullptr); } } - std::string Name = getMemProfFuncName(F.getName(), I); auto *PrevF = M.getFunction(Name); - if (PrevF) { - // We might have created this when adjusting callsite in another - // function. It should be a declaration. - assert(PrevF->isDeclaration()); - NewF->takeName(PrevF); - PrevF->replaceAllUsesWith(NewF); - PrevF->eraseFromParent(); - } else + if (PrevF) + TakeDeclNameAndReplace(PrevF, NewF); + else NewF->setName(Name); updateSubprogramLinkageName(NewF, Name); ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F) << "created clone " << ore::NV("NewFunction", NewF)); // Now handle aliases to this function, and clone those as well. - if (!FuncToAliasMap.count(&F)) - continue; - for (auto *A : FuncToAliasMap[&F]) { - std::string Name = getMemProfFuncName(A->getName(), I); - auto *PrevA = M.getNamedAlias(Name); - auto *NewA = GlobalAlias::create(A->getValueType(), - A->getType()->getPointerAddressSpace(), - A->getLinkage(), Name, NewF); - NewA->copyAttributesFrom(A); - if (PrevA) { - // We might have created this when adjusting callsite in another - // function. It should be a declaration. - assert(PrevA->isDeclaration()); - NewA->takeName(PrevA); - PrevA->replaceAllUsesWith(NewA); - PrevA->eraseFromParent(); - } - } + CloneFuncAliases(NewF, I); } return VMaps; } @@ -5401,7 +5491,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps; bool ClonesCreated = false; unsigned NumClonesCreated = 0; - auto CloneFuncIfNeeded = [&](unsigned NumClones) { + auto CloneFuncIfNeeded = [&](unsigned NumClones, FunctionSummary *FS) { // We should at least have version 0 which is the original copy. assert(NumClones > 0); // If only one copy needed use original. @@ -5415,7 +5505,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { assert(NumClonesCreated == NumClones); return; } - VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap); + VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap, FS); // The first "clone" is the original copy, which doesn't have a VMap. assert(VMaps.size() == NumClones - 1); Changed = true; @@ -5424,9 +5514,9 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { }; auto CloneCallsite = [&](const CallsiteInfo &StackNode, CallBase *CB, - Function *CalledFunction) { + Function *CalledFunction, FunctionSummary *FS) { // Perform cloning if not yet done. 
- CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size()); + CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size(), FS); assert(!isMemProfClone(*CalledFunction)); @@ -5448,6 +5538,10 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { // below. auto CalleeOrigName = CalledFunction->getName(); for (unsigned J = 0; J < StackNode.Clones.size(); J++) { + // If the VMap is empty, this clone was a duplicate of another and was + // created as an alias or a declaration. + if (J > 0 && VMaps[J - 1]->empty()) + continue; // Do nothing if this version calls the original version of its // callee. if (!StackNode.Clones[J]) @@ -5591,7 +5685,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { #endif // Perform cloning if not yet done. - CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size()); + CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size(), FS); OrigAllocsThinBackend++; AllocVersionsThinBackend += AllocNode.Versions.size(); @@ -5624,6 +5718,10 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { // Update the allocation types per the summary info. for (unsigned J = 0; J < AllocNode.Versions.size(); J++) { + // If the VMap is empty, this clone was a duplicate of another and + // was created as an alias or a declaration. + if (J > 0 && VMaps[J - 1]->empty()) + continue; // Ignore any that didn't get an assigned allocation type. if (AllocNode.Versions[J] == (uint8_t)AllocationType::None) continue; @@ -5670,7 +5768,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { // we don't need to do ICP, but might need to clone this // function as it is the target of other cloned calls. if (NumClones) - CloneFuncIfNeeded(NumClones); + CloneFuncIfNeeded(NumClones, FS); } else { @@ -5690,7 +5788,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { } #endif - CloneCallsite(StackNode, CB, CalledFunction); + CloneCallsite(StackNode, CB, CalledFunction, FS); } } else if (CB->isTailCall() && CalledFunction) { // Locate the synthesized callsite info for the callee VI, if any was @@ -5700,7 +5798,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { if (CalleeVI && MapTailCallCalleeVIToCallsite.count(CalleeVI)) { auto Callsite = MapTailCallCalleeVIToCallsite.find(CalleeVI); assert(Callsite != MapTailCallCalleeVIToCallsite.end()); - CloneCallsite(Callsite->second, CB, CalledFunction); + CloneCallsite(Callsite->second, CB, CalledFunction, FS); } } } @@ -5846,6 +5944,10 @@ void MemProfContextDisambiguation::performICP( // check. CallBase *CBClone = CB; for (unsigned J = 0; J < NumClones; J++) { + // If the VMap is empty, this clone was a duplicate of another and was + // created as an alias or a declaration. + if (J > 0 && VMaps[J - 1]->empty()) + continue; // Copy 0 is the original function. if (J > 0) CBClone = cast<CallBase>((*VMaps[J - 1])[CB]); @@ -5891,6 +5993,10 @@ void MemProfContextDisambiguation::performICP( // TotalCount and the number promoted. CallBase *CBClone = CB; for (unsigned J = 0; J < NumClones; J++) { + // If the VMap is empty, this clone was a duplicate of another and was + // created as an alias or a declaration. + if (J > 0 && VMaps[J - 1]->empty()) + continue; // Copy 0 is the original function. 
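// The ComputeHash/HashToFunc scheme above can be modeled in isolation:
// digest each clone's non-default callsite/alloc version values with SHA1
// and reuse the first function recorded for a digest. Minimal sketch on
// LLVM's support libraries (hashVersions and its argument are illustrative
// names, not the in-tree API):
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/SHA1.h"
#include <cstdint>
static uint64_t hashVersions(const llvm::SmallVector<uint32_t, 4> &Versions) {
  llvm::SHA1 Hasher;
  for (uint32_t V : Versions) {
    if (!V)
      continue; // default (zero) versions do not distinguish clones
    uint8_t Data[4];
    llvm::support::endian::write32le(Data, V);
    Hasher.update(Data);
  }
  // Fold the 20-byte digest to its first 8 bytes, as ComputeHash does.
  return llvm::support::endian::read64le(Hasher.result().data());
}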
if (J > 0) CBClone = cast<CallBase>((*VMaps[J - 1])[CB]); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index cf6d0ec..e1e24a9 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -318,18 +318,18 @@ Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) { // * Single constant active lane -> store // * Narrow width by halfs excluding zero/undef lanes Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) { + Value *StorePtr = II.getArgOperand(1); + Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue(); auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3)); if (!ConstMask) return nullptr; // If the mask is all zeros, this instruction does nothing. - if (ConstMask->isNullValue()) + if (maskIsAllZeroOrUndef(ConstMask)) return eraseInstFromFunction(II); // If the mask is all ones, this is a plain vector store of the 1st argument. - if (ConstMask->isAllOnesValue()) { - Value *StorePtr = II.getArgOperand(1); - Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue(); + if (maskIsAllOneOrUndef(ConstMask)) { StoreInst *S = new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment); S->copyMetadata(II); @@ -389,7 +389,7 @@ Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) { return nullptr; // If the mask is all zeros, a scatter does nothing. - if (ConstMask->isNullValue()) + if (maskIsAllZeroOrUndef(ConstMask)) return eraseInstFromFunction(II); // Vector splat address -> scalar store diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 87000a1..3df448d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -50,6 +50,9 @@ using namespace llvm; using namespace PatternMatch; +namespace llvm { +extern cl::opt<bool> ProfcheckDisableMetadataFixes; +} /// Replace a select operand based on an equality comparison with the identity /// constant of a binop. @@ -4492,8 +4495,21 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { auto FoldSelectWithAndOrCond = [&](bool IsAnd, Value *A, Value *B) -> Instruction * { if (Value *V = simplifySelectInst(B, TrueVal, FalseVal, - SQ.getWithInstruction(&SI))) - return SelectInst::Create(A, IsAnd ? V : TrueVal, IsAnd ? FalseVal : V); + SQ.getWithInstruction(&SI))) { + Value *NewTrueVal = IsAnd ? V : TrueVal; + Value *NewFalseVal = IsAnd ? FalseVal : V; + + // If the True and False values don't change, then preserve the branch + // metadata of the original select as the net effect of this change is to + // simplify the conditional. + Instruction *MDFrom = nullptr; + if (NewTrueVal == TrueVal && NewFalseVal == FalseVal && + !ProfcheckDisableMetadataFixes) { + MDFrom = &SI; + } + return SelectInst::Create(A, NewTrueVal, NewFalseVal, "", nullptr, + MDFrom); + } // Is (select B, T, F) a SPF? if (CondVal->hasOneUse() && SelType->isIntOrIntVectorTy()) { diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp index 9d4fb79..d6b7633 100644 --- a/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -1646,10 +1646,6 @@ NewGVN::performSymbolicPredicateInfoEvaluation(BitCastInst *I) const { // Evaluate read only and pure calls, and create an expression result. 
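// The simplifyMaskedStore/simplifyMaskedScatter change above relaxes the
// folds from exact all-zero/all-one masks to masks whose remaining lanes may
// be undef. A scalar model of the two lane rules, with std::optional standing
// in for a possibly-undef lane (nothing here is LLVM API):
#include <optional>
#include <vector>
static bool allZeroOrUndef(const std::vector<std::optional<bool>> &Mask) {
  for (const auto &Lane : Mask)
    if (Lane && *Lane)
      return false; // a known-true lane keeps the masked store alive
  return true;      // dead store: every lane is known-false or undef
}
static bool allOneOrUndef(const std::vector<std::optional<bool>> &Mask) {
  for (const auto &Lane : Mask)
    if (Lane && !*Lane)
      return false; // a known-false lane blocks the plain-store fold
  return true;      // safe to lower to an unconditional store
}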
NewGVN::ExprResult NewGVN::performSymbolicCallEvaluation(Instruction *I) const { auto *CI = cast<CallInst>(I); - if (auto *II = dyn_cast<IntrinsicInst>(I)) { - if (auto *ReturnedValue = II->getReturnedArgOperand()) - return ExprResult::some(createVariableOrConstant(ReturnedValue)); - } // FIXME: Currently the calls which may access the thread id may // be considered as not accessing the memory. But this is diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 43d61f2..a88cffc 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3298,10 +3298,11 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo); Type *PtrTy = isSingleScalar() ? ScalarPtrTy : toVectorTy(ScalarPtrTy, VF); - + bool UsedByLoadStoreAddress = isUsedByLoadStoreAddress(this); InstructionCost ScalarCost = ScalarMemOpCost + Ctx.TTI.getAddressComputationCost( - PtrTy, &Ctx.SE, nullptr, Ctx.CostKind); + PtrTy, UsedByLoadStoreAddress ? nullptr : &Ctx.SE, + nullptr, Ctx.CostKind); if (isSingleScalar()) return ScalarCost; @@ -3312,7 +3313,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, // vectorized addressing or the loaded value is used as part of an address // of another load or store. bool PreferVectorizedAddressing = Ctx.TTI.prefersVectorizedAddressing(); - if (PreferVectorizedAddressing || !isUsedByLoadStoreAddress(this)) { + if (PreferVectorizedAddressing || !UsedByLoadStoreAddress) { bool EfficientVectorLoadStore = Ctx.TTI.supportsEfficientVectorElementLoadStore(); if (!(IsLoad && !PreferVectorizedAddressing) && diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll index 7872c02..461a7ef 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll @@ -177,7 +177,7 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) { ; GISEL-NEXT: neg v2.16b, v3.16b ; GISEL-NEXT: shl v3.16b, v4.16b, #7 ; GISEL-NEXT: ushl v1.16b, v1.16b, v2.16b -; GISEL-NEXT: sshr v2.16b, v3.16b, #7 +; GISEL-NEXT: cmlt v2.16b, v3.16b, #0 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; GISEL-NEXT: ret %div = udiv <16 x i8> %x, <i8 -64, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> @@ -229,7 +229,7 @@ define <8 x i16> @pr38477(<8 x i16> %a0) { ; GISEL-NEXT: add v1.8h, v2.8h, v1.8h ; GISEL-NEXT: neg v2.8h, v4.8h ; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h -; GISEL-NEXT: sshr v2.8h, v3.8h, #15 +; GISEL-NEXT: cmlt v2.8h, v3.8h, #0 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; GISEL-NEXT: ret %1 = udiv <8 x i16> %a0, <i16 1, i16 119, i16 73, i16 -111, i16 -3, i16 118, i16 32, i16 31> diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-sextinreg.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-sextinreg.mir index 0b950b7..76d4d29 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-sextinreg.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-sextinreg.mir @@ -14,8 +14,7 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[DUP:%[0-9]+]]:_(<4 x s32>) = G_DUP [[C]](s32) ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<4 x s32>) = G_SHL %v1, [[DUP]](<4 x s32>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: %sext:_(<4 x s32>) = G_VASHR [[SHL]], [[C1]](s32) + ; CHECK-NEXT: 
%sext:_(<4 x s32>) = G_VASHR [[SHL]], 16 ; CHECK-NEXT: $q0 = COPY %sext(<4 x s32>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %v1:_(<4 x s32>) = COPY $q0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-vashr-vlshr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-vashr-vlshr.mir index b3fb5a4..dfaddba 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-vashr-vlshr.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-vashr-vlshr.mir @@ -15,8 +15,7 @@ body: | ; CHECK: liveins: $d0, $d1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[VASHR:%[0-9]+]]:_(<4 x s32>) = G_VASHR [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[VASHR:%[0-9]+]]:_(<4 x s32>) = G_VASHR [[COPY]], 5 ; CHECK-NEXT: $q0 = COPY [[VASHR]](<4 x s32>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<4 x s32>) = COPY $q0 @@ -39,8 +38,7 @@ body: | ; CHECK: liveins: $d0, $d1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[VLSHR:%[0-9]+]]:_(<4 x s32>) = G_VLSHR [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[VLSHR:%[0-9]+]]:_(<4 x s32>) = G_VLSHR [[COPY]], 5 ; CHECK-NEXT: $q0 = COPY [[VLSHR]](<4 x s32>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<4 x s32>) = COPY $q0 @@ -63,8 +61,7 @@ body: | ; CHECK: liveins: $d0, $d1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[VLSHR:%[0-9]+]]:_(<8 x s16>) = G_VLSHR [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[VLSHR:%[0-9]+]]:_(<8 x s16>) = G_VLSHR [[COPY]], 5 ; CHECK-NEXT: $q0 = COPY [[VLSHR]](<8 x s16>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<8 x s16>) = COPY $q0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-neon-vcvtfxu2fp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-neon-vcvtfxu2fp.mir index c38e4a8..cf227cb 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-neon-vcvtfxu2fp.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-neon-vcvtfxu2fp.mir @@ -29,7 +29,6 @@ body: | ; CHECK-NEXT: [[UCVTFd:%[0-9]+]]:fpr64 = UCVTFd [[COPY]], 12 ; CHECK-NEXT: $d1 = COPY [[UCVTFd]] %0(s64) = COPY $d0 - %1(s32) = G_CONSTANT i32 12 - %2(s64) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.vcvtfxu2fp.f64), %0, %1 + %2(s64) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.vcvtfxu2fp.f64), %0, 12 $d1 = COPY %2(s64) ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir index 0706115..9fa6326 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir @@ -499,8 +499,7 @@ body: | ; CHECK-NEXT: $d0 = COPY [[SSHRv4i16_shift]] ; CHECK-NEXT: RET_ReallyLR implicit $d0 %0:fpr(<4 x s16>) = COPY $d0 - %1:gpr(s32) = G_CONSTANT i32 5 - %2:fpr(<4 x s16>) = G_VASHR %0, %1 + %2:fpr(<4 x s16>) = G_VASHR %0, 5 $d0 = COPY %2(<4 x s16>) RET_ReallyLR implicit $d0 ... @@ -520,8 +519,7 @@ body: | ; CHECK-NEXT: $d0 = COPY [[USHRv4i16_shift]] ; CHECK-NEXT: RET_ReallyLR implicit $d0 %0:fpr(<4 x s16>) = COPY $d0 - %1:gpr(s32) = G_CONSTANT i32 5 - %2:fpr(<4 x s16>) = G_VLSHR %0, %1 + %2:fpr(<4 x s16>) = G_VLSHR %0, 5 $d0 = COPY %2(<4 x s16>) RET_ReallyLR implicit $d0 ... 
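Editorial note, not part of the patch: the sshr -> cmlt updates that recur through the AArch64 tests below (and in combine-udiv.ll above) all rest on one per-lane identity: an arithmetic shift right by one less than the lane width and a signed compare against zero both splat the sign bit into an all-ones/all-zeros mask. A minimal standalone C++ sketch of that identity for a 16-bit lane (illustrative only; the helper names are invented):

#include <cassert>
#include <cstdint>
#include <initializer_list>

// Both helpers build the per-lane sign mask that "sshr #15" and "cmlt #0"
// produce for a 16-bit lane: 0xFFFF when the input is negative, 0 otherwise.
static int16_t mask_via_sshr(int16_t x) {
  // Arithmetic shift by (lane width - 1) copies the sign bit into every bit.
  return static_cast<int16_t>(x >> 15);
}
static int16_t mask_via_cmlt(int16_t x) {
  // Signed compare-less-than-zero: all-ones when negative, else all-zeros.
  return x < 0 ? int16_t(-1) : int16_t(0);
}

int main() {
  for (int v : {-32768, -1, 0, 1, 32767})
    assert(mask_via_sshr(static_cast<int16_t>(v)) ==
           mask_via_cmlt(static_cast<int16_t>(v)));
  return 0;
}

Since the two forms are equivalent lane-for-lane, the test churn is mechanical: the FileCheck lines are simply pinned to the new canonical output, presumably because the compare-with-zero form is at least as cheap as the shift on the targeted cores.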
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll index cdde110..63c08dd 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll @@ -902,7 +902,7 @@ define void @sink_v8z16_0(ptr %p, ptr %d, i64 %n, <16 x i8> %a) { ; CHECK-GI-NEXT: subs x2, x2, #8 ; CHECK-GI-NEXT: add x8, x8, #8 ; CHECK-GI-NEXT: umull v1.8h, v1.8b, v0.8b -; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #15 +; CHECK-GI-NEXT: cmlt v1.8h, v1.8h, #0 ; CHECK-GI-NEXT: xtn v1.8b, v1.8h ; CHECK-GI-NEXT: str d1, [x0], #32 ; CHECK-GI-NEXT: b.ne .LBB8_1 @@ -967,8 +967,8 @@ define void @sink_v16s16_8(ptr %p, ptr %d, i64 %n, <16 x i8> %a) { ; CHECK-GI-NEXT: mov d2, v1.d[1] ; CHECK-GI-NEXT: smull v1.8h, v1.8b, v0.8b ; CHECK-GI-NEXT: smull v2.8h, v2.8b, v0.8b -; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #15 -; CHECK-GI-NEXT: sshr v2.8h, v2.8h, #15 +; CHECK-GI-NEXT: cmlt v1.8h, v1.8h, #0 +; CHECK-GI-NEXT: cmlt v2.8h, v2.8h, #0 ; CHECK-GI-NEXT: uzp1 v1.16b, v1.16b, v2.16b ; CHECK-GI-NEXT: str q1, [x0], #32 ; CHECK-GI-NEXT: b.ne .LBB9_1 diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll index 9bafc5b..2a8b3ce2 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll @@ -999,16 +999,10 @@ entry: } define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK-SD-LABEL: test_vaddhn_s16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: addhn v0.8b, v0.8h, v1.8h -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vaddhn_s16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h -; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vaddhn_s16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addhn v0.8b, v0.8h, v1.8h +; CHECK-NEXT: ret entry: %vaddhn.i = add <8 x i16> %a, %b %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> @@ -1017,16 +1011,10 @@ entry: } define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK-SD-LABEL: test_vaddhn_s32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: addhn v0.4h, v0.4s, v1.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vaddhn_s32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vaddhn_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addhn v0.4h, v0.4s, v1.4s +; CHECK-NEXT: ret entry: %vaddhn.i = add <4 x i32> %a, %b %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16> @@ -1035,16 +1023,10 @@ entry: } define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-SD-LABEL: test_vaddhn_s64: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: addhn v0.2s, v0.2d, v1.2d -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vaddhn_s64: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d -; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vaddhn_s64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addhn v0.2s, v0.2d, v1.2d +; CHECK-NEXT: ret entry: %vaddhn.i = add <2 x i64> %a, %b %vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32> @@ -1053,16 +1035,10 @@ entry: } define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK-SD-LABEL: test_vaddhn_u16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: addhn v0.8b, v0.8h, v1.8h -; CHECK-SD-NEXT: ret -; -; 
CHECK-GI-LABEL: test_vaddhn_u16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h -; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vaddhn_u16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addhn v0.8b, v0.8h, v1.8h +; CHECK-NEXT: ret entry: %vaddhn.i = add <8 x i16> %a, %b %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> @@ -1071,16 +1047,10 @@ entry: } define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK-SD-LABEL: test_vaddhn_u32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: addhn v0.4h, v0.4s, v1.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vaddhn_u32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vaddhn_u32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addhn v0.4h, v0.4s, v1.4s +; CHECK-NEXT: ret entry: %vaddhn.i = add <4 x i32> %a, %b %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16> @@ -1089,16 +1059,10 @@ entry: } define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-SD-LABEL: test_vaddhn_u64: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: addhn v0.2s, v0.2d, v1.2d -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vaddhn_u64: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d -; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vaddhn_u64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addhn v0.2s, v0.2d, v1.2d +; CHECK-NEXT: ret entry: %vaddhn.i = add <2 x i64> %a, %b %vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32> @@ -1115,9 +1079,8 @@ define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) ; ; CHECK-GI-LABEL: test_vaddhn_high_s16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: add v1.8h, v1.8h, v2.8h +; CHECK-GI-NEXT: addhn v1.8b, v1.8h, v2.8h ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8 ; CHECK-GI-NEXT: fmov x8, d1 ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: ret @@ -1141,9 +1104,8 @@ define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) ; ; CHECK-GI-LABEL: test_vaddhn_high_s32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: add v1.4s, v1.4s, v2.4s +; CHECK-GI-NEXT: addhn v1.4h, v1.4s, v2.4s ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16 ; CHECK-GI-NEXT: fmov x8, d1 ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: ret @@ -1167,9 +1129,8 @@ define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) ; ; CHECK-GI-LABEL: test_vaddhn_high_s64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: add v1.2d, v1.2d, v2.2d +; CHECK-GI-NEXT: addhn v1.2s, v1.2d, v2.2d ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32 ; CHECK-GI-NEXT: fmov x8, d1 ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: ret @@ -1193,9 +1154,8 @@ define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) ; ; CHECK-GI-LABEL: test_vaddhn_high_u16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: add v1.8h, v1.8h, v2.8h +; CHECK-GI-NEXT: addhn v1.8b, v1.8h, v2.8h ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8 ; CHECK-GI-NEXT: fmov x8, d1 ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: ret @@ -1219,9 +1179,8 @@ define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x 
i32> %b) ; ; CHECK-GI-LABEL: test_vaddhn_high_u32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: add v1.4s, v1.4s, v2.4s +; CHECK-GI-NEXT: addhn v1.4h, v1.4s, v2.4s ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16 ; CHECK-GI-NEXT: fmov x8, d1 ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: ret @@ -1245,9 +1204,8 @@ define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) ; ; CHECK-GI-LABEL: test_vaddhn_high_u64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: add v1.2d, v1.2d, v2.2d +; CHECK-GI-NEXT: addhn v1.2s, v1.2d, v2.2d ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32 ; CHECK-GI-NEXT: fmov x8, d1 ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: ret @@ -1461,16 +1419,10 @@ entry: } define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK-SD-LABEL: test_vsubhn_s16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: subhn v0.8b, v0.8h, v1.8h -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vsubhn_s16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub v0.8h, v0.8h, v1.8h -; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vsubhn_s16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: subhn v0.8b, v0.8h, v1.8h +; CHECK-NEXT: ret entry: %vsubhn.i = sub <8 x i16> %a, %b %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> @@ -1479,16 +1431,10 @@ entry: } define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK-SD-LABEL: test_vsubhn_s32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: subhn v0.4h, v0.4s, v1.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vsubhn_s32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vsubhn_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: subhn v0.4h, v0.4s, v1.4s +; CHECK-NEXT: ret entry: %vsubhn.i = sub <4 x i32> %a, %b %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16> @@ -1497,16 +1443,10 @@ entry: } define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-SD-LABEL: test_vsubhn_s64: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: subhn v0.2s, v0.2d, v1.2d -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vsubhn_s64: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d -; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vsubhn_s64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: subhn v0.2s, v0.2d, v1.2d +; CHECK-NEXT: ret entry: %vsubhn.i = sub <2 x i64> %a, %b %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32> @@ -1515,16 +1455,10 @@ entry: } define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK-SD-LABEL: test_vsubhn_u16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: subhn v0.8b, v0.8h, v1.8h -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vsubhn_u16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub v0.8h, v0.8h, v1.8h -; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vsubhn_u16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: subhn v0.8b, v0.8h, v1.8h +; CHECK-NEXT: ret entry: %vsubhn.i = sub <8 x i16> %a, %b %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> @@ -1533,16 +1467,10 @@ entry: } define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK-SD-LABEL: test_vsubhn_u32: -; CHECK-SD: // %bb.0: // 
%entry -; CHECK-SD-NEXT: subhn v0.4h, v0.4s, v1.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vsubhn_u32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vsubhn_u32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: subhn v0.4h, v0.4s, v1.4s +; CHECK-NEXT: ret entry: %vsubhn.i = sub <4 x i32> %a, %b %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16> @@ -1551,16 +1479,10 @@ entry: } define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-SD-LABEL: test_vsubhn_u64: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: subhn v0.2s, v0.2d, v1.2d -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vsubhn_u64: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d -; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vsubhn_u64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: subhn v0.2s, v0.2d, v1.2d +; CHECK-NEXT: ret entry: %vsubhn.i = sub <2 x i64> %a, %b %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32> @@ -1577,9 +1499,8 @@ define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) ; ; CHECK-GI-LABEL: test_vsubhn_high_s16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub v1.8h, v1.8h, v2.8h +; CHECK-GI-NEXT: subhn v1.8b, v1.8h, v2.8h ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8 ; CHECK-GI-NEXT: fmov x8, d1 ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: ret @@ -1603,9 +1524,8 @@ define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) ; ; CHECK-GI-LABEL: test_vsubhn_high_s32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub v1.4s, v1.4s, v2.4s +; CHECK-GI-NEXT: subhn v1.4h, v1.4s, v2.4s ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16 ; CHECK-GI-NEXT: fmov x8, d1 ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: ret @@ -1629,9 +1549,8 @@ define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) ; ; CHECK-GI-LABEL: test_vsubhn_high_s64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub v1.2d, v1.2d, v2.2d +; CHECK-GI-NEXT: subhn v1.2s, v1.2d, v2.2d ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32 ; CHECK-GI-NEXT: fmov x8, d1 ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: ret @@ -1655,9 +1574,8 @@ define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) ; ; CHECK-GI-LABEL: test_vsubhn_high_u16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub v1.8h, v1.8h, v2.8h +; CHECK-GI-NEXT: subhn v1.8b, v1.8h, v2.8h ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8 ; CHECK-GI-NEXT: fmov x8, d1 ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: ret @@ -1681,9 +1599,8 @@ define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) ; ; CHECK-GI-LABEL: test_vsubhn_high_u32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub v1.4s, v1.4s, v2.4s +; CHECK-GI-NEXT: subhn v1.4h, v1.4s, v2.4s ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16 ; CHECK-GI-NEXT: fmov x8, d1 ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: ret @@ -1707,9 +1624,8 @@ define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) ; ; CHECK-GI-LABEL: test_vsubhn_high_u64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub v1.2d, v1.2d, v2.2d +; CHECK-GI-NEXT: subhn 
v1.2s, v1.2d, v2.2d ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32 ; CHECK-GI-NEXT: fmov x8, d1 ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll index 84879d1..03e6ca1 100644 --- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll +++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll @@ -524,8 +524,8 @@ define <32 x i8> @sext_v32i1(<32 x i1> %arg) { ; CHECK-GI-NEXT: mov.b v1[15], w9 ; CHECK-GI-NEXT: shl.16b v0, v0, #7 ; CHECK-GI-NEXT: shl.16b v1, v1, #7 -; CHECK-GI-NEXT: sshr.16b v0, v0, #7 -; CHECK-GI-NEXT: sshr.16b v1, v1, #7 +; CHECK-GI-NEXT: cmlt.16b v0, v0, #0 +; CHECK-GI-NEXT: cmlt.16b v1, v1, #0 ; CHECK-GI-NEXT: ret %res = sext <32 x i1> %arg to <32 x i8> ret <32 x i8> %res @@ -934,10 +934,10 @@ define <64 x i8> @sext_v64i1(<64 x i1> %arg) { ; CHECK-GI-NEXT: shl.16b v1, v1, #7 ; CHECK-GI-NEXT: shl.16b v2, v2, #7 ; CHECK-GI-NEXT: shl.16b v3, v3, #7 -; CHECK-GI-NEXT: sshr.16b v0, v0, #7 -; CHECK-GI-NEXT: sshr.16b v1, v1, #7 -; CHECK-GI-NEXT: sshr.16b v2, v2, #7 -; CHECK-GI-NEXT: sshr.16b v3, v3, #7 +; CHECK-GI-NEXT: cmlt.16b v0, v0, #0 +; CHECK-GI-NEXT: cmlt.16b v1, v1, #0 +; CHECK-GI-NEXT: cmlt.16b v2, v2, #0 +; CHECK-GI-NEXT: cmlt.16b v3, v3, #0 ; CHECK-GI-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-GI-NEXT: ret %res = sext <64 x i1> %arg to <64 x i8> diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll index c408d7f..a3f4722 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll @@ -1914,21 +1914,13 @@ define <2 x i128> @uabd_i64(<2 x i64> %a, <2 x i64> %b) { } define <8 x i16> @pr88784(<8 x i8> %l0, <8 x i8> %l1, <8 x i16> %l2) { -; CHECK-SD-LABEL: pr88784: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: usubl.8h v0, v0, v1 -; CHECK-SD-NEXT: cmlt.8h v1, v2, #0 -; CHECK-SD-NEXT: ssra.8h v0, v2, #15 -; CHECK-SD-NEXT: eor.16b v0, v1, v0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: pr88784: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: usubl.8h v0, v0, v1 -; CHECK-GI-NEXT: sshr.8h v1, v2, #15 -; CHECK-GI-NEXT: ssra.8h v0, v2, #15 -; CHECK-GI-NEXT: eor.16b v0, v1, v0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: pr88784: +; CHECK: // %bb.0: +; CHECK-NEXT: usubl.8h v0, v0, v1 +; CHECK-NEXT: cmlt.8h v1, v2, #0 +; CHECK-NEXT: ssra.8h v0, v2, #15 +; CHECK-NEXT: eor.16b v0, v1, v0 +; CHECK-NEXT: ret %l4 = zext <8 x i8> %l0 to <8 x i16> %l5 = ashr <8 x i16> %l2, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> %l6 = zext <8 x i8> %l1 to <8 x i16> @@ -1947,7 +1939,7 @@ define <8 x i16> @pr88784_fixed(<8 x i8> %l0, <8 x i8> %l1, <8 x i16> %l2) { ; CHECK-GI-LABEL: pr88784_fixed: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: usubl.8h v0, v0, v1 -; CHECK-GI-NEXT: sshr.8h v1, v0, #15 +; CHECK-GI-NEXT: cmlt.8h v1, v0, #0 ; CHECK-GI-NEXT: ssra.8h v0, v0, #15 ; CHECK-GI-NEXT: eor.16b v0, v1, v0 ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-vadd.ll b/llvm/test/CodeGen/AArch64/arm64-vadd.ll index 11fb732..938712a 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vadd.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vadd.ll @@ -1103,20 +1103,12 @@ define <2 x i64> @ssubl2_duplhs(i32 %lhs, <4 x i32> %rhs) { } define <8 x i8> @addhn8b_natural(ptr %A, ptr %B) nounwind { -; CHECK-SD-LABEL: addhn8b_natural: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr q0, [x0] -; CHECK-SD-NEXT: ldr q1, [x1] -; CHECK-SD-NEXT: addhn v0.8b, v0.8h, v1.8h -; 
CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: addhn8b_natural: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr q0, [x0] -; CHECK-GI-NEXT: ldr q1, [x1] -; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h -; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: addhn8b_natural: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: addhn v0.8b, v0.8h, v1.8h +; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B %sum = add <8 x i16> %tmp1, %tmp2 @@ -1126,20 +1118,12 @@ define <8 x i8> @addhn8b_natural(ptr %A, ptr %B) nounwind { } define <4 x i16> @addhn4h_natural(ptr %A, ptr %B) nounwind { -; CHECK-SD-LABEL: addhn4h_natural: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr q0, [x0] -; CHECK-SD-NEXT: ldr q1, [x1] -; CHECK-SD-NEXT: addhn v0.4h, v0.4s, v1.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: addhn4h_natural: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr q0, [x0] -; CHECK-GI-NEXT: ldr q1, [x1] -; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: addhn4h_natural: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: addhn v0.4h, v0.4s, v1.4s +; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B %sum = add <4 x i32> %tmp1, %tmp2 @@ -1149,20 +1133,12 @@ define <4 x i16> @addhn4h_natural(ptr %A, ptr %B) nounwind { } define <2 x i32> @addhn2s_natural(ptr %A, ptr %B) nounwind { -; CHECK-SD-LABEL: addhn2s_natural: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr q0, [x0] -; CHECK-SD-NEXT: ldr q1, [x1] -; CHECK-SD-NEXT: addhn v0.2s, v0.2d, v1.2d -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: addhn2s_natural: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr q0, [x0] -; CHECK-GI-NEXT: ldr q1, [x1] -; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d -; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: addhn2s_natural: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: addhn v0.2s, v0.2d, v1.2d +; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B %sum = add <2 x i64> %tmp1, %tmp2 @@ -1172,22 +1148,13 @@ define <2 x i32> @addhn2s_natural(ptr %A, ptr %B) nounwind { } define <16 x i8> @addhn2_16b_natural(<8 x i8> %low, ptr %A, ptr %B) nounwind { -; CHECK-SD-LABEL: addhn2_16b_natural: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr q1, [x0] -; CHECK-SD-NEXT: ldr q2, [x1] -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: addhn2 v0.16b, v1.8h, v2.8h -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: addhn2_16b_natural: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr q1, [x0] -; CHECK-GI-NEXT: ldr q2, [x1] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: add v1.8h, v1.8h, v2.8h -; CHECK-GI-NEXT: shrn2 v0.16b, v1.8h, #8 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: addhn2_16b_natural: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: addhn2 v0.16b, v1.8h, v2.8h +; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B %sum = add <8 x i16> %tmp1, %tmp2 @@ -1198,22 +1165,13 @@ define <16 x i8> @addhn2_16b_natural(<8 x i8> %low, ptr %A, ptr %B) nounwind { } define <8 x i16> @addhn2_8h_natural(<4 x i16> %low, ptr %A, ptr %B) nounwind { -; CHECK-SD-LABEL: addhn2_8h_natural: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr q1, [x0] -; CHECK-SD-NEXT: ldr q2, [x1] -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; 
CHECK-SD-NEXT: addhn2 v0.8h, v1.4s, v2.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: addhn2_8h_natural: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr q1, [x0] -; CHECK-GI-NEXT: ldr q2, [x1] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: add v1.4s, v1.4s, v2.4s -; CHECK-GI-NEXT: shrn2 v0.8h, v1.4s, #16 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: addhn2_8h_natural: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: addhn2 v0.8h, v1.4s, v2.4s +; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B %sum = add <4 x i32> %tmp1, %tmp2 @@ -1224,22 +1182,13 @@ define <8 x i16> @addhn2_8h_natural(<4 x i16> %low, ptr %A, ptr %B) nounwind { } define <4 x i32> @addhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind { -; CHECK-SD-LABEL: addhn2_4s_natural: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr q1, [x0] -; CHECK-SD-NEXT: ldr q2, [x1] -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: addhn2 v0.4s, v1.2d, v2.2d -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: addhn2_4s_natural: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr q1, [x0] -; CHECK-GI-NEXT: ldr q2, [x1] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: add v1.2d, v1.2d, v2.2d -; CHECK-GI-NEXT: shrn2 v0.4s, v1.2d, #32 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: addhn2_4s_natural: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: addhn2 v0.4s, v1.2d, v2.2d +; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B %sum = add <2 x i64> %tmp1, %tmp2 @@ -1250,22 +1199,13 @@ define <4 x i32> @addhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind { } define <4 x i32> @addhn_addhn2_4s(ptr %A, ptr %B, ptr %C, ptr %D) nounwind { -; CHECK-SD-LABEL: addhn_addhn2_4s: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr q1, [x0] -; CHECK-SD-NEXT: ldr q2, [x1] -; CHECK-SD-NEXT: addhn v0.2s, v1.2d, v2.2d -; CHECK-SD-NEXT: addhn2 v0.4s, v1.2d, v2.2d -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: addhn_addhn2_4s: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr q0, [x0] -; CHECK-GI-NEXT: ldr q1, [x1] -; CHECK-GI-NEXT: add v1.2d, v0.2d, v1.2d -; CHECK-GI-NEXT: shrn v0.2s, v1.2d, #32 -; CHECK-GI-NEXT: shrn2 v0.4s, v1.2d, #32 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: addhn_addhn2_4s: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: addhn v0.2s, v1.2d, v2.2d +; CHECK-NEXT: addhn2 v0.4s, v1.2d, v2.2d +; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B %sum1 = add <2 x i64> %tmp1, %tmp2 @@ -1281,20 +1221,12 @@ define <4 x i32> @addhn_addhn2_4s(ptr %A, ptr %B, ptr %C, ptr %D) nounwind { } define <8 x i8> @subhn8b_natural(ptr %A, ptr %B) nounwind { -; CHECK-SD-LABEL: subhn8b_natural: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr q0, [x0] -; CHECK-SD-NEXT: ldr q1, [x1] -; CHECK-SD-NEXT: subhn v0.8b, v0.8h, v1.8h -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: subhn8b_natural: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr q0, [x0] -; CHECK-GI-NEXT: ldr q1, [x1] -; CHECK-GI-NEXT: sub v0.8h, v0.8h, v1.8h -; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: subhn8b_natural: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: subhn v0.8b, v0.8h, v1.8h +; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B %diff = sub <8 x i16> %tmp1, %tmp2 @@ -1304,20 
+1236,12 @@ define <8 x i8> @subhn8b_natural(ptr %A, ptr %B) nounwind { } define <4 x i16> @subhn4h_natural(ptr %A, ptr %B) nounwind { -; CHECK-SD-LABEL: subhn4h_natural: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr q0, [x0] -; CHECK-SD-NEXT: ldr q1, [x1] -; CHECK-SD-NEXT: subhn v0.4h, v0.4s, v1.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: subhn4h_natural: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr q0, [x0] -; CHECK-GI-NEXT: ldr q1, [x1] -; CHECK-GI-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: subhn4h_natural: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: subhn v0.4h, v0.4s, v1.4s +; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B %diff = sub <4 x i32> %tmp1, %tmp2 @@ -1327,20 +1251,12 @@ define <4 x i16> @subhn4h_natural(ptr %A, ptr %B) nounwind { } define <2 x i32> @subhn2s_natural(ptr %A, ptr %B) nounwind { -; CHECK-SD-LABEL: subhn2s_natural: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr q0, [x0] -; CHECK-SD-NEXT: ldr q1, [x1] -; CHECK-SD-NEXT: subhn v0.2s, v0.2d, v1.2d -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: subhn2s_natural: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr q0, [x0] -; CHECK-GI-NEXT: ldr q1, [x1] -; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d -; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: subhn2s_natural: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: subhn v0.2s, v0.2d, v1.2d +; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B %diff = sub <2 x i64> %tmp1, %tmp2 @@ -1350,22 +1266,13 @@ define <2 x i32> @subhn2s_natural(ptr %A, ptr %B) nounwind { } define <16 x i8> @subhn2_16b_natural(<8 x i8> %low, ptr %A, ptr %B) nounwind { -; CHECK-SD-LABEL: subhn2_16b_natural: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr q1, [x0] -; CHECK-SD-NEXT: ldr q2, [x1] -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: subhn2 v0.16b, v1.8h, v2.8h -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: subhn2_16b_natural: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr q1, [x0] -; CHECK-GI-NEXT: ldr q2, [x1] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: sub v1.8h, v1.8h, v2.8h -; CHECK-GI-NEXT: shrn2 v0.16b, v1.8h, #8 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: subhn2_16b_natural: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: subhn2 v0.16b, v1.8h, v2.8h +; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B %diff = sub <8 x i16> %tmp1, %tmp2 @@ -1376,22 +1283,13 @@ define <16 x i8> @subhn2_16b_natural(<8 x i8> %low, ptr %A, ptr %B) nounwind { } define <8 x i16> @subhn2_8h_natural(<4 x i16> %low, ptr %A, ptr %B) nounwind { -; CHECK-SD-LABEL: subhn2_8h_natural: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr q1, [x0] -; CHECK-SD-NEXT: ldr q2, [x1] -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: subhn2 v0.8h, v1.4s, v2.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: subhn2_8h_natural: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr q1, [x0] -; CHECK-GI-NEXT: ldr q2, [x1] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: sub v1.4s, v1.4s, v2.4s -; CHECK-GI-NEXT: shrn2 v0.8h, v1.4s, #16 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: subhn2_8h_natural: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; 
CHECK-NEXT: subhn2 v0.8h, v1.4s, v2.4s +; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B %diff = sub <4 x i32> %tmp1, %tmp2 @@ -1402,22 +1300,13 @@ define <8 x i16> @subhn2_8h_natural(<4 x i16> %low, ptr %A, ptr %B) nounwind { } define <4 x i32> @subhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind { -; CHECK-SD-LABEL: subhn2_4s_natural: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr q1, [x0] -; CHECK-SD-NEXT: ldr q2, [x1] -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: subhn2 v0.4s, v1.2d, v2.2d -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: subhn2_4s_natural: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr q1, [x0] -; CHECK-GI-NEXT: ldr q2, [x1] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: sub v1.2d, v1.2d, v2.2d -; CHECK-GI-NEXT: shrn2 v0.4s, v1.2d, #32 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: subhn2_4s_natural: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: subhn2 v0.4s, v1.2d, v2.2d +; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B %diff = sub <2 x i64> %tmp1, %tmp2 @@ -1428,20 +1317,12 @@ define <4 x i32> @subhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind { } define <16 x i8> @neg_narrow_i8(<16 x i16> %a) { -; CHECK-SD-LABEL: neg_narrow_i8: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: movi v2.2d, #0xffffffffffffffff -; CHECK-SD-NEXT: subhn v0.8b, v2.8h, v0.8h -; CHECK-SD-NEXT: subhn2 v0.16b, v2.8h, v1.8h -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: neg_narrow_i8: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mvn v0.16b, v0.16b -; CHECK-GI-NEXT: mvn v1.16b, v1.16b -; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8 -; CHECK-GI-NEXT: shrn2 v0.16b, v1.8h, #8 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: neg_narrow_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff +; CHECK-NEXT: subhn v0.8b, v2.8h, v0.8h +; CHECK-NEXT: subhn2 v0.16b, v2.8h, v1.8h +; CHECK-NEXT: ret %not.i = xor <16 x i16> %a, splat (i16 -1) %s = lshr <16 x i16> %not.i, splat (i16 8) %vshrn_n = trunc nuw <16 x i16> %s to <16 x i8> @@ -1449,20 +1330,12 @@ define <16 x i8> @neg_narrow_i8(<16 x i16> %a) { } define <8 x i16> @neg_narrow_i16(<8 x i32> %a) { -; CHECK-SD-LABEL: neg_narrow_i16: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: movi v2.2d, #0xffffffffffffffff -; CHECK-SD-NEXT: subhn v0.4h, v2.4s, v0.4s -; CHECK-SD-NEXT: subhn2 v0.8h, v2.4s, v1.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: neg_narrow_i16: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mvn v0.16b, v0.16b -; CHECK-GI-NEXT: mvn v1.16b, v1.16b -; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16 -; CHECK-GI-NEXT: shrn2 v0.8h, v1.4s, #16 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: neg_narrow_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff +; CHECK-NEXT: subhn v0.4h, v2.4s, v0.4s +; CHECK-NEXT: subhn2 v0.8h, v2.4s, v1.4s +; CHECK-NEXT: ret %not.i = xor <8 x i32> %a, splat (i32 -1) %s = lshr <8 x i32> %not.i, splat (i32 16) %vshrn_n = trunc nuw <8 x i32> %s to <8 x i16> @@ -1470,20 +1343,12 @@ define <8 x i16> @neg_narrow_i16(<8 x i32> %a) { } define <4 x i32> @neg_narrow_i32(<4 x i64> %a) { -; CHECK-SD-LABEL: neg_narrow_i32: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: movi v2.2d, #0xffffffffffffffff -; CHECK-SD-NEXT: subhn v0.2s, v2.2d, v0.2d -; CHECK-SD-NEXT: subhn2 v0.4s, v2.2d, v1.2d -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: neg_narrow_i32: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mvn v0.16b, v0.16b -; CHECK-GI-NEXT: mvn v1.16b, v1.16b -; CHECK-GI-NEXT: shrn 
v0.2s, v0.2d, #32 -; CHECK-GI-NEXT: shrn2 v0.4s, v1.2d, #32 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: neg_narrow_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff +; CHECK-NEXT: subhn v0.2s, v2.2d, v0.2d +; CHECK-NEXT: subhn2 v0.4s, v2.2d, v1.2d +; CHECK-NEXT: ret %not.i = xor <4 x i64> %a, splat (i64 -1) %s = lshr <4 x i64> %not.i, splat (i64 32) %vshrn_n = trunc nuw <4 x i64> %s to <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/combine-sdiv.ll b/llvm/test/CodeGen/AArch64/combine-sdiv.ll index 9d0ade2..dc88f94 100644 --- a/llvm/test/CodeGen/AArch64/combine-sdiv.ll +++ b/llvm/test/CodeGen/AArch64/combine-sdiv.ll @@ -66,9 +66,9 @@ define <4 x i32> @combine_vec_sdiv_by_minsigned(<4 x i32> %x) { ; ; CHECK-GI-LABEL: combine_vec_sdiv_by_minsigned: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v1.4s, v0.4s, #31 +; CHECK-GI-NEXT: cmlt v1.4s, v0.4s, #0 ; CHECK-GI-NEXT: usra v0.4s, v1.4s, #1 -; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-GI-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-GI-NEXT: neg v0.4s, v0.4s ; CHECK-GI-NEXT: ret %1 = sdiv <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> @@ -176,7 +176,7 @@ define <4 x i32> @combine_vec_sdiv_by_pos1(<4 x i32> %x) { ; CHECK-GI-NEXT: mov v1.s[2], w9 ; CHECK-GI-NEXT: mov v1.s[3], w9 ; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31 -; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31 +; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0 ; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b ; CHECK-GI-NEXT: ret %1 = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255> @@ -185,39 +185,24 @@ define <4 x i32> @combine_vec_sdiv_by_pos1(<4 x i32> %x) { } define <4 x i32> @combine_vec_sdiv_by_pow2a(<4 x i32> %x) { -; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2a: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: cmlt v1.4s, v0.4s, #0 -; CHECK-SD-NEXT: usra v0.4s, v1.4s, #30 -; CHECK-SD-NEXT: sshr v0.4s, v0.4s, #2 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2a: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v1.4s, v0.4s, #31 -; CHECK-GI-NEXT: usra v0.4s, v1.4s, #30 -; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #2 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: combine_vec_sdiv_by_pow2a: +; CHECK: // %bb.0: +; CHECK-NEXT: cmlt v1.4s, v0.4s, #0 +; CHECK-NEXT: usra v0.4s, v1.4s, #30 +; CHECK-NEXT: sshr v0.4s, v0.4s, #2 +; CHECK-NEXT: ret %1 = sdiv <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4> ret <4 x i32> %1 } define <4 x i32> @combine_vec_sdiv_by_pow2a_neg(<4 x i32> %x) { -; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2a_neg: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: cmlt v1.4s, v0.4s, #0 -; CHECK-SD-NEXT: usra v0.4s, v1.4s, #30 -; CHECK-SD-NEXT: sshr v0.4s, v0.4s, #2 -; CHECK-SD-NEXT: neg v0.4s, v0.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2a_neg: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v1.4s, v0.4s, #31 -; CHECK-GI-NEXT: usra v0.4s, v1.4s, #30 -; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #2 -; CHECK-GI-NEXT: neg v0.4s, v0.4s -; CHECK-GI-NEXT: ret +; CHECK-LABEL: combine_vec_sdiv_by_pow2a_neg: +; CHECK: // %bb.0: +; CHECK-NEXT: cmlt v1.4s, v0.4s, #0 +; CHECK-NEXT: usra v0.4s, v1.4s, #30 +; CHECK-NEXT: sshr v0.4s, v0.4s, #2 +; CHECK-NEXT: neg v0.4s, v0.4s +; CHECK-NEXT: ret %1 = sdiv <4 x i32> %x, <i32 -4, i32 -4, i32 -4, i32 -4> ret <4 x i32> %1 } @@ -240,7 +225,7 @@ define <16 x i8> @combine_vec_sdiv_by_pow2b_v16i8(<16 x i8> %x) { ; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v16i8: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI14_1 -; CHECK-GI-NEXT: sshr v2.16b, v0.16b, #7 +; CHECK-GI-NEXT: cmlt v2.16b, v0.16b, #0 ; CHECK-GI-NEXT: adrp x9, 
.LCPI14_0 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI14_1] ; CHECK-GI-NEXT: adrp x8, .LCPI14_2 @@ -252,7 +237,7 @@ define <16 x i8> @combine_vec_sdiv_by_pow2b_v16i8(<16 x i8> %x) { ; CHECK-GI-NEXT: neg v2.16b, v2.16b ; CHECK-GI-NEXT: add v1.16b, v0.16b, v1.16b ; CHECK-GI-NEXT: sshl v1.16b, v1.16b, v2.16b -; CHECK-GI-NEXT: sshr v2.16b, v3.16b, #7 +; CHECK-GI-NEXT: cmlt v2.16b, v3.16b, #0 ; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-GI-NEXT: ret %1 = sdiv <16 x i8> %x, <i8 1, i8 4, i8 2, i8 16, i8 8, i8 32, i8 64, i8 2, i8 1, i8 4, i8 2, i8 16, i8 8, i8 32, i8 64, i8 2> @@ -278,7 +263,7 @@ define <8 x i16> @combine_vec_sdiv_by_pow2b_v8i16(<8 x i16> %x) { ; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v8i16: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI15_1 -; CHECK-GI-NEXT: sshr v2.8h, v0.8h, #15 +; CHECK-GI-NEXT: cmlt v2.8h, v0.8h, #0 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI15_1] ; CHECK-GI-NEXT: adrp x8, .LCPI15_0 ; CHECK-GI-NEXT: ldr d3, [x8, :lo12:.LCPI15_0] @@ -291,7 +276,7 @@ define <8 x i16> @combine_vec_sdiv_by_pow2b_v8i16(<8 x i16> %x) { ; CHECK-GI-NEXT: add v1.8h, v0.8h, v1.8h ; CHECK-GI-NEXT: shl v2.8h, v2.8h, #15 ; CHECK-GI-NEXT: sshl v1.8h, v1.8h, v3.8h -; CHECK-GI-NEXT: sshr v2.8h, v2.8h, #15 +; CHECK-GI-NEXT: cmlt v2.8h, v2.8h, #0 ; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-GI-NEXT: ret %1 = sdiv <8 x i16> %x, <i16 1, i16 4, i16 2, i16 16, i16 8, i16 32, i16 64, i16 2> @@ -322,8 +307,8 @@ define <16 x i16> @combine_vec_sdiv_by_pow2b_v16i16(<16 x i16> %x) { ; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v16i16: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI16_1 -; CHECK-GI-NEXT: sshr v3.8h, v0.8h, #15 -; CHECK-GI-NEXT: sshr v4.8h, v1.8h, #15 +; CHECK-GI-NEXT: cmlt v3.8h, v0.8h, #0 +; CHECK-GI-NEXT: cmlt v4.8h, v1.8h, #0 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI16_1] ; CHECK-GI-NEXT: adrp x8, .LCPI16_0 ; CHECK-GI-NEXT: ldr d5, [x8, :lo12:.LCPI16_0] @@ -339,7 +324,7 @@ define <16 x i16> @combine_vec_sdiv_by_pow2b_v16i16(<16 x i16> %x) { ; CHECK-GI-NEXT: add v2.8h, v1.8h, v2.8h ; CHECK-GI-NEXT: sshl v3.8h, v3.8h, v4.8h ; CHECK-GI-NEXT: sshl v2.8h, v2.8h, v4.8h -; CHECK-GI-NEXT: sshr v4.8h, v5.8h, #15 +; CHECK-GI-NEXT: cmlt v4.8h, v5.8h, #0 ; CHECK-GI-NEXT: bif v0.16b, v3.16b, v4.16b ; CHECK-GI-NEXT: bif v1.16b, v2.16b, v4.16b ; CHECK-GI-NEXT: ret @@ -381,12 +366,12 @@ define <32 x i16> @combine_vec_sdiv_by_pow2b_v32i16(<32 x i16> %x) { ; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v32i16: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI17_1 -; CHECK-GI-NEXT: sshr v5.8h, v0.8h, #15 -; CHECK-GI-NEXT: sshr v6.8h, v1.8h, #15 +; CHECK-GI-NEXT: cmlt v5.8h, v0.8h, #0 +; CHECK-GI-NEXT: cmlt v6.8h, v1.8h, #0 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI17_1] ; CHECK-GI-NEXT: adrp x8, .LCPI17_0 -; CHECK-GI-NEXT: sshr v7.8h, v2.8h, #15 -; CHECK-GI-NEXT: sshr v16.8h, v3.8h, #15 +; CHECK-GI-NEXT: cmlt v7.8h, v2.8h, #0 +; CHECK-GI-NEXT: cmlt v16.8h, v3.8h, #0 ; CHECK-GI-NEXT: ldr d17, [x8, :lo12:.LCPI17_0] ; CHECK-GI-NEXT: adrp x8, .LCPI17_2 ; CHECK-GI-NEXT: neg v4.8h, v4.8h @@ -402,7 +387,7 @@ define <32 x i16> @combine_vec_sdiv_by_pow2b_v32i16(<32 x i16> %x) { ; CHECK-GI-NEXT: add v6.8h, v1.8h, v6.8h ; CHECK-GI-NEXT: add v7.8h, v2.8h, v7.8h ; CHECK-GI-NEXT: add v4.8h, v3.8h, v4.8h -; CHECK-GI-NEXT: sshr v17.8h, v17.8h, #15 +; CHECK-GI-NEXT: cmlt v17.8h, v17.8h, #0 ; CHECK-GI-NEXT: sshl v5.8h, v5.8h, v16.8h ; CHECK-GI-NEXT: sshl v6.8h, v6.8h, v16.8h ; CHECK-GI-NEXT: sshl v7.8h, v7.8h, v16.8h @@ -436,7 +421,7 @@ define <4 x i32> @combine_vec_sdiv_by_pow2b_v4i32(<4 x i32> 
%x) { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov w8, #1 // =0x1 ; CHECK-GI-NEXT: mov w9, #0 // =0x0 -; CHECK-GI-NEXT: sshr v3.4s, v0.4s, #31 +; CHECK-GI-NEXT: cmlt v3.4s, v0.4s, #0 ; CHECK-GI-NEXT: fmov s1, w8 ; CHECK-GI-NEXT: adrp x8, .LCPI18_0 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI18_0] @@ -451,7 +436,7 @@ define <4 x i32> @combine_vec_sdiv_by_pow2b_v4i32(<4 x i32> %x) { ; CHECK-GI-NEXT: mov v1.s[3], w9 ; CHECK-GI-NEXT: sshl v2.4s, v2.4s, v3.4s ; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31 -; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31 +; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0 ; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b ; CHECK-GI-NEXT: ret %1 = sdiv <4 x i32> %x, <i32 1, i32 4, i32 8, i32 16> @@ -483,10 +468,10 @@ define <8 x i32> @combine_vec_sdiv_by_pow2b_v8i32(<8 x i32> %x) { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov w8, #1 // =0x1 ; CHECK-GI-NEXT: mov w9, #0 // =0x0 -; CHECK-GI-NEXT: sshr v4.4s, v0.4s, #31 +; CHECK-GI-NEXT: cmlt v4.4s, v0.4s, #0 ; CHECK-GI-NEXT: fmov s2, w8 ; CHECK-GI-NEXT: adrp x8, .LCPI19_0 -; CHECK-GI-NEXT: sshr v5.4s, v1.4s, #31 +; CHECK-GI-NEXT: cmlt v5.4s, v1.4s, #0 ; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI19_0] ; CHECK-GI-NEXT: adrp x8, .LCPI19_1 ; CHECK-GI-NEXT: mov v2.h[1], w9 @@ -503,7 +488,7 @@ define <8 x i32> @combine_vec_sdiv_by_pow2b_v8i32(<8 x i32> %x) { ; CHECK-GI-NEXT: sshl v3.4s, v3.4s, v5.4s ; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0 ; CHECK-GI-NEXT: shl v2.4s, v2.4s, #31 -; CHECK-GI-NEXT: sshr v2.4s, v2.4s, #31 +; CHECK-GI-NEXT: cmlt v2.4s, v2.4s, #0 ; CHECK-GI-NEXT: bif v0.16b, v4.16b, v2.16b ; CHECK-GI-NEXT: bif v1.16b, v3.16b, v2.16b ; CHECK-GI-NEXT: ret @@ -546,13 +531,13 @@ define <16 x i32> @combine_vec_sdiv_by_pow2b_v16i32(<16 x i32> %x) { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov w8, #1 // =0x1 ; CHECK-GI-NEXT: mov w9, #0 // =0x0 -; CHECK-GI-NEXT: sshr v6.4s, v0.4s, #31 +; CHECK-GI-NEXT: cmlt v6.4s, v0.4s, #0 ; CHECK-GI-NEXT: fmov s4, w8 ; CHECK-GI-NEXT: adrp x8, .LCPI20_0 -; CHECK-GI-NEXT: sshr v7.4s, v1.4s, #31 +; CHECK-GI-NEXT: cmlt v7.4s, v1.4s, #0 ; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI20_0] -; CHECK-GI-NEXT: sshr v16.4s, v2.4s, #31 -; CHECK-GI-NEXT: sshr v17.4s, v3.4s, #31 +; CHECK-GI-NEXT: cmlt v16.4s, v2.4s, #0 +; CHECK-GI-NEXT: cmlt v17.4s, v3.4s, #0 ; CHECK-GI-NEXT: adrp x8, .LCPI20_1 ; CHECK-GI-NEXT: mov v4.h[1], w9 ; CHECK-GI-NEXT: neg v5.4s, v5.4s @@ -574,7 +559,7 @@ define <16 x i32> @combine_vec_sdiv_by_pow2b_v16i32(<16 x i32> %x) { ; CHECK-GI-NEXT: sshl v5.4s, v5.4s, v17.4s ; CHECK-GI-NEXT: ushll v4.4s, v4.4h, #0 ; CHECK-GI-NEXT: shl v4.4s, v4.4s, #31 -; CHECK-GI-NEXT: sshr v4.4s, v4.4s, #31 +; CHECK-GI-NEXT: cmlt v4.4s, v4.4s, #0 ; CHECK-GI-NEXT: bif v0.16b, v6.16b, v4.16b ; CHECK-GI-NEXT: bif v1.16b, v7.16b, v4.16b ; CHECK-GI-NEXT: bif v2.16b, v16.16b, v4.16b @@ -603,7 +588,7 @@ define <2 x i64> @combine_vec_sdiv_by_pow2b_v2i64(<2 x i64> %x) { ; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v2i64: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI21_1 -; CHECK-GI-NEXT: sshr v2.2d, v0.2d, #63 +; CHECK-GI-NEXT: cmlt v2.2d, v0.2d, #0 ; CHECK-GI-NEXT: adrp x9, .LCPI21_0 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI21_1] ; CHECK-GI-NEXT: adrp x8, .LCPI21_2 @@ -615,7 +600,7 @@ define <2 x i64> @combine_vec_sdiv_by_pow2b_v2i64(<2 x i64> %x) { ; CHECK-GI-NEXT: neg v2.2d, v2.2d ; CHECK-GI-NEXT: add v1.2d, v0.2d, v1.2d ; CHECK-GI-NEXT: sshl v1.2d, v1.2d, v2.2d -; CHECK-GI-NEXT: sshr v2.2d, v3.2d, #63 +; CHECK-GI-NEXT: cmlt v2.2d, v3.2d, #0 ; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-GI-NEXT: ret %1 = sdiv <2 x i64> %x, <i64 1, i64 4> @@ 
-649,7 +634,7 @@ define <4 x i64> @combine_vec_sdiv_by_pow2b_v4i64(<4 x i64> %x) { ; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v4i64: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI22_2 -; CHECK-GI-NEXT: sshr v3.2d, v0.2d, #63 +; CHECK-GI-NEXT: cmlt v3.2d, v0.2d, #0 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI22_2] ; CHECK-GI-NEXT: adrp x8, .LCPI22_1 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI22_1] @@ -662,13 +647,13 @@ define <4 x i64> @combine_vec_sdiv_by_pow2b_v4i64(<4 x i64> %x) { ; CHECK-GI-NEXT: adrp x8, .LCPI22_3 ; CHECK-GI-NEXT: neg v5.2d, v5.2d ; CHECK-GI-NEXT: ushl v2.2d, v3.2d, v2.2d -; CHECK-GI-NEXT: sshr v3.2d, v1.2d, #63 +; CHECK-GI-NEXT: cmlt v3.2d, v1.2d, #0 ; CHECK-GI-NEXT: shl v6.2d, v6.2d, #63 ; CHECK-GI-NEXT: add v2.2d, v0.2d, v2.2d ; CHECK-GI-NEXT: ushl v3.2d, v3.2d, v4.2d ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI22_3] ; CHECK-GI-NEXT: sshl v2.2d, v2.2d, v5.2d -; CHECK-GI-NEXT: sshr v5.2d, v6.2d, #63 +; CHECK-GI-NEXT: cmlt v5.2d, v6.2d, #0 ; CHECK-GI-NEXT: add v1.2d, v1.2d, v3.2d ; CHECK-GI-NEXT: neg v3.2d, v4.2d ; CHECK-GI-NEXT: bif v0.16b, v2.16b, v5.16b @@ -715,13 +700,13 @@ define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov w8, #1 // =0x1 ; CHECK-GI-NEXT: mov w9, #0 // =0x0 -; CHECK-GI-NEXT: sshr v7.2d, v0.2d, #63 +; CHECK-GI-NEXT: cmlt v7.2d, v0.2d, #0 ; CHECK-GI-NEXT: fmov s4, w8 ; CHECK-GI-NEXT: adrp x8, .LCPI23_1 -; CHECK-GI-NEXT: sshr v16.2d, v1.2d, #63 +; CHECK-GI-NEXT: cmlt v16.2d, v1.2d, #0 ; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI23_1] -; CHECK-GI-NEXT: sshr v17.2d, v2.2d, #63 -; CHECK-GI-NEXT: sshr v18.2d, v3.2d, #63 +; CHECK-GI-NEXT: cmlt v17.2d, v2.2d, #0 +; CHECK-GI-NEXT: cmlt v18.2d, v3.2d, #0 ; CHECK-GI-NEXT: adrp x8, .LCPI23_3 ; CHECK-GI-NEXT: mov v4.h[1], w9 ; CHECK-GI-NEXT: neg v5.2d, v5.2d @@ -754,9 +739,9 @@ define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) { ; CHECK-GI-NEXT: shl v4.2d, v4.2d, #63 ; CHECK-GI-NEXT: sshl v16.2d, v16.2d, v20.2d ; CHECK-GI-NEXT: sshl v6.2d, v6.2d, v20.2d -; CHECK-GI-NEXT: sshr v17.2d, v17.2d, #63 -; CHECK-GI-NEXT: sshr v18.2d, v18.2d, #63 -; CHECK-GI-NEXT: sshr v4.2d, v4.2d, #63 +; CHECK-GI-NEXT: cmlt v17.2d, v17.2d, #0 +; CHECK-GI-NEXT: cmlt v18.2d, v18.2d, #0 +; CHECK-GI-NEXT: cmlt v4.2d, v4.2d, #0 ; CHECK-GI-NEXT: bif v0.16b, v7.16b, v17.16b ; CHECK-GI-NEXT: bif v1.16b, v16.16b, v18.16b ; CHECK-GI-NEXT: bif v2.16b, v5.16b, v4.16b @@ -792,7 +777,7 @@ define <4 x i32> @combine_vec_sdiv_by_pow2b_PosAndNeg(<4 x i32> %x) { ; CHECK-GI-NEXT: adrp x10, .LCPI24_0 ; CHECK-GI-NEXT: fmov s1, w8 ; CHECK-GI-NEXT: ldr q2, [x10, :lo12:.LCPI24_0] -; CHECK-GI-NEXT: sshr v3.4s, v0.4s, #31 +; CHECK-GI-NEXT: cmlt v3.4s, v0.4s, #0 ; CHECK-GI-NEXT: fmov s4, w9 ; CHECK-GI-NEXT: adrp x10, .LCPI24_1 ; CHECK-GI-NEXT: neg v2.4s, v2.4s @@ -807,10 +792,10 @@ define <4 x i32> @combine_vec_sdiv_by_pow2b_PosAndNeg(<4 x i32> %x) { ; CHECK-GI-NEXT: mov v1.s[3], w9 ; CHECK-GI-NEXT: sshl v2.4s, v2.4s, v3.4s ; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31 -; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31 +; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0 ; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b ; CHECK-GI-NEXT: shl v1.4s, v4.4s, #31 -; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31 +; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0 ; CHECK-GI-NEXT: neg v2.4s, v0.4s ; CHECK-GI-NEXT: bit v0.16b, v2.16b, v1.16b ; CHECK-GI-NEXT: ret @@ -871,7 +856,7 @@ define <16 x i8> @non_splat_minus_one_divisor_0(<16 x i8> %A) { ; CHECK-GI-NEXT: neg v2.16b, v0.16b ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI25_0] ; CHECK-GI-NEXT: shl v1.16b, 
v1.16b, #7 -; CHECK-GI-NEXT: sshr v1.16b, v1.16b, #7 +; CHECK-GI-NEXT: cmlt v1.16b, v1.16b, #0 ; CHECK-GI-NEXT: bit v0.16b, v2.16b, v1.16b ; CHECK-GI-NEXT: ret %div = sdiv <16 x i8> %A, <i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> @@ -901,7 +886,7 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) { ; CHECK-GI-LABEL: non_splat_minus_one_divisor_1: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI26_2 -; CHECK-GI-NEXT: sshr v2.16b, v0.16b, #7 +; CHECK-GI-NEXT: cmlt v2.16b, v0.16b, #0 ; CHECK-GI-NEXT: adrp x9, .LCPI26_1 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI26_2] ; CHECK-GI-NEXT: adrp x8, .LCPI26_3 @@ -914,11 +899,11 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) { ; CHECK-GI-NEXT: neg v2.16b, v2.16b ; CHECK-GI-NEXT: add v1.16b, v0.16b, v1.16b ; CHECK-GI-NEXT: sshl v1.16b, v1.16b, v2.16b -; CHECK-GI-NEXT: sshr v2.16b, v3.16b, #7 +; CHECK-GI-NEXT: cmlt v2.16b, v3.16b, #0 ; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI26_0] ; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-GI-NEXT: shl v1.16b, v3.16b, #7 -; CHECK-GI-NEXT: sshr v1.16b, v1.16b, #7 +; CHECK-GI-NEXT: cmlt v1.16b, v1.16b, #0 ; CHECK-GI-NEXT: neg v2.16b, v0.16b ; CHECK-GI-NEXT: bit v0.16b, v2.16b, v1.16b ; CHECK-GI-NEXT: ret @@ -954,7 +939,7 @@ define <4 x i32> @non_splat_minus_one_divisor_2(<4 x i32> %A) { ; CHECK-GI-NEXT: fmov s1, w8 ; CHECK-GI-NEXT: ldr q2, [x9, :lo12:.LCPI27_0] ; CHECK-GI-NEXT: fmov s4, w8 -; CHECK-GI-NEXT: sshr v3.4s, v0.4s, #31 +; CHECK-GI-NEXT: cmlt v3.4s, v0.4s, #0 ; CHECK-GI-NEXT: adrp x9, .LCPI27_1 ; CHECK-GI-NEXT: neg v2.4s, v2.4s ; CHECK-GI-NEXT: mov v1.s[1], w8 @@ -969,10 +954,10 @@ define <4 x i32> @non_splat_minus_one_divisor_2(<4 x i32> %A) { ; CHECK-GI-NEXT: sshl v2.4s, v2.4s, v3.4s ; CHECK-GI-NEXT: mov v4.s[3], w8 ; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31 -; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31 +; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0 ; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b ; CHECK-GI-NEXT: shl v1.4s, v4.4s, #31 -; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31 +; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0 ; CHECK-GI-NEXT: neg v2.4s, v0.4s ; CHECK-GI-NEXT: bit v0.16b, v2.16b, v1.16b ; CHECK-GI-NEXT: ret @@ -1207,7 +1192,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform7(<8 x i16> %x) { ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI34_0] ; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0 ; CHECK-GI-NEXT: shl v1.8h, v1.8h, #15 -; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #15 +; CHECK-GI-NEXT: cmlt v1.8h, v1.8h, #0 ; CHECK-GI-NEXT: bit v0.16b, v2.16b, v1.16b ; CHECK-GI-NEXT: ret %1 = sdiv <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 1, i16 1, i16 1, i16 1> diff --git a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll index 121cc30..babb4ed 100644 --- a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll +++ b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll @@ -605,7 +605,7 @@ define i32 @extract_v4i32_select(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %c ; CHECK-GI-NEXT: mov w8, w0 ; CHECK-GI-NEXT: and x8, x8, #0x3 ; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31 -; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31 +; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0 ; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b ; CHECK-GI-NEXT: str q0, [sp] ; CHECK-GI-NEXT: ldr w0, [x9, x8, lsl #2] @@ -634,7 +634,7 @@ define i32 @extract_v4i32_select_const(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x ; CHECK-GI-NEXT: adrp x8, .LCPI23_0 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI23_0] ; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31 -; CHECK-GI-NEXT: sshr v1.4s, 
v1.4s, #31 +; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0 ; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b ; CHECK-GI-NEXT: mov s0, v0.s[2] ; CHECK-GI-NEXT: fmov w0, s0 diff --git a/llvm/test/CodeGen/AArch64/fcmp.ll b/llvm/test/CodeGen/AArch64/fcmp.ll index 6d673f1..30fb82e 100644 --- a/llvm/test/CodeGen/AArch64/fcmp.ll +++ b/llvm/test/CodeGen/AArch64/fcmp.ll @@ -661,7 +661,7 @@ define <2 x double> @v2f128_double(<2 x fp128> %a, <2 x fp128> %b, <2 x double> ; CHECK-GI-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: shl v0.2d, v0.2d, #63 -; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63 +; CHECK-GI-NEXT: cmlt v0.2d, v0.2d, #0 ; CHECK-GI-NEXT: bsl v0.16b, v1.16b, v2.16b ; CHECK-GI-NEXT: add sp, sp, #80 ; CHECK-GI-NEXT: ret @@ -1540,7 +1540,7 @@ define <7 x i32> @v7f16_i32(<7 x half> %a, <7 x half> %b, <7 x i32> %d, <7 x i32 ; CHECK-GI-FP16-NEXT: shl v0.4s, v0.4s, #31 ; CHECK-GI-FP16-NEXT: mov v1.s[2], w8 ; CHECK-GI-FP16-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-GI-FP16-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-GI-FP16-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-GI-FP16-NEXT: fmov s4, w8 ; CHECK-GI-FP16-NEXT: mov v4.s[1], w8 ; CHECK-GI-FP16-NEXT: ushl v1.4s, v1.4s, v2.4s @@ -1602,7 +1602,7 @@ define <4 x i32> @v4f16_i32(<4 x half> %a, <4 x half> %b, <4 x i32> %d, <4 x i32 ; CHECK-GI-FP16-NEXT: fcmgt v0.4h, v1.4h, v0.4h ; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-FP16-NEXT: shl v0.4s, v0.4s, #31 -; CHECK-GI-FP16-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-GI-FP16-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-GI-FP16-NEXT: bsl v0.16b, v2.16b, v3.16b ; CHECK-GI-FP16-NEXT: ret entry: @@ -1657,8 +1657,8 @@ define <8 x i32> @v8f16_i32(<8 x half> %a, <8 x half> %b, <8 x i32> %d, <8 x i32 ; CHECK-GI-FP16-NEXT: ushll2 v0.4s, v0.8h, #0 ; CHECK-GI-FP16-NEXT: shl v1.4s, v1.4s, #31 ; CHECK-GI-FP16-NEXT: shl v0.4s, v0.4s, #31 -; CHECK-GI-FP16-NEXT: sshr v1.4s, v1.4s, #31 -; CHECK-GI-FP16-NEXT: sshr v6.4s, v0.4s, #31 +; CHECK-GI-FP16-NEXT: cmlt v1.4s, v1.4s, #0 +; CHECK-GI-FP16-NEXT: cmlt v6.4s, v0.4s, #0 ; CHECK-GI-FP16-NEXT: mov v0.16b, v1.16b ; CHECK-GI-FP16-NEXT: mov v1.16b, v6.16b ; CHECK-GI-FP16-NEXT: bsl v0.16b, v2.16b, v4.16b @@ -1748,10 +1748,10 @@ define <16 x i32> @v16f16_i32(<16 x half> %a, <16 x half> %b, <16 x i32> %d, <16 ; CHECK-GI-FP16-NEXT: shl v0.4s, v0.4s, #31 ; CHECK-GI-FP16-NEXT: shl v3.4s, v3.4s, #31 ; CHECK-GI-FP16-NEXT: shl v1.4s, v1.4s, #31 -; CHECK-GI-FP16-NEXT: sshr v2.4s, v2.4s, #31 -; CHECK-GI-FP16-NEXT: sshr v16.4s, v0.4s, #31 -; CHECK-GI-FP16-NEXT: sshr v3.4s, v3.4s, #31 -; CHECK-GI-FP16-NEXT: sshr v17.4s, v1.4s, #31 +; CHECK-GI-FP16-NEXT: cmlt v2.4s, v2.4s, #0 +; CHECK-GI-FP16-NEXT: cmlt v16.4s, v0.4s, #0 +; CHECK-GI-FP16-NEXT: cmlt v3.4s, v3.4s, #0 +; CHECK-GI-FP16-NEXT: cmlt v17.4s, v1.4s, #0 ; CHECK-GI-FP16-NEXT: ldp q0, q1, [sp] ; CHECK-GI-FP16-NEXT: bit v0.16b, v4.16b, v2.16b ; CHECK-GI-FP16-NEXT: mov v2.16b, v3.16b diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat.ll b/llvm/test/CodeGen/AArch64/fpclamptosat.ll index 00de153..24be923 100644 --- a/llvm/test/CodeGen/AArch64/fpclamptosat.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat.ll @@ -111,14 +111,14 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32(half %x) { -; CHECK-CVT-LABEL: utesth_f16i32: +define i32 @utest_f16i32(half %x) { +; CHECK-CVT-LABEL: utest_f16i32: ; CHECK-CVT: // %bb.0: // %entry ; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: fcvtzu w0, s0 ; CHECK-CVT-NEXT: ret ; -; CHECK-FP16-LABEL: utesth_f16i32: +; CHECK-FP16-LABEL: utest_f16i32: ; CHECK-FP16: // %bb.0: // %entry ; 
CHECK-FP16-NEXT: fcvtzu w0, h0 ; CHECK-FP16-NEXT: ret @@ -298,8 +298,8 @@ entry: ret i16 %conv6 } -define i16 @utesth_f16i16(half %x) { -; CHECK-CVT-LABEL: utesth_f16i16: +define i16 @utest_f16i16(half %x) { +; CHECK-CVT-LABEL: utest_f16i16: ; CHECK-CVT: // %bb.0: // %entry ; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: mov w9, #65535 // =0xffff @@ -308,7 +308,7 @@ define i16 @utesth_f16i16(half %x) { ; CHECK-CVT-NEXT: csel w0, w8, w9, lo ; CHECK-CVT-NEXT: ret ; -; CHECK-FP16-LABEL: utesth_f16i16: +; CHECK-FP16-LABEL: utest_f16i16: ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtzu w8, h0 ; CHECK-FP16-NEXT: mov w9, #65535 // =0xffff @@ -493,8 +493,8 @@ entry: ret i64 %conv6 } -define i64 @utesth_f16i64(half %x) { -; CHECK-LABEL: utesth_f16i64: +define i64 @utest_f16i64(half %x) { +; CHECK-LABEL: utest_f16i64: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 @@ -636,14 +636,14 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32_mm(half %x) { -; CHECK-CVT-LABEL: utesth_f16i32_mm: +define i32 @utest_f16i32_mm(half %x) { +; CHECK-CVT-LABEL: utest_f16i32_mm: ; CHECK-CVT: // %bb.0: // %entry ; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: fcvtzu w0, s0 ; CHECK-CVT-NEXT: ret ; -; CHECK-FP16-LABEL: utesth_f16i32_mm: +; CHECK-FP16-LABEL: utest_f16i32_mm: ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtzu w0, h0 ; CHECK-FP16-NEXT: ret @@ -808,8 +808,8 @@ entry: ret i16 %conv6 } -define i16 @utesth_f16i16_mm(half %x) { -; CHECK-CVT-LABEL: utesth_f16i16_mm: +define i16 @utest_f16i16_mm(half %x) { +; CHECK-CVT-LABEL: utest_f16i16_mm: ; CHECK-CVT: // %bb.0: // %entry ; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: mov w9, #65535 // =0xffff @@ -818,7 +818,7 @@ define i16 @utesth_f16i16_mm(half %x) { ; CHECK-CVT-NEXT: csel w0, w8, w9, lo ; CHECK-CVT-NEXT: ret ; -; CHECK-FP16-LABEL: utesth_f16i16_mm: +; CHECK-FP16-LABEL: utest_f16i16_mm: ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtzu w8, h0 ; CHECK-FP16-NEXT: mov w9, #65535 // =0xffff @@ -986,8 +986,8 @@ entry: ret i64 %conv6 } -define i64 @utesth_f16i64_mm(half %x) { -; CHECK-LABEL: utesth_f16i64_mm: +define i64 @utest_f16i64_mm(half %x) { +; CHECK-LABEL: utest_f16i64_mm: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 @@ -1026,6 +1026,29 @@ entry: ret i64 %conv6 } +; i32 non saturate + +define i32 @ustest_f16i32_nsat(half %x) { +; CHECK-CVT-LABEL: ustest_f16i32_nsat: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzs w8, s0 +; CHECK-CVT-NEXT: and w8, w8, w8, asr #31 +; CHECK-CVT-NEXT: bic w0, w8, w8, asr #31 +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: ustest_f16i32_nsat: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzs w8, h0 +; CHECK-FP16-NEXT: and w8, w8, w8, asr #31 +; CHECK-FP16-NEXT: bic w0, w8, w8, asr #31 +; CHECK-FP16-NEXT: ret + %conv = fptosi half %x to i32 + %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv) + %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0) + ret i32 %spec.store.select7 +} + declare i32 @llvm.smin.i32(i32, i32) declare i32 @llvm.smax.i32(i32, i32) declare i32 @llvm.umin.i32(i32, i32) diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll index b09a867..637c028 100644 --- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll @@ -321,20 +321,20 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32(<4 x half> %x) { -; CHECK-CVT-SD-LABEL: utesth_f16i32: +define <4 x i32> @utest_f16i32(<4 x half> %x) { +; CHECK-CVT-SD-LABEL: utest_f16i32: ; CHECK-CVT-SD: // %bb.0: // %entry ; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-SD-NEXT: fcvtzu v0.4s, v0.4s ; CHECK-CVT-SD-NEXT: ret ; -; CHECK-FP16-SD-LABEL: utesth_f16i32: +; CHECK-FP16-SD-LABEL: utest_f16i32: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: fcvtl v0.4s, v0.4h ; CHECK-FP16-SD-NEXT: fcvtzu v0.4s, v0.4s ; CHECK-FP16-SD-NEXT: ret ; -; CHECK-CVT-GI-LABEL: utesth_f16i32: +; CHECK-CVT-GI-LABEL: utest_f16i32: ; CHECK-CVT-GI: // %bb.0: // %entry ; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-GI-NEXT: movi v1.2d, #0x000000ffffffff @@ -349,7 +349,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-CVT-GI-NEXT: uzp1 v0.4s, v2.4s, v0.4s ; CHECK-CVT-GI-NEXT: ret ; -; CHECK-FP16-GI-LABEL: utesth_f16i32: +; CHECK-FP16-GI-LABEL: utest_f16i32: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] @@ -614,8 +614,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16(<8 x half> %x) { -; CHECK-CVT-LABEL: utesth_f16i16: +define <8 x i16> @utest_f16i16(<8 x half> %x) { +; CHECK-CVT-LABEL: utest_f16i16: ; CHECK-CVT: // %bb.0: // %entry ; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h ; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h @@ -625,12 +625,12 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-CVT-NEXT: uqxtn2 v0.8h, v2.4s ; CHECK-CVT-NEXT: ret ; -; CHECK-FP16-SD-LABEL: utesth_f16i16: +; CHECK-FP16-SD-LABEL: utest_f16i16: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: fcvtzu v0.8h, v0.8h ; CHECK-FP16-SD-NEXT: ret ; -; CHECK-FP16-GI-LABEL: utesth_f16i16: +; CHECK-FP16-GI-LABEL: utest_f16i16: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h ; CHECK-FP16-GI-NEXT: fcvtl2 v0.4s, v0.8h @@ -1746,8 +1746,8 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64(<2 x half> %x) { -; CHECK-CVT-SD-LABEL: utesth_f16i64: +define <2 x i64> @utest_f16i64(<2 x half> %x) { +; CHECK-CVT-SD-LABEL: utest_f16i64: ; CHECK-CVT-SD: // %bb.0: // %entry ; CHECK-CVT-SD-NEXT: sub sp, sp, #48 ; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill @@ -1777,7 
+1777,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-CVT-SD-NEXT: add sp, sp, #48 ; CHECK-CVT-SD-NEXT: ret ; -; CHECK-FP16-SD-LABEL: utesth_f16i64: +; CHECK-FP16-SD-LABEL: utest_f16i64: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 ; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill @@ -1807,7 +1807,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-FP16-SD-NEXT: add sp, sp, #48 ; CHECK-FP16-SD-NEXT: ret ; -; CHECK-CVT-GI-LABEL: utesth_f16i64: +; CHECK-CVT-GI-LABEL: utest_f16i64: ; CHECK-CVT-GI: // %bb.0: // %entry ; CHECK-CVT-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-CVT-GI-NEXT: mov h1, v0.h[1] @@ -1819,7 +1819,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-CVT-GI-NEXT: mov v0.d[1], x9 ; CHECK-CVT-GI-NEXT: ret ; -; CHECK-FP16-GI-LABEL: utesth_f16i64: +; CHECK-FP16-GI-LABEL: utest_f16i64: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] @@ -2307,20 +2307,20 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { -; CHECK-CVT-SD-LABEL: utesth_f16i32_mm: +define <4 x i32> @utest_f16i32_mm(<4 x half> %x) { +; CHECK-CVT-SD-LABEL: utest_f16i32_mm: ; CHECK-CVT-SD: // %bb.0: // %entry ; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-SD-NEXT: fcvtzu v0.4s, v0.4s ; CHECK-CVT-SD-NEXT: ret ; -; CHECK-FP16-SD-LABEL: utesth_f16i32_mm: +; CHECK-FP16-SD-LABEL: utest_f16i32_mm: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: fcvtl v0.4s, v0.4h ; CHECK-FP16-SD-NEXT: fcvtzu v0.4s, v0.4s ; CHECK-FP16-SD-NEXT: ret ; -; CHECK-CVT-GI-LABEL: utesth_f16i32_mm: +; CHECK-CVT-GI-LABEL: utest_f16i32_mm: ; CHECK-CVT-GI: // %bb.0: // %entry ; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-GI-NEXT: movi v1.2d, #0x000000ffffffff @@ -2335,7 +2335,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-CVT-GI-NEXT: uzp1 v0.4s, v2.4s, v0.4s ; CHECK-CVT-GI-NEXT: ret ; -; CHECK-FP16-GI-LABEL: utesth_f16i32_mm: +; CHECK-FP16-GI-LABEL: utest_f16i32_mm: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] @@ -2585,8 +2585,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { -; CHECK-CVT-LABEL: utesth_f16i16_mm: +define <8 x i16> @utest_f16i16_mm(<8 x half> %x) { +; CHECK-CVT-LABEL: utest_f16i16_mm: ; CHECK-CVT: // %bb.0: // %entry ; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h ; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h @@ -2596,12 +2596,12 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-CVT-NEXT: uqxtn2 v0.8h, v2.4s ; CHECK-CVT-NEXT: ret ; -; CHECK-FP16-SD-LABEL: utesth_f16i16_mm: +; CHECK-FP16-SD-LABEL: utest_f16i16_mm: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: fcvtzu v0.8h, v0.8h ; CHECK-FP16-SD-NEXT: ret ; -; CHECK-FP16-GI-LABEL: utesth_f16i16_mm: +; CHECK-FP16-GI-LABEL: utest_f16i16_mm: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h ; CHECK-FP16-GI-NEXT: fcvtl2 v0.4s, v0.8h @@ -3694,8 +3694,8 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { -; CHECK-CVT-SD-LABEL: utesth_f16i64_mm: +define <2 x i64> @utest_f16i64_mm(<2 x half> %x) { +; CHECK-CVT-SD-LABEL: utest_f16i64_mm: ; CHECK-CVT-SD: // %bb.0: // %entry ; CHECK-CVT-SD-NEXT: sub sp, sp, #48 ; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill @@ -3725,7 +3725,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-CVT-SD-NEXT: 
add sp, sp, #48 ; CHECK-CVT-SD-NEXT: ret ; -; CHECK-FP16-SD-LABEL: utesth_f16i64_mm: +; CHECK-FP16-SD-LABEL: utest_f16i64_mm: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 ; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill @@ -3755,7 +3755,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-FP16-SD-NEXT: add sp, sp, #48 ; CHECK-FP16-SD-NEXT: ret ; -; CHECK-CVT-GI-LABEL: utesth_f16i64_mm: +; CHECK-CVT-GI-LABEL: utest_f16i64_mm: ; CHECK-CVT-GI: // %bb.0: // %entry ; CHECK-CVT-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-CVT-GI-NEXT: mov h1, v0.h[1] @@ -3767,7 +3767,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-CVT-GI-NEXT: mov v0.d[1], x9 ; CHECK-CVT-GI-NEXT: ret ; -; CHECK-FP16-GI-LABEL: utesth_f16i64_mm: +; CHECK-FP16-GI-LABEL: utest_f16i64_mm: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] @@ -3941,6 +3941,51 @@ entry: ret <2 x i64> %conv6 } +; i32 non saturate + +define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) { +; CHECK-CVT-SD-LABEL: ustest_f16i32_nsat: +; CHECK-CVT-SD: // %bb.0: // %entry +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: movi v1.2d, #0000000000000000 +; CHECK-CVT-SD-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-CVT-SD-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: ustest_f16i32_nsat: +; CHECK-FP16-SD: // %bb.0: // %entry +; CHECK-FP16-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-FP16-SD-NEXT: movi v1.2d, #0000000000000000 +; CHECK-FP16-SD-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-FP16-SD-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-FP16-SD-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-CVT-GI-LABEL: ustest_f16i32_nsat: +; CHECK-CVT-GI: // %bb.0: // %entry +; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-GI-NEXT: movi v1.2d, #0000000000000000 +; CHECK-CVT-GI-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-CVT-GI-NEXT: smin v0.4s, v1.4s, v0.4s +; CHECK-CVT-GI-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: ustest_f16i32_nsat: +; CHECK-FP16-GI: // %bb.0: // %entry +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-FP16-GI-NEXT: movi v1.2d, #0000000000000000 +; CHECK-FP16-GI-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-FP16-GI-NEXT: smin v0.4s, v1.4s, v0.4s +; CHECK-FP16-GI-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-FP16-GI-NEXT: ret +entry: + %conv = fptosi <4 x half> %x to <4 x i32> + %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv) + %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer) + ret <4 x i32> %spec.store.select7 +} + declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>) diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll index 0c84468f..2026959 100644 --- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll +++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll @@ -1110,7 +1110,7 @@ define <8 x i8> @vselect_constant_cond_zero_v8i8(<8 x i8> %a) { ; CHECK-GI-NEXT: adrp x8, .LCPI83_0 ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI83_0] ; CHECK-GI-NEXT: shl v1.8b, v1.8b, #7 -; CHECK-GI-NEXT: sshr v1.8b, v1.8b, #7 +; CHECK-GI-NEXT: cmlt v1.8b, v1.8b, #0 ; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b ; 
CHECK-GI-NEXT: ret %b = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i8> %a, <8 x i8> zeroinitializer @@ -1133,7 +1133,7 @@ define <4 x i16> @vselect_constant_cond_zero_v4i16(<4 x i16> %a) { ; CHECK-GI-NEXT: mov v1.h[2], w9 ; CHECK-GI-NEXT: mov v1.h[3], w8 ; CHECK-GI-NEXT: shl v1.4h, v1.4h, #15 -; CHECK-GI-NEXT: sshr v1.4h, v1.4h, #15 +; CHECK-GI-NEXT: cmlt v1.4h, v1.4h, #0 ; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-GI-NEXT: ret %b = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i16> %a, <4 x i16> zeroinitializer @@ -1157,7 +1157,7 @@ define <4 x i32> @vselect_constant_cond_zero_v4i32(<4 x i32> %a) { ; CHECK-GI-NEXT: mov v1.s[2], w9 ; CHECK-GI-NEXT: mov v1.s[3], w8 ; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31 -; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31 +; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0 ; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-GI-NEXT: ret %b = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a, <4 x i32> zeroinitializer @@ -1176,7 +1176,7 @@ define <8 x i8> @vselect_constant_cond_v8i8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-GI-NEXT: adrp x8, .LCPI86_0 ; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI86_0] ; CHECK-GI-NEXT: shl v2.8b, v2.8b, #7 -; CHECK-GI-NEXT: sshr v2.8b, v2.8b, #7 +; CHECK-GI-NEXT: cmlt v2.8b, v2.8b, #0 ; CHECK-GI-NEXT: bif v0.8b, v1.8b, v2.8b ; CHECK-GI-NEXT: ret %c = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i8> %a, <8 x i8> %b @@ -1199,7 +1199,7 @@ define <4 x i16> @vselect_constant_cond_v4i16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-GI-NEXT: mov v2.h[2], w9 ; CHECK-GI-NEXT: mov v2.h[3], w8 ; CHECK-GI-NEXT: shl v2.4h, v2.4h, #15 -; CHECK-GI-NEXT: sshr v2.4h, v2.4h, #15 +; CHECK-GI-NEXT: cmlt v2.4h, v2.4h, #0 ; CHECK-GI-NEXT: bif v0.8b, v1.8b, v2.8b ; CHECK-GI-NEXT: ret %c = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i16> %a, <4 x i16> %b @@ -1223,7 +1223,7 @@ define <4 x i32> @vselect_constant_cond_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-GI-NEXT: mov v2.s[2], w9 ; CHECK-GI-NEXT: mov v2.s[3], w8 ; CHECK-GI-NEXT: shl v2.4s, v2.4s, #31 -; CHECK-GI-NEXT: sshr v2.4s, v2.4s, #31 +; CHECK-GI-NEXT: cmlt v2.4s, v2.4s, #0 ; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-GI-NEXT: ret %c = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a, <4 x i32> %b diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll index fb8b721..11b3b62 100644 --- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll +++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll @@ -966,7 +966,7 @@ define <8 x i8> @cmgez8xi8_alt(<8 x i8> %A) { ; ; CHECK-GI-LABEL: cmgez8xi8_alt: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.8b, v0.8b, #7 +; CHECK-GI-NEXT: cmlt v0.8b, v0.8b, #0 ; CHECK-GI-NEXT: mvn v0.8b, v0.8b ; CHECK-GI-NEXT: ret %sign = ashr <8 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> @@ -982,7 +982,7 @@ define <16 x i8> @cmgez16xi8_alt(<16 x i8> %A) { ; ; CHECK-GI-LABEL: cmgez16xi8_alt: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.16b, v0.16b, #7 +; CHECK-GI-NEXT: cmlt v0.16b, v0.16b, #0 ; CHECK-GI-NEXT: mvn v0.16b, v0.16b ; CHECK-GI-NEXT: ret %sign = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> @@ -998,7 +998,7 @@ define <4 x i16> @cmgez4xi16_alt(<4 x i16> %A) { ; ; CHECK-GI-LABEL: cmgez4xi16_alt: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.4h, 
v0.4h, #15 +; CHECK-GI-NEXT: cmlt v0.4h, v0.4h, #0 ; CHECK-GI-NEXT: mvn v0.8b, v0.8b ; CHECK-GI-NEXT: ret %sign = ashr <4 x i16> %A, <i16 15, i16 15, i16 15, i16 15> @@ -1014,7 +1014,7 @@ define <8 x i16> @cmgez8xi16_alt(<8 x i16> %A) { ; ; CHECK-GI-LABEL: cmgez8xi16_alt: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #15 +; CHECK-GI-NEXT: cmlt v0.8h, v0.8h, #0 ; CHECK-GI-NEXT: mvn v0.16b, v0.16b ; CHECK-GI-NEXT: ret %sign = ashr <8 x i16> %A, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> @@ -1030,7 +1030,7 @@ define <2 x i32> @cmgez2xi32_alt(<2 x i32> %A) { ; ; CHECK-GI-LABEL: cmgez2xi32_alt: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.2s, v0.2s, #31 +; CHECK-GI-NEXT: cmlt v0.2s, v0.2s, #0 ; CHECK-GI-NEXT: mvn v0.8b, v0.8b ; CHECK-GI-NEXT: ret %sign = ashr <2 x i32> %A, <i32 31, i32 31> @@ -1046,7 +1046,7 @@ define <4 x i32> @cmgez4xi32_alt(<4 x i32> %A) { ; ; CHECK-GI-LABEL: cmgez4xi32_alt: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-GI-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-GI-NEXT: mvn v0.16b, v0.16b ; CHECK-GI-NEXT: ret %sign = ashr <4 x i32> %A, <i32 31, i32 31, i32 31, i32 31> @@ -1062,7 +1062,7 @@ define <2 x i64> @cmgez2xi64_alt(<2 x i64> %A) { ; ; CHECK-GI-LABEL: cmgez2xi64_alt: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63 +; CHECK-GI-NEXT: cmlt v0.2d, v0.2d, #0 ; CHECK-GI-NEXT: mvn v0.16b, v0.16b ; CHECK-GI-NEXT: ret %sign = ashr <2 x i64> %A, <i64 63, i64 63> @@ -1503,99 +1503,64 @@ entry: } define <8 x i8> @cmltz8xi8_alt(<8 x i8> %A) { -; CHECK-SD-LABEL: cmltz8xi8_alt: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: cmlt v0.8b, v0.8b, #0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: cmltz8xi8_alt: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.8b, v0.8b, #7 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: cmltz8xi8_alt: +; CHECK: // %bb.0: +; CHECK-NEXT: cmlt v0.8b, v0.8b, #0 +; CHECK-NEXT: ret %A.lobit = ashr <8 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> ret <8 x i8> %A.lobit } define <16 x i8> @cmltz16xi8_alt(<16 x i8> %A) { -; CHECK-SD-LABEL: cmltz16xi8_alt: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: cmlt v0.16b, v0.16b, #0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: cmltz16xi8_alt: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.16b, v0.16b, #7 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: cmltz16xi8_alt: +; CHECK: // %bb.0: +; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 +; CHECK-NEXT: ret %A.lobit = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> ret <16 x i8> %A.lobit } define <4 x i16> @cmltz4xi16_alt(<4 x i16> %A) { -; CHECK-SD-LABEL: cmltz4xi16_alt: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: cmlt v0.4h, v0.4h, #0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: cmltz4xi16_alt: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.4h, v0.4h, #15 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: cmltz4xi16_alt: +; CHECK: // %bb.0: +; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 +; CHECK-NEXT: ret %A.lobit = ashr <4 x i16> %A, <i16 15, i16 15, i16 15, i16 15> ret <4 x i16> %A.lobit } define <8 x i16> @cmltz8xi16_alt(<8 x i16> %A) { -; CHECK-SD-LABEL: cmltz8xi16_alt: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: cmlt v0.8h, v0.8h, #0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: cmltz8xi16_alt: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #15 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: cmltz8xi16_alt: +; CHECK: // %bb.0: +; CHECK-NEXT: cmlt v0.8h, v0.8h, #0 +; CHECK-NEXT: ret %A.lobit = ashr <8 x i16> %A, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> ret <8 x 
i16> %A.lobit } define <2 x i32> @cmltz2xi32_alt(<2 x i32> %A) { -; CHECK-SD-LABEL: cmltz2xi32_alt: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: cmlt v0.2s, v0.2s, #0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: cmltz2xi32_alt: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.2s, v0.2s, #31 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: cmltz2xi32_alt: +; CHECK: // %bb.0: +; CHECK-NEXT: cmlt v0.2s, v0.2s, #0 +; CHECK-NEXT: ret %A.lobit = ashr <2 x i32> %A, <i32 31, i32 31> ret <2 x i32> %A.lobit } define <4 x i32> @cmltz4xi32_alt(<4 x i32> %A) { -; CHECK-SD-LABEL: cmltz4xi32_alt: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: cmlt v0.4s, v0.4s, #0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: cmltz4xi32_alt: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: cmltz4xi32_alt: +; CHECK: // %bb.0: +; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 +; CHECK-NEXT: ret %A.lobit = ashr <4 x i32> %A, <i32 31, i32 31, i32 31, i32 31> ret <4 x i32> %A.lobit } define <2 x i64> @cmltz2xi64_alt(<2 x i64> %A) { -; CHECK-SD-LABEL: cmltz2xi64_alt: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: cmlt v0.2d, v0.2d, #0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: cmltz2xi64_alt: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: cmltz2xi64_alt: +; CHECK: // %bb.0: +; CHECK-NEXT: cmlt v0.2d, v0.2d, #0 +; CHECK-NEXT: ret %A.lobit = ashr <2 x i64> %A, <i64 63, i64 63> ret <2 x i64> %A.lobit } @@ -2523,7 +2488,7 @@ define <2 x i32> @fcmal2xfloat(<2 x float> %A, <2 x float> %B) { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.2s, #1 ; CHECK-GI-NEXT: shl v0.2s, v0.2s, #31 -; CHECK-GI-NEXT: sshr v0.2s, v0.2s, #31 +; CHECK-GI-NEXT: cmlt v0.2s, v0.2s, #0 ; CHECK-GI-NEXT: ret %tmp3 = fcmp true <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -2542,7 +2507,7 @@ define <4 x i32> @fcmal4xfloat(<4 x float> %A, <4 x float> %B) { ; CHECK-GI-NEXT: dup v0.2s, w8 ; CHECK-GI-NEXT: mov v0.d[1], v0.d[0] ; CHECK-GI-NEXT: shl v0.4s, v0.4s, #31 -; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-GI-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-GI-NEXT: ret %tmp3 = fcmp true <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -2559,7 +2524,7 @@ define <2 x i64> @fcmal2xdouble(<2 x double> %A, <2 x double> %B) { ; CHECK-GI-NEXT: adrp x8, .LCPI221_0 ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI221_0] ; CHECK-GI-NEXT: shl v0.2d, v0.2d, #63 -; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63 +; CHECK-GI-NEXT: cmlt v0.2d, v0.2d, #0 ; CHECK-GI-NEXT: ret %tmp3 = fcmp true <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -2589,7 +2554,7 @@ define <4 x i32> @fcmnv4xfloat(<4 x float> %A, <4 x float> %B) { ; CHECK-GI-NEXT: mov v0.s[1], w8 ; CHECK-GI-NEXT: mov v0.d[1], v0.d[0] ; CHECK-GI-NEXT: shl v0.4s, v0.4s, #31 -; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-GI-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-GI-NEXT: ret %tmp3 = fcmp false <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll b/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll index 282f437..a8c55b4 100644 --- a/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll +++ b/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll @@ -465,7 +465,7 @@ define <8 x i16> @test_ushll_cmp(<8 x i8> %a, <8 x i8> %b) #0 { ; CHECK-GI-NEXT: movi v1.2d, #0xff00ff00ff00ff ; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-GI-NEXT: shl v0.8h, v0.8h, #15 -; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #15 +; CHECK-GI-NEXT: cmlt v0.8h, v0.8h, #0 ; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b ; 
CHECK-GI-NEXT: ret %cmp.i = icmp eq <8 x i8> %a, %b diff --git a/llvm/test/CodeGen/AArch64/select_cc.ll b/llvm/test/CodeGen/AArch64/select_cc.ll index 483f6c2..b562340 100644 --- a/llvm/test/CodeGen/AArch64/select_cc.ll +++ b/llvm/test/CodeGen/AArch64/select_cc.ll @@ -98,7 +98,7 @@ define <2 x double> @select_olt_load_cmp(<2 x double> %a, ptr %src) { ; CHECK-GI-NEXT: fcmgt v1.2s, v1.2s, #0.0 ; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0 ; CHECK-GI-NEXT: shl v1.2d, v1.2d, #63 -; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #63 +; CHECK-GI-NEXT: cmlt v1.2d, v1.2d, #0 ; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b ; CHECK-GI-NEXT: ret entry: @@ -136,7 +136,7 @@ define <4 x i32> @select_icmp_sgt(<4 x i32> %a, <4 x i8> %b) { ; CHECK-GI-NEXT: mov v2.s[2], w8 ; CHECK-GI-NEXT: mov v2.s[3], w9 ; CHECK-GI-NEXT: shl v1.4s, v2.4s, #31 -; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31 +; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0 ; CHECK-GI-NEXT: bic v0.16b, v0.16b, v1.16b ; CHECK-GI-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll b/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll index 293b74ec..96a7a9d0 100644 --- a/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll +++ b/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll @@ -255,7 +255,7 @@ define <16 x i8> @sel_shift_bool_v16i8(<16 x i1> %t) { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: shl v0.16b, v0.16b, #7 ; CHECK-GI-NEXT: movi v1.16b, #128 -; CHECK-GI-NEXT: sshr v0.16b, v0.16b, #7 +; CHECK-GI-NEXT: cmlt v0.16b, v0.16b, #0 ; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-GI-NEXT: ret %shl = select <16 x i1> %t, <16 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>, <16 x i8> zeroinitializer @@ -277,7 +277,7 @@ define <8 x i16> @sel_shift_bool_v8i16(<8 x i1> %t) { ; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-GI-NEXT: movi v1.8h, #128 ; CHECK-GI-NEXT: shl v0.8h, v0.8h, #15 -; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #15 +; CHECK-GI-NEXT: cmlt v0.8h, v0.8h, #0 ; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-GI-NEXT: ret %shl= select <8 x i1> %t, <8 x i16> <i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128>, <8 x i16> zeroinitializer @@ -299,7 +299,7 @@ define <4 x i32> @sel_shift_bool_v4i32(<4 x i1> %t) { ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: movi v1.4s, #64 ; CHECK-GI-NEXT: shl v0.4s, v0.4s, #31 -; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-GI-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-GI-NEXT: ret %shl = select <4 x i1> %t, <4 x i32> <i32 64, i32 64, i32 64, i32 64>, <4 x i32> zeroinitializer @@ -323,7 +323,7 @@ define <2 x i64> @sel_shift_bool_v2i64(<2 x i1> %t) { ; CHECK-GI-NEXT: adrp x8, .LCPI16_0 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] ; CHECK-GI-NEXT: shl v0.2d, v0.2d, #63 -; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63 +; CHECK-GI-NEXT: cmlt v0.2d, v0.2d, #0 ; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-GI-NEXT: ret %shl = select <2 x i1> %t, <2 x i64> <i64 65536, i64 65536>, <2 x i64> zeroinitializer diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-flat-scratch-init-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-flat-scratch-init-asan.ll new file mode 100644 index 0000000..0d68762 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-flat-scratch-init-asan.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 6 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa 
-mcpu=gfx90a -passes='amdgpu-attributor' %s -o - | FileCheck %s + +@lds_1 = internal addrspace(3) global [1 x i8] poison, align 4 + +;. +; CHECK: @lds_1 = internal addrspace(3) global [1 x i8] poison, align 4 +;. +define amdgpu_kernel void @k0() #0 { +; CHECK: Function Attrs: sanitize_address +; CHECK-LABEL: define amdgpu_kernel void @k0( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: store i8 7, ptr addrspace(3) @lds_1, align 4 +; CHECK-NEXT: ret void +; + store i8 7, ptr addrspace(3) @lds_1, align 4 + ret void +} + +attributes #0 = { sanitize_address } +; "amdgpu-no-flat-scratch-init" attribute should not be present in attribute list +;. +; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +;. diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll index a688b6f..fb566e5 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll @@ -707,8 +707,8 @@ attributes #6 = { "enqueued-block" } ; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR15:[0-9]+]] = { nounwind "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: 
attributes #[[ATTR18]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR19:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR20:[0-9]+]] = { "enqueued-block" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "enqueued-block" "uniform-work-group-size"="false" } diff --git a/llvm/test/CodeGen/AMDGPU/true16-fold.mir b/llvm/test/CodeGen/AMDGPU/true16-fold.mir index 93cc12f..9484417 100644 --- a/llvm/test/CodeGen/AMDGPU/true16-fold.mir +++ b/llvm/test/CodeGen/AMDGPU/true16-fold.mir @@ -57,6 +57,7 @@ body: | %4:vgpr_16 = COPY %3:sgpr_lo16 %5:vgpr_32 = V_ALIGNBIT_B32_t16_e64 0, %0:sreg_32, 0, killed %1:sreg_32, 0, killed %4:vgpr_16, 0, 0, implicit $exec S_ENDPGM 0, implicit %5 +... --- name: fold_16bit_madmix_clamp @@ -207,3 +208,27 @@ body: | $vgpr0 = COPY %4 S_ENDPGM 0, implicit $vgpr0 ... 
+ +--- +name: fold_imm16_across_reg_sequence +tracksRegLiveness: true +registers: +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-LABEL: name: fold_imm16_across_reg_sequence + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MOV_B16_t16_e64_:%[0-9]+]]:vgpr_16 = V_MOV_B16_t16_e64 0, -1, 0, implicit $exec + ; CHECK-NEXT: [[V_MOV_B16_t16_e64_1:%[0-9]+]]:vgpr_16 = V_MOV_B16_t16_e64 0, -1, 0, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_MOV_B16_t16_e64_]], %subreg.lo16, [[V_MOV_B16_t16_e64_1]], %subreg.hi16 + ; CHECK-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, -1, 0, -1, 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F32_e64_]] + ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0 + %0:vgpr_16 = V_MOV_B16_t16_e64 0, -1, 0, implicit $exec + %1:vgpr_16 = V_MOV_B16_t16_e64 0, -1, 0, implicit $exec + %2:vgpr_32 = REG_SEQUENCE %0, %subreg.lo16, %1, %subreg.hi16 + %3:vgpr_32 = nofpexcept V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec + $vgpr0 = COPY %3 + S_ENDPGM 0, implicit $vgpr0 +... diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll index 8ab56b2..a6f0a03 100644 --- a/llvm/test/CodeGen/ARM/fpclamptosat.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll @@ -383,8 +383,8 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32(half %x) { -; SOFT-LABEL: utesth_f16i32: +define i32 @utest_f16i32(half %x) { +; SOFT-LABEL: utest_f16i32: ; SOFT: @ %bb.0: @ %entry ; SOFT-NEXT: .save {r7, lr} ; SOFT-NEXT: push {r7, lr} @@ -400,7 +400,7 @@ define i32 @utesth_f16i32(half %x) { ; SOFT-NEXT: .LBB7_2: @ %entry ; SOFT-NEXT: pop {r7, pc} ; -; VFP2-LABEL: utesth_f16i32: +; VFP2-LABEL: utest_f16i32: ; VFP2: @ %bb.0: @ %entry ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} @@ -411,7 +411,7 @@ define i32 @utesth_f16i32(half %x) { ; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: pop {r7, pc} ; -; FULL-LABEL: utesth_f16i32: +; FULL-LABEL: utest_f16i32: ; FULL: @ %bb.0: @ %entry ; FULL-NEXT: vcvt.u32.f16 s0, s0 ; FULL-NEXT: vmov r0, s0 @@ -3985,6 +3985,46 @@ entry: ret i32 %spec.store.select7 } +; i32 non saturate + +define i32 @ustest_f16i32_nsat(half %x) { +; SOFT-LABEL: ustest_f16i32_nsat: +; SOFT: @ %bb.0: +; SOFT-NEXT: .save {r7, lr} +; SOFT-NEXT: push {r7, lr} +; SOFT-NEXT: uxth r0, r0 +; SOFT-NEXT: bl __aeabi_h2f +; SOFT-NEXT: bl __aeabi_f2iz +; SOFT-NEXT: asrs r1, r0, #31 +; SOFT-NEXT: ands r0, r1 +; SOFT-NEXT: asrs r1, r0, #31 +; SOFT-NEXT: bics r0, r1 +; SOFT-NEXT: pop {r7, pc} +; +; VFP2-LABEL: ustest_f16i32_nsat: +; VFP2: @ %bb.0: +; VFP2-NEXT: .save {r7, lr} +; VFP2-NEXT: push {r7, lr} +; VFP2-NEXT: vmov r0, s0 +; VFP2-NEXT: bl __aeabi_h2f +; VFP2-NEXT: vmov s0, r0 +; VFP2-NEXT: vcvt.s32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 +; VFP2-NEXT: usat r0, #0, r0 +; VFP2-NEXT: pop {r7, pc} +; +; FULL-LABEL: ustest_f16i32_nsat: +; FULL: @ %bb.0: +; FULL-NEXT: vcvt.s32.f16 s0, s0 +; FULL-NEXT: vmov r0, s0 +; FULL-NEXT: usat r0, #0, r0 +; FULL-NEXT: bx lr + %conv = fptosi half %x to i32 + %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv) + %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0) + ret i32 %spec.store.select7 +} + declare i32 @llvm.smin.i32(i32, i32) diff --git a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll index 96f009a..ba31b35 100644 --- a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll @@ -748,8 +748,8 @@ entry: 
ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32(<4 x half> %x) { -; CHECK-NEON-LABEL: utesth_f16i32: +define <4 x i32> @utest_f16i32(<4 x half> %x) { +; CHECK-NEON-LABEL: utest_f16i32: ; CHECK-NEON: @ %bb.0: @ %entry ; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} ; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} @@ -821,7 +821,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-NEON-NEXT: vpop {d12, d13} ; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} ; -; CHECK-FP16-LABEL: utesth_f16i32: +; CHECK-FP16-LABEL: utest_f16i32: ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} ; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} @@ -1366,8 +1366,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16(<8 x half> %x) { -; CHECK-NEON-LABEL: utesth_f16i16: +define <8 x i16> @utest_f16i16(<8 x half> %x) { +; CHECK-NEON-LABEL: utest_f16i16: ; CHECK-NEON: @ %bb.0: @ %entry ; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr} ; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr} @@ -1441,7 +1441,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} ; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc} ; -; CHECK-FP16-LABEL: utesth_f16i16: +; CHECK-FP16-LABEL: utest_f16i16: ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: vmovx.f16 s4, s0 ; CHECK-FP16-NEXT: vcvt.u32.f16 s12, s0 @@ -2109,8 +2109,8 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64(<2 x half> %x) { -; CHECK-NEON-LABEL: utesth_f16i64: +define <2 x i64> @utest_f16i64(<2 x half> %x) { +; CHECK-NEON-LABEL: utest_f16i64: ; CHECK-NEON: @ %bb.0: @ %entry ; CHECK-NEON-NEXT: .save {r4, r5, r6, lr} ; CHECK-NEON-NEXT: push {r4, r5, r6, lr} @@ -2148,7 +2148,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-NEON-NEXT: vpop {d8} ; CHECK-NEON-NEXT: pop {r4, r5, r6, pc} ; -; CHECK-FP16-LABEL: utesth_f16i64: +; CHECK-FP16-LABEL: utest_f16i64: ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: .save {r4, r5, r6, lr} ; CHECK-FP16-NEXT: push {r4, r5, r6, lr} @@ -2835,8 +2835,8 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { -; CHECK-NEON-LABEL: utesth_f16i32_mm: +define <4 x i32> @utest_f16i32_mm(<4 x half> %x) { +; CHECK-NEON-LABEL: utest_f16i32_mm: ; CHECK-NEON: @ %bb.0: @ %entry ; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr} ; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr} @@ -2881,7 +2881,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc} ; -; CHECK-FP16-LABEL: utesth_f16i32_mm: +; CHECK-FP16-LABEL: utest_f16i32_mm: ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: .save {r4, r5, r6, lr} ; CHECK-FP16-NEXT: push {r4, r5, r6, lr} @@ -3344,8 +3344,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { -; CHECK-NEON-LABEL: utesth_f16i16_mm: +define <8 x i16> @utest_f16i16_mm(<8 x half> %x) { +; CHECK-NEON-LABEL: utest_f16i16_mm: ; CHECK-NEON: @ %bb.0: @ %entry ; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr} ; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr} @@ -3419,7 +3419,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} ; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc} ; -; CHECK-FP16-LABEL: utesth_f16i16_mm: +; CHECK-FP16-LABEL: utest_f16i16_mm: ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: vmovx.f16 s4, s0 ; CHECK-FP16-NEXT: 
vcvt.u32.f16 s12, s0 @@ -4044,8 +4044,8 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { -; CHECK-NEON-LABEL: utesth_f16i64_mm: +define <2 x i64> @utest_f16i64_mm(<2 x half> %x) { +; CHECK-NEON-LABEL: utest_f16i64_mm: ; CHECK-NEON: @ %bb.0: @ %entry ; CHECK-NEON-NEXT: .save {r4, r5, r6, lr} ; CHECK-NEON-NEXT: push {r4, r5, r6, lr} @@ -4083,7 +4083,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-NEXT: vpop {d8} ; CHECK-NEON-NEXT: pop {r4, r5, r6, pc} ; -; CHECK-FP16-LABEL: utesth_f16i64_mm: +; CHECK-FP16-LABEL: utest_f16i64_mm: ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: .save {r4, r5, r6, lr} ; CHECK-FP16-NEXT: push {r4, r5, r6, lr} @@ -4215,6 +4215,77 @@ entry: ret <2 x i64> %conv6 } +; i32 non saturate + +define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) { +; CHECK-NEON-LABEL: ustest_f16i32_nsat: +; CHECK-NEON: @ %bb.0: @ %entry +; CHECK-NEON-NEXT: .save {r4, lr} +; CHECK-NEON-NEXT: push {r4, lr} +; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEON-NEXT: vmov r0, s0 +; CHECK-NEON-NEXT: vmov.f32 s16, s3 +; CHECK-NEON-NEXT: vmov.f32 s18, s2 +; CHECK-NEON-NEXT: vmov.f32 s20, s1 +; CHECK-NEON-NEXT: bl __aeabi_h2f +; CHECK-NEON-NEXT: mov r4, r0 +; CHECK-NEON-NEXT: vmov r0, s16 +; CHECK-NEON-NEXT: bl __aeabi_h2f +; CHECK-NEON-NEXT: vmov s16, r0 +; CHECK-NEON-NEXT: vmov r0, s18 +; CHECK-NEON-NEXT: bl __aeabi_h2f +; CHECK-NEON-NEXT: vmov s0, r0 +; CHECK-NEON-NEXT: vmov r1, s20 +; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0 +; CHECK-NEON-NEXT: vmov s18, r4 +; CHECK-NEON-NEXT: vmov r0, s0 +; CHECK-NEON-NEXT: vmov.32 d11[0], r0 +; CHECK-NEON-NEXT: mov r0, r1 +; CHECK-NEON-NEXT: bl __aeabi_h2f +; CHECK-NEON-NEXT: vcvt.s32.f32 s2, s18 +; CHECK-NEON-NEXT: vmov s0, r0 +; CHECK-NEON-NEXT: vcvt.s32.f32 s4, s16 +; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0 +; CHECK-NEON-NEXT: vmov.i32 q8, #0x0 +; CHECK-NEON-NEXT: vmov r0, s2 +; CHECK-NEON-NEXT: vmov.32 d10[0], r0 +; CHECK-NEON-NEXT: vmov r0, s4 +; CHECK-NEON-NEXT: vmov.32 d11[1], r0 +; CHECK-NEON-NEXT: vmov r0, s0 +; CHECK-NEON-NEXT: vmov.32 d10[1], r0 +; CHECK-NEON-NEXT: vmin.s32 q9, q5, q8 +; CHECK-NEON-NEXT: vmax.s32 q0, q9, q8 +; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEON-NEXT: pop {r4, pc} +; +; CHECK-FP16-LABEL: ustest_f16i32_nsat: +; CHECK-FP16: @ %bb.0: @ %entry +; CHECK-FP16-NEXT: vmovx.f16 s2, s0 +; CHECK-FP16-NEXT: vcvt.s32.f16 s6, s0 +; CHECK-FP16-NEXT: vcvt.s32.f16 s0, s1 +; CHECK-FP16-NEXT: vmovx.f16 s4, s1 +; CHECK-FP16-NEXT: vmov r0, s0 +; CHECK-FP16-NEXT: vcvt.s32.f16 s4, s4 +; CHECK-FP16-NEXT: vcvt.s32.f16 s2, s2 +; CHECK-FP16-NEXT: vmov.i32 q9, #0x0 +; CHECK-FP16-NEXT: vmov.32 d17[0], r0 +; CHECK-FP16-NEXT: vmov r0, s6 +; CHECK-FP16-NEXT: vmov.32 d16[0], r0 +; CHECK-FP16-NEXT: vmov r0, s4 +; CHECK-FP16-NEXT: vmov.32 d17[1], r0 +; CHECK-FP16-NEXT: vmov r0, s2 +; CHECK-FP16-NEXT: vmov.32 d16[1], r0 +; CHECK-FP16-NEXT: vmin.s32 q8, q8, q9 +; CHECK-FP16-NEXT: vmax.s32 q0, q8, q9 +; CHECK-FP16-NEXT: bx lr +entry: + %conv = fptosi <4 x half> %x to <4 x i32> + %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv) + %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer) + ret <4 x i32> %spec.store.select7 +} + declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>) diff --git 
a/llvm/test/CodeGen/Hexagon/inst_setcc_uno_uo.ll b/llvm/test/CodeGen/Hexagon/inst_setcc_uno_uo.ll new file mode 100644 index 0000000..8b121c5 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/inst_setcc_uno_uo.ll @@ -0,0 +1,93 @@ +;; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b %s -o - | FileCheck %s + +define dso_local void @store_isnan_f32(ptr %a, ptr %b, ptr %isnan_cmp) local_unnamed_addr { +entry: + %arrayidx_a = getelementptr inbounds nuw float, ptr %a, i32 0 + %arrayidx_b = getelementptr inbounds nuw float, ptr %b, i32 0 + %0 = load <32 x float>, ptr %arrayidx_a, align 4 + %1 = load <32 x float>, ptr %arrayidx_b, align 4 + %.vectorized = fcmp uno <32 x float> %0, %1 + %.LS.instance = zext <32 x i1> %.vectorized to <32 x i32> + %arrayidx1 = getelementptr inbounds nuw i32, ptr %isnan_cmp, i32 0 + store <32 x i32> %.LS.instance, ptr %arrayidx1, align 4 + ret void +} + +; CHECK-LABEL: store_isnan_f32 +; CHECK: [[RONE32:r[0-9]+]] = #1 +; CHECK: [[VOP2_F32:v[0-9]+]] = vxor([[VOP2_F32]],[[VOP2_F32]]) +; CHECK: [[VOP1_F32:v[0-9]+]] = vmemu(r0+#0) +; CHECK: [[VONES32:v[0-9]+]] = vsplat([[RONE32]]) +; CHECK: [[Q1_F32:q[0-9]+]] = vcmp.eq([[VOP1_F32]].w,[[VOP1_F32]].w) +; CHECK: [[VOP3_F32:v[0-9]+]] = vmemu(r1+#0) +; CHECK: [[Q1_F32]] &= vcmp.eq([[VOP3_F32]].w,[[VOP3_F32]].w) +; CHECK: [[VOUT_F32:v[0-9]+]] = vmux([[Q1_F32]],[[VOP2_F32]],[[VONES32]]) +; CHECK: vmemu(r2+#0) = [[VOUT_F32]] + +define dso_local void @store_isnan_f16(ptr %a, ptr %b, ptr %isnan_cmp) local_unnamed_addr { +entry: + %arrayidx_a = getelementptr inbounds nuw half, ptr %a, i32 0 + %arrayidx_b = getelementptr inbounds nuw half, ptr %b, i32 0 + %0 = load <64 x half>, ptr %arrayidx_a, align 2 + %1 = load <64 x half>, ptr %arrayidx_b, align 2 + %.vectorized = fcmp uno <64 x half> %0, %1 + %conv.LS.instance = zext <64 x i1> %.vectorized to <64 x i16> + %arrayidx1 = getelementptr inbounds nuw i16, ptr %isnan_cmp, i32 0 + store <64 x i16> %conv.LS.instance, ptr %arrayidx1, align 2 + ret void +} +; CHECK-LABEL: store_isnan_f16 +; CHECK: [[RONE16:r[0-9]+]] = #1 +; CHECK: [[VOP2_F16:v[0-9]+]] = vxor([[VOP2_F16]],[[VOP2_F16]]) +; CHECK: [[VOP1_F16:v[0-9]+]] = vmemu(r0+#0) +; CHECK: [[VONES16:v[0-9]+]].h = vsplat([[RONE16]]) +; CHECK: [[Q1_F16:q[0-9]+]] = vcmp.eq([[VOP1_F16]].h,[[VOP1_F16]].h) +; CHECK: [[VOP3_F16:v[0-9]+]] = vmemu(r1+#0) +; CHECK: [[Q1_F16]] &= vcmp.eq([[VOP3_F16]].h,[[VOP3_F16]].h) +; CHECK: [[VOUT_F16:v[0-9]+]] = vmux([[Q1_F16]],[[VOP2_F16]],[[VONES16]]) +; CHECK: vmemu(r2+#0) = [[VOUT_F16]] + +define dso_local void @store_isordered_f32(ptr %a, ptr %b, ptr %isordered_cmp) local_unnamed_addr { +entry: + %arrayidx_a = getelementptr inbounds nuw float, ptr %a, i32 0 + %arrayidx_b = getelementptr inbounds nuw float, ptr %b, i32 0 + %0 = load <32 x float>, ptr %arrayidx_a, align 4 + %1 = load <32 x float>, ptr %arrayidx_b, align 4 + %.vectorized = fcmp ord <32 x float> %0, %1 + %.LS.instance = zext <32 x i1> %.vectorized to <32 x i32> + %arrayidx1 = getelementptr inbounds nuw i32, ptr %isordered_cmp, i32 0 + store <32 x i32> %.LS.instance, ptr %arrayidx1, align 4 + ret void +} +; CHECK-LABEL: store_isordered_f32 +; CHECK: [[VOP2_ORD_F32:v[0-9]+]] = vxor([[VOP2_ORD_F32]],[[VOP2_ORD_F32]]) +; CHECK: [[VOP1_ORD_F32:v[0-9]+]] = vmemu(r0+#0) +; CHECK: [[VONES_ORD_F32:v[0-9]+]] = vsplat([[RONE32]]) +; CHECK: [[Q1_ORD_F32:q[0-9]+]] = vcmp.eq([[VOP1_ORD_F32]].w,[[VOP1_ORD_F32]].w) +; CHECK: [[VOP3_ORD_F32:v[0-9]+]] = vmemu(r1+#0) +; CHECK: [[Q1_ORD_F32]] &= vcmp.eq([[VOP3_ORD_F32]].w,[[VOP3_ORD_F32]].w) +; CHECK:
[[VOUT_ORD_F32:v[0-9]+]] = vmux([[Q1_ORD_F32]],[[VONES_ORD_F32]],[[VOP2_ORD_F32]]) +; CHECK: vmemu(r2+#0) = [[VOUT_ORD_F32]] + + +define dso_local void @store_isordered_f16(ptr %a, ptr %b, ptr %isordered_cmp) local_unnamed_addr { +entry: + %arrayidx_a = getelementptr inbounds nuw half, ptr %a, i32 0 + %arrayidx_b = getelementptr inbounds nuw half, ptr %b, i32 0 + %0 = load <64 x half>, ptr %arrayidx_a, align 2 + %1 = load <64 x half>, ptr %arrayidx_b, align 2 + %.vectorized = fcmp ord <64 x half> %0, %1 + %conv.LS.instance = zext <64 x i1> %.vectorized to <64 x i16> + %arrayidx1 = getelementptr inbounds nuw i16, ptr %isordered_cmp, i32 0 + store <64 x i16> %conv.LS.instance, ptr %arrayidx1, align 2 + ret void +} +; CHECK-LABEL: store_isordered_f16 +; CHECK: [[VOP2_ORD_F16:v[0-9]+]] = vxor([[VOP2_ORD_F16]],[[VOP2_ORD_F16]]) +; CHECK: [[VOP1_ORD_F16:v[0-9]+]] = vmemu(r0+#0) +; CHECK: [[VONES_ORD_F16:v[0-9]+]].h = vsplat([[RONE16]]) +; CHECK: [[Q1_ORD_F16:q[0-9]+]] = vcmp.eq([[VOP1_ORD_F16]].h,[[VOP1_ORD_F16]].h) +; CHECK: [[VOP3_ORD_F16:v[0-9]+]] = vmemu(r1+#0) +; CHECK: [[Q1_ORD_F16]] &= vcmp.eq([[VOP3_ORD_F16]].h,[[VOP3_ORD_F16]].h) +; CHECK: [[VOUT_ORD_F16:v[0-9]+]] = vmux([[Q1_ORD_F16]],[[VONES_ORD_F16]],[[VOP2_ORD_F16]]) +; CHECK: vmemu(r2+#0) = [[VOUT_ORD_F16]] diff --git a/llvm/test/CodeGen/Hexagon/isel-fold-shl-zext.ll b/llvm/test/CodeGen/Hexagon/isel-fold-shl-zext.ll index 5fa5023..fe0f7dd 100644 --- a/llvm/test/CodeGen/Hexagon/isel-fold-shl-zext.ll +++ b/llvm/test/CodeGen/Hexagon/isel-fold-shl-zext.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 ; RUN: llc -mtriple=hexagon-unknown-elf < %s | FileCheck %s ; In ISelLowering, when folding nodes (or (shl xx, s), (zext y)) @@ -11,17 +12,18 @@ target triple = "hexagon" ; Function Attrs: nofree nosync nounwind memory(readwrite, inaccessiblemem: none) define dso_local void @foo(i64* nocapture noundef %buf, i32 %a, i32 %b) local_unnamed_addr { ; CHECK-LABEL: foo: -; CHECK: // %bb.0: // %entry +; CHECK: .cfi_startproc +; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: { -; CHECK-NEXT: r[[REG0:[0-9]+]] = addasl(r2,r1,#1) -; CHECK-NEXT: r[[REG2:[0-9]+]] = asl(r1,#1) +; CHECK-NEXT: r2 = addasl(r2,r1,#1) +; CHECK-NEXT: r3 = asl(r1,#1) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: r[[REG1:[0-9]+]] = addasl(r[[REG0]],r1,#1) +; CHECK-NEXT: r2 = addasl(r2,r1,#1) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: jumpr r31 -; CHECK-NEXT: memd(r0+#8) = r[[REG2]]:[[REG1]] +; CHECK-NEXT: memd(r0+#8) = r3:2 ; CHECK-NEXT: } entry: %arrayidx = getelementptr inbounds i64, i64* %buf, i32 1 diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll index 18d071c..a0d1ecc 100644 --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -436,8 +436,8 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32(half %x) { -; RV32-LABEL: utesth_f16i32: +define i32 @utest_f16i32(half %x) { +; RV32-LABEL: utest_f16i32: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 @@ -456,7 +456,7 @@ define i32 @utesth_f16i32(half %x) { ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; -; RV64-LABEL: utesth_f16i32: +; RV64-LABEL: utest_f16i32: ; RV64: # %bb.0: # %entry ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 @@ -974,8 +974,8 @@ entry: ret i16 %conv6 } -define i16 @utesth_f16i16(half %x) { -; RV32-LABEL: utesth_f16i16: +define i16 @utest_f16i16(half %x) { +; RV32-LABEL: 
utest_f16i16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 @@ -995,7 +995,7 @@ define i16 @utesth_f16i16(half %x) { ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; -; RV64-LABEL: utesth_f16i16: +; RV64-LABEL: utest_f16i16: ; RV64: # %bb.0: # %entry ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 @@ -3829,6 +3829,52 @@ entry: ret i64 %conv6 } +; i32 non saturate + +define i32 @ustest_f16i32_nsat(half %x) { +; RV32-LABEL: ustest_f16i32_nsat: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: call __extendhfsf2 +; RV32-NEXT: fcvt.w.s a0, fa0, rtz +; RV32-NEXT: srai a1, a0, 31 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: sgtz a1, a0 +; RV32-NEXT: neg a1, a1 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: ustest_f16i32_nsat: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: call __extendhfsf2 +; RV64-NEXT: fcvt.l.s a0, fa0, rtz +; RV64-NEXT: srai a1, a0, 63 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: sgtz a1, a0 +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret + %conv = fptosi half %x to i32 + %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv) + %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0) + ret i32 %spec.store.select7 +} + declare i32 @llvm.smin.i32(i32, i32) declare i32 @llvm.smax.i32(i32, i32) declare i32 @llvm.umin.i32(i32, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll index aba9d37..f5977625 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll @@ -519,8 +519,8 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32(<4 x half> %x) { -; CHECK-NOV-LABEL: utesth_f16i32: +define <4 x i32> @utest_f16i32(<4 x half> %x) { +; CHECK-NOV-LABEL: utest_f16i32: ; CHECK-NOV: # %bb.0: # %entry ; CHECK-NOV-NEXT: addi sp, sp, -64 ; CHECK-NOV-NEXT: .cfi_def_cfa_offset 64 @@ -610,7 +610,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-NOV-NEXT: bgeu a3, a1, .LBB7_4 ; CHECK-NOV-NEXT: j .LBB7_5 ; -; CHECK-V-LABEL: utesth_f16i32: +; CHECK-V-LABEL: utest_f16i32: ; CHECK-V: # %bb.0: # %entry ; CHECK-V-NEXT: addi sp, sp, -48 ; CHECK-V-NEXT: .cfi_def_cfa_offset 48 @@ -1594,8 +1594,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16(<8 x half> %x) { -; CHECK-NOV-LABEL: utesth_f16i16: +define <8 x i16> @utest_f16i16(<8 x half> %x) { +; CHECK-NOV-LABEL: utest_f16i16: ; CHECK-NOV: # %bb.0: # %entry ; CHECK-NOV-NEXT: addi sp, sp, -128 ; CHECK-NOV-NEXT: .cfi_def_cfa_offset 128 @@ -1765,7 +1765,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: bgeu a7, a3, .LBB16_8 ; CHECK-NOV-NEXT: j .LBB16_9 ; -; CHECK-V-LABEL: utesth_f16i16: +; CHECK-V-LABEL: utest_f16i16: ; CHECK-V: # %bb.0: # %entry ; CHECK-V-NEXT: addi sp, sp, -80 ; CHECK-V-NEXT: .cfi_def_cfa_offset 80 @@ -3332,8 +3332,8 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> 
@utesth_f16i64(<2 x half> %x) { -; CHECK-NOV-LABEL: utesth_f16i64: +define <2 x i64> @utest_f16i64(<2 x half> %x) { +; CHECK-NOV-LABEL: utest_f16i64: ; CHECK-NOV: # %bb.0: # %entry ; CHECK-NOV-NEXT: addi sp, sp, -32 ; CHECK-NOV-NEXT: .cfi_def_cfa_offset 32 @@ -3373,7 +3373,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-NOV-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NOV-NEXT: ret ; -; CHECK-V-LABEL: utesth_f16i64: +; CHECK-V-LABEL: utest_f16i64: ; CHECK-V: # %bb.0: # %entry ; CHECK-V-NEXT: addi sp, sp, -32 ; CHECK-V-NEXT: .cfi_def_cfa_offset 32 @@ -4074,8 +4074,8 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { -; CHECK-NOV-LABEL: utesth_f16i32_mm: +define <4 x i32> @utest_f16i32_mm(<4 x half> %x) { +; CHECK-NOV-LABEL: utest_f16i32_mm: ; CHECK-NOV: # %bb.0: # %entry ; CHECK-NOV-NEXT: addi sp, sp, -64 ; CHECK-NOV-NEXT: .cfi_def_cfa_offset 64 @@ -4165,7 +4165,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-NOV-NEXT: bgeu a3, a1, .LBB34_4 ; CHECK-NOV-NEXT: j .LBB34_5 ; -; CHECK-V-LABEL: utesth_f16i32_mm: +; CHECK-V-LABEL: utest_f16i32_mm: ; CHECK-V: # %bb.0: # %entry ; CHECK-V-NEXT: addi sp, sp, -48 ; CHECK-V-NEXT: .cfi_def_cfa_offset 48 @@ -5134,8 +5134,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { -; CHECK-NOV-LABEL: utesth_f16i16_mm: +define <8 x i16> @utest_f16i16_mm(<8 x half> %x) { +; CHECK-NOV-LABEL: utest_f16i16_mm: ; CHECK-NOV: # %bb.0: # %entry ; CHECK-NOV-NEXT: addi sp, sp, -128 ; CHECK-NOV-NEXT: .cfi_def_cfa_offset 128 @@ -5305,7 +5305,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-NOV-NEXT: bgeu a7, a3, .LBB43_8 ; CHECK-NOV-NEXT: j .LBB43_9 ; -; CHECK-V-LABEL: utesth_f16i16_mm: +; CHECK-V-LABEL: utest_f16i16_mm: ; CHECK-V: # %bb.0: # %entry ; CHECK-V-NEXT: addi sp, sp, -80 ; CHECK-V-NEXT: .cfi_def_cfa_offset 80 @@ -6837,8 +6837,8 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { -; CHECK-NOV-LABEL: utesth_f16i64_mm: +define <2 x i64> @utest_f16i64_mm(<2 x half> %x) { +; CHECK-NOV-LABEL: utest_f16i64_mm: ; CHECK-NOV: # %bb.0: # %entry ; CHECK-NOV-NEXT: addi sp, sp, -32 ; CHECK-NOV-NEXT: .cfi_def_cfa_offset 32 @@ -6877,7 +6877,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-NOV-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NOV-NEXT: ret ; -; CHECK-V-LABEL: utesth_f16i64_mm: +; CHECK-V-LABEL: utest_f16i64_mm: ; CHECK-V: # %bb.0: # %entry ; CHECK-V-NEXT: addi sp, sp, -32 ; CHECK-V-NEXT: .cfi_def_cfa_offset 32 @@ -7048,6 +7048,172 @@ entry: ret <2 x i64> %conv6 } +; i32 non saturate + +define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) { +; CHECK-NOV-LABEL: ustest_f16i32_nsat: +; CHECK-NOV: # %bb.0: # %entry +; CHECK-NOV-NEXT: addi sp, sp, -64 +; CHECK-NOV-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NOV-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; CHECK-NOV-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; CHECK-NOV-NEXT: sd s1, 40(sp) # 8-byte Folded Spill +; CHECK-NOV-NEXT: sd s2, 32(sp) # 8-byte Folded Spill +; CHECK-NOV-NEXT: sd s3, 24(sp) # 8-byte Folded Spill +; CHECK-NOV-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill +; CHECK-NOV-NEXT: fsd fs1, 8(sp) # 8-byte Folded Spill +; CHECK-NOV-NEXT: .cfi_offset ra, -8 +; CHECK-NOV-NEXT: .cfi_offset s0, -16 +; CHECK-NOV-NEXT: .cfi_offset s1, -24 +; CHECK-NOV-NEXT: .cfi_offset s2, -32 +; CHECK-NOV-NEXT: .cfi_offset s3, -40 +; CHECK-NOV-NEXT: .cfi_offset fs0, -48 +; CHECK-NOV-NEXT: .cfi_offset fs1, -56 +; CHECK-NOV-NEXT: lhu s1, 0(a1) +; CHECK-NOV-NEXT: lhu s2, 8(a1) +; CHECK-NOV-NEXT: lhu 
a2, 16(a1) +; CHECK-NOV-NEXT: lhu s3, 24(a1) +; CHECK-NOV-NEXT: mv s0, a0 +; CHECK-NOV-NEXT: fmv.w.x fa0, a2 +; CHECK-NOV-NEXT: call __extendhfsf2 +; CHECK-NOV-NEXT: fmv.s fs0, fa0 +; CHECK-NOV-NEXT: fmv.w.x fa0, s2 +; CHECK-NOV-NEXT: call __extendhfsf2 +; CHECK-NOV-NEXT: fmv.s fs1, fa0 +; CHECK-NOV-NEXT: fmv.w.x fa0, s1 +; CHECK-NOV-NEXT: call __extendhfsf2 +; CHECK-NOV-NEXT: fcvt.l.s s1, fa0, rtz +; CHECK-NOV-NEXT: fcvt.l.s s2, fs1, rtz +; CHECK-NOV-NEXT: fmv.w.x fa0, s3 +; CHECK-NOV-NEXT: fcvt.l.s s3, fs0, rtz +; CHECK-NOV-NEXT: call __extendhfsf2 +; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz +; CHECK-NOV-NEXT: srai a1, s3, 63 +; CHECK-NOV-NEXT: and a1, a1, s3 +; CHECK-NOV-NEXT: srai a2, s2, 63 +; CHECK-NOV-NEXT: and a2, a2, s2 +; CHECK-NOV-NEXT: srai a3, s1, 63 +; CHECK-NOV-NEXT: and a3, a3, s1 +; CHECK-NOV-NEXT: srai a4, a0, 63 +; CHECK-NOV-NEXT: and a0, a4, a0 +; CHECK-NOV-NEXT: sgtz a4, a3 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: and a3, a4, a3 +; CHECK-NOV-NEXT: sgtz a4, a2 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: and a2, a4, a2 +; CHECK-NOV-NEXT: sgtz a4, a1 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: and a1, a4, a1 +; CHECK-NOV-NEXT: sgtz a4, a0 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: and a0, a4, a0 +; CHECK-NOV-NEXT: sw a3, 0(s0) +; CHECK-NOV-NEXT: sw a2, 4(s0) +; CHECK-NOV-NEXT: sw a1, 8(s0) +; CHECK-NOV-NEXT: sw a0, 12(s0) +; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; CHECK-NOV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; CHECK-NOV-NEXT: ld s1, 40(sp) # 8-byte Folded Reload +; CHECK-NOV-NEXT: ld s2, 32(sp) # 8-byte Folded Reload +; CHECK-NOV-NEXT: ld s3, 24(sp) # 8-byte Folded Reload +; CHECK-NOV-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload +; CHECK-NOV-NEXT: fld fs1, 8(sp) # 8-byte Folded Reload +; CHECK-NOV-NEXT: .cfi_restore ra +; CHECK-NOV-NEXT: .cfi_restore s0 +; CHECK-NOV-NEXT: .cfi_restore s1 +; CHECK-NOV-NEXT: .cfi_restore s2 +; CHECK-NOV-NEXT: .cfi_restore s3 +; CHECK-NOV-NEXT: .cfi_restore fs0 +; CHECK-NOV-NEXT: .cfi_restore fs1 +; CHECK-NOV-NEXT: addi sp, sp, 64 +; CHECK-NOV-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NOV-NEXT: ret +; +; CHECK-V-LABEL: ustest_f16i32_nsat: +; CHECK-V: # %bb.0: # %entry +; CHECK-V-NEXT: addi sp, sp, -48 +; CHECK-V-NEXT: .cfi_def_cfa_offset 48 +; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: .cfi_offset ra, -8 +; CHECK-V-NEXT: .cfi_offset s0, -16 +; CHECK-V-NEXT: .cfi_offset s1, -24 +; CHECK-V-NEXT: .cfi_offset s2, -32 +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: sub sp, sp, a1 +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; CHECK-V-NEXT: lhu s0, 0(a0) +; CHECK-V-NEXT: lhu s1, 8(a0) +; CHECK-V-NEXT: lhu s2, 16(a0) +; CHECK-V-NEXT: lhu a0, 24(a0) +; CHECK-V-NEXT: fmv.w.x fa0, a0 +; CHECK-V-NEXT: call __extendhfsf2 +; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz +; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; CHECK-V-NEXT: call __extendhfsf2 +; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte 
Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s1 +; CHECK-V-NEXT: call __extendhfsf2 +; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz +; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; CHECK-V-NEXT: call __extendhfsf2 +; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-V-NEXT: vslideup.vi v8, v9, 2 +; CHECK-V-NEXT: vmin.vx v8, v8, zero +; CHECK-V-NEXT: vmax.vx v8, v8, zero +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add sp, sp, a0 +; CHECK-V-NEXT: .cfi_def_cfa sp, 48 +; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: .cfi_restore ra +; CHECK-V-NEXT: .cfi_restore s0 +; CHECK-V-NEXT: .cfi_restore s1 +; CHECK-V-NEXT: .cfi_restore s2 +; CHECK-V-NEXT: addi sp, sp, 48 +; CHECK-V-NEXT: .cfi_def_cfa_offset 0 +; CHECK-V-NEXT: ret +entry: + %conv = fptosi <4 x half> %x to <4 x i32> + %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv) + %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer) + ret <4 x i32> %spec.store.select7 +} + declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>) diff --git a/llvm/test/CodeGen/SPIRV/pointers/ptrcast-bitcast.ll b/llvm/test/CodeGen/SPIRV/pointers/ptrcast-bitcast.ll new file mode 100644 index 0000000..8491328 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/pointers/ptrcast-bitcast.ll @@ -0,0 +1,28 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - | FileCheck %s --match-full-lines +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: %[[#uint:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#v2_uint:]] = OpTypeVector %[[#uint]] 2 +; CHECK-DAG: %[[#double:]] = OpTypeFloat 64 +; CHECK-DAG: %[[#v2_double:]] = OpTypeVector %[[#double]] 2 +; CHECK-DAG: %[[#v4_uint:]] = OpTypeVector %[[#uint]] 4 +@.str = private unnamed_addr constant [3 x i8] c"In\00", align 1 +@.str.2 = private unnamed_addr constant [4 x i8] c"Out\00", align 1 + +define void @main() local_unnamed_addr #0 { +entry: + %0 = tail call target("spirv.VulkanBuffer", [0 x <2 x i32>], 12, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0v2i32_12_0t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str) + %1 = tail call target("spirv.VulkanBuffer", [0 x <2 x double>], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0v2f64_12_1t(i32 0, i32 2, i32 1, i32 0, ptr nonnull @.str.2) + %2 = tail call noundef align 8 
dereferenceable(8) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v2i32_12_0t(target("spirv.VulkanBuffer", [0 x <2 x i32>], 12, 0) %0, i32 0) + %3 = load <2 x i32>, ptr addrspace(11) %2, align 8 + %4 = tail call noundef align 8 dereferenceable(8) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v2i32_12_0t(target("spirv.VulkanBuffer", [0 x <2 x i32>], 12, 0) %0, i32 1) + %5 = load <2 x i32>, ptr addrspace(11) %4, align 8 +; CHECK: %[[#tmp:]] = OpVectorShuffle %[[#v4_uint]] {{%[0-9]+}} {{%[0-9]+}} 0 2 1 3 + %6 = shufflevector <2 x i32> %3, <2 x i32> %5, <4 x i32> <i32 0, i32 2, i32 1, i32 3> +; CHECK: %[[#access:]] = OpAccessChain {{.*}} + %7 = tail call noundef align 16 dereferenceable(16) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v2f64_12_1t(target("spirv.VulkanBuffer", [0 x <2 x double>], 12, 1) %1, i32 0) +; CHECK: %[[#bitcast:]] = OpBitcast %[[#v2_double]] %[[#tmp]] +; CHECK: OpStore %[[#access]] %[[#bitcast]] Aligned 16 + store <4 x i32> %6, ptr addrspace(11) %7, align 16 + ret void +} diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll index 137994ce..59f3edc 100644 --- a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll +++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll @@ -136,9 +136,9 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32(half %x) { -; CHECK-LABEL: utesth_f16i32: -; CHECK: .functype utesth_f16i32 (f32) -> (i32) +define i32 @utest_f16i32(half %x) { +; CHECK-LABEL: utest_f16i32: +; CHECK: .functype utest_f16i32 (f32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 @@ -153,9 +153,9 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32_cse(half %x) { -; CHECK-LABEL: utesth_f16i32_cse: -; CHECK: .functype utesth_f16i32_cse (f32) -> (i32) +define i32 @utest_f16i32_cse(half %x) { +; CHECK-LABEL: utest_f16i32_cse: +; CHECK: .functype utest_f16i32_cse (f32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 @@ -403,9 +403,9 @@ entry: ret i16 %conv6 } -define i16 @utesth_f16i16(half %x) { -; CHECK-LABEL: utesth_f16i16: -; CHECK: .functype utesth_f16i16 (f32) -> (i32) +define i16 @utest_f16i16(half %x) { +; CHECK-LABEL: utest_f16i16: +; CHECK: .functype utest_f16i16 (f32) -> (i32) ; CHECK-NEXT: .local i32 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 @@ -427,9 +427,9 @@ entry: ret i16 %conv6 } -define i16 @utesth_f16i16_cse(half %x) { -; CHECK-LABEL: utesth_f16i16_cse: -; CHECK: .functype utesth_f16i16_cse (f32) -> (i32) +define i16 @utest_f16i16_cse(half %x) { +; CHECK-LABEL: utest_f16i16_cse: +; CHECK: .functype utest_f16i16_cse (f32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 @@ -880,9 +880,9 @@ entry: ret i64 %conv6 } -define i64 @utesth_f16i64(half %x) { -; CHECK-LABEL: utesth_f16i64: -; CHECK: .functype utesth_f16i64 (f32) -> (i64) +define i64 @utest_f16i64(half %x) { +; CHECK-LABEL: utest_f16i64: +; CHECK: .functype utest_f16i64 (f32) -> (i64) ; CHECK-NEXT: .local i32, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -919,9 +919,9 @@ entry: ret i64 %conv6 } -define i64 @utesth_f16i64_cse(half %x) { -; CHECK-LABEL: utesth_f16i64_cse: -; CHECK: .functype utesth_f16i64_cse (f32) -> (i64) +define i64 @utest_f16i64_cse(half %x) { +; CHECK-LABEL: utest_f16i64_cse: +; CHECK: .functype utest_f16i64_cse (f32) -> (i64) ; CHECK-NEXT: .local i32, 
i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -1118,9 +1118,9 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32_mm(half %x) { -; CHECK-LABEL: utesth_f16i32_mm: -; CHECK: .functype utesth_f16i32_mm (f32) -> (i32) +define i32 @utest_f16i32_mm(half %x) { +; CHECK-LABEL: utest_f16i32_mm: +; CHECK: .functype utest_f16i32_mm (f32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 @@ -1353,9 +1353,9 @@ entry: ret i16 %conv6 } -define i16 @utesth_f16i16_mm(half %x) { -; CHECK-LABEL: utesth_f16i16_mm: -; CHECK: .functype utesth_f16i16_mm (f32) -> (i32) +define i16 @utest_f16i16_mm(half %x) { +; CHECK-LABEL: utest_f16i16_mm: +; CHECK: .functype utest_f16i16_mm (f32) -> (i32) ; CHECK-NEXT: .local i32 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 @@ -1637,9 +1637,9 @@ entry: ret i64 %conv6 } -define i64 @utesth_f16i64_mm(half %x) { -; CHECK-LABEL: utesth_f16i64_mm: -; CHECK: .functype utesth_f16i64_mm (f32) -> (i64) +define i64 @utest_f16i64_mm(half %x) { +; CHECK-LABEL: utest_f16i64_mm: +; CHECK: .functype utest_f16i64_mm (f32) -> (i64) ; CHECK-NEXT: .local i32, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -1724,9 +1724,9 @@ entry: ret i64 %conv6 } -define i64 @utesth_f16i64_mm_cse(half %x) { -; CHECK-LABEL: utesth_f16i64_mm_cse: -; CHECK: .functype utesth_f16i64_mm_cse (f32) -> (i64) +define i64 @utest_f16i64_mm_cse(half %x) { +; CHECK-LABEL: utest_f16i64_mm_cse: +; CHECK: .functype utest_f16i64_mm_cse (f32) -> (i64) ; CHECK-NEXT: .local i32, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -1754,6 +1754,35 @@ entry: ret i64 %conv6 } +; i32 non saturate + +define i32 @ustest_f16i32_nsat(half %x) { +; CHECK-LABEL: ustest_f16i32_nsat: +; CHECK: .functype ustest_f16i32_nsat (f32) -> (i32) +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: i32.const 31 +; CHECK-NEXT: i32.shr_s +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.and +; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32.gt_s +; CHECK-NEXT: i32.select +; CHECK-NEXT: # fallthrough-return + %conv = fptosi half %x to i32 + %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv) + %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0) + ret i32 %spec.store.select7 +} + declare i32 @llvm.smin.i32(i32, i32) declare i32 @llvm.smax.i32(i32, i32) declare i32 @llvm.umin.i32(i32, i32) diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll index 7190e16..52f57dc 100644 --- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll @@ -209,9 +209,9 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32(<4 x half> %x) { -; CHECK-LABEL: utesth_f16i32: -; CHECK: .functype utesth_f16i32 (f32, f32, f32, f32) -> (v128) +define <4 x i32> @utest_f16i32(<4 x half> %x) { +; CHECK-LABEL: utest_f16i32: +; CHECK: .functype utest_f16i32 (f32, f32, f32, f32) -> (v128) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: call __truncsfhf2 @@ -513,9 +513,9 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16(<8 x half> %x) { -; CHECK-LABEL: utesth_f16i16: -; CHECK: .functype utesth_f16i16 
(f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) +define <8 x i16> @utest_f16i16(<8 x half> %x) { +; CHECK-LABEL: utest_f16i16: +; CHECK: .functype utest_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) ; CHECK-NEXT: .local v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 5 @@ -1295,9 +1295,9 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64(<2 x half> %x) { -; CHECK-LABEL: utesth_f16i64: -; CHECK: .functype utesth_f16i64 (f32, f32) -> (v128) +define <2 x i64> @utest_f16i64(<2 x half> %x) { +; CHECK-LABEL: utest_f16i64: +; CHECK: .functype utest_f16i64 (f32, f32) -> (v128) ; CHECK-NEXT: .local i32, i64, i64, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -1649,9 +1649,9 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { -; CHECK-LABEL: utesth_f16i32_mm: -; CHECK: .functype utesth_f16i32_mm (f32, f32, f32, f32) -> (v128) +define <4 x i32> @utest_f16i32_mm(<4 x half> %x) { +; CHECK-LABEL: utest_f16i32_mm: +; CHECK: .functype utest_f16i32_mm (f32, f32, f32, f32) -> (v128) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: call __truncsfhf2 @@ -1938,9 +1938,9 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { -; CHECK-LABEL: utesth_f16i16_mm: -; CHECK: .functype utesth_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) +define <8 x i16> @utest_f16i16_mm(<8 x half> %x) { +; CHECK-LABEL: utest_f16i16_mm: +; CHECK: .functype utest_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) ; CHECK-NEXT: .local v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 5 @@ -2673,9 +2673,9 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { -; CHECK-LABEL: utesth_f16i64_mm: -; CHECK: .functype utesth_f16i64_mm (f32, f32) -> (v128) +define <2 x i64> @utest_f16i64_mm(<2 x half> %x) { +; CHECK-LABEL: utest_f16i64_mm: +; CHECK: .functype utest_f16i64_mm (f32, f32) -> (v128) ; CHECK-NEXT: .local i32, i64, i64, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -2810,6 +2810,48 @@ entry: ret <2 x i64> %conv6 } +; i32 non saturate + +define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) { +; CHECK-LABEL: ustest_f16i32_nsat: +; CHECK: .functype ustest_f16i32_nsat (f32, f32, f32, f32) -> (v128) +; CHECK-NEXT: .local v128 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: local.set 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i32x4.splat +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i32x4.replace_lane 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i32x4.replace_lane 2 +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i32x4.replace_lane 3 +; CHECK-NEXT: v128.const 0, 0, 0, 0 +; CHECK-NEXT: local.tee 4 +; CHECK-NEXT: i32x4.min_s +; CHECK-NEXT: local.get 4 +; CHECK-NEXT: i32x4.max_s +; CHECK-NEXT: # fallthrough-return +entry: + %conv = fptosi <4 x half> %x to <4 x i32> + %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv) + %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> 
zeroinitializer) + ret <4 x i32> %spec.store.select7 +} + declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>) diff --git a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll index dec829f..44cf4e8 100644 --- a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll +++ b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll @@ -911,7 +911,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in. ; SSE2-NEXT: paddb (%rsi), %xmm0 ; SSE2-NEXT: paddb 16(%rsi), %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: paddb (%rdx), %xmm0 ; SSE2-NEXT: movdqa %xmm0, (%rcx) @@ -1898,7 +1898,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in. ; SSE2-NEXT: paddb (%rsi), %xmm0 ; SSE2-NEXT: paddb 32(%rsi), %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE2-NEXT: movdqa %xmm0, %xmm3 ; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3] @@ -4155,7 +4155,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in. ; SSE2-NEXT: paddb (%rsi), %xmm0 ; SSE2-NEXT: paddb 48(%rsi), %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,0,0] ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] ; SSE2-NEXT: paddb (%rdx), %xmm2 diff --git a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll index 3d4cddb..89b5c33 100644 --- a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll +++ b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll @@ -769,7 +769,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in. ; SSE2-LABEL: vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0] ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: paddb (%rsi), %xmm1 ; SSE2-NEXT: movdqa %xmm1, (%rdx) @@ -1522,7 +1522,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in. ; SSE2-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3] @@ -3335,7 +3335,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in. 
; SSE2: # %bb.0: ; SSE2-NEXT: movdqa (%rdi), %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,0,0] ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] ; SSE2-NEXT: paddb (%rsi), %xmm2 diff --git a/llvm/test/CodeGen/X86/fpclamptosat.ll b/llvm/test/CodeGen/X86/fpclamptosat.ll index 3f5ec7b..67483be 100644 --- a/llvm/test/CodeGen/X86/fpclamptosat.ll +++ b/llvm/test/CodeGen/X86/fpclamptosat.ll @@ -161,8 +161,8 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32(half %x) nounwind { -; CHECK-LABEL: utesth_f16i32: +define i32 @utest_f16i32(half %x) nounwind { +; CHECK-LABEL: utest_f16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq __extendhfsf2@PLT @@ -360,8 +360,8 @@ entry: ret i16 %conv6 } -define i16 @utesth_f16i16(half %x) nounwind { -; CHECK-LABEL: utesth_f16i16: +define i16 @utest_f16i16(half %x) nounwind { +; CHECK-LABEL: utest_f16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq __extendhfsf2@PLT @@ -566,8 +566,8 @@ entry: ret i64 %conv6 } -define i64 @utesth_f16i64(half %x) nounwind { -; CHECK-LABEL: utesth_f16i64: +define i64 @utest_f16i64(half %x) nounwind { +; CHECK-LABEL: utest_f16i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq __fixunshfti@PLT @@ -762,8 +762,8 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32_mm(half %x) nounwind { -; CHECK-LABEL: utesth_f16i32_mm: +define i32 @utest_f16i32_mm(half %x) nounwind { +; CHECK-LABEL: utest_f16i32_mm: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq __extendhfsf2@PLT @@ -946,8 +946,8 @@ entry: ret i16 %conv6 } -define i16 @utesth_f16i16_mm(half %x) nounwind { -; CHECK-LABEL: utesth_f16i16_mm: +define i16 @utest_f16i16_mm(half %x) nounwind { +; CHECK-LABEL: utest_f16i16_mm: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq __extendhfsf2@PLT @@ -1131,8 +1131,8 @@ entry: ret i64 %conv6 } -define i64 @utesth_f16i64_mm(half %x) nounwind { -; CHECK-LABEL: utesth_f16i64_mm: +define i64 @utest_f16i64_mm(half %x) nounwind { +; CHECK-LABEL: utest_f16i64_mm: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq __fixunshfti@PLT @@ -1170,6 +1170,27 @@ entry: ret i64 %conv6 } +; i32 non saturate + +define i32 @ustest_f16i32_nsat(half %x) nounwind { +; CHECK-LABEL: ustest_f16i32_nsat: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq __extendhfsf2@PLT +; CHECK-NEXT: cvttss2si %xmm0, %ecx +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: sarl $31, %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: andl %ecx, %eax +; CHECK-NEXT: cmovlel %edx, %eax +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: retq + %conv = fptosi half %x to i32 + %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv) + %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0) + ret i32 %spec.store.select7 +} + declare i32 @llvm.smin.i32(i32, i32) declare i32 @llvm.smax.i32(i32, i32) declare i32 @llvm.umin.i32(i32, i32) diff --git a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll index 1a2cfd6..991ce33 100644 --- a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll @@ -747,8 +747,8 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind { -; SSE-LABEL: utesth_f16i32: +define <4 x i32> @utest_f16i32(<4 x half> %x) nounwind { +; 
SSE-LABEL: utest_f16i32: ; SSE: # %bb.0: # %entry ; SSE-NEXT: subq $72, %rsp ; SSE-NEXT: movaps %xmm0, %xmm1 @@ -835,7 +835,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind { ; SSE-NEXT: addq $72, %rsp ; SSE-NEXT: retq ; -; AVX2-LABEL: utesth_f16i32: +; AVX2-LABEL: utest_f16i32: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1 ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm2 @@ -893,7 +893,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: utesth_f16i32: +; AVX512-LABEL: utest_f16i32: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512-NEXT: vcvttps2uqq %ymm0, %zmm0 @@ -1338,8 +1338,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16(<8 x half> %x) nounwind { -; SSE-LABEL: utesth_f16i16: +define <8 x i16> @utest_f16i16(<8 x half> %x) nounwind { +; SSE-LABEL: utest_f16i16: ; SSE: # %bb.0: # %entry ; SSE-NEXT: subq $72, %rsp ; SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill @@ -1436,7 +1436,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) nounwind { ; SSE-NEXT: addq $72, %rsp ; SSE-NEXT: retq ; -; AVX2-LABEL: utesth_f16i16: +; AVX2-LABEL: utest_f16i16: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: vcvtph2ps %xmm0, %ymm0 ; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9] @@ -1453,7 +1453,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) nounwind { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: utesth_f16i16: +; AVX512-LABEL: utest_f16i16: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0 ; AVX512-NEXT: vcvttps2udq %ymm0, %ymm0 @@ -2456,8 +2456,8 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64(<2 x half> %x) nounwind { -; SSE-LABEL: utesth_f16i64: +define <2 x i64> @utest_f16i64(<2 x half> %x) nounwind { +; SSE-LABEL: utest_f16i64: ; SSE: # %bb.0: # %entry ; SSE-NEXT: pushq %r14 ; SSE-NEXT: pushq %rbx @@ -2483,7 +2483,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) nounwind { ; SSE-NEXT: popq %r14 ; SSE-NEXT: retq ; -; AVX2-LABEL: utesth_f16i64: +; AVX2-LABEL: utest_f16i64: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: pushq %r14 ; AVX2-NEXT: pushq %rbx @@ -2508,7 +2508,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) nounwind { ; AVX2-NEXT: popq %r14 ; AVX2-NEXT: retq ; -; AVX512-LABEL: utesth_f16i64: +; AVX512-LABEL: utest_f16i64: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: pushq %r14 ; AVX512-NEXT: pushq %rbx @@ -3359,8 +3359,8 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind { -; SSE-LABEL: utesth_f16i32_mm: +define <4 x i32> @utest_f16i32_mm(<4 x half> %x) nounwind { +; SSE-LABEL: utest_f16i32_mm: ; SSE: # %bb.0: # %entry ; SSE-NEXT: subq $72, %rsp ; SSE-NEXT: movaps %xmm0, %xmm1 @@ -3447,7 +3447,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind { ; SSE-NEXT: addq $72, %rsp ; SSE-NEXT: retq ; -; AVX2-LABEL: utesth_f16i32_mm: +; AVX2-LABEL: utest_f16i32_mm: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1 ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm2 @@ -3505,7 +3505,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: utesth_f16i32_mm: +; AVX512-LABEL: utest_f16i32_mm: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512-NEXT: vcvttps2uqq %ymm0, %zmm0 @@ -3935,8 +3935,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) nounwind { 
-; SSE-LABEL: utesth_f16i16_mm: +define <8 x i16> @utest_f16i16_mm(<8 x half> %x) nounwind { +; SSE-LABEL: utest_f16i16_mm: ; SSE: # %bb.0: # %entry ; SSE-NEXT: subq $72, %rsp ; SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill @@ -4033,7 +4033,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) nounwind { ; SSE-NEXT: addq $72, %rsp ; SSE-NEXT: retq ; -; AVX2-LABEL: utesth_f16i16_mm: +; AVX2-LABEL: utest_f16i16_mm: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: vcvtph2ps %xmm0, %ymm0 ; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9] @@ -4050,7 +4050,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) nounwind { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: utesth_f16i16_mm: +; AVX512-LABEL: utest_f16i16_mm: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0 ; AVX512-NEXT: vcvttps2udq %ymm0, %ymm0 @@ -4820,8 +4820,8 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) nounwind { -; SSE-LABEL: utesth_f16i64_mm: +define <2 x i64> @utest_f16i64_mm(<2 x half> %x) nounwind { +; SSE-LABEL: utest_f16i64_mm: ; SSE: # %bb.0: # %entry ; SSE-NEXT: pushq %r14 ; SSE-NEXT: pushq %rbx @@ -4847,7 +4847,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) nounwind { ; SSE-NEXT: popq %r14 ; SSE-NEXT: retq ; -; AVX2-LABEL: utesth_f16i64_mm: +; AVX2-LABEL: utest_f16i64_mm: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: pushq %r14 ; AVX2-NEXT: pushq %rbx @@ -4872,7 +4872,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) nounwind { ; AVX2-NEXT: popq %r14 ; AVX2-NEXT: retq ; -; AVX512-LABEL: utesth_f16i64_mm: +; AVX512-LABEL: utest_f16i64_mm: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: pushq %r14 ; AVX512-NEXT: pushq %rbx @@ -4974,6 +4974,63 @@ entry: ret <2 x i64> %conv6 } +; i32 non saturate + +define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) nounwind { +; SSE-LABEL: ustest_f16i32_nsat: +; SSE: # %bb.0: # %entry +; SSE-NEXT: subq $72, %rsp +; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrld $16, %xmm1 +; SSE-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] +; SSE-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill +; SSE-NEXT: psrlq $48, %xmm0 +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; SSE-NEXT: cvttps2dq %xmm0, %xmm0 +; SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE-NEXT: cvttps2dq %xmm1, %xmm0 +; SSE-NEXT: punpcklqdq (%rsp), %xmm0 # 16-byte Folded Reload +; SSE-NEXT: # xmm0 = xmm0[0],mem[0] +; SSE-NEXT: pxor %xmm1, %xmm1 +; SSE-NEXT: pxor %xmm2, %xmm2 +; SSE-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE-NEXT: pand %xmm0, %xmm2 +; SSE-NEXT: movdqa %xmm2, %xmm0 +; SSE-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE-NEXT: 
pand %xmm2, %xmm0 +; SSE-NEXT: addq $72, %rsp +; SSE-NEXT: retq +; +; AVX-LABEL: ustest_f16i32_nsat: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 +; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %conv = fptosi <4 x half> %x to <4 x i32> + %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv) + %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer) + ret <4 x i32> %spec.store.select7 +} + declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>) diff --git a/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll b/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll index ecd9435..1766b4d 100644 --- a/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll +++ b/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll @@ -58,7 +58,7 @@ define <8 x float> @foo8(<8 x float> %v, ptr%p) nounwind { define <4 x i32> @undef_splatmask(<4 x i32> %v) nounwind { ; AVX2-LABEL: undef_splatmask: ; AVX2: # %bb.0: -; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3] +; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,2,2] ; AVX2-NEXT: retq %res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 2, i32 undef> %res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef> @@ -68,7 +68,7 @@ define <4 x i32> @undef_splatmask(<4 x i32> %v) nounwind { define <4 x i32> @undef_splatmask2(<4 x i32> %v) nounwind { ; AVX2-LABEL: undef_splatmask2: ; AVX2: # %bb.0: -; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3] +; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,2,2] ; AVX2-NEXT: retq %res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 undef> %res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef> @@ -78,7 +78,7 @@ define <4 x i32> @undef_splatmask2(<4 x i32> %v) nounwind { define <4 x i32> @undef_splatmask3(<4 x i32> %v) nounwind { ; AVX2-LABEL: undef_splatmask3: ; AVX2: # %bb.0: -; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3] +; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,2,2] ; AVX2-NEXT: retq %res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 2, i32 undef> %res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 3> @@ -88,7 +88,7 @@ define <4 x i32> @undef_splatmask3(<4 x i32> %v) nounwind { define <4 x i32> @undef_splatmask4(<4 x i32> %v, ptr %p) nounwind { ; AVX2-LABEL: undef_splatmask4: ; AVX2: # %bb.0: -; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,2,3,3] +; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,2,2,2] ; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3] ; AVX2-NEXT: vmovaps %xmm0, (%rdi) ; AVX2-NEXT: vmovaps %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll b/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll index 209d6a5..93a692c 100644 --- a/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll +++ b/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll @@ -1911,13 +1911,13 @@ define <2 x i64> @test_v2f64_ogt_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, ; SSE-32-NEXT: movl $0, %edx ; SSE-32-NEXT: cmoval %ecx, %edx ; SSE-32-NEXT: movd %edx, %xmm3 -; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] 
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-32-NEXT: ucomisd %xmm4, %xmm2 ; SSE-32-NEXT: cmoval %ecx, %eax ; SSE-32-NEXT: movd %eax, %xmm2 -; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] ; SSE-32-NEXT: pand %xmm3, %xmm0 ; SSE-32-NEXT: pandn %xmm1, %xmm3 @@ -2031,13 +2031,13 @@ define <2 x i64> @test_v2f64_oge_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, ; SSE-32-NEXT: movl $0, %edx ; SSE-32-NEXT: cmovael %ecx, %edx ; SSE-32-NEXT: movd %edx, %xmm3 -; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-32-NEXT: ucomisd %xmm4, %xmm2 ; SSE-32-NEXT: cmovael %ecx, %eax ; SSE-32-NEXT: movd %eax, %xmm2 -; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] ; SSE-32-NEXT: pand %xmm3, %xmm0 ; SSE-32-NEXT: pandn %xmm1, %xmm3 @@ -2151,13 +2151,13 @@ define <2 x i64> @test_v2f64_olt_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, ; SSE-32-NEXT: movl $0, %edx ; SSE-32-NEXT: cmoval %ecx, %edx ; SSE-32-NEXT: movd %edx, %xmm3 -; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-32-NEXT: ucomisd %xmm2, %xmm4 ; SSE-32-NEXT: cmoval %ecx, %eax ; SSE-32-NEXT: movd %eax, %xmm2 -; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] ; SSE-32-NEXT: pand %xmm3, %xmm0 ; SSE-32-NEXT: pandn %xmm1, %xmm3 @@ -2269,13 +2269,13 @@ define <2 x i64> @test_v2f64_ole_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, ; SSE-32-NEXT: movl $0, %edx ; SSE-32-NEXT: cmovael %ecx, %edx ; SSE-32-NEXT: movd %edx, %xmm3 -; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-32-NEXT: ucomisd %xmm2, %xmm4 ; SSE-32-NEXT: cmovael %ecx, %eax ; SSE-32-NEXT: movd %eax, %xmm2 -; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] ; SSE-32-NEXT: pand %xmm3, %xmm0 ; SSE-32-NEXT: pandn %xmm1, %xmm3 @@ -2680,13 +2680,13 @@ define <2 x i64> @test_v2f64_ugt_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, ; SSE-32-NEXT: movl $0, %edx ; SSE-32-NEXT: cmovbl %ecx, %edx ; SSE-32-NEXT: movd %edx, %xmm3 -; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-32-NEXT: ucomisd %xmm2, %xmm4 ; SSE-32-NEXT: cmovbl %ecx, %eax ; SSE-32-NEXT: movd %eax, %xmm2 -; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] ; SSE-32-NEXT: pand %xmm3, %xmm0 ; SSE-32-NEXT: pandn %xmm1, %xmm3 @@ -2798,13 +2798,13 @@ define <2 x i64> @test_v2f64_uge_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, ; SSE-32-NEXT: movl $0, %edx ; SSE-32-NEXT: cmovbel %ecx, %edx ; 
SSE-32-NEXT: movd %edx, %xmm3 -; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-32-NEXT: ucomisd %xmm2, %xmm4 ; SSE-32-NEXT: cmovbel %ecx, %eax ; SSE-32-NEXT: movd %eax, %xmm2 -; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] ; SSE-32-NEXT: pand %xmm3, %xmm0 ; SSE-32-NEXT: pandn %xmm1, %xmm3 @@ -2916,13 +2916,13 @@ define <2 x i64> @test_v2f64_ult_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, ; SSE-32-NEXT: movl $0, %edx ; SSE-32-NEXT: cmovbl %ecx, %edx ; SSE-32-NEXT: movd %edx, %xmm3 -; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-32-NEXT: ucomisd %xmm4, %xmm2 ; SSE-32-NEXT: cmovbl %ecx, %eax ; SSE-32-NEXT: movd %eax, %xmm2 -; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] ; SSE-32-NEXT: pand %xmm3, %xmm0 ; SSE-32-NEXT: pandn %xmm1, %xmm3 @@ -3036,13 +3036,13 @@ define <2 x i64> @test_v2f64_ule_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, ; SSE-32-NEXT: movl $0, %edx ; SSE-32-NEXT: cmovbel %ecx, %edx ; SSE-32-NEXT: movd %edx, %xmm3 -; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-32-NEXT: ucomisd %xmm4, %xmm2 ; SSE-32-NEXT: cmovbel %ecx, %eax ; SSE-32-NEXT: movd %eax, %xmm2 -; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] ; SSE-32-NEXT: pand %xmm3, %xmm0 ; SSE-32-NEXT: pandn %xmm1, %xmm3 diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll index 9ecc629..b378dce 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll @@ -162,7 +162,7 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind { define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v2i32: ; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; SSE2-NEXT: pslld $23, %xmm1 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 ; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 @@ -182,7 +182,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind ; ; SSE41-LABEL: splatvar_funnnel_v2i32: ; SSE41: # %bb.0: -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; SSE41-NEXT: pslld $23, %xmm1 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 @@ -200,7 +200,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind ; ; AVX1-LABEL: splatvar_funnnel_v2i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; AVX1-NEXT: vpslld $23, %xmm1, %xmm1 ; AVX1-NEXT: vpand 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 @@ -277,7 +277,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind ; ; XOPAVX1-LABEL: splatvar_funnnel_v2i32: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: retq ; @@ -289,7 +289,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind ; ; X86-SSE2-LABEL: splatvar_funnnel_v2i32: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; X86-SSE2-NEXT: pslld $23, %xmm1 ; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 ; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 diff --git a/llvm/test/CodeGen/X86/vector-fshl-sub128.ll b/llvm/test/CodeGen/X86/vector-fshl-sub128.ll index 322ebe2..06ff7e7 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-sub128.ll @@ -250,7 +250,7 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt) define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v2i32: ; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0] ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31] ; SSE2-NEXT: movdqa %xmm3, %xmm5 ; SSE2-NEXT: pandn %xmm4, %xmm5 @@ -286,7 +286,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> % ; ; SSE41-LABEL: splatvar_funnnel_v2i32: ; SSE41: # %bb.0: -; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm3 = [31,31,31,31] ; SSE41-NEXT: movdqa %xmm2, %xmm4 ; SSE41-NEXT: pandn %xmm3, %xmm4 @@ -316,7 +316,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> % ; ; AVX1-LABEL: splatvar_funnnel_v2i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31] ; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm5 = xmm4[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -423,7 +423,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> % ; ; XOPAVX1-LABEL: splatvar_funnnel_v2i32: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31] ; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4 ; XOPAVX1-NEXT: vpshld %xmm4, %xmm0, %xmm0 @@ -450,7 +450,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> % ; ; X86-SSE2-LABEL: splatvar_funnnel_v2i32: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1] +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0] ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31] ; X86-SSE2-NEXT: movdqa %xmm3, %xmm5 ; X86-SSE2-NEXT: pandn %xmm4, %xmm5 diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll index 178c02f..ef5ffe4 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll @@ -172,7 +172,7 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind { define <2 x i32> 
@splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v2i32: ; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: psubd %xmm1, %xmm2 ; SSE2-NEXT: pslld $23, %xmm2 @@ -194,7 +194,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind ; ; SSE41-LABEL: splatvar_funnnel_v2i32: ; SSE41: # %bb.0: -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; SSE41-NEXT: pxor %xmm2, %xmm2 ; SSE41-NEXT: psubd %xmm1, %xmm2 ; SSE41-NEXT: pslld $23, %xmm2 @@ -214,7 +214,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind ; ; AVX1-LABEL: splatvar_funnnel_v2i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vpslld $23, %xmm1, %xmm1 @@ -293,7 +293,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind ; ; XOPAVX1-LABEL: splatvar_funnnel_v2i32: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1 ; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0 @@ -309,7 +309,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind ; ; X86-SSE2-LABEL: splatvar_funnnel_v2i32: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; X86-SSE2-NEXT: pxor %xmm2, %xmm2 ; X86-SSE2-NEXT: psubd %xmm1, %xmm2 ; X86-SSE2-NEXT: pslld $23, %xmm2 diff --git a/llvm/test/CodeGen/X86/vector-fshr-sub128.ll b/llvm/test/CodeGen/X86/vector-fshr-sub128.ll index 372deb05..2d8670a 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-sub128.ll @@ -251,7 +251,7 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt) define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v2i32: ; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0] ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31] ; SSE2-NEXT: movdqa %xmm3, %xmm5 ; SSE2-NEXT: pand %xmm4, %xmm5 @@ -287,7 +287,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> % ; ; SSE41-LABEL: splatvar_funnnel_v2i32: ; SSE41: # %bb.0: -; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm3 = [31,31,31,31] ; SSE41-NEXT: movdqa %xmm2, %xmm4 ; SSE41-NEXT: pand %xmm3, %xmm4 @@ -317,7 +317,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> % ; ; AVX1-LABEL: splatvar_funnnel_v2i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31] ; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm5 = xmm4[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -425,7 +425,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> % ; ; XOPAVX1-LABEL: splatvar_funnnel_v2i32: ; XOPAVX1: # %bb.0: 
-; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31] ; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4 ; XOPAVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 @@ -452,7 +452,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> % ; ; X86-SSE2-LABEL: splatvar_funnnel_v2i32: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1] +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0] ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31] ; X86-SSE2-NEXT: movdqa %xmm3, %xmm5 ; X86-SSE2-NEXT: pand %xmm4, %xmm5 diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll index f57efb4..1e11ea9 100644 --- a/llvm/test/CodeGen/X86/vector-sext.ll +++ b/llvm/test/CodeGen/X86/vector-sext.ll @@ -1409,11 +1409,11 @@ define <2 x i64> @load_sext_2i1_to_2i64(ptr%ptr) { ; X86-SSE2-NEXT: movzbl %al, %eax ; X86-SSE2-NEXT: negl %eax ; X86-SSE2-NEXT: movd %eax, %xmm0 -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1] +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0] ; X86-SSE2-NEXT: andl $1, %ecx ; X86-SSE2-NEXT: negl %ecx ; X86-SSE2-NEXT: movd %ecx, %xmm0 -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X86-SSE2-NEXT: retl ; diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll index bd1a48b..7b0f1c9 100644 --- a/llvm/test/CodeGen/X86/vector-zext.ll +++ b/llvm/test/CodeGen/X86/vector-zext.ll @@ -2555,7 +2555,7 @@ entry: define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) { ; SSE2-LABEL: splatshuf_zext_v4i64: ; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: movdqa %xmm0, %xmm1 @@ -2563,7 +2563,7 @@ define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) { ; ; SSSE3-LABEL: splatshuf_zext_v4i64: ; SSSE3: # %bb.0: -; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSSE3-NEXT: pxor %xmm1, %xmm1 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSSE3-NEXT: movdqa %xmm0, %xmm1 @@ -2571,7 +2571,7 @@ define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) { ; ; SSE41-LABEL: splatshuf_zext_v4i64: ; SSE41: # %bb.0: -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: retq diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll index 19a31a6..31ed745 100644 --- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll +++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll @@ -911,7 +911,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in. 
; SSE2-NEXT: paddb (%rsi), %xmm0 ; SSE2-NEXT: paddb 16(%rsi), %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: paddb (%rdx), %xmm0 ; SSE2-NEXT: movdqa %xmm0, (%rcx) @@ -1898,7 +1898,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in. ; SSE2-NEXT: paddb (%rsi), %xmm0 ; SSE2-NEXT: paddb 32(%rsi), %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE2-NEXT: movdqa %xmm0, %xmm3 ; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3] @@ -4610,7 +4610,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in. ; SSE2-NEXT: paddb (%rsi), %xmm0 ; SSE2-NEXT: paddb 48(%rsi), %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; SSE2-NEXT: pxor %xmm1, %xmm1 @@ -6544,7 +6544,7 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in. ; SSE2-NEXT: movdqa (%rdi), %xmm0 ; SSE2-NEXT: paddb (%rsi), %xmm0 ; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: movdqa 16(%rdx), %xmm1 ; SSE2-NEXT: paddb %xmm0, %xmm1 diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll index 239472c..5b4cdd2 100644 --- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll +++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll @@ -769,7 +769,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in. ; SSE2-LABEL: vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0] ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: paddb (%rsi), %xmm1 ; SSE2-NEXT: movdqa %xmm1, (%rdx) @@ -1522,7 +1522,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in. ; SSE2-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3] @@ -3660,7 +3660,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in. 
; SSE2-LABEL: vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] ; SSE2-NEXT: pxor %xmm0, %xmm0 @@ -5250,7 +5250,7 @@ define void @vec512_i16_widen_to_i256_factor16_broadcast_to_v2i256_factor2(ptr % define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in.elt.ptr, ptr %out.vec.bias.ptr, ptr %out.vec.ptr) nounwind { ; SSE2-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8: ; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[0,0,0,0] ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: movdqa 16(%rsi), %xmm1 diff --git a/llvm/test/ThinLTO/X86/memprof-dups.ll b/llvm/test/ThinLTO/X86/memprof-dups.ll new file mode 100644 index 0000000..8accc83 --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof-dups.ll @@ -0,0 +1,138 @@ +;; Check that spurious duplicate (identical) clones are simply +;; created as aliases to the first identical copy, rather than creating +;; multiple clones that call the same callee clones or have the same +;; allocation types. This currently happens in some cases due to additional +;; cloning performed during function assignment. +;; +;; The ThinLTO combined summary was manually modified as described there +;; to force multiple identical copies of various functions. + +;; -stats requires asserts +; REQUIRES: asserts + +; RUN: rm -rf %t && split-file %s %t && cd %t +; RUN: llvm-as src.ll -o src.o +; RUN: llvm-as src.o.thinlto.ll -o src.o.thinlto.bc +; RUN: opt -passes=memprof-context-disambiguation -stats \ +; RUN: -memprof-import-summary=src.o.thinlto.bc \ +; RUN: -pass-remarks=memprof-context-disambiguation \ +; RUN: src.o -S 2>&1 | FileCheck %s + +; CHECK: created clone bar.memprof.1 +;; Duplicates of bar are created as declarations since bar is available_externally, +;; and the compiler does not fully support available_externally aliases. +; CHECK: created clone decl bar.memprof.2 +; CHECK: created clone decl bar.memprof.3 +; CHECK: created clone _Z3foov.memprof.1 +;; Duplicates of _Z3foov are created as aliases to the appropriate materialized +;; clone of _Z3foov. +; CHECK: created clone alias _Z3foov.memprof.2 +; CHECK: created clone alias _Z3foov.memprof.3 + +;--- src.ll source_filename = "memprof-distrib-alias.ll" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @_Z8fooAliasv = alias ptr (...), ptr @_Z3foov ;; Original alias is unchanged. ; CHECK: @_Z8fooAliasv = alias ptr (...), ptr @_Z3foov{{$}} ;; We create an equivalent alias for the cloned def @_Z3foov.memprof.1. ; CHECK: @_Z8fooAliasv.memprof.1 = alias ptr (...), ptr @_Z3foov.memprof.1 ;; We should also create aliases for the duplicate clones of _Z3foov ;; (_Z3foov.memprof.2 and _Z3foov.memprof.3) to the versions they are duplicates ;; of, and ditto for the associated @_Z8fooAliasv clones.
+;; +;; _Z3foov.memprof.2 is a duplicate of original _Z3foov, and thus so is _Z8fooAliasv.memprof.2 +; CHECK: @_Z3foov.memprof.2 = alias ptr (), ptr @_Z3foov{{$}} +; CHECK: @_Z8fooAliasv.memprof.2 = alias ptr (...), ptr @_Z3foov{{$}} +;; _Z3foov.memprof.3 is a duplicate of _Z3foov.memprof.1, and thus so is _Z8fooAliasv.memprof.3 +; CHECK: @_Z3foov.memprof.3 = alias ptr (), ptr @_Z3foov.memprof.1 +; CHECK: @_Z8fooAliasv.memprof.3 = alias ptr (...), ptr @_Z3foov.memprof.1 + +; CHECK-LABEL: define i32 @main() +define i32 @main() #0 { +entry: + ;; The first call to bar does not allocate cold memory. It should call + ;; the original function, which eventually calls the original allocation + ;; decorated with a "notcold" attribute. + ; CHECK: call {{.*}} @bar() + %call = call ptr @bar(), !callsite !0 + ;; The second call to bar allocates cold memory. It should call the cloned + ;; function which eventually calls a cloned allocation decorated with a + ;; "cold" attribute. + ; CHECK: call {{.*}} @bar.memprof.1() + %call1 = call ptr @bar(), !callsite !1 + ret i32 0 +} + +; CHECK-LABEL: define available_externally i32 @bar() +define available_externally i32 @bar() #0 { +entry: + ; CHECK: call {{.*}} @_Z8fooAliasv() + %call = call ptr @_Z8fooAliasv(), !callsite !8 + ret i32 0 +} + +declare ptr @_Znam(i64) + +; CHECK-LABEL: define ptr @_Z3foov() +define ptr @_Z3foov() #0 { +entry: + ; CHECK: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]] + %call = call ptr @_Znam(i64 0), !memprof !2, !callsite !7 + ret ptr null +} + +; We create an actual clone for bar.memprof.1. +; CHECK: define available_externally i32 @bar.memprof.1() +; CHECK: call {{.*}} @_Z3foov.memprof.1() + +;; bar.memprof.2 and bar.memprof.3 are duplicates (of original bar and +;; bar.memprof.1, respectively). However, they are available externally, +;; so rather than create an alias we simply create a declaration, since the +;; compiler does not fully support available_externally aliases. +; CHECK: declare i32 @bar.memprof.2 +; CHECK: declare i32 @bar.memprof.3 + +; We create an actual clone for foo.memprof.1. +; CHECK: define {{.*}} @_Z3foov.memprof.1() +; CHECK: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]] + +; CHECK: attributes #[[NOTCOLD]] = { "memprof"="notcold" } +; CHECK: attributes #[[COLD]] = { "memprof"="cold" } + +; CHECK: 4 memprof-context-disambiguation - Number of function clone duplicates detected during ThinLTO backend +; CHECK: 2 memprof-context-disambiguation - Number of function clones created during ThinLTO backend + +attributes #0 = { noinline optnone } + +!0 = !{i64 8632435727821051414} +!1 = !{i64 -3421689549917153178} +!2 = !{!3, !5} +!3 = !{!4, !"notcold"} +!4 = !{i64 9086428284934609951, i64 1234, i64 8632435727821051414} +!5 = !{!6, !"cold"} +!6 = !{i64 9086428284934609951, i64 1234, i64 -3421689549917153178} +!7 = !{i64 9086428284934609951} +!8 = !{i64 1234} + +;--- src.o.thinlto.ll +; ModuleID = 'src.o.thinlto.ll' +source_filename = "src.o.thinlto.bc" + +^0 = module: (path: "src.o", hash: (1720506022, 1575514144, 2506794664, 3599359797, 3160884478)) +^1 = gv: (guid: 6583049656999245004, summaries: (alias: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), aliasee: ^2))) +;; Summary for _Z3foov, where the allocs part has been manually modified to add +;; two additional clones that are the same as the prior versions: +;; ... allocs: ((versions: (notcold, cold, notcold, cold), ...
+^2 = gv: (guid: 9191153033785521275, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (notcold, cold, notcold, cold), memProf: ((type: notcold, stackIds: (1234, 8632435727821051414)), (type: cold, stackIds: (1234, 15025054523792398438)))))))) +^3 = gv: (guid: 15822663052811949562, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 3, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^4)), callsites: ((callee: ^4, clones: (0), stackIds: (8632435727821051414)), (callee: ^4, clones: (1), stackIds: (15025054523792398438)))))) +;; Summary for bar, where the callsites part has been manually modified to add +;; two additional clones that are the same as the prior clones: +;; ... callsites: ((callee: ^1, clones: (0, 1, 0, 1), ... +^4 = gv: (guid: 16434608426314478903, summaries: (function: (module: ^0, flags: (linkage: available_externally, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^1)), callsites: ((callee: ^1, clones: (0, 1, 0, 1), stackIds: (1234)))))) +^6 = flags: 353 +^7 = blockcount: 0 diff --git a/llvm/test/ThinLTO/X86/memprof_imported_internal.ll b/llvm/test/ThinLTO/X86/memprof_imported_internal.ll index a6e254c..09784f8 100644 --- a/llvm/test/ThinLTO/X86/memprof_imported_internal.ll +++ b/llvm/test/ThinLTO/X86/memprof_imported_internal.ll @@ -63,14 +63,14 @@ ; CHECK: tail call void @_ZL9internal1v.llvm.3267420853450984672() ; CHECK: tail call void @_ZL9internal2v.llvm.3267420853450984672.memprof.1() ; CHECK-LABEL: declare void @_ZL9internal2v.llvm.3267420853450984672.memprof.1() -;; We should have 2 clones of src2.cc's internal1 function, calling a single -;; clone of external2. +;; We should have one clone of src2.cc's internal1 function, calling a single +;; clone of external2, and a second clone that was detected to be a duplicate +;; of the first that becomes a declaration (since this is available_externally - +;; in the module with the prevailing copy it would be an alias to clone 1). 
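The policy these comments describe is compact enough to sketch. The following standalone C++ fragment is an illustrative reconstruction only, not the actual ThinLTO backend code; it assumes, as the test comments state, that the choice hinges solely on whether the duplicated function is available_externally:

// Illustrative only: an identical (duplicate) clone never gets its own
// body. In the module with the prevailing definition it aliases the
// first identical copy; an available_externally duplicate becomes a
// bare declaration, since available_externally aliases are not fully
// supported.
enum class DupCloneLowering { Alias, Declaration };

DupCloneLowering lowerDuplicateClone(bool IsAvailableExternally) {
  return IsAvailableExternally ? DupCloneLowering::Declaration
                               : DupCloneLowering::Alias;
}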
; CHECK-LABEL: define available_externally void @_ZL9internal1v.llvm.3267420853450984672.memprof.1() ; CHECK: tail call void @_Z9external2v.memprof.1() ; CHECK: tail call void @_Z9external2v.memprof.1() -; CHECK-LABEL: define available_externally void @_ZL9internal1v.llvm.3267420853450984672.memprof.2() -; CHECK: tail call void @_Z9external2v.memprof.1() -; CHECK: tail call void @_Z9external2v.memprof.1() +; CHECK: declare void @_ZL9internal1v.llvm.3267420853450984672.memprof.2() ; CHECK-NOT: memprof ;--- src1.ll diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll index 8f76834..67ab167 100644 --- a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll @@ -16,6 +16,14 @@ define <2 x double> @load_zeromask(ptr %ptr, <2 x double> %passthru) { ret <2 x double> %res } +define <2 x double> @load_zero_withpoison_mask(ptr %ptr, <2 x double> %passthru) { +; CHECK-LABEL: @load_zero_withpoison_mask( +; CHECK-NEXT: ret <2 x double> [[PASSTHRU:%.*]] +; + %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %ptr, i32 1, <2 x i1> <i1 0, i1 poison>, <2 x double> %passthru) + ret <2 x double> %res +} + define <2 x double> @load_onemask(ptr %ptr, <2 x double> %passthru) { ; CHECK-LABEL: @load_onemask( ; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <2 x double>, ptr [[PTR:%.*]], align 2 @@ -150,6 +158,14 @@ define void @store_zeromask(ptr %ptr, <2 x double> %val) { ret void } +define void @store_poisonmask(ptr %ptr, <2 x double> %val) { +; CHECK-LABEL: @store_poisonmask( +; CHECK-NEXT: ret void +; + call void @llvm.masked.store.v2f64.p0(<2 x double> %val, ptr %ptr, i32 4, <2 x i1> splat(i1 poison)) + ret void +} + define void @store_onemask(ptr %ptr, <2 x double> %val) { ; CHECK-LABEL: @store_onemask( ; CHECK-NEXT: store <2 x double> [[VAL:%.*]], ptr [[PTR:%.*]], align 4 @@ -159,6 +175,15 @@ define void @store_onemask(ptr %ptr, <2 x double> %val) { ret void } +define void @store_one_withpoison_mask(ptr %ptr, <2 x double> %val) { +; CHECK-LABEL: @store_one_withpoison_mask( +; CHECK-NEXT: store <2 x double> [[VAL:%.*]], ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: ret void +; + call void @llvm.masked.store.v2f64.p0(<2 x double> %val, ptr %ptr, i32 4, <2 x i1> <i1 1, i1 poison>) + ret void +} + define void @store_demandedelts(ptr %ptr, double %val) { ; CHECK-LABEL: @store_demandedelts( ; CHECK-NEXT: [[VALVEC1:%.*]] = insertelement <2 x double> poison, double [[VAL:%.*]], i64 0 @@ -189,6 +214,13 @@ define <2 x double> @gather_zeromask(<2 x ptr> %ptrs, <2 x double> %passthru) { ret <2 x double> %res } +define <2 x double> @gather_zero_withpoison_mask(<2 x ptr> %ptrs, <2 x double> %passthru) { +; CHECK-LABEL: @gather_zero_withpoison_mask( +; CHECK-NEXT: ret <2 x double> [[PASSTHRU:%.*]] +; + %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> <i1 0, i1 poison>, <2 x double> %passthru) + ret <2 x double> %res +} define <2 x double> @gather_onemask(<2 x ptr> %ptrs, <2 x double> %passthru) { ; CHECK-LABEL: @gather_onemask( @@ -199,6 +231,15 @@ define <2 x double> @gather_onemask(<2 x ptr> %ptrs, <2 x double> %passthru) { ret <2 x double> %res } +define <2 x double> @gather_one_withpoisonmask(<2 x ptr> %ptrs, <2 x double> %passthru) { +; CHECK-LABEL: @gather_one_withpoisonmask( +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[PTRS:%.*]], i32 4, <2 x i1> <i1 true, i1 poison>, <2 x double> [[PASSTHRU:%.*]]) +; 
CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> <i1 true, i1 poison>, <2 x double> %passthru) + ret <2 x double> %res +} + define <4 x double> @gather_lane2(ptr %base, double %pt) { ; CHECK-LABEL: @gather_lane2( ; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, ptr [[BASE:%.*]], <4 x i64> <i64 poison, i64 poison, i64 2, i64 poison> @@ -257,6 +298,23 @@ define void @scatter_zeromask(<2 x ptr> %ptrs, <2 x double> %val) { ret void } +define void @scatter_zero_withpoison_mask(<2 x ptr> %ptrs, <2 x double> %val) { +; CHECK-LABEL: @scatter_zero_withpoison_mask( +; CHECK-NEXT: ret void +; + call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> %val, <2 x ptr> %ptrs, i32 8, <2 x i1> <i1 0, i1 poison>) + ret void +} + +define void @scatter_one_withpoison_mask(<2 x ptr> %ptrs, <2 x double> %val) { +; CHECK-LABEL: @scatter_one_withpoison_mask( +; CHECK-NEXT: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> [[VAL:%.*]], <2 x ptr> [[PTRS:%.*]], i32 8, <2 x i1> <i1 true, i1 poison>) +; CHECK-NEXT: ret void +; + call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> %val, <2 x ptr> %ptrs, i32 8, <2 x i1> <i1 1, i1 poison>) + ret void +} + define void @scatter_demandedelts(ptr %ptr, double %val) { ; CHECK-LABEL: @scatter_demandedelts( ; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, ptr [[PTR:%.*]], <2 x i64> <i64 0, i64 poison> diff --git a/llvm/test/Transforms/InstCombine/pr83947.ll b/llvm/test/Transforms/InstCombine/pr83947.ll index 1906502..679230a4 100644 --- a/llvm/test/Transforms/InstCombine/pr83947.ll +++ b/llvm/test/Transforms/InstCombine/pr83947.ll @@ -24,7 +24,6 @@ define void @masked_scatter2() { define void @masked_scatter3() { ; CHECK-LABEL: define void @masked_scatter3() { -; CHECK-NEXT: store i32 0, ptr @c, align 4 ; CHECK-NEXT: ret void ; call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> zeroinitializer, <2 x ptr> splat (ptr @c), i32 4, <2 x i1> undef) @@ -50,7 +49,6 @@ define void @masked_scatter5() { define void @masked_scatter6() { ; CHECK-LABEL: define void @masked_scatter6() { -; CHECK-NEXT: store i32 0, ptr @c, align 4 ; CHECK-NEXT: ret void ; call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> zeroinitializer, <2 x ptr> splat (ptr @c), i32 4, <2 x i1> <i1 undef, i1 false>) diff --git a/llvm/test/Transforms/InstCombine/select-and-cmp.ll b/llvm/test/Transforms/InstCombine/select-and-cmp.ll index 50e1493..26c04ad 100644 --- a/llvm/test/Transforms/InstCombine/select-and-cmp.ll +++ b/llvm/test/Transforms/InstCombine/select-and-cmp.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt < %s -passes=instcombine -S | FileCheck %s define i32 @select_and_icmp(i32 %x, i32 %y, i32 %z) { @@ -114,34 +114,34 @@ define i32 @select_and_icmp_inv(i32 %x, i32 %y, i32 %z) { ; Below used to be negative tests in InstSimplify, but are no more negative cases here -define i32 @select_and_icmp_pred_bad_1(i32 %x, i32 %y, i32 %z) { +define i32 @select_and_icmp_pred_bad_1(i32 %x, i32 %y, i32 %z) !prof !0 { ; CHECK-LABEL: @select_and_icmp_pred_bad_1( -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 [[X:%.*]] ; %A = icmp eq i32 %x, %z %B = icmp ne i32 %y, %z %C = and i1 %A, %B - %D = select i1 %C, i32 %z, i32 %x + %D = select i1 %C, i32 %z, i32 %x, !prof !1 ret i32 %D } -define i32 @select_and_icmp_pred_bad_2(i32 %x, i32 %y, i32 %z) { +define i32 
@select_and_icmp_pred_bad_2(i32 %x, i32 %y, i32 %z) !prof !0 { ; CHECK-LABEL: @select_and_icmp_pred_bad_2( ; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]] -; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[Z]], i32 [[X]] +; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[Z]], i32 [[X:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK-NEXT: ret i32 [[D]] ; %A = icmp ne i32 %x, %z %B = icmp eq i32 %y, %z %C = and i1 %A, %B - %D = select i1 %C, i32 %z, i32 %x + %D = select i1 %C, i32 %z, i32 %x, !prof !1 ret i32 %D } define i32 @select_and_icmp_pred_bad_3(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @select_and_icmp_pred_bad_3( -; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]] -; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[X]], i32 [[Z]] +; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[X:%.*]], i32 [[Z]] ; CHECK-NEXT: ret i32 [[D]] ; %A = icmp ne i32 %x, %z @@ -153,8 +153,8 @@ define i32 @select_and_icmp_pred_bad_3(i32 %x, i32 %y, i32 %z) { define i32 @select_and_icmp_pred_bad_4(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @select_and_icmp_pred_bad_4( -; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]] -; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[Z]], i32 [[X]] +; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[Z]], i32 [[X:%.*]] ; CHECK-NEXT: ret i32 [[D]] ; %A = icmp eq i32 %x, %z @@ -166,7 +166,7 @@ define i32 @select_and_icmp_pred_bad_4(i32 %x, i32 %y, i32 %z) { define i32 @select_and_icmp_alt_bad_1(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @select_and_icmp_alt_bad_1( -; CHECK-NEXT: ret i32 [[Z]] +; CHECK-NEXT: ret i32 [[Z:%.*]] ; %A = icmp eq i32 %x, %z %B = icmp ne i32 %y, %z @@ -177,8 +177,8 @@ define i32 @select_and_icmp_alt_bad_1(i32 %x, i32 %y, i32 %z) { define i32 @select_and_icmp_alt_bad_2(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @select_and_icmp_alt_bad_2( -; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]] -; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[X]], i32 [[Z]] +; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[X:%.*]], i32 [[Z]] ; CHECK-NEXT: ret i32 [[D]] ; %A = icmp ne i32 %x, %z @@ -191,8 +191,8 @@ define i32 @select_and_icmp_alt_bad_2(i32 %x, i32 %y, i32 %z) { define i32 @select_and_icmp_alt_bad_3(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @select_and_icmp_alt_bad_3( -; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]] -; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[Z]], i32 [[X]] +; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[Z]], i32 [[X:%.*]] ; CHECK-NEXT: ret i32 [[D]] ; %A = icmp ne i32 %x, %z @@ -204,8 +204,8 @@ define i32 @select_and_icmp_alt_bad_3(i32 %x, i32 %y, i32 %z) { define i32 @select_and_icmp_alt_bad_4(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @select_and_icmp_alt_bad_4( -; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]] -; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[X]], i32 [[Z]] +; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[X:%.*]], i32 [[Z]] ; CHECK-NEXT: ret i32 [[D]] ; %A = icmp eq i32 %x, %z @@ -322,3 +322,11 @@ define i32 @select_and_icmp_alt_bad_false_val(i32 %x, i32 %y, i32 %z, i32 %k) { %D = select i1 %C, i32 %x, i32 %k ret i32 %D } + +!0 = !{!"function_entry_count", i64 1000} +!1 = !{!"branch_weights", i32 2, i32 3} + +;. 
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 2, i32 3} +;. diff --git a/llvm/test/Transforms/InstCombine/select-or-cmp.ll b/llvm/test/Transforms/InstCombine/select-or-cmp.ll index 72a3747..82b069b 100644 --- a/llvm/test/Transforms/InstCombine/select-or-cmp.ll +++ b/llvm/test/Transforms/InstCombine/select-or-cmp.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt < %s -passes=instcombine -S | FileCheck %s define i32 @select_or_icmp(i32 %x, i32 %y, i32 %z) { @@ -114,47 +114,47 @@ define i32 @select_or_icmp_inv(i32 %x, i32 %y, i32 %z) { ; Below used to be negative tests in InstSimplify, but are no more negative cases here -define i32 @select_and_icmp_pred_bad_1(i32 %x, i32 %y, i32 %z) { +define i32 @select_and_icmp_pred_bad_1(i32 %x, i32 %y, i32 %z) !prof !0 { ; CHECK-LABEL: @select_and_icmp_pred_bad_1( -; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]] -; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[X]], i32 [[Z]] +; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[X:%.*]], i32 [[Z]], !prof [[PROF1:![0-9]+]] ; CHECK-NEXT: ret i32 [[D]] ; %A = icmp eq i32 %x, %z %B = icmp ne i32 %y, %z %C = or i1 %A, %B - %D = select i1 %C, i32 %z, i32 %x + %D = select i1 %C, i32 %z, i32 %x, !prof !1 ret i32 %D } -define i32 @select_and_icmp_pred_bad_2(i32 %x, i32 %y, i32 %z) { +define i32 @select_and_icmp_pred_bad_2(i32 %x, i32 %y, i32 %z) !prof !0 { ; CHECK-LABEL: @select_and_icmp_pred_bad_2( -; CHECK-NEXT: ret i32 [[Z]] +; CHECK-NEXT: ret i32 [[Z:%.*]] ; %A = icmp ne i32 %x, %z %B = icmp eq i32 %y, %z %C = or i1 %A, %B - %D = select i1 %C, i32 %z, i32 %x + %D = select i1 %C, i32 %z, i32 %x, !prof !1 ret i32 %D } -define i32 @select_and_icmp_pred_bad_3(i32 %x, i32 %y, i32 %z) { +define i32 @select_and_icmp_pred_bad_3(i32 %x, i32 %y, i32 %z) !prof !0 { ; CHECK-LABEL: @select_and_icmp_pred_bad_3( -; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]] -; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[Z]], i32 [[X]] +; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[Z]], i32 [[X:%.*]], !prof [[PROF2:![0-9]+]] ; CHECK-NEXT: ret i32 [[D]] ; %A = icmp eq i32 %x, %z %B = icmp eq i32 %y, %z %C = or i1 %A, %B - %D = select i1 %C, i32 %z, i32 %x + %D = select i1 %C, i32 %z, i32 %x, !prof !1 ret i32 %D } define i32 @select_and_icmp_pred_bad_4(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @select_and_icmp_pred_bad_4( -; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]] -; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[X]], i32 [[Z]] +; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[X:%.*]], i32 [[Z]] ; CHECK-NEXT: ret i32 [[D]] ; %A = icmp ne i32 %x, %z @@ -166,8 +166,8 @@ define i32 @select_and_icmp_pred_bad_4(i32 %x, i32 %y, i32 %z) { define i32 @select_or_icmp_alt_bad_1(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @select_or_icmp_alt_bad_1( -; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]] -; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[Z]], i32 [[X]] +; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[Z]], i32 [[X:%.*]] ; CHECK-NEXT: ret i32 [[D]] ; %A = icmp eq i32 %x, %z @@ -179,7 +179,7 @@ define i32 
@select_or_icmp_alt_bad_1(i32 %x, i32 %y, i32 %z) { define i32 @select_or_icmp_alt_bad_2(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @select_or_icmp_alt_bad_2( -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 [[X:%.*]] ; %A = icmp ne i32 %x, %z %B = icmp eq i32 %y, %z @@ -190,8 +190,8 @@ define i32 @select_or_icmp_alt_bad_2(i32 %x, i32 %y, i32 %z) { define i32 @select_or_icmp_alt_bad_3(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @select_or_icmp_alt_bad_3( -; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]] -; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[X]], i32 [[Z]] +; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[X:%.*]], i32 [[Z]] ; CHECK-NEXT: ret i32 [[D]] ; %A = icmp eq i32 %x, %z @@ -203,8 +203,8 @@ define i32 @select_or_icmp_alt_bad_3(i32 %x, i32 %y, i32 %z) { define i32 @select_or_icmp_alt_bad_4(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @select_or_icmp_alt_bad_4( -; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]] -; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[Z]], i32 [[X]] +; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[Z]], i32 [[X:%.*]] ; CHECK-NEXT: ret i32 [[D]] ; %A = icmp ne i32 %x, %z @@ -321,3 +321,11 @@ define i32 @select_or_icmp_alt_bad_false_val(i32 %x, i32 %y, i32 %z, i32 %k) { %D = select i1 %C, i32 %x, i32 %k ret i32 %D } + +!0 = !{!"function_entry_count", i64 1000} +!1 = !{!"branch_weights", i32 2, i32 3} +;. +; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 3, i32 2} +; CHECK: [[PROF2]] = !{!"branch_weights", i32 2, i32 3} +;. diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll index 8784873..f5329cf 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll @@ -454,6 +454,132 @@ exit: ret void } +declare i1 @cond() + +define double @test_load_used_by_other_load_scev(ptr %ptr.a, ptr %ptr.b, ptr %ptr.c) { +; I64-LABEL: define double @test_load_used_by_other_load_scev( +; I64-SAME: ptr [[PTR_A:%.*]], ptr [[PTR_B:%.*]], ptr [[PTR_C:%.*]]) { +; I64-NEXT: [[ENTRY:.*]]: +; I64-NEXT: br label %[[OUTER_LOOP:.*]] +; I64: [[OUTER_LOOP_LOOPEXIT:.*]]: +; I64-NEXT: br label %[[OUTER_LOOP]] +; I64: [[OUTER_LOOP]]: +; I64-NEXT: [[ACCUM:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP29:%.*]], %[[OUTER_LOOP_LOOPEXIT]] ] +; I64-NEXT: [[COND:%.*]] = call i1 @cond() +; I64-NEXT: br i1 [[COND]], label %[[INNER_LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; I64: [[INNER_LOOP_PREHEADER]]: +; I64-NEXT: br label %[[VECTOR_PH:.*]] +; I64: [[VECTOR_PH]]: +; I64-NEXT: br label %[[VECTOR_BODY:.*]] +; I64: [[VECTOR_BODY]]: +; I64-NEXT: [[TMP0:%.*]] = add i64 0, 1 +; I64-NEXT: [[TMP1:%.*]] = add i64 1, 1 +; I64-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR_C]], i64 [[TMP0]] +; I64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR_C]], i64 [[TMP1]] +; I64-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[PTR_A]], i64 [[TMP0]] +; I64-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[PTR_A]], i64 [[TMP1]] +; I64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +; I64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 +; I64-NEXT: [[TMP8:%.*]] = getelementptr double, ptr [[PTR_B]], i64 [[TMP6]] +; I64-NEXT: [[TMP9:%.*]] = getelementptr double, ptr [[PTR_B]], i64 [[TMP7]] +; 
I64-NEXT: [[TMP10:%.*]] = load double, ptr [[PTR_A]], align 8 +; I64-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP10]], i64 0 +; I64-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer +; I64-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[BROADCAST_SPLAT]], zeroinitializer +; I64-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP2]], i64 8 +; I64-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP3]], i64 8 +; I64-NEXT: [[TMP14:%.*]] = load double, ptr [[TMP12]], align 8 +; I64-NEXT: [[TMP15:%.*]] = load double, ptr [[TMP13]], align 8 +; I64-NEXT: [[TMP16:%.*]] = insertelement <2 x double> poison, double [[TMP14]], i32 0 +; I64-NEXT: [[TMP17:%.*]] = insertelement <2 x double> [[TMP16]], double [[TMP15]], i32 1 +; I64-NEXT: [[TMP18:%.*]] = fmul <2 x double> [[TMP11]], zeroinitializer +; I64-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x double> poison, double [[ACCUM]], i64 0 +; I64-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT1]], <2 x double> poison, <2 x i32> zeroinitializer +; I64-NEXT: [[TMP19:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLAT2]], <2 x double> [[TMP18]], <2 x i32> <i32 1, i32 2> +; I64-NEXT: [[TMP20:%.*]] = fmul <2 x double> [[TMP17]], zeroinitializer +; I64-NEXT: [[TMP21:%.*]] = fadd <2 x double> [[TMP20]], zeroinitializer +; I64-NEXT: [[TMP22:%.*]] = fadd <2 x double> [[TMP21]], splat (double 1.000000e+00) +; I64-NEXT: [[TMP23:%.*]] = load double, ptr [[TMP8]], align 8 +; I64-NEXT: [[TMP24:%.*]] = load double, ptr [[TMP9]], align 8 +; I64-NEXT: [[TMP25:%.*]] = insertelement <2 x double> poison, double [[TMP23]], i32 0 +; I64-NEXT: [[TMP26:%.*]] = insertelement <2 x double> [[TMP25]], double [[TMP24]], i32 1 +; I64-NEXT: [[TMP27:%.*]] = fdiv <2 x double> [[TMP26]], [[TMP22]] +; I64-NEXT: [[TMP28:%.*]] = fsub <2 x double> [[TMP19]], [[TMP27]] +; I64-NEXT: br label %[[MIDDLE_BLOCK:.*]] +; I64: [[MIDDLE_BLOCK]]: +; I64-NEXT: [[TMP29]] = extractelement <2 x double> [[TMP28]], i32 1 +; I64-NEXT: br label %[[OUTER_LOOP_LOOPEXIT]] +; I64: [[EXIT]]: +; I64-NEXT: ret double [[ACCUM]] +; +; I32-LABEL: define double @test_load_used_by_other_load_scev( +; I32-SAME: ptr [[PTR_A:%.*]], ptr [[PTR_B:%.*]], ptr [[PTR_C:%.*]]) { +; I32-NEXT: [[ENTRY:.*]]: +; I32-NEXT: br label %[[OUTER_LOOP:.*]] +; I32: [[OUTER_LOOP]]: +; I32-NEXT: [[ACCUM:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[RESULT:%.*]], %[[INNER_LOOP:.*]] ] +; I32-NEXT: [[COND:%.*]] = call i1 @cond() +; I32-NEXT: br i1 [[COND]], label %[[INNER_LOOP]], label %[[EXIT:.*]] +; I32: [[INNER_LOOP]]: +; I32-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[OUTER_LOOP]] ], [ [[IV_NEXT:%.*]], %[[INNER_LOOP]] ] +; I32-NEXT: [[ACCUM_INNER:%.*]] = phi double [ [[ACCUM]], %[[OUTER_LOOP]] ], [ [[MUL1:%.*]], %[[INNER_LOOP]] ] +; I32-NEXT: [[IDX_PLUS1:%.*]] = add i64 [[IV]], 1 +; I32-NEXT: [[GEP_C:%.*]] = getelementptr i8, ptr [[PTR_C]], i64 [[IDX_PLUS1]] +; I32-NEXT: [[GEP_A_I64:%.*]] = getelementptr i64, ptr [[PTR_A]], i64 [[IDX_PLUS1]] +; I32-NEXT: [[LOAD_IDX:%.*]] = load i64, ptr [[GEP_A_I64]], align 8 +; I32-NEXT: [[GEP_B:%.*]] = getelementptr double, ptr [[PTR_B]], i64 [[LOAD_IDX]] +; I32-NEXT: [[LOAD_A:%.*]] = load double, ptr [[PTR_A]], align 8 +; I32-NEXT: [[ADD1:%.*]] = fadd double [[LOAD_A]], 0.000000e+00 +; I32-NEXT: [[GEP_C_OFFSET:%.*]] = getelementptr i8, ptr [[GEP_C]], i64 8 +; I32-NEXT: [[LOAD_C:%.*]] = load double, ptr [[GEP_C_OFFSET]], align 8 +; I32-NEXT: [[MUL1]] = 
fmul double [[ADD1]], 0.000000e+00 +; I32-NEXT: [[MUL2:%.*]] = fmul double [[LOAD_C]], 0.000000e+00 +; I32-NEXT: [[ADD2:%.*]] = fadd double [[MUL2]], 0.000000e+00 +; I32-NEXT: [[ADD3:%.*]] = fadd double [[ADD2]], 1.000000e+00 +; I32-NEXT: [[LOAD_B:%.*]] = load double, ptr [[GEP_B]], align 8 +; I32-NEXT: [[DIV:%.*]] = fdiv double [[LOAD_B]], [[ADD3]] +; I32-NEXT: [[RESULT]] = fsub double [[ACCUM_INNER]], [[DIV]] +; I32-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; I32-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 1 +; I32-NEXT: br i1 [[EXITCOND]], label %[[OUTER_LOOP]], label %[[INNER_LOOP]] +; I32: [[EXIT]]: +; I32-NEXT: ret double [[ACCUM]] +; +entry: + br label %outer.loop + +outer.loop: + %accum = phi double [ 0.0, %entry ], [ %result, %inner.loop ] + %cond = call i1 @cond() + br i1 %cond, label %inner.loop, label %exit + +inner.loop: + %iv = phi i64 [ 0, %outer.loop ], [ %iv.next, %inner.loop ] + %accum.inner = phi double [ %accum, %outer.loop ], [ %mul1, %inner.loop ] + %idx.plus1 = add i64 %iv, 1 + %gep.c = getelementptr i8, ptr %ptr.c, i64 %idx.plus1 + %gep.a.i64 = getelementptr i64, ptr %ptr.a, i64 %idx.plus1 + %load.idx = load i64, ptr %gep.a.i64, align 8 + %gep.b = getelementptr double, ptr %ptr.b, i64 %load.idx + %load.a = load double, ptr %ptr.a, align 8 + %add1 = fadd double %load.a, 0.000000e+00 + %gep.c.offset = getelementptr i8, ptr %gep.c, i64 8 + %load.c = load double, ptr %gep.c.offset, align 8 + %mul1 = fmul double %add1, 0.000000e+00 + %mul2 = fmul double %load.c, 0.000000e+00 + %add2 = fadd double %mul2, 0.000000e+00 + %add3 = fadd double %add2, 1.000000e+00 + %load.b = load double, ptr %gep.b, align 8 + %div = fdiv double %load.b, %add3 + %result = fsub double %accum.inner, %div + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv, 1 + br i1 %exitcond, label %outer.loop, label %inner.loop + +exit: + ret double %accum +} + attributes #0 = { "target-cpu"="znver2" } !0 = distinct !{!0, !1} diff --git a/llvm/test/Transforms/NewGVN/pr159918.ll b/llvm/test/Transforms/NewGVN/pr159918.ll new file mode 100644 index 0000000..3fad6e6 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/pr159918.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -passes=newgvn < %s | FileCheck %s + +; Don't use returned argument in memory defining intrinsics. 
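The guard the @wombat test below pins down is simple to state: a call may be value-numbered as its 'returned' argument only when the call does not also define (write) memory; otherwise the call result and the argument are not interchangeable, since stores like the one in the test must stay ordered against the retain. A hedged sketch of that predicate, with invented names (NewGVN's real check operates on Instruction and MemoryAccess objects, not booleans):

// Illustration only, not NewGVN's actual code.
bool mayForwardReturnedArg(bool HasReturnedArg, bool CallDefinesMemory) {
  return HasReturnedArg && !CallDefinesMemory;
}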
+define void @wombat(ptr %arg) { +; CHECK-LABEL: define void @wombat( +; CHECK-SAME: ptr [[ARG:%.*]]) { +; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[ARG]], align 8 +; CHECK-NEXT: [[CALL:%.*]] = call ptr @llvm.objc.retain(ptr [[LOAD]]) +; CHECK-NEXT: store ptr [[CALL]], ptr [[ARG]], align 8 +; CHECK-NEXT: ret void +; + %load = load ptr, ptr %arg, align 8 + %call = call ptr @llvm.objc.retain(ptr %load) + store ptr %call, ptr %arg, align 8 + ret void +} + +declare ptr @llvm.objc.retain(ptr returned) #0 + +attributes #0 = { nounwind } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll index ed0bd3f..cf62fd5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll @@ -55,6 +55,54 @@ entry: ret void } +define void @test_add_udiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) { +; CHECK-LABEL: @test_add_udiv( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[GEP1_2:%.*]] = getelementptr i32, ptr [[ARR1:%.*]], i32 2 +; CHECK-NEXT: [[GEP1_3:%.*]] = getelementptr i32, ptr [[ARR1]], i32 3 +; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[GEP1_2]], align 4 +; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[GEP1_3]], align 4 +; CHECK-NEXT: [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42 +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[ARR1]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, i32 [[A0:%.*]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A3:%.*]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> <i32 1146, i32 146, i32 0, i32 0>, [[TMP3]] +; CHECK-NEXT: [[RES2:%.*]] = udiv i32 [[V2]], [[Y2]] +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[RES2]], i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V3]], i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3> +; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP4]] +; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[ARR2:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %gep1.1 = getelementptr i32, ptr %arr1, i32 1 + %gep1.2 = getelementptr i32, ptr %arr1, i32 2 + %gep1.3 = getelementptr i32, ptr %arr1, i32 3 + %gep2.1 = getelementptr i32, ptr %arr2, i32 1 + %gep2.2 = getelementptr i32, ptr %arr2, i32 2 + %gep2.3 = getelementptr i32, ptr %arr2, i32 3 + %v0 = load i32, ptr %arr1 + %v1 = load i32, ptr %gep1.1 + %v2 = load i32, ptr %gep1.2 + %v3 = load i32, ptr %gep1.3 + %y0 = add nsw i32 %a0, 1146 + %y1 = add nsw i32 %a1, 146 + %y2 = add nsw i32 %a2, 42 + %y3 = add nsw i32 %a3, 0 + %res0 = add nsw i32 %v0, %y0 + %res1 = add nsw i32 %v1, %y1 + %res2 = udiv i32 %v2, %y2 + %res3 = add nsw i32 %v3, %y3 + store i32 %res0, ptr %arr2 + store i32 %res1, ptr %gep2.1 + store i32 %res2, ptr %gep2.2 + store i32 %res3, ptr %gep2.3 + ret void +} + ;; Similar test, but now div/rem is main opcode and not the alternate one. Same issue. 
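The shared issue these tests guard against is that division cannot be executed speculatively: blending add and udiv/srem lanes into a single alternate-opcode vector bundle would run the division on every lane, including lanes whose scalar code only performed an add, so a zero divisor in such a lane would introduce UB the scalar program never had. A standalone C++ illustration with hypothetical values (the scalar program below is well defined; a blended vector division across all four lanes would not be):

#include <cstdio>

int main() {
  int v[4] = {8, 9, 10, 11};
  int y[4] = {2, 0, 5, 0}; // zero divisors sit in lanes that only add
  int res[4];
  for (int i = 0; i < 4; ++i)
    res[i] = (i == 2) ? v[i] / y[i]  // only lane 2 actually divides
                      : v[i] + y[i]; // the remaining lanes add
  for (int i = 0; i < 4; ++i)
    std::printf("res[%d] = %d\n", i, res[i]);
  return 0;
}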
define void @test_urem_add(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: @test_urem_add( @@ -114,3 +162,56 @@ entry: store i32 %res3, ptr %gep2.3 ret void } + +define void @test_srem_add(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) { +; CHECK-LABEL: @test_srem_add( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[GEP1_1:%.*]] = getelementptr i32, ptr [[ARR1:%.*]], i32 1 +; CHECK-NEXT: [[GEP1_2:%.*]] = getelementptr i32, ptr [[ARR1]], i32 2 +; CHECK-NEXT: [[GEP1_3:%.*]] = getelementptr i32, ptr [[ARR1]], i32 3 +; CHECK-NEXT: [[GEP2_1:%.*]] = getelementptr i32, ptr [[ARR2:%.*]], i32 1 +; CHECK-NEXT: [[GEP2_2:%.*]] = getelementptr i32, ptr [[ARR2]], i32 2 +; CHECK-NEXT: [[GEP2_3:%.*]] = getelementptr i32, ptr [[ARR2]], i32 3 +; CHECK-NEXT: [[V0:%.*]] = load i32, ptr [[ARR1]], align 4 +; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[GEP1_1]], align 4 +; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[GEP1_2]], align 4 +; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[GEP1_3]], align 4 +; CHECK-NEXT: [[Y0:%.*]] = add nsw i32 [[A0:%.*]], 1146 +; CHECK-NEXT: [[Y1:%.*]] = add nsw i32 [[A1:%.*]], 146 +; CHECK-NEXT: [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42 +; CHECK-NEXT: [[Y3:%.*]] = add nsw i32 [[A3:%.*]], 0 +; CHECK-NEXT: [[RES0:%.*]] = srem i32 [[V0]], [[Y0]] +; CHECK-NEXT: [[RES1:%.*]] = srem i32 [[V1]], [[Y1]] +; CHECK-NEXT: [[RES2:%.*]] = srem i32 [[V2]], [[Y2]] +; CHECK-NEXT: [[RES3:%.*]] = add nsw i32 [[V3]], [[Y3]] +; CHECK-NEXT: store i32 [[RES0]], ptr [[ARR2]], align 4 +; CHECK-NEXT: store i32 [[RES1]], ptr [[GEP2_1]], align 4 +; CHECK-NEXT: store i32 [[RES2]], ptr [[GEP2_2]], align 4 +; CHECK-NEXT: store i32 [[RES3]], ptr [[GEP2_3]], align 4 +; CHECK-NEXT: ret void +; +entry: + %gep1.1 = getelementptr i32, ptr %arr1, i32 1 + %gep1.2 = getelementptr i32, ptr %arr1, i32 2 + %gep1.3 = getelementptr i32, ptr %arr1, i32 3 + %gep2.1 = getelementptr i32, ptr %arr2, i32 1 + %gep2.2 = getelementptr i32, ptr %arr2, i32 2 + %gep2.3 = getelementptr i32, ptr %arr2, i32 3 + %v0 = load i32, ptr %arr1 + %v1 = load i32, ptr %gep1.1 + %v2 = load i32, ptr %gep1.2 + %v3 = load i32, ptr %gep1.3 + %y0 = add nsw i32 %a0, 1146 + %y1 = add nsw i32 %a1, 146 + %y2 = add nsw i32 %a2, 42 + %y3 = add nsw i32 %a3, 0 + %res0 = srem i32 %v0, %y0 + %res1 = srem i32 %v1, %y1 + %res2 = srem i32 %v2, %y2 + %res3 = add nsw i32 %v3, %y3 + store i32 %res0, ptr %arr2 + store i32 %res1, ptr %gep2.1 + store i32 %res2, ptr %gep2.2 + store i32 %res3, ptr %gep2.3 + ret void +} diff --git a/llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp b/llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp index b6e8567..497da8f 100644 --- a/llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp +++ b/llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp @@ -46,8 +46,8 @@ public: MAM.registerPass([VocabVector = std::move(VocabVector)]() mutable { return IR2VecVocabAnalysis(std::move(VocabVector)); }); - IR2VecVocab = - new ir2vec::Vocabulary(ir2vec::Vocabulary::createDummyVocabForTest(1)); + IR2VecVocab = std::make_unique<ir2vec::Vocabulary>( + ir2vec::Vocabulary::createDummyVocabForTest(1)); MAM.registerPass([&] { return PassInstrumentationAnalysis(); }); FAM.registerPass([&] { return ModuleAnalysisManagerFunctionProxy(MAM); }); FAM.registerPass([&] { return DominatorTreeAnalysis(); }); @@ -69,7 +69,7 @@ protected: std::unique_ptr<LoopInfo> LI; FunctionAnalysisManager FAM; ModuleAnalysisManager MAM; - ir2vec::Vocabulary *IR2VecVocab; + std::unique_ptr<ir2vec::Vocabulary> IR2VecVocab; void TearDown() override 
{ // Restore original IR2Vec weights diff --git a/llvm/unittests/Analysis/IR2VecTest.cpp b/llvm/unittests/Analysis/IR2VecTest.cpp index 743628f..d136cb6 100644 --- a/llvm/unittests/Analysis/IR2VecTest.cpp +++ b/llvm/unittests/Analysis/IR2VecTest.cpp @@ -295,7 +295,7 @@ TEST(IR2VecTest, ZeroDimensionEmbedding) { // Fixture for IR2Vec tests requiring IR setup. class IR2VecTestFixture : public ::testing::Test { protected: - Vocabulary *V; + std::unique_ptr<Vocabulary> V; LLVMContext Ctx; std::unique_ptr<Module> M; Function *F = nullptr; @@ -304,7 +304,7 @@ protected: Instruction *RetInst = nullptr; void SetUp() override { - V = new Vocabulary(Vocabulary::createDummyVocabForTest(2)); + V = std::make_unique<Vocabulary>(Vocabulary::createDummyVocabForTest(2)); // Setup IR M = std::make_unique<Module>("TestM", Ctx); diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt index d1dfb1d..25efa00 100644 --- a/llvm/unittests/Support/CMakeLists.txt +++ b/llvm/unittests/Support/CMakeLists.txt @@ -52,6 +52,7 @@ add_llvm_unittest(SupportTests IndexedAccessorTest.cpp InstructionCostTest.cpp InterleavedRangeTest.cpp + JobserverTest.cpp JSONTest.cpp KnownBitsTest.cpp LEB128Test.cpp diff --git a/llvm/unittests/Support/JobserverTest.cpp b/llvm/unittests/Support/JobserverTest.cpp new file mode 100644 index 0000000..ddee023 --- /dev/null +++ b/llvm/unittests/Support/JobserverTest.cpp @@ -0,0 +1,442 @@ +//===- llvm/unittest/Support/JobserverTest.cpp ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Jobserver.h unit tests. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Jobserver.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Parallel.h" +#include "llvm/Support/ThreadPool.h" +#include "llvm/Support/raw_ostream.h" +#include "gtest/gtest.h" +#include <future> +#include <random> +#include <stdlib.h> + +#if defined(LLVM_ON_UNIX) +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/FileSystem.h" +#include <atomic> +#include <condition_variable> +#include <fcntl.h> +#include <mutex> +#include <sys/stat.h> +#include <thread> +#include <unistd.h> +#elif defined(_WIN32) +#include <windows.h> +#endif + +#define DEBUG_TYPE "jobserver-test" + +using namespace llvm; + +namespace { + +// RAII helper to set an environment variable for the duration of a test. +class ScopedEnvironment { + std::string Name; + std::string OldValue; + bool HadOldValue; + +public: + ScopedEnvironment(const char *Name, const char *Value) : Name(Name) { +#if defined(_WIN32) + char *Old = nullptr; + size_t OldLen; + errno_t err = _dupenv_s(&Old, &OldLen, Name); + if (err == 0 && Old != nullptr) { + HadOldValue = true; + OldValue = Old; + free(Old); + } else { + HadOldValue = false; + } + _putenv_s(Name, Value); +#else + const char *Old = getenv(Name); + if (Old) { + HadOldValue = true; + OldValue = Old; + } else { + HadOldValue = false; + } + setenv(Name, Value, 1); +#endif + } + + ~ScopedEnvironment() { +#if defined(_WIN32) + if (HadOldValue) + _putenv_s(Name.c_str(), OldValue.c_str()); + else + // On Windows, setting an environment variable to an empty string + // unsets it, making getenv() return NULL. 
+ _putenv_s(Name.c_str(), ""); +#else + if (HadOldValue) + setenv(Name.c_str(), OldValue.c_str(), 1); + else + unsetenv(Name.c_str()); +#endif + } +}; + +TEST(Jobserver, Slot) { + // Default constructor creates an invalid slot. + JobSlot S1; + EXPECT_FALSE(S1.isValid()); + EXPECT_FALSE(S1.isImplicit()); + + // Create an implicit slot. + JobSlot S2 = JobSlot::createImplicit(); + EXPECT_TRUE(S2.isValid()); + EXPECT_TRUE(S2.isImplicit()); + + // Create an explicit slot. + JobSlot S3 = JobSlot::createExplicit(42); + EXPECT_TRUE(S3.isValid()); + EXPECT_FALSE(S3.isImplicit()); + + // Test move construction. + JobSlot S4 = std::move(S2); + EXPECT_TRUE(S4.isValid()); + EXPECT_TRUE(S4.isImplicit()); + EXPECT_FALSE(S2.isValid()); // S2 is now invalid. + + // Test move assignment. + S1 = std::move(S3); + EXPECT_TRUE(S1.isValid()); + EXPECT_FALSE(S1.isImplicit()); + EXPECT_FALSE(S3.isValid()); // S3 is now invalid. +} + +// Test fixture for parsing tests to ensure the singleton state is +// reset between each test case. +class JobserverParsingTest : public ::testing::Test { +protected: + void TearDown() override { JobserverClient::resetForTesting(); } +}; + +TEST_F(JobserverParsingTest, NoMakeflags) { + // No MAKEFLAGS, should be null. + ScopedEnvironment Env("MAKEFLAGS", ""); + // On Unix, setting an env var to "" makes getenv() return an empty + // string, not NULL. We must call unsetenv() to test the case where + // the variable is truly not present. +#if !defined(_WIN32) + unsetenv("MAKEFLAGS"); +#endif + EXPECT_EQ(JobserverClient::getInstance(), nullptr); +} + +TEST_F(JobserverParsingTest, EmptyMakeflags) { + // Empty MAKEFLAGS, should be null. + ScopedEnvironment Env("MAKEFLAGS", ""); + EXPECT_EQ(JobserverClient::getInstance(), nullptr); +} + +TEST_F(JobserverParsingTest, DryRunFlag) { + // Dry-run flag 'n', should be null. + ScopedEnvironment Env("MAKEFLAGS", "n -j --jobserver-auth=fifo:/tmp/foo"); + EXPECT_EQ(JobserverClient::getInstance(), nullptr); +} + +// Separate fixture for non-threaded client tests. +class JobserverClientTest : public JobserverParsingTest {}; + +#if defined(LLVM_ON_UNIX) +// RAII helper to create and clean up a temporary FIFO file. +class ScopedFifo { + SmallString<128> Path; + bool IsValid = false; + +public: + ScopedFifo() { + // To get a unique, non-colliding name for a FIFO, we use the + // createTemporaryFile function to reserve a name in the filesystem. + std::error_code EC = + sys::fs::createTemporaryFile("jobserver-test", "fifo", Path); + if (EC) + return; + // Then we immediately remove the regular file it created, but keep the + // unique path. + sys::fs::remove(Path); + // Finally, we create the FIFO at that safe, unique path. + if (mkfifo(Path.c_str(), 0600) != 0) + return; + IsValid = true; + } + + ~ScopedFifo() { + if (IsValid) + sys::fs::remove(Path); + } + + const char *c_str() const { return Path.data(); } + bool isValid() const { return IsValid; } +}; + +TEST_F(JobserverClientTest, UnixClientFifo) { + // This test covers basic FIFO client creation and behavior with an empty + // FIFO. No job tokens are available. + ScopedFifo F; + ASSERT_TRUE(F.isValid()); + + // Intentionally inserted \t in environment string. + std::string Makeflags = " \t -j4\t \t--jobserver-auth=fifo:"; + Makeflags += F.c_str(); + ScopedEnvironment Env("MAKEFLAGS", Makeflags.c_str()); + + JobserverClient *Client = JobserverClient::getInstance(); + ASSERT_NE(Client, nullptr); + + // Get the implicit token. 
+ JobSlot S1 = Client->tryAcquire(); + EXPECT_TRUE(S1.isValid()); + EXPECT_TRUE(S1.isImplicit()); + + // FIFO is empty, next acquire fails. + JobSlot S2 = Client->tryAcquire(); + EXPECT_FALSE(S2.isValid()); + + // Release does not write to the pipe for the implicit token. + Client->release(std::move(S1)); + + // Re-acquire the implicit token. + S1 = Client->tryAcquire(); + EXPECT_TRUE(S1.isValid()); +} + +#if LLVM_ENABLE_THREADS +// Test fixture for tests that use the jobserver strategy. It creates a +// temporary FIFO, sets MAKEFLAGS, and provides a helper to pre-load the FIFO +// with job tokens, simulating `make -jN`. +class JobserverStrategyTest : public JobserverParsingTest { +protected: + std::unique_ptr<ScopedFifo> TheFifo; + std::thread MakeThread; + std::atomic<bool> StopMakeThread{false}; + // Save and restore the global parallel strategy to avoid interfering with + // other tests in the same process. + ThreadPoolStrategy SavedStrategy; + + void SetUp() override { + SavedStrategy = parallel::strategy; + TheFifo = std::make_unique<ScopedFifo>(); + ASSERT_TRUE(TheFifo->isValid()); + + std::string MakeFlags = "--jobserver-auth=fifo:"; + MakeFlags += TheFifo->c_str(); + setenv("MAKEFLAGS", MakeFlags.c_str(), 1); + } + + void TearDown() override { + if (MakeThread.joinable()) { + StopMakeThread = true; + MakeThread.join(); + } + unsetenv("MAKEFLAGS"); + TheFifo.reset(); + // Restore the original strategy to ensure subsequent tests are unaffected. + parallel::strategy = SavedStrategy; + } + + // Starts a background thread that emulates `make`. It populates the FIFO + // with initial tokens and then recycles tokens released by clients. + void startMakeProxy(int NumInitialJobs) { + MakeThread = std::thread([this, NumInitialJobs]() { + LLVM_DEBUG(dbgs() << "[MakeProxy] Thread started.\n"); + // Open the FIFO for reading and writing. This call does not block. + int RWFd = open(TheFifo->c_str(), O_RDWR); + LLVM_DEBUG(dbgs() << "[MakeProxy] Opened FIFO " << TheFifo->c_str() + << " with O_RDWR, FD=" << RWFd << "\n"); + if (RWFd == -1) { + LLVM_DEBUG( + dbgs() + << "[MakeProxy] ERROR: Failed to open FIFO with O_RDWR. Errno: " + << errno << "\n"); + return; + } + + // Populate with initial jobs. + LLVM_DEBUG(dbgs() << "[MakeProxy] Writing " << NumInitialJobs + << " initial tokens.\n"); + for (int i = 0; i < NumInitialJobs; ++i) { + if (write(RWFd, "+", 1) != 1) { + LLVM_DEBUG(dbgs() + << "[MakeProxy] ERROR: Failed to write initial token " << i + << ".\n"); + close(RWFd); + return; + } + } + LLVM_DEBUG(dbgs() << "[MakeProxy] Finished writing initial tokens.\n"); + + // Make the read non-blocking so we can periodically check StopMakeThread. + int flags = fcntl(RWFd, F_GETFL, 0); + fcntl(RWFd, F_SETFL, flags | O_NONBLOCK); + + while (!StopMakeThread) { + char Token; + ssize_t Ret = read(RWFd, &Token, 1); + if (Ret == 1) { + LLVM_DEBUG(dbgs() << "[MakeProxy] Read token '" << Token + << "' to recycle.\n"); + // A client released a token, 'make' makes it available again. + std::this_thread::sleep_for(std::chrono::microseconds(100)); + ssize_t WRet; + do { + WRet = write(RWFd, &Token, 1); + } while (WRet < 0 && errno == EINTR); + if (WRet <= 0) { + LLVM_DEBUG( + dbgs() + << "[MakeProxy] ERROR: Failed to write recycled token.\n"); + break; // Error, stop the proxy. 
+ } + LLVM_DEBUG(dbgs() + << "[MakeProxy] Wrote token '" << Token << "' back.\n"); + } else if (Ret < 0 && errno != EAGAIN && errno != EWOULDBLOCK) { + LLVM_DEBUG(dbgs() << "[MakeProxy] ERROR: Read failed with errno " + << errno << ".\n"); + break; // Error, stop the proxy. + } + // Yield to prevent this thread from busy-waiting. + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + LLVM_DEBUG(dbgs() << "[MakeProxy] Thread stopping.\n"); + close(RWFd); + }); + + // Give the proxy thread a moment to start and populate the FIFO. + // This is a simple way to avoid a race condition where the client starts + // before the initial tokens are in the pipe. + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + } +}; + +TEST_F(JobserverStrategyTest, ThreadPoolConcurrencyIsLimited) { + // This test simulates `make -j3`. We will have 1 implicit job slot and + // we will add 2 explicit job tokens to the FIFO, for a total of 3. + const int NumExplicitJobs = 2; + const int ConcurrencyLimit = NumExplicitJobs + 1; // +1 for the implicit slot + const int NumTasks = 8; // More tasks than available slots. + + LLVM_DEBUG(dbgs() << "Calling startMakeProxy with " << NumExplicitJobs + << " jobs.\n"); + startMakeProxy(NumExplicitJobs); + LLVM_DEBUG(dbgs() << "MakeProxy is running.\n"); + + // Create the thread pool. Its constructor will call jobserver_concurrency() + // and create a client that reads from our pre-loaded FIFO. + StdThreadPool Pool(jobserver_concurrency()); + + std::atomic<int> ActiveTasks{0}; + std::atomic<int> MaxActiveTasks{0}; + std::atomic<int> CompletedTasks{0}; + std::mutex M; + std::condition_variable CV; + + // Dispatch more tasks than there are job slots. The pool should block + // and only run up to `ConcurrencyLimit` tasks at once. + for (int i = 0; i < NumTasks; ++i) { + Pool.async([&, i] { + // Track the number of concurrently running tasks. + int CurrentActive = ++ActiveTasks; + LLVM_DEBUG(dbgs() << "Task " << i << ": Active tasks: " << CurrentActive + << "\n"); + int OldMax = MaxActiveTasks.load(); + while (CurrentActive > OldMax) + MaxActiveTasks.compare_exchange_weak(OldMax, CurrentActive); + + std::this_thread::sleep_for(std::chrono::milliseconds(25)); + + --ActiveTasks; + if (++CompletedTasks == NumTasks) { + std::lock_guard<std::mutex> Lock(M); + CV.notify_one(); + } + }); + } + + // Wait for all tasks to complete. + std::unique_lock<std::mutex> Lock(M); + CV.wait(Lock, [&] { return CompletedTasks == NumTasks; }); + + LLVM_DEBUG(dbgs() << "Test finished. Max active tasks was " << MaxActiveTasks + << ".\n"); + // The key assertion: the maximum number of concurrent tasks should + // not have exceeded the limit imposed by the jobserver. + EXPECT_LE(MaxActiveTasks, ConcurrencyLimit); + EXPECT_EQ(CompletedTasks, NumTasks); +} + +TEST_F(JobserverStrategyTest, ParallelForIsLimited) { + // This test verifies that llvm::parallelFor respects the jobserver limit. + const int NumExplicitJobs = 3; + const int ConcurrencyLimit = NumExplicitJobs + 1; // +1 implicit + const int NumTasks = 20; + + LLVM_DEBUG(dbgs() << "Calling startMakeProxy with " << NumExplicitJobs + << " jobs.\n"); + startMakeProxy(NumExplicitJobs); + LLVM_DEBUG(dbgs() << "MakeProxy is running.\n"); + + // Set the global strategy. parallelFor will use this. 
+ parallel::strategy = jobserver_concurrency(); + + std::atomic<int> ActiveTasks{0}; + std::atomic<int> MaxActiveTasks{0}; + + parallelFor(0, NumTasks, [&](int i) { + int CurrentActive = ++ActiveTasks; + LLVM_DEBUG(dbgs() << "Task " << i << ": Active tasks: " << CurrentActive + << "\n"); + int OldMax = MaxActiveTasks.load(); + while (CurrentActive > OldMax) + MaxActiveTasks.compare_exchange_weak(OldMax, CurrentActive); + + std::this_thread::sleep_for(std::chrono::milliseconds(20)); + --ActiveTasks; + }); + + LLVM_DEBUG(dbgs() << "ParallelFor finished. Max active tasks was " + << MaxActiveTasks << ".\n"); + EXPECT_LE(MaxActiveTasks, ConcurrencyLimit); +} + +TEST_F(JobserverStrategyTest, ParallelSortIsLimited) { + // This test serves as an integration test to ensure parallelSort completes + // correctly when running under the jobserver strategy. It doesn't directly + // measure concurrency but verifies correctness. + const int NumExplicitJobs = 3; + startMakeProxy(NumExplicitJobs); + + parallel::strategy = jobserver_concurrency(); + + std::vector<int> V(1024); + // Fill with random data + std::mt19937 randEngine; + std::uniform_int_distribution<int> dist; + for (int &i : V) + i = dist(randEngine); + + parallelSort(V.begin(), V.end()); + ASSERT_TRUE(llvm::is_sorted(V)); +} + +#endif // LLVM_ENABLE_THREADS + +#endif // defined(LLVM_ON_UNIX) + +} // end anonymous namespace diff --git a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp index 75bea77..8076ce2 100644 --- a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp +++ b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp @@ -246,16 +246,14 @@ bool TypeSetByHwMode::operator==(const TypeSetByHwMode &VTS) const { return true; } -namespace llvm { -raw_ostream &operator<<(raw_ostream &OS, const MachineValueTypeSet &T) { +raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineValueTypeSet &T) { T.writeToStream(OS); return OS; } -raw_ostream &operator<<(raw_ostream &OS, const TypeSetByHwMode &T) { +raw_ostream &llvm::operator<<(raw_ostream &OS, const TypeSetByHwMode &T) { T.writeToStream(OS); return OS; } -} // namespace llvm LLVM_DUMP_METHOD void TypeSetByHwMode::dump() const { dbgs() << *this << '\n'; } diff --git a/llvm/utils/TableGen/Common/CodeGenRegisters.cpp b/llvm/utils/TableGen/Common/CodeGenRegisters.cpp index 294f3af..8d0ec9a 100644 --- a/llvm/utils/TableGen/Common/CodeGenRegisters.cpp +++ b/llvm/utils/TableGen/Common/CodeGenRegisters.cpp @@ -857,17 +857,6 @@ unsigned CodeGenRegisterClass::getWeight(const CodeGenRegBank &RegBank) const { return (*Members.begin())->getWeight(RegBank); } -namespace llvm { - -raw_ostream &operator<<(raw_ostream &OS, const CodeGenRegisterClass::Key &K) { - OS << "{ " << K.RSI; - for (const auto R : *K.Members) - OS << ", " << R->getName(); - return OS << " }"; -} - -} // end namespace llvm - // This is a simple lexicographical order that can be used to search for sets. // It is not the same as the topological order provided by TopoOrderRC. 
bool CodeGenRegisterClass::Key::operator<( diff --git a/llvm/utils/TableGen/Common/InfoByHwMode.cpp b/llvm/utils/TableGen/Common/InfoByHwMode.cpp index a6e2fc4..4c8197d 100644 --- a/llvm/utils/TableGen/Common/InfoByHwMode.cpp +++ b/llvm/utils/TableGen/Common/InfoByHwMode.cpp @@ -227,19 +227,17 @@ EncodingInfoByHwMode::EncodingInfoByHwMode(const Record *R, } } -namespace llvm { -raw_ostream &operator<<(raw_ostream &OS, const ValueTypeByHwMode &T) { +raw_ostream &llvm::operator<<(raw_ostream &OS, const ValueTypeByHwMode &T) { T.writeToStream(OS); return OS; } -raw_ostream &operator<<(raw_ostream &OS, const RegSizeInfo &T) { +raw_ostream &llvm::operator<<(raw_ostream &OS, const RegSizeInfo &T) { T.writeToStream(OS); return OS; } -raw_ostream &operator<<(raw_ostream &OS, const RegSizeInfoByHwMode &T) { +raw_ostream &llvm::operator<<(raw_ostream &OS, const RegSizeInfoByHwMode &T) { T.writeToStream(OS); return OS; } -} // namespace llvm diff --git a/llvm/utils/TableGen/Common/PredicateExpander.cpp b/llvm/utils/TableGen/Common/PredicateExpander.cpp index 09d9538..03252ed 100644 @@ -14,7 +14,7 @@ #include "CodeGenSchedule.h" // Definition of STIPredicateFunction. #include "llvm/TableGen/Record.h" -namespace llvm { +using namespace llvm; void PredicateExpander::expandTrue(raw_ostream &OS) { OS << "true"; } void PredicateExpander::expandFalse(raw_ostream &OS) { OS << "false"; } @@ -553,5 +553,3 @@ void STIPredicateExpander::expandSTIPredicate(raw_ostream &OS, expandEpilogue(OS, Fn); } } - -} // namespace llvm diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp index 09ce9f3..9471959 100644 --- a/llvm/utils/TableGen/DXILEmitter.cpp +++ b/llvm/utils/TableGen/DXILEmitter.cpp @@ -37,15 +37,6 @@ struct DXILIntrinsicSelect { SmallVector<const Record *> ArgSelectRecords; }; -static StringRef StripIntrinArgSelectTypePrefix(StringRef Type) { - StringRef Prefix = "IntrinArgSelect_"; - if (!Type.starts_with(Prefix)) { - PrintFatalError("IntrinArgSelectType definintion must be prefixed with " - "'IntrinArgSelect_'"); - } - return Type.substr(Prefix.size()); -} - struct DXILOperationDesc { std::string OpName; // name of DXIL operation int OpCode; // ID of DXIL operation @@ -66,6 +57,15 @@ struct DXILOperationDesc { }; } // end anonymous namespace +static StringRef stripIntrinArgSelectTypePrefix(StringRef Type) { + StringRef Prefix = "IntrinArgSelect_"; + if (!Type.starts_with(Prefix)) { + PrintFatalError("IntrinArgSelectType definition must be prefixed with " + "'IntrinArgSelect_'"); + } + return Type.substr(Prefix.size()); +} + /// In-place sort TableGen records of class with a field /// Version dxil_version /// in the ascending version order.
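As a rough sketch of the shape of the sort that comment describes, assuming the version is an ordered (major, minor) pair (the actual code reads the dxil_version fields from each TableGen record, so names here are illustrative):

#include <algorithm>
#include <utility>
#include <vector>

struct VersionedRec {
  std::pair<int, int> Version; // assumed (major, minor) shape
};

// In-place ascending sort by version, per the comment above.
void sortByVersion(std::vector<VersionedRec> &Recs) {
  std::sort(Recs.begin(), Recs.end(),
            [](const VersionedRec &L, const VersionedRec &R) {
              return L.Version < R.Version; // lexicographic on (major, minor)
            });
}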
@@ -449,7 +449,7 @@ static void emitDXILIntrinsicMap(ArrayRef<DXILOperationDesc> Ops, ArgSelect->getValueAsDef("type")->getNameInitAsString(); int Value = ArgSelect->getValueAsInt("value"); OS << "(IntrinArgSelect{" - << "IntrinArgSelect::Type::" << StripIntrinArgSelectTypePrefix(Type) + << "IntrinArgSelect::Type::" << stripIntrinArgSelectTypePrefix(Type) << "," << Value << "}), "; } OS << ")\n"; @@ -466,7 +466,7 @@ static void emitDXILIntrinsicArgSelectTypes(const RecordKeeper &Records, OS << "#ifdef DXIL_OP_INTRINSIC_ARG_SELECT_TYPE\n"; for (const Record *Records : Records.getAllDerivedDefinitions("IntrinArgSelectType")) { - StringRef StrippedName = StripIntrinArgSelectTypePrefix(Records->getName()); + StringRef StrippedName = stripIntrinArgSelectTypePrefix(Records->getName()); OS << "DXIL_OP_INTRINSIC_ARG_SELECT_TYPE(" << StrippedName << ")\n"; } OS << "#undef DXIL_OP_INTRINSIC_ARG_SELECT_TYPE\n"; diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp index 961dc28..5d41b7d 100644 --- a/llvm/utils/TableGen/DecoderEmitter.cpp +++ b/llvm/utils/TableGen/DecoderEmitter.cpp @@ -194,10 +194,6 @@ private: void parseInstructionEncodings(); }; -} // end anonymous namespace - -namespace { - struct EncodingIsland { unsigned StartBit; unsigned NumBits; diff --git a/llvm/utils/TableGen/ExegesisEmitter.cpp b/llvm/utils/TableGen/ExegesisEmitter.cpp index 1b4b072..bd69919 100644 --- a/llvm/utils/TableGen/ExegesisEmitter.cpp +++ b/llvm/utils/TableGen/ExegesisEmitter.cpp @@ -58,6 +58,14 @@ private: const std::map<llvm::StringRef, unsigned> PfmCounterNameTable; }; +struct ValidationCounterInfo { + int64_t EventNumber; + StringRef EventName; + unsigned PfmCounterID; +}; + +} // namespace + static std::map<llvm::StringRef, unsigned> collectPfmCounters(const RecordKeeper &Records) { std::map<llvm::StringRef, unsigned> PfmCounterNameTable; @@ -106,14 +114,8 @@ ExegesisEmitter::ExegesisEmitter(const RecordKeeper &RK) Target = Targets[0]->getName().str(); } -struct ValidationCounterInfo { - int64_t EventNumber; - StringRef EventName; - unsigned PfmCounterID; -}; - -bool EventNumberLess(const ValidationCounterInfo &LHS, - const ValidationCounterInfo &RHS) { +static bool EventNumberLess(const ValidationCounterInfo &LHS, + const ValidationCounterInfo &RHS) { return LHS.EventNumber < RHS.EventNumber; } @@ -221,7 +223,7 @@ void ExegesisEmitter::emitPfmCounters(raw_ostream &OS) const { emitPfmCountersInfo(*Def, IssueCountersTableOffset, OS); OS << "\n"; -} // namespace +} void ExegesisEmitter::emitPfmCountersLookupTable(raw_ostream &OS) const { std::vector<const Record *> Bindings = @@ -249,7 +251,5 @@ void ExegesisEmitter::run(raw_ostream &OS) const { emitPfmCountersLookupTable(OS); } -} // end anonymous namespace - static TableGen::Emitter::OptClass<ExegesisEmitter> X("gen-exegesis", "Generate llvm-exegesis tables"); diff --git a/llvm/utils/TableGen/FastISelEmitter.cpp b/llvm/utils/TableGen/FastISelEmitter.cpp index 694d89a..dba8bde 100644 --- a/llvm/utils/TableGen/FastISelEmitter.cpp +++ b/llvm/utils/TableGen/FastISelEmitter.cpp @@ -52,11 +52,9 @@ struct InstructionMemo { InstructionMemo(const InstructionMemo &Other) = delete; InstructionMemo(InstructionMemo &&Other) = default; }; -} // End anonymous namespace /// ImmPredicateSet - This uniques predicates (represented as a string) and /// gives them unique (small) integer ID's that start at 0. 
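The uniquing scheme in that comment is the classic intern-table pattern. A minimal standalone sketch (the real ImmPredicateSet keys on TreePattern* and stores TreePredicateFn objects rather than strings):

#include <string>
#include <unordered_map>
#include <vector>

class InternTable {
  std::unordered_map<std::string, unsigned> Ids;
  std::vector<std::string> ByIndex; // ID -> value; IDs start at 0

public:
  unsigned getId(const std::string &S) {
    auto [It, Inserted] = Ids.try_emplace(S, ByIndex.size());
    if (Inserted)
      ByIndex.push_back(S); // first occurrence claims the next small ID
    return It->second;
  }
  const std::string &lookup(unsigned Id) const { return ByIndex[Id]; }
};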
-namespace { class ImmPredicateSet { DenseMap<TreePattern *, unsigned> ImmIDs; std::vector<TreePredicateFn> PredsByName; @@ -77,12 +75,10 @@ public: iterator begin() const { return PredsByName.begin(); } iterator end() const { return PredsByName.end(); } }; -} // End anonymous namespace /// OperandsSignature - This class holds a description of a list of operand /// types. It has utility methods for emitting text based on the operands. /// -namespace { struct OperandsSignature { class OpKind { enum { OK_Reg, OK_FP, OK_Imm, OK_Invalid = -1 }; @@ -366,9 +362,7 @@ struct OperandsSignature { Opnd.printManglingSuffix(OS, ImmPredicates, StripImmCodes); } }; -} // End anonymous namespace -namespace { class FastISelMap { // A multimap is needed instead of a "plain" map because the key is // the instruction's complexity (an int) and they are not unique. diff --git a/llvm/utils/TableGen/X86DisassemblerShared.h b/llvm/utils/TableGen/X86DisassemblerShared.h index f60fd47..d5f936d 100644 --- a/llvm/utils/TableGen/X86DisassemblerShared.h +++ b/llvm/utils/TableGen/X86DisassemblerShared.h @@ -14,6 +14,8 @@ #include "llvm/Support/X86DisassemblerDecoderCommon.h" +namespace llvm::X86Disassembler { + struct InstructionSpecifier { llvm::X86Disassembler::OperandSpecifier operands[llvm::X86Disassembler::X86_MAX_OPERANDS]; @@ -52,4 +54,6 @@ struct ContextDecision { ContextDecision() { memset(opcodeDecisions, 0, sizeof(opcodeDecisions)); } }; +} // namespace llvm::X86Disassembler + #endif diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp index 1e1e4ab..6f523b5 100644 --- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp +++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp @@ -30,22 +30,23 @@ struct ManualMapEntry { const char *MemInstStr; uint16_t Strategy; }; +} // namespace // List of instructions requiring explicitly aligned memory. -const char *ExplicitAlign[] = {"MOVDQA", "MOVAPS", "MOVAPD", "MOVNTPS", - "MOVNTPD", "MOVNTDQ", "MOVNTDQA"}; +static constexpr const char *ExplicitAlign[] = { + "MOVDQA", "MOVAPS", "MOVAPD", "MOVNTPS", "MOVNTPD", "MOVNTDQ", "MOVNTDQA"}; // List of instructions NOT requiring explicit memory alignment. -const char *ExplicitUnalign[] = {"MOVDQU", "MOVUPS", "MOVUPD", - "PCMPESTRM", "PCMPESTRI", "PCMPISTRM", - "PCMPISTRI"}; +static constexpr const char *ExplicitUnalign[] = { + "MOVDQU", "MOVUPS", "MOVUPD", "PCMPESTRM", + "PCMPESTRI", "PCMPISTRM", "PCMPISTRI"}; -const ManualMapEntry ManualMapSet[] = { +static const ManualMapEntry ManualMapSet[] = { #define ENTRY(REG, MEM, FLAGS) {#REG, #MEM, FLAGS}, #include "X86ManualFoldTables.def" }; -const std::set<StringRef> NoFoldSet = { +static const std::set<StringRef> NoFoldSet = { #define NOFOLD(INSN) #INSN, #include "X86ManualFoldTables.def" }; @@ -62,6 +63,7 @@ static bool isExplicitUnalign(const CodeGenInstruction *Inst) { }); } +namespace { class X86FoldTablesEmitter { const RecordKeeper &Records; const CodeGenTarget Target; @@ -230,6 +232,7 @@ private: OS << "};\n\n"; } }; +} // namespace // Return true if one of the instruction's operands is a RST register class static bool hasRSTRegClass(const CodeGenInstruction *Inst) { @@ -318,6 +321,7 @@ static bool isNOREXRegClass(const Record *Op) { // Function object - Operator() returns true if the given Reg instruction // matches the Mem instruction of this object. 
+namespace { class IsMatch { const CodeGenInstruction *MemInst; const X86Disassembler::RecognizableInstrBase MemRI; diff --git a/llvm/utils/TableGen/X86InstrMappingEmitter.cpp b/llvm/utils/TableGen/X86InstrMappingEmitter.cpp index be5e2a7..2745ba7 100644 --- a/llvm/utils/TableGen/X86InstrMappingEmitter.cpp +++ b/llvm/utils/TableGen/X86InstrMappingEmitter.cpp @@ -66,6 +66,7 @@ private: void printTable(ArrayRef<Entry> Table, StringRef Name, StringRef Macro, raw_ostream &OS); }; +} // namespace void X86InstrMappingEmitter::printClassDef(raw_ostream &OS) { OS << "struct X86TableEntry {\n" @@ -106,6 +107,7 @@ void X86InstrMappingEmitter::printTable(ArrayRef<Entry> Table, StringRef Name, printMacroEnd(Macro, OS); } +namespace { class IsMatch { const CodeGenInstruction *OldInst; @@ -146,6 +148,7 @@ public: return true; } }; +} // namespace static bool isInteresting(const Record *Rec) { // _REV instruction should not appear before encoding optimization @@ -368,7 +371,6 @@ void X86InstrMappingEmitter::run(raw_ostream &OS) { emitND2NonNDTable(Insts, OS); emitSSE2AVXTable(Insts, OS); } -} // namespace static TableGen::Emitter::OptClass<X86InstrMappingEmitter> X("gen-x86-instr-mapping", "Generate X86 instruction mapping"); diff --git a/llvm/utils/TableGen/X86MnemonicTables.cpp b/llvm/utils/TableGen/X86MnemonicTables.cpp index 85bd4df..7851919 100644 --- a/llvm/utils/TableGen/X86MnemonicTables.cpp +++ b/llvm/utils/TableGen/X86MnemonicTables.cpp @@ -30,6 +30,7 @@ public: // Output X86 mnemonic tables. void run(raw_ostream &OS); }; +} // namespace void X86MnemonicTablesEmitter::run(raw_ostream &OS) { emitSourceFileHeader("X86 Mnemonic tables", OS); @@ -83,7 +84,5 @@ void X86MnemonicTablesEmitter::run(raw_ostream &OS) { OS << "} // end namespace X86\n} // end namespace llvm"; } -} // namespace - static TableGen::Emitter::OptClass<X86MnemonicTablesEmitter> X("gen-x86-mnemonic-tables", "Generate X86 mnemonic tables"); diff --git a/llvm/utils/TableGen/X86ModRMFilters.h b/llvm/utils/TableGen/X86ModRMFilters.h index b579f22..7bf111f 100644 --- a/llvm/utils/TableGen/X86ModRMFilters.h +++ b/llvm/utils/TableGen/X86ModRMFilters.h @@ -19,9 +19,7 @@ #include <cstdint> -namespace llvm { - -namespace X86Disassembler { +namespace llvm::X86Disassembler { /// ModRMFilter - Abstract base class for classes that recognize patterns in /// ModR/M bytes. 
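The X86ModRMFilters.h hunk here (like X86DisassemblerShared.h above) collapses the two nested namespace blocks into a single C++17 nested namespace definition. A small sketch, with hypothetical `outer::inner` names, showing that the two spellings are equivalent and that the combined form needs only one closing brace and comment:

```cpp
#include <cstdio>

// Pre-C++17 spelling of the same thing:
//   namespace outer { namespace inner { int answer() { return 42; } } }
namespace outer::inner {
int answer() { return 42; }
} // namespace outer::inner

int main() { std::printf("%d\n", outer::inner::answer()); }
```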
@@ -135,8 +133,6 @@ public: bool accepts(uint8_t modRM) const override { return (ModRM == modRM); } }; -} // namespace X86Disassembler - -} // namespace llvm +} // namespace llvm::X86Disassembler #endif diff --git a/llvm/utils/TableGen/X86RecognizableInstr.h b/llvm/utils/TableGen/X86RecognizableInstr.h index b74e74d..52f9538 100644 --- a/llvm/utils/TableGen/X86RecognizableInstr.h +++ b/llvm/utils/TableGen/X86RecognizableInstr.h @@ -22,8 +22,6 @@ #include <string> #include <vector> -struct InstructionSpecifier; - namespace llvm { class Record; #define X86_INSTR_MRM_MAPPING \ @@ -179,6 +177,8 @@ enum { ExplicitREX2 = 1, ExplicitEVEX = 3 }; namespace X86Disassembler { class DisassemblerTables; +struct InstructionSpecifier; + /// Extract common fields of a single X86 instruction from a CodeGenInstruction struct RecognizableInstrBase { /// The OpPrefix field from the record diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/sources.gni b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/sources.gni index 2ab2a0e..5d1fb02 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/sources.gni +++ b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/sources.gni @@ -529,7 +529,7 @@ if (current_cpu == "ve") { if (current_cpu == "wasm") { builtins_sources += [ "wasm/__c_longjmp.S", - "wasm/__cpp_exceptions.S", + "wasm/__cpp_exception.S", ] } diff --git a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn index 6ca766ca..38ba466 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn @@ -103,6 +103,7 @@ static_library("Support") { "IntEqClasses.cpp", "IntervalMap.cpp", "JSON.cpp", + "Jobserver.cpp", "KnownBits.cpp", "KnownFPClass.cpp", "LEB128.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn index 42c1a15..a25f058 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn @@ -56,6 +56,7 @@ unittest("SupportTests") { "InstructionCostTest.cpp", "InterleavedRangeTest.cpp", "JSONTest.cpp", + "JobserverTest.cpp", "KnownBitsTest.cpp", "LEB128Test.cpp", "LineIteratorTest.cpp", diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td index 01ab6df..77e833f 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -2383,15 +2383,38 @@ def OpenACC_LoopOp : OpenACC_Op<"loop", let summary = "loop construct"; let description = [{ - The "acc.loop" operation represents the OpenACC loop construct. The lower - and upper bounds specify a half-open range: the range includes the lower - bound but does not include the upper bound. If the `inclusive` attribute is - set then the upper bound is included. + The `acc.loop` operation represents the OpenACC loop construct and, when + bounds are included, the associated source-language loop iterators. The + lower and upper bounds specify a half-open range: the range includes the + lower bound but does not include the upper bound. If the `inclusive` + attribute is set then the upper bound is included. + + In cases where the OpenACC loop directive applies to multiple + source-language loops, such as with `collapse` or `tile`, multiple + induction-variable arguments are used to capture each loop. 
Having + such a representation ensures that no intermediate transformation, + such as loop-invariant code motion, breaks the property requested by + the clause on the loop construct. + + Each `acc.loop` holds private and reduction operands, which are the + SSA values from the corresponding `acc.private` or `acc.reduction` + operations. Additionally, firstprivate operands are supported to + represent cases where privatization is needed with initialization + from an original value. While the OpenACC specification does not + explicitly support firstprivate on loop constructs, this extension + enables representing privatization scenarios that arise from an + optimization and codegen pipeline operating on the acc dialect. + + Through the `combined` attribute, the operation records that it + originates from a combined construct (e.g., `parallel loop`, + `kernels loop`, `serial loop`), even though the `acc.loop` is + decomposed from the compute operation representing the compute + construct. Example: ```mlir - acc.loop gang() vector() (%arg3 : index, %arg4 : index, %arg5 : index) = + acc.loop gang() vector() (%arg3 : index, %arg4 : index, %arg5 : index) = (%c0, %c0, %c0 : index, index, index) to (%c10, %c10, %c10 : index, index, index) step (%c1, %c1, %c1 : index, index, index) { @@ -2400,10 +2423,12 @@ def OpenACC_LoopOp : OpenACC_Op<"loop", } attributes { collapse = [3] } ``` - `collapse`, `gang`, `worker`, `vector`, `seq`, `independent`, `auto` and - `tile` operands are supported with `device_type` information. They should - only be accessed by the extra provided getters. If modified, the - corresponding `device_type` attributes must be modified as well. + `collapse`, `gang`, `worker`, `vector`, `seq`, `independent`, `auto`, + `cache`, and `tile` operands are supported with `device_type` + information. These clauses should only be accessed through the provided + device-type-aware getter methods. When modifying these operands, the + corresponding `device_type` attributes must be updated to maintain + consistency between operands and their target device types. }]; let arguments = (ins @@ -2433,6 +2458,8 @@ def OpenACC_LoopOp : OpenACC_Op<"loop", Variadic<OpenACC_AnyPointerOrMappableType>:$cacheOperands, Variadic<OpenACC_AnyPointerOrMappableType>:$privateOperands, OptionalAttr<SymbolRefArrayAttr>:$privatizationRecipes, + Variadic<OpenACC_AnyPointerOrMappableType>:$firstprivateOperands, + OptionalAttr<SymbolRefArrayAttr>:$firstprivatizationRecipes, Variadic<AnyType>:$reductionOperands, OptionalAttr<SymbolRefArrayAttr>:$reductionRecipes, OptionalAttr<OpenACC_CombinedConstructsAttr>:$combined @@ -2589,6 +2616,10 @@ def OpenACC_LoopOp : OpenACC_Op<"loop", /// Adds a private clause variable to this operation, including its recipe. void addPrivatization(MLIRContext *, mlir::acc::PrivateOp op, mlir::acc::PrivateRecipeOp recipe); + /// Adds a firstprivate clause variable to this operation, including its + /// recipe. + void addFirstPrivatization(MLIRContext *, mlir::acc::FirstprivateOp op, + mlir::acc::FirstprivateRecipeOp recipe); /// Adds a reduction clause variable to this operation, including its /// recipe. 
void addReduction(MLIRContext *, mlir::acc::ReductionOp op, @@ -2609,6 +2640,8 @@ def OpenACC_LoopOp : OpenACC_Op<"loop", type($vectorOperands), $vectorOperandsDeviceType, $vector) | `private` `(` custom<SymOperandList>( $privateOperands, type($privateOperands), $privatizationRecipes) `)` + | `firstprivate` `(` custom<SymOperandList>($firstprivateOperands, + type($firstprivateOperands), $firstprivatizationRecipes) `)` | `tile` `(` custom<DeviceTypeOperandsWithSegment>($tileOperands, type($tileOperands), $tileOperandsDeviceType, $tileOperandsSegments) `)` @@ -2665,6 +2698,8 @@ def OpenACC_LoopOp : OpenACC_Op<"loop", /*cacheOperands=*/{}, /*privateOperands=*/{}, /*privatizationRecipes=*/nullptr, + /*firstprivateOperands=*/{}, + /*firstprivatizationRecipes=*/nullptr, /*reductionOperands=*/{}, /*reductionRecipes=*/nullptr, /*combined=*/nullptr); diff --git a/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td index 83b128e..564d9c4 100644 --- a/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td @@ -27,10 +27,6 @@ def XeGPUSubgroupDistribute : Pass<"xegpu-subgroup-distribute"> { }]; let dependentDialects = ["memref::MemRefDialect", "xegpu::XeGPUDialect", "vector::VectorDialect"]; - let options = [Option< - "enableSGReductions", "enable-sg-reductions", "bool", - /*default=*/"true", - "Enable subgroup reductions using subgroup shuffles.">]; } def XeGPUPropagateLayout : Pass<"xegpu-propagate-layout"> { diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index ee3e402..6598ac1 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -2674,6 +2674,11 @@ LogicalResult acc::LoopOp::verify() { "privatizations", false))) return failure(); + if (failed(checkSymOperandList<mlir::acc::FirstprivateRecipeOp>( + *this, getFirstprivatizationRecipes(), getFirstprivateOperands(), + "firstprivate", "firstprivatizations", /*checkOperandType=*/false))) + return failure(); + if (failed(checkSymOperandList<mlir::acc::ReductionRecipeOp>( *this, getReductionRecipes(), getReductionOperands(), "reduction", "reductions", false))) @@ -2737,7 +2742,8 @@ LogicalResult acc::LoopOp::verify() { } unsigned LoopOp::getNumDataOperands() { - return getReductionOperands().size() + getPrivateOperands().size(); + return getReductionOperands().size() + getPrivateOperands().size() + + getFirstprivateOperands().size(); } Value LoopOp::getDataOperand(unsigned i) { @@ -3117,6 +3123,21 @@ void acc::LoopOp::addPrivatization(MLIRContext *context, setPrivatizationRecipesAttr(mlir::ArrayAttr::get(context, recipes)); } +void acc::LoopOp::addFirstPrivatization( + MLIRContext *context, mlir::acc::FirstprivateOp op, + mlir::acc::FirstprivateRecipeOp recipe) { + getFirstprivateOperandsMutable().append(op.getResult()); + + llvm::SmallVector<mlir::Attribute> recipes; + + if (getFirstprivatizationRecipesAttr()) + llvm::copy(getFirstprivatizationRecipesAttr(), std::back_inserter(recipes)); + + recipes.push_back( + mlir::SymbolRefAttr::get(context, recipe.getSymName().str())); + setFirstprivatizationRecipesAttr(mlir::ArrayAttr::get(context, recipes)); +} + void acc::LoopOp::addReduction(MLIRContext *context, mlir::acc::ReductionOp op, mlir::acc::ReductionRecipeOp recipe) { getReductionOperandsMutable().append(op.getResult()); diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp 
b/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp index 3a6684f..255f2bf 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp @@ -796,7 +796,7 @@ struct ConvertVectorStore final : OpConversionPattern<vector::StoreOp> { currentSourceIndex, remainingElements, 0); // Generate back mask. - auto maskValues = SmallVector<bool>(emulatedPerContainerElem, 0); + auto maskValues = SmallVector<bool>(emulatedPerContainerElem, false); std::fill_n(maskValues.begin(), remainingElements, 1); auto backMask = arith::ConstantOp::create( rewriter, loc, diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp index 882691f..f1dbc5d 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp @@ -875,14 +875,17 @@ struct StoreDistribution final : public gpu::WarpDistributionPattern { storeScatterOp, "Some vector operands have no layouts, using defaults instead."); } - VectorType distPayloadTy = distStoreVecByWarpOpOrFailure.value(); - VectorType expectedPayloadTy = VectorType::get( - {distPayloadTy.getNumElements()}, distPayloadTy.getElementType()); + // Distributed store payload type according to the lane layout. + VectorType distPayloadTyByWarpOp = distStoreVecByWarpOpOrFailure.value(); + // Expected distributed payload type is always 1D. + VectorType expectedPayloadTy = + VectorType::get({distPayloadTyByWarpOp.getNumElements()}, + distPayloadTyByWarpOp.getElementType()); SmallVector<size_t> newRetIndices; SmallVector<Value> operands = storeScatterOp->getOperands(); SmallVector<Type> operandTypesToYield = { - expectedPayloadTy, operands[1].getType(), + distPayloadTyByWarpOp, operands[1].getType(), distOffsetsByWarpOpOrFailure.value(), distMaskByWarpOpOrFailure.value()}; @@ -890,8 +893,11 @@ struct StoreDistribution final : public gpu::WarpDistributionPattern { rewriter, warpOp, operands, operandTypesToYield, newRetIndices); SmallVector<Value> newStoreScatterOpOperands = llvm::map_to_vector( newRetIndices, [&](size_t idx) { return newWarpOp.getResult(idx); }); - + // The payload operand may need type adjustment due to mismatch between warp + // distributed type and expected SIMT type. rewriter.setInsertionPointAfter(newWarpOp); + newStoreScatterOpOperands[0] = resolveDistributedTy( + newStoreScatterOpOperands[0], expectedPayloadTy, rewriter); xegpu::StoreScatterOp newOp = xegpu::StoreScatterOp::create( rewriter, newWarpOp.getLoc(), TypeRange{}, newStoreScatterOpOperands, storeScatterOp->getAttrs()); @@ -976,8 +982,11 @@ struct LoadDistribution final : public gpu::WarpDistributionPattern { distMaskByWarpOpOrFailure.value()}; const unsigned operandIdx = producedByLastLoad->getOperandNumber(); - VectorType loadVecTy = + VectorType distResultTy = cast<VectorType>(warpOp.getResult(operandIdx).getType()); + // Distributed load op will always be 1D. 
+ VectorType loadVecTy = VectorType::get({distResultTy.getNumElements()}, + distResultTy.getElementType()); gpu.WarpExecuteOnLane0Op newWarpOp = moveRegionToNewWarpOpAndAppendReturns( rewriter, warpOp, operands, operandTypesToYield, newRetIndices); @@ -991,13 +1000,16 @@ struct LoadDistribution final : public gpu::WarpDistributionPattern { loadGatherOp->getAttrs()); xegpu::removeLayoutAttrs(newOp); Value distributedVal = newWarpOp.getResult(operandIdx); - rewriter.replaceAllUsesWith(distributedVal, newOp->getResult(0)); + // Resolve the output type and replace all uses. + rewriter.replaceAllUsesWith( + distributedVal, + resolveDistributedTy(newOp.getResult(), distResultTy, rewriter)); return success(); } }; /// Helper to rewrite a 2D VectorMultiReductionOp into a sequence of 1D -/// VectorReductionOps. +/// VectorReductionOps. We also insert layouts for the newly created ops. static Value lowerToVectorReductions(TypedValue<VectorType> src, TypedValue<VectorType> acc, vector::CombiningKind kind, @@ -1014,6 +1026,9 @@ static Value lowerToVectorReductions(TypedValue<VectorType> src, Value reductionResult = arith::ConstantOp::create( rewriter, loc, acc.getType(), DenseElementsAttr::get(acc.getType(), zeroAttr)); + // Reduction result should have the same layout as the accumulator. + xegpu::setDistributeLayoutAttr(cast<OpResult>(reductionResult), + xegpu::getDistributeLayoutAttr(acc)); // For each slice of the source, extract the slice vector, do a reduction // and insert the reduced value back into the result vector. for (int i = 0; i < nSlices; ++i) { @@ -1029,13 +1044,23 @@ static Value lowerToVectorReductions(TypedValue<VectorType> src, vector::ExtractStridedSliceOp::create(rewriter, loc, src, sliceOffsets, sliceSizes, {1, 1}); int64_t nSliceElements = extractOp.getResult().getType().getNumElements(); - Value slice = vector::ShapeCastOp::create( + vector::ShapeCastOp slice = vector::ShapeCastOp::create( rewriter, loc, VectorType::get({nSliceElements}, sourceType.getElementType()), extractOp.getResult()); + // Shape cast is currently handled on the xegpu side. So layouts must be + // retained during lowering. Shape cast output has the same layout as the + // accumulator. Shape cast source has the same layout as the original + // reduction source. + // TODO: other ops generated here may also need layout attributes. + xegpu::setDistributeLayoutAttr(slice->getOpOperand(0), + xegpu::getDistributeLayoutAttr(src)); + xegpu::setDistributeLayoutAttr(slice->getOpResult(0), + xegpu::getDistributeLayoutAttr(acc)); + // Extract and reduction produce scalars, so no result layout is needed. Value accExtract = vector::ExtractOp::create(rewriter, loc, acc, i); - Value reduction = - vector::ReductionOp::create(rewriter, loc, kind, slice, accExtract); + Value reduction = vector::ReductionOp::create( + rewriter, loc, kind, slice.getResult(), accExtract); reductionResult = vector::InsertOp::create(rewriter, loc, reduction, reductionResult, i); } @@ -1107,7 +1132,7 @@ struct VectorMultiReductionDistribution : public gpu::WarpDistributionPattern { return failure(); auto reductionOp = cast<vector::MultiDimReductionOp>(yieldOperand->get().getDefiningOp()); - unsigned operandNumber = yieldOperand->getOperandNumber(); + unsigned operandIdx = yieldOperand->getOperandNumber(); VectorType sourceType = reductionOp.getSourceVectorType(); // Only 2D vectors are supported. 
if (sourceType.getRank() != 2) @@ -1121,7 +1146,7 @@ struct VectorMultiReductionDistribution : public gpu::WarpDistributionPattern { warpOp, "Only 1 reduction dimension is supported."); int64_t reductionDim = reductionDims[0]; VectorType distributedResultType = - cast<VectorType>(warpOp.getResult(operandNumber).getType()); + cast<VectorType>(warpOp.getResult(operandIdx).getType()); VectorType resultType = cast<VectorType>(reductionOp.getType()); xegpu::DistributeLayoutAttr sourceLayout = xegpu::getDistributeLayoutAttr(reductionOp.getSource()); @@ -1184,7 +1209,7 @@ struct VectorMultiReductionDistribution : public gpu::WarpDistributionPattern { cast<TypedValue<VectorType>>(newWarpOp->getResult(newRetIndices[1])), reductionOp.getKind(), reductionDim, reductionOp.getLoc(), rewriter); // Replace the warp op result with the final result. - rewriter.replaceAllUsesWith(reductionOp.getResult(), result); + rewriter.replaceAllUsesWith(newWarpOp.getResult(operandIdx), result); return success(); } // For non-lane-local case, we simply rewrite the MultiReductionOp in terms @@ -1217,7 +1242,7 @@ struct VectorShapeCastDistribution : public gpu::WarpDistributionPattern { auto resultDistTy = cast<VectorType>(warpOp.getResult(operandNumber).getType()); xegpu::DistributeLayoutAttr sourceLayout = - xegpu::getDistributeLayoutAttr(shapeCastOp.getSource()); + xegpu::getDistributeLayoutAttr(shapeCastOp->getOpOperand(0)); xegpu::DistributeLayoutAttr resultLayout = xegpu::getDistributeLayoutAttr(shapeCastOp.getResult()); if (!sourceLayout || !resultLayout) @@ -1403,11 +1428,6 @@ namespace { struct XeGPUSubgroupDistributePass final : public xegpu::impl::XeGPUSubgroupDistributeBase< XeGPUSubgroupDistributePass> { - XeGPUSubgroupDistributePass() = default; - XeGPUSubgroupDistributePass(const XeGPUSubgroupDistributePass &other) = - default; - XeGPUSubgroupDistributePass(xegpu::XeGPUSubgroupDistributeOptions options) - : XeGPUSubgroupDistributeBase(options) {} void runOnOperation() override; }; } // namespace @@ -1515,10 +1535,9 @@ void XeGPUSubgroupDistributePass::runOnOperation() { return laneVal; }; - if (enableSGReductions) - vector::populateDistributeReduction( - patterns, warpReduction, - /*pattern benefit=*/regularPatternBenefit); + vector::populateDistributeReduction( + patterns, warpReduction, + /*pattern benefit=*/regularPatternBenefit); vector::populatePropagateWarpVectorDistributionPatterns( patterns, distributionFn, shuffleFn, diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir index cb69058..1484d7e 100644 --- a/mlir/test/Dialect/OpenACC/ops.mlir +++ b/mlir/test/Dialect/OpenACC/ops.mlir @@ -358,6 +358,41 @@ func.func @acc_loop_multiple_block() { // ----- +acc.firstprivate.recipe @firstprivatization_memref_10xf32 : memref<10xf32> init { +^bb0(%arg0: memref<10xf32>): + %0 = memref.alloca() : memref<10xf32> + acc.yield %0 : memref<10xf32> +} copy { +^bb0(%arg0: memref<10xf32>, %arg1: memref<10xf32>): + memref.copy %arg0, %arg1 : memref<10xf32> to memref<10xf32> + acc.terminator +} destroy { +^bb0(%arg0: memref<10xf32>): + acc.terminator +} + +func.func @testloopfirstprivate(%a: memref<10xf32>, %b: memref<10xf32>) -> () { + %c0 = arith.constant 0 : index + %c10 = arith.constant 10 : index + %c1 = arith.constant 1 : index + %firstprivate = acc.firstprivate varPtr(%a : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32> + acc.loop firstprivate(@firstprivatization_memref_10xf32 -> %firstprivate : memref<10xf32>) control(%iv : index) = (%c0 : index) to (%c10 : index) step 
(%c1 : index) { + "test.openacc_dummy_op"() : () -> () + acc.yield + } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]} + return +} + +// CHECK-LABEL: func.func @testloopfirstprivate( +// CHECK-SAME: %[[ARG0:.*]]: memref<10xf32>, %[[ARG1:.*]]: memref<10xf32>) +// CHECK: %[[FIRSTPRIVATE:.*]] = acc.firstprivate varPtr(%[[ARG0]] : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32> +// CHECK: acc.loop firstprivate(@firstprivatization_memref_10xf32 -> %[[FIRSTPRIVATE]] : memref<10xf32>) control(%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) { +// CHECK: "test.openacc_dummy_op"() : () -> () +// CHECK: acc.yield +// CHECK: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]} + +// ----- + acc.private.recipe @privatization_memref_10_f32 : memref<10xf32> init { ^bb0(%arg0: memref<10xf32>): %0 = memref.alloc() : memref<10xf32> @@ -535,6 +570,7 @@ acc.firstprivate.recipe @firstprivatization_memref_10xf32 : memref<10xf32> init acc.yield %0 : memref<10xf32> } copy { ^bb0(%arg0: memref<10xf32>, %arg1: memref<10xf32>): + memref.copy %arg0, %arg1 : memref<10xf32> to memref<10xf32> acc.terminator } destroy { ^bb0(%arg0: memref<10xf32>): diff --git a/mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir b/mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir new file mode 100644 index 0000000..40b66d1 --- /dev/null +++ b/mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir @@ -0,0 +1,575 @@ +// RUN: mlir-opt --xevm-attach-target='module=xevm_* chip=pvc' -test-xegpu-sg-distribute -allow-unregistered-dialect \ +// RUN: -canonicalize -cse -split-input-file %s | FileCheck %s + +// CHECK-LABEL: gpu.func @store_nd_1d +// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: index) { +// CHECK: %[[W:.*]]:3 = gpu.warp_execute_on_lane_0(%[[ARG0]])[16] +// CHECK-SAME: -> (vector<1xf32>, !xegpu.tensor_desc<16xf32, #xegpu.layout<lane_layout = [16], lane_data = [1]>>, index) { +// CHECK: gpu.yield %{{.*}} : vector<16xf32>, +// CHECK-SAME: !xegpu.tensor_desc<16xf32, #xegpu.layout<lane_layout = [16], lane_data = [1]>>, index +// CHECK-NEXT: } +// CHECK-NEXT: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[W]]#1 : !xegpu.tensor_desc<16xf32, +// CHECK-SAME: #xegpu.layout<lane_layout = [16], lane_data = [1]>> to !xegpu.tensor_desc<16xf32> {resolve_simt_type_mismatch} +// CHECK-NEXT: xegpu.store_nd %[[W]]#0, %[[T1]][%[[W]]#2] : vector<1xf32>, !xegpu.tensor_desc<16xf32> +gpu.module @xevm_module{ + gpu.func @store_nd_1d(%laneid: index) { + %c0 = arith.constant 0 : index + gpu.warp_execute_on_lane_0(%laneid)[16] { + %0 = "some_op"() : () -> !xegpu.tensor_desc<16xf32, #xegpu.layout<lane_layout = [16], lane_data = [1]>> + %cst = "some_op"() : () -> vector<16xf32> + xegpu.store_nd %cst, %0 [%c0] {layout_operand_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} + : vector<16xf32>, !xegpu.tensor_desc<16xf32, #xegpu.layout<lane_layout = [16], lane_data = [1]>> + } + gpu.return + } +} + +// ----- +// CHECK-LABEL: gpu.func @store_nd_2d +// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: index) { +// CHECK: %[[W:.*]]:4 = gpu.warp_execute_on_lane_0(%[[ARG0]])[16] +// CHECK-SAME: -> (vector<16x1xf16>, !xegpu.tensor_desc<16x16xf16, +// CHECK-SAME: #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, index, index) { +// CHECK: gpu.yield %{{.*}} : vector<16x16xf16>, !xegpu.tensor_desc<16x16xf16, +// CHECK-SAME: #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, index, index +// CHECK-NEXT: } +// CHECK-NEXT: %[[CAST:.*]] = vector.shape_cast %[[W]]#0 : vector<16x1xf16> 
to vector<16xf16> +// CHECK-NEXT: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[W]]#1 : !xegpu.tensor_desc<16x16xf16, +// CHECK-SAME: #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> to !xegpu.tensor_desc<16x16xf16> {resolve_simt_type_mismatch} +// CHECK-NEXT: xegpu.store_nd %[[CAST]], %[[T1]][%[[W]]#2, %[[W]]#3] : vector<16xf16>, !xegpu.tensor_desc<16x16xf16> +gpu.module @xevm_module{ + gpu.func @store_nd_2d(%laneid : index) { + %c0 = arith.constant 0 : index + gpu.warp_execute_on_lane_0(%laneid)[16] { + %0 = "some_op"() : () -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> + %cst = "some_op"() : () -> vector<16x16xf16> + xegpu.store_nd %cst, %0 [%c0, %c0] {layout_operand_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} + : vector<16x16xf16>, !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> + } + gpu.return + } +} + + + +// ----- +// CHECK-LABEL: gpu.func @load_nd_1d +// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: index) { +// CHECK: %[[W:.*]]:3 = gpu.warp_execute_on_lane_0(%[[ARG0]])[16] -> (vector<1xf32>, +// CHECK-SAME: !xegpu.tensor_desc<16xf32, #xegpu.layout<lane_layout = [16], lane_data = [1]>>, index) { +// CHECK: gpu.yield %{{.*}} : vector<16xf32>, !xegpu.tensor_desc<16xf32, +// CHECK-SAME: #xegpu.layout<lane_layout = [16], lane_data = [1]>>, index +// CHECK-NEXT: } +// CHECK-NEXT: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[W]]#1 : !xegpu.tensor_desc<16xf32, +// CHECK-SAME: #xegpu.layout<lane_layout = [16], lane_data = [1]>> to !xegpu.tensor_desc<16xf32> {resolve_simt_type_mismatch} +// CHECK-NEXT: xegpu.load_nd %[[T1]][%[[W]]#2] : !xegpu.tensor_desc<16xf32> -> vector<1xf32> +gpu.module @xevm_module{ + gpu.func @load_nd_1d(%laneid: index) { + %c0 = arith.constant 0 : index + %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<1xf32>) { + %0 = "some_op"() : () -> !xegpu.tensor_desc<16xf32, #xegpu.layout<lane_layout = [16], lane_data = [1]>> + %1 = xegpu.load_nd %0 [%c0] {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} : + !xegpu.tensor_desc<16xf32, #xegpu.layout<lane_layout = [16], lane_data = [1]>> -> vector<16xf32> + gpu.yield %1 : vector<16xf32> + } + "some_user_op"(%r) : (vector<1xf32>) -> () + gpu.return + } +} + +// ----- +// CHECK-LABEL: gpu.func @load_nd_2d +// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: index) { +// CHECK: %[[W:.*]]:4 = gpu.warp_execute_on_lane_0(%[[ARG0]])[16] -> (vector<16x1xf16>, !xegpu.tensor_desc<16x16xf16, +// CHECK-SAME: #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, index, index) { +// CHECK: gpu.yield %{{.*}} : vector<16x16xf16>, !xegpu.tensor_desc<16x16xf16, +// CHECK-SAME: #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, index, index +// CHECK-NEXT: } +// CHECK-NEXT: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[W]]#1 : !xegpu.tensor_desc<16x16xf16, +// CHECK-SAME: #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> to !xegpu.tensor_desc<16x16xf16> {resolve_simt_type_mismatch} +// CHECK-NEXT: %[[T2:.*]] = xegpu.load_nd %[[T1]][%[[W]]#2, %[[W]]#3] : !xegpu.tensor_desc<16x16xf16> -> vector<16xf16> +// CHECK: vector.shape_cast %[[T2]] : vector<16xf16> to vector<16x1xf16> +gpu.module @xevm_module{ + gpu.func @load_nd_2d(%laneid: index) { + %c0 = arith.constant 0 : index + %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<16x1xf16>) { + %0 = "some_op"() : () -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> + %1 = xegpu.load_nd %0[%c0, %c0] 
{layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} + : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<16x16xf16> + gpu.yield %1 : vector<16x16xf16> + } + "some_user_op"(%r) : (vector<16x1xf16>) -> () + gpu.return + } +} + +// ----- +// CHECK-LABEL: gpu.func @load_nd_array_length +// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: index) { +// CHECK: %[[W:.*]]:4 = gpu.warp_execute_on_lane_0(%[[ARG0]])[16] -> (vector<2x16x1xf16>, +// CHECK-SAME: !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<array_length = 2 : i64>, +// CHECK-SAME: #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, index, index) { +// CHECK: gpu.yield %{{.*}} : vector<2x16x16xf16>, !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr< +// CHECK-SAME: array_length = 2 : i64>, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, index, index +// CHECK-NEXT: } +// CHECK-NEXT: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[W]]#1 : !xegpu.tensor_desc<16x16xf16, +// CHECK-SAME: #xegpu.block_tdesc_attr<array_length = 2 : i64>, #xegpu.layout<lane_layout = [1, 16], +// CHECK-SAME: lane_data = [1, 1]>> to !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<array_length = 2 : i64>> +// CHECK-NEXT: %[[T2:.*]] = xegpu.load_nd %[[T1]][%[[W]]#2, %[[W]]#3] : !xegpu.tensor_desc<16x16xf16, +// CHECK-SAME: #xegpu.block_tdesc_attr<array_length = 2 : i64>> -> vector<32xf16> +// CHECK-NEXT: vector.shape_cast %[[T2]] : vector<32xf16> to vector<2x16x1xf16> +gpu.module @xevm_module{ + gpu.func @load_nd_array_length(%laneid: index) { + %c0 = arith.constant 0 : index + %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<2x16x1xf16>) { + %0 = "some_op"() : () -> !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<array_length = 2 : i64>, + #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> + %1 = xegpu.load_nd %0[%c0, %c0] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} + : !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<array_length = 2 : i64>, + #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<2x16x16xf16> + gpu.yield %1 : vector<2x16x16xf16> + } + "some_user_op"(%r) : (vector<2x16x1xf16>) -> () + gpu.return + } +} + +// ----- +// CHECK-LABEL: gpu.func @dpas +// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: index) { +// CHECK: %[[W:.*]]:4 = gpu.warp_execute_on_lane_0(%[[ARG0]])[16] -> +// CHECK-SAME: (vector<8x1xf32>, vector<8x1xf16>, vector<16x1xf16>, vector<8x1xf32>) { +// CHECK: gpu.yield %{{.*}} : vector<8x16xf32>, vector<8x16xf16>, vector<16x16xf16>, vector<8x16xf32> +// CHECK-NEXT: } +// CHECK-DAG: %[[T1:.*]] = vector.shape_cast %[[W]]#1 : vector<8x1xf16> to vector<8xf16> +// CHECK-DAG: %[[T2:.*]] = vector.shape_cast %[[W]]#2 : vector<16x1xf16> to vector<16xf16> +// CHECK-DAG: %[[T3:.*]] = vector.shape_cast %[[W]]#3 : vector<8x1xf32> to vector<8xf32> +// CHECK-NEXT: %[[T4:.*]] = xegpu.dpas %[[T1]], %[[T2]], %[[T3]] : vector<8xf16>, vector<16xf16>, vector<8xf32> -> vector<8xf32> +// CHECK-NEXT: vector.shape_cast %[[T4]] : vector<8xf32> to vector<8x1xf32> +gpu.module @xevm_module{ + gpu.func @dpas(%laneid: index) { + %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<8x1xf32>) { + %0 = "some_op"() : () -> vector<8x16xf16> + %1 = "some_op"() : () -> vector<16x16xf16> + %2 = "some_op"() : () -> vector<8x16xf32> + %3 = xegpu.dpas %0, %1, %2 + { + layout_operand_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, + layout_operand_1 = #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>, + 
layout_operand_2 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, + layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]> + } + : vector<8x16xf16>, vector<16x16xf16>, vector<8x16xf32> -> vector<8x16xf32> + gpu.yield %3 : vector<8x16xf32> + } + "some_user_op"(%r) : (vector<8x1xf32>) -> () + gpu.return + } +} + + +// ----- +// CHECK-LABEL: gpu.func @create_nd_tdesc_non_memref +// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: ui64, %[[ARG1:[0-9a-zA-Z]+]]: index) { +// CHECK: %[[W:.*]]:2 = gpu.warp_execute_on_lane_0(%[[ARG1]])[16] -> (!xegpu.tensor_desc<16x16xf16, +// CHECK-SAME: #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, ui64) { +// CHECK: gpu.yield %{{.*}} : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, ui64 +// CHECK-NEXT: } +// CHECK-NEXT: %[[T1:.*]] = xegpu.create_nd_tdesc %[[W]]#1, shape : [64, 128], strides : [128, 1] : ui64 -> !xegpu.tensor_desc<16x16xf16> +// CHECK-NEXT: builtin.unrealized_conversion_cast %[[T1]] : !xegpu.tensor_desc<16x16xf16> to !xegpu.tensor_desc<16x16xf16, +// CHECK-SAME: #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> {resolve_simt_type_mismatch} +gpu.module @xevm_module{ + gpu.func @create_nd_tdesc_non_memref(%arg0: ui64, %laneid: index) { + %c0 = arith.constant 0 : index + %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (!xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>) { + %0 = xegpu.create_nd_tdesc %arg0, shape:[64, 128], strides:[128, 1] : ui64 -> + !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> + gpu.yield %0 : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> + } + "some_user_op"(%r) + : (!xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>) -> () + gpu.return + } +} + +// ----- +// CHECK-LABEL: gpu.func @prefetch_2d +// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: index) { +// CHECK: %[[W:.*]]:3 = gpu.warp_execute_on_lane_0(%[[ARG0]])[16] -> (!xegpu.tensor_desc<16x16xf16, +// CHECK-SAME: #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, index, index) { +// CHECK: gpu.yield %{{.*}} : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> +// CHECK-SAME: , index, index +// CHECK-NEXT: } +// CHECK-NEXT: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[W]]#0 : !xegpu.tensor_desc<16x16xf16, +// CHECK-SAME: #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> to !xegpu.tensor_desc<16x16xf16> {resolve_simt_type_mismatch} +// CHECK-NEXT: xegpu.prefetch_nd %[[T1]][%[[W]]#1, %[[W]]#2] +// CHECK-SAME: <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<16x16xf16> +gpu.module @xevm_module{ + gpu.func @prefetch_2d(%laneid: index) { + %c0 = arith.constant 0 : index + gpu.warp_execute_on_lane_0(%laneid)[16] { + %0 = "some_op"() : () + -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> + xegpu.prefetch_nd %0[%c0, %c0] + <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> + : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> + } + gpu.return + } +} + +// ----- +// CHECK-LABEL: gpu.func @prefetch_1d +// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: index) { +// CHECK: %[[W:.*]]:2 = gpu.warp_execute_on_lane_0(%[[ARG0]])[16] -> (!xegpu.tensor_desc<16xf16, +// CHECK-SAME: #xegpu.layout<lane_layout = [16], lane_data = [1]>>, index) { +// CHECK: gpu.yield %{{.*}} : 
!xegpu.tensor_desc<16xf16, #xegpu.layout<lane_layout = [16], lane_data = [1]>>, index +// CHECK-NEXT: } +// CHECK-NEXT: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[W]]#0 : !xegpu.tensor_desc<16xf16, +// CHECK-SAME: #xegpu.layout<lane_layout = [16], lane_data = [1]>> to !xegpu.tensor_desc<16xf16> {resolve_simt_type_mismatch} +// CHECK-NEXT: xegpu.prefetch_nd %[[T1]][%[[W]]#1] <{l1_hint = #xegpu.cache_hint<cached>, +// CHECK-SAME: l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<16xf16> +gpu.module @xevm_module{ + gpu.func @prefetch_1d(%laneid: index) { + %c0 = arith.constant 0 : index + gpu.warp_execute_on_lane_0(%laneid)[16] { + %0 = "some_op"() : () + -> !xegpu.tensor_desc<16xf16, #xegpu.layout<lane_layout = [16], lane_data = [1]>> + xegpu.prefetch_nd %0[%c0] + <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> + : !xegpu.tensor_desc<16xf16, #xegpu.layout<lane_layout = [16], lane_data = [1]>> + } + gpu.return + } +} + +// ----- +// CHECK-LABEL: gpu.func @gpu_barrier({{.*}}) { +// CHECK: gpu.warp_execute_on_lane_0(%{{.*}})[16] -> ({{.*}}) { +// CHECK: gpu.yield %{{.*}} +// CHECK: } +// CHECK: %{{.*}} = xegpu.load_nd %{{.*}} : !xegpu.tensor_desc<16xf16> -> vector<1xf16> +// CHECK: gpu.barrier +gpu.module @xevm_module{ + gpu.func @gpu_barrier(%laneid: index) { + %c0 = arith.constant 0 : index + %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<1xf16>) { + %0 = "some_op"() : () -> !xegpu.tensor_desc<16xf16, #xegpu.layout<lane_layout = [16], lane_data = [1]>> + %1 = xegpu.load_nd %0[%c0] + {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} + : !xegpu.tensor_desc<16xf16, #xegpu.layout<lane_layout = [16], lane_data = [1]>> -> vector<16xf16> + gpu.barrier + gpu.yield %1 : vector<16xf16> + } + "some_user_op"(%r) : (vector<1xf16>) -> () + gpu.return + } +} + +// ----- +// CHECK-LABEL: gpu.func @vector_multi_reduction_dim1_distributed_dim0_reduction +// CHECK: %[[ACC:.*]] = arith.constant {{.*}} dense<0.000000e+00> : vector<32xf32> +// CHECK: %[[W:.*]]:3 = gpu.warp_execute_on_lane_0(%{{.*}})[16] +// CHECK-SAME: -> (vector<2xf32>, vector<16x2xf32>, vector<2xf32>) { +// CHECK: %[[SRC:.*]] = "some_def"() {{.*}} : () -> vector<16x32xf32> +// CHECK: gpu.yield %{{.*}}, %[[SRC]], %[[ACC]] : vector<32xf32>, vector<16x32xf32>, vector<32xf32> +// CHECK-NEXT: } +// CHECK: %[[T1:.*]] = vector.extract_strided_slice %[[W]]#1 +// CHECK-SAME: {offsets = [0, 0], sizes = [16, 1], strides = [1, 1]} : vector<16x2xf32> to vector<16x1xf32> +// CHECK: %[[T2:.*]] = vector.shape_cast %[[T1]] : vector<16x1xf32> to vector<16xf32> +// CHECK: %[[T3:.*]] = vector.extract %[[W]]#2[0] : f32 from vector<2xf32> +// CHECK: %[[T4:.*]] = vector.reduction <add>, %[[T2]], %[[T3]] : vector<16xf32> into f32 +// CHECK: %[[T5:.*]] = vector.extract_strided_slice %[[W]]#1 +// CHECK-SAME: {offsets = [0, 1], sizes = [16, 1], strides = [1, 1]} : vector<16x2xf32> to vector<16x1xf32> +// CHECK: %[[T6:.*]] = vector.shape_cast %[[T5]] : vector<16x1xf32> to vector<16xf32> +// CHECK: %[[T7:.*]] = vector.extract %[[W]]#2[1] : f32 from vector<2xf32> +// CHECK: %[[T8:.*]] = vector.reduction <add>, %[[T6]], %[[T7]] : vector<16xf32> into f32 +// CHECK: %[[T9:.*]] = vector.from_elements %[[T4]], %[[T8]] : vector<2xf32> +gpu.module @xevm_module{ +gpu.func @vector_multi_reduction_dim1_distributed_dim0_reduction(%laneid: index) { + %c0 = arith.constant 0 : index + %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<2xf32>) { + %src = "some_def"() + {layout_result_0 = 
#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} + : () -> (vector<16x32xf32>) + %acc = arith.constant + {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [0]>} + dense<0.0> : vector<32xf32> + %1 = vector.multi_reduction <add>, %src, %acc + { + layout_operand_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, + layout_operand_1 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [0]>, + layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [0]> + } [0] + : vector<16x32xf32> to vector<32xf32> + gpu.yield %1 : vector<32xf32> + } + "some_user_op"(%r) : (vector<2xf32>) -> () + gpu.return +} +} + +// ----- +// CHECK-LABEL: gpu.func @vector_multi_reduction_dim1_distributed_dim1_reduction +// CHECK: %[[W:.*]] = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (vector<2xf32>) { +// CHECK-NEXT: %[[SRC:.*]] = "some_def"() {{.*}} : () -> vector<2x16xf32> +// CHECK-NEXT: %[[T2:.*]] = vector.extract %[[SRC]][0] : vector<16xf32> from vector<2x16xf32> +// CHECK-NEXT: %[[T3:.*]] = vector.reduction <add>, %[[T2]], %cst : vector<16xf32> into f32 +// CHECK-NEXT: %[[T4:.*]] = vector.extract %[[SRC]][1] : vector<16xf32> from vector<2x16xf32> +// CHECK-NEXT: %[[T5:.*]] = vector.reduction <add>, %[[T4]], %cst : vector<16xf32> into f32 +// CHECK-NEXT: %[[T6:.*]] = vector.from_elements %[[T3]], %[[T5]] : vector<2xf32> +// CHECK-NEXT: gpu.yield %[[T6]] : vector<2xf32> +// CHECK-NEXT: } +gpu.module @xevm_module{ +gpu.func @vector_multi_reduction_dim1_distributed_dim1_reduction(%laneid: index) { + %c0 = arith.constant 0 : index + %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<2xf32>) { + %src = "some_def"() + {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} + : () -> (vector<2x16xf32>) + %acc = arith.constant + {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [1]>} + dense<0.0> : vector<2xf32> + %1 = vector.multi_reduction <add>, %src, %acc + { + layout_operand_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, + layout_operand_1 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [1]>, + layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [1]> + } + [1] : vector<2x16xf32> to vector<2xf32> + gpu.yield %1 : vector<2xf32> + } + "some_user_op"(%r) : (vector<2xf32>) -> () + gpu.return +} +} + +// ----- +// CHECK-LABEL: gpu.func @vector_multi_reduction_dim0_distributed_dim1_reduction +// CHECK: %[[ACC:.*]] = arith.constant {{.*}} dense<0.000000e+00> : vector<32xf32> +// CHECK: %[[W:.*]]:3 = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (vector<2xf32>, vector<2x16xf32>, vector<2xf32>) { +// CHECK: %[[SRC:.*]] = "some_def"() {{.*}} : () -> vector<32x16xf32> +// CHECK: gpu.yield %9, %[[SRC]], %[[ACC]] : vector<32xf32>, vector<32x16xf32>, vector<32xf32> +// CHECK: } +// CHECK: %[[T1:.*]] = vector.extract %[[W]]#1[0] : vector<16xf32> from vector<2x16xf32> +// CHECK: %[[T2:.*]] = vector.extract %[[W]]#2[0] : f32 from vector<2xf32> +// CHECK: %[[T3:.*]] = vector.reduction <add>, %[[T1]], %[[T2]] : vector<16xf32> into f32 +// CHECK: %[[T4:.*]] = vector.extract %[[W]]#1[1] : vector<16xf32> from vector<2x16xf32> +// CHECK: %[[T5:.*]] = vector.extract %[[W]]#2[1] : f32 from vector<2xf32> +// CHECK: %[[T6:.*]] = vector.reduction <add>, %[[T4]], %[[T5]] : vector<16xf32> into f32 +// CHECK: %[[T7:.*]] = vector.from_elements %[[T3]], %[[T6]] : 
vector<2xf32> +gpu.module @xevm_module{ +gpu.func @vector_multi_reduction_dim0_distributed_dim1_reduction(%laneid: index) { + %c0 = arith.constant 0 : index + %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<2xf32>) { + %src = "some_def"() + {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>} + : () -> (vector<32x16xf32>) + %acc = arith.constant + {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>, dims = [1]>} + dense<0.0> : vector<32xf32> + %1 = vector.multi_reduction <add>, %src, %acc + { + layout_operand_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>, + layout_operand_1 = #xegpu.slice<#xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>, dims = [1]>, + layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>, dims = [1]> + } + [1] : vector<32x16xf32> to vector<32xf32> + gpu.yield %1 : vector<32xf32> + } + "some_user_op"(%r) : (vector<2xf32>) -> () + gpu.return +} +} + +// ----- +// CHECK-LABEL: gpu.func @vector_multi_reduction_dim0_distributed_dim0_reduction +// CHECK: %[[W:.*]] = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (vector<2xf32>) { +// CHECK: %[[SRC:.*]] = "some_def"() {{.*}} : () -> vector<16x2xf32> +// CHECK: %[[T1:.*]] = vector.extract_strided_slice %[[SRC]] +// CHECK-SAME: {offsets = [0, 0], sizes = [16, 1], strides = [1, 1]} : vector<16x2xf32> to vector<16x1xf32> +// CHECK: %[[T2:.*]] = vector.shape_cast %[[T1]] {{.*}} : vector<16x1xf32> to vector<16xf32> +// CHECK: %[[T3:.*]] = vector.reduction <add>, %[[T2]], %{{.*}} : vector<16xf32> into f32 +// CHECK: %[[T4:.*]] = vector.extract_strided_slice %[[SRC]] +// CHECK-SAME: {offsets = [0, 1], sizes = [16, 1], strides = [1, 1]} : vector<16x2xf32> to vector<16x1xf32> +// CHECK: %[[T5:.*]] = vector.shape_cast %[[T4]] {{.*}} : vector<16x1xf32> to vector<16xf32> +// CHECK: %[[T6:.*]] = vector.reduction <add>, %[[T5]], %{{.*}} : vector<16xf32> into f32 +// CHECK: %[[T7:.*]] = vector.from_elements %[[T3]], %[[T6]] : vector<2xf32> +// CHECK: gpu.yield %[[T7]] : vector<2xf32> +// CHECK: } +gpu.module @xevm_module{ +gpu.func @vector_multi_reduction_dim0_distributed_dim0_reduction(%laneid: index) { + %c0 = arith.constant 0 : index + %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<2xf32>) { + %src = "some_def"() + {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>} + : () -> (vector<16x2xf32>) + %acc = arith.constant + {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>, dims = [0]>} + dense<0.0> : vector<2xf32> + %1 = vector.multi_reduction <add>, %src, %acc + { + layout_operand_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>, + layout_operand_1 = #xegpu.slice<#xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>, dims = [0]>, + layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>, dims = [0]> + } + [0] : vector<16x2xf32> to vector<2xf32> + gpu.yield %1 : vector<2xf32> + } + "some_user_op"(%r) : (vector<2xf32>) -> () + gpu.return +} +} + +// ----- +// CHECK-LABEL: gpu.func @scatter_ops_chunksize({{.*}}) { +// CHECK: %[[OFFSETS:.*]] = arith.constant {{.*}} dense<12> : vector<16xindex> +// CHECK: %[[MASKS:.*]] = arith.constant {{.*}} dense<true> : vector<16xi1> +// CHECK: %[[W:.*]]:4 = gpu.warp_execute_on_lane_0(%{{.*}})[16] +// CHECK-SAME: -> (vector<1x8xf16>, memref<256xf16>, vector<1xindex>, vector<1xi1>) { +// CHECK: gpu.yield %{{.*}}, %{{.*}}, %[[OFFSETS]], %[[MASKS]] : +// CHECK-SAME: 
vector<16x8xf16>, memref<256xf16>, vector<16xindex>, vector<16xi1> +// CHECK-NEXT: } +// CHECK-NEXT: %[[T1:.*]] = xegpu.load %[[W]]#1[%[[W]]#2], %[[W]]#3 <{chunk_size = 8 : i64}> +// CHECK-SAME: : memref<256xf16>, vector<1xindex>, vector<1xi1> -> vector<8xf16> +// CHECK-NEXT: xegpu.store %[[T1]], %[[W]]#1[%[[W]]#2], %[[W]]#3 <{chunk_size = 8 : i64}> +// CHECK-SAME: : vector<8xf16>, memref<256xf16>, vector<1xindex>, vector<1xi1> +gpu.module @xevm_module{ + gpu.func @scatter_ops_chunksize(%laneid: index, %src: memref<256xf16>) { + gpu.warp_execute_on_lane_0(%laneid)[16] { + %1 = arith.constant + {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} + dense<1>: vector<16xi1> + %offset = arith.constant + {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} + dense<12> : vector<16xindex> + %3 = xegpu.load %src[%offset], %1 <{chunk_size=8}> + { + layout_operand_1 = #xegpu.layout<lane_layout = [16], lane_data = [1]>, + layout_operand_2 = #xegpu.layout<lane_layout = [16], lane_data = [1]>, + layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 2]> + } + : memref<256xf16>, vector<16xindex>, vector<16xi1> -> vector<16x8xf16> + xegpu.store %3, %src[%offset], %1 <{chunk_size=8}> + { + layout_operand_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 2]>, + layout_operand_2 = #xegpu.layout<lane_layout = [16], lane_data = [1]>, + layout_operand_3 = #xegpu.layout<lane_layout = [16], lane_data = [1]> + } + : vector<16x8xf16>, memref<256xf16>, vector<16xindex>, vector<16xi1> + } + gpu.return + } +} + +// ----- +// CHECK-LABEL: gpu.func @scatter_ops({{.*}}) { +// CHECK: %[[OFFSETS:.*]] = arith.constant {{.*}} dense<12> : vector<16xindex> +// CHECK: %[[MASKS:.*]] = arith.constant {{.*}} dense<true> : vector<16xi1> +// CHECK: %[[W:.*]]:4 = gpu.warp_execute_on_lane_0(%{{.*}})[16] +// CHECK-SAME: -> (vector<1xf16>, memref<256xf16>, vector<1xindex>, vector<1xi1>) { +// CHECK: gpu.yield %{{.*}}, %{{.*}}, %[[OFFSETS]], %[[MASKS]] +// CHECK-SAME: : vector<16xf16>, memref<256xf16>, vector<16xindex>, vector<16xi1> +// CHECK-NEXT: } +// CHECK-NEXT: %[[T1:.*]] = xegpu.load %[[W]]#1[%[[W]]#2], %[[W]]#3 +// CHECK-SAME: : memref<256xf16>, vector<1xindex>, vector<1xi1> -> vector<1xf16> +// CHECK-NEXT: xegpu.store %[[T1]], %[[W]]#1[%[[W]]#2], %[[W]]#3 +// CHECK-SAME: : vector<1xf16>, memref<256xf16>, vector<1xindex>, vector<1xi1> +gpu.module @xevm_module{ + gpu.func @scatter_ops(%src: memref<256xf16>, %laneid: index) { + gpu.warp_execute_on_lane_0(%laneid)[16] { + %1 = arith.constant + {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} + dense<1> : vector<16xi1> + %offset = arith.constant + {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} + dense<12> : vector<16xindex> + %3 = xegpu.load %src[%offset], %1 + { + layout_operand_1 = #xegpu.layout<lane_layout = [16], lane_data = [1]>, + layout_operand_2 = #xegpu.layout<lane_layout = [16], lane_data = [1]>, + layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]> + } : memref<256xf16>, vector<16xindex>, vector<16xi1> -> vector<16xf16> + xegpu.store %3, %src[%offset], %1 + { + layout_operand_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>, + layout_operand_2 = #xegpu.layout<lane_layout = [16], lane_data = [1]>, + layout_operand_3 = #xegpu.layout<lane_layout = [16], lane_data = [1]> + } + : vector<16xf16>, memref<256xf16>, vector<16xindex>, vector<16xi1> + } + gpu.return + } +} + +// ----- +// CHECK-LABEL: gpu.func @memref_extract_aligned_pointer_as_index( +// 
CHECK: %[[W:.*]]:2 = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (index, memref<256x256xf16>) { +// CHECK: gpu.yield %{{.*}}, %{{.*}} : index, memref<256x256xf16> +// CHECK-NEXT: } +// CHECK-NEXT: %[[INTPTR:.*]] = memref.extract_aligned_pointer_as_index %[[W]]#1 : memref<256x256xf16> -> index +// CHECK-NEXT: arith.index_cast %[[INTPTR]] : index to i64 +gpu.module @xevm_module{ + gpu.func @memref_extract_aligned_pointer_as_index(%arg0 : memref<256x256xf16>, %laneid: index) { + %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (index) { + %ptr = memref.extract_aligned_pointer_as_index %arg0 : memref<256x256xf16> -> index + gpu.yield %ptr : index + } + %ptr_i64 = arith.index_cast %r : index to i64 + "some_user_op"(%ptr_i64) : (i64) -> () + gpu.return + } +} + + +// ----- +// CHECK-LABEL: gpu.func @vector_transpose( +// CHECK: %[[W:.*]]:2 = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (vector<2x1xf32>, vector<1x2xf32>) { +// CHECK: %[[SRC:.*]] = "some_op"() {{.*}} : () -> vector<16x2xf32> +// CHECK: gpu.yield %{{.*}}, %[[SRC]] : vector<2x16xf32>, vector<16x2xf32> +// CHECK-NEXT: } +// CHECK-NEXT: %[[T1:.*]] = vector.transpose %[[W]]#1, [1, 0] : vector<1x2xf32> to vector<2x1xf32> +gpu.module @xevm_module{ + gpu.func @vector_transpose(%arg0: memref<2x16xf32>, %laneid: index) { + %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<2x1xf32>) { + %cst = "some_op"() + {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>} + : () -> (vector<16x2xf32>) + %transpose = vector.transpose %cst, [1, 0] + { + layout_operand_0 = #xegpu.layout<lane_layout = [16 , 1], lane_data = [1, 1]>, + layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]> + } + : vector<16x2xf32> to vector<2x16xf32> + gpu.yield %transpose : vector<2x16xf32> + } + "some_user_op"(%r) : (vector<2x1xf32>) -> () + gpu.return + } +} + +// ----- +// CHECK-LABEL: gpu.func @vector_bitcast( +// CHECK: %[[W:.*]]:2 = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (vector<4x1xi16>, vector<4x2xi8>) { +// CHECK: %[[SRC:.*]] = "some_op"() {{.*}} : () -> vector<4x32xi8> +// CHECK: gpu.yield %{{.*}}, %[[SRC]] : vector<4x16xi16>, vector<4x32xi8> +// CHECK: } +// CHECK: vector.bitcast %[[W]]#1 : vector<4x2xi8> to vector<4x1xi16> +gpu.module @xevm_module{ + gpu.func @vector_bitcast(%arg0: memref<4x16xi16>, %laneid: index) { + %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<4x1xi16>) { + %cst = "some_op"() + {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 2]>} + : () -> (vector<4x32xi8>) + %bitcast = vector.bitcast %cst + { + layout_operand_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 2]>, + layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]> + } + : vector<4x32xi8> to vector<4x16xi16> + gpu.yield %bitcast : vector<4x16xi16> + } + "some_user_op"(%r) : (vector<4x1xi16>) -> () + gpu.return + } +} diff --git a/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir b/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir index 59fac26..0e1365a 100644 --- a/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir +++ b/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir @@ -1,198 +1,76 @@ // RUN: mlir-opt --xevm-attach-target='module=xevm_* chip=pvc' -xegpu-subgroup-distribute \ // RUN: -allow-unregistered-dialect -canonicalize -cse -split-input-file %s | FileCheck %s -// RUN: mlir-opt --xevm-attach-target='module=xevm_* chip=pvc' \ -// RUN: -xegpu-subgroup-distribute="enable-sg-reductions=false" -allow-unregistered-dialect \ -// RUN: -canonicalize -cse -split-input-file %s | FileCheck 
%s --check-prefix=CHECK-REDUCTION - -// CHECK-LABEL: gpu.func @store_nd_1d -// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<16xf32>) { -// CHECK-DAG: %[[CST:.*]] = arith.constant dense<1.000000e+00> : vector<1xf32> -// CHECK-DAG: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<16xf32> -> !xegpu.tensor_desc<16xf32> -// CHECK: xegpu.store_nd %[[CST]], %[[T0]][%{{.*}}] : vector<1xf32>, !xegpu.tensor_desc<16xf32> -// CHECK: gpu.return -gpu.module @xevm_module{ - gpu.func @store_nd_1d(%arg0: memref<16xf32>) { - %c0 = arith.constant 0 : index - %cst = arith.constant {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} dense<1.000000e+00> : vector<16xf32> - %0 = xegpu.create_nd_tdesc %arg0 : memref<16xf32> -> !xegpu.tensor_desc<16xf32, #xegpu.layout<lane_layout = [16], lane_data = [1]>> - xegpu.store_nd %cst, %0 [%c0] : vector<16xf32>, !xegpu.tensor_desc<16xf32, #xegpu.layout<lane_layout = [16], lane_data = [1]>> - gpu.return - } -} - -// ----- -// CHECK-LABEL: gpu.func @store_nd_2d -// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<16x16xf16>) { -// CHECK-DAG: %[[CST:.*]] = arith.constant dense<1.000000e+00> : vector<16xf16> -// CHECK-DAG: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16> -// CHECK: xegpu.store_nd %[[CST]], %[[T0]][%{{.*}}] : vector<16xf16>, !xegpu.tensor_desc<16x16xf16> -gpu.module @xevm_module{ - gpu.func @store_nd_2d(%arg0: memref<16x16xf16>) { - %c0 = arith.constant 0 : index - %cst = arith.constant {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} dense<1.000000e+00> : vector<16x16xf16> - %0 = xegpu.create_nd_tdesc %arg0 : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - xegpu.store_nd %cst, %0 [%c0, %c0] : vector<16x16xf16>, !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - gpu.return - } -} - - - -// ----- -// CHECK-LABEL: gpu.func @load_nd_1d -// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<16xf32>, %[[ARG1:[0-9a-zA-Z]+]]: memref<16xf32>) { -// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<16xf32> -> !xegpu.tensor_desc<16xf32> -// CHECK-DAG: %[[T1:.*]] = xegpu.load_nd %[[T0]][%{{.*}}] : !xegpu.tensor_desc<16xf32> -> vector<1xf32> -// CHECK-DAG: %[[T2:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<16xf32> -> !xegpu.tensor_desc<16xf32> -// CHECK: xegpu.store_nd %[[T1]], %[[T2]][%{{.*}}] : vector<1xf32>, !xegpu.tensor_desc<16xf32> -gpu.module @xevm_module{ - gpu.func @load_nd_1d(%arg0: memref<16xf32>, %arg1: memref<16xf32>) { - %c0 = arith.constant 0 : index - %0 = xegpu.create_nd_tdesc %arg0 : memref<16xf32> -> !xegpu.tensor_desc<16xf32, #xegpu.layout<lane_layout = [16], lane_data = [1]>> - %1 = xegpu.load_nd %0 [%c0] {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} : !xegpu.tensor_desc<16xf32, #xegpu.layout<lane_layout = [16], lane_data = [1]>> -> vector<16xf32> - %2 = xegpu.create_nd_tdesc %arg1 : memref<16xf32> -> !xegpu.tensor_desc<16xf32, #xegpu.layout<lane_layout = [16], lane_data = [1]>> - xegpu.store_nd %1, %2 [%c0] : vector<16xf32>, !xegpu.tensor_desc<16xf32, #xegpu.layout<lane_layout = [16], lane_data = [1]>> - gpu.return - } -} - -// ----- -// CHECK-LABEL: gpu.func @load_nd_2d -// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<16x16xf16>, %[[ARG1:[0-9a-zA-Z]+]]: memref<16x16xf16>) { -// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16> -// CHECK-DAG: %[[T1:.*]] = xegpu.load_nd %[[T0]][%{{.*}}] : 
!xegpu.tensor_desc<16x16xf16> -> vector<16xf16> -// CHECK-DAG: %[[T2:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16> -// CHECK: xegpu.store_nd %[[T1]], %[[T2]][%{{.*}}] : vector<16xf16>, !xegpu.tensor_desc<16x16xf16> -gpu.module @xevm_module{ - gpu.func @load_nd_2d(%arg0: memref<16x16xf16>, %arg1: memref<16x16xf16>) { - %c0 = arith.constant 0 : index - %0 = xegpu.create_nd_tdesc %arg0 : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - %1 = xegpu.load_nd %0[%c0, %c0] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<16x16xf16> - %2 = xegpu.create_nd_tdesc %arg1: memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - xegpu.store_nd %1, %2[%c0, %c0] : vector<16x16xf16>, !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - gpu.return - } -} - -// ----- -// CHECK-LABEL: gpu.func @load_nd_array_length -// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<16x16xf16>, %[[ARG1:[0-9a-zA-Z]+]]: memref<16x16xf16>) { -// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<array_length = 2 : i64>> -// CHECK: %[[T1:.*]] = xegpu.load_nd %[[T0]][%{{.*}}] : !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<array_length = 2 : i64>> -> vector<32xf16> -// CHECK: %[[T2:.*]] = vector.shape_cast %[[T1]] : vector<32xf16> to vector<2x16x1xf16> -// CHECK: %[[T3:.*]] = vector.extract %[[T2]][0] : vector<16x1xf16> from vector<2x16x1xf16> -// CHECK-DAG: %[[T4:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16> -// CHECK-DAG: %[[T5:.*]] = vector.shape_cast %[[T3]] : vector<16x1xf16> to vector<16xf16> -// CHECK: xegpu.store_nd %[[T5]], %[[T4]][%{{.*}}] : vector<16xf16>, !xegpu.tensor_desc<16x16xf16> -gpu.module @xevm_module{ - gpu.func @load_nd_array_length(%arg0: memref<16x16xf16>, %arg1: memref<16x16xf16>) { - %c0 = arith.constant 0 : index - %0 = xegpu.create_nd_tdesc %arg0 : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<array_length = 2 : i64>, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - %1 = xegpu.load_nd %0[%c0, %c0] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<array_length = 2 : i64>, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<2x16x16xf16> - %2 = vector.extract %1[%c0] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : vector<16x16xf16> from vector<2x16x16xf16> - %3 = xegpu.create_nd_tdesc %arg1 : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - xegpu.store_nd %2, %3[%c0, %c0] : vector<16x16xf16>, !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - gpu.return - } -} - -// ----- -// CHECK-LABEL: gpu.func @load_dpas_store -// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<8x16xf16>, %[[ARG1:[0-9a-zA-Z]+]]: memref<16x16xf16>, %[[ARG2:[0-9a-zA-Z]+]]: memref<8x16xf32>) { -// CHECK: %[[T2:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16> -// CHECK: %[[T3:.*]] = xegpu.load_nd %[[T2]][%{{.*}}] : !xegpu.tensor_desc<8x16xf16> -> vector<8xf16> -// CHECK: %[[T0:.*]] = 
xegpu.create_nd_tdesc %[[ARG1]] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16> -// CHECK: %[[T1:.*]] = xegpu.load_nd %[[T0]][%{{.*}}] <{packed}> : !xegpu.tensor_desc<16x16xf16> -> vector<16xf16> -// CHECK-DAG: %[[T4:.*]] = xegpu.dpas %[[T3]], %[[T1]] : vector<8xf16>, vector<16xf16> -> vector<8xf32> -// CHECK-DAG: %[[T5:.*]] = xegpu.create_nd_tdesc %[[ARG2]] : memref<8x16xf32> -> !xegpu.tensor_desc<8x16xf32> -// CHECK: xegpu.store_nd %[[T4]], %[[T5]][%{{.*}}] : vector<8xf32>, !xegpu.tensor_desc<8x16xf32> -gpu.module @xevm_module{ - gpu.func @load_dpas_store(%arg0: memref<8x16xf16>, %arg1: memref<16x16xf16>, %arg2: memref<8x16xf32>) { - %c0 = arith.constant 0 : index - %0 = xegpu.create_nd_tdesc %arg0 : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - %1 = xegpu.load_nd %0[%c0, %c0] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : !xegpu.tensor_desc<8x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<8x16xf16> - %2 = xegpu.create_nd_tdesc %arg1: memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>> - %3 = xegpu.load_nd %2[%c0, %c0] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>} : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>> -> vector<16x16xf16> - %4 = xegpu.dpas %1, %3 {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : vector<8x16xf16>, vector<16x16xf16> -> vector<8x16xf32> - %5 = xegpu.create_nd_tdesc %arg2 : memref<8x16xf32> -> !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - xegpu.store_nd %4, %5[%c0, %c0] : vector<8x16xf32>, !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - gpu.return - } -} - - -// ----- // CHECK-LABEL: gpu.func @load_dpas_postop_store -// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<8x16xf16>, %[[ARG1:[0-9a-zA-Z]+]]: memref<16x16xf16>, %[[ARG2:[0-9a-zA-Z]+]]: memref<8x16xf32>) { -// CHECK: %[[T2:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16> -// CHECK: %[[T3:.*]] = xegpu.load_nd %[[T2]][%{{.*}}] : !xegpu.tensor_desc<8x16xf16> -> vector<8xf16> -// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16> -// CHECK: %[[T1:.*]] = xegpu.load_nd %[[T0]][%{{.*}}] <{packed}> : !xegpu.tensor_desc<16x16xf16> -> vector<16xf16> -// CHECK-DAG: %[[T4:.*]] = xegpu.dpas %[[T3]], %[[T1]] : vector<8xf16>, vector<16xf16> -> vector<8xf32> -// CHECK: %[[T5:.*]] = vector.shape_cast %[[T4]] : vector<8xf32> to vector<8x1xf32> -// CHECK: %[[T6:.*]] = math.exp %[[T5]] {{{.*}}} : vector<8x1xf32> -// CHECK-DAG: %[[T8:.*]] = vector.shape_cast %[[T6]] : vector<8x1xf32> to vector<8xf32> -// CHECK-DAG: %[[T7:.*]] = xegpu.create_nd_tdesc %[[ARG2]] : memref<8x16xf32> -> !xegpu.tensor_desc<8x16xf32> -// CHECK: xegpu.store_nd %[[T8]], %[[T7]][{{.*}}] : vector<8xf32>, !xegpu.tensor_desc<8x16xf32> +// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<8x16xf16>, %[[ARG1:[0-9a-zA-Z]+]]: memref<16x16xf16>, +// CHECK-SAME: %[[ARG2:[0-9a-zA-Z]+]]: memref<8x16xf32>) { +// CHECK: %[[T2:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16> +// CHECK: %[[T3:.*]] = xegpu.load_nd %[[T2]][%{{.*}}] : !xegpu.tensor_desc<8x16xf16> -> vector<8xf16> +// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16> +// 
CHECK: %[[T1:.*]] = xegpu.load_nd %[[T0]][%{{.*}}] <{packed}> : !xegpu.tensor_desc<16x16xf16> -> vector<16xf16> +// CHECK-DAG: %[[T4:.*]] = xegpu.dpas %[[T3]], %[[T1]] : vector<8xf16>, vector<16xf16> -> vector<8xf32> +// CHECK: %[[T5:.*]] = vector.shape_cast %[[T4]] : vector<8xf32> to vector<8x1xf32> +// CHECK: %[[T6:.*]] = math.exp %[[T5]] {{{.*}}} : vector<8x1xf32> +// CHECK-DAG: %[[T8:.*]] = vector.shape_cast %[[T6]] : vector<8x1xf32> to vector<8xf32> +// CHECK-DAG: %[[T7:.*]] = xegpu.create_nd_tdesc %[[ARG2]] : memref<8x16xf32> -> !xegpu.tensor_desc<8x16xf32> +// CHECK: xegpu.store_nd %[[T8]], %[[T7]][{{.*}}] : vector<8xf32>, !xegpu.tensor_desc<8x16xf32> gpu.module @xevm_module{ gpu.func @load_dpas_postop_store(%arg0: memref<8x16xf16>, %arg1: memref<16x16xf16>, %arg2: memref<8x16xf32>) { %c0 = arith.constant 0 : index - %0 = xegpu.create_nd_tdesc %arg0 : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - %1 = xegpu.load_nd %0[%c0, %c0] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : !xegpu.tensor_desc<8x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<8x16xf16> - %2 = xegpu.create_nd_tdesc %arg1: memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>> - %3 = xegpu.load_nd %2[%c0, %c0] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>} : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>> -> vector<16x16xf16> - %4 = xegpu.dpas %1, %3 {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : vector<8x16xf16>, vector<16x16xf16> -> vector<8x16xf32> - %5 = math.exp %4 {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : vector<8x16xf32> - %6 = xegpu.create_nd_tdesc %arg2 : memref<8x16xf32> -> !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - xegpu.store_nd %5, %6[%c0, %c0] : vector<8x16xf32>, !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - gpu.return - } -} + %0 = xegpu.create_nd_tdesc %arg0 : memref<8x16xf16> + -> !xegpu.tensor_desc<8x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> + %1 = xegpu.load_nd %0[%c0, %c0] + {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : + !xegpu.tensor_desc<8x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<8x16xf16> + + %2 = xegpu.create_nd_tdesc %arg1: memref<16x16xf16> + -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>> + %3 = xegpu.load_nd %2[%c0, %c0] + {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>} + : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>> + -> vector<16x16xf16> + + %4 = xegpu.dpas %1, %3 + {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} + : vector<8x16xf16>, vector<16x16xf16> -> vector<8x16xf32> -// ----- -// CHECK-LABEL: gpu.func @create_nd_tdesc_non_memref -// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: ui64, %[[ARG1:[0-9a-zA-Z]+]]: ui64, %[[ARG2:[0-9a-zA-Z]+]]: index, -// CHECK-SAME: %[[ARG3:[0-9a-zA-Z]+]]: index, %[[ARG4:[0-9a-zA-Z]+]]: index, -// CHECK-SAME: %[[ARG5:[0-9a-zA-Z]+]]: index, %[[ARG6:[0-9a-zA-Z]+]]: index, %[[ARG7:[0-9a-zA-Z]+]]: index) { -// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]], shape : [%[[ARG2]], %[[ARG3]]], strides : [%[[ARG4]], %[[ARG5]]] : ui64 -> 
!xegpu.tensor_desc<16x16xf16> -// CHECK: %[[T1:.*]] = xegpu.load_nd %[[T0]][{{.*}}] : !xegpu.tensor_desc<16x16xf16> -> vector<16xf16> -// CHECK: %[[T2:.*]] = xegpu.create_nd_tdesc %[[ARG1]], shape : [%[[ARG2]], %[[ARG3]]], strides : [%[[ARG4]], %[[ARG5]]] : ui64 -> !xegpu.tensor_desc<16x16xf16> -// CHECK: xegpu.store_nd %[[T1]], %[[T2]][{{.*}}] : vector<16xf16>, !xegpu.tensor_desc<16x16xf16> -gpu.module @xevm_module{ - gpu.func @create_nd_tdesc_non_memref(%arg0: ui64, %arg1: ui64, %arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: index, %arg7: index) { - %c0 = arith.constant 0 : index - %0 = xegpu.create_nd_tdesc %arg0, shape:[%arg2, %arg3], strides:[%arg4, %arg5] : ui64 -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - %1 = xegpu.load_nd %0[%c0, %c0] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<16x16xf16> - %2 = xegpu.create_nd_tdesc %arg1, shape:[%arg2, %arg3], strides:[%arg4, %arg5] : ui64 -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - xegpu.store_nd %1, %2[%c0, %c0] : vector<16x16xf16>, !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> + %5 = math.exp %4 + {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} + : vector<8x16xf32> + + %6 = xegpu.create_nd_tdesc %arg2 : memref<8x16xf32> -> + !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> + xegpu.store_nd %5, %6[%c0, %c0] : vector<8x16xf32>, + !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> gpu.return } } // ----- -// TODO: gemm does not use update_nd_offset because of an issue in scf-for distribution. 
// CHECK-LABEL: gpu.func @gemm -// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<1024x1024xbf16>, %[[ARG1:[0-9a-zA-Z]+]]: memref<1024x1024xbf16>, %[[ARG2:[0-9a-zA-Z]+]]: memref<1024x1024xf32>) { -// CHECK-DAG: %[[BLOCK_ID_X:.*]] = gpu.block_id x -// CHECK-DAG: %[[BLOCK_ID_Y:.*]] = gpu.block_id y -// CHECK-DAG: %[[Y_COORD:.*]] = arith.muli %[[BLOCK_ID_Y]], %c16 : index -// CHECK-DAG: %[[X_COORD:.*]] = arith.muli %[[BLOCK_ID_X]], %c8 : index -// CHECK: %[[T2:.*]] = xegpu.create_nd_tdesc %[[ARG2]] : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> -// CHECK-NEXT: %[[T3:.*]] = xegpu.load_nd %[[T2]][%[[X_COORD]], %[[Y_COORD]]] : !xegpu.tensor_desc<8x16xf32> -> vector<8xf32> -// CHECK-NEXT: %[[T4:.*]] = vector.shape_cast %[[T3]] : vector<8xf32> to vector<8x1xf32> -// CHECK: %[[T5:.*]] = scf.for %[[K:.*]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ARG4:.*]] = %[[T4]]) -> (vector<8x1xf32>) { -// CHECK-DAG: %[[T10:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<1024x1024xbf16> -> !xegpu.tensor_desc<16x16xbf16> -// CHECK-DAG: %[[T11:.*]] = xegpu.load_nd %[[T10]][%[[K]], %[[Y_COORD]]] <{packed}> : !xegpu.tensor_desc<16x16xbf16> -> vector<16xbf16> -// CHECK-DAG: %[[T12:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<1024x1024xbf16> -> !xegpu.tensor_desc<8x16xbf16> -// CHECK-DAG: %[[T13:.*]] = xegpu.load_nd %[[T12]][%[[X_COORD]], %[[K]]] : !xegpu.tensor_desc<8x16xbf16> -> vector<8xbf16> -// CHECK-DAG: %[[T14:.*]] = vector.shape_cast %[[ARG4]] : vector<8x1xf32> to vector<8xf32> -// CHECK-NEXT: %[[T15:.*]] = xegpu.dpas %[[T13]], %[[T11]], %[[T14]] : vector<8xbf16>, vector<16xbf16>, vector<8xf32> -> vector<8xf32> -// CHECK-NEXT: %[[T16:.*]] = vector.shape_cast %[[T15]] : vector<8xf32> to vector<8x1xf32> -// CHECK-NEXT: scf.yield %[[T16]] : vector<8x1xf32> -// CHECK-NEXT: } -// CHECK-NEXT: %[[T9:.*]] = vector.shape_cast %[[T5]] : vector<8x1xf32> to vector<8xf32> -// CHECK-NEXT: xegpu.store_nd %[[T9]], %[[T2]][%[[X_COORD]], %[[Y_COORD]]] : vector<8xf32>, !xegpu.tensor_desc<8x16xf32> +// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<1024x1024xbf16>, %[[ARG1:[0-9a-zA-Z]+]]: memref<1024x1024xbf16>, +// CHECK-SAME: %[[ARG2:[0-9a-zA-Z]+]]: memref<1024x1024xf32>) { +// CHECK-DAG: %[[BLOCK_ID_X:.*]] = gpu.block_id x +// CHECK-DAG: %[[BLOCK_ID_Y:.*]] = gpu.block_id y +// CHECK-DAG: %[[Y_COORD:.*]] = arith.muli %[[BLOCK_ID_Y]], %c16 : index +// CHECK-DAG: %[[X_COORD:.*]] = arith.muli %[[BLOCK_ID_X]], %c8 : index +// CHECK: %[[T2:.*]] = xegpu.create_nd_tdesc %[[ARG2]] : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> +// CHECK-NEXT: %[[T3:.*]] = xegpu.load_nd %[[T2]][%[[X_COORD]], %[[Y_COORD]]] : !xegpu.tensor_desc<8x16xf32> -> vector<8xf32> +// CHECK-NEXT: %[[T4:.*]] = vector.shape_cast %[[T3]] : vector<8xf32> to vector<8x1xf32> +// CHECK: %[[T5:.*]] = scf.for %[[K:.*]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ARG4:.*]] = %[[T4]]) +// CHECK-SAME: -> (vector<8x1xf32>) { +// CHECK-DAG: %[[T10:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<1024x1024xbf16> -> !xegpu.tensor_desc<16x16xbf16> +// CHECK-DAG: %[[T11:.*]] = xegpu.load_nd %[[T10]][%[[K]], %[[Y_COORD]]] <{packed}> : !xegpu.tensor_desc<16x16xbf16> -> vector<16xbf16> +// CHECK-DAG: %[[T12:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<1024x1024xbf16> -> !xegpu.tensor_desc<8x16xbf16> +// CHECK-DAG: %[[T13:.*]] = xegpu.load_nd %[[T12]][%[[X_COORD]], %[[K]]] : !xegpu.tensor_desc<8x16xbf16> -> vector<8xbf16> +// CHECK-DAG: %[[T14:.*]] = vector.shape_cast %[[ARG4]] : vector<8x1xf32> to vector<8xf32> +// CHECK-NEXT: %[[T15:.*]] = xegpu.dpas %[[T13]], %[[T11]], 
%[[T14]] +// CHECK-SAME: : vector<8xbf16>, vector<16xbf16>, vector<8xf32> -> vector<8xf32> +// CHECK-NEXT: %[[T16:.*]] = vector.shape_cast %[[T15]] : vector<8xf32> to vector<8x1xf32> +// CHECK-NEXT: scf.yield %[[T16]] : vector<8x1xf32> +// CHECK-NEXT: } +// CHECK-NEXT: %[[T9:.*]] = vector.shape_cast %[[T5]] : vector<8x1xf32> to vector<8xf32> +// CHECK-NEXT: xegpu.store_nd %[[T9]], %[[T2]][%[[X_COORD]], %[[Y_COORD]]] : vector<8xf32>, !xegpu.tensor_desc<8x16xf32> gpu.module @xevm_module{ gpu.func @gemm(%arg0: memref<1024x1024xbf16>, %arg1: memref<1024x1024xbf16>, %arg2: memref<1024x1024xf32>){ %c0 = arith.constant 0 : index @@ -203,213 +81,56 @@ gpu.func @gemm(%arg0: memref<1024x1024xbf16>, %arg1: memref<1024x1024xbf16>, %ar %block_id_y = gpu.block_id y %0 = arith.muli %block_id_x, %c8 : index %1 = arith.muli %block_id_y, %c16 : index - %2 = xegpu.create_nd_tdesc %arg2 : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - %3 = xegpu.load_nd %2[%0, %1] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<8x16xf32> - %4 = scf.for %arg3 = %c0 to %c1024 step %c16 iter_args(%arg4 = %3) -> (vector<8x16xf32>) { - %5 = xegpu.create_nd_tdesc %arg0: memref<1024x1024xbf16> -> !xegpu.tensor_desc<8x16xbf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - %6 = xegpu.create_nd_tdesc %arg1 : memref<1024x1024xbf16> -> !xegpu.tensor_desc<16x16xbf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>> - %7 = xegpu.load_nd %5[%0, %arg3] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : !xegpu.tensor_desc<8x16xbf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<8x16xbf16> - %8 = xegpu.load_nd %6[%arg3, %1] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>} : !xegpu.tensor_desc<16x16xbf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>> -> vector<16x16xbf16> - %9 = xegpu.dpas %7, %8, %arg4 {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : vector<8x16xbf16>, vector<16x16xbf16>, vector<8x16xf32> -> vector<8x16xf32> - scf.yield %9 : vector<8x16xf32> - } {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} - xegpu.store_nd %4, %2[%0, %1] : vector<8x16xf32>, !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - gpu.return -} -} + %2 = xegpu.create_nd_tdesc %arg2 : memref<1024x1024xf32> -> + !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> + %3 = xegpu.load_nd %2[%0, %1] + {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} + : !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<8x16xf32> -// ----- -// CHECK-LABEL: gpu.func @prefetch_2d -// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<256x256xf16>) { -// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<256x256xf16> -> !xegpu.tensor_desc<16x16xf16> -// CHECK: xegpu.prefetch_nd %[[T0]][%{{.*}}] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<16x16xf16> -gpu.module @xevm_module{ - gpu.func @prefetch_2d(%arg0: memref<256x256xf16>) { - %c0 = arith.constant 0 : index - %0 = xegpu.create_nd_tdesc %arg0 : memref<256x256xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - xegpu.prefetch_nd %0[%c0, 
%c0] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - gpu.return - } -} - -// ----- -// CHECK-LABEL: gpu.func @prefetch_1d -// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<256xf16>) { -// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<256xf16> -> !xegpu.tensor_desc<16xf16> -// CHECK: xegpu.prefetch_nd %[[T0]][%{{.*}}] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<16xf16> -gpu.module @xevm_module{ - gpu.func @prefetch_1d(%arg0: memref<256xf16>) { - %c0 = arith.constant 0 : index - %0 = xegpu.create_nd_tdesc %arg0: memref<256xf16> -> !xegpu.tensor_desc<16xf16, #xegpu.layout<lane_layout = [16], lane_data = [1]>> - xegpu.prefetch_nd %0[%c0] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<16xf16, #xegpu.layout<lane_layout = [16], lane_data = [1]>> - gpu.return - } -} + %4 = scf.for %arg3 = %c0 to %c1024 step %c16 iter_args(%arg4 = %3) -> (vector<8x16xf32>) { -// ----- -// CHECK-LABEL: gpu.func @gpu_barrier({{.*}}) { -// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<256xf16> -> !xegpu.tensor_desc<16xf16> -// CHECK-NEXT: %[[T1:.*]] = xegpu.load_nd %[[T0]][{{.*}}] : !xegpu.tensor_desc<16xf16> -> vector<1xf16> -// CHECK-NEXT: gpu.barrier -// CHECK-NEXT: %[[T2:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<256xf16> -> !xegpu.tensor_desc<16xf16> -// CHECK-NEXT: xegpu.store_nd %[[T1]], %[[T2]][{{.*}}] : vector<1xf16>, !xegpu.tensor_desc<16xf16> -gpu.module @xevm_module{ - gpu.func @gpu_barrier(%arg0: memref<256xf16>, %arg1: memref<256xf16>) { - %c0 = arith.constant 0 : index - %0 = xegpu.create_nd_tdesc %arg0 : memref<256xf16> -> !xegpu.tensor_desc<16xf16, #xegpu.layout<lane_layout = [16], lane_data = [1]>> - %1 = xegpu.load_nd %0[%c0] {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} : !xegpu.tensor_desc<16xf16, #xegpu.layout<lane_layout = [16], lane_data = [1]>> -> vector<16xf16> - gpu.barrier - %2 = xegpu.create_nd_tdesc %arg1 : memref<256xf16> -> !xegpu.tensor_desc<16xf16, #xegpu.layout<lane_layout = [16], lane_data = [1]>> - xegpu.store_nd %1, %2[%c0] : vector<16xf16>, !xegpu.tensor_desc<16xf16, #xegpu.layout<lane_layout = [16], lane_data = [1]>> - gpu.return - } -} + %5 = xegpu.create_nd_tdesc %arg0: memref<1024x1024xbf16> + -> !xegpu.tensor_desc<8x16xbf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> + %6 = xegpu.create_nd_tdesc %arg1 : memref<1024x1024xbf16> + -> !xegpu.tensor_desc<16x16xbf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>> -// ----- -// CHECK-LABEL: gpu.func @vector_multi_reduction_dim1_distributed_dim0_reduction -// CHECK: %[[W:.*]]:2 = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> -// CHECK-SAME: (!xegpu.tensor_desc<1x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, vector<16x2xf32>) { -// CHECK: %[[SRC:.*]] = "some_def"() {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : () -> vector<16x32xf32> -// CHECK-NEXT: gpu.yield %{{.*}}, %[[SRC]] : !xegpu.tensor_desc<1x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, vector<16x32xf32> -// CHECK-NEXT: } -// CHECK: %[[COL0:.*]] = vector.extract_strided_slice %[[W]]#1 {offsets = [0, 0], sizes = [16, 1], strides = [1, 1]} : vector<16x2xf32> to vector<16x1xf32> -// CHECK-NEXT: %[[CAST0:.*]] = vector.shape_cast %[[COL0]] : vector<16x1xf32> to vector<16xf32> -// CHECK-NEXT: %[[RED0:.*]] = 
vector.reduction <add>, %[[CAST0]], %{{.*}} : vector<16xf32> into f32 -// CHECK: %[[COL1:.*]] = vector.extract_strided_slice %[[W]]#1 {offsets = [0, 1], sizes = [16, 1], strides = [1, 1]} : vector<16x2xf32> to vector<16x1xf32> -// CHECK-NEXT: %[[CAST1:.*]] = vector.shape_cast %[[COL1]] : vector<16x1xf32> to vector<16xf32> -// CHECK-NEXT: %[[RED1:.*]] = vector.reduction <add>, %[[CAST1]], %{{.*}} : vector<16xf32> into f32 -// CHECK-NEXT: vector.from_elements %[[RED0]], %[[RED1]] : vector<2xf32> -gpu.module @xevm_module{ -gpu.func @vector_multi_reduction_dim1_distributed_dim0_reduction() { - %c0 = arith.constant 0 : index - %0 = "some_def"() : () -> !xegpu.tensor_desc<1x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - %src = "some_def"() {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : () -> (vector<16x32xf32>) - %acc = arith.constant {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [0]>} dense<0.0> : vector<32xf32> - %1 = vector.multi_reduction <add>, %src, %acc {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [0]>} [0] - : vector<16x32xf32> to vector<32xf32> - %3 = vector.shape_cast %1 {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} - : vector<32xf32> to vector<1x32xf32> - xegpu.store_nd %3, %0[%c0, %c0] : vector<1x32xf32>, !xegpu.tensor_desc<1x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - gpu.return -} -} + %7 = xegpu.load_nd %5[%0, %arg3] + {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} + : !xegpu.tensor_desc<8x16xbf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<8x16xbf16> + %8 = xegpu.load_nd %6[%arg3, %1] + {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>} + : !xegpu.tensor_desc<16x16xbf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>> -> vector<16x16xbf16> -// ----- -// CHECK-REDUCTION-LABEL: gpu.func @vector_multi_reduction_dim1_distributed_dim1_reduction -// CHECK-REDUCTION: %[[W:.*]]:3 = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (!xegpu.tensor_desc<2x16xf32, -// CHECK-REDUCTION-SAME: #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, f32, f32) { -// CHECK-REDUCTION: %[[SRC:.*]] = "some_def"() {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : () -> vector<2x16xf32> -// CHECK-REDUCTION-NEXT: %[[ROW0:.*]] = vector.extract %[[SRC]][0] : vector<16xf32> from vector<2x16xf32> -// CHECK-REDUCTION-NEXT: %[[R0:.*]] = vector.reduction <add>, %[[ROW0]], %{{.*}} : vector<16xf32> into f32 -// CHECK-REDUCTION-NEXT: %[[ROW1:.*]] = vector.extract %[[SRC]][1] : vector<16xf32> from vector<2x16xf32> -// CHECK-REDUCTION-NEXT: %[[R1:.*]] = vector.reduction <add>, %[[ROW1]], %{{.*}} : vector<16xf32> into f32 -// CHECK-REDUCTION-NEXT: gpu.yield %4, %[[R1]], %[[R0]] : !xegpu.tensor_desc<2x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, f32, f32 -// CHECK-REDUCTION-NEXT: } -// CHECK-REDUCTION-NEXT: vector.from_elements %[[W]]#2, %[[W]]#1 : vector<2xf32> -gpu.module @xevm_module{ -gpu.func @vector_multi_reduction_dim1_distributed_dim1_reduction() { - %c0 = arith.constant 0 : index - %0 = "some_def"() : () -> !xegpu.tensor_desc<2x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - %src = "some_def"() {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : () -> (vector<2x16xf32>) - %acc = arith.constant {layout_result_0 = 
#xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [1]>} dense<0.0> : vector<2xf32> - %1 = vector.multi_reduction <add>, %src, %acc {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [1]>} - [1] : vector<2x16xf32> to vector<2xf32> - %3 = vector.shape_cast %1 {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} - : vector<2xf32> to vector<2x1xf32> - %4 = vector.broadcast %3 {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : vector<2x1xf32> to vector<2x16xf32> - xegpu.store_nd %4, %0[%c0, %c0] : vector<2x16xf32>, !xegpu.tensor_desc<2x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - gpu.return -} -} + %9 = xegpu.dpas %7, %8, %arg4 + {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} + : vector<8x16xbf16>, vector<16x16xbf16>, vector<8x16xf32> -> vector<8x16xf32> -// ----- -// CHECK-LABEL: gpu.func @vector_multi_reduction_dim0_distributed_dim1_reduction -// CHECK: %[[W:.*]]:2 = gpu.warp_execute_on_lane_0(%0)[16] -> -// CHECK-SAME: (!xegpu.tensor_desc<32x1xf32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>>, vector<2x16xf32>) { -// CHECK: %[[SRC:.*]] = "some_def"() {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>} : () -> vector<32x16xf32> -// CHECK-NEXT: gpu.yield %{{.*}}, %[[SRC]] : !xegpu.tensor_desc<32x1xf32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>>, vector<32x16xf32> -// CHECK-NEXT: } -// CHECK: %[[ROW0:.*]] = vector.extract %[[W]]#1[0] : vector<16xf32> from vector<2x16xf32> -// CHECK-NEXT: %[[R0:.*]] = vector.reduction <add>, %[[ROW0]], %{{.*}} : vector<16xf32> into f32 -// CHECK: %[[ROW1:.*]] = vector.extract %[[W]]#1[1] : vector<16xf32> from vector<2x16xf32> -// CHECK-NEXT: %[[R1:.*]] = vector.reduction <add>, %[[ROW1]], %{{.*}} : vector<16xf32> into f32 -// CHECK-NEXT: vector.from_elements %[[R0]], %[[R1]] : vector<2xf32> -gpu.module @xevm_module{ -gpu.func @vector_multi_reduction_dim0_distributed_dim1_reduction() { - %c0 = arith.constant 0 : index - %0 = "some_def"() : () -> !xegpu.tensor_desc<32x1xf32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>> - %src = "some_def"() {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>} : () -> (vector<32x16xf32>) - %acc = arith.constant {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>, dims = [1]>} dense<0.0> : vector<32xf32> - %1 = vector.multi_reduction <add>, %src, %acc {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>, dims = [1]>} [1] - : vector<32x16xf32> to vector<32xf32> - %3 = vector.shape_cast %1 {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>} - : vector<32xf32> to vector<32x1xf32> - xegpu.store_nd %3, %0[%c0, %c0] : vector<32x1xf32>, !xegpu.tensor_desc<32x1xf32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>> - gpu.return -} -} + scf.yield %9 : vector<8x16xf32> + } {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} -// ----- -// CHECK-REDUCTION-LABEL: gpu.func @vector_multi_reduction_dim0_distributed_dim0_reduction -// CHECK-REDUCTION: %[[W:.*]]:3 = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (!xegpu.tensor_desc<16x2xf32, -// CHECK-REDUCTION-SAME: #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>>, f32, f32) { -// CHECK-REDUCTION: %[[SRC:.*]] = "some_def"() {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>} : 
() -> vector<16x2xf32> -// CHECK-REDUCTION-NEXT: %[[COL0:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [0, 0], sizes = [16, 1], strides = [1, 1]} : vector<16x2xf32> to vector<16x1xf32> -// CHECK-REDUCTION-NEXT: %[[CAST0:.*]] = vector.shape_cast %[[COL0]] : vector<16x1xf32> to vector<16xf32> -// CHECK-REDUCTION-NEXT: %[[R0:.*]] = vector.reduction <add>, %[[CAST0]], %{{.*}} : vector<16xf32> into f32 -// CHECK-REDUCTION-NEXT: %[[COL1:.*]] = vector.extract_strided_slice %5 {offsets = [0, 1], sizes = [16, 1], strides = [1, 1]} : vector<16x2xf32> to vector<16x1xf32> -// CHECK-REDUCTION-NEXT: %[[CAST1:.*]] = vector.shape_cast %[[COL1]] : vector<16x1xf32> to vector<16xf32> -// CHECK-REDUCTION-NEXT: %[[R1:.*]] = vector.reduction <add>, %[[CAST1]], %cst : vector<16xf32> into f32 -// CHECK-REDUCTION-NEXT: gpu.yield %4, %[[R1]], %[[R0]] : !xegpu.tensor_desc<16x2xf32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>>, f32, f32 -// CHECK-REDUCTION-NEXT: } -// CHECK-REDUCTION-NEXT: vector.from_elements %[[W]]#2, %[[W]]#1 : vector<2xf32> -gpu.module @xevm_module{ -gpu.func @vector_multi_reduction_dim0_distributed_dim0_reduction() { - %c0 = arith.constant 0 : index - %0 = "some_def"() : () -> !xegpu.tensor_desc<16x2xf32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>> - %src = "some_def"() {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>} : () -> (vector<16x2xf32>) - %acc = arith.constant {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>, dims = [0]>} dense<0.0> : vector<2xf32> - %1 = vector.multi_reduction <add>, %src, %acc {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>, dims = [0]>} - [0] : vector<16x2xf32> to vector<2xf32> - %3 = vector.shape_cast %1 {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>} - : vector<2xf32> to vector<1x2xf32> - %4 = vector.broadcast %3 {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>} : vector<1x2xf32> to vector<16x2xf32> - xegpu.store_nd %4, %0[%c0, %c0] : vector<16x2xf32>, !xegpu.tensor_desc<16x2xf32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>> + xegpu.store_nd %4, %2[%0, %1] : vector<8x16xf32>, + !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> gpu.return } } // ----- -// CHECK-LABEL: gpu.func @scatter_ops_chunksize({{.*}}) { -// CHECK: %[[MASK:.*]] = arith.constant dense<true> : vector<1xi1> -// CHECK-NEXT: %[[LANE_OFFSET:.*]] = arith.constant dense<12> : vector<1xindex> -// CHECK-NEXT: %[[LOADED:.*]] = xegpu.load %arg0[%[[LANE_OFFSET]]], %[[MASK]] <{chunk_size = 8 : i64}> : memref<256xf16>, vector<1xindex>, vector<1xi1> -> vector<8xf16> -// CHECK-NEXT: xegpu.store %[[LOADED]], %arg0[%[[LANE_OFFSET]]], %[[MASK]] <{chunk_size = 8 : i64}> : vector<8xf16>, memref<256xf16>, vector<1xindex>, vector<1xi1> -gpu.module @xevm_module{ - gpu.func @scatter_ops_chunksize(%src: memref<256xf16>) { - %1 = arith.constant {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} dense<1>: vector<16xi1> - %offset = arith.constant {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} dense<12> : vector<16xindex> - %3 = xegpu.load %src[%offset], %1 <{chunk_size=8}> { - layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 2]> - } : memref<256xf16>, vector<16xindex>, vector<16xi1> -> vector<16x8xf16> - xegpu.store %3, %src[%offset], %1 <{chunk_size=8}> : vector<16x8xf16>, memref<256xf16>, vector<16xindex>, 
vector<16xi1> - gpu.return - } -} - -// ----- -// CHECK-LABEL: gpu.func @scatter_ops_scf_yield({{.*}}, -// CHECK-SAME: %[[PREDICATE:.*]]: i1) { -// CHECK: %[[DEFAULT:.*]] = arith.constant dense<1.200000e+01> : vector<8xf16> -// CHECK: %[[OFFSET:.*]] = arith.constant dense<12> : vector<1xindex> -// CHECK: %[[MASK:.*]] = arith.constant dense<true> : vector<1xi1> -// CHECK: %[[PREDICATED_LOAD:.*]] = scf.if %[[PREDICATE]] -> (vector<8xf16>) { -// CHECK-NEXT: %[[LOADED:.*]] = xegpu.load %arg0[%[[OFFSET]]], %[[MASK]] <{chunk_size = 8 : i64}> : memref<256xf16>, vector<1xindex>, vector<1xi1> -> vector<8xf16> -// CHECK-NEXT: scf.yield %[[LOADED]] : vector<8xf16> -// CHECK-NEXT: } else { -// CHECK-NEXT: scf.yield %[[DEFAULT]] : vector<8xf16> -// CHECK-NEXT: } -// CHECK-NEXT: xegpu.store %[[PREDICATED_LOAD]], %arg0[%[[OFFSET]]], %[[MASK]] <{chunk_size = 8 : i64}> : vector<8xf16>, memref<256xf16>, vector<1xindex>, vector<1xi1> +// CHECK-LABEL: gpu.func @scatter_ops_scf_yield +// CHECK: (%{{.*}}: memref<256xf16>, %[[PREDICATE:[a-zA-Z0-9]+]]: i1) { +// CHECK-DAG: %[[CST:.*]] = arith.constant dense<1.200000e+01> : vector<1x8xf16> +// CHECK-DAG: %[[OFFSET:.*]] = arith.constant dense<12> : vector<1xindex> +// CHECK-DAG: %[[MASK:.*]] = arith.constant dense<true> : vector<1xi1> +// CHECK: %[[IF:.*]] = scf.if %[[PREDICATE]] -> (vector<1x8xf16>) { +// CHECK-NEXT: %[[LD:.*]] = xegpu.load %{{.*}}[%[[OFFSET]]], %[[MASK]] <{chunk_size = 8 : i64}> +// CHECK-SAME: : memref<256xf16>, vector<1xindex>, vector<1xi1> -> vector<8xf16> +// CHECK-NEXT: %[[LD_CAST:.*]] = vector.shape_cast %[[LD]] : vector<8xf16> to vector<1x8xf16> +// CHECK-NEXT: scf.yield %[[LD_CAST]] : vector<1x8xf16> +// CHECK-NEXT: } else { +// CHECK-NEXT: scf.yield %[[CST]] : vector<1x8xf16> +// CHECK-NEXT: } +// CHECK-NEXT: %[[IF_CAST:.*]] = vector.shape_cast %[[IF]] : vector<1x8xf16> to vector<8xf16> +// CHECK-NEXT: xegpu.store %[[IF_CAST]], %{{.*}}[%[[OFFSET]]], %[[MASK]] <{chunk_size = 8 : i64}> +// CHECK-SAME: vector<8xf16>, memref<256xf16>, vector<1xindex>, vector<1xi1> gpu.module @xevm_module{ gpu.func @scatter_ops_scf_yield(%src: memref<256xf16>, %pred : i1) { %1 = arith.constant {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} dense<1>: vector<16xi1> @@ -432,13 +153,15 @@ gpu.module @xevm_module{ // ----- // CHECK-LABEL: gpu.func @scatter_ops_scf_non_yield({{.*}}) { -// CHECK: %[[OFFSET:.*]] = arith.constant dense<12> : vector<1xindex> -// CHECK: %[[MASK:.*]] = arith.constant dense<true> : vector<1xi1> -// CHECK: %[[PREDICATE:.*]] = llvm.mlir.poison : i1 -// CHECK: scf.if %[[PREDICATE]] { -// CHECK-NEXT: %[[LOADED:.*]] = xegpu.load %arg0[%[[OFFSET]]], %[[MASK]] <{chunk_size = 8 : i64}> : memref<256xf16>, vector<1xindex>, vector<1xi1> -> vector<8xf16> -// CHECK-NEXT: xegpu.store %[[LOADED]], %arg0[%[[OFFSET]]], %[[MASK]] <{chunk_size = 8 : i64}> : vector<8xf16>, memref<256xf16>, vector<1xindex>, vector<1xi1> -// CHECK-NEXT: } +// CHECK: %[[OFFSET:.*]] = arith.constant dense<12> : vector<1xindex> +// CHECK: %[[MASK:.*]] = arith.constant dense<true> : vector<1xi1> +// CHECK: %[[PREDICATE:.*]] = llvm.mlir.poison : i1 +// CHECK: scf.if %[[PREDICATE]] { +// CHECK-NEXT: %[[LOADED:.*]] = xegpu.load %arg0[%[[OFFSET]]], %[[MASK]] <{chunk_size = 8 : i64}> +// CHECK-SAME: memref<256xf16>, vector<1xindex>, vector<1xi1> -> vector<8xf16> +// CHECK-NEXT: xegpu.store %[[LOADED]], %arg0[%[[OFFSET]]], %[[MASK]] <{chunk_size = 8 : i64}> +// CHECK-SAME: vector<8xf16>, memref<256xf16>, vector<1xindex>, vector<1xi1> +// CHECK-NEXT: } gpu.module 
@xevm_module{ gpu.func @scatter_ops_scf_non_yield(%src: memref<256xf16>) { %pred = llvm.mlir.poison : i1 @@ -455,88 +178,13 @@ gpu.module @xevm_module{ } // ----- -// CHECK-LABEL: gpu.func @scatter_ops({{.*}}) { -// CHECK: %[[MASK:.*]] = arith.constant dense<true> : vector<1xi1> -// CHECK-NEXT: %[[LANE_OFFSET:.*]] = arith.constant dense<12> : vector<1xindex> -// CHECK-NEXT: %[[LOADED:.*]] = xegpu.load %arg0[%[[LANE_OFFSET]]], %[[MASK]] : memref<256xf16>, vector<1xindex>, vector<1xi1> -> vector<1xf16> -// CHECK-NEXT: xegpu.store %[[LOADED]], %arg0[%[[LANE_OFFSET]]], %[[MASK]] : vector<1xf16>, memref<256xf16>, vector<1xindex>, vector<1xi1> -gpu.module @xevm_module{ - gpu.func @scatter_ops(%src: memref<256xf16>) { - %1 = arith.constant {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} dense<1>: vector<16xi1> - %offset = arith.constant {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} dense<12> : vector<16xindex> - %3 = xegpu.load %src[%offset], %1 { - layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]> - } : memref<256xf16>, vector<16xindex>, vector<16xi1> -> vector<16xf16> - xegpu.store %3, %src[%offset], %1 : vector<16xf16>, memref<256xf16>, vector<16xindex>, vector<16xi1> - gpu.return - } -} - -// ----- -// CHECK-LABEL: gpu.func @memref_extract_aligned_pointer_as_index( -// CHECK: %{{.*}} = memref.extract_aligned_pointer_as_index %{{.*}} : memref<256x256xf16> -> index -gpu.module @xevm_module{ - gpu.func @memref_extract_aligned_pointer_as_index(%arg0 : memref<256x256xf16>) { - %c0 = arith.constant 0 : index - %cst = arith.constant {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} dense<1.000000e+00> : vector<16xf16> - %ptr = memref.extract_aligned_pointer_as_index %arg0 : memref<256x256xf16> -> index - %ptr_i64 = arith.index_cast %ptr : index to i64 - %tdesc = xegpu.create_nd_tdesc %ptr_i64, shape: [16], strides: [16] : i64 - -> !xegpu.tensor_desc<16xf16, #xegpu.layout<lane_layout = [16], lane_data = [1]>> - xegpu.store_nd %cst, %tdesc[%c0] : vector<16xf16>, !xegpu.tensor_desc<16xf16, #xegpu.layout<lane_layout = [16], lane_data = [1]>> - gpu.return - } -} - - -// ----- -// CHECK-LABEL: gpu.func @vector_transpose( -// CHECK: %[[CST:.*]] = arith.constant dense<1.000000e+00> : vector<2xf32> -// CHECK: %[[DEST:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<2x16xf32> -> !xegpu.tensor_desc<2x16xf32> -// CHECK: xegpu.store_nd %[[CST]], %[[DEST]][{{.*}}] : vector<2xf32>, !xegpu.tensor_desc<2x16xf32> -gpu.module @xevm_module{ - gpu.func @vector_transpose(%arg0: memref<2x16xf32>) { - %cst = arith.constant {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>} dense<1.000000e+00> - : vector<16x2xf32> - %c0 = arith.constant 0 : index - %transpose = vector.transpose %cst, [1, 0] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} - : vector<16x2xf32> to vector<2x16xf32> - %0 = xegpu.create_nd_tdesc %arg0 : memref<2x16xf32> - -> !xegpu.tensor_desc<2x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - xegpu.store_nd %transpose, %0[%c0, %c0] : vector<2x16xf32>, - !xegpu.tensor_desc<2x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - gpu.return - } -} - -// ----- -// CHECK-LABEL: gpu.func @vector_bitcast( -// CHECK: %[[CAST:.*]] = vector.bitcast %{{.*}} : vector<4x2xi8> to vector<4x1xi16> -// CHECK-NEXT: %[[DEST:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<4x16xi16> -> !xegpu.tensor_desc<4x16xi16> -// CHECK-NEXT: %[[T0:.*]] = vector.shape_cast 
%[[CAST]] : vector<4x1xi16> to vector<4xi16> -// CHECK-NEXT: xegpu.store_nd %[[T0]], %[[DEST]][{{.*}}] : vector<4xi16>, !xegpu.tensor_desc<4x16xi16> -gpu.module @xevm_module{ - gpu.func @vector_bitcast(%arg0: memref<4x16xi16>) { - %cst = "some_op"() {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 2]>} - : () -> (vector<4x32xi8>) - %bitcast = vector.bitcast %cst {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} - : vector<4x32xi8> to vector<4x16xi16> - %c0 = arith.constant 0 : index - %0 = xegpu.create_nd_tdesc %arg0 : memref<4x16xi16> - -> !xegpu.tensor_desc<4x16xi16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - xegpu.store_nd %bitcast, %0[%c0, %c0] : vector<4x16xi16>, - !xegpu.tensor_desc<4x16xi16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> - gpu.return - } -} - -// ----- // CHECK-LABEL: gpu.func @mma_transpose_b( // CHECK: %[[ARG0:[0-9a-zA-Z]+]]: memref<8x16xf16>, %[[ARG1:[0-9a-zA-Z]+]]: memref<16x8xi32>, %[[ARG2:[0-9a-zA-Z]+]]: memref<8x16xf32>) { // CHECK-DAG: %[[ADESC:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-DAG: %[[BDESC:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<16x8xi32> -> !xegpu.tensor_desc<16x8xi32> // CHECK-DAG: %[[A:.*]] = xegpu.load_nd %[[ADESC]][%{{.*}}] : !xegpu.tensor_desc<8x16xf16> -> vector<8xf16> -// CHECK-DAG: %[[B:.*]] = xegpu.load_nd %[[BDESC]][%{{.*}}] <{transpose = array<i64: 1, 0>}> : !xegpu.tensor_desc<16x8xi32> -> vector<8xi32> +// CHECK-DAG: %[[B:.*]] = xegpu.load_nd %[[BDESC]][%{{.*}}] <{transpose = array<i64: 1, 0>}> +// CHECK-SAME: !xegpu.tensor_desc<16x8xi32> -> vector<8xi32> // CHECK-NEXT: %[[BCAST0:.*]] = vector.shape_cast %[[B]] : vector<8xi32> to vector<1x8xi32> // CHECK-NEXT: %[[BCAST1:.*]] = vector.bitcast %[[BCAST0]] : vector<1x8xi32> to vector<1x16xf16> // CHECK-NEXT: %[[BCAST2:.*]] = vector.shape_cast %[[BCAST1]] : vector<1x16xf16> to vector<16xf16> diff --git a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp index e51cac4..6ba7a00 100644 --- a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp +++ b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp @@ -218,6 +218,35 @@ class TestStepOpPattern : public OpConversionPattern<vector::StepOp> { } }; +struct TestXeGPUSGDistribute + : public PassWrapper<TestXeGPUSGDistribute, + OperationPass<gpu::GPUModuleOp>> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestXeGPUSGDistribute) + + StringRef getArgument() const final { return "test-xegpu-sg-distribute"; } + + StringRef getDescription() const final { + return "Test the implementation of XeGPU Subgroup Distribution"; + } + + void getDependentDialects(::mlir::DialectRegistry ®istry) const override { + registry.insert<arith::ArithDialect>(); + registry.insert<memref::MemRefDialect>(); + registry.insert<xegpu::XeGPUDialect>(); + registry.insert<vector::VectorDialect>(); + registry.insert<index::IndexDialect>(); + } + + TestXeGPUSGDistribute() = default; + TestXeGPUSGDistribute(const TestXeGPUSGDistribute &pass) = default; + + void runOnOperation() override { + RewritePatternSet patterns(&getContext()); + xegpu::populateXeGPUSubgroupDistributePatterns(patterns); + (void)applyPatternsGreedily(getOperation(), std::move(patterns)); + } +}; + struct TestXeGPULayoutInterface : public PassWrapper<TestXeGPULayoutInterface, OperationPass<gpu::GPUModuleOp>> { @@ -282,6 +311,7 @@ namespace test { void registerTestXeGPULowerings() { 
PassRegistration<TestXeGPUUnrollingPatterns>();
   PassRegistration<TestXeGPULayoutInterface>();
+  PassRegistration<TestXeGPUSGDistribute>();
 }
 } // namespace test
 } // namespace mlir
diff --git a/openmp/runtime/test/transform/tile/intfor.f90 b/openmp/runtime/test/transform/tile/intfor.F90
index dac0de6..4ca9f14 100644
--- a/openmp/runtime/test/transform/tile/intfor.f90
+++ b/openmp/runtime/test/transform/tile/intfor.F90
@@ -10,6 +10,7 @@
 ! RUN: %t-ub18.exe | FileCheck %s --match-full-lines
 
 program tile_intfor_1d
+  implicit none
   integer i
 
   print *, 'do'
diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
index 5af035d..258d732 100644
--- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
@@ -1445,6 +1445,7 @@ cc_library(
         ":crosstu",
         ":driver",
         ":frontend",
+        ":index",
         ":lex",
         ":rewrite",
         ":static_analyzer_checkers_gen",